fixed force update for new data and changes without new_event change
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -22,7 +22,9 @@ target/
|
|||||||
|
|
||||||
/target
|
/target
|
||||||
|
|
||||||
/chromedriver-win64/*
|
# /chromedriver-win64/*
|
||||||
|
|
||||||
|
# data folders
|
||||||
/economic_events*
|
/economic_events*
|
||||||
/economic_event_changes*
|
/economic_event_changes*
|
||||||
/corporate_events*
|
/corporate_events*
|
||||||
|
|||||||
27
chromedriver-win64/LICENSE.chromedriver
Normal file
27
chromedriver-win64/LICENSE.chromedriver
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
// Copyright 2015 The Chromium Authors
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
// * Redistributions in binary form must reproduce the above
|
||||||
|
// copyright notice, this list of conditions and the following disclaimer
|
||||||
|
// in the documentation and/or other materials provided with the
|
||||||
|
// distribution.
|
||||||
|
// * Neither the name of Google LLC nor the names of its
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
14682
chromedriver-win64/THIRD_PARTY_NOTICES.chromedriver
Normal file
14682
chromedriver-win64/THIRD_PARTY_NOTICES.chromedriver
Normal file
File diff suppressed because it is too large
Load Diff
BIN
chromedriver-win64/chromedriver.exe
Normal file
BIN
chromedriver-win64/chromedriver.exe
Normal file
Binary file not shown.
@@ -50,7 +50,6 @@ pub fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, today: &str) ->
|
|||||||
date: new.date.clone(),
|
date: new.date.clone(),
|
||||||
event: new.event.clone(),
|
event: new.event.clone(),
|
||||||
country: new.country.clone(),
|
country: new.country.clone(),
|
||||||
change_type: field.to_string(),
|
|
||||||
field_changed: field.to_string(),
|
field_changed: field.to_string(),
|
||||||
old_value: old_val.clone(),
|
old_value: old_val.clone(),
|
||||||
new_value: new_val.clone(),
|
new_value: new_val.clone(),
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
// src/economic/storage.rs
|
// src/economic/storage.rs
|
||||||
use super::types::*;
|
use super::types::*;
|
||||||
use super::helpers::*;
|
use super::helpers::*;
|
||||||
use tokio::fs;
|
use tokio::fs;
|
||||||
use chrono::{Local, NaiveDate, Datelike};
|
use chrono::{Local, NaiveDate, Datelike};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
|
pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
|
||||||
let dir = std::path::Path::new("economic_events");
|
let dir = std::path::Path::new("economic_events");
|
||||||
@@ -49,21 +49,29 @@ pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> an
|
|||||||
let dir = std::path::Path::new("economic_events");
|
let dir = std::path::Path::new("economic_events");
|
||||||
fs::create_dir_all(dir).await?;
|
fs::create_dir_all(dir).await?;
|
||||||
|
|
||||||
|
// Delete all old chunk files to prevent duplicates and overlaps
|
||||||
|
println!("Removing old chunks...");
|
||||||
|
|
||||||
|
let mut entries = fs::read_dir(dir).await?;
|
||||||
|
while let Some(entry) = entries.next_entry().await? {
|
||||||
|
let path = entry.path();
|
||||||
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||||
|
if name.starts_with("chunk_") && path.extension().map(|e| e == "json").unwrap_or(false) {
|
||||||
|
fs::remove_file(&path).await?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut sorted: Vec<_> = events.into_values().collect();
|
let mut sorted: Vec<_> = events.into_values().collect();
|
||||||
sorted.sort_by_key(|e| e.date.clone());
|
sorted.sort_by_key(|e| e.date.clone());
|
||||||
|
|
||||||
let mut chunk = Vec::new();
|
let mut chunk: Vec<EconomicEvent> = Vec::new();
|
||||||
let mut start: Option<NaiveDate> = None;
|
// Upper bound on the number of events written into one chunk file:
// (30000 - 2) / 11 = 2727 (integer division).
// NOTE(review): presumably 30000 is a per-file line budget, 2 lines are the
// enclosing array brackets and 11 lines hold one serialized event — confirm
// against the chunk writer before relying on these figures.
const MAX_EVENTS_PER_CHUNK: usize = (30000 - 2) / 11;
|
||||||
|
|
||||||
for e in sorted {
|
for e in sorted {
|
||||||
let date = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d")?;
|
if !chunk.is_empty() && chunk.len() >= MAX_EVENTS_PER_CHUNK {
|
||||||
if let Some(s) = start {
|
|
||||||
if (date - s).num_days() > 100 || chunk.len() >= 500 {
|
|
||||||
save_chunk(&chunk, dir).await?;
|
save_chunk(&chunk, dir).await?;
|
||||||
chunk.clear();
|
chunk.clear();
|
||||||
start = Some(date);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
start = Some(date);
|
|
||||||
}
|
}
|
||||||
chunk.push(e);
|
chunk.push(e);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ pub struct EventChange {
|
|||||||
pub date: String,
|
pub date: String,
|
||||||
pub event: String,
|
pub event: String,
|
||||||
pub country: String,
|
pub country: String,
|
||||||
pub change_type: String, // actual|forecast|time|newly_added|removed
|
|
||||||
pub field_changed: String,
|
pub field_changed: String,
|
||||||
pub old_value: String,
|
pub old_value: String,
|
||||||
pub new_value: String,
|
pub new_value: String,
|
||||||
|
|||||||
@@ -84,7 +84,6 @@ pub fn process_batch(
|
|||||||
date: new.date.clone(),
|
date: new.date.clone(),
|
||||||
event: new.event.clone(),
|
event: new.event.clone(),
|
||||||
country: new.country.clone(),
|
country: new.country.clone(),
|
||||||
change_type: "time".to_string(),
|
|
||||||
field_changed: "time".to_string(),
|
field_changed: "time".to_string(),
|
||||||
old_value: old_event.time.clone(),
|
old_value: old_event.time.clone(),
|
||||||
new_value: new.time.clone(),
|
new_value: new.time.clone(),
|
||||||
@@ -96,19 +95,6 @@ pub fn process_batch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if new.date.as_str() > today {
|
|
||||||
changes.push(EventChange {
|
|
||||||
date: new.date.clone(),
|
|
||||||
event: new.event.clone(),
|
|
||||||
country: new.country.clone(),
|
|
||||||
change_type: "newly_added".to_string(),
|
|
||||||
field_changed: "new_event".to_string(),
|
|
||||||
old_value: "".to_string(),
|
|
||||||
new_value: format!("{} @ {}", new.date, new.time),
|
|
||||||
detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
existing.insert(key, new.clone());
|
existing.insert(key, new.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user