adding corporate data to webscraper
This commit is contained in:
113
src/economic/storage.rs
Normal file
113
src/economic/storage.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
// src/economic/storage.rs
|
||||
use super::types::*;
|
||||
use super::helpers::*;
|
||||
use tokio::fs;
|
||||
use chrono::{Local, NaiveDate, Datelike};
|
||||
|
||||
pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
|
||||
let dir = std::path::Path::new("economic_events");
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
if dir.exists() {
|
||||
let mut entries = fs::read_dir(dir).await?;
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if path.extension().map(|e| e == "json").unwrap_or(false) {
|
||||
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||
if name.starts_with("chunk_") {
|
||||
if let Some(content) = fs::read_to_string(&path).await.ok() {
|
||||
if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content) {
|
||||
let start = name[6..16].to_string();
|
||||
let end = name[17..27].to_string();
|
||||
chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: events.len() });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
chunks.sort_by_key(|c| c.start_date.clone());
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMap<String, EconomicEvent>> {
|
||||
let mut map = HashMap::new();
|
||||
for chunk in chunks {
|
||||
let content = fs::read_to_string(&chunk.path).await?;
|
||||
let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
|
||||
for e in events {
|
||||
map.insert(event_key(&e), e);
|
||||
}
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
|
||||
let dir = std::path::Path::new("economic_events");
|
||||
fs::create_dir_all(dir).await?;
|
||||
|
||||
let mut sorted: Vec<_> = events.into_values().collect();
|
||||
sorted.sort_by_key(|e| e.date.clone());
|
||||
|
||||
let mut chunk = Vec::new();
|
||||
let mut start: Option<NaiveDate> = None;
|
||||
for e in sorted {
|
||||
let date = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d")?;
|
||||
if let Some(s) = start {
|
||||
if (date - s).num_days() > 100 || chunk.len() >= 500 {
|
||||
save_chunk(&chunk, dir).await?;
|
||||
chunk.clear();
|
||||
start = Some(date);
|
||||
}
|
||||
} else {
|
||||
start = Some(date);
|
||||
}
|
||||
chunk.push(e);
|
||||
}
|
||||
if !chunk.is_empty() {
|
||||
save_chunk(&chunk, dir).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> {
|
||||
let start = events.iter().map(|e| &e.date).min().unwrap().clone();
|
||||
let end = events.iter().map(|e| &e.date).max().unwrap().clone();
|
||||
let path = dir.join(format!("chunk_{}_{}.json", start, end));
|
||||
fs::write(&path, serde_json::to_string_pretty(events)?).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
|
||||
if changes.is_empty() { return Ok(()); }
|
||||
let dir = std::path::Path::new("economic_event_changes");
|
||||
fs::create_dir_all(dir).await?;
|
||||
|
||||
let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
|
||||
for c in changes {
|
||||
if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
|
||||
let key = format!("{:02}_{}", d.month(), d.year());
|
||||
by_month.entry(key).or_default().push(c.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for (month, list) in by_month {
|
||||
let path = dir.join(format!("event_changes_{}.json", month));
|
||||
let mut all = if path.exists() {
|
||||
let s = fs::read_to_string(&path).await?;
|
||||
serde_json::from_str(&s).unwrap_or_default()
|
||||
} else { vec![] };
|
||||
all.extend(list);
|
||||
fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn target_end_date() -> String {
|
||||
let now = Local::now().naive_local().date();
|
||||
let future = now + chrono::Duration::days(90);
|
||||
future.format("%Y-%m-%d").to_string()
|
||||
}
|
||||
Reference in New Issue
Block a user