adding corporate data to webscraper

This commit is contained in:
2025-11-21 00:17:59 +01:00
parent 0ea3fcc3b5
commit 9d0d15f3f8
18 changed files with 2128 additions and 970 deletions

113
src/economic/storage.rs Normal file
View File

@@ -0,0 +1,113 @@
use std::collections::HashMap;
// src/economic/storage.rs
use super::types::*;
use super::helpers::*;
use tokio::fs;
use chrono::{Local, NaiveDate, Datelike};
/// Scans the `economic_events` directory and summarises every readable chunk file.
///
/// A file is included only when all of the following hold:
/// - its extension is `json` and its name starts with `chunk_`;
/// - the name is long enough to carry the two 10-byte date fields at the fixed
///   offsets used by `chunk_<start>_<end>.json`;
/// - its contents can be read and deserialize as a list of `EconomicEvent`.
///
/// Files failing any check are skipped silently. The result is sorted by
/// `start_date` (string comparison). If the directory does not exist, an empty
/// list is returned.
///
/// # Errors
/// Returns an error if listing the directory fails.
pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
    let dir = std::path::Path::new("economic_events");
    let mut chunks = Vec::new();
    if !dir.exists() {
        return Ok(chunks);
    }

    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if !path.extension().map(|e| e == "json").unwrap_or(false) {
            continue;
        }

        // Extract the date fields with checked slicing: a stray file such as
        // `chunk_old.json` is too short for the fixed offsets, and plain indexing
        // (`name[6..16]`) would panic on it. Doing this before touching the file
        // also avoids reading files that can never qualify.
        let dates = path
            .file_name()
            .and_then(|n| n.to_str())
            .filter(|n| n.starts_with("chunk_"))
            .and_then(|n| Some((n.get(6..16)?.to_string(), n.get(17..27)?.to_string())));
        let (start_date, end_date) = match dates {
            Some(pair) => pair,
            None => continue,
        };

        // Unreadable or malformed files are skipped rather than failing the scan.
        let content = match fs::read_to_string(&path).await {
            Ok(content) => content,
            Err(_) => continue,
        };
        let events = match serde_json::from_str::<Vec<EconomicEvent>>(&content) {
            Ok(events) => events,
            Err(_) => continue,
        };

        chunks.push(ChunkInfo {
            start_date,
            end_date,
            path,
            event_count: events.len(),
        });
    }

    // Stable sort on the borrowed key; no per-comparison String clone.
    chunks.sort_by(|a, b| a.start_date.cmp(&b.start_date));
    Ok(chunks)
}
/// Reads every chunk file listed in `chunks` and merges the events into one map.
///
/// Events are keyed by `event_key`. Chunks are processed in slice order, so when
/// two events produce the same key the one read last replaces the earlier one.
///
/// # Errors
/// Returns an error as soon as a chunk file cannot be read or does not
/// deserialize as a list of `EconomicEvent`.
pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMap<String, EconomicEvent>> {
    let mut by_key = HashMap::new();
    for info in chunks {
        let raw = fs::read_to_string(&info.path).await?;
        let parsed: Vec<EconomicEvent> = serde_json::from_str(&raw)?;
        by_key.extend(parsed.into_iter().map(|event| (event_key(&event), event)));
    }
    Ok(by_key)
}
pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
let dir = std::path::Path::new("economic_events");
fs::create_dir_all(dir).await?;
let mut sorted: Vec<_> = events.into_values().collect();
sorted.sort_by_key(|e| e.date.clone());
let mut chunk = Vec::new();
let mut start: Option<NaiveDate> = None;
for e in sorted {
let date = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d")?;
if let Some(s) = start {
if (date - s).num_days() > 100 || chunk.len() >= 500 {
save_chunk(&chunk, dir).await?;
chunk.clear();
start = Some(date);
}
} else {
start = Some(date);
}
chunk.push(e);
}
if !chunk.is_empty() {
save_chunk(&chunk, dir).await?;
}
Ok(())
}
/// Serializes `events` as pretty-printed JSON into
/// `<dir>/chunk_<min date>_<max date>.json`.
///
/// The dates in the file name are the smallest and largest `date` strings in
/// `events` (string comparison). An empty slice has no date range to name the
/// file after, so nothing is written and `Ok(())` is returned.
///
/// # Errors
/// Returns an error if serialization or the file write fails.
async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> {
    let first = events.iter().map(|e| &e.date).min();
    let last = events.iter().map(|e| &e.date).max();
    // Both are `None` exactly when `events` is empty; bail out instead of panicking.
    let (start, end) = match (first, last) {
        (Some(start), Some(end)) => (start, end),
        _ => return Ok(()),
    };

    let path = dir.join(format!("chunk_{}_{}.json", start, end));
    fs::write(&path, serde_json::to_string_pretty(events)?).await?;
    Ok(())
}
/// Appends `changes` to per-month log files under `economic_event_changes`.
///
/// Each change is grouped by the month and year of its `date` and appended to
/// `event_changes_<MM>_<YYYY>.json`, which holds a JSON array of `EventChange`.
/// The existing array is read, extended with the new changes and written back
/// pretty-printed. An empty `changes` slice is a no-op and creates nothing.
///
/// Changes whose `date` does not parse as `%Y-%m-%d` are skipped.
///
/// # Errors
/// Returns an error if the directory cannot be created, an existing log file
/// cannot be read (for a reason other than not existing), or a write fails.
pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
    if changes.is_empty() {
        return Ok(());
    }
    let dir = std::path::Path::new("economic_event_changes");
    fs::create_dir_all(dir).await?;

    let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
            let key = format!("{:02}_{}", d.month(), d.year());
            by_month.entry(key).or_default().push(c.clone());
        }
    }

    for (month, list) in by_month {
        let path = dir.join(format!("event_changes_{}.json", month));
        // Read directly and treat "not found" as an empty log. This avoids the
        // blocking `Path::exists` call in async code and the gap between checking
        // for the file and reading it.
        let mut all: Vec<EventChange> = match fs::read_to_string(&path).await {
            // NOTE(review): an existing file that fails to parse is treated as
            // empty and then overwritten; confirm this leniency is intended.
            Ok(s) => serde_json::from_str(&s).unwrap_or_default(),
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Vec::new(),
            Err(err) => return Err(err.into()),
        };
        all.extend(list);
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
    }
    Ok(())
}
/// Returns the date 90 days after today's local date, formatted as `%Y-%m-%d`.
pub fn target_end_date() -> String {
    let today = Local::now().naive_local().date();
    (today + chrono::Duration::days(90))
        .format("%Y-%m-%d")
        .to_string()
}