// src/economic/storage.rs use super::types::*; use super::helpers::*; use crate::util::directories::DataPaths; use crate::util::logger; use tokio::fs; use chrono::{NaiveDate, Datelike}; use std::collections::HashMap; use serde_json; const CHUNK_SIZE: usize = 500; // Process 500 events at a time const MAX_EVENTS_PER_FILE: usize = 3000; pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result> { let dir = paths.economic_events_dir(); let mut chunks = Vec::new(); if dir.exists() { let mut entries = fs::read_dir(dir).await?; while let Some(entry) = entries.next_entry().await? { let path = entry.path(); if path.extension().map(|e| e == "json").unwrap_or(false) { if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if name.starts_with("chunk_") { // Don't load the events here, just record the chunk info let start = name[6..16].to_string(); let end = name[17..27].to_string(); chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: 0 // We'll count later if needed }); } } } } } chunks.sort_by_key(|c| c.start_date.clone()); logger::log_info(&format!("Economic Storage: Found {} event chunks", chunks.len())).await; Ok(chunks) } /// Stream events from a single chunk file pub async fn stream_chunk_events( chunk: &ChunkInfo, callback: impl Fn(EconomicEvent) -> anyhow::Result<()> ) -> anyhow::Result { let content = fs::read_to_string(&chunk.path).await?; let events: Vec = serde_json::from_str(&content)?; let count = events.len(); for event in events { callback(event)?; } Ok(count) } /// Load events in batches to avoid memory explosion pub async fn load_events_in_batches( chunks: &[ChunkInfo], batch_size: usize, ) -> anyhow::Result> { let mut all_events = Vec::new(); for chunk in chunks { logger::log_info(&format!("Loading chunk: {:?}", chunk.path.file_name())).await; let content = fs::read_to_string(&chunk.path).await?; let events: Vec = serde_json::from_str(&content)?; for e in events { all_events.push((event_key(&e), e)); } // If we've accumulated enough, yield them if all_events.len() >= batch_size { break; } } logger::log_info(&format!("Loaded {} events in batch", all_events.len())).await; Ok(all_events.into_iter()) } /// Build a lightweight index instead of loading all events #[derive(Debug, Clone)] pub struct EventIndex { pub key: String, pub identity_key: String, pub date: String, pub chunk_file: std::path::PathBuf, } pub async fn build_event_index(chunks: &[ChunkInfo]) -> anyhow::Result> { let mut index = Vec::new(); for chunk in chunks { logger::log_info(&format!("Indexing chunk: {:?}", chunk.path.file_name())).await; let content = fs::read_to_string(&chunk.path).await?; let events: Vec = serde_json::from_str(&content)?; for e in events { index.push(EventIndex { key: event_key(&e), identity_key: identity_key(&e), date: e.date.clone(), chunk_file: chunk.path.clone(), }); } } logger::log_info(&format!("Built index with {} entries", index.len())).await; Ok(index) } /// Look up a specific event by loading only its chunk pub async fn lookup_event_by_key(key: &str, index: &[EventIndex]) -> anyhow::Result> { // Find which chunk contains this event let entry = index.iter().find(|e| e.key == key); if let Some(entry) = entry { // Load only that chunk let content = fs::read_to_string(&entry.chunk_file).await?; let events: Vec = serde_json::from_str(&content)?; // Find the specific event Ok(events.into_iter().find(|e| event_key(e) == key)) } else { Ok(None) } } /// Save events in smaller, more manageable chunks pub async fn save_optimized_chunks( paths: &DataPaths, events: Vec // Changed from HashMap to Vec ) -> anyhow::Result<()> { let dir = paths.economic_events_dir(); fs::create_dir_all(dir).await?; logger::log_info("Economic Storage: Removing old chunk files...").await; let mut entries = fs::read_dir(dir).await?; let mut removed_count = 0; while let Some(entry) = entries.next_entry().await? { let path = entry.path(); if let Some(name) = path.file_name().and_then(|n| n.to_str()) { if name.starts_with("chunk_") && path.extension().map(|e| e == "json").unwrap_or(false) { fs::remove_file(&path).await?; removed_count += 1; } } } logger::log_info(&format!("Economic Storage: Removed {} old chunk files", removed_count)).await; let mut sorted = events; sorted.sort_by(|a, b| a.date.cmp(&b.date)); // Save in smaller chunks let mut chunk_num = 0; for chunk in sorted.chunks(MAX_EVENTS_PER_FILE) { save_chunk_vec(chunk, dir, chunk_num).await?; chunk_num += 1; // Allow other tasks to run tokio::task::yield_now().await; } logger::log_info(&format!("Economic Storage: Saved {} chunks to {:?}", chunk_num, dir)).await; Ok(()) } async fn save_chunk_vec(events: &[EconomicEvent], dir: &std::path::Path, chunk_num: usize) -> anyhow::Result<()> { if events.is_empty() { return Ok(()); } let start = &events[0].date; let end = &events[events.len() - 1].date; let path = dir.join(format!("chunk_{:04}_{}_{}.json", chunk_num, start, end)); // Write incrementally to avoid large memory allocation let json = serde_json::to_string_pretty(events)?; fs::write(&path, json).await?; logger::log_info(&format!("Economic Storage: Saved chunk {} - {} ({} events)", start, end, events.len())).await; Ok(()) } pub async fn save_changes(paths: &DataPaths, changes: &[EventChange]) -> anyhow::Result<()> { if changes.is_empty() { logger::log_info("Economic Storage: No changes to save").await; return Ok(()); } let dir = paths.economic_changes_dir(); fs::create_dir_all(dir).await?; logger::log_info(&format!("Economic Storage: Saving {} changes to {:?}", changes.len(), dir)).await; let mut by_month: HashMap> = HashMap::new(); for c in changes { if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") { let key = format!("{:02}_{}", d.month(), d.year()); by_month.entry(key).or_default().push(c.clone()); } } for (month, list) in by_month { let path = dir.join(format!("event_changes_{}.json", month)); let mut all = if path.exists() { let s = fs::read_to_string(&path).await?; serde_json::from_str(&s).unwrap_or_default() } else { vec![] }; all.extend(list.clone()); fs::write(&path, serde_json::to_string_pretty(&all)?).await?; logger::log_info(&format!("Economic Storage: Saved {} changes for month {}", list.len(), month)).await; } logger::log_info("Economic Storage: All changes saved successfully").await; Ok(()) }