// src/corporate/storage.rs use super::{types::*, helpers::*}; use crate::util::directories::DataPaths; use crate::util::logger; use tokio::fs; use tokio::io::AsyncWriteExt; use std::collections::HashMap; use std::path::{PathBuf, Path}; /// Lightweight index entry - only metadata, no full event data #[derive(Debug, Clone)] pub struct EventIndex { pub key: String, pub ticker: String, pub date: String, pub file_path: PathBuf, } /// Build index of all events without loading them into memory pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result> { let dir = paths.corporate_events_dir(); if !dir.exists() { logger::log_info("Corporate Storage: No events directory found").await; return Ok(Vec::new()); } let mut index = Vec::new(); let mut entries = fs::read_dir(dir).await?; while let Some(entry) = entries.next_entry().await? { let path = entry.path(); if path.extension().and_then(|s| s.to_str()) == Some("json") { let name = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); if name.starts_with("events_") && name.len() == 17 { let content = fs::read_to_string(&path).await?; let events: Vec = serde_json::from_str(&content)?; for event in events { index.push(EventIndex { key: event_key(&event), ticker: event.ticker.clone(), date: event.date.clone(), file_path: path.clone(), }); } } } } logger::log_info(&format!("Corporate Storage: Built index with {} entries", index.len())).await; Ok(index) } pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf { paths.corporate_prices_dir().join(lei) } pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> { let base = get_company_dir(paths, isin); let paths_to_create = [ base.clone(), base.join("5min"), base.join("daily"), base.join("aggregated").join("5min"), base.join("aggregated").join("daily"), ]; for p in paths_to_create { fs::create_dir_all(&p).await?; } Ok(()) } /// Stream companies to JSONL incrementally pub async fn save_companies_to_jsonl_streaming( paths: &DataPaths, companies_iter: impl Iterator)>, ) -> anyhow::Result { let file_path = paths.data_dir().join("companies.jsonl"); if let Some(parent) = file_path.parent() { tokio::fs::create_dir_all(parent).await?; } let mut file = tokio::fs::File::create(&file_path).await?; let mut count = 0; for (name, securities) in companies_iter { let line = serde_json::json!({ "name": name, "securities": securities }); file.write_all(line.to_string().as_bytes()).await?; file.write_all(b"\n").await?; count += 1; if count % 100 == 0 { tokio::task::yield_now().await; } } logger::log_info(&format!("Saved {} companies to JSONL", count)).await; Ok(count) } /// Stream read companies from JSONL pub async fn stream_companies_from_jsonl( path: &Path, mut callback: F ) -> anyhow::Result where F: FnMut(String, HashMap) -> anyhow::Result<()>, { if !path.exists() { return Ok(0); } let content = tokio::fs::read_to_string(path).await?; let mut count = 0; for line in content.lines() { if line.trim().is_empty() { continue; } let entry: serde_json::Value = serde_json::from_str(line)?; let name = entry["name"].as_str().unwrap_or("").to_string(); let securities: HashMap = serde_json::from_value( entry["securities"].clone() )?; callback(name, securities)?; count += 1; if count % 100 == 0 { tokio::task::yield_now().await; } } Ok(count) }