// src/corporate/aggregation.rs use super::types::CompanyPrice; use super::storage::*; use crate::util::directories::DataPaths; use tokio::fs; use std::collections::HashMap; #[derive(Debug)] struct DayData { sources: Vec<(CompanyPrice, String)>, // (price, source_ticker) total_volume: u64, vwap: f64, open: f64, high: f64, low: f64, close: f64, } /// Aggregate price data from multiple exchanges, converting all to USD pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> { let company_dir = get_company_dir(paths, lei); for timeframe in ["daily", "5min"].iter() { let source_dir = company_dir.join(timeframe); if !source_dir.exists() { continue; } let mut all_prices: Vec<(CompanyPrice, String)> = Vec::new(); let mut by_date_time: HashMap = HashMap::new(); // Load all sources with their ticker names let mut entries = tokio::fs::read_dir(&source_dir).await?; let mut source_count = 0; let mut sources_used = std::collections::HashSet::new(); while let Some(entry) = entries.next_entry().await? { let source_dir_path = entry.path(); if !source_dir_path.is_dir() { continue; } let source_ticker = source_dir_path .file_name() .and_then(|n| n.to_str()) .unwrap_or("unknown") .to_string(); let prices_path = source_dir_path.join("prices.json"); if !prices_path.exists() { continue; } let content = tokio::fs::read_to_string(&prices_path).await?; let mut prices: Vec = serde_json::from_str(&content)?; if !prices.is_empty() { sources_used.insert(source_ticker.clone()); source_count += 1; } for price in prices { all_prices.push((price, source_ticker.clone())); } } if all_prices.is_empty() { continue; } println!(" Aggregating from {} exchanges: {}", sources_used.len(), sources_used.iter() .map(|s| s.as_str()) .collect::>() .join(", ") ); // Group by date + time (for 5min) or just date for (p, source) in all_prices { let key = if timeframe == &"5min" && !p.time.is_empty() { format!("{}_{}", p.date, p.time) } else { p.date.clone() }; // Convert to USD immediately let usd_rate = super::fx::get_usd_rate(&p.currency).await.unwrap_or(1.0); let mut p_usd = p.clone(); p_usd.open *= usd_rate; p_usd.high *= usd_rate; p_usd.low *= usd_rate; p_usd.close *= usd_rate; p_usd.adj_close *= usd_rate; p_usd.currency = "USD".to_string(); let entry = by_date_time.entry(key.clone()).or_insert(DayData { sources: vec![], total_volume: 0, vwap: 0.0, open: p_usd.open, high: p_usd.high, low: p_usd.low, close: p_usd.close, }); let volume = p.volume.max(1); // avoid div0 let vwap_contrib = p_usd.close * volume as f64; entry.sources.push((p_usd.clone(), source)); entry.total_volume += volume; entry.vwap += vwap_contrib; // Use first open, last close, max high, min low if entry.sources.len() == 1 { entry.open = p_usd.open; } entry.close = p_usd.close; entry.high = entry.high.max(p_usd.high); entry.low = entry.low.min(p_usd.low); } // Finalize aggregated data let mut aggregated: Vec = Vec::new(); for (key, data) in by_date_time { let vwap = data.vwap / data.total_volume as f64; let (date, time) = if key.contains('_') { let parts: Vec<&str> = key.split('_').collect(); (parts[0].to_string(), parts[1].to_string()) } else { (key, "".to_string()) }; // Track which exchange contributed most volume let best_source = data.sources.iter() .max_by_key(|(p, _)| p.volume) .map(|(_, src)| src.clone()) .unwrap_or_else(|| "unknown".to_string()); aggregated.push(CompanyPrice { ticker: format!("{lei}@agg"), // Mark as aggregated date, time, open: data.open, high: data.high, low: data.low, close: data.close, adj_close: vwap, volume: data.total_volume, currency: "USD".to_string(), }); } aggregated.sort_by_key(|p| (p.date.clone(), p.time.clone())); // Save aggregated result let agg_dir = company_dir.join("aggregated").join(timeframe); fs::create_dir_all(&agg_dir).await?; let path = agg_dir.join("prices.json"); fs::write(&path, serde_json::to_string_pretty(&aggregated)?).await?; // Save aggregation metadata let meta = AggregationMetadata { lei: lei.to_string(), // ← CHANGE THIS timeframe: timeframe.to_string(), sources: sources_used.into_iter().collect(), total_bars: aggregated.len(), date_range: ( aggregated.first().map(|p| p.date.clone()).unwrap_or_default(), aggregated.last().map(|p| p.date.clone()).unwrap_or_default(), ), aggregated_at: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), }; let meta_path = agg_dir.join("metadata.json"); fs::write(&meta_path, serde_json::to_string_pretty(&meta)?).await?; println!(" ✓ {} {} bars from {} sources (USD)", aggregated.len(), timeframe, source_count ); } Ok(()) } #[derive(Debug, serde::Serialize, serde::Deserialize)] struct AggregationMetadata { lei: String, timeframe: String, sources: Vec, total_bars: usize, date_range: (String, String), aggregated_at: String, }