// src/corporate/aggregation.rs
use std::collections::HashMap;

use tokio::fs;

use super::storage::*;
use super::types::CompanyPrice;
use crate::util::directories::DataPaths;
#[derive(Debug)]
|
|
struct DayData {
|
|
sources: Vec<(CompanyPrice, String)>, // (price, source_ticker)
|
|
total_volume: u64,
|
|
vwap: f64,
|
|
open: f64,
|
|
high: f64,
|
|
low: f64,
|
|
close: f64,
|
|
}
|
|
|
|
/// Aggregate price data from multiple exchanges, converting all to USD
|
|
pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
|
|
let company_dir = get_company_dir(paths, lei);
|
|
|
|
for timeframe in ["daily", "5min"].iter() {
|
|
let source_dir = company_dir.join(timeframe);
|
|
if !source_dir.exists() {
|
|
continue;
|
|
}
|
|
|
|
let mut all_prices: Vec<(CompanyPrice, String)> = Vec::new();
|
|
let mut by_date_time: HashMap<String, DayData> = HashMap::new();
|
|
|
|
// Load all sources with their ticker names
|
|
let mut entries = tokio::fs::read_dir(&source_dir).await?;
|
|
let mut source_count = 0;
|
|
let mut sources_used = std::collections::HashSet::new();
|
|
|
|
while let Some(entry) = entries.next_entry().await? {
|
|
let source_dir_path = entry.path();
|
|
if !source_dir_path.is_dir() { continue; }
|
|
|
|
let source_ticker = source_dir_path
|
|
.file_name()
|
|
.and_then(|n| n.to_str())
|
|
.unwrap_or("unknown")
|
|
.to_string();
|
|
|
|
let prices_path = source_dir_path.join("prices.json");
|
|
if !prices_path.exists() { continue; }
|
|
|
|
let content = tokio::fs::read_to_string(&prices_path).await?;
|
|
let mut prices: Vec<CompanyPrice> = serde_json::from_str(&content)?;
|
|
|
|
if !prices.is_empty() {
|
|
sources_used.insert(source_ticker.clone());
|
|
source_count += 1;
|
|
}
|
|
|
|
for price in prices {
|
|
all_prices.push((price, source_ticker.clone()));
|
|
}
|
|
}
|
|
|
|
if all_prices.is_empty() {
|
|
continue;
|
|
}
|
|
|
|
println!(" Aggregating from {} exchanges: {}",
|
|
sources_used.len(),
|
|
sources_used.iter()
|
|
.map(|s| s.as_str())
|
|
.collect::<Vec<_>>()
|
|
.join(", ")
|
|
);
|
|
|
|
// Group by date + time (for 5min) or just date
|
|
for (p, source) in all_prices {
|
|
let key = if timeframe == &"5min" && !p.time.is_empty() {
|
|
format!("{}_{}", p.date, p.time)
|
|
} else {
|
|
p.date.clone()
|
|
};
|
|
|
|
// Convert to USD immediately
|
|
let usd_rate = super::fx::get_usd_rate(&p.currency).await.unwrap_or(1.0);
|
|
|
|
let mut p_usd = p.clone();
|
|
p_usd.open *= usd_rate;
|
|
p_usd.high *= usd_rate;
|
|
p_usd.low *= usd_rate;
|
|
p_usd.close *= usd_rate;
|
|
p_usd.adj_close *= usd_rate;
|
|
p_usd.currency = "USD".to_string();
|
|
|
|
let entry = by_date_time.entry(key.clone()).or_insert(DayData {
|
|
sources: vec![],
|
|
total_volume: 0,
|
|
vwap: 0.0,
|
|
open: p_usd.open,
|
|
high: p_usd.high,
|
|
low: p_usd.low,
|
|
close: p_usd.close,
|
|
});
|
|
|
|
let volume = p.volume.max(1); // avoid div0
|
|
let vwap_contrib = p_usd.close * volume as f64;
|
|
|
|
entry.sources.push((p_usd.clone(), source));
|
|
entry.total_volume += volume;
|
|
entry.vwap += vwap_contrib;
|
|
|
|
// Use first open, last close, max high, min low
|
|
if entry.sources.len() == 1 {
|
|
entry.open = p_usd.open;
|
|
}
|
|
entry.close = p_usd.close;
|
|
entry.high = entry.high.max(p_usd.high);
|
|
entry.low = entry.low.min(p_usd.low);
|
|
}
|
|
|
|
// Finalize aggregated data
|
|
let mut aggregated: Vec<CompanyPrice> = Vec::new();
|
|
|
|
for (key, data) in by_date_time {
|
|
let vwap = data.vwap / data.total_volume as f64;
|
|
|
|
let (date, time) = if key.contains('_') {
|
|
let parts: Vec<&str> = key.split('_').collect();
|
|
(parts[0].to_string(), parts[1].to_string())
|
|
} else {
|
|
(key, "".to_string())
|
|
};
|
|
|
|
// Track which exchange contributed most volume
|
|
let best_source = data.sources.iter()
|
|
.max_by_key(|(p, _)| p.volume)
|
|
.map(|(_, src)| src.clone())
|
|
.unwrap_or_else(|| "unknown".to_string());
|
|
|
|
aggregated.push(CompanyPrice {
|
|
ticker: format!("{lei}@agg"), // Mark as aggregated
|
|
date,
|
|
time,
|
|
open: data.open,
|
|
high: data.high,
|
|
low: data.low,
|
|
close: data.close,
|
|
adj_close: vwap,
|
|
volume: data.total_volume,
|
|
currency: "USD".to_string(),
|
|
});
|
|
}
|
|
|
|
aggregated.sort_by_key(|p| (p.date.clone(), p.time.clone()));
|
|
|
|
// Save aggregated result
|
|
let agg_dir = company_dir.join("aggregated").join(timeframe);
|
|
fs::create_dir_all(&agg_dir).await?;
|
|
let path = agg_dir.join("prices.json");
|
|
fs::write(&path, serde_json::to_string_pretty(&aggregated)?).await?;
|
|
|
|
// Save aggregation metadata
|
|
let meta = AggregationMetadata {
|
|
lei: lei.to_string(), // ← CHANGE THIS
|
|
timeframe: timeframe.to_string(),
|
|
sources: sources_used.into_iter().collect(),
|
|
total_bars: aggregated.len(),
|
|
date_range: (
|
|
aggregated.first().map(|p| p.date.clone()).unwrap_or_default(),
|
|
aggregated.last().map(|p| p.date.clone()).unwrap_or_default(),
|
|
),
|
|
aggregated_at: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
|
|
};
|
|
|
|
let meta_path = agg_dir.join("metadata.json");
|
|
fs::write(&meta_path, serde_json::to_string_pretty(&meta)?).await?;
|
|
|
|
println!(" ✓ {} {} bars from {} sources (USD)",
|
|
aggregated.len(),
|
|
timeframe,
|
|
source_count
|
|
);
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[derive(Debug, serde::Serialize, serde::Deserialize)]
|
|
struct AggregationMetadata {
|
|
lei: String,
|
|
timeframe: String,
|
|
sources: Vec<String>,
|
|
total_bars: usize,
|
|
date_range: (String, String),
|
|
aggregated_at: String,
|
|
} |