added integrity check to forex and exchange collection functions

This commit is contained in:
2026-01-10 19:46:21 +01:00
parent ac1345798d
commit 6f05dc8c99
4 changed files with 89 additions and 51 deletions

View File

@@ -1,5 +1,6 @@
// src/corporate/collect_exchanges.rs
use crate::util::directories::DataPaths;
use crate::util::integrity::{DataStage, StateManager, file_reference};
use crate::util::logger;
use crate::scraper::yahoo::ChartData;
@@ -238,10 +239,28 @@ fn get_fallback_rate(currency: &str) -> f64 {
/// - Extracts exchange data from core/data.jsonl
/// - Groups companies by exchange
/// - Sums up market caps for each exchange
/// - **NEW**: Converts all market caps to USD using FX rates
/// - Converts all market caps to USD using FX rates
/// - Saves consolidated mapping to data/yahoo_exchanges.json
/// - Handles missing or invalid data gracefully
/// - Integrity tracking with content hash validation
pub async fn collect_and_save_exchanges(paths: &DataPaths) -> anyhow::Result<usize> {
let state_path = paths.data_dir().join("state.jsonl");
let manager = StateManager::new(&state_path, &paths.data_dir().to_path_buf());
let step_name = "exchange_collection_complete";
let output_path = paths.data_dir().join("yahoo_exchanges.json");
if manager.is_step_valid(step_name).await? {
logger::log_info(" Exchange collection already completed and valid").await;
// Load and count exchanges
if output_path.exists() {
let content = fs::read_to_string(&output_path).await?;
let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
logger::log_info(&format!(" ✓ Found {} valid exchanges", exchanges.len())).await;
return Ok(exchanges.len());
}
}
logger::log_info("Collecting exchange information from company directories...").await;
let corporate_dir = paths.corporate_dir();
@@ -353,13 +372,15 @@ pub async fn collect_and_save_exchanges(paths: &DataPaths) -> anyhow::Result<usi
}
// Save to yahoo_exchanges.json
let output_path = paths.data_dir().join("yahoo_exchanges.json");
save_exchanges_json(&output_path, &exchanges).await?;
logger::log_info(&format!(
" ✓ Saved exchange mapping to {}",
output_path.display()
)).await;
track_exchange_collection_completion(&manager, &output_path, step_name).await?;
logger::log_info(" ✓ Exchange collection marked as complete with integrity tracking").await;
// Print summary statistics
print_exchange_statistics(&exchanges, &fx_cache).await;
@@ -367,6 +388,32 @@ pub async fn collect_and_save_exchanges(paths: &DataPaths) -> anyhow::Result<usi
Ok(exchanges.len())
}
/// Record the exchange collection step as complete in the state manager.
///
/// Builds a file reference for `output_path` (the generated exchanges file)
/// and stores it under `step_name` via `StateManager::update_entry`, using the
/// `DataStage::Data` stage, no dependencies and no explicit TTL override.
///
/// # Errors
/// Propagates any error returned by `StateManager::update_entry`.
async fn track_exchange_collection_completion(
    manager: &StateManager,
    output_path: &std::path::Path,
    step_name: &str,
) -> anyhow::Result<()> {
    // The output file itself is what gets tracked for this step.
    let output_ref = file_reference(output_path);
    let step = step_name.to_owned();

    // No tracked step is declared as a dependency. The step relies on core
    // data in practice, but the output file changes whenever core data does,
    // so the file reference is treated as sufficient verification.
    let no_dependencies = Vec::new();

    // `None` keeps the stage's default TTL (noted by the original author as
    // 7 days for the Data stage — NOTE(review): confirm against StateManager).
    manager
        .update_entry(step, output_ref, DataStage::Data, no_dependencies, None)
        .await?;

    Ok(())
}
/// Extract exchange information from a company's core data file
async fn extract_exchange_info(
core_data_path: &std::path::Path,