Added Yahoo exchange extraction

This commit is contained in:
2026-01-09 19:09:42 +01:00
parent ea128f6187
commit 8dd75f7bdf
13 changed files with 1538 additions and 408 deletions

View File

@@ -5,6 +5,8 @@ use crate::corporate::update_companies::build_companies_jsonl_streaming_parallel
use crate::corporate::update_companies_cleanse::{companies_yahoo_cleansed_low_profile, companies_yahoo_cleansed_no_data};
use crate::corporate::update_companies_enrich::enrich_companies_with_events;
use crate::corporate::update_companies_enrich_options_chart::{enrich_companies_with_options, enrich_companies_with_chart};
use crate::corporate::collect_exchanges::collect_and_save_exchanges;
use crate::economic::update_forex::collect_fx_rates;
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::scraper::webdriver::ChromeDriverPool;
@@ -40,105 +42,105 @@ pub async fn run_full_update(
logger::log_warn("Shutdown detected after GLEIF download").await;
return Ok(());
}
logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
load_figi_type_lists().await.ok();
logger::log_info(" ✓ OpenFIGI metadata loaded").await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after OpenFIGI load").await;
return Ok(());
}
logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await;
let all_mapped = ensure_all_leis_mapped(&gleif_csv_path, None).await?;
if !all_mapped {
logger::log_warn(" ⚠ Some LEIs failed to map - continuing with partial data").await;
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
load_figi_type_lists().await.ok();
logger::log_info(" ✓ OpenFIGI metadata loaded").await;
} else {
logger::log_info(" ✓ All LEIs successfully mapped").await;
logger::log_warn("Shutdown detected, skipping event index build").await;
}
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after LEI-FIGI mapping").await;
return Ok(());
}
logger::log_info("Step 4: Building securities map (streaming)...").await;
let date_dir = find_most_recent_figi_date_dir(&paths).await?;
if let Some(date_dir) = date_dir {
logger::log_info(&format!(" Using FIGI data from: {:?}", date_dir)).await;
load_or_build_all_securities(&date_dir).await?;
logger::log_info(" ✓ Securities map updated").await;
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
load_figi_type_lists().await.ok();
logger::log_info(" ✓ OpenFIGI metadata loaded").await;
} else {
logger::log_warn(" ✗ No FIGI data directory found").await;
logger::log_warn("Shutdown detected, skipping event index build").await;
}
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after securities map build").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await;
let all_mapped = ensure_all_leis_mapped(&gleif_csv_path, None).await?;
if !all_mapped {
logger::log_warn(" ⚠ Some LEIs failed to map - continuing with partial data").await;
} else {
logger::log_info(" ✓ All LEIs successfully mapped").await;
}
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 5: Building companies.jsonl with parallel processing and validation...").await;
let count = build_companies_jsonl_streaming_parallel(&paths, pool, shutdown_flag, config, &None).await?;
logger::log_info(&format!(" ✓ Saved {} companies", count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after companies.jsonl build").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 4: Building securities map (streaming)...").await;
let date_dir = find_most_recent_figi_date_dir(&paths).await?;
if let Some(date_dir) = date_dir {
logger::log_info(&format!(" Using FIGI data from: {:?}", date_dir)).await;
load_or_build_all_securities(&date_dir).await?;
logger::log_info(" ✓ Securities map updated").await;
} else {
logger::log_warn(" ✗ No FIGI data directory found").await;
}
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 6: Cleansing companies with missing essential data...").await;
let cleansed_count = companies_yahoo_cleansed_no_data(&paths).await?;
logger::log_info(&format!("{} companies found on Yahoo ready for further use in companies_yahoo.jsonl", cleansed_count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after no-data cleansing").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 5: Building companies.jsonl with parallel processing and validation...").await;
let count = build_companies_jsonl_streaming_parallel(&paths, pool, shutdown_flag, config, &None).await?;
logger::log_info(&format!(" ✓ Saved {} companies", count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 6: Cleansing companies with missing essential data...").await;
let cleansed_count = companies_yahoo_cleansed_no_data(&paths).await?;
logger::log_info(&format!("{} companies found on Yahoo ready for further use in companies_yahoo.jsonl", cleansed_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 7: Cleansing companies with too low profile (with abort-safe persistence)...").await;
let proxy_pool = pool.get_proxy_pool()
.ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must be created with VPN proxy rotation enabled"))?;
.ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must be created with VPN proxy rotation enabled"))?;
logger::log_info("Creating YahooClientPool with proxy rotation...").await;
let yahoo_pool = Arc::new(YahooClientPool::new(proxy_pool, config, None).await?);
logger::log_info(&format!("✓ YahooClientPool ready with {} clients", yahoo_pool.num_clients().await)).await;
let cleansed_count = companies_yahoo_cleansed_low_profile(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies with sufficient profile ready for analytics", cleansed_count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after low-profile cleansing").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 7: Cleansing companies with too low profile (with abort-safe persistence)...").await;
let cleansed_count = companies_yahoo_cleansed_low_profile(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies with sufficient profile ready for analytics", cleansed_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 8: Enriching companies with Yahoo Events (with abort-safe persistence)...").await;
let enriched_count = enrich_companies_with_events(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with event data", enriched_count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after event enrichment").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 8: Enriching companies with Yahoo Events (with abort-safe persistence)...").await;
let enriched_count = enrich_companies_with_events(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with event data", enriched_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 9: Enriching companies with Yahoo Options (with abort-safe persistence)...").await;
let options_count = enrich_companies_with_options(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with options data", options_count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after options enrichment").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 9: Enriching companies with Yahoo Options (with abort-safe persistence)...").await;
let options_count = enrich_companies_with_options(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with options data", options_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("Step 10: Enriching companies with Yahoo Chart (with abort-safe persistence)...").await;
let chart_count = enrich_companies_with_chart(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with chart data", chart_count)).await;
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected after chart enrichment").await;
return Ok(());
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 10: Enriching companies with Yahoo Chart (with abort-safe persistence)...").await;
let chart_count = enrich_companies_with_chart(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with chart data", chart_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
if !shutdown_flag.load(Ordering::SeqCst) {
@@ -149,6 +151,28 @@ pub async fn run_full_update(
logger::log_warn("Shutdown detected, skipping event index build").await;
}
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 12: Collecting FX rates...").await;
let proxy_pool = pool.get_proxy_pool()
.ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must have proxy rotation"))?;
let yahoo_pool = Arc::new(YahooClientPool::new(proxy_pool, config, None).await?);
let fx_count = collect_fx_rates(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!(" ✓ Collected {} FX rates", fx_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping FX rates collection").await;
}
if !shutdown_flag.load(Ordering::SeqCst) {
logger::log_info("Step 13: Collecting exchange information...").await;
let exchange_count = collect_and_save_exchanges(&paths).await?;
logger::log_info(&format!(" ✓ Collected {} exchanges", exchange_count)).await;
} else {
logger::log_warn("Shutdown detected, skipping exchange collection").await;
}
logger::log_info("✅ Corporate update complete").await;
Ok(())
}