Added Yahoo exchange extraction

This commit is contained in:
2026-01-09 19:09:42 +01:00
parent ea128f6187
commit 8dd75f7bdf
13 changed files with 1538 additions and 408 deletions

View File

@@ -266,8 +266,36 @@ pub async fn companies_yahoo_cleansed_low_profile(
existing_companies.len()
)).await;
// === CONSOLIDATE LOG BEFORE EARLY EXIT ===
if pending.is_empty() {
logger::log_info(" ✓ All companies already processed").await;
// Consolidate log into checkpoint before exiting
if log_path.exists() {
let log_metadata = tokio::fs::metadata(&log_path).await.ok();
if log_metadata.map(|m| m.len() > 0).unwrap_or(false) {
logger::log_info(" Consolidating update log into checkpoint...").await;
let temp_checkpoint = checkpoint_path.with_extension("tmp");
let mut temp_file = File::create(&temp_checkpoint).await?;
for company in existing_companies.values() {
let json_line = serde_json::to_string(company)?;
temp_file.write_all(json_line.as_bytes()).await?;
temp_file.write_all(b"\n").await?;
}
temp_file.flush().await?;
temp_file.sync_data().await?;
drop(temp_file);
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
tokio::fs::remove_file(&log_path).await.ok();
logger::log_info(&format!(" ✓ Consolidated {} companies", existing_companies.len())).await;
}
}
return Ok(existing_companies.len());
}
@@ -575,6 +603,36 @@ pub async fn companies_yahoo_cleansed_low_profile(
final_valid, final_filtered_low_cap, final_filtered_no_price, final_failed
)).await;
// === VERIFY AND RECREATE FINAL OUTPUT ===
logger::log_info("Verifying final output integrity...").await;
let final_companies_map = existing_companies_writer.lock().await;
let expected_count = final_companies_map.len();
// Always write final consolidated checkpoint
let temp_checkpoint = checkpoint_path.with_extension("tmp");
let mut temp_file = File::create(&temp_checkpoint).await?;
for company in final_companies_map.values() {
let json_line = serde_json::to_string(company)?;
temp_file.write_all(json_line.as_bytes()).await?;
temp_file.write_all(b"\n").await?;
}
temp_file.flush().await?;
temp_file.sync_data().await?;
drop(temp_file);
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
drop(final_companies_map);
// Clear log since everything is in checkpoint
if log_path.exists() {
tokio::fs::remove_file(&log_path).await.ok();
}
logger::log_info(&format!("✓ Final output: {} companies in {:?}", expected_count, checkpoint_path)).await;
// Shutdown Yahoo pool
yahoo_pool.shutdown().await?;
@@ -706,7 +764,7 @@ async fn process_company_with_validation(
// Validate market cap
let market_cap = extract_market_cap(&summary);
if market_cap < 1_000_000.0 {
if market_cap < 100_000_000.0 {
return CompanyProcessResult::FilteredLowCap {
name: company.name.clone(),
market_cap,