Added Yahoo exchange extraction
This commit is contained in:
@@ -266,8 +266,36 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
||||
existing_companies.len()
|
||||
)).await;
|
||||
|
||||
// === CONSOLIDATE LOG BEFORE EARLY EXIT ===
|
||||
if pending.is_empty() {
|
||||
logger::log_info(" ✓ All companies already processed").await;
|
||||
|
||||
// Consolidate log into checkpoint before exiting
|
||||
if log_path.exists() {
|
||||
let log_metadata = tokio::fs::metadata(&log_path).await.ok();
|
||||
if log_metadata.map(|m| m.len() > 0).unwrap_or(false) {
|
||||
logger::log_info(" Consolidating update log into checkpoint...").await;
|
||||
|
||||
let temp_checkpoint = checkpoint_path.with_extension("tmp");
|
||||
let mut temp_file = File::create(&temp_checkpoint).await?;
|
||||
|
||||
for company in existing_companies.values() {
|
||||
let json_line = serde_json::to_string(company)?;
|
||||
temp_file.write_all(json_line.as_bytes()).await?;
|
||||
temp_file.write_all(b"\n").await?;
|
||||
}
|
||||
|
||||
temp_file.flush().await?;
|
||||
temp_file.sync_data().await?;
|
||||
drop(temp_file);
|
||||
|
||||
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
|
||||
tokio::fs::remove_file(&log_path).await.ok();
|
||||
|
||||
logger::log_info(&format!(" ✓ Consolidated {} companies", existing_companies.len())).await;
|
||||
}
|
||||
}
|
||||
|
||||
return Ok(existing_companies.len());
|
||||
}
|
||||
|
||||
@@ -575,6 +603,36 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
||||
final_valid, final_filtered_low_cap, final_filtered_no_price, final_failed
|
||||
)).await;
|
||||
|
||||
// === VERIFY AND RECREATE FINAL OUTPUT ===
|
||||
logger::log_info("Verifying final output integrity...").await;
|
||||
|
||||
let final_companies_map = existing_companies_writer.lock().await;
|
||||
let expected_count = final_companies_map.len();
|
||||
|
||||
// Always write final consolidated checkpoint
|
||||
let temp_checkpoint = checkpoint_path.with_extension("tmp");
|
||||
let mut temp_file = File::create(&temp_checkpoint).await?;
|
||||
|
||||
for company in final_companies_map.values() {
|
||||
let json_line = serde_json::to_string(company)?;
|
||||
temp_file.write_all(json_line.as_bytes()).await?;
|
||||
temp_file.write_all(b"\n").await?;
|
||||
}
|
||||
|
||||
temp_file.flush().await?;
|
||||
temp_file.sync_data().await?;
|
||||
drop(temp_file);
|
||||
|
||||
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
|
||||
drop(final_companies_map);
|
||||
|
||||
// Clear log since everything is in checkpoint
|
||||
if log_path.exists() {
|
||||
tokio::fs::remove_file(&log_path).await.ok();
|
||||
}
|
||||
|
||||
logger::log_info(&format!("✓ Final output: {} companies in {:?}", expected_count, checkpoint_path)).await;
|
||||
|
||||
// Shutdown Yahoo pool
|
||||
yahoo_pool.shutdown().await?;
|
||||
|
||||
@@ -706,7 +764,7 @@ async fn process_company_with_validation(
|
||||
|
||||
// Validate market cap
|
||||
let market_cap = extract_market_cap(&summary);
|
||||
if market_cap < 1_000_000.0 {
|
||||
if market_cap < 100_000_000.0 {
|
||||
return CompanyProcessResult::FilteredLowCap {
|
||||
name: company.name.clone(),
|
||||
market_cap,
|
||||
|
||||
Reference in New Issue
Block a user