From f9ce5bad995b008be25a72e8771685cd2449ba8a Mon Sep 17 00:00:00 2001 From: donpat1to Date: Tue, 6 Jan 2026 00:15:57 +0100 Subject: [PATCH] fixed yahoo api calls for cleansing low profile data --- src/corporate/update_companies_enrich.rs | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/corporate/update_companies_enrich.rs b/src/corporate/update_companies_enrich.rs index e69de29..884f7b7 100644 --- a/src/corporate/update_companies_enrich.rs +++ b/src/corporate/update_companies_enrich.rs @@ -0,0 +1,34 @@ +// src/corporate/update_companies_enrich.rs +use super::{helpers::*, types::*}; +use crate::config::Config; +use crate::util::directories::DataPaths; +use crate::util::logger; +use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule}; + +use std::result::Result::Ok; +use chrono::{Local, Utc}; +use std::collections::HashMap; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use futures::stream::{FuturesUnordered, StreamExt}; +use serde_json::json; +use tokio::sync::mpsc; + +/// Yahoo data enrichment per company +/// +/// # Features +/// - Graceful shutdown (abort-safe) +/// - Task panic isolation (tasks fail independently) +/// - Crash-safe persistence (checkpoint + log with fsync) +/// - Smart skip logic (only process incomplete data) +/// - Uses pending queue instead of retry mechanism +/// - Reuses companies_update.log for persistence +/// +/// # Persistence Strategy +/// - Checkpoint: companies_yahoo_cleaned.jsonl (atomic state) +/// - Log: companies_update.log (append-only updates) +/// - On restart: Load checkpoint + replay log +/// - Periodic checkpoints (every 50 companies) +/// - Batched fsync (every 10 writes or 10 seconds) \ No newline at end of file