Compare commits

..

33 Commits

SHA1 Message Date
eff1412c0f removed claudes md 2026-01-15 00:23:29 +01:00
75ab1969c7 added cross compatiblity between shutdown flag and state entries 2026-01-15 00:22:55 +01:00
f4b20f824d removed crossplatformcompany from types 2026-01-14 14:49:00 +01:00
93fbefc9d4 removed id creation on scrape 2026-01-14 14:28:16 +01:00
4ea0c78d3d added ids for companies 2026-01-12 23:03:01 +01:00
1d025a04ce updated securities directory 2026-01-12 22:23:34 +01:00
98e1bca12f moved helper functions into helpers.rs 2026-01-12 22:06:13 +01:00
29d8f1d89e moved structs to types.rs 2026-01-12 18:50:44 +01:00
c0c9bc0ed9 added bond extraction from figi 2026-01-12 15:58:06 +01:00
659757482d öi 2026-01-12 01:01:19 +01:00
bd74f36f4c added integrity dir for set data collection; one state.jsonl 2026-01-11 16:57:36 +01:00
e6f8393660 merged enriching functions into one module 2026-01-11 14:24:18 +01:00
aff340ee2f migrated checkpoint handling in integrity.rs to ssot principle 2026-01-11 13:05:31 +01:00
0487c2ec49 changed file names for openfigi 2026-01-11 12:21:10 +01:00
04f4b0d0c4 added integrity check to openfigi functions 2026-01-11 00:06:25 +01:00
6f05dc8c99 added integrity check to forex and exchange collection functiosn 2026-01-10 19:46:21 +01:00
ac1345798d added integrity check to cleanse functions 2026-01-10 18:42:39 +01:00
766eb803f1 added integrity check to enrichment functions 2026-01-10 17:40:16 +01:00
151c96e35f working code :) 2026-01-10 15:11:06 +01:00
ae1876b014 cleaned up main 2026-01-10 00:30:59 +01:00
c86d828940 cleaned up main 2026-01-10 00:30:42 +01:00
c6d301d434 added helper functions to reduce bloat 2026-01-09 21:24:18 +01:00
ba841248f0 cleaned up update.rs eco and corp 2026-01-09 19:52:26 +01:00
8dd75f7bdf added yahoo exchange extraction 2026-01-09 19:09:42 +01:00
ea128f6187 added options chart enrichment 2026-01-08 11:35:25 +01:00
1720716144 added event enrichment 2026-01-08 00:35:10 +01:00
f9ce5bad99 fixed yahoo api calls for cleansing low profile data 2026-01-06 00:15:57 +01:00
fc25f32cbc fixed yahoo api calls for cleansing low profile data 2026-01-06 00:15:46 +01:00
3d16475b79 readded yahoo 2026-01-05 17:00:42 +01:00
86944a9c58 cleaned yahoo hits 2025-12-24 00:00:21 +01:00
f9f09d0291 added working hard reset 2025-12-23 15:07:40 +01:00
fb0876309f added hard reset for navigation timeout after 3 hours 2025-12-22 00:31:28 +01:00
c01b47000f removed serial data scraping for yahoo tickers 2025-12-19 16:58:22 +01:00
51 changed files with 11569 additions and 3861 deletions

View File

@@ -14,8 +14,8 @@ CORPORATE_START_DATE=2010-01-01
 # How far into the future we scrape economic events (in months)
 ECONOMIC_LOOKAHEAD_MONTHS=3
-# Maximum number of parallel scraping tasks (default: 10)
-MAX_PARALLEL_TASKS=10
+# Maximum number of parallel scraping tasks (default: 4)
+MAX_PARALLEL_INSTANCES=10
 # ===== VPN ROTATION (ProtonVPN Integration) =====
 # Enable automatic VPN rotation between sessions?
@@ -38,3 +38,5 @@ TASKS_PER_VPN_SESSION=50
 MAX_REQUESTS_PER_SESSION=25
 MIN_REQUEST_INTERVAL_MS=300
 MAX_RETRY_ATTEMPTS=3
+PROXY_INSTANCES_PER_CERTIFICATE=2

1
.gitignore vendored
View File

@@ -35,6 +35,7 @@ target/
 **/*.log
 **/*.ovpn
 **/*.tmp
+**/*.txt
 #/economic_events*
 #/economic_event_changes*

59
Cargo.lock generated
View File

@@ -2465,9 +2465,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
 [[package]]
 name = "rustix"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
 dependencies = [
  "bitflags",
  "errno",
@@ -2743,6 +2743,15 @@ dependencies = [
  "serde_core",
 ]
+[[package]]
+name = "serde_spanned"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
+dependencies = [
+ "serde_core",
+]
 [[package]]
 name = "serde_urlencoded"
 version = "0.7.1"
@@ -2974,9 +2983,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
 [[package]]
 name = "tempfile"
-version = "3.23.0"
+version = "3.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
 dependencies = [
  "fastrand",
  "getrandom 0.3.4",
@@ -3213,10 +3222,25 @@ dependencies = [
 ]
 [[package]]
-name = "toml_datetime"
-version = "0.7.3"
+name = "toml"
+version = "0.9.11+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
+checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
+dependencies = [
+ "indexmap",
+ "serde_core",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_parser",
+ "toml_writer",
+ "winnow",
+]
+[[package]]
+name = "toml_datetime"
+version = "0.7.5+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
 dependencies = [
  "serde_core",
 ]
@@ -3235,13 +3259,19 @@ dependencies = [
 [[package]]
 name = "toml_parser"
-version = "1.0.4"
+version = "1.0.6+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
+checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
 dependencies = [
  "winnow",
 ]
+[[package]]
+name = "toml_writer"
+version = "1.0.6+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"
 [[package]]
 name = "tower"
 version = "0.5.2"
@@ -3454,6 +3484,12 @@ dependencies = [
  "serde",
 ]
+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
 [[package]]
 name = "utf-8"
 version = "0.7.6"
@@ -3472,6 +3508,7 @@ version = "1.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
 dependencies = [
+ "getrandom 0.3.4",
  "js-sys",
  "wasm-bindgen",
 ]
@@ -3626,11 +3663,15 @@ dependencies = [
  "scraper",
  "serde",
  "serde_json",
+ "sha2",
  "tokio",
  "tokio-tungstenite 0.21.0",
+ "toml",
  "tracing",
  "tracing-subscriber",
  "url",
+ "urlencoding",
+ "uuid",
  "walkdir",
  "yfinance-rs",
  "zip",

View File

@@ -17,11 +17,12 @@ categories = ["finance", "data-structures", "asynchronous"]
 tokio = { version = "1.38", features = ["full"] }
 # Web scraping & HTTP
-reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking"] }
+reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking", "socks", "cookies"] }
 scraper = "0.19" # HTML parsing for Yahoo earnings pages
 fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net
 yfinance-rs = "0.7.2"
 url = "2.5.7"
+urlencoding = "2.1"
 # Serialization
 serde = { version = "1.0", features = ["derive"] }
@@ -39,6 +40,7 @@ rand = "0.9.2"
 # Environment handling
 dotenvy = "0.15"
+toml = "0.9.8"
 # Date & time
 chrono = { version = "0.4", features = ["serde"] }
@@ -58,3 +60,10 @@ rayon = "1.10" # optional: for parallel price downloads
 # Web server for dashboard
 axum = { version = "0.7", features = ["ws"] }
 tokio-tungstenite = "0.21" # For WebSocket support
+# tests
+#tempfile = "3.24.0"
+# data integrity
+sha2 = "0.10.9"
+uuid = { version = "1.0", features = ["v4", "v7"] }
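
For reference, a minimal sketch (not part of this diff) of how the newly added `sha2` and `uuid` crates are typically used for the "data integrity" work mentioned above; the helper names are illustrative only.

```rust
use sha2::{Digest, Sha256};
use uuid::Uuid;

/// Hex-encoded SHA-256 over raw file bytes (illustrative helper, not from the repo).
fn content_hash(bytes: &[u8]) -> String {
    let digest = Sha256::digest(bytes);
    digest.iter().map(|b| format!("{:02x}", b)).collect()
}

/// Random identifier for a state entry; the manifest also enables the v7
/// feature for time-ordered UUIDs.
fn new_entry_id() -> Uuid {
    Uuid::new_v4()
}
```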

View File

@@ -249,3 +249,7 @@ Der Scraper unterstützt 52 Länder und Regionen (siehe `countries.json`), darun
 https://chromedriver.storage.googleapis.com/index.html
 https://googlechromelabs.github.io/chrome-for-testing/
+## Graphviz.org Download
+https://graphviz.org/download/

View File

@@ -0,0 +1,28 @@
digraph Dependencies {
rankdir=LR;
node [shape=box];
"yahoo_options_enrichment_complete" [label="yahoo_options_enrichment_complete
Options data enriched for all companies"];
"yahoo_events_enrichment_complete" [label="yahoo_events_enrichment_complete
Corporate events enriched for all companies"];
"yahoo_companies_cleansed_no_data" [label="yahoo_companies_cleansed_no_data
Companies cleansed of data with no Yahoo results"];
"yahoo_chart_enrichment_complete" [label="yahoo_chart_enrichment_complete
Chart data enriched for all companies"];
"enrichment_group" [label="enrichment_group
Yahoo exchanges collected and validated"];
"yahoo_companies_cleansed_low_profile" [label="yahoo_companies_cleansed_low_profile
Companies cleansed of low profile (insufficient market cap/price data)"];
"lei_figi_mapping_complete" [label="lei_figi_mapping_complete
LEI-to-FIGI mappings from OpenFIGI API"];
"securities_data_complete" [label="securities_data_complete
Securities data built from FIGI mappings"];
"yahoo_options_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
"yahoo_events_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
"yahoo_companies_cleansed_no_data" -> "securities_data_complete";
"yahoo_chart_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
"yahoo_companies_cleansed_low_profile" -> "yahoo_companies_cleansed_no_data";
"securities_data_complete" -> "lei_figi_mapping_complete";
}

View File

@@ -0,0 +1,61 @@
# checkpoint_dependencies.toml - Complete configuration
# ============================================================================
# COLLECTION STAGE (No dependencies)
# ============================================================================
[checkpoints.lei_figi_mapping_complete]
description = "LEI-to-FIGI mappings from OpenFIGI API"
depends_on = []
[checkpoints.securities_data_complete]
description = "Securities data built from FIGI mappings"
depends_on = ["lei_figi_mapping_complete"]
# ============================================================================
# CLEANSING STAGE (Depends on collection)
# ============================================================================
[checkpoints.yahoo_companies_cleansed_no_data]
description = "Companies cleansed of data with no Yahoo results"
depends_on = ["securities_data_complete"]
[checkpoints.yahoo_companies_cleansed_low_profile]
description = "Companies cleansed of low profile (insufficient market cap/price data)"
depends_on = ["yahoo_companies_cleansed_no_data"]
# ============================================================================
# ENRICHMENT GROUP (All depend on cleansed companies)
# ============================================================================
[groups.enrichment_group]
description = "Yahoo Finance enrichment functions"
members = [
"yahoo_events_enrichment_complete",
"yahoo_options_enrichment_complete",
"yahoo_chart_enrichment_complete"
]
depends_on = ["yahoo_companies_cleansed_low_profile"]
[checkpoints.yahoo_events_enrichment_complete]
description = "Corporate events enriched for all companies"
depends_on = []
group = "enrichment_group"
[checkpoints.yahoo_options_enrichment_complete]
description = "Options data enriched for all companies"
depends_on = []
group = "enrichment_group"
[checkpoints.yahoo_chart_enrichment_complete]
description = "Chart data enriched for all companies"
depends_on = []
group = "enrichment_group"
# ============================================================================
# SECURITIES PROCESSING (Depends on LEI mapping)
# ============================================================================
[checkpoints.enrichment_group]
description = "Yahoo exchanges collected and validated"
depends_on = []
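
A hedged deserialization sketch for the layout above, using the `toml` crate added in Cargo.toml; the struct names are illustrative and not taken from the repository, only the TOML keys (`checkpoints`, `groups`, `description`, `depends_on`, `members`, `group`) come from the file itself.

```rust
use serde::Deserialize;
use std::collections::HashMap;

#[derive(Debug, Deserialize)]
struct CheckpointConfig {
    #[serde(default)]
    checkpoints: HashMap<String, Checkpoint>,
    #[serde(default)]
    groups: HashMap<String, Group>,
}

#[derive(Debug, Deserialize)]
struct Checkpoint {
    description: String,
    #[serde(default)]
    depends_on: Vec<String>,
    group: Option<String>,
}

#[derive(Debug, Deserialize)]
struct Group {
    description: String,
    members: Vec<String>,
    #[serde(default)]
    depends_on: Vec<String>,
}

// Parse the file content into the config structure above.
fn load_config(text: &str) -> Result<CheckpointConfig, toml::de::Error> {
    toml::from_str(text)
}
```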

View File

@@ -27,6 +27,26 @@ pub struct Config {
 #[serde(default = "default_max_retry_attempts")]
 pub max_retry_attempts: u32,
+#[serde(default = "default_proxy_instances_per_certificate")]
+pub proxy_instances_per_certificate: Option<usize>,
+}
+impl Default for Config {
+fn default() -> Self {
+Self {
+economic_start_date: "2007-02-13".to_string(),
+corporate_start_date: "2010-01-01".to_string(),
+economic_lookahead_months: 3,
+max_parallel_instances: default_max_parallel_instances(),
+max_tasks_per_instance: 0,
+max_requests_per_session: default_max_requests_per_session(),
+min_request_interval_ms: default_min_request_interval_ms(),
+max_retry_attempts: default_max_retry_attempts(),
+enable_vpn_rotation: false,
+proxy_instances_per_certificate: default_proxy_instances_per_certificate(),
+}
+}
 }
 fn default_enable_vpn_rotation() -> bool {
@@ -47,24 +67,10 @@ fn default_min_request_interval_ms() -> u64 {
 fn default_max_retry_attempts() -> u32 { 3 }
-impl Default for Config {
-fn default() -> Self {
-Self {
-economic_start_date: "2007-02-13".to_string(),
-corporate_start_date: "2010-01-01".to_string(),
-economic_lookahead_months: 3,
-max_parallel_instances: default_max_parallel_instances(),
-max_tasks_per_instance: 0,
-max_requests_per_session: default_max_requests_per_session(),
-min_request_interval_ms: default_min_request_interval_ms(),
-max_retry_attempts: default_max_retry_attempts(),
-enable_vpn_rotation: false,
-}
-}
+fn default_proxy_instances_per_certificate() -> Option<usize> {
+Some(1)
 }
 impl Config {
 /// Loads configuration from environment variables using dotenvy.
 pub fn load() -> Result<Self> {
@@ -112,6 +118,11 @@ impl Config {
 .parse()
 .context("Failed to parse MAX_RETRY_ATTEMPTS as u32")?;
+let proxy_instances_per_certificate: Option<usize> = match dotenvy::var("PROXY_INSTANCES_PER_CERTIFICATE") {
+Ok(val) => Some(val.parse().context("Failed to parse PROXY_INSTANCES_PER_CERTIFICATE as usize")?),
+Err(_) => Some(1),
+};
 Ok(Self {
 economic_start_date,
 corporate_start_date,
@@ -122,6 +133,7 @@ impl Config {
 max_requests_per_session,
 min_request_interval_ms,
 max_retry_attempts,
+proxy_instances_per_certificate,
 })
 }
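
A small usage sketch (assumed calling context, not from the diff): reading the new setting after `Config::load()`; the module path for `Config` depends on the crate layout and is omitted here.

```rust
use anyhow::Result;

// Load the config and read the new proxy setting, defaulting to 1 per certificate.
fn proxy_instances_per_certificate() -> Result<usize> {
    let cfg = Config::load()?;
    Ok(cfg.proxy_instances_per_certificate.unwrap_or(1))
}
```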

View File

@@ -1,195 +0,0 @@
// src/corporate/aggregation.rs
use super::types::CompanyPrice;
use super::storage::*;
use crate::util::directories::DataPaths;
use tokio::fs;
use std::collections::HashMap;
#[derive(Debug)]
struct DayData {
sources: Vec<(CompanyPrice, String)>, // (price, source_ticker)
total_volume: u64,
vwap: f64,
open: f64,
high: f64,
low: f64,
close: f64,
}
/// Aggregate price data from multiple exchanges, converting all to USD
pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
let company_dir = get_company_dir(paths, lei);
for timeframe in ["daily", "5min"].iter() {
let source_dir = company_dir.join(timeframe);
if !source_dir.exists() {
continue;
}
let mut all_prices: Vec<(CompanyPrice, String)> = Vec::new();
let mut by_date_time: HashMap<String, DayData> = HashMap::new();
// Load all sources with their ticker names
let mut entries = tokio::fs::read_dir(&source_dir).await?;
let mut source_count = 0;
let mut sources_used = std::collections::HashSet::new();
while let Some(entry) = entries.next_entry().await? {
let source_dir_path = entry.path();
if !source_dir_path.is_dir() { continue; }
let source_ticker = source_dir_path
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown")
.to_string();
let prices_path = source_dir_path.join("prices.json");
if !prices_path.exists() { continue; }
let content = tokio::fs::read_to_string(&prices_path).await?;
let mut prices: Vec<CompanyPrice> = serde_json::from_str(&content)?;
if !prices.is_empty() {
sources_used.insert(source_ticker.clone());
source_count += 1;
}
for price in prices {
all_prices.push((price, source_ticker.clone()));
}
}
if all_prices.is_empty() {
continue;
}
println!(" Aggregating from {} exchanges: {}",
sources_used.len(),
sources_used.iter()
.map(|s| s.as_str())
.collect::<Vec<_>>()
.join(", ")
);
// Group by date + time (for 5min) or just date
for (p, source) in all_prices {
let key = if timeframe == &"5min" && !p.time.is_empty() {
format!("{}_{}", p.date, p.time)
} else {
p.date.clone()
};
// Convert to USD immediately
let usd_rate = super::fx::get_usd_rate(&p.currency).await.unwrap_or(1.0);
let mut p_usd = p.clone();
p_usd.open *= usd_rate;
p_usd.high *= usd_rate;
p_usd.low *= usd_rate;
p_usd.close *= usd_rate;
p_usd.adj_close *= usd_rate;
p_usd.currency = "USD".to_string();
let entry = by_date_time.entry(key.clone()).or_insert(DayData {
sources: vec![],
total_volume: 0,
vwap: 0.0,
open: p_usd.open,
high: p_usd.high,
low: p_usd.low,
close: p_usd.close,
});
let volume = p.volume.max(1); // avoid div0
let vwap_contrib = p_usd.close * volume as f64;
entry.sources.push((p_usd.clone(), source));
entry.total_volume += volume;
entry.vwap += vwap_contrib;
// Use first open, last close, max high, min low
if entry.sources.len() == 1 {
entry.open = p_usd.open;
}
entry.close = p_usd.close;
entry.high = entry.high.max(p_usd.high);
entry.low = entry.low.min(p_usd.low);
}
// Finalize aggregated data
let mut aggregated: Vec<CompanyPrice> = Vec::new();
for (key, data) in by_date_time {
let vwap = data.vwap / data.total_volume as f64;
let (date, time) = if key.contains('_') {
let parts: Vec<&str> = key.split('_').collect();
(parts[0].to_string(), parts[1].to_string())
} else {
(key, "".to_string())
};
// Track which exchange contributed most volume
let best_source = data.sources.iter()
.max_by_key(|(p, _)| p.volume)
.map(|(_, src)| src.clone())
.unwrap_or_else(|| "unknown".to_string());
aggregated.push(CompanyPrice {
ticker: format!("{lei}@agg"), // Mark as aggregated
date,
time,
open: data.open,
high: data.high,
low: data.low,
close: data.close,
adj_close: vwap,
volume: data.total_volume,
currency: "USD".to_string(),
});
}
aggregated.sort_by_key(|p| (p.date.clone(), p.time.clone()));
// Save aggregated result
let agg_dir = company_dir.join("aggregated").join(timeframe);
fs::create_dir_all(&agg_dir).await?;
let path = agg_dir.join("prices.json");
fs::write(&path, serde_json::to_string_pretty(&aggregated)?).await?;
// Save aggregation metadata
let meta = AggregationMetadata {
lei: lei.to_string(), // ← CHANGE THIS
timeframe: timeframe.to_string(),
sources: sources_used.into_iter().collect(),
total_bars: aggregated.len(),
date_range: (
aggregated.first().map(|p| p.date.clone()).unwrap_or_default(),
aggregated.last().map(|p| p.date.clone()).unwrap_or_default(),
),
aggregated_at: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
};
let meta_path = agg_dir.join("metadata.json");
fs::write(&meta_path, serde_json::to_string_pretty(&meta)?).await?;
println!("{} {} bars from {} sources (USD)",
aggregated.len(),
timeframe,
source_count
);
}
Ok(())
}
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct AggregationMetadata {
lei: String,
timeframe: String,
sources: Vec<String>,
total_bars: usize,
date_range: (String, String),
aggregated_at: String,
}

View File

@@ -1,346 +0,0 @@
// src/corporate/atomic_writer.rs
//
// Atomic JSONL writer that prevents partial/corrupted results from being written
use anyhow::Result;
use serde::Serialize;
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use tokio::fs::{File, OpenOptions};
use tokio::io::AsyncWriteExt;
use tokio::sync::mpsc;
/// Command to write or validate data
#[derive(Debug)]
pub enum WriteCommand<T> {
/// Stage a result for writing (held in memory until committed)
Stage { id: String, data: T },
/// Commit staged result to disk (atomic write)
Commit { id: String },
/// Rollback staged result (discard without writing)
Rollback { id: String },
/// Commit all pending staged results and flush
CommitAll,
/// Shutdown writer gracefully (only commits valid staged results)
Shutdown,
}
/// Result of a write operation
#[derive(Debug)]
pub struct WriteResult {
pub id: String,
pub success: bool,
pub error: Option<String>,
}
/// Atomic writer that prevents partial results from being written
pub struct AtomicJsonlWriter<T> {
file: File,
staged: HashMap<String, T>,
committed_count: usize,
rollback_count: usize,
}
impl<T: Serialize + Clone> AtomicJsonlWriter<T> {
pub async fn new(path: PathBuf) -> Result<Self> {
// Ensure parent directory exists
if let Some(parent) = path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
let file = OpenOptions::new()
.create(true)
.append(true)
.open(&path)
.await?;
crate::util::logger::log_info(&format!(
"Atomic writer initialized: {:?}",
path
)).await;
Ok(Self {
file,
staged: HashMap::new(),
committed_count: 0,
rollback_count: 0,
})
}
/// Stage data for writing (held in memory, not yet written)
pub async fn stage(&mut self, id: String, data: T) {
crate::util::logger::log_info(&format!(
"Staging result for: {} (total staged: {})",
id,
self.staged.len() + 1
)).await;
self.staged.insert(id, data);
}
/// Commit a staged result to disk (atomic write)
pub async fn commit(&mut self, id: &str) -> Result<()> {
if let Some(data) = self.staged.remove(id) {
// Serialize to JSON
let json_line = serde_json::to_string(&data)?;
// Write atomically (single syscall)
self.file.write_all(json_line.as_bytes()).await?;
self.file.write_all(b"\n").await?;
self.file.flush().await?;
self.committed_count += 1;
crate::util::logger::log_info(&format!(
"✓ Committed result for: {} (total committed: {})",
id, self.committed_count
)).await;
Ok(())
} else {
Err(anyhow::anyhow!("No staged result found for id: {}", id))
}
}
/// Rollback a staged result (discard without writing)
pub async fn rollback(&mut self, id: &str) {
if self.staged.remove(id).is_some() {
self.rollback_count += 1;
crate::util::logger::log_warn(&format!(
"⚠ Rolled back result for: {} (total rollbacks: {})",
id, self.rollback_count
)).await;
}
}
/// Commit all staged results
pub async fn commit_all(&mut self) -> Result<usize> {
let ids: Vec<String> = self.staged.keys().cloned().collect();
let mut committed = 0;
for id in ids {
if let Ok(()) = self.commit(&id).await {
committed += 1;
}
}
Ok(committed)
}
/// Rollback all staged results (discard everything)
pub async fn rollback_all(&mut self) -> usize {
let count = self.staged.len();
self.staged.clear();
self.rollback_count += count;
crate::util::logger::log_warn(&format!(
"⚠ Rolled back all {} staged results",
count
)).await;
count
}
/// Get statistics
pub fn stats(&self) -> WriterStats {
WriterStats {
staged_count: self.staged.len(),
committed_count: self.committed_count,
rollback_count: self.rollback_count,
}
}
}
#[derive(Debug, Clone)]
pub struct WriterStats {
pub staged_count: usize,
pub committed_count: usize,
pub rollback_count: usize,
}
/// Managed writer service that runs in its own task
pub struct AtomicWriterService<T> {
rx: mpsc::UnboundedReceiver<WriteCommand<T>>,
writer: AtomicJsonlWriter<T>,
shutdown_flag: Arc<AtomicBool>,
}
impl<T: Serialize + Clone> AtomicWriterService<T> {
pub async fn new(
path: PathBuf,
rx: mpsc::UnboundedReceiver<WriteCommand<T>>,
shutdown_flag: Arc<AtomicBool>,
) -> Result<Self> {
let writer = AtomicJsonlWriter::new(path).await?;
Ok(Self {
rx,
writer,
shutdown_flag,
})
}
/// Main service loop
pub async fn run(mut self) {
crate::util::logger::log_info("Atomic writer service started").await;
while let Some(cmd) = self.rx.recv().await {
// Check for shutdown flag
if self.shutdown_flag.load(Ordering::SeqCst) {
crate::util::logger::log_warn(
"Shutdown detected - processing only Commit/Rollback commands"
).await;
// Only process commit/rollback commands during shutdown
match cmd {
WriteCommand::Commit { id } => {
if let Err(e) = self.writer.commit(&id).await {
crate::util::logger::log_error(&format!(
"Failed to commit {}: {}",
id, e
)).await;
}
}
WriteCommand::Rollback { id } => {
self.writer.rollback(&id).await;
}
WriteCommand::CommitAll => {
match self.writer.commit_all().await {
Ok(count) => {
crate::util::logger::log_info(&format!(
"Committed {} results during shutdown",
count
)).await;
}
Err(e) => {
crate::util::logger::log_error(&format!(
"Failed to commit all: {}",
e
)).await;
}
}
}
WriteCommand::Shutdown => break,
_ => {
// Ignore Stage commands during shutdown
crate::util::logger::log_warn(
"Ignoring new Stage command during shutdown"
).await;
}
}
continue;
}
// Normal operation
match cmd {
WriteCommand::Stage { id, data } => {
self.writer.stage(id, data).await;
}
WriteCommand::Commit { id } => {
if let Err(e) = self.writer.commit(&id).await {
crate::util::logger::log_error(&format!(
"Failed to commit {}: {}",
id, e
)).await;
}
}
WriteCommand::Rollback { id } => {
self.writer.rollback(&id).await;
}
WriteCommand::CommitAll => {
match self.writer.commit_all().await {
Ok(count) => {
crate::util::logger::log_info(&format!(
"Committed all {} staged results",
count
)).await;
}
Err(e) => {
crate::util::logger::log_error(&format!(
"Failed to commit all: {}",
e
)).await;
}
}
}
WriteCommand::Shutdown => break,
}
}
// Final shutdown - rollback any remaining staged items
let stats = self.writer.stats();
if stats.staged_count > 0 {
crate::util::logger::log_warn(&format!(
"⚠ Shutdown with {} uncommitted results - rolling back",
stats.staged_count
)).await;
self.writer.rollback_all().await;
}
crate::util::logger::log_info(&format!(
"Atomic writer service stopped. Final stats: {} committed, {} rolled back",
stats.committed_count,
stats.rollback_count
)).await;
}
}
/// Handle for sending write commands
#[derive(Clone)]
pub struct AtomicWriterHandle<T> {
tx: mpsc::UnboundedSender<WriteCommand<T>>,
}
impl<T> AtomicWriterHandle<T> {
pub fn new(tx: mpsc::UnboundedSender<WriteCommand<T>>) -> Self {
Self { tx }
}
/// Stage data for writing (does not write immediately)
pub fn stage(&self, id: String, data: T) {
let _ = self.tx.send(WriteCommand::Stage { id, data });
}
/// Commit staged data to disk
pub fn commit(&self, id: String) {
let _ = self.tx.send(WriteCommand::Commit { id });
}
/// Rollback staged data (discard)
pub fn rollback(&self, id: String) {
let _ = self.tx.send(WriteCommand::Rollback { id });
}
/// Commit all staged data
pub fn commit_all(&self) {
let _ = self.tx.send(WriteCommand::CommitAll);
}
/// Shutdown writer gracefully
pub fn shutdown(&self) {
let _ = self.tx.send(WriteCommand::Shutdown);
}
}
/// Create atomic writer service
pub async fn create_atomic_writer<T: Serialize + Clone + Send + 'static>(
path: PathBuf,
shutdown_flag: Arc<AtomicBool>,
) -> Result<(AtomicWriterHandle<T>, tokio::task::JoinHandle<()>)> {
let (tx, rx) = mpsc::unbounded_channel();
let service = AtomicWriterService::new(path, rx, shutdown_flag).await?;
let handle = tokio::spawn(async move {
service.run().await;
});
Ok((AtomicWriterHandle::new(tx), handle))
}

View File

@@ -0,0 +1,273 @@
// src/corporate/bond_processing.rs
// Bond-specific processing logic for corporate and government bonds
use super::types::*;
/// Parse bond details from ticker and security description
///
/// Examples:
/// - "WTFC 4.3 01/12/26 0003" -> coupon: 4.3, maturity: 2026-01-12
/// - "SLOVAK 1.5225 05/10/28 4Y" -> coupon: 1.5225, maturity: 2028-05-10
/// - "SEK Float 06/30/34" -> floating rate, maturity: 2034-06-30
/// - "GGB 0 10/15/42" -> zero coupon, maturity: 2042-10-15
pub fn parse_bond_details(ticker: &str, security_description: &str) -> BondDetails {
let mut details = BondDetails {
coupon_rate: None,
maturity_date: None,
is_floating: false,
is_zero_coupon: false,
tenor_years: None,
series_identifier: None,
};
// Check for floating rate - look for "Float", " F ", "V0" patterns
if ticker.contains("Float") || ticker.contains(" F ") || ticker.contains(" V0 ")
|| security_description.contains("Float") {
details.is_floating = true;
}
// Parse coupon rate if not floating
if !details.is_floating {
if let Some(coupon) = extract_coupon_rate(ticker, security_description) {
details.coupon_rate = Some(coupon);
details.is_zero_coupon = coupon == 0.0;
}
}
// Parse maturity date
if let Some(maturity) = extract_maturity_date(ticker, security_description) {
details.maturity_date = Some(maturity.clone());
// Calculate tenor (simplified - just extract year)
if let Some(year_str) = maturity.split('-').next() {
if let Ok(mat_year) = year_str.parse::<i32>() {
use chrono::Datelike;
let current_year = chrono::Utc::now().year(); // derive from the clock instead of hardcoding
let years_to_maturity = (mat_year - current_year).max(0) as u32;
details.tenor_years = Some(years_to_maturity);
}
}
}
// Extract series identifier
details.series_identifier = extract_series_identifier(ticker);
details
}
/// Extract coupon rate from ticker/description
/// Handles: "4.3", "1.5225", "12 1/2" (fractional), "0"
fn extract_coupon_rate(ticker: &str, description: &str) -> Option<f64> {
let text = format!("{} {}", ticker, description);
// Pattern 1: Fractional rates like "12 1/2" -> 12.5
if let Some(frac_result) = parse_fractional_coupon(&text) {
return Some(frac_result);
}
// Pattern 2: Decimal rates like "4.3" or "1.5225"
// Look for number followed by space and date pattern
let parts: Vec<&str> = text.split_whitespace().collect();
for i in 0..parts.len() {
if let Ok(rate) = parts[i].parse::<f64>() {
// Sanity check: coupon rates are typically 0-20%
if rate >= 0.0 && rate <= 20.0 {
// Make sure it's before a date-like pattern
if i + 1 < parts.len() {
let next = parts[i + 1];
if next.contains('/') || next.len() >= 8 {
return Some(rate);
}
}
}
}
}
None
}
/// Parse fractional coupon like "12 1/2" -> 12.5
fn parse_fractional_coupon(text: &str) -> Option<f64> {
let parts: Vec<&str> = text.split_whitespace().collect();
for i in 0..parts.len().saturating_sub(1) {
// Check if current part is a number
if let Ok(whole) = parts[i].parse::<f64>() {
// Check if next part is a fraction like "1/2"
if let Some(slash_pos) = parts[i + 1].find('/') {
let frac_str = parts[i + 1];
let num_str = &frac_str[..slash_pos];
let den_str = &frac_str[slash_pos + 1..];
if let (Ok(num), Ok(den)) = (num_str.parse::<f64>(), den_str.parse::<f64>()) {
if den != 0.0 {
return Some(whole + num / den);
}
}
}
}
}
None
}
/// Extract maturity date from ticker/description
/// Handles: "01/12/26", "05/10/28", "06/30/2034"
fn extract_maturity_date(ticker: &str, description: &str) -> Option<String> {
let text = format!("{} {}", ticker, description);
// Look for MM/DD/YY or MM/DD/YYYY patterns
let parts: Vec<&str> = text.split_whitespace().collect();
for part in parts {
if let Some(date) = parse_date_pattern(part) {
return Some(date);
}
}
None
}
/// Parse various date formats to YYYY-MM-DD
fn parse_date_pattern(s: &str) -> Option<String> {
let slash_count = s.matches('/').count();
if slash_count != 2 {
return None;
}
let parts: Vec<&str> = s.split('/').collect();
if parts.len() != 3 {
return None;
}
let month = parts[0];
let day = parts[1];
let year_part = parts[2];
// Parse year - could be 2 or 4 digits
let year = if year_part.len() == 2 {
if let Ok(yy) = year_part.parse::<u32>() {
// Assume 20xx for values <= 50, 19xx for > 50
if yy <= 50 {
format!("{}", 2000 + yy)
} else {
format!("{}", 1900 + yy)
}
} else {
return None;
}
} else if year_part.len() == 4 {
year_part.to_string()
} else {
return None;
};
// Validate month and day
if let (Ok(m), Ok(d)) = (month.parse::<u32>(), day.parse::<u32>()) {
if m >= 1 && m <= 12 && d >= 1 && d <= 31 {
return Some(format!("{}-{:02}-{:02}", year, m, d));
}
}
None
}
/// Extract series identifier (tokens after the date)
/// Examples: "0003", "4Y", "144A", "REGS", "MTN", "PSI", "CD"
fn extract_series_identifier(ticker: &str) -> Option<String> {
let parts: Vec<&str> = ticker.split_whitespace().collect();
// Look for date pattern, then take what comes after
for i in 0..parts.len() {
if parts[i].contains('/') && parts[i].matches('/').count() == 2 {
// Found date, check if there's something after
if i + 1 < parts.len() {
return Some(parts[i + 1].to_string());
}
}
}
None
}
/// Classify government issuer type
pub fn classify_government_issuer(name: &str) -> String {
let name_lower = name.to_lowercase();
// Sovereign nations
if name_lower.contains("republic")
|| name_lower.contains("kingdom")
|| name_lower.contains("federal republic")
|| name_lower.ends_with(" govt")
|| name_lower.ends_with(" government")
|| name_lower.contains("hellenic") // Greece
|| name_lower.contains("slovak") {
return "sovereign".to_string();
}
// Municipalities (Norwegian communes, cities, etc.)
if name_lower.contains("kommune")
|| name_lower.contains("municipality")
|| name_lower.contains("city of")
|| name_lower.contains("town of")
|| name_lower.contains("county council") {
return "municipal".to_string();
}
// States/Provinces/Regions
if name_lower.contains("state of")
|| name_lower.contains("province")
|| name_lower.contains("region")
|| name_lower.contains("county") {
return "state".to_string();
}
// Government agencies/entities
if name_lower.contains("export credit")
|| name_lower.contains("development bank")
|| name_lower.contains("housing")
|| name_lower.contains("akademiska")
|| name_lower.contains("byggdastofnun") {
return "agency".to_string();
}
"other".to_string()
}
/// Classify government bond type based on security_type
///
/// Maps OpenFIGI security types to simplified bond categories for government bonds
///
/// # Examples
/// - "DOMESTIC" -> "domestic"
/// - "GLOBAL" -> "global"
/// - "EURO NON-DOLLAR" -> "euro"
/// - "DOMESTIC MTN" -> "mtn"
pub fn classify_government_bond_type(security_type: &str) -> String {
let security_type_upper = security_type.to_uppercase();
if security_type_upper.contains("GLOBAL") {
return "global".to_string();
}
if security_type_upper.contains("EURO") {
if security_type_upper.contains("NON-DOLLAR") || !security_type_upper.contains("DOLLAR") {
return "euro".to_string();
}
return "eurodollar".to_string();
}
if security_type_upper.contains("YANKEE") {
return "yankee".to_string();
}
if security_type_upper.contains("MTN") {
return "mtn".to_string();
}
if security_type_upper.contains("DOMESTIC") {
return "domestic".to_string();
}
"other".to_string()
}
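
An illustrative test sketch (not part of the diff) that exercises the examples from the doc comments above; the expected values follow directly from those comments, and the `BondDetails` field types are assumed from how the struct is initialized here.

```rust
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parses_fixed_coupon_bond() {
        let d = parse_bond_details("WTFC 4.3 01/12/26 0003", "");
        assert_eq!(d.coupon_rate, Some(4.3));
        assert_eq!(d.maturity_date.as_deref(), Some("2026-01-12"));
        assert_eq!(d.series_identifier.as_deref(), Some("0003"));
        assert!(!d.is_floating);
    }

    #[test]
    fn parses_floating_and_zero_coupon() {
        let float = parse_bond_details("SEK Float 06/30/34", "");
        assert!(float.is_floating);
        assert_eq!(float.maturity_date.as_deref(), Some("2034-06-30"));

        let zero = parse_bond_details("GGB 0 10/15/42", "");
        assert_eq!(zero.coupon_rate, Some(0.0));
        assert!(zero.is_zero_coupon);
    }
}
```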

View File

@@ -0,0 +1,215 @@
// src/corporate/checkpoint_helpers.rs
//! Shared helpers for checkpoint-based recovery and logging
//!
//! This module extracts common patterns used across multiple update modules
//! to reduce code duplication and improve maintainability.
use super::types::CompanyData;
use crate::util::logger;
use std::collections::HashMap;
use std::path::Path;
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use anyhow::Result;
/// Load companies from checkpoint and replay log for recovery
///
/// This function implements the checkpoint + write-ahead log pattern:
/// 1. Loads the main checkpoint file
/// 2. Replays any pending updates from the log file
/// 3. Returns the merged state
pub async fn load_checkpoint_with_log<P1, P2>(
checkpoint_path: P1,
log_path: P2,
checkpoint_desc: &str,
) -> Result<HashMap<String, CompanyData>>
where
P1: AsRef<Path>,
P2: AsRef<Path>,
{
let checkpoint_path = checkpoint_path.as_ref();
let log_path = log_path.as_ref();
let mut companies: HashMap<String, CompanyData> = HashMap::new();
// Load checkpoint if it exists
if checkpoint_path.exists() {
logger::log_info(&format!("Loading checkpoint from {}...", checkpoint_desc)).await;
let content = tokio::fs::read_to_string(checkpoint_path).await?;
for line in content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company) => {
companies.insert(company.name.clone(), company);
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
}
}
}
logger::log_info(&format!("Loaded checkpoint with {} companies", companies.len())).await;
}
// Replay log if it exists
if log_path.exists() {
logger::log_info("Replaying update log...").await;
let log_content = tokio::fs::read_to_string(log_path).await?;
let mut replayed = 0;
for line in log_content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company) => {
companies.insert(company.name.clone(), company);
replayed += 1;
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
if replayed > 0 {
logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
}
}
Ok(companies)
}
/// Consolidate log into checkpoint and clear log
///
/// Atomically writes all companies to a new checkpoint file and removes the log.
/// Uses atomic rename to ensure crash safety.
pub async fn consolidate_checkpoint<P1, P2>(
checkpoint_path: P1,
log_path: P2,
companies: &HashMap<String, CompanyData>,
) -> Result<()>
where
P1: AsRef<Path>,
P2: AsRef<Path>,
{
let checkpoint_path = checkpoint_path.as_ref();
let log_path = log_path.as_ref();
logger::log_info("Consolidating update log into checkpoint...").await;
let temp_checkpoint = checkpoint_path.with_extension("tmp");
let mut temp_file = File::create(&temp_checkpoint).await?;
for company in companies.values() {
let json_line = serde_json::to_string(company)?;
temp_file.write_all(json_line.as_bytes()).await?;
temp_file.write_all(b"\n").await?;
}
temp_file.flush().await?;
temp_file.sync_data().await?;
drop(temp_file);
tokio::fs::rename(&temp_checkpoint, checkpoint_path).await?;
// Remove log after successful consolidation
if log_path.exists() {
tokio::fs::remove_file(log_path).await.ok();
}
logger::log_info(&format!("✓ Consolidated {} companies", companies.len())).await;
Ok(())
}
/// Check if log file has content
pub async fn log_has_content<P: AsRef<Path>>(log_path: P) -> bool {
if let Ok(metadata) = tokio::fs::metadata(log_path.as_ref()).await {
metadata.len() > 0
} else {
false
}
}
/// Load enrichment progress from log file
///
/// Used by enrichment functions to track which companies have already been processed.
/// Parses log entries with format: {"company_name": "...", "status": "enriched", ...}
pub async fn load_enrichment_progress<P>(
log_path: P,
) -> Result<std::collections::HashSet<String>>
where
P: AsRef<Path>,
{
let mut enriched_companies = std::collections::HashSet::new();
if !log_path.as_ref().exists() {
return Ok(enriched_companies);
}
logger::log_info("Loading enrichment progress from log...").await;
let log_content = tokio::fs::read_to_string(log_path.as_ref()).await?;
for line in log_content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<serde_json::Value>(line) {
Ok(entry) => {
if let Some(name) = entry.get("company_name").and_then(|v| v.as_str()) {
if entry.get("status").and_then(|v| v.as_str()) == Some("enriched") {
enriched_companies.insert(name.to_string());
}
}
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
logger::log_info(&format!(
"Loaded {} enriched companies from log",
enriched_companies.len()
)).await;
Ok(enriched_companies)
}
/// Count enriched companies by checking for data files
///
/// Walks through the corporate directory and counts companies that have
/// a data file in the specified subdirectory (e.g., "events", "options", "chart").
pub async fn count_enriched_companies(
paths: &crate::util::directories::DataPaths,
data_type: &str,
) -> Result<usize> {
let corporate_dir = paths.corporate_dir();
if !corporate_dir.exists() {
return Ok(0);
}
let mut count = 0;
let mut entries = tokio::fs::read_dir(&corporate_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() {
let data_dir = path.join(data_type);
let data_file = data_dir.join("data.jsonl");
if data_file.exists() {
count += 1;
}
}
}
Ok(count)
}
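
A hypothetical call sequence showing the checkpoint + write-ahead-log cycle these helpers implement: recover state, apply updates via the log, then consolidate. The file names are placeholders; the function signatures match the helpers defined above.

```rust
async fn example_cycle(paths: &crate::util::directories::DataPaths) -> anyhow::Result<()> {
    let checkpoint = paths.corporate_dir().join("companies.checkpoint.jsonl");
    let log = paths.corporate_dir().join("companies.log.jsonl");

    // 1. Recover: load the checkpoint, then replay any pending log entries.
    let companies = load_checkpoint_with_log(&checkpoint, &log, "companies").await?;

    // 2. ... append new updates to the log as work completes (omitted) ...

    // 3. Periodically fold the log back into the checkpoint (atomic rename).
    if log_has_content(&log).await {
        consolidate_checkpoint(&checkpoint, &log, &companies).await?;
    }
    Ok(())
}
```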

View File

@@ -0,0 +1,720 @@
// src/corporate/collect_exchanges.rs
use crate::util::directories::DataPaths;
use crate::util::integrity::{DataStage, StateEntry, StateManager, file_reference};
use crate::util::logger;
use crate::corporate::types::*;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tokio::fs;
use tokio::io::AsyncWriteExt;
/// Exchange information collected from company data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExchangeInfo {
#[serde(rename = "exchangeName")]
pub exchange_name: String,
pub currency: String,
#[serde(rename = "currencySymbol")]
pub currency_symbol: String,
#[serde(rename = "exchangeDataDelayedBy")]
pub exchange_data_delayed_by: i64,
#[serde(rename = "totalMarketCap")]
pub total_market_cap: u64,
#[serde(rename = "totalMarketCapUSD")]
pub total_market_cap_usd: f64, // NEW: Market cap converted to USD
pub companies: Vec<String>,
}
/// Extract exchange data from company core data
#[derive(Debug, Deserialize)]
struct CompanyCoreData {
modules: Option<CoreModules>,
}
#[derive(Debug, Deserialize)]
struct CoreModules {
price: Option<PriceModule>,
}
#[derive(Debug, Deserialize)]
struct PriceModule {
#[serde(rename = "exchangeName")]
exchange_name: Option<String>,
currency: Option<String>,
#[serde(rename = "currencySymbol")]
currency_symbol: Option<String>,
exchange: Option<String>,
#[serde(rename = "exchangeDataDelayedBy")]
exchange_data_delayed_by: Option<i64>,
#[serde(rename = "marketCap")]
market_cap: Option<MarketCapData>,
}
#[derive(Debug, Deserialize)]
struct MarketCapData {
raw: Option<u64>,
}
/// Normalize currency code and get conversion factor
/// Handles special cases like GBp (pence) and ZAc (cents)
fn normalize_currency(currency: &str) -> (&str, f64) {
match currency {
"GBp" => ("GBP", 100.0), // British Pence -> Pounds (divide by 100)
"ZAc" => ("ZAR", 100.0), // South African Cents -> Rand (divide by 100)
_ => (currency, 1.0), // No conversion needed
}
}
/// FX rate cache for currency conversion
struct FxRateCache {
rates: HashMap<String, f64>,
}
impl FxRateCache {
/// Create new FX rate cache by loading all currency charts
async fn new(paths: &DataPaths) -> anyhow::Result<Self> {
let mut rates = HashMap::new();
// USD to USD is always 1.0
rates.insert("USD".to_string(), 1.0);
let currency_dir = paths.data_dir().join("economic").join("currency");
if !currency_dir.exists() {
logger::log_warn(" FX rates directory not found - will use default rates").await;
return Ok(Self { rates });
}
let mut entries = fs::read_dir(&currency_dir).await?;
let mut loaded_count = 0;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if !path.is_dir() {
continue;
}
let currency_code = match path.file_name().and_then(|n| n.to_str()) {
Some(code) => code.to_string(),
None => continue,
};
let chart_path = path.join("chart").join("data.jsonl");
if !chart_path.exists() {
continue;
}
// Load chart and get latest rate
match load_latest_fx_rate(&chart_path).await {
Ok(rate) => {
rates.insert(currency_code.clone(), rate);
loaded_count += 1;
}
Err(e) => {
logger::log_warn(&format!(
" Failed to load FX rate for {}: {}",
currency_code, e
)).await;
}
}
}
logger::log_info(&format!(" ✓ Loaded {} FX rates", loaded_count)).await;
Ok(Self { rates })
}
/// Convert amount from given currency to USD
fn to_usd(&self, amount: u64, currency: &str) -> f64 {
// Normalize currency and get conversion factor
// e.g., GBp -> (GBP, 100.0), ZAc -> (ZAR, 100.0)
let (normalized_currency, factor) = normalize_currency(currency);
// First convert to base currency unit (e.g., pence to pounds)
let amount_in_base = amount as f64 / factor;
if normalized_currency == "USD" {
return amount_in_base;
}
// Get rate (currency units per USD)
// e.g. a stored rate of 0.92 for EUR means 1 USD = 0.92 EUR,
// so to convert EUR to USD: EUR_amount / 0.92
match self.rates.get(normalized_currency) {
Some(&rate) if rate > 0.0 => {
amount_in_base / rate
}
_ => {
// Fallback: use approximate rates for common currencies
let fallback_rate = get_fallback_rate(normalized_currency);
amount_in_base / fallback_rate
}
}
}
/// Get rate for a currency (currency units per USD)
fn get_rate(&self, currency: &str) -> Option<f64> {
let (normalized_currency, _) = normalize_currency(currency);
self.rates.get(normalized_currency).copied()
}
}
/// Load latest FX rate from chart data
async fn load_latest_fx_rate(chart_path: &std::path::Path) -> anyhow::Result<f64> {
let content = fs::read_to_string(chart_path).await?;
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
let chart: ChartData = serde_json::from_str(line)?;
if chart.quotes.is_empty() {
return Err(anyhow::anyhow!("No quotes in chart data"));
}
// Get most recent quote with a close price
let latest_rate = chart.quotes
.iter()
.rev()
.find_map(|q| q.close)
.ok_or_else(|| anyhow::anyhow!("No valid close prices"))?;
return Ok(latest_rate);
}
Err(anyhow::anyhow!("No data in chart file"))
}
/// Fallback rates for common currencies (approximate, as of 2024)
/// These are currency units per USD (same format as our FX data)
fn get_fallback_rate(currency: &str) -> f64 {
match currency {
"USD" => 1.0,
"EUR" => 0.92, // 1 USD = 0.92 EUR
"GBP" => 0.79, // 1 USD = 0.79 GBP
"JPY" => 150.0, // 1 USD = 150 JPY
"CNY" | "RMB" => 7.2,
"CHF" => 0.88,
"AUD" => 1.52,
"CAD" => 1.36,
"HKD" => 7.8,
"SGD" => 1.34,
"SEK" => 10.5,
"NOK" => 10.8,
"DKK" => 6.9,
"PLN" => 4.0,
"CZK" => 23.0,
"TRY" => 32.0,
"ZAR" => 18.5,
"ILS" => 3.7,
"RON" => 4.6,
"KWD" => 0.31,
"TWD" => 31.5,
"ISK" => 138.0,
"NZD" => 1.65,
"MXN" => 17.0,
"BRL" => 5.0,
"INR" => 83.0,
"KRW" => 1320.0,
"THB" => 35.0,
"MYR" => 4.6,
"IDR" => 15700.0,
"PHP" => 56.0,
"VND" => 24500.0,
_ => {
// Default: assume similar to USD
1.0
}
}
}
/// Collect all exchanges from company directories and create yahoo_exchanges.json
///
/// # Features
/// - Iterates through all company directories
/// - Extracts exchange data from core/data.jsonl
/// - Groups companies by exchange
/// - Sums up market caps for each exchange
/// - Converts all market caps to USD using FX rates
/// - Saves consolidated mapping to data/yahoo_exchanges.json
/// - Handles missing or invalid data gracefully
/// - Integrity tracking with content hash validation
pub async fn collect_and_save_exchanges(paths: &DataPaths) -> anyhow::Result<usize> {
let output_path = paths.data_dir().join("yahoo_exchanges.json");
let manager = StateManager::new(paths.integrity_dir()).await?;
let step_name = "exchange_collection_complete";
if manager.is_step_valid(step_name).await? {
logger::log_info(" Exchange collection already completed and valid").await;
// Load and count exchanges
if output_path.exists() {
let content = fs::read_to_string(&output_path).await?;
let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
logger::log_info(&format!(" ✓ Found {} valid exchanges", exchanges.len())).await;
return Ok(exchanges.len());
}
}
let entry = create_exchange_collection_state_entry(&manager, &output_path, step_name).await?;
logger::log_info("Collecting exchange information from company directories...").await;
let corporate_dir = paths.corporate_dir();
if !corporate_dir.exists() {
logger::log_warn(" Corporate directory does not exist").await;
return Ok(0);
}
// Load FX rates for currency conversion
logger::log_info("Loading FX rates for currency conversion...").await;
let fx_cache = FxRateCache::new(paths).await?;
// Map of exchange code -> ExchangeInfo
let mut exchanges: HashMap<String, ExchangeInfo> = HashMap::new();
let mut entries = fs::read_dir(&corporate_dir).await?;
let mut processed_count = 0;
let mut skipped_count = 0;
while let Some(entry) = entries.next_entry().await? {
let company_path = entry.path();
if !company_path.is_dir() {
continue;
}
let company_name = match company_path.file_name().and_then(|n| n.to_str()) {
Some(name) => name.to_string(),
None => {
skipped_count += 1;
continue;
}
};
// Read core/data.jsonl
let core_data_path = company_path.join("core").join("data.jsonl");
if !core_data_path.exists() {
skipped_count += 1;
continue;
}
// Parse core data
match extract_exchange_info(&core_data_path, &company_name).await {
Ok(Some((exchange_code, exchange_name, currency, currency_symbol, delay, market_cap))) => {
// Convert market cap to USD
let market_cap_usd = fx_cache.to_usd(market_cap, &currency);
// Add or update exchange entry
exchanges
.entry(exchange_code.clone())
.and_modify(|info| {
// Add company to existing exchange and sum market caps
info.companies.push(company_name.clone());
info.total_market_cap = info.total_market_cap.saturating_add(market_cap);
info.total_market_cap_usd += market_cap_usd;
})
.or_insert_with(|| {
// Create new exchange entry
ExchangeInfo {
exchange_name,
currency,
currency_symbol,
exchange_data_delayed_by: delay,
total_market_cap: market_cap,
total_market_cap_usd: market_cap_usd,
companies: vec![company_name.clone()],
}
});
processed_count += 1;
}
Ok(None) => {
// No exchange data found
skipped_count += 1;
}
Err(e) => {
logger::log_warn(&format!(
" Failed to parse exchange data for {}: {}",
company_name, e
)).await;
skipped_count += 1;
}
}
// Progress logging every 100 companies
if (processed_count + skipped_count) % 100 == 0 {
logger::log_info(&format!(
" Progress: {} companies processed, {} skipped",
processed_count, skipped_count
)).await;
}
}
logger::log_info(&format!(
" ✓ Collected data from {} companies ({} skipped)",
processed_count, skipped_count
)).await;
logger::log_info(&format!(
" ✓ Found {} unique exchanges",
exchanges.len()
)).await;
// Sort companies within each exchange for consistency
for exchange_info in exchanges.values_mut() {
exchange_info.companies.sort();
}
// Save to yahoo_exchanges.json
save_exchanges_json(&output_path, &exchanges).await?;
logger::log_info(&format!(
" ✓ Saved exchange mapping to {}",
output_path.display()
)).await;
manager.mark_valid(entry).await?;
logger::log_info(" ✓ Exchange collection marked as complete with integrity tracking").await;
// Print summary statistics
print_exchange_statistics(&exchanges, &fx_cache).await;
Ok(exchanges.len())
}
/// Track exchange collection completion with content hash verification
async fn create_exchange_collection_state_entry(
manager: &StateManager,
output_path: &std::path::Path,
step_name: &str,
) -> anyhow::Result<StateEntry> {
// Create content reference for the output file
let content_reference = file_reference(output_path);
// Track completion with:
// - Content reference: The yahoo_exchanges.json file
// - Data stage: Data (7-day TTL by default)
// - Dependencies: None (this is a collection step, not dependent on other tracked steps)
// Note: In practice, it depends on core data, but we track the output file
// which will change if core data changes, so explicit dependency not needed
Ok(manager.create_entry(
step_name.to_string(),
content_reference,
DataStage::Data,
).await?)
}
/// Extract exchange information from a company's core data file
async fn extract_exchange_info(
core_data_path: &std::path::Path,
company_name: &str,
) -> anyhow::Result<Option<(String, String, String, String, i64, u64)>> {
let content = fs::read_to_string(core_data_path).await?;
// Parse JSONL - should be single line
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
match serde_json::from_str::<CompanyCoreData>(line) {
Ok(data) => {
// Extract from modules.price
let price_module = match data.modules.and_then(|m| m.price) {
Some(p) => p,
None => return Ok(None),
};
// Extract required fields
let exchange = match price_module.exchange {
Some(e) if !e.is_empty() => e,
_ => return Ok(None),
};
// Filter out invalid placeholder exchange codes
if exchange == "CCC" {
return Ok(None);
}
let exchange_name = price_module.exchange_name.unwrap_or_else(|| exchange.clone());
let currency = price_module.currency.unwrap_or_else(|| "USD".to_string());
let currency_symbol = price_module.currency_symbol.unwrap_or_else(|| "$".to_string());
let delay = price_module.exchange_data_delayed_by.unwrap_or(0);
let market_cap = price_module
.market_cap
.and_then(|mc| mc.raw)
.unwrap_or(0);
return Ok(Some((
exchange,
exchange_name,
currency,
currency_symbol,
delay,
market_cap,
)));
}
Err(e) => {
// Try to parse as generic JSON to check if exchange field exists in modules.price
if let Ok(json) = serde_json::from_str::<serde_json::Value>(line) {
// Try to access modules.price.exchange
if let Some(price) = json.get("modules").and_then(|m| m.get("price")) {
if let Some(exchange) = price.get("exchange").and_then(|v| v.as_str()) {
if !exchange.is_empty() && exchange != "CCC" {
let exchange_name = price
.get("exchangeName")
.and_then(|v| v.as_str())
.unwrap_or(exchange)
.to_string();
let currency = price
.get("currency")
.and_then(|v| v.as_str())
.unwrap_or("USD")
.to_string();
let currency_symbol = price
.get("currencySymbol")
.and_then(|v| v.as_str())
.unwrap_or("$")
.to_string();
let delay = price
.get("exchangeDataDelayedBy")
.and_then(|v| v.as_i64())
.unwrap_or(0);
let market_cap = price
.get("marketCap")
.and_then(|mc| mc.get("raw"))
.and_then(|v| v.as_u64())
.unwrap_or(0);
return Ok(Some((
exchange.to_string(),
exchange_name,
currency,
currency_symbol,
delay,
market_cap,
)));
}
}
}
}
return Err(anyhow::anyhow!(
"Failed to parse core data for {}: {}",
company_name,
e
));
}
}
}
Ok(None)
}
/// Save exchanges map to JSON file with fsync
async fn save_exchanges_json(
path: &std::path::Path,
exchanges: &HashMap<String, ExchangeInfo>,
) -> anyhow::Result<()> {
// Create sorted output for consistency
let mut sorted_exchanges: Vec<_> = exchanges.iter().collect();
sorted_exchanges.sort_by_key(|(code, _)| code.as_str());
let exchanges_map: HashMap<String, ExchangeInfo> = sorted_exchanges
.into_iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
// Serialize with pretty printing
let json_content = serde_json::to_string_pretty(&exchanges_map)?;
// Write to temporary file first (atomic write pattern)
let tmp_path = path.with_extension("json.tmp");
let mut file = fs::File::create(&tmp_path).await?;
file.write_all(json_content.as_bytes()).await?;
file.write_all(b"\n").await?;
file.flush().await?;
file.sync_all().await?;
// Atomic rename
fs::rename(&tmp_path, path).await?;
Ok(())
}
/// Format market cap as a human-readable string
fn format_market_cap(market_cap: f64) -> String {
if market_cap >= 1_000_000_000_000.0 {
format!("{:.2}T", market_cap / 1_000_000_000_000.0)
} else if market_cap >= 1_000_000_000.0 {
format!("{:.2}B", market_cap / 1_000_000_000.0)
} else if market_cap >= 1_000_000.0 {
format!("{:.2}M", market_cap / 1_000_000.0)
} else if market_cap >= 1_000.0 {
format!("{:.2}K", market_cap / 1_000.0)
} else {
format!("{:.2}", market_cap)
}
}
/// Print statistics about collected exchanges
async fn print_exchange_statistics(exchanges: &HashMap<String, ExchangeInfo>, fx_cache: &FxRateCache) {
logger::log_info("Exchange Statistics (sorted by USD market cap):").await;
// Sort by total market cap in USD (descending)
let mut exchange_list: Vec<_> = exchanges.iter().collect();
exchange_list.sort_by(|a, b| {
b.1.total_market_cap_usd
.partial_cmp(&a.1.total_market_cap_usd)
.unwrap_or(std::cmp::Ordering::Equal)
});
// Print top 20 exchanges by total market cap (USD)
logger::log_info(" Top 20 exchanges by total market cap (USD):").await;
for (i, (code, info)) in exchange_list.iter().take(20).enumerate() {
let (normalized_currency, factor) = normalize_currency(&info.currency);
let fx_rate = fx_cache.get_rate(&info.currency);
let fx_info = match fx_rate {
Some(rate) => {
if factor > 1.0 {
// Show conversion for pence/cents
format!(" (1 {} = {} {}, {} {} = 1 {})",
normalized_currency,
format!("{:.4}", rate),
"USD",
factor as i32,
info.currency,
normalized_currency)
} else {
format!(" (1 USD = {:.4} {})", rate, info.currency)
}
}
None => format!(" (using fallback rate for {})", info.currency),
};
logger::log_info(&format!(
" {}. {} ({}) - ${} USD ({}{} {}) - {} companies{}",
i + 1,
info.exchange_name,
code,
format_market_cap(info.total_market_cap_usd),
info.currency_symbol,
format_market_cap(info.total_market_cap as f64),
info.currency,
info.companies.len(),
if info.currency != "USD" { &fx_info } else { "" }
)).await;
}
// Count by currency
let mut currency_counts: HashMap<String, usize> = HashMap::new();
let mut currency_market_caps: HashMap<String, f64> = HashMap::new();
for info in exchanges.values() {
*currency_counts.entry(info.currency.clone()).or_insert(0) += info.companies.len();
*currency_market_caps.entry(info.currency.clone()).or_insert(0.0) += info.total_market_cap_usd;
}
let mut currencies: Vec<_> = currency_counts.iter().collect();
currencies.sort_by(|a, b| {
currency_market_caps.get(b.0)
.unwrap_or(&0.0)
.partial_cmp(currency_market_caps.get(a.0).unwrap_or(&0.0))
.unwrap_or(std::cmp::Ordering::Equal)
});
logger::log_info(" Market cap by currency (USD equivalent):").await;
for (currency, count) in currencies.iter().take(10) {
let market_cap_usd = currency_market_caps.get(*currency).unwrap_or(&0.0);
let (normalized_currency, factor) = normalize_currency(currency);
let fx_rate = fx_cache.get_rate(currency);
let fx_info = match fx_rate {
Some(rate) => {
if factor > 1.0 {
format!(" (1 {} = {:.4} USD, {} {} = 1 {})",
normalized_currency, rate, factor as i32, currency, normalized_currency)
} else {
format!(" (1 USD = {:.4} {})", rate, currency)
}
}
None => format!(" (fallback)"),
};
logger::log_info(&format!(
" {}: {} companies, ${} USD{}",
currency,
count,
format_market_cap(*market_cap_usd),
if *currency != "USD" { &fx_info } else { "" }
)).await;
}
// Delay statistics
let delayed_exchanges: Vec<_> = exchanges
.iter()
.filter(|(_, info)| info.exchange_data_delayed_by > 0)
.collect();
if !delayed_exchanges.is_empty() {
logger::log_info(&format!(
" Exchanges with data delay: {} (out of {})",
delayed_exchanges.len(),
exchanges.len()
)).await;
}
// Total market cap across all exchanges (in USD)
let total_market_cap_usd: f64 = exchanges.values()
.map(|info| info.total_market_cap_usd)
.sum();
logger::log_info(&format!(
" Total market cap across all exchanges: ${} USD",
format_market_cap(total_market_cap_usd)
)).await;
}
/// Get exchange information for a specific exchange code
pub async fn get_exchange_info(
paths: &DataPaths,
exchange_code: &str,
) -> anyhow::Result<Option<ExchangeInfo>> {
let exchanges_path = paths.data_dir().join("yahoo_exchanges.json");
if !exchanges_path.exists() {
return Ok(None);
}
let content = fs::read_to_string(&exchanges_path).await?;
let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
Ok(exchanges.get(exchange_code).cloned())
}
/// List all available exchanges
pub async fn list_all_exchanges(paths: &DataPaths) -> anyhow::Result<Vec<(String, ExchangeInfo)>> {
let exchanges_path = paths.data_dir().join("yahoo_exchanges.json");
if !exchanges_path.exists() {
return Ok(Vec::new());
}
let content = fs::read_to_string(&exchanges_path).await?;
let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
let mut exchange_list: Vec<_> = exchanges.into_iter().collect();
exchange_list.sort_by(|a, b| a.0.cmp(&b.0));
Ok(exchange_list)
}
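For illustration only (not part of this diff): a sketch of driving the two accessors above once yahoo_exchanges.json has been written by the collector; "NMS" is only an example exchange code.
let paths = DataPaths::new(".")?;
if let Some(info) = get_exchange_info(&paths, "NMS").await? {
    println!("{} trades in {}", info.exchange_name, info.currency);
}
for (code, info) in list_all_exchanges(&paths).await? {
    println!("{}: {} companies", code, info.companies.len());
}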

View File

@@ -1,51 +0,0 @@
// src/corporate/fx.rs
use std::collections::HashMap;
use reqwest;
use serde_json::Value;
use tokio::fs;
use std::path::Path;
static FX_CACHE_PATH: &str = "fx_rates.json";
pub async fn get_usd_rate(currency: &str) -> anyhow::Result<f64> {
if currency == "USD" {
return Ok(1.0);
}
let mut cache: HashMap<String, (f64, String)> = if Path::new(FX_CACHE_PATH).exists() {
let content = fs::read_to_string(FX_CACHE_PATH).await?;
serde_json::from_str(&content).unwrap_or_default()
} else {
HashMap::new()
};
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
if let Some((rate, date)) = cache.get(currency) {
if date == &today {
return Ok(*rate);
}
}
let symbol = format!("{}USD=X", currency);
let url = format!("https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d", symbol);
let json: Value = reqwest::Client::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let close = json["chart"]["result"][0]["meta"]["regularMarketPrice"]
.as_f64()
.or_else(|| json["chart"]["result"][0]["indicators"]["quote"][0]["close"][0].as_f64())
.unwrap_or(1.0);
let rate = if currency == "JPY" || currency == "KRW" { close } else { 1.0 / close }; // inverse pairs
cache.insert(currency.to_string(), (rate, today.clone()));
let _ = fs::write(FX_CACHE_PATH, serde_json::to_string_pretty(&cache)?).await;
Ok(rate)
}

View File

@@ -1,22 +1,25 @@
// src/corporate/helpers.rs // src/corporate/helpers.rs
use super::types::*; use super::types::*;
use crate::util::directories::DataPaths;
use chrono::{Local, NaiveDate}; use chrono::{Local, NaiveDate};
use std::collections::{HashMap, HashSet};
use rand::rngs::StdRng; use rand::rngs::StdRng;
use rand::prelude::{Rng, SeedableRng, IndexedRandom}; use rand::prelude::{Rng, SeedableRng, IndexedRandom};
use tokio::fs;
use anyhow::{anyhow};
pub fn event_key(e: &CompanyEvent) -> String { pub fn event_key(e: &CompanyEventData) -> String {
format!("{}|{}|{}", e.ticker, e.date, e.time) format!("{}|{}|{}", e.ticker, e.date, e.time)
} }
pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Vec<CompanyEventChange> { pub fn detect_changes(old: &CompanyEventData, new: &CompanyEventData, today: &str) -> Vec<CompanyEventChangeData> {
let mut changes = Vec::new(); let mut changes = Vec::new();
let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
if new.date.as_str() <= today { return changes; } if new.date.as_str() <= today { return changes; }
if old.time != new.time { if old.time != new.time {
changes.push(CompanyEventChange { changes.push(CompanyEventChangeData {
ticker: new.ticker.clone(), ticker: new.ticker.clone(),
date: new.date.clone(), date: new.date.clone(),
field_changed: "time".to_string(), field_changed: "time".to_string(),
@@ -27,7 +30,7 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
} }
if old.eps_forecast != new.eps_forecast { if old.eps_forecast != new.eps_forecast {
changes.push(CompanyEventChange { changes.push(CompanyEventChangeData {
ticker: new.ticker.clone(), ticker: new.ticker.clone(),
date: new.date.clone(), date: new.date.clone(),
field_changed: "eps_forecast".to_string(), field_changed: "eps_forecast".to_string(),
@@ -38,7 +41,7 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
} }
if old.eps_actual != new.eps_actual { if old.eps_actual != new.eps_actual {
changes.push(CompanyEventChange { changes.push(CompanyEventChangeData {
ticker: new.ticker.clone(), ticker: new.ticker.clone(),
date: new.date.clone(), date: new.date.clone(),
field_changed: "eps_actual".to_string(), field_changed: "eps_actual".to_string(),
@@ -53,14 +56,6 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
changes changes
} }
pub fn price_key(p: &CompanyPrice) -> String {
if p.time.is_empty() {
format!("{}|{}", p.ticker, p.date)
} else {
format!("{}|{}|{}", p.ticker, p.date, p.time)
}
}
pub fn parse_float(s: &str) -> Option<f64> { pub fn parse_float(s: &str) -> Option<f64> {
s.replace("--", "").replace(",", "").parse::<f64>().ok() s.replace("--", "").replace(",", "").parse::<f64>().ok()
} }
@@ -74,7 +69,7 @@ pub fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
/// Send-safe random range /// Send-safe random range
pub fn random_range(min: u64, max: u64) -> u64 { pub fn random_range(min: u64, max: u64) -> u64 {
let mut rng = StdRng::from_rng(&mut rand::rng()); let mut rng = StdRng::from_rng(&mut rand::rng());
rng.gen_range(min..max) rng.random_range(min..max)
} }
/// Send-safe random choice /// Send-safe random choice
@@ -82,3 +77,108 @@ pub fn choose_random<T: Clone>(items: &[T]) -> T {
let mut rng = StdRng::from_rng(&mut rand::rng()); let mut rng = StdRng::from_rng(&mut rand::rng());
items.choose(&mut rng).unwrap().clone() items.choose(&mut rng).unwrap().clone()
} }
/// Extract first valid Yahoo ticker from company
pub fn extract_first_yahoo_ticker(company: &CompanyData) -> Option<String> {
if let Some(isin_tickers_map) = &company.isin_tickers_map {
for tickers in isin_tickers_map.values() {
for ticker in tickers {
if ticker.starts_with("YAHOO:")
&& ticker != "YAHOO:NO_RESULTS"
&& ticker != "YAHOO:ERROR"
{
return Some(ticker.trim_start_matches("YAHOO:").to_string());
}
}
}
}
None
}
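For illustration only (not part of this diff), a sketch assuming the CompanyData fields shown in types.rs: the NO_RESULTS sentinel is skipped and the first real Yahoo ticker wins.
let mut map = HashMap::new();
map.insert(
    "US0378331005".to_string(),
    vec!["YAHOO:NO_RESULTS".to_string(), "YAHOO:AAPL".to_string()],
);
let company = CompanyData {
    name: "APPLE INC".to_string(),
    primary_isin: "US0378331005".to_string(),
    securities: HashMap::new(),
    yahoo_company_data: None,
    isin_tickers_map: Some(map),
};
assert_eq!(extract_first_yahoo_ticker(&company), Some("AAPL".to_string()));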
/// Sanitize company name for file system use
pub fn sanitize_company_name(name: &str) -> String {
name.replace("/", "_")
.replace("\\", "_")
.replace(":", "_")
.replace("*", "_")
.replace("?", "_")
.replace("\"", "_")
.replace("<", "_")
.replace(">", "_")
.replace("|", "_")
}
/// Load companies from JSONL file
pub async fn load_companies_from_jsonl(
path: &std::path::Path
) -> anyhow::Result<Vec<CompanyData>> {
let content = tokio::fs::read_to_string(path).await?;
let mut companies = Vec::new();
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
if let Ok(company) = serde_json::from_str::<CompanyData>(line) {
companies.push(company);
}
}
Ok(companies)
}
pub async fn find_most_recent_figi_date_dir(paths: &DataPaths) -> anyhow::Result<Option<std::path::PathBuf>> {
let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
if !map_cache_dir.exists() {
return Ok(None);
}
let mut entries = tokio::fs::read_dir(&map_cache_dir).await?;
let mut dates = Vec::new();
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() {
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.len() == 8 && name.chars().all(|c| c.is_numeric()) {
dates.push((name.to_string(), path));
}
}
}
}
if dates.is_empty() {
return Ok(None);
}
dates.sort_by(|a, b| b.0.cmp(&a.0));
Ok(Some(dates[0].1.clone()))
}
pub async fn determine_gleif_date(
gleif_date: Option<&str>,
paths: &DataPaths,
) -> anyhow::Result<String> {
if let Some(d) = gleif_date {
return Ok(d.to_string());
}
let gleif_dir = paths.cache_gleif_dir();
let mut entries = fs::read_dir(gleif_dir).await?;
let mut dates = Vec::new();
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() {
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.len() == 8 && name.chars().all(|c| c.is_numeric()) {
dates.push(name.to_string());
}
}
}
}
dates.sort();
dates.last().cloned().ok_or_else(|| anyhow!("No GLEIF date found"))
}
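For illustration only (not part of this diff): an explicit date short-circuits the directory scan, otherwise the lexicographically latest YYYYMMDD directory under cache_gleif_dir() is returned.
let paths = DataPaths::new(".")?;
// Explicit date wins over whatever is on disk:
assert_eq!(determine_gleif_date(Some("20260101"), &paths).await?, "20260101");
// With e.g. 20251201/ and 20260110/ present, passing None would resolve to "20260110".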

View File

@@ -2,14 +2,20 @@
pub mod types; pub mod types;
pub mod scraper; pub mod scraper;
pub mod storage; pub mod storage;
pub mod update;
pub mod helpers; pub mod helpers;
pub mod aggregation; pub mod update_openfigi;
pub mod fx; pub mod yahoo_company_extraction;
pub mod openfigi;
pub mod yahoo;
pub mod update_parallel;
pub mod page_validation; pub mod page_validation;
pub mod atomic_writer; pub mod checkpoint_helpers;
// Corporate update modules
pub mod update;
pub mod update_companies;
pub mod update_companies_cleanse;
pub mod update_companies_enrich;
pub mod collect_exchanges;
pub mod bond_processing;
pub mod option_processing;
pub use update::run_full_update; pub use update::run_full_update;

File diff suppressed because it is too large

View File

@@ -0,0 +1,54 @@
/// Parse strike price from option ticker (e.g., "AAPL 150 CALL" -> 150.0)
pub fn parse_strike_from_ticker(ticker: &str) -> Option<f64> {
let parts: Vec<&str> = ticker.split_whitespace().collect();
for (i, part) in parts.iter().enumerate() {
if let Ok(strike) = part.parse::<f64>() {
// Check if the next word is CALL/PUT to confirm this number is the strike
if i + 1 < parts.len() && (parts[i + 1].to_uppercase() == "CALL" || parts[i + 1].to_uppercase() == "PUT") {
return Some(strike);
}
}
}
None
}
/// Parse expiration date from option ticker (e.g., "AAPL 150 CALL 01/17/25" -> timestamp)
pub fn parse_expiration_from_ticker(ticker: &str) -> Option<i64> {
let parts: Vec<&str> = ticker.split_whitespace().collect();
for part in parts {
// Look for date pattern MM/DD/YY
if part.contains('/') && part.len() >= 8 {
if let Ok(date) = chrono::NaiveDate::parse_from_str(part, "%m/%d/%y") {
return Some(date.and_hms_opt(16, 0, 0)?.and_utc().timestamp());
}
}
}
None
}
/// Parse option name to extract underlying company, issuer, and option type
///
/// Examples:
/// - "December 25 Calls on ALPHA GA" -> ("ALPHA GA", None, "call")
/// - "January 26 Puts on TESLA INC" -> ("TESLA INC", None, "put")
pub fn parse_option_name(name: &str) -> (String, Option<String>, String) {
let name_upper = name.to_uppercase();
// Detect option type
let option_type = if name_upper.contains("CALL") {
"call".to_string()
} else if name_upper.contains("PUT") {
"put".to_string()
} else {
"unknown".to_string()
};
// Try to extract underlying after "on"
if let Some(pos) = name_upper.find(" ON ") {
let underlying = name[pos + 4..].trim().to_string();
return (underlying, None, option_type);
}
// Fallback: return entire name
(name.to_string(), None, option_type)
}
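For illustration only (not part of this diff), expected behaviour of the three parsers above:
assert_eq!(parse_strike_from_ticker("AAPL 150 CALL"), Some(150.0));
assert_eq!(parse_strike_from_ticker("AAPL CALL"), None);
assert!(parse_expiration_from_ticker("AAPL 150 CALL 01/17/25").is_some()); // 2025-01-17 16:00 UTC
let (underlying, issuer, opt_type) = parse_option_name("December 25 Calls on ALPHA GA");
assert_eq!(underlying, "ALPHA GA");
assert_eq!(issuer, None);
assert_eq!(opt_type, "call");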

View File

@@ -1,179 +1,13 @@
// src/corporate/scraper.rs // src/corporate/scraper.rs
use super::{types::*}; use crate::{util::directories::DataPaths, util::logger};
//use crate::corporate::openfigi::OpenFigiClient;
use crate::{scraper::webdriver::*, util::directories::DataPaths, util::logger};
use fantoccini::{Client}; use fantoccini::{Client};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use chrono::{DateTime, Duration, NaiveDate, Utc};
use tokio::{time::{Duration as TokioDuration, sleep}};
use reqwest::Client as HttpClient;
use serde_json::{json, Value};
use zip::ZipArchive; use zip::ZipArchive;
use std::{collections::HashMap}; use std::{collections::HashMap};
use std::io::{Read}; use std::io::{Read};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
fn parse_price(v: Option<&Value>) -> f64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
.or_else(|| v.and_then(|x| x.as_f64()))
.unwrap_or(0.0)
}
fn parse_volume(v: Option<&Value>) -> u64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace(',', "").parse::<u64>().ok())
.or_else(|| v.and_then(|x| x.as_u64()))
.unwrap_or(0)
}
pub async fn fetch_daily_price_history(
ticker: &str,
start_str: &str,
end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1);
let mut all_prices = Vec::new();
let mut current = start;
while current < end {
let chunk_end = current + Duration::days(730);
let actual_end = chunk_end.min(end);
let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
println!(" Fetching {ticker} {}{}", current, actual_end - Duration::days(1));
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", USER_AGENT)
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let meta = &result["meta"];
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
let opens = quote["open"].as_array();
let highs = quote["high"].as_array();
let lows = quote["low"].as_array();
let closes = quote["close"].as_array();
let adj_closes = result["indicators"]["adjclose"][0]["adjclose"].as_array()
.or_else(|| closes);
let volumes = quote["volume"].as_array();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
if date_str < start_str.to_string() || date_str > end_str.to_string() {
continue;
}
let open = parse_price(opens.and_then(|a| a.get(i)));
let high = parse_price(highs.and_then(|a| a.get(i)));
let low = parse_price(lows.and_then(|a| a.get(i)));
let close = parse_price(closes.and_then(|a| a.get(i)));
let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
let volume = parse_volume(volumes.and_then(|a| a.get(i)));
all_prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
time: "".to_string(),
open,
high,
low,
close,
adj_close,
volume,
currency: currency.clone(),
});
}
sleep(TokioDuration::from_millis(200)).await;
current = actual_end;
}
all_prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
all_prices.dedup_by(|a, b| a.date == b.date && a.time == b.time);
println!(" Got {} daily bars for {ticker}", all_prices.len());
Ok(all_prices)
}
pub async fn fetch_price_history_5min(
ticker: &str,
_start: &str,
_end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let now = Utc::now().timestamp();
let period1 = now - 5184000;
let period2 = now;
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", USER_AGENT)
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let meta = &result["meta"];
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
let mut prices = Vec::new();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
let time_str = dt.format("%H:%M:%S").to_string();
let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i)));
let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i)));
let low = parse_price(quote["low"].as_array().and_then(|a| a.get(i)));
let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i)));
let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i)));
prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
time: time_str,
open,
high,
low,
close,
adj_close: close,
volume,
currency: currency.clone(),
});
}
prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
Ok(prices)
}
/// Fetch the URL of the latest ISIN↔LEI mapping CSV from GLEIF /// Fetch the URL of the latest ISIN↔LEI mapping CSV from GLEIF
/// Overengineered; we could just use the static URL, but this shows how to scrape if needed /// Overengineered; we could just use the static URL, but this shows how to scrape if needed
pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> { pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> {

View File

@@ -1,15 +1,11 @@
// src/corporate/storage.rs // src/corporate/storage.rs
use super::{types::*, helpers::*};
use crate::util::directories::DataPaths; use crate::util::directories::DataPaths;
use crate::util::logger; use crate::util::logger;
use tokio::fs;
use tokio::io::AsyncWriteExt; use tokio::io::AsyncWriteExt;
use chrono::{Datelike, NaiveDate};
use std::collections::HashMap; use std::collections::HashMap;
use std::path::{PathBuf, Path}; use std::path::{PathBuf, Path};
const BATCH_SIZE: usize = 500;
/// Lightweight index entry - only metadata, no full event data /// Lightweight index entry - only metadata, no full event data
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -20,258 +16,6 @@ pub struct EventIndex {
pub file_path: PathBuf, pub file_path: PathBuf,
} }
/// Build index of all events without loading them into memory
pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventIndex>> {
let dir = paths.corporate_events_dir();
if !dir.exists() {
logger::log_info("Corporate Storage: No events directory found").await;
return Ok(Vec::new());
}
let mut index = Vec::new();
let mut entries = fs::read_dir(dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if name.starts_with("events_") && name.len() == 17 {
let content = fs::read_to_string(&path).await?;
let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
for event in events {
index.push(EventIndex {
key: event_key(&event),
ticker: event.ticker.clone(),
date: event.date.clone(),
file_path: path.clone(),
});
}
}
}
}
logger::log_info(&format!("Corporate Storage: Built index with {} entries", index.len())).await;
Ok(index)
}
/// Load specific event by key (only loads its file)
pub async fn lookup_event_by_key(
key: &str,
index: &[EventIndex]
) -> anyhow::Result<Option<CompanyEvent>> {
let entry = index.iter().find(|e| e.key == key);
if let Some(entry) = entry {
let content = fs::read_to_string(&entry.file_path).await?;
let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
Ok(events.into_iter().find(|e| event_key(e) == key))
} else {
Ok(None)
}
}
/// Stream events file by file with callback
pub async fn stream_events_with_callback<F>(
paths: &DataPaths,
mut callback: F
) -> anyhow::Result<usize>
where
F: FnMut(CompanyEvent) -> anyhow::Result<()>,
{
let dir = paths.corporate_events_dir();
if !dir.exists() {
return Ok(0);
}
let mut total = 0;
let mut entries = fs::read_dir(dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if name.starts_with("events_") {
let content = fs::read_to_string(&path).await?;
let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
for event in events {
callback(event)?;
total += 1;
}
tokio::task::yield_now().await;
}
}
}
logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
Ok(total)
}
/// Save events organized by month (accepts Vec, not HashMap)
pub async fn save_optimized_events(
paths: &DataPaths,
events: Vec<CompanyEvent>
) -> anyhow::Result<()> {
let dir = paths.corporate_events_dir();
fs::create_dir_all(dir).await?;
logger::log_info("Corporate Storage: Removing old event files...").await;
let mut removed_count = 0;
let mut entries = fs::read_dir(dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
fs::remove_file(&path).await?;
removed_count += 1;
}
}
logger::log_info(&format!("Corporate Storage: Removed {} old files", removed_count)).await;
let total_events = events.len();
let mut sorted = events;
sorted.sort_by(|a, b| {
a.ticker.cmp(&b.ticker).then(a.date.cmp(&b.date))
});
let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();
for chunk in sorted.chunks(BATCH_SIZE) {
for e in chunk {
if let Ok(d) = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d") {
let key = format!("{}-{:02}", d.year(), d.month());
by_month.entry(key).or_default().push(e.clone());
}
}
tokio::task::yield_now().await;
}
for (month, list) in by_month {
let path = dir.join(format!("events_{}.json", month));
fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
logger::log_info(&format!("Saved {} events for month {}", list.len(), month)).await;
}
logger::log_info(&format!("Saved {} total events", total_events)).await;
Ok(())
}
pub async fn save_changes(
paths: &DataPaths,
changes: &[CompanyEventChange]
) -> anyhow::Result<()> {
if changes.is_empty() {
logger::log_info("Corporate Storage: No changes to save").await;
return Ok(());
}
let dir = paths.corporate_changes_dir();
fs::create_dir_all(dir).await?;
let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
for c in changes {
if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
let key = format!("{}-{:02}", d.year(), d.month());
by_month.entry(key).or_default().push(c.clone());
}
}
for (month, list) in by_month {
let path = dir.join(format!("changes_{}.json", month));
let mut all = if path.exists() {
let s = fs::read_to_string(&path).await?;
serde_json::from_str(&s).unwrap_or_default()
} else {
vec![]
};
all.extend(list.clone());
fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
}
Ok(())
}
pub async fn save_prices_for_ticker(
paths: &DataPaths,
ticker: &str,
timeframe: &str,
mut prices: Vec<CompanyPrice>
) -> anyhow::Result<()> {
let base_dir = paths.corporate_prices_dir();
let company_dir = base_dir.join(ticker.replace(".", "_"));
let timeframe_dir = company_dir.join(timeframe);
fs::create_dir_all(&timeframe_dir).await?;
let path = timeframe_dir.join("prices.json");
prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
Ok(())
}
pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
paths.corporate_prices_dir().join(lei)
}
pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
let base = get_company_dir(paths, isin);
let paths_to_create = [
base.clone(),
base.join("5min"),
base.join("daily"),
base.join("aggregated").join("5min"),
base.join("aggregated").join("daily"),
];
for p in paths_to_create {
fs::create_dir_all(&p).await?;
}
Ok(())
}
pub async fn save_available_exchanges(
paths: &DataPaths,
isin: &str,
exchanges: Vec<AvailableExchange>
) -> anyhow::Result<()> {
let dir = get_company_dir(paths, isin);
fs::create_dir_all(&dir).await?;
let path = dir.join("available_exchanges.json");
fs::write(&path, serde_json::to_string_pretty(&exchanges)?).await?;
Ok(())
}
pub async fn load_available_exchanges(
paths: &DataPaths,
lei: &str
) -> anyhow::Result<Vec<AvailableExchange>> {
let path = get_company_dir(paths, lei).join("available_exchanges.json");
if path.exists() {
let content = fs::read_to_string(&path).await?;
Ok(serde_json::from_str(&content)?)
} else {
Ok(vec![])
}
}
pub async fn save_prices_by_source(
paths: &DataPaths,
lei: &str,
source_ticker: &str,
timeframe: &str,
prices: Vec<CompanyPrice>,
) -> anyhow::Result<()> {
let source_safe = source_ticker.replace(".", "_").replace("/", "_");
let dir = get_company_dir(paths, lei).join(timeframe).join(&source_safe);
fs::create_dir_all(&dir).await?;
let path = dir.join("prices.json");
let mut prices = prices;
prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
Ok(())
}
/// Stream companies to JSONL incrementally /// Stream companies to JSONL incrementally
pub async fn save_companies_to_jsonl_streaming( pub async fn save_companies_to_jsonl_streaming(
paths: &DataPaths, paths: &DataPaths,

View File

@@ -2,42 +2,22 @@
use std::collections::HashMap; use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEvent { pub struct ChartData {
pub ticker: String, pub symbol: String,
pub date: String, // YYYY-MM-DD pub quotes: Vec<Quote>,
pub time: String, // "AMC", "BMO", "TAS", or "" pub timestamp: i64,
pub period: String, // "Q1 2025", "FY 2024"
pub eps_forecast: Option<f64>,
pub eps_actual: Option<f64>,
pub revenue_forecast: Option<f64>,
pub revenue_actual: Option<f64>,
pub surprise_pct: Option<f64>, // (actual - forecast) / |forecast|
pub source: String, // "Yahoo"
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyPrice { pub struct Quote {
pub ticker: String, pub timestamp: i64,
pub date: String, // YYYY-MM-DD pub open: Option<f64>,
pub time: String, // HH:MM:SS for intraday, "" for daily pub high: Option<f64>,
pub open: f64, pub low: Option<f64>,
pub high: f64, pub close: Option<f64>,
pub low: f64, pub volume: Option<u64>,
pub close: f64, pub adjusted_close: Option<f64>,
pub adj_close: f64,
pub volume: u64,
pub currency: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEventChange {
pub ticker: String,
pub date: String,
pub field_changed: String, // "time", "eps_forecast", "eps_actual", "new_event"
pub old_value: String,
pub new_value: String,
pub detected_at: String,
} }
/// Figi Info based on API calls [https://www.openfigi.com/] /// Figi Info based on API calls [https://www.openfigi.com/]
@@ -47,7 +27,7 @@ pub struct CompanyEventChange {
/// # Comments /// # Comments
/// Use Mapping the Object List onto Figi Properties /// Use Mapping the Object List onto Figi Properties
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FigiInfo { pub struct FigiData {
pub isin: String, pub isin: String,
pub figi: String, pub figi: String,
pub name: String, pub name: String,
@@ -69,71 +49,144 @@ pub struct FigiInfo {
/// Company Info /// Company Info
/// # Attributes /// # Attributes
/// * Name as primary key (for one instition) -> might have to changed when first FigiInfo is coming in /// * Name as primary key (for one institution) -> might have to be changed when the first FigiInfo comes in
/// * ISIN as the most liquid / preferred traded security (used for fallback) /// * ISIN as the most liquid / preferred traded security (used for fallback)
/// * securities: Grouped by ISIN, filtered for Common Stock only /// * securities: Grouped by ISIN, filtered for Common Stock only
/// * isin_tickers_map: Map of ISINs to their associated tickers across platforms
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyInfo{ pub struct CompanyData{
pub name: String, pub name: String,
pub primary_isin: String, pub primary_isin: String,
pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> pub securities: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData>
pub yahoo_company_data: Option<Vec<YahooCompanyData>>,
pub isin_tickers_map: Option<HashMap<String, Vec<String>>>, // ISIN -> Tickers
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YahooCompanyDetails { pub struct YahooCompanyData {
pub ticker: String, pub ticker: String,
pub sector: Option<String>, pub sector: Option<String>,
pub exchange: Option<String>, pub exchange: Option<String>,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyCrossPlatformInfo { pub struct WarrantData {
pub name: String, pub company_name: String, // key in CompanyData
pub isin_tickers_map: HashMap<String, Vec<String>>, // ISIN -> Tickers pub warrants: HashMap<String, WarrantDetails>, // underlying company name -> Warrant
pub sector: Option<String>,
pub exchange: Option<String>,
} }
/// Warrant Info /// Warrant Data
/// ///
/// Information for Warrant securities fetched out of Name in FigiInfo /// Information for Warrant securities fetched out of Name in FigiData
/// example1: "name": "VONTOBE-PW26 LEONARDO SPA", /// example1: "name": "VONTOBE-PW26 LEONARDO SPA",
/// issued by VONTOBEL Put Warrant for underlying company LEONARDO SPA /// issued by VONTOBEL Put Warrant for underlying company LEONARDO SPA
/// example2: "BAYER H-CW25 L'OREAL", /// example2: "BAYER H-CW25 L'OREAL",
/// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL /// for formats with only one company instead of two, the underlying and issuing company are the same; leave issuer_company_name NULL
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WarrantInfo { pub struct WarrantDetails {
pub underlying_company_name: String, // key in CompanyInfo, key for WarrantInfo pub company_name: String, // key in CompanyData, key for WarrantDetails
pub issuer_company_name: Option<String>, // key in CompanyInfo pub issuer_company_name: Option<String>, // key in CompanyData
pub warrant_type: String, // "put" or "call" pub warrant_type: String, // "put" or "call"
pub warrants: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN) pub warrants: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
} }
/// Option Info #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptionData {
pub company_name: String, // key in CompanyData
pub expiration_dates: Vec<i64>,
pub strikes: Vec<f64>,
pub option: Vec<OptionChain>,
pub timestamp: i64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptionChain {
pub expiration_date: i64,
pub calls: Vec<OptionContract>,
pub puts: Vec<OptionContract>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptionContract {
pub strike: f64,
pub last_price: Option<f64>,
pub bid: Option<f64>,
pub ask: Option<f64>,
pub volume: Option<u64>,
pub open_interest: Option<u64>,
pub implied_volatility: Option<f64>,
}
/// Bond parsed details from ticker/description
/// ///
/// Information for Option securities fetched out of Name in FigiInfo /// Parses bond information from ticker format:
/// example1: "name": "December 25 Calls on ALPHA GA", /// Corporate: "WTFC 4.3 01/12/26 0003"
/// issued by NULL Call Option for underlying company ALPHA GA /// Government: "SLOVAK 1.5225 05/10/28 4Y"
/// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OptionInfo { pub struct BondDetails {
pub underlying_company_name: String, // key in CompanyInfo, key for OptionInfo pub coupon_rate: Option<f64>, // 4.3, 1.5225
pub issuer_company_name: Option<String>, // key in CompanyInfo pub maturity_date: Option<String>, // "2026-01-12", "2028-05-10"
pub option_type: String, // "put" or "call" pub is_floating: bool, // true if "Float" in description
pub options: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN) pub is_zero_coupon: bool, // true if coupon is 0
pub tenor_years: Option<u32>, // Parsed from maturity or inferred
pub series_identifier: Option<String>, // "0003", "4Y", "144A", "REGS", etc.
}
/// Corporate Bond Info
///
/// Information for corporate bonds grouped by issuer
/// Example: "name": "LIBERTYVILLE BK & TRUST"
/// ticker: "WTFC 4.3 01/12/26 0003"
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CorporateBondData {
pub underlying_company_name: String, // key - company name issuing the bond
pub bonds: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
#[serde(skip_serializing_if = "HashMap::is_empty", default)]
pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
}
/// Government Bond Info
///
/// Information for government bonds grouped by issuer (country/municipality)
/// Example: "name": "SLOVAK REPUBLIC"
/// ticker: "SLOVAK 1.5225 05/10/28 4Y"
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GovernmentBondData {
pub issuer_name: String, // key - government entity name
pub issuer_type: String, // "sovereign", "municipal", "state", "province", etc.
pub bonds: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
#[serde(skip_serializing_if = "HashMap::is_empty", default)]
pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableExchange { pub struct ExchangeData {
pub exchange_mic: String, pub mic: String,
pub ticker: String, pub ticker: String,
pub has_daily: bool,
pub has_5min: bool,
pub last_successful_fetch: Option<String>, // YYYY-MM-DD
#[serde(default)] #[serde(default)]
pub currency: String, pub currency: String,
#[serde(default)] }
pub discovered_at: Option<String>, // When this exchange was first discovered
#[serde(default)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub fetch_count: u32, // How many times successfully fetched pub struct CompanyEventData {
pub ticker: String,
pub date: String, // YYYY-MM-DD
pub time: String, // "AMC", "BMO", "TAS", or ""
pub period: String, // "Q1 2025", "FY 2024"
pub eps_forecast: Option<f64>,
pub eps_actual: Option<f64>,
pub revenue_forecast: Option<f64>,
pub revenue_actual: Option<f64>,
pub surprise_pct: Option<f64>, // (actual - forecast) / |forecast|
pub source: String, // "Yahoo"
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEventChangeData {
pub ticker: String,
pub date: String,
pub field_changed: String, // "time", "eps_forecast", "eps_actual", "new_event"
pub old_value: String,
pub new_value: String,
pub detected_at: String,
} }
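For illustration only (not part of this diff): a minimal sketch of turning a corporate bond ticker like "WTFC 4.3 01/12/26 0003" into the BondDetails struct above. The real logic lives in bond_processing.rs and may differ; the function name here is hypothetical.
fn sketch_parse_bond_ticker(ticker: &str) -> BondDetails {
    let parts: Vec<&str> = ticker.split_whitespace().collect();
    // Second field is the coupon ("4.3"), third the maturity ("01/12/26").
    let coupon_rate = parts.get(1).and_then(|p| p.parse::<f64>().ok());
    let maturity_date = parts.get(2)
        .and_then(|p| chrono::NaiveDate::parse_from_str(p, "%m/%d/%y").ok())
        .map(|d| d.format("%Y-%m-%d").to_string());
    BondDetails {
        coupon_rate,
        maturity_date,
        is_floating: ticker.to_uppercase().contains("FLOAT"),
        is_zero_coupon: coupon_rate == Some(0.0),
        tenor_years: None,
        series_identifier: parts.get(3).map(|s| s.to_string()), // "0003", "4Y", ...
    }
}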

View File

@@ -1,26 +1,34 @@
// src/corporate/update.rs - UPDATED WITH DATA INTEGRITY FIXES // src/corporate/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*, openfigi::*, yahoo::*}; use super::{scraper::*, update_openfigi::*};
use crate::config::Config; use crate::config::Config;
use crate::corporate::update_parallel::build_companies_jsonl_streaming_parallel; use crate::check_shutdown;
use crate::corporate::update_companies::update_companies;
use crate::corporate::update_companies_cleanse::{companies_yahoo_cleansed_low_profile, companies_yahoo_cleansed_no_data};
use crate::corporate::update_companies_enrich::{enrich_companies_with_events, enrich_companies_with_chart, enrich_companies_with_option};
use crate::corporate::collect_exchanges::collect_and_save_exchanges;
use crate::economic::yahoo_update_forex::collect_fx_rates;
use crate::util::directories::DataPaths; use crate::util::directories::DataPaths;
use crate::util::logger; use crate::util::logger;
use crate::scraper::webdriver::ChromeDriverPool; use crate::scraper::webdriver::ChromeDriverPool;
use crate::scraper::yahoo::{YahooClientPool};
use crate::scraper::openfigi::load_figi_type_lists;
use chrono::Local; use std::result::Result::Ok;
use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool};
/// UPDATED: Main corporate update entry point with shutdown awareness /// Main corporate update entry point with shutdown awareness
pub async fn run_full_update( pub async fn run_full_update(
_config: &Config, config: &Config,
pool: &Arc<ChromeDriverPool>, pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>, shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
logger::log_info("=== Corporate Update (STREAMING MODE WITH DATA INTEGRITY) ===").await; logger::log_info("=== Corporate Update ===").await;
let paths = DataPaths::new(".")?; let paths = DataPaths::new(".")?;
check_shutdown!(shutdown_flag);
logger::log_info("Step 1: Downloading GLEIF CSV...").await; logger::log_info("Step 1: Downloading GLEIF CSV...").await;
let gleif_csv_path = match download_isin_lei_csv().await? { let gleif_csv_path = match download_isin_lei_csv().await? {
Some(p) => { Some(p) => {
@@ -33,22 +41,16 @@ pub async fn run_full_update(
} }
}; };
if shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(shutdown_flag);
logger::log_warn("Shutdown detected after GLEIF download").await;
return Ok(());
}
logger::log_info("Step 2: Loading OpenFIGI metadata...").await; logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
load_figi_type_lists().await.ok(); load_figi_type_lists(&paths).await.ok();
logger::log_info(" ✓ OpenFIGI metadata loaded").await; logger::log_info(" ✓ OpenFIGI metadata loaded").await;
if shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(shutdown_flag);
logger::log_warn("Shutdown detected after OpenFIGI load").await;
return Ok(());
}
logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await; logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await;
let all_mapped = ensure_all_leis_mapped(&gleif_csv_path, None).await?; let all_mapped = update_lei_mapping(&paths, &gleif_csv_path, None).await?;
if !all_mapped { if !all_mapped {
logger::log_warn(" ⚠ Some LEIs failed to map - continuing with partial data").await; logger::log_warn(" ⚠ Some LEIs failed to map - continuing with partial data").await;
@@ -56,534 +58,72 @@ pub async fn run_full_update(
logger::log_info(" ✓ All LEIs successfully mapped").await; logger::log_info(" ✓ All LEIs successfully mapped").await;
} }
if shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(shutdown_flag);
logger::log_warn("Shutdown detected after LEI-FIGI mapping").await;
return Ok(());
}
logger::log_info("Step 4: Building securities map (streaming)...").await; logger::log_info("Step 4: Building securities map (streaming)...").await;
let date_dir = find_most_recent_figi_date_dir(&paths).await?; update_securities(&paths).await?;
logger::log_info(" ✓ Securities map updated").await;
if let Some(date_dir) = date_dir { let paths = DataPaths::new(".")?;
logger::log_info(&format!(" Using FIGI data from: {:?}", date_dir)).await;
build_securities_from_figi_streaming(&date_dir).await?;
logger::log_info(" ✓ Securities map updated").await;
} else {
logger::log_warn(" ✗ No FIGI data directory found").await;
}
if shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(shutdown_flag);
logger::log_warn("Shutdown detected after securities map build").await;
return Ok(());
}
logger::log_info("Step 5: Building companies.jsonl with parallel processing and validation...").await; logger::log_info("Step 5: Building companies.jsonl with Yahoo Data...").await;
let count = build_companies_jsonl_streaming_parallel(&paths, pool, shutdown_flag).await?; let count = update_companies(&paths, pool, shutdown_flag, config, &None).await?;
logger::log_info(&format!(" ✓ Saved {} companies", count)).await; logger::log_info(&format!(" ✓ Saved {} companies", count)).await;
if !shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(shutdown_flag);
logger::log_info("Step 6: Processing events (using index)...").await;
let _event_index = build_event_index(&paths).await?;
logger::log_info(" ✓ Event index built").await;
} else {
logger::log_warn("Shutdown detected, skipping event index build").await;
}
logger::log_info("✓ Corporate update complete").await; logger::log_info("Step 6: Cleansing companies with missing essential data...").await;
let cleansed_count = companies_yahoo_cleansed_no_data(&paths).await?;
logger::log_info(&format!("{} companies found on Yahoo ready for further use in companies_yahoo.jsonl", cleansed_count)).await;
check_shutdown!(shutdown_flag);
let proxy_pool = pool.get_proxy_pool()
.ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must be created with VPN proxy rotation enabled"))?;
logger::log_info("Creating YahooClientPool with proxy rotation...").await;
let yahoo_pool = Arc::new(YahooClientPool::new(proxy_pool, config, None).await?);
logger::log_info(&format!("✓ YahooClientPool ready with {} clients", yahoo_pool.num_clients().await)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 7: Cleansing companies with too low profile (with abort-safe persistence)...").await;
let cleansed_count = companies_yahoo_cleansed_low_profile(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies with sufficient profile ready for analytics", cleansed_count)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 8: Enriching companies with Yahoo Events (with abort-safe persistence)...").await;
let enriched_count = enrich_companies_with_events(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with event data", enriched_count)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 9: Enriching companies with Yahoo Options (with abort-safe persistence)...").await;
let options_count = enrich_companies_with_option(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with options data", options_count)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 10: Enriching companies with Yahoo Chart (with abort-safe persistence)...").await;
let chart_count = enrich_companies_with_chart(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!("{} companies enriched with chart data", chart_count)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 11: Collecting FX rates...").await;
let fx_count = collect_fx_rates(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
logger::log_info(&format!(" ✓ Collected {} FX rates", fx_count)).await;
check_shutdown!(shutdown_flag);
logger::log_info("Step 12: Collecting exchange information...").await;
let exchange_count = collect_and_save_exchanges(&paths).await?;
logger::log_info(&format!(" ✓ Collected {} exchanges", exchange_count)).await;
logger::log_info("=== Corporate update complete === ").await;
Ok(()) Ok(())
} }
/// UPDATED: Serial version with validation (kept for compatibility/debugging)
///
/// This is the non-parallel version that processes companies sequentially.
/// Updated with same validation and shutdown checks as parallel version.
///
/// Use this for:
/// - Debugging issues with specific companies
/// - Environments where parallel processing isn't desired
/// - Testing validation logic without concurrency complexity
async fn build_companies_jsonl_streaming_serial(
paths: &DataPaths,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<usize> {
// Configuration constants
const CHECKPOINT_INTERVAL: usize = 50;
const FSYNC_BATCH_SIZE: usize = 10;
const FSYNC_INTERVAL_SECS: u64 = 10;
let path = DataPaths::new(".")?;
let corporate_path = path.data_dir().join("corporate").join("by_name");
let securities_path = corporate_path.join("common_stocks.json");
if !securities_path.exists() {
logger::log_warn("No common_stocks.json found").await;
return Ok(0);
}
let content = tokio::fs::read_to_string(securities_path).await?;
let securities: HashMap<String, CompanyInfo> = serde_json::from_str(&content)?;
let companies_path = paths.data_dir().join("companies.jsonl");
let log_path = paths.data_dir().join("companies_updates.log");
if let Some(parent) = companies_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
// === RECOVERY PHASE: Load checkpoint + replay log ===
let mut existing_companies: HashMap<String, CompanyCrossPlatformInfo> = HashMap::new();
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
if companies_path.exists() {
logger::log_info("Loading checkpoint from companies.jsonl...").await;
let existing_content = tokio::fs::read_to_string(&companies_path).await?;
for line in existing_content.lines() {
if line.trim().is_empty() {
continue;
}
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
}
}
}
logger::log_info(&format!("Loaded checkpoint with {} companies", existing_companies.len())).await;
}
if log_path.exists() {
logger::log_info("Replaying update log...").await;
let log_content = tokio::fs::read_to_string(&log_path).await?;
let mut replayed = 0;
for line in log_content.lines() {
if line.trim().is_empty() {
continue;
}
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
replayed += 1;
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
if replayed > 0 {
logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
}
}
// === OPEN LOG FILE ===
use tokio::fs::OpenOptions;
use tokio::io::AsyncWriteExt;
let mut log_file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await?;
let mut writes_since_fsync = 0;
let mut last_fsync = std::time::Instant::now();
let mut updates_since_checkpoint = 0;
let mut count = 0;
let mut new_count = 0;
let mut updated_count = 0;
logger::log_info(&format!("Processing {} companies sequentially...", securities.len())).await;
// === PROCESS COMPANIES SEQUENTIALLY ===
for (name, company_info) in securities.clone() {
// Check shutdown before each company
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!(
"Shutdown detected at company: {} (progress: {}/{})",
name, count, count + securities.len()
)).await;
break;
}
let existing_entry = existing_companies.get(&name).cloned();
let is_update = existing_entry.is_some();
// Process company with validation
match process_single_company_serial(
name.clone(),
company_info,
existing_entry,
pool,
shutdown_flag,
).await {
Ok(Some(company_entry)) => {
// Write to log
let line = serde_json::to_string(&company_entry)?;
log_file.write_all(line.as_bytes()).await?;
log_file.write_all(b"\n").await?;
writes_since_fsync += 1;
// Batched + time-based fsync
let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
if should_fsync {
log_file.flush().await?;
log_file.sync_data().await?;
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
// Update in-memory state
processed_names.insert(name.clone());
existing_companies.insert(name.clone(), company_entry);
count += 1;
updates_since_checkpoint += 1;
if is_update {
updated_count += 1;
} else {
new_count += 1;
}
// Periodic checkpoint
if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
if writes_since_fsync > 0 {
log_file.flush().await?;
log_file.sync_data().await?;
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
logger::log_info(&format!("Creating checkpoint at {} companies...", count)).await;
let checkpoint_tmp = companies_path.with_extension("jsonl.tmp");
let mut checkpoint_file = tokio::fs::File::create(&checkpoint_tmp).await?;
for company in existing_companies.values() {
let line = serde_json::to_string(company)?;
checkpoint_file.write_all(line.as_bytes()).await?;
checkpoint_file.write_all(b"\n").await?;
}
checkpoint_file.flush().await?;
checkpoint_file.sync_all().await?;
drop(checkpoint_file);
tokio::fs::rename(&checkpoint_tmp, &companies_path).await?;
drop(log_file);
tokio::fs::remove_file(&log_path).await.ok();
log_file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await?;
updates_since_checkpoint = 0;
logger::log_info("✓ Checkpoint created and log cleared").await;
}
if count % 10 == 0 {
logger::log_info(&format!(
"Progress: {} companies ({} new, {} updated)",
count, new_count, updated_count
)).await;
}
}
Ok(None) => {
// Company had no ISINs or was skipped
logger::log_info(&format!("Skipped company: {} (no ISINs)", name)).await;
}
Err(e) => {
logger::log_warn(&format!("Error processing company {}: {}", name, e)).await;
}
}
// Time-based fsync
if writes_since_fsync > 0 && last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS {
log_file.flush().await?;
log_file.sync_data().await?;
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
}
// === FSYNC PENDING WRITES ===
if writes_since_fsync > 0 {
logger::log_info(&format!("Fsyncing {} pending writes...", writes_since_fsync)).await;
log_file.flush().await?;
log_file.sync_data().await?;
logger::log_info("✓ Pending writes saved").await;
}
// === FINAL CHECKPOINT ===
if !shutdown_flag.load(Ordering::SeqCst) && updates_since_checkpoint > 0 {
logger::log_info("Creating final checkpoint...").await;
let checkpoint_tmp = companies_path.with_extension("jsonl.tmp");
let mut checkpoint_file = tokio::fs::File::create(&checkpoint_tmp).await?;
for company in existing_companies.values() {
let line = serde_json::to_string(company)?;
checkpoint_file.write_all(line.as_bytes()).await?;
checkpoint_file.write_all(b"\n").await?;
}
checkpoint_file.flush().await?;
checkpoint_file.sync_all().await?;
drop(checkpoint_file);
tokio::fs::rename(&checkpoint_tmp, &companies_path).await?;
drop(log_file);
tokio::fs::remove_file(&log_path).await.ok();
logger::log_info("✓ Final checkpoint created").await;
}
logger::log_info(&format!(
"Completed: {} total companies ({} new, {} updated)",
count, new_count, updated_count
)).await;
Ok(count)
}
/// UPDATED: Process single company serially with validation
async fn process_single_company_serial(
name: String,
company_info: CompanyInfo,
existing_entry: Option<CompanyCrossPlatformInfo>,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Option<CompanyCrossPlatformInfo>> {
// Check shutdown at start
if shutdown_flag.load(Ordering::SeqCst) {
return Ok(None);
}
let mut isin_tickers_map: HashMap<String, Vec<String>> =
existing_entry
.as_ref()
.map(|e| e.isin_tickers_map.clone())
.unwrap_or_default();
let mut sector = existing_entry.as_ref().and_then(|e| e.sector.clone());
let mut exchange = existing_entry.as_ref().and_then(|e| e.exchange.clone());
// Collect unique ISIN-ticker pairs
let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
for figi_infos in company_info.securities.values() {
for figi_info in figi_infos {
if !figi_info.isin.is_empty() {
let tickers = unique_isin_ticker_pairs
.entry(figi_info.isin.clone())
.or_insert_with(Vec::new);
if !figi_info.ticker.is_empty() && !tickers.contains(&figi_info.ticker) {
tickers.push(figi_info.ticker.clone());
}
}
}
}
// Process each ISIN with validation
for (isin, figi_tickers) in unique_isin_ticker_pairs {
// Check shutdown before each ISIN
if shutdown_flag.load(Ordering::SeqCst) {
return Ok(None);
}
let tickers = isin_tickers_map
.entry(isin.clone())
.or_insert_with(Vec::new);
for figi_ticker in figi_tickers {
if !tickers.contains(&figi_ticker) {
tickers.push(figi_ticker);
}
}
let has_yahoo_ticker = tickers.iter().any(|t| t.starts_with("YAHOO:"));
if !has_yahoo_ticker {
logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
// Use validated scraping with retry
match scrape_with_retry_serial(pool, &isin, 3, shutdown_flag).await {
Ok(Some(details)) => {
logger::log_info(&format!(
"✓ Found Yahoo ticker {} for ISIN {} (company: {})",
details.ticker, isin, name
)).await;
tickers.push(format!("YAHOO:{}", details.ticker));
if sector.is_none() && details.sector.is_some() {
sector = details.sector.clone();
}
if exchange.is_none() && details.exchange.is_some() {
exchange = details.exchange.clone();
}
},
Ok(None) => {
logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
tickers.push("YAHOO:NO_RESULTS".to_string());
},
Err(e) => {
if shutdown_flag.load(Ordering::SeqCst) {
return Ok(None);
}
logger::log_warn(&format!(
"✗ Yahoo lookup error for ISIN {} (company: {}): {}",
isin, name, e
)).await;
}
}
}
}
// Final shutdown check
if shutdown_flag.load(Ordering::SeqCst) {
return Ok(None);
}
if !isin_tickers_map.is_empty() {
Ok(Some(CompanyCrossPlatformInfo {
name,
isin_tickers_map,
sector,
exchange,
}))
} else {
Ok(None)
}
}
/// UPDATED: Scrape with retry for serial processing
async fn scrape_with_retry_serial(
pool: &Arc<ChromeDriverPool>,
isin: &str,
max_retries: u32,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Option<YahooCompanyDetails>> {
let mut retries = 0;
loop {
if shutdown_flag.load(Ordering::SeqCst) {
return Err(anyhow::anyhow!("Aborted due to shutdown"));
}
match scrape_company_details_by_isin(pool, isin, shutdown_flag).await {
Ok(result) => return Ok(result),
Err(e) => {
if retries >= max_retries {
return Err(e);
}
let backoff_ms = 1000 * 2u64.pow(retries);
let jitter_ms = random_range(0, 500);
let total_delay = backoff_ms + jitter_ms;
logger::log_warn(&format!(
"Retry {}/{} for ISIN {} after {}ms: {}",
retries + 1, max_retries, isin, total_delay, e
)).await;
tokio::time::sleep(tokio::time::Duration::from_millis(total_delay)).await;
retries += 1;
}
}
}
}
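// A minimal, standalone sketch of the delay schedule produced by the retry
// loops above (exponential backoff with jitter). It uses `rand::Rng` directly
// as a stand-in for the crate's `random_range` helper; that substitution is an
// assumption made only for this illustration.
#[allow(dead_code)]
fn retry_delay_ms(retry: u32) -> u64 {
use rand::Rng;
let backoff_ms = 1000 * 2u64.pow(retry); // 1s, 2s, 4s, ...
let jitter_ms = rand::thread_rng().gen_range(0u64..500); // de-synchronizes concurrent workers
backoff_ms + jitter_ms
}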
async fn find_most_recent_figi_date_dir(paths: &DataPaths) -> anyhow::Result<Option<std::path::PathBuf>> {
let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
if !map_cache_dir.exists() {
return Ok(None);
}
let mut entries = tokio::fs::read_dir(&map_cache_dir).await?;
let mut dates = Vec::new();
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() {
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.len() == 8 && name.chars().all(|c| c.is_numeric()) {
dates.push((name.to_string(), path));
}
}
}
}
if dates.is_empty() {
return Ok(None);
}
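// YYYYMMDD strings sort lexicographically in date order, so a descending sort puts the newest directory first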
dates.sort_by(|a, b| b.0.cmp(&a.0));
Ok(Some(dates[0].1.clone()))
}
pub struct ProcessResult {
pub changes: Vec<CompanyEventChange>,
}
pub fn process_batch(
new_events: &[CompanyEvent],
existing: &mut HashMap<String, CompanyEvent>,
today: &str,
) -> ProcessResult {
let mut changes = Vec::new();
for new in new_events {
let key = event_key(new);
if let Some(old) = existing.get(&key) {
changes.extend(detect_changes(old, new, today));
existing.insert(key, new.clone());
continue;
}
let date_key = format!("{}|{}", new.ticker, new.date);
let mut found_old = None;
for (k, e) in existing.iter() {
if format!("{}|{}", e.ticker, e.date) == date_key && k != &key {
found_old = Some((k.clone(), e.clone()));
break;
}
}
if let Some((old_key, old_event)) = found_old {
if new.date.as_str() > today {
changes.push(CompanyEventChange {
ticker: new.ticker.clone(),
date: new.date.clone(),
field_changed: "time".to_string(),
old_value: old_event.time.clone(),
new_value: new.time.clone(),
detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
});
}
existing.remove(&old_key);
}
existing.insert(key, new.clone());
}
ProcessResult { changes }
}

View File

@@ -0,0 +1,907 @@
// src/corporate/update_companies.rs
use super::{types::*, yahoo_company_extraction::*, helpers::*};
use crate::util::directories::DataPaths;
use crate::util::integrity::{DataStage, StateManager, file_reference};
use crate::util::logger;
use crate::scraper::webdriver::ChromeDriverPool;
use crate::scraper::hard_reset::perform_hard_reset;
use crate::corporate::checkpoint_helpers;
use crate::config::Config;
use tokio::sync::mpsc;
use tokio::io::AsyncWriteExt;
use tokio::fs::OpenOptions;
use tokio::time::sleep;
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use futures::stream::{FuturesUnordered, StreamExt};
use anyhow::{anyhow, Result};
/// Represents a write command to be serialized through the log writer
enum LogCommand {
Write(CompanyData),
Checkpoint,
Shutdown,
}
/// Result from processing a single company
struct CompanyProcessResult {
company: CompanyData,
is_update: bool,
}
/// Check if a company needs Yahoo data processing
/// Returns true if company has incomplete data (needs processing)
fn company_needs_processing(
company_name: &str,
company_info: &CompanyData,
existing_companies: &HashMap<String, CompanyData>,
) -> bool {
// If company not in existing data at all, definitely needs processing
let Some(existing_entry) = existing_companies.get(company_name) else {
return true;
};
// Collect all ISINs this company should have
let mut required_isins = std::collections::HashSet::new();
for figi_infos in company_info.securities.values() {
for figi_info in figi_infos {
if !figi_info.isin.is_empty() {
required_isins.insert(figi_info.isin.clone());
}
}
}
// Check each required ISIN
for isin in required_isins {
// Check if this ISIN exists in the company's ticker map
if let Some(map) = &existing_entry.isin_tickers_map {
if let Some(tickers) = map.get(&isin) {
// Check if this ISIN has valid Yahoo data
let has_valid_yahoo = tickers.iter().any(|t| {
t.starts_with("YAHOO:") &&
t != "YAHOO:ERROR" //&& // Error marker means needs retry
//t != "YAHOO:NO_RESULTS" // This is actually valid (legitimately not found)
});
// If no valid Yahoo data for this ISIN, company needs processing
if !has_valid_yahoo {
return true;
}
} else {
// ISIN not in map at all, needs processing
return true;
}
} else {
// No isin_tickers_map at all, needs processing
return true;
}
}
// All ISINs have valid Yahoo data, skip this company
false
}
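// A small illustrative sketch of the ticker-marker convention the check above
// relies on: "YAHOO:<symbol>" is resolved data, "YAHOO:NO_RESULTS" is a
// legitimate miss (still resolved), and "YAHOO:ERROR" marks a failed lookup
// that should be retried. `is_resolved_yahoo_marker` is a hypothetical helper
// named here only for illustration.
#[allow(dead_code)]
fn is_resolved_yahoo_marker(ticker: &str) -> bool {
ticker.starts_with("YAHOO:") && ticker != "YAHOO:ERROR"
}
#[cfg(test)]
mod yahoo_marker_tests {
use super::*;
#[test]
fn markers_classify_as_expected() {
assert!(is_resolved_yahoo_marker("YAHOO:AAPL"));
assert!(is_resolved_yahoo_marker("YAHOO:NO_RESULTS")); // counts as resolved
assert!(!is_resolved_yahoo_marker("YAHOO:ERROR")); // needs reprocessing
}
}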
/// Abort-safe incremental JSONL persistence with proper hard reset handling
pub async fn update_companies(
paths: &DataPaths,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
config: &Config,
monitoring: &Option<crate::monitoring::MonitoringHandle>,
) -> anyhow::Result<usize> {
// Configuration constants
const CHECKPOINT_INTERVAL: usize = 50;
const FSYNC_BATCH_SIZE: usize = 10;
const FSYNC_INTERVAL_SECS: u64 = 10;
const CONCURRENCY_LIMIT: usize = 100;
// Wrap pool in mutex for potential replacement
let pool_mutex = Arc::new(tokio::sync::Mutex::new(Arc::clone(pool)));
// Synchronization for hard reset
let reset_in_progress = Arc::new(tokio::sync::Mutex::new(false));
let securities_path = paths.figi_securities_dir();
let securities_checkpoint = securities_path.join("common_stocks.jsonl");
let securities_log = securities_path.join("common_stocks.log.jsonl");
if !securities_checkpoint.exists() {
logger::log_warn("No common_stocks.jsonl found").await;
return Ok(0);
}
// Load securities from checkpoint and replay log
logger::log_info("Loading common stocks from JSONL checkpoint and log...").await;
let securities = load_securities_from_jsonl(&securities_checkpoint, &securities_log).await?;
logger::log_info(&format!("Loaded {} companies from common stocks", securities.len())).await;
let companies_path = paths.data_dir().join("companies.jsonl");
let log_path = paths.data_dir().join("companies_updates.log");
if let Some(parent) = companies_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
let manager = StateManager::new(paths.integrity_dir()).await?;
let content_reference = file_reference(&companies_path);
let step_name = "corporate_companies_update";
let data_stage = DataStage::Data;
if manager.is_step_valid(step_name).await? {
logger::log_info(" Companies data already built and valid").await;
return Ok(securities.len());
}
logger::log_info(" Companies data incomplete or missing, proceeding with update").await;
let entry: crate::util::integrity::StateEntry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;
// === RECOVERY PHASE: Load checkpoint + replay log ===
let existing_companies = checkpoint_helpers::load_checkpoint_with_log(
&companies_path,
&log_path,
"companies.jsonl"
).await?;
// === SETUP LOG WRITER TASK ===
let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
let log_file_init = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await?;
let companies_path_clone = companies_path.clone();
let log_path_clone = log_path.clone();
let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
// Clone the Arc for the writer task (Arc clone is cheap, just increments ref count)
let existing_companies_writer_for_task = Arc::clone(&existing_companies_writer);
let write_tx_for_writer = write_tx.clone();
let writer_task = tokio::spawn(async move {
let mut log_file = log_file_init;
let mut writes_since_fsync = 0;
let mut last_fsync = std::time::Instant::now();
let mut updates_since_checkpoint = 0;
let mut count = 0;
let mut new_count = 0;
let mut updated_count = 0;
while let Some(cmd) = write_rx.recv().await {
match cmd {
LogCommand::Write(company) => {
// Write to log
let line = serde_json::to_string(&company).unwrap();
if let Err(e) = log_file.write_all(line.as_bytes()).await {
logger::log_error(&format!("Failed to write to log: {}", e)).await;
break;
}
if let Err(e) = log_file.write_all(b"\n").await {
logger::log_error(&format!("Failed to write newline: {}", e)).await;
break;
}
writes_since_fsync += 1;
updates_since_checkpoint += 1;
count += 1;
// Update in-memory state
let mut existing_companies = existing_companies_writer_for_task.lock().await;
let is_update = existing_companies.contains_key(&company.name);
existing_companies.insert(company.name.clone(), company);
drop(existing_companies);
if is_update {
updated_count += 1;
} else {
new_count += 1;
}
// Batched + time-based fsync
let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
if should_fsync {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync: {}", e)).await;
break;
}
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
}
LogCommand::Checkpoint => {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
break;
}
let existing_companies = existing_companies_writer_for_task.lock().await;
let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
drop(existing_companies);
let temp_path = companies_path_clone.with_extension("tmp");
match tokio::fs::File::create(&temp_path).await {
Ok(mut temp_file) => {
let mut checkpoint_ok = true;
for company in &companies_vec {
if let Ok(line) = serde_json::to_string(company) {
if temp_file.write_all(line.as_bytes()).await.is_err() ||
temp_file.write_all(b"\n").await.is_err() {
checkpoint_ok = false;
break;
}
}
}
if checkpoint_ok {
if temp_file.flush().await.is_ok() &&
temp_file.sync_data().await.is_ok() {
drop(temp_file);
if tokio::fs::rename(&temp_path, &companies_path_clone).await.is_ok() {
if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
logger::log_info(&format!(
"✓ Checkpoint created ({} companies), log cleared",
companies_vec.len()
)).await;
if let Ok(new_log) = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path_clone)
.await {
log_file = new_log;
}
}
}
}
}
}
Err(e) => {
logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
}
}
updates_since_checkpoint = 0;
}
LogCommand::Shutdown => {
logger::log_info("Writer shutting down...").await;
break;
}
}
// Periodic checkpoint trigger; try_send avoids the writer blocking on its own
// (possibly full) channel, and resetting the counter prevents re-triggering on
// every queued write before the checkpoint command is processed
if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
let _ = write_tx.try_send(LogCommand::Checkpoint);
updates_since_checkpoint = 0;
}
}
// Final fsync
let _ = log_file.flush().await;
let _ = log_file.sync_data().await;
logger::log_info(&format!(
"Writer finished: {} total ({} new, {} updated)",
count, new_count, updated_count
)).await;
(count, new_count, updated_count)
});
// === MAIN PROCESSING LOOP ===
let total = securities.len();
logger::log_info(&format!("Processing {} companies with concurrency limit {}", total, CONCURRENCY_LIMIT)).await;
let mut tasks = FuturesUnordered::new();
// Build initial pending list with proper filtering
let mut pending: Vec<(String, CompanyData)> = securities.iter()
.filter(|(name, info)| company_needs_processing(name, info, &existing_companies))
.map(|(name, info)| (name.clone(), info.clone()))
.collect();
logger::log_info(&format!(
"Initial scan: {} companies need processing ({} already complete)",
pending.len(),
total - pending.len()
)).await;
let mut processed = 0;
let mut hard_reset_count = 0;
// Spawn initial batch
for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
let existing = existing_companies.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
// Process results and spawn new tasks
while let Some(task_result) = tasks.next().await {
// Check for shutdown
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown signal received, stopping processing").await;
break;
}
match task_result {
Ok(Ok(Some(result))) => {
// Success: send to writer
let _ = write_tx_for_writer.send(LogCommand::Write(result.company)).await;
processed += 1;
// Log progress every 100 companies
if processed % 100 == 0 {
logger::log_info(&format!(
"Progress: {}/{} companies processed ({} resets)",
processed,
total,
hard_reset_count
)).await;
}
// Spawn next task if available
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
let existing = existing_companies.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
Ok(Ok(None)) => {
// No result (shutdown or skip)
processed += 1;
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
let existing = existing_companies.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
Ok(Err(e)) => {
let error_msg = e.to_string();
if error_msg.contains("HARD_RESET_REQUIRED") {
// Check if reset already in progress (race condition protection)
let mut reset_lock = reset_in_progress.lock().await;
if *reset_lock {
logger::log_info("Hard reset already in progress, skipping duplicate").await;
processed += 1;
continue;
}
*reset_lock = true;
drop(reset_lock); // Release lock during reset
logger::log_error("🔴 HARD RESET THRESHOLD REACHED - INITIATING RESET SEQUENCE").await;
logger::log_warn("Draining active tasks before hard reset...").await;
// Save remaining pending count
let remaining_count = pending.len();
// Stop spawning new tasks
pending.clear();
// Wait for all active tasks to complete
let mut drained = 0;
while let Some(_) = tasks.next().await {
drained += 1;
if drained % 10 == 0 {
logger::log_info(&format!("Drained {} tasks...", drained)).await;
}
}
logger::log_info(&format!(
"All {} active tasks drained. {} companies need reprocessing.",
drained,
remaining_count
)).await;
// Perform the actual hard reset
match perform_hard_reset(&pool_mutex, config, paths, monitoring, shutdown_flag).await {
Ok(()) => {
logger::log_info("✅ Hard reset completed successfully").await;
hard_reset_count += 1;
// Reset the error counter
{
let pool_guard = pool_mutex.lock().await;
let current_pool = Arc::clone(&*pool_guard);
current_pool.get_reset_controller().reset();
}
logger::log_info("✓ Error counter cleared").await;
// Rebuild pending list by checking which companies need processing
logger::log_info("Rebuilding pending queue with proper Yahoo data checks...").await;
// Get current state of written companies
let current_existing = {
let companies = existing_companies_writer.lock().await;
companies.clone()
};
// Reload all securities from disk (checkpoint + log)
logger::log_info("Reloading securities from JSONL...").await;
let all_securities = load_securities_from_jsonl(&securities_checkpoint, &securities_log).await?;
logger::log_info(&format!("Reloaded {} companies", all_securities.len())).await;
// Build pending list: only companies that need processing
pending = all_securities.iter()
.filter(|(name, info)| company_needs_processing(name, info, &current_existing))
.map(|(name, info)| (name.clone(), info.clone()))
.collect();
logger::log_info(&format!(
"Restarting with {} remaining companies (out of {} total)",
pending.len(),
total
)).await;
// Only continue if there's work to do
if pending.is_empty() {
logger::log_info("All companies have complete data, exiting").await;
// Clear reset flag
let mut reset_lock = reset_in_progress.lock().await;
*reset_lock = false;
drop(reset_lock);
break; // Exit main loop
}
// Respawn initial batch with NEW pool
for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
// Use the up-to-date state captured after the reset, not the pre-run snapshot
let existing = current_existing.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
// Clear reset flag
let mut reset_lock = reset_in_progress.lock().await;
*reset_lock = false;
drop(reset_lock);
// ✅ Continue processing (don't spawn duplicate task)
continue;
}
Err(reset_err) => {
logger::log_error(&format!("Hard reset failed: {}", reset_err)).await;
// Clear reset flag
let mut reset_lock = reset_in_progress.lock().await;
*reset_lock = false;
drop(reset_lock);
// Exit if hard reset fails
break;
}
}
} else {
// Regular error
logger::log_warn(&format!("Company processing error: {}", error_msg)).await;
processed += 1;
// Spawn next task
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
let existing = existing_companies.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
}
Err(e) => {
// Task panic
logger::log_error(&format!("Task panic: {}", e)).await;
processed += 1;
// Spawn next task
if let Some((name, company_info)) = pending.pop() {
let current_pool = {
let pool_guard = pool_mutex.lock().await;
Arc::clone(&*pool_guard)
};
let existing = existing_companies.get(&name).cloned();
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing,
&current_pool,
&shutdown_flag_clone,
).await
});
tasks.push(task);
}
}
}
}
logger::log_info("Main processing loop completed").await;
// Signal writer to finish
let _ = write_tx_for_writer.send(LogCommand::Checkpoint).await;
let _ = write_tx_for_writer.send(LogCommand::Shutdown).await;
drop(write_tx_for_writer);
// Wait for writer to finish
let (final_count, final_new, final_updated) = writer_task.await
.unwrap_or((0, 0, 0));
logger::log_info(&format!(
"✅ Completed: {} total companies ({} new, {} updated, {} hard resets)",
final_count, final_new, final_updated, hard_reset_count
)).await;
// Track completion with:
// - Content reference: companies.jsonl output file
// - Data stage: Data (7-day TTL) - securities data is relatively stable
// - Dependencies: LEI-FIGI mapping must be valid
// Check for shutdown BEFORE marking complete
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected during company update - marking as invalid for retry").await;
manager.mark_invalid(
entry,
format!("Invalid: processed {} of {} companies before shutdown", final_count, total),
).await?;
} else {
// Only mark complete if we got here without shutdown
manager.mark_valid(entry).await?;
}
Ok(final_count)
}
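// A condensed sketch of the checkpoint rotation the writer task above performs:
// write the full in-memory state to a temp file, fsync it, atomically rename it
// over the checkpoint, then start a fresh log. The paths and the use of
// `serde_json::Value` as the record type are illustrative only.
#[allow(dead_code)]
async fn rotate_checkpoint(
checkpoint: &std::path::Path,
log: &std::path::Path,
records: &[serde_json::Value],
) -> anyhow::Result<()> {
use tokio::io::AsyncWriteExt;
let tmp = checkpoint.with_extension("tmp");
let mut f = tokio::fs::File::create(&tmp).await?;
for r in records {
f.write_all(serde_json::to_string(r)?.as_bytes()).await?;
f.write_all(b"\n").await?;
}
f.flush().await?;
f.sync_data().await?; // durable before the rename makes it visible
drop(f);
tokio::fs::rename(&tmp, checkpoint).await?; // atomic replace on the same filesystem
tokio::fs::File::create(log).await?; // truncate to an empty log
Ok(())
}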
/// Loads company securities data (`CompanyData`) from checkpoint and log JSONL files
async fn load_securities_from_jsonl(
checkpoint_path: &std::path::Path,
log_path: &std::path::Path,
) -> anyhow::Result<HashMap<String, CompanyData>> {
let mut securities: HashMap<String, CompanyData> = HashMap::new();
// Load checkpoint
if checkpoint_path.exists() {
let content = tokio::fs::read_to_string(checkpoint_path).await?;
for (line_num, line) in content.lines().enumerate() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company_info) => {
securities.insert(company_info.name.clone(), company_info);
}
Err(e) => {
logger::log_warn(&format!(
"Skipping invalid line {} in checkpoint: {}",
line_num + 1, e
)).await;
}
}
}
}
// Replay log (overwrites checkpoint entries if they exist)
if log_path.exists() {
let content = tokio::fs::read_to_string(log_path).await?;
for (line_num, line) in content.lines().enumerate() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company_info) => {
securities.insert(company_info.name.clone(), company_info);
}
Err(e) => {
logger::log_warn(&format!(
"Skipping invalid line {} in log: {}",
line_num + 1, e
)).await;
}
}
}
}
Ok(securities)
}
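// A small sketch of the torn-line heuristic used during replay above: an
// append-only JSONL log can end in a partially written record after a crash,
// and any line that does not close with '}' is treated as torn and skipped
// instead of failing recovery. `looks_complete` is a hypothetical helper used
// only for illustration.
#[allow(dead_code)]
fn looks_complete(line: &str) -> bool {
let line = line.trim();
!line.is_empty() && line.ends_with('}')
}
#[cfg(test)]
mod replay_tests {
use super::*;
#[test]
fn torn_lines_are_skipped() {
assert!(looks_complete(r#"{"name":"Example Co"}"#));
assert!(!looks_complete(r#"{"name":"Exampl"#)); // interrupted write
assert!(!looks_complete(""));
}
}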
/// Scrape with retry, validation, and shutdown awareness
async fn scrape_with_retry(
pool: &Arc<ChromeDriverPool>,
isin: &str,
max_retries: u32,
shutdown_flag: &Arc<AtomicBool>,
) -> Result<Option<YahooCompanyData>> {
let mut retries = 0;
loop {
// Check shutdown before each attempt
if shutdown_flag.load(Ordering::SeqCst) {
return Err(anyhow!("Aborted due to shutdown"));
}
if pool.should_perform_hard_reset() {
logger::log_error("HARD_RESET_REQUIRED detected before scrape attempt").await;
return Err(anyhow!("HARD_RESET_REQUIRED"));
}
match scrape_company_details_by_isin(pool, isin, shutdown_flag).await {
Ok(result) => return Ok(result),
Err(e) => {
// Check if this is a hard reset required error
let error_msg = e.to_string();
if error_msg.contains("HARD_RESET_REQUIRED") {
logger::log_error(&format!(
"Hard reset required error for ISIN {}, propagating immediately",
isin
)).await;
return Err(e); // Propagate immediately, don't retry
}
if retries >= max_retries {
logger::log_error(&format!(
"All {} retries exhausted for ISIN {}: {}",
max_retries, isin, e
)).await;
return Err(e);
}
let backoff_ms = 1000 * 2u64.pow(retries);
let jitter_ms = random_range(0, 500);
let total_delay = backoff_ms + jitter_ms;
logger::log_warn(&format!(
"Retry {}/{} for ISIN {} after {}ms: {}",
retries + 1, max_retries, isin, total_delay, e
)).await;
sleep(Duration::from_millis(total_delay)).await;
retries += 1;
}
}
}
}
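// Sketch of the string-sentinel convention used for hard resets above: the
// driver pool signals an unrecoverable browser state through an error whose
// message contains "HARD_RESET_REQUIRED", and callers match on the message text
// rather than on a dedicated error type. A typed error would be more robust,
// but the sentinel keeps propagation simple across task boundaries.
// `needs_hard_reset` is a hypothetical helper named only for illustration.
#[allow(dead_code)]
fn needs_hard_reset(err: &anyhow::Error) -> bool {
err.to_string().contains("HARD_RESET_REQUIRED")
}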
/// Process single company with validation and shutdown checks
async fn process_single_company_validated(
name: String,
company_info: CompanyData,
existing_entry: Option<CompanyData>,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Option<CompanyProcessResult>> {
// Check shutdown at start
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!("Shutdown detected, skipping company: {}", name)).await;
return Ok(None);
}
let is_update = existing_entry.is_some();
let mut isin_tickers_map: HashMap<String, Vec<String>> =
existing_entry
.as_ref()
.and_then(|e| e.isin_tickers_map.clone())
.unwrap_or_default();
// Collect unique ISIN-ticker pairs
let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
for figi_infos in company_info.securities.values() {
for figi_info in figi_infos {
if !figi_info.isin.is_empty() {
let tickers = unique_isin_ticker_pairs
.entry(figi_info.isin.clone())
.or_insert_with(Vec::new);
if !figi_info.ticker.is_empty() && !tickers.contains(&figi_info.ticker) {
tickers.push(figi_info.ticker.clone());
}
}
}
}
// Process each ISIN independently with per-ISIN status checking
for (isin, figi_tickers) in unique_isin_ticker_pairs {
// Check shutdown before each ISIN
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!(
"Shutdown detected while processing company: {}",
name
)).await;
break;
}
let tickers = isin_tickers_map
.entry(isin.clone())
.or_insert_with(Vec::new);
for figi_ticker in figi_tickers {
if !tickers.contains(&figi_ticker) {
tickers.push(figi_ticker);
}
}
// Check if THIS SPECIFIC ISIN has valid Yahoo data (not ERROR)
let has_valid_yahoo = tickers.iter().any(|t| {
t.starts_with("YAHOO:") && t != "YAHOO:ERROR"
// Note: YAHOO:NO_RESULTS is valid (legitimately not found)
});
if !has_valid_yahoo {
logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
tickers.retain(|t| !t.starts_with("YAHOO:"));
match scrape_with_retry(pool, &isin, 3, shutdown_flag).await {
Ok(Some(details)) => {
logger::log_info(&format!(
"✓ Found Yahoo ticker {} for ISIN {} (company: {})",
details.ticker, isin, name
)).await;
tickers.push(format!("YAHOO:{}", details.ticker));
},
Ok(None) => {
logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
tickers.push("YAHOO:NO_RESULTS".to_string());
},
Err(e) => {
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!("Shutdown during scrape for ISIN {}", isin)).await;
break;
}
// Check if this is a hard reset required error
let error_msg = e.to_string();
if error_msg.contains("HARD_RESET_REQUIRED") {
logger::log_error(&format!(
"Hard reset required during ISIN {} processing, propagating error",
isin
)).await;
return Err(e); // ← CRITICAL: Propagate immediately
}
logger::log_warn(&format!(
"✗ Yahoo lookup error for ISIN {} (company: {}): {}",
isin, name, e
)).await;
// Mark this ISIN as failed to enable retry
tickers.push("YAHOO:ERROR".to_string());
}
}
}
}
// Final shutdown check before returning result
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!(
"Shutdown detected, discarding incomplete result for: {}",
name
)).await;
return Ok(None);
}
if pool.should_perform_hard_reset() {
logger::log_error("HARD_RESET_REQUIRED detected during company processing").await;
return Err(anyhow!("HARD_RESET_REQUIRED"));
}
if !isin_tickers_map.is_empty() {
let company_entry = CompanyData {
name: name.clone(),
primary_isin: company_info.primary_isin.clone(),
securities: company_info.securities.clone(),
yahoo_company_data: company_info.yahoo_company_data.clone(),
isin_tickers_map: Some(isin_tickers_map),
};
Ok(Some(CompanyProcessResult {
company: company_entry,
is_update,
}))
} else {
logger::log_warn(&format!("No ISINs found for company: {}", name)).await;
Ok(None)
}
}

View File

@@ -0,0 +1,911 @@
// src/corporate/update_companies_cleanse.rs
use super::{helpers::*, types::*};
use crate::config::Config;
use crate::corporate::checkpoint_helpers;
use crate::util::directories::DataPaths;
use crate::util::integrity::{DataStage, StateManager, file_reference};
use crate::util::logger;
use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule};
use std::result::Result::Ok;
use chrono::Utc;
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use tokio::fs::{File, OpenOptions};
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
use futures::stream::{FuturesUnordered, StreamExt};
use tokio::sync::mpsc;
/// Result of processing a single company
#[derive(Debug, Clone)]
pub enum CompanyProcessResult {
Valid(CompanyData),
FilteredLowCap { name: String, market_cap: f64 },
FilteredNoPrice { name: String },
Failed { company: CompanyData, error: String, is_transient: bool },
}
/// Represents a write command to be serialized through the log writer
enum LogCommand {
Write(CompanyData),
Checkpoint,
Shutdown,
}
/// Cleansing pass that removes companies missing essential Yahoo data, preserving dataset integrity
pub async fn companies_yahoo_cleansed_no_data(paths: &DataPaths) -> Result<usize, anyhow::Error> {
let data_path = paths.data_dir();
let input_path = data_path.join("companies.jsonl");
let output_path = data_path.join("companies_yahoo.jsonl");
if !input_path.exists() {
logger::log_warn("companies.jsonl not found, skipping cleansing").await;
return Ok(0);
}
let manager = StateManager::new(paths.integrity_dir()).await?;
let step_name = "yahoo_companies_cleansed_no_data";
let content_reference = file_reference(&output_path);
if manager.is_step_valid(step_name).await? {
let output_content = tokio::fs::read_to_string(&output_path).await?;
let count = output_content.lines()
.filter(|line| !line.trim().is_empty())
.count();
logger::log_info(&format!(" ✓ Found {} companies in companies_yahoo.jsonl", count)).await;
return Ok(count);
}
let entry = manager.create_entry(
step_name.to_string(),
content_reference.clone(),
DataStage::Data,
).await?;
logger::log_info(" Cleansing companies with missing Yahoo data...").await;
logger::log_info(&format!(" Reading from: {:?}", input_path)).await;
logger::log_info(&format!(" Writing to: {:?}", output_path)).await;
let file = File::open(&input_path).await?;
let reader = BufReader::new(file);
let mut lines = reader.lines();
let mut output_file = File::create(&output_path).await?;
let mut valid_count = 0;
let mut removed_count = 0;
let mut total_count = 0;
while let Some(line) = lines.next_line().await? {
if line.trim().is_empty() {
continue;
}
total_count += 1;
let company: CompanyData = match serde_json::from_str(&line) {
Ok(c) => c,
Err(e) => {
logger::log_warn(&format!(" Failed to parse company on line {}: {}", total_count, e)).await;
continue;
}
};
let has_valid_yahoo = company.isin_tickers_map
.as_ref()
.map(|map| {
map.values()
.flatten()
.any(|ticker| {
ticker.starts_with("YAHOO:")
&& ticker != "YAHOO:NO_RESULTS"
&& ticker != "YAHOO:ERROR"
})
})
.unwrap_or(false);
if has_valid_yahoo {
let json_line = serde_json::to_string(&company)?;
output_file.write_all(json_line.as_bytes()).await?;
output_file.write_all(b"\n").await?;
valid_count += 1;
} else {
removed_count += 1;
if removed_count <= 5 {
logger::log_info(&format!(" Removed company '{}' (no valid Yahoo ticker)", company.name)).await;
}
}
if total_count % 1000 == 0 {
logger::log_info(&format!(" Processed {} companies...", total_count)).await;
}
}
output_file.flush().await?;
logger::log_info(&format!(
" ✓ Cleansing complete: {} total → {} valid, {} removed",
total_count, valid_count, removed_count
)).await;
// Track completion with:
// - Content reference: companies_yahoo.jsonl output file
// - Data stage: Data (7-day TTL by default)
// - Dependencies: Depends on the companies.jsonl input data
manager.mark_valid(entry).await?;
Ok(valid_count)
}
/// Yahoo Low Profile Cleansing WITH ABORT-SAFE INCREMENTAL PERSISTENCE
///
/// # Features
/// - Graceful shutdown (abort-safe)
/// - Task panic isolation (tasks fail independently)
/// - Crash-safe persistence (checkpoint + log with fsync)
/// - Smart skip logic (only process incomplete data)
/// - Uses a pending queue instead of a retry mechanism
/// - Reuses companies_updates.log for persistence
///
/// # Persistence Strategy
/// - Checkpoint: companies_yahoo_cleaned.jsonl (atomic state)
/// - Log: companies_update.log (append-only updates)
/// - On restart: Load checkpoint + replay log
/// - Periodic checkpoints (every 50 companies)
/// - Batched fsync (every 10 writes or 10 seconds)
pub async fn companies_yahoo_cleansed_low_profile(
paths: &DataPaths,
_config: &Config,
yahoo_pool: Arc<YahooClientPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<usize> {
// Configuration constants
const CHECKPOINT_INTERVAL: usize = 50;
const FSYNC_BATCH_SIZE: usize = 10;
const FSYNC_INTERVAL_SECS: u64 = 10;
const CONCURRENCY_LIMIT: usize = 50; // Limit parallel validation tasks
let data_path = paths.data_dir();
// File paths (reusing companies_update.log)
let input_path = data_path.join("companies_yahoo.jsonl");
let checkpoint_path = data_path.join("companies_yahoo_cleaned.jsonl");
let log_path = data_path.join("companies_updates.log");
// Check input exists
if !input_path.exists() {
logger::log_warn(" companies_yahoo.jsonl not found, skipping low profile cleansing").await;
return Ok(0);
}
let manager = StateManager::new(paths.integrity_dir()).await?;
let step_name = "yahoo_companies_cleansed_low_profile";
let content_reference = file_reference(&checkpoint_path);
if manager.is_step_valid(step_name).await? {
let checkpoint_content = tokio::fs::read_to_string(&checkpoint_path).await?;
let count = checkpoint_content.lines()
.filter(|line| !line.trim().is_empty())
.count();
logger::log_info(&format!(" ✓ Found {} companies in companies_yahoo_cleaned.jsonl", count)).await;
return Ok(count);
}
let entry = manager.create_entry(
step_name.to_string(),
content_reference.clone(),
DataStage::Data,
).await?;
logger::log_info(" Cleansing companies with low Yahoo profile...").await;
// === RECOVERY PHASE: Load checkpoint + replay log ===
let mut existing_companies: HashMap<String, CompanyData> = HashMap::new();
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
if checkpoint_path.exists() {
logger::log_info("Loading checkpoint from companies_yahoo_cleaned.jsonl...").await;
let checkpoint_content = tokio::fs::read_to_string(&checkpoint_path).await?;
for line in checkpoint_content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
}
}
}
logger::log_info(&format!("Loaded checkpoint with {} companies", existing_companies.len())).await;
}
if log_path.exists() {
logger::log_info("Replaying update log...").await;
let log_content = tokio::fs::read_to_string(&log_path).await?;
let mut replayed = 0;
for line in log_content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<CompanyData>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
replayed += 1;
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
if replayed > 0 {
logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
}
}
// === LOAD INPUT COMPANIES ===
logger::log_info(&format!("Loading companies from: {:?}", input_path)).await;
let input_companies = load_companies_from_jsonl(&input_path).await?;
logger::log_info(&format!("Loaded {} companies from input", input_companies.len())).await;
// === BUILD PENDING LIST (smart skip logic) ===
let mut pending: Vec<CompanyData> = input_companies
.into_iter()
.filter(|company| company_needs_processing(company, &existing_companies))
.collect();
logger::log_info(&format!(
"Initial scan: {} companies need processing ({} already complete)",
pending.len(),
existing_companies.len()
)).await;
// === CONSOLIDATE LOG BEFORE EARLY EXIT ===
if pending.is_empty() {
logger::log_info(" ✓ All companies already processed").await;
// Consolidate log into checkpoint before exiting
if checkpoint_helpers::log_has_content(&log_path).await {
checkpoint_helpers::consolidate_checkpoint(&checkpoint_path, &log_path, &existing_companies).await?;
}
return Ok(existing_companies.len());
}
// === SETUP LOG WRITER TASK ===
let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
let log_file_init = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await?;
let checkpoint_path_clone = checkpoint_path.clone();
let log_path_clone = log_path.clone();
let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
let existing_companies_writer_for_task = Arc::clone(&existing_companies_writer);
let write_tx_for_writer = write_tx.clone();
let writer_task = tokio::spawn(async move {
let mut log_file = log_file_init;
let mut writes_since_fsync = 0;
let mut last_fsync = std::time::Instant::now();
let mut updates_since_checkpoint = 0;
let mut count = 0;
let mut new_count = 0;
let mut updated_count = 0;
while let Some(cmd) = write_rx.recv().await {
match cmd {
LogCommand::Write(company) => {
// Write to log
let line = serde_json::to_string(&company).unwrap();
if let Err(e) = log_file.write_all(line.as_bytes()).await {
logger::log_error(&format!("Failed to write to log: {}", e)).await;
break;
}
if let Err(e) = log_file.write_all(b"\n").await {
logger::log_error(&format!("Failed to write newline: {}", e)).await;
break;
}
writes_since_fsync += 1;
updates_since_checkpoint += 1;
count += 1;
// Update in-memory state
let mut existing_companies = existing_companies_writer_for_task.lock().await;
let is_update = existing_companies.contains_key(&company.name);
existing_companies.insert(company.name.clone(), company);
drop(existing_companies);
if is_update {
updated_count += 1;
} else {
new_count += 1;
}
// Batched + time-based fsync
let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
if should_fsync {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync: {}", e)).await;
break;
}
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
}
LogCommand::Checkpoint => {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
break;
}
let existing_companies = existing_companies_writer_for_task.lock().await;
let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
drop(existing_companies);
let temp_path = checkpoint_path_clone.with_extension("tmp");
match tokio::fs::File::create(&temp_path).await {
Ok(mut temp_file) => {
let mut checkpoint_ok = true;
for company in &companies_vec {
if let Ok(line) = serde_json::to_string(company) {
if temp_file.write_all(line.as_bytes()).await.is_err() ||
temp_file.write_all(b"\n").await.is_err() {
checkpoint_ok = false;
break;
}
}
}
if checkpoint_ok {
if temp_file.flush().await.is_ok() &&
temp_file.sync_data().await.is_ok() {
drop(temp_file);
if tokio::fs::rename(&temp_path, &checkpoint_path_clone).await.is_ok() {
if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
logger::log_info(&format!(
"✓ Checkpoint created ({} companies), log cleared",
companies_vec.len()
)).await;
if let Ok(new_log) = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path_clone)
.await {
log_file = new_log;
}
}
}
}
}
}
Err(e) => {
logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
}
}
updates_since_checkpoint = 0;
}
LogCommand::Shutdown => {
logger::log_info("Writer shutting down...").await;
break;
}
}
// Periodic checkpoint trigger; try_send avoids the writer blocking on its own
// (possibly full) channel, and resetting the counter prevents re-triggering on
// every queued write before the checkpoint command is processed
if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
let _ = write_tx_for_writer.try_send(LogCommand::Checkpoint);
updates_since_checkpoint = 0;
}
}
// Final fsync
let _ = log_file.flush().await;
let _ = log_file.sync_data().await;
logger::log_info(&format!(
"Writer finished: {} total ({} new, {} updated)",
count, new_count, updated_count
)).await;
(count, new_count, updated_count)
});
// Wrap paths in Arc for safe sharing across tasks
let paths = Arc::new((*paths).clone());
// === MAIN PROCESSING LOOP WITH TASK PANIC ISOLATION ===
let total = pending.len();
let mut tasks = FuturesUnordered::new();
// Counters
let processed = Arc::new(AtomicUsize::new(0));
let valid_count = Arc::new(AtomicUsize::new(0));
let filtered_low_cap = Arc::new(AtomicUsize::new(0));
let filtered_no_price = Arc::new(AtomicUsize::new(0));
let failed_count = Arc::new(AtomicUsize::new(0));
// Spawn initial batch
for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
if let Some(company) = pending.pop() {
spawn_validation_task(
company,
&yahoo_pool,
&paths,
&write_tx,
shutdown_flag,
&processed,
&valid_count,
&filtered_low_cap,
&filtered_no_price,
&failed_count,
total,
&mut tasks,
);
}
}
// Process results and spawn new tasks (with task panic isolation)
while let Some(task_result) = tasks.next().await {
// Check for shutdown
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown signal received, stopping processing").await;
break;
}
match task_result {
Ok(Ok(_)) => {
// Success - spawn next task
if let Some(company) = pending.pop() {
spawn_validation_task(
company,
&yahoo_pool,
&paths,
&write_tx,
shutdown_flag,
&processed,
&valid_count,
&filtered_low_cap,
&filtered_no_price,
&failed_count,
total,
&mut tasks,
);
}
}
Ok(Err(e)) => {
// Processing error
logger::log_error(&format!("Company processing error: {}", e)).await;
if let Some(company) = pending.pop() {
spawn_validation_task(
company,
&yahoo_pool,
&paths,
&write_tx,
shutdown_flag,
&processed,
&valid_count,
&filtered_low_cap,
&filtered_no_price,
&failed_count,
total,
&mut tasks,
);
}
}
Err(e) => {
// Task panic (isolated - doesn't crash entire process)
logger::log_error(&format!("Task panic: {}", e)).await;
if let Some(company) = pending.pop() {
spawn_validation_task(
company,
&yahoo_pool,
&paths,
&write_tx,
shutdown_flag,
&processed,
&valid_count,
&filtered_low_cap,
&filtered_no_price,
&failed_count,
total,
&mut tasks,
);
}
}
}
}
logger::log_info("Main processing loop completed").await;
// Signal writer to finish
let _ = write_tx.send(LogCommand::Checkpoint).await;
let _ = write_tx.send(LogCommand::Shutdown).await;
drop(write_tx);
// Wait for writer to finish
let (final_count, final_new, final_updated) = writer_task.await
.unwrap_or((0, 0, 0));
let final_valid = valid_count.load(Ordering::SeqCst);
let final_filtered_low_cap = filtered_low_cap.load(Ordering::SeqCst);
let final_filtered_no_price = filtered_no_price.load(Ordering::SeqCst);
let final_failed = failed_count.load(Ordering::SeqCst);
logger::log_info(&format!(
"✅ Completed: {} total companies ({} new, {} updated)",
final_count, final_new, final_updated
)).await;
logger::log_info(&format!(
" Valid: {}, Filtered (low cap): {}, Filtered (no price): {}, Failed: {}",
final_valid, final_filtered_low_cap, final_filtered_no_price, final_failed
)).await;
// === VERIFY AND RECREATE FINAL OUTPUT ===
logger::log_info("Verifying final output integrity...").await;
let final_companies_map = existing_companies_writer.lock().await;
let expected_count = final_companies_map.len();
// Always write final consolidated checkpoint
let temp_checkpoint = checkpoint_path.with_extension("tmp");
let mut temp_file = File::create(&temp_checkpoint).await?;
for company in final_companies_map.values() {
let json_line = serde_json::to_string(company)?;
temp_file.write_all(json_line.as_bytes()).await?;
temp_file.write_all(b"\n").await?;
}
temp_file.flush().await?;
temp_file.sync_data().await?;
drop(temp_file);
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
drop(final_companies_map);
// Clear log since everything is in checkpoint
if log_path.exists() {
tokio::fs::remove_file(&log_path).await.ok();
}
logger::log_info(&format!("✓ Final output: {} companies in {:?}", expected_count, checkpoint_path)).await;
// Shutdown Yahoo pool
yahoo_pool.shutdown().await?;
// Track completion with:
// - Content reference: companies_yahoo_cleaned.jsonl output file
// - Data stage: Data (7-day TTL by default)
// - Dependencies: Depends on the companies_yahoo.jsonl cleansed data
if !shutdown_flag.load(Ordering::SeqCst) {
manager.mark_valid(entry).await?;
}
Ok(final_count)
}
/// Helper function to spawn a validation task (reduces code duplication)
fn spawn_validation_task(
company: CompanyData,
yahoo_pool: &Arc<YahooClientPool>,
paths: &Arc<DataPaths>,
write_tx: &mpsc::Sender<LogCommand>,
shutdown_flag: &Arc<AtomicBool>,
processed: &Arc<AtomicUsize>,
valid_count: &Arc<AtomicUsize>,
filtered_low_cap: &Arc<AtomicUsize>,
filtered_no_price: &Arc<AtomicUsize>,
failed_count: &Arc<AtomicUsize>,
total: usize,
tasks: &mut FuturesUnordered<tokio::task::JoinHandle<anyhow::Result<Option<()>>>>,
) {
let yahoo_pool_clone = Arc::clone(yahoo_pool);
let paths_clone = Arc::clone(paths);
let shutdown_flag_clone = Arc::clone(shutdown_flag);
let write_tx_clone = write_tx.clone();
let processed_clone = Arc::clone(processed);
let valid_count_clone = Arc::clone(valid_count);
let filtered_low_cap_clone = Arc::clone(filtered_low_cap);
let filtered_no_price_clone = Arc::clone(filtered_no_price);
let failed_count_clone = Arc::clone(failed_count);
let task = tokio::spawn(async move {
// Check shutdown at start
if shutdown_flag_clone.load(Ordering::SeqCst) {
return Ok::<_, anyhow::Error>(None);
}
let result = process_company_with_validation(
&company,
&yahoo_pool_clone,
&*paths_clone,
).await;
match result {
CompanyProcessResult::Valid(validated_company) => {
// Send to writer
let _ = write_tx_clone.send(LogCommand::Write(validated_company)).await;
valid_count_clone.fetch_add(1, Ordering::SeqCst);
}
CompanyProcessResult::FilteredLowCap { name, market_cap } => {
filtered_low_cap_clone.fetch_add(1, Ordering::SeqCst);
if filtered_low_cap_clone.load(Ordering::SeqCst) <= 10 {
logger::log_info(&format!(" Filtered {} - low market cap: {:.0} EUR", name, market_cap)).await;
}
}
CompanyProcessResult::FilteredNoPrice { name } => {
filtered_no_price_clone.fetch_add(1, Ordering::SeqCst);
if filtered_no_price_clone.load(Ordering::SeqCst) <= 10 {
logger::log_info(&format!(" Filtered {} - no recent price data", name)).await;
}
}
CompanyProcessResult::Failed { company: failed_company, error, is_transient: _ } => {
failed_count_clone.fetch_add(1, Ordering::SeqCst);
logger::log_warn(&format!(" Failed to process '{}': {}", failed_company.name, error)).await;
}
}
// Progress reporting
let current = processed_clone.fetch_add(1, Ordering::SeqCst) + 1;
if current % 100 == 0 {
logger::log_info(&format!(
"Progress: {}/{} ({} valid, {} low cap, {} no price, {} failed)",
current, total,
valid_count_clone.load(Ordering::SeqCst),
filtered_low_cap_clone.load(Ordering::SeqCst),
filtered_no_price_clone.load(Ordering::SeqCst),
failed_count_clone.load(Ordering::SeqCst)
)).await;
}
Ok(None::<()>)
});
tasks.push(task);
}
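// Design note: the counter parameters above could be bundled into one shared
// context struct to shorten the signature; a sketch of that alternative
// (struct and field names are illustrative only):
#[allow(dead_code)]
struct ValidationCounters {
processed: Arc<AtomicUsize>,
valid: Arc<AtomicUsize>,
filtered_low_cap: Arc<AtomicUsize>,
filtered_no_price: Arc<AtomicUsize>,
failed: Arc<AtomicUsize>,
}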
/// Process a single company with full error categorization
async fn process_company_with_validation(
company: &CompanyData,
yahoo_pool: &Arc<YahooClientPool>,
paths: &DataPaths,
) -> CompanyProcessResult {
// Extract Yahoo ticker
let ticker = match extract_first_yahoo_ticker(company) {
Some(t) => t,
None => {
return CompanyProcessResult::Failed {
company: company.clone(),
error: "No valid Yahoo ticker found".to_string(),
is_transient: false, // Permanent - no ticker means no data
};
}
};
// Fetch core modules from Yahoo
let summary = match yahoo_pool.get_quote_summary(
&ticker,
&QuoteSummaryModule::core_modules(),
).await {
Ok(s) => s,
Err(e) => {
let error_msg = e.to_string();
let is_transient = is_transient_error(&error_msg);
return CompanyProcessResult::Failed {
company: company.clone(),
error: format!("API error fetching summary: {}", error_msg),
is_transient,
};
}
};
// Validate market cap
let market_cap = extract_market_cap(&summary);
if market_cap < 100_000_000.0 {
return CompanyProcessResult::FilteredLowCap {
name: company.name.clone(),
market_cap,
};
}
// Validate recent price activity
let has_recent_price = match check_recent_price_activity(yahoo_pool, &ticker).await {
Ok(has) => has,
Err(e) => {
let error_msg = e.to_string();
let is_transient = is_transient_error(&error_msg);
return CompanyProcessResult::Failed {
company: company.clone(),
error: format!("API error fetching price history: {}", error_msg),
is_transient,
};
}
};
if !has_recent_price {
return CompanyProcessResult::FilteredNoPrice {
name: company.name.clone(),
};
}
// Save core data
if let Err(e) = save_company_core_data(paths, &company.name, &summary).await {
logger::log_warn(&format!(
" Failed to save core data for {}: {}",
company.name, e
)).await;
}
CompanyProcessResult::Valid(company.clone())
}
/// Determine if an error is transient (should retry) or permanent (skip)
fn is_transient_error(error: &str) -> bool {
let error_lower = error.to_lowercase();
// Transient errors (network, rate limiting, timeouts)
let transient_patterns = [
"timeout",
"timed out",
"connection",
"network",
"rate limit",
"too many requests",
"429",
"503",
"502",
"500",
"temporarily",
"unavailable",
];
for pattern in &transient_patterns {
if error_lower.contains(pattern) {
return true;
}
}
// Permanent errors (invalid ticker, no data, parsing errors)
let permanent_patterns = [
"404",
"not found",
"invalid",
"no data",
"parse error",
"400",
"401",
"403",
];
for pattern in &permanent_patterns {
if error_lower.contains(pattern) {
return false;
}
}
// Default: treat unknown errors as transient (safer to retry)
true
}
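// Illustrative classification under the rules above (the messages are made up
// for the example): timeouts and 429/5xx map to transient, 4xx/not-found map
// to permanent, and anything unrecognized defaults to transient.
#[cfg(test)]
mod transient_error_tests {
use super::*;
#[test]
fn classifies_common_messages() {
assert!(is_transient_error("request timed out after 30s"));
assert!(is_transient_error("HTTP 429 Too Many Requests"));
assert!(!is_transient_error("HTTP 404 Not Found"));
assert!(is_transient_error("some unexpected failure")); // unknown → retry
}
}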
fn extract_market_cap(summary: &crate::scraper::yahoo::QuoteSummary) -> f64 {
let price_module = match summary.modules.get("price") {
Some(m) => m,
None => return 0.0,
};
let market_cap_raw = price_module
.get("marketCap")
.and_then(|v| v.get("raw"))
.and_then(|v| v.as_f64())
.unwrap_or(0.0);
let currency = price_module
.get("currency")
.and_then(|v| v.as_str())
.unwrap_or("USD");
let market_cap_eur = match currency {
"EUR" => market_cap_raw,
"USD" => market_cap_raw * 0.92,
"GBP" => market_cap_raw * 1.17,
"JPY" => market_cap_raw * 0.0061,
"CHF" => market_cap_raw * 1.05,
_ => market_cap_raw * 0.92,
};
market_cap_eur
}
async fn check_recent_price_activity(
yahoo_pool: &Arc<YahooClientPool>,
ticker: &str,
) -> anyhow::Result<bool> {
let now = Utc::now().timestamp();
let one_year_ago = now - (365 * 24 * 60 * 60);
let sixty_days_ago = now - (60 * 24 * 60 * 60);
let chart_data = yahoo_pool.get_chart_data(
ticker,
"1d",
sixty_days_ago,
now,
).await?;
if chart_data.quotes.is_empty() {
return Ok(false);
}
let most_recent_timestamp = chart_data.quotes
.iter()
.map(|q| q.timestamp)
.max()
.unwrap_or(0);
Ok(most_recent_timestamp >= one_year_ago)
}
async fn save_company_core_data(
paths: &DataPaths,
company_name: &str,
summary: &crate::scraper::yahoo::QuoteSummary,
) -> anyhow::Result<()> {
use tokio::fs;
let safe_name = sanitize_company_name(company_name);
let company_dir = paths.corporate_dir().join(&safe_name).join("core");
fs::create_dir_all(&company_dir).await?;
let data_path = company_dir.join("data.jsonl");
let json_line = serde_json::to_string(summary)?;
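// File::create truncates, so core/data.jsonl always holds exactly one (latest) snapshot line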
let mut file = fs::File::create(&data_path).await?;
file.write_all(json_line.as_bytes()).await?;
file.write_all(b"\n").await?;
file.flush().await?;
Ok(())
}
/// Check if a company needs processing (validation check)
fn company_needs_processing(
company: &CompanyData,
existing_companies: &HashMap<String, CompanyData>,
) -> bool {
// If company exists in cleaned output, skip it
!existing_companies.contains_key(&company.name)
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,578 +0,0 @@
// src/corporate/update_parallel.rs - UPDATED WITH DATA INTEGRITY FIXES
// PARALLELIZED VERSION with atomic commits and validation
//
// Key improvements over original:
// - Page validation to prevent stale content extraction
// - Shutdown-aware task processing
// - Better error recovery with browser state cleanup
// - All original fsync and checkpoint logic preserved
use super::{types::*, yahoo::*, helpers::*};
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::scraper::webdriver::ChromeDriverPool;
use rand::Rng;
use tokio::sync::mpsc;
use tokio::io::AsyncWriteExt;
use tokio::fs::OpenOptions;
use tokio::time::sleep;
use std::collections::HashMap;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::time::Duration;
use futures::stream::{FuturesUnordered, StreamExt};
use anyhow::{anyhow, Context, Result};
/// Represents a write command to be serialized through the log writer
enum LogCommand {
Write(CompanyCrossPlatformInfo),
Checkpoint,
Shutdown,
}
/// Result from processing a single company
struct CompanyProcessResult {
company: CompanyCrossPlatformInfo,
is_update: bool,
}
/// UPDATED: Abort-safe incremental JSONL persistence with validation
///
/// New safety features:
/// - Page validation before extraction
/// - Shutdown checks at all critical points
/// - Browser state cleanup on errors
/// - All writes still atomic with fsync
pub async fn build_companies_jsonl_streaming_parallel(
paths: &DataPaths,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<usize> {
// Configuration constants
const CHECKPOINT_INTERVAL: usize = 50;
const FSYNC_BATCH_SIZE: usize = 10;
const FSYNC_INTERVAL_SECS: u64 = 10;
const CONCURRENCY_LIMIT: usize = 100;
let path = DataPaths::new(".")?;
let corporate_path = path.data_dir().join("corporate").join("by_name");
let securities_path = corporate_path.join("common_stocks.json");
if !securities_path.exists() {
logger::log_warn("No common_stocks.json found").await;
return Ok(0);
}
let content = tokio::fs::read_to_string(securities_path).await?;
let securities: HashMap<String, CompanyInfo> = serde_json::from_str(&content)?;
let companies_path = paths.data_dir().join("companies.jsonl");
let log_path = paths.data_dir().join("companies_updates.log");
if let Some(parent) = companies_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
// === RECOVERY PHASE: Load checkpoint + replay log ===
let mut existing_companies: HashMap<String, CompanyCrossPlatformInfo> = HashMap::new();
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
if companies_path.exists() {
logger::log_info("Loading checkpoint from companies.jsonl...").await;
let existing_content = tokio::fs::read_to_string(&companies_path).await?;
for line in existing_content.lines() {
if line.trim().is_empty() {
continue;
}
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
}
}
}
logger::log_info(&format!("Loaded checkpoint with {} companies", existing_companies.len())).await;
}
if log_path.exists() {
logger::log_info("Replaying update log...").await;
let log_content = tokio::fs::read_to_string(&log_path).await?;
let mut replayed = 0;
for line in log_content.lines() {
if line.trim().is_empty() {
continue;
}
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
Ok(company) => {
processed_names.insert(company.name.clone());
existing_companies.insert(company.name.clone(), company);
replayed += 1;
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
if replayed > 0 {
logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
}
}
// === SETUP LOG WRITER TASK ===
let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
let log_file_init = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await?;
let companies_path_clone = companies_path.clone();
let log_path_clone = log_path.clone();
let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
let write_tx_for_writer = write_tx.clone();
let writer_task = tokio::spawn(async move {
let mut log_file = log_file_init;
let mut writes_since_fsync = 0;
let mut last_fsync = std::time::Instant::now();
let mut updates_since_checkpoint = 0;
let mut count = 0;
let mut new_count = 0;
let mut updated_count = 0;
while let Some(cmd) = write_rx.recv().await {
match cmd {
LogCommand::Write(company) => {
// Write to log
let line = serde_json::to_string(&company).unwrap();
if let Err(e) = log_file.write_all(line.as_bytes()).await {
logger::log_error(&format!("Failed to write to log: {}", e)).await;
break;
}
if let Err(e) = log_file.write_all(b"\n").await {
logger::log_error(&format!("Failed to write newline: {}", e)).await;
break;
}
writes_since_fsync += 1;
updates_since_checkpoint += 1;
count += 1;
// Update in-memory state
let mut existing_companies = existing_companies_writer.lock().await;
let is_update = existing_companies.contains_key(&company.name);
existing_companies.insert(company.name.clone(), company);
drop(existing_companies);
if is_update {
updated_count += 1;
} else {
new_count += 1;
}
// Batched + time-based fsync
let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
if should_fsync {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync: {}", e)).await;
break;
}
writes_since_fsync = 0;
last_fsync = std::time::Instant::now();
}
}
LogCommand::Checkpoint => {
if let Err(e) = log_file.flush().await {
logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
break;
}
if let Err(e) = log_file.sync_data().await {
logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
break;
}
let existing_companies = existing_companies_writer.lock().await;
let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
drop(existing_companies);
let temp_path = companies_path_clone.with_extension("tmp");
match tokio::fs::File::create(&temp_path).await {
Ok(mut temp_file) => {
let mut checkpoint_ok = true;
for company in &companies_vec {
if let Ok(line) = serde_json::to_string(company) {
if temp_file.write_all(line.as_bytes()).await.is_err() ||
temp_file.write_all(b"\n").await.is_err() {
checkpoint_ok = false;
break;
}
}
}
if checkpoint_ok {
if temp_file.flush().await.is_ok() &&
temp_file.sync_data().await.is_ok() {
drop(temp_file);
if tokio::fs::rename(&temp_path, &companies_path_clone).await.is_ok() {
if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
logger::log_info(&format!(
"✓ Checkpoint created ({} companies), log cleared",
companies_vec.len()
)).await;
if let Ok(new_log) = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path_clone)
.await {
log_file = new_log;
}
}
}
}
}
}
Err(e) => {
logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
}
}
updates_since_checkpoint = 0;
}
LogCommand::Shutdown => {
logger::log_info("Writer shutting down...").await;
break;
}
}
// Periodic checkpoint trigger (reset the counter here so only one
// Checkpoint command is queued per interval)
if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
let _ = write_tx.send(LogCommand::Checkpoint).await;
updates_since_checkpoint = 0;
}
}
// Final fsync
let _ = log_file.flush().await;
let _ = log_file.sync_data().await;
logger::log_info(&format!(
"Writer finished: {} total ({} new, {} updated)",
count, new_count, updated_count
)).await;
(count, new_count, updated_count)
});
// === PARALLEL PROCESSING PHASE ===
logger::log_info(&format!(
"Starting parallel processing of {} companies (concurrency limit: {})",
securities.len(),
CONCURRENCY_LIMIT
)).await;
let mut processing_tasks = FuturesUnordered::new();
let mut processed = 0;
let total = securities.len();
for (name, company_info) in securities.into_iter() {
// Check shutdown before creating new tasks
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected, stopping task creation").await;
break;
}
// Wait if we hit concurrency limit
while processing_tasks.len() >= CONCURRENCY_LIMIT {
if let Some(result) = processing_tasks.next().await {
match result {
Ok(Ok(Some(company_result))) => {
let company_result: CompanyProcessResult = company_result;
write_tx_for_writer.send(LogCommand::Write(company_result.company)).await?;
processed += 1;
}
Ok(Ok(None)) => {
processed += 1;
}
Ok(Err(e)) => {
logger::log_warn(&format!("Company processing error: {}", e)).await;
processed += 1;
}
Err(e) => {
logger::log_error(&format!("Task panic: {}", e)).await;
processed += 1;
}
}
}
if shutdown_flag.load(Ordering::SeqCst) {
break;
}
}
if shutdown_flag.load(Ordering::SeqCst) {
break;
}
// Spawn new task
let pool = pool.clone();
let shutdown_flag = shutdown_flag.clone();
let existing_entry = existing_companies.get(&name).cloned();
let task = tokio::spawn(async move {
process_single_company_validated(
name,
company_info,
existing_entry,
&pool,
&shutdown_flag
).await
});
processing_tasks.push(task);
if processed % 10 == 0 && processed > 0 {
logger::log_info(&format!("Progress: {}/{} companies processed", processed, total)).await;
}
}
// Wait for remaining tasks
logger::log_info(&format!(
"Waiting for {} remaining tasks to complete...",
processing_tasks.len()
)).await;
while let Some(result) = processing_tasks.next().await {
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown detected during final task wait").await;
break;
}
match result {
Ok(Ok(Some(company_result))) => {
if write_tx_for_writer.send(LogCommand::Write(company_result.company)).await.is_err() {
logger::log_error("Writer task died").await;
break;
}
processed += 1;
}
Ok(Ok(None)) => {
processed += 1;
}
Ok(Err(e)) => {
logger::log_warn(&format!("Company processing error: {}", e)).await;
processed += 1;
}
Err(e) => {
logger::log_error(&format!("Task panic: {}", e)).await;
processed += 1;
}
}
}
// Signal writer to finish
let _ = write_tx_for_writer.send(LogCommand::Checkpoint).await;
let _ = write_tx_for_writer.send(LogCommand::Shutdown).await;
drop(write_tx_for_writer);
// Wait for writer to finish
let (final_count, final_new, final_updated) = writer_task.await
.unwrap_or((0, 0, 0));
logger::log_info(&format!(
"Completed: {} total companies ({} new, {} updated)",
final_count, final_new, final_updated
)).await;
Ok(final_count)
}
/// Scrape with retry, validation, and shutdown awareness
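/// Backoff between attempts is exponential (1s, 2s, 4s, ...) with up to 500ms of
/// random jitter; the loop aborts immediately once the shutdown flag is set.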
async fn scrape_with_retry(
pool: &Arc<ChromeDriverPool>,
isin: &str,
max_retries: u32,
shutdown_flag: &Arc<AtomicBool>,
) -> Result<Option<YahooCompanyDetails>> {
let mut retries = 0;
loop {
// Check shutdown before each attempt
if shutdown_flag.load(Ordering::SeqCst) {
return Err(anyhow!("Aborted due to shutdown"));
}
match scrape_company_details_by_isin(pool, isin, shutdown_flag).await {
Ok(result) => return Ok(result),
Err(e) => {
if retries >= max_retries {
logger::log_error(&format!(
"All {} retries exhausted for ISIN {}: {}",
max_retries, isin, e
)).await;
return Err(e);
}
let backoff_ms = 1000 * 2u64.pow(retries);
let jitter_ms = random_range(0, 500);
let total_delay = backoff_ms + jitter_ms;
logger::log_warn(&format!(
"Retry {}/{} for ISIN {} after {}ms: {}",
retries + 1, max_retries, isin, total_delay, e
)).await;
sleep(Duration::from_millis(total_delay)).await;
retries += 1;
}
}
}
}
/// UPDATED: Process single company with validation and shutdown checks
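/// Existing ISIN-to-ticker mappings are merged with the FIGI-derived pairs; a Yahoo
/// lookup runs only for ISINs that have no `YAHOO:` ticker yet, and a
/// `YAHOO:NO_RESULTS` marker is stored when the search comes back empty so the
/// ISIN is not retried on the next run.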
async fn process_single_company_validated(
name: String,
company_info: CompanyInfo,
existing_entry: Option<CompanyCrossPlatformInfo>,
pool: &Arc<ChromeDriverPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Option<CompanyProcessResult>> {
// Check shutdown at start
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!("Shutdown detected, skipping company: {}", name)).await;
return Ok(None);
}
let is_update = existing_entry.is_some();
let mut isin_tickers_map: HashMap<String, Vec<String>> =
existing_entry
.as_ref()
.map(|e| e.isin_tickers_map.clone())
.unwrap_or_default();
let mut sector = existing_entry.as_ref().and_then(|e| e.sector.clone());
let mut exchange = existing_entry.as_ref().and_then(|e| e.exchange.clone());
// Collect unique ISIN-ticker pairs
let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
for figi_infos in company_info.securities.values() {
for figi_info in figi_infos {
if !figi_info.isin.is_empty() {
let tickers = unique_isin_ticker_pairs
.entry(figi_info.isin.clone())
.or_insert_with(Vec::new);
if !figi_info.ticker.is_empty() && !tickers.contains(&figi_info.ticker) {
tickers.push(figi_info.ticker.clone());
}
}
}
}
// Process each ISIN with validation
for (isin, figi_tickers) in unique_isin_ticker_pairs {
// Check shutdown before each ISIN
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!(
"Shutdown detected while processing company: {}",
name
)).await;
break;
}
let tickers = isin_tickers_map
.entry(isin.clone())
.or_insert_with(Vec::new);
for figi_ticker in figi_tickers {
if !tickers.contains(&figi_ticker) {
tickers.push(figi_ticker);
}
}
let has_yahoo_ticker = tickers.iter().any(|t| t.starts_with("YAHOO:"));
if !has_yahoo_ticker {
logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
match scrape_with_retry(pool, &isin, 3, shutdown_flag).await {
Ok(Some(details)) => {
logger::log_info(&format!(
"✓ Found Yahoo ticker {} for ISIN {} (company: {})",
details.ticker, isin, name
)).await;
tickers.push(format!("YAHOO:{}", details.ticker));
if sector.is_none() && details.sector.is_some() {
sector = details.sector.clone();
logger::log_info(&format!(" Sector: {}", details.sector.as_ref().unwrap())).await;
}
if exchange.is_none() && details.exchange.is_some() {
exchange = details.exchange.clone();
logger::log_info(&format!(" Exchange: {}", details.exchange.as_ref().unwrap())).await;
}
},
Ok(None) => {
logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
tickers.push("YAHOO:NO_RESULTS".to_string());
},
Err(e) => {
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!("Shutdown during scrape for ISIN {}", isin)).await;
break;
}
logger::log_warn(&format!(
"✗ Yahoo lookup error for ISIN {} (company: {}): {}",
isin, name, e
)).await;
// Continue with next ISIN
}
}
}
}
// Final shutdown check before returning result
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!(
"Shutdown detected, discarding incomplete result for: {}",
name
)).await;
return Ok(None);
}
if !isin_tickers_map.is_empty() {
let company_entry = CompanyCrossPlatformInfo {
name: name.clone(),
isin_tickers_map,
sector,
exchange,
};
Ok(Some(CompanyProcessResult {
company: company_entry,
is_update,
}))
} else {
logger::log_warn(&format!("No ISINs found for company: {}", name)).await;
Ok(None)
}
}

View File

@@ -20,14 +20,20 @@
 // Using a wrapper to ensure the result is properly captured
 var extractionResult = (function() {
     try {
-        // Check for "No results found" message using exact selector
-        const noDataElement = document.querySelector('#main-content-wrapper > section > div.noData.yf-1omxedn');
+        // Check for "No results found" message using very flexible selector
+        const noDataElement = document.querySelector('[class*="noData"]') ||
+            document.querySelector('[class*="error"]') ||
+            (document.body.innerText && document.body.innerText.includes('No results'));
         if (noDataElement) {
             return { status: 'no_results', ticker: null, sector: null, exchange: null };
         }
-        // Find the results table using exact selector
-        const table = document.querySelector('#main-content-wrapper > section > section.container.yf-1omxedn > div.tableContainer.yf-1omxedn > div > table');
+        // Find the results table using most flexible selector possible
+        // Try multiple strategies to find the table
+        const table = document.querySelector('table') ||
+            document.querySelector('[role="table"]') ||
+            document.querySelector('.table') ||
+            document.querySelector('#main-content-wrapper > section > section[class*="container"] > div[class*="tableContainer"] > div > table');
         if (!table) {
             return { status: 'no_results', ticker: null, sector: null, exchange: null };
         }

View File

@@ -1,4 +1,4 @@
// src/corporate/yahoo.rs - UPDATED WITH DATA INTEGRITY FIXES // src/corporate/yahoo.rs
use super::{types::*, helpers::*, page_validation::*}; use super::{types::*, helpers::*, page_validation::*};
use crate::{scraper::webdriver::*, util::{directories::DataPaths}}; use crate::{scraper::webdriver::*, util::{directories::DataPaths}};
use crate::logger; use crate::logger;
@@ -63,18 +63,23 @@ impl YahooTickerResult {
} }
} }
/// UPDATED: Scrape company details with full validation and shutdown support /// Scrape company details with full validation and shutdown support
pub async fn scrape_company_details_by_isin( pub async fn scrape_company_details_by_isin(
pool: &Arc<ChromeDriverPool>, pool: &Arc<ChromeDriverPool>,
isin: &str, isin: &str,
shutdown_flag: &Arc<AtomicBool>, shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Option<YahooCompanyDetails>> { ) -> anyhow::Result<Option<YahooCompanyData>> {
// Check shutdown before starting // Check shutdown before starting
if shutdown_flag.load(Ordering::SeqCst) { if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn(&format!("Shutdown detected, skipping ISIN: {}", isin)).await; logger::log_warn(&format!("Shutdown detected, skipping ISIN: {}", isin)).await;
return Ok(None); return Ok(None);
} }
if pool.should_perform_hard_reset() {
logger::log_warn("HARD_RESET_REQUIRED detected before starting ISIN scrape").await;
return Err(anyhow!("HARD_RESET_REQUIRED"));
}
let isin_owned = isin.to_string(); let isin_owned = isin.to_string();
let shutdown_clone = Arc::clone(shutdown_flag); let shutdown_clone = Arc::clone(shutdown_flag);
let url = format!("https://finance.yahoo.com/lookup/?s={}", isin); let url = format!("https://finance.yahoo.com/lookup/?s={}", isin);
@@ -118,13 +123,20 @@ pub async fn scrape_company_details_by_isin(
} }
} }
// Additional content validation // Additional content validation - look for table or noData element anywhere on page
let page_ready: bool = client let page_ready: bool = client
.execute( .execute(
r#" r#"
const table = document.querySelector('#main-content-wrapper > section > section.container.yf-1omxedn > div.tableContainer.yf-1omxedn > div > table'); // Try multiple selector strategies
const noData = document.querySelector('#main-content-wrapper > section > div.noData.yf-1omxedn'); const table = document.querySelector('table') ||
return !!(table || noData); document.querySelector('[role="table"]') ||
document.querySelector('.table');
const noData = document.querySelector('[class*="noData"]') ||
document.querySelector('[class*="error"]') ||
document.body.innerText.includes('No results');
const hasContent = !!(table || noData);
console.log('Page ready check - table:', !!table, 'noData:', !!noData, 'hasContent:', hasContent);
return hasContent;
"#, "#,
vec![], vec![],
) )
@@ -162,7 +174,7 @@ pub async fn scrape_company_details_by_isin(
async fn extract_company_details_validated( async fn extract_company_details_validated(
client: &Client, client: &Client,
isin: &str, isin: &str,
) -> Result<Option<YahooCompanyDetails>> { ) -> Result<Option<YahooCompanyData>> {
// Double-check URL is still correct before extraction // Double-check URL is still correct before extraction
let current_url = client.current_url().await?; let current_url = client.current_url().await?;
if !current_url.as_str().contains(isin) { if !current_url.as_str().contains(isin) {
@@ -197,8 +209,8 @@ async fn extract_company_details_validated(
pub async fn extract_company_details( pub async fn extract_company_details(
client: &Client, client: &Client,
_isin: &str, _isin: &str,
) -> Result<Option<YahooCompanyDetails>> { ) -> Result<Option<YahooCompanyData>> {
// Wait for page to load - look for either the table or the no-data element // Wait for page to load - look for either the table or the no-data element using simple selectors
let wait_result: Result<Result<bool, anyhow::Error>> = timeout( let wait_result: Result<Result<bool, anyhow::Error>> = timeout(
TokioDuration::from_secs(30), TokioDuration::from_secs(30),
async { async {
@@ -206,9 +218,14 @@ pub async fn extract_company_details(
let has_content: bool = client let has_content: bool = client
.execute( .execute(
r#" r#"
const table = document.querySelector('#main-content-wrapper > section > section.container.yf-1omxedn > div.tableContainer.yf-1omxedn > div > table'); // Use flexible selectors that don't depend on exact DOM structure
const noData = document.querySelector('#main-content-wrapper > section > div.noData.yf-1omxedn'); const table = document.querySelector('table') ||
return !!(table || noData); document.querySelector('[role="table"]') ||
document.querySelector('.table');
const noData = document.querySelector('[class*="noData"]') ||
document.querySelector('[class*="error"]');
const hasContent = !!(table || noData);
return hasContent;
"#, "#,
vec![], vec![],
) )
@@ -274,7 +291,7 @@ pub async fn extract_company_details(
)).await; )).await;
} }
Ok(Some(YahooCompanyDetails { Ok(Some(YahooCompanyData {
ticker, ticker,
sector: extraction.sector, sector: extraction.sector,
exchange: extraction.exchange, exchange: extraction.exchange,
@@ -298,9 +315,11 @@ pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::
let content = tokio::fs::read_to_string(companies_file).await?; let content = tokio::fs::read_to_string(companies_file).await?;
let mut tickers = Vec::new(); let mut tickers = Vec::new();
for line in content.lines() { for line in content.lines() {
let company: CompanyCrossPlatformInfo = serde_json::from_str(line)?; let company: CompanyData = serde_json::from_str(line)?;
for (_isin, ticker_vec) in company.isin_tickers_map { if let Some(isin_tickers_map) = company.isin_tickers_map {
tickers.extend(ticker_vec); for (_isin, ticker_vec) in isin_tickers_map {
tickers.extend(ticker_vec);
}
} }
} }
Ok(tickers) Ok(tickers)
@@ -309,9 +328,9 @@ pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::
pub async fn fetch_earnings_with_pool( pub async fn fetch_earnings_with_pool(
pool: &Arc<ChromeDriverPool>, pool: &Arc<ChromeDriverPool>,
ticker: &str, ticker: &str,
) -> anyhow::Result<Vec<CompanyEvent>> { ) -> anyhow::Result<Vec<CompanyEventData>> {
let ticker = ticker.to_string(); let ticker = ticker.to_string();
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
let ticker_cloned = ticker.clone(); let ticker_cloned = ticker.clone();
@@ -324,7 +343,7 @@ pub async fn fetch_earnings_with_pool(
}).await }).await
} }
pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> { pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEventData>> {
// Wait for the table to load // Wait for the table to load
let table = client let table = client
.wait() .wait()
@@ -398,7 +417,7 @@ pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Ve
None None
}; };
events.push(CompanyEvent { events.push(CompanyEventData {
ticker: ticker.to_string(), ticker: ticker.to_string(),
date, date,
time, time,

View File

@@ -2,7 +2,9 @@
 pub mod types;
 pub mod scraper;
 pub mod storage;
-pub mod update;
 pub mod helpers;
+pub mod update;
+pub mod yahoo_update_forex;
 pub use update::run_full_update;

View File

@@ -8,7 +8,30 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");
 pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
     client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
+    dismiss_overlays(client).await?;
+    Ok(())
+}
+
+pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
+    for _ in 0..10 {
+        let removed: bool = client
+            .execute(
+                r#"(() => {
+                    const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
+                    if (iframe && iframe.parentNode) {
+                        iframe.parentNode.removeChild(iframe);
+                        return true;
+                    }
+                    return false;
+                })()"#,
+                vec![],
+            )
+            .await?
+            .as_bool()
+            .unwrap_or(false);
+        if removed { break; }
+        sleep(Duration::from_millis(500)).await;
+    }
     Ok(())
 }

View File

@@ -8,7 +8,6 @@ use chrono::{NaiveDate, Datelike};
 use std::collections::HashMap;
 use serde_json;
-const CHUNK_SIZE: usize = 500; // Process 500 events at a time
 const MAX_EVENTS_PER_FILE: usize = 3000;
 pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {

View File

@@ -1,12 +1,13 @@
// src/economic/update.rs // src/economic/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*}; use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::check_shutdown;
use crate::{config::Config, scraper::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger}; use crate::{config::Config, scraper::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger};
use chrono::{Local}; use chrono::{Local};
use std::sync::Arc; use std::sync::{Arc, atomic::{AtomicBool}};
use std::collections::HashMap; use std::collections::HashMap;
/// Runs the full update for economic data using streaming to minimize memory usage /// Runs the full update for economic data using streaming to minimize memory usage
pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> { pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>, shutdown_flag: &Arc<AtomicBool>) -> anyhow::Result<()> {
let paths = DataPaths::new(".")?; let paths = DataPaths::new(".")?;
logger::log_info("Economic Update: Initializing...").await; logger::log_info("Economic Update: Initializing...").await;
@@ -14,17 +15,22 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string(); let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
let end_date = config.target_end_date(); let end_date = config.target_end_date();
logger::log_info("=== Economic Update ===").await;
check_shutdown!(shutdown_flag);
// Step 1: Build lightweight index instead of loading all events // Step 1: Build lightweight index instead of loading all events
logger::log_info("Economic Update: Building event index...").await; logger::log_info("Step 1: Building event index...").await;
let chunks = scan_existing_chunks(&paths).await?; let chunks = scan_existing_chunks(&paths).await?;
let event_index = build_event_index(&chunks).await?; let event_index = build_event_index(&chunks).await?;
logger::log_info(&format!(" Economic Update: Indexed {} events from {} chunks",
logger::log_info(&format!("Economic Update: Indexed {} events from {} chunks",
event_index.len(), chunks.len())).await; event_index.len(), chunks.len())).await;
check_shutdown!(shutdown_flag);
// Step 2: Determine start date // Step 2: Determine start date
let start_date = if event_index.is_empty() { let start_date = if event_index.is_empty() {
logger::log_warn("Economic Update: No existing events found, starting from config date").await; logger::log_warn("Step 2: No existing events found, starting from config date").await;
config.economic_start_date.clone() config.economic_start_date.clone()
} else { } else {
// Find the latest date in the index // Find the latest date in the index
@@ -35,7 +41,7 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
.unwrap_or(today_str.clone()); .unwrap_or(today_str.clone());
if max_date >= today_str { if max_date >= today_str {
logger::log_info("Economic Update: Events exist for today, starting from today").await; logger::log_info(" Events exist for today, starting from today").await;
today_str.clone() today_str.clone()
} else { } else {
let next = chrono::NaiveDate::parse_from_str(&max_date, "%Y-%m-%d") let next = chrono::NaiveDate::parse_from_str(&max_date, "%Y-%m-%d")
@@ -43,34 +49,37 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
.and_then(|d| d.succ_opt()) .and_then(|d| d.succ_opt())
.map(|d| d.format("%Y-%m-%d").to_string()) .map(|d| d.format("%Y-%m-%d").to_string())
.unwrap_or(today_str.clone()); .unwrap_or(today_str.clone());
logger::log_info(&format!("Economic Update: Resuming from: {}", next)).await; logger::log_info(&format!(" Resuming from: {}", next)).await;
next next
} }
}; };
logger::log_info(&format!("Economic Update: Scraping events from {}{}", start_date, end_date)).await; check_shutdown!(shutdown_flag);
// Step 3: Scrape new events in batches // Step 3: Scrape new events in batches
logger::log_info(&format!("Step 3: Scraping events from {}{}", start_date, end_date)).await;
let new_events = scrape_all_economic_events(&start_date, &end_date, pool).await?; let new_events = scrape_all_economic_events(&start_date, &end_date, pool).await?;
logger::log_info(&format!(" Scraped {} new events", new_events.len())).await;
logger::log_info(&format!("Economic Update: Scraped {} new events", new_events.len())).await; check_shutdown!(shutdown_flag);
// Step 4: Process events in streaming fashion // Step 4: Process events in streaming fashion
logger::log_info(&format!("Step 4: Detecting changes")).await;
let (changes, updated_events) = process_events_streaming(&chunks, &new_events, &today_str).await?; let (changes, updated_events) = process_events_streaming(&chunks, &new_events, &today_str).await?;
logger::log_info(&format!(" Detected {} changes", changes.len())).await;
logger::log_info(&format!("Economic Update: Detected {} changes", changes.len())).await;
if !changes.is_empty() { if !changes.is_empty() {
logger::log_info(&format!("Economic Update: Saving {} changes to log", changes.len())).await; logger::log_info(&format!(" Saving {} changes to log", changes.len())).await;
save_changes(&paths, &changes).await?; save_changes(&paths, &changes).await?;
logger::log_info("Economic Update: Changes saved successfully").await; logger::log_info(" Changes saved successfully").await;
} }
// Step 5: Save consolidated events check_shutdown!(shutdown_flag);
logger::log_info(&format!("Economic Update: Saving {} total events to chunks", updated_events.len())).await;
save_optimized_chunks(&paths, updated_events).await?; // Step 5: Save consolidated events
logger::log_info(&format!("Step 5: Saving {} total events to chunks", updated_events.len())).await;
save_optimized_chunks(&paths, updated_events).await?;
logger::log_info(&format!(" ✓ Economic update complete — {} changes detected", changes.len())).await;
logger::log_info(&format!("✓ Economic update complete — {} changes detected", changes.len())).await;
Ok(()) Ok(())
} }
@@ -183,7 +192,7 @@ pub fn process_batch(
let mut changes = Vec::new(); let mut changes = Vec::new();
let mut removed = std::collections::HashSet::new(); let mut removed = std::collections::HashSet::new();
let identity_map = build_identity_lookup(existing); //let identity_map = build_identity_lookup(existing);
let date_map = build_date_event_lookup(existing); let date_map = build_date_event_lookup(existing);
for new in new_events { for new in new_events {

View File

@@ -0,0 +1,477 @@
// src/forex/update_forex.rs
use crate::config::Config;
use crate::util::directories::DataPaths;
use crate::util::integrity::{DataStage, StateManager, directory_reference};
use crate::util::logger;
use crate::scraper::yahoo::{YahooClientPool};
use crate::corporate::types::*;
use std::result::Result::Ok;
use chrono::{TimeZone, Utc};
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use tokio::fs::{OpenOptions};
use tokio::io::{AsyncWriteExt};
use futures::stream::{FuturesUnordered, StreamExt};
use serde_json::json;
use tokio::sync::mpsc;
/// Currency information
#[derive(Debug, Clone)]
struct CurrencyPair {
code: String, // e.g., "EUR", "JPY"
name: String, // e.g., "Euro", "Japanese Yen"
yahoo_symbol: String, // e.g., "USDEUR=X", "USDJPY=X"
}
impl CurrencyPair {
fn new(code: &str, name: &str) -> Self {
Self {
code: code.to_string(),
name: name.to_string(),
yahoo_symbol: format!("USD{}=X", code),
}
}
}
/// Get list of currency pairs to fetch (USD as base currency)
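///
/// Illustrative: each entry quotes USD against the code, e.g.
/// ```ignore
/// let eur = CurrencyPair::new("EUR", "Euro");
/// assert_eq!(eur.yahoo_symbol, "USDEUR=X");
/// ```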
fn get_currency_pairs() -> Vec<CurrencyPair> {
vec![
CurrencyPair::new("EUR", "Euro"),
CurrencyPair::new("TRY", "Turkish Lira"),
CurrencyPair::new("CHF", "Swiss Franc"),
CurrencyPair::new("SEK", "Swedish Krona"),
CurrencyPair::new("TWD", "New Taiwan Dollar"),
CurrencyPair::new("AUD", "Australian Dollar"),
CurrencyPair::new("GBP", "British Pound"), // Fixed: GBp -> GBP
CurrencyPair::new("NOK", "Norwegian Krone"),
CurrencyPair::new("CAD", "Canadian Dollar"),
CurrencyPair::new("CZK", "Czech Koruna"),
CurrencyPair::new("SGD", "Singapore Dollar"),
CurrencyPair::new("ISK", "Icelandic Króna"),
CurrencyPair::new("ZAR", "South African Rand"), // Fixed: ZAc -> ZAR
CurrencyPair::new("JPY", "Japanese Yen"),
CurrencyPair::new("PLN", "Polish Złoty"),
CurrencyPair::new("DKK", "Danish Krone"),
CurrencyPair::new("HKD", "Hong Kong Dollar"),
CurrencyPair::new("ILS", "Israeli Shekel"), // Fixed: ILA -> ILS
CurrencyPair::new("RON", "Romanian Leu"),
CurrencyPair::new("KWD", "Kuwaiti Dinar"), // Fixed: KWF -> KWD
]
}
/// Yahoo Collect Foreign Exchange Charts WITH ABORT-SAFE INCREMENTAL PERSISTENCE
///
/// # Features
/// - Graceful shutdown (abort-safe)
/// - Task panic isolation (tasks fail independently)
/// - Crash-safe persistence (checkpoint + log with fsync)
/// - Smart skip logic (only process incomplete data)
/// - Uses pending queue instead of retry mechanism
///
/// # Persistence Strategy
/// - Checkpoint: fx_rates_collected.jsonl (atomic state)
/// - Log: fx_rates_updates.log (append-only updates)
/// - On restart: Load checkpoint + replay log
/// - Periodic checkpoints (every 10 currencies)
/// - Batched fsync (every 5 writes or 10 seconds)
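///
/// # Log line format (illustrative)
/// Each update is appended as one JSON object per line, e.g.
/// `{"currency_code":"EUR","currency_name":"Euro","yahoo_symbol":"USDEUR=X","status":"collected","timestamp":"..."}`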
pub async fn collect_fx_rates(
paths: &DataPaths,
_config: &Config,
yahoo_pool: Arc<YahooClientPool>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<usize> {
// Configuration constants
const CHECKPOINT_INTERVAL: usize = 10;
const FSYNC_BATCH_SIZE: usize = 5;
const FSYNC_INTERVAL_SECS: u64 = 10;
const CONCURRENCY_LIMIT: usize = 10; // Limit parallel fetch tasks
let data_path = paths.data_dir();
// File paths
let output_path = data_path.join("economic").join("currency");
let log_path = data_path.join("fx_rates_updates.log");
let manager = StateManager::new(paths.integrity_dir()).await?;
let step_name = "yahoo_fx_rate_collection_completed";
let content_reference = directory_reference(&output_path,
Some(vec![
"*/chart/*.jsonl".to_string(), // Main pattern for events data
"*/chart/data.jsonl".to_string(), // Specific pattern (more precise)
]),
Some(vec![
"*.log".to_string(), // Exclude log files
"*.tmp".to_string(), // Exclude temp files
"*.bak".to_string(), // Exclude backup files
]),
);
if manager.is_step_valid(step_name).await? {
logger::log_info(" FX rates collection already completed").await;
let count = count_collected_currencies(paths).await?;
logger::log_info(&format!(" ✓ Found {} currencies with chart data", count)).await;
return Ok(count);
}
let entry = manager.create_entry(
step_name.to_string(),
content_reference.clone(),
DataStage::Data,
).await?;
logger::log_info(" Updating missing forex data...").await;
// === RECOVERY PHASE: Track collected currencies ===
let mut collected_currencies: HashSet<String> = HashSet::new();
if log_path.exists() {
logger::log_info("Loading FX rates collection progress from log...").await;
let log_content = tokio::fs::read_to_string(&log_path).await?;
for line in log_content.lines() {
if line.trim().is_empty() || !line.ends_with('}') {
continue; // Skip incomplete lines
}
match serde_json::from_str::<serde_json::Value>(line) {
Ok(entry) => {
if let Some(code) = entry.get("currency_code").and_then(|v| v.as_str()) {
if entry.get("status").and_then(|v| v.as_str()) == Some("collected") {
collected_currencies.insert(code.to_string());
}
}
}
Err(e) => {
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
}
}
}
logger::log_info(&format!("Loaded {} collected currencies from log", collected_currencies.len())).await;
}
// Get all currency pairs
let currency_pairs = get_currency_pairs();
let total_currencies = currency_pairs.len();
logger::log_info(&format!("Found {} currency pairs to collect", total_currencies)).await;
// Filter currencies that need collection
let pending_pairs: Vec<CurrencyPair> = currency_pairs
.into_iter()
.filter(|pair| !collected_currencies.contains(&pair.code))
.collect();
let pending_count = pending_pairs.len();
logger::log_info(&format!(
" {} already collected, {} pending",
collected_currencies.len(),
pending_count
)).await;
if pending_count == 0 {
logger::log_info(" ✓ All currencies already collected").await;
manager.mark_valid(entry).await?;
return Ok(collected_currencies.len());
}
// === PROCESSING PHASE: Collect FX rates ===
// Shared counters
let processed_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
let success_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
let failed_count = Arc::new(AtomicUsize::new(0));
// Log writer channel with batching and fsync
let (log_tx, mut log_rx) = mpsc::channel::<LogCommand>(1000);
// Spawn log writer task
let log_writer_handle = {
let log_path = log_path.clone();
let processed_count = Arc::clone(&processed_count);
let total_currencies = total_currencies;
tokio::spawn(async move {
let mut log_file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.await
.expect("Failed to open log file");
let mut write_count = 0;
let mut last_fsync = tokio::time::Instant::now();
while let Some(cmd) = log_rx.recv().await {
match cmd {
LogCommand::Write(entry) => {
let json_line = serde_json::to_string(&entry).expect("Serialization failed");
log_file.write_all(json_line.as_bytes()).await.expect("Write failed");
log_file.write_all(b"\n").await.expect("Write failed");
write_count += 1;
// Batched fsync
if write_count >= FSYNC_BATCH_SIZE
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS
{
log_file.flush().await.expect("Flush failed");
log_file.sync_all().await.expect("Fsync failed");
write_count = 0;
last_fsync = tokio::time::Instant::now();
}
}
LogCommand::Checkpoint => {
// Force fsync on checkpoint
log_file.flush().await.expect("Flush failed");
log_file.sync_all().await.expect("Fsync failed");
write_count = 0;
last_fsync = tokio::time::Instant::now();
let current = processed_count.load(Ordering::SeqCst);
logger::log_info(&format!(
" Checkpoint: {}/{} currencies processed",
current, total_currencies
)).await;
}
LogCommand::Shutdown => {
// Final fsync before shutdown
log_file.flush().await.expect("Flush failed");
log_file.sync_all().await.expect("Fsync failed");
break;
}
}
}
})
};
// Process currencies concurrently with task panic isolation
let mut tasks = FuturesUnordered::new();
let mut pending_iter = pending_pairs.into_iter();
let semaphore = Arc::new(tokio::sync::Semaphore::new(CONCURRENCY_LIMIT));
// Initial batch of tasks
for _ in 0..CONCURRENCY_LIMIT.min(pending_count) {
if let Some(pair) = pending_iter.next() {
let task = spawn_collection_task(
pair,
Arc::clone(&yahoo_pool),
paths.clone(),
Arc::clone(&processed_count),
Arc::clone(&success_count),
Arc::clone(&failed_count),
log_tx.clone(),
Arc::clone(&semaphore),
Arc::clone(shutdown_flag),
);
tasks.push(task);
}
}
// Process tasks as they complete and spawn new ones
let mut checkpoint_counter = 0;
while let Some(_result) = tasks.next().await {
// Check for shutdown
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown signal received, stopping FX collection").await;
break;
}
// Spawn new task if more pending
if let Some(pair) = pending_iter.next() {
let task = spawn_collection_task(
pair,
Arc::clone(&yahoo_pool),
paths.clone(),
Arc::clone(&processed_count),
Arc::clone(&success_count),
Arc::clone(&failed_count),
log_tx.clone(),
Arc::clone(&semaphore),
Arc::clone(shutdown_flag),
);
tasks.push(task);
}
// Periodic checkpoint
checkpoint_counter += 1;
if checkpoint_counter % CHECKPOINT_INTERVAL == 0 {
let _ = log_tx.send(LogCommand::Checkpoint).await;
}
}
// Signal shutdown to log writer
let _ = log_tx.send(LogCommand::Shutdown).await;
// Wait for log writer to finish
let _ = log_writer_handle.await;
// Final statistics
let final_success = success_count.load(Ordering::SeqCst);
let final_failed = failed_count.load(Ordering::SeqCst);
logger::log_info(&format!(
" FX collection complete: {} succeeded, {} failed",
final_success, final_failed
)).await;
// Mark as complete if not shutdown
if !shutdown_flag.load(Ordering::SeqCst) {
manager.mark_valid(entry).await?;
}
Ok(final_success)
}
/// Spawn a collection task with panic isolation
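/// Each task acquires a semaphore permit, bails out early on shutdown, collects the
/// chart for its currency, bumps the shared counters, and reports a
/// "collected"/"failed" status line to the log writer.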
fn spawn_collection_task(
pair: CurrencyPair,
yahoo_pool: Arc<YahooClientPool>,
paths: DataPaths,
processed_count: Arc<AtomicUsize>,
success_count: Arc<AtomicUsize>,
failed_count: Arc<AtomicUsize>,
log_tx: mpsc::Sender<LogCommand>,
semaphore: Arc<tokio::sync::Semaphore>,
shutdown_flag: Arc<AtomicBool>,
) -> tokio::task::JoinHandle<()> {
tokio::spawn(async move {
// Acquire semaphore permit
let _permit = semaphore.acquire().await.expect("Semaphore closed");
// Check shutdown before processing
if shutdown_flag.load(Ordering::SeqCst) {
return;
}
// Perform collection (panic-isolated)
let result = collect_currency_chart(&pair, &yahoo_pool, &paths).await;
// Update counters
processed_count.fetch_add(1, Ordering::SeqCst);
let status = match result {
Ok(_) => {
success_count.fetch_add(1, Ordering::SeqCst);
logger::log_info(&format!(
" ✓ Collected {} ({})",
pair.code, pair.name
)).await;
"collected"
}
Err(e) => {
failed_count.fetch_add(1, Ordering::SeqCst);
logger::log_warn(&format!(
" ✗ Failed to collect {} ({}): {}",
pair.code, pair.name, e
)).await;
"failed"
}
};
// Log result
let log_entry = json!({
"currency_code": pair.code,
"currency_name": pair.name,
"yahoo_symbol": pair.yahoo_symbol,
"status": status,
"timestamp": Utc::now().to_rfc3339(),
});
let _ = log_tx.send(LogCommand::Write(log_entry)).await;
})
}
/// Collect chart data for a single currency pair
async fn collect_currency_chart(
pair: &CurrencyPair,
yahoo_pool: &Arc<YahooClientPool>,
paths: &DataPaths,
) -> anyhow::Result<()> {
// Get historical data from year 2000 to now
let now = Utc::now().timestamp();
let start_2000 = Utc
.with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
.unwrap()
.timestamp();
// Fetch chart data from Yahoo
let chart_data = yahoo_pool.get_chart_data(
&pair.yahoo_symbol,
"1d", // Daily interval
start_2000,
now,
).await?;
// Validate we got data
if chart_data.quotes.is_empty() {
return Err(anyhow::anyhow!(
"No chart data available for {} ({})",
pair.code,
pair.yahoo_symbol
));
}
// Save chart data to currency directory
save_currency_chart(paths, &pair.code, &chart_data).await?;
Ok(())
}
/// Save currency chart data to filesystem
async fn save_currency_chart(
paths: &DataPaths,
currency_code: &str,
chart_data: &ChartData,
) -> anyhow::Result<()> {
use tokio::fs;
// Create directory structure: data/economic/currency/{code}/chart/
let economic_dir = paths.data_dir().join("economic");
let currency_dir = economic_dir.join("currency").join(currency_code);
let chart_dir = currency_dir.join("chart");
fs::create_dir_all(&chart_dir).await?;
// Write chart data to data.jsonl
let data_path = chart_dir.join("data.jsonl");
let json_line = serde_json::to_string(chart_data)?;
let mut file = fs::File::create(&data_path).await?;
file.write_all(json_line.as_bytes()).await?;
file.write_all(b"\n").await?;
file.flush().await?;
file.sync_all().await?; // Ensure data is persisted
Ok(())
}
/// Count collected currencies (currencies with chart data)
async fn count_collected_currencies(paths: &DataPaths) -> anyhow::Result<usize> {
let currency_dir = paths.data_dir().join("economic").join("currency");
if !currency_dir.exists() {
return Ok(0);
}
let mut count = 0;
let mut entries = tokio::fs::read_dir(&currency_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.is_dir() {
let chart_file = path.join("chart").join("data.jsonl");
if chart_file.exists() {
count += 1;
}
}
}
Ok(count)
}
/// Log command enum
enum LogCommand {
Write(serde_json::Value),
Checkpoint,
Shutdown,
}
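// Usage sketch: the collector sends Write(json!({...})) for every finished currency,
// an occasional Checkpoint to force an fsync and a progress log, and a final Shutdown
// before awaiting the writer handle.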

View File

@@ -15,3 +15,7 @@ pub use monitoring::{init_monitoring, ConfigSnapshot, MonitoringEvent};
 pub use config::Config;
 pub use scraper::webdriver::{ChromeDriverPool, ChromeInstance, ScrapeTask};
 pub use util::logger;
+pub use util::macros;
+pub use scraper::yahoo::{
+    YahooClient, YahooClientPool, QuoteSummaryModule, QuoteSummary, SearchResult
+};

View File

@@ -1,46 +1,182 @@
// src/main.rs use web_scraper::util::integrity::StateManager;
// src/main.rs - Cleaned up version with extracted helpers
use web_scraper::{*, scraper, economic, corporate}; use web_scraper::{*, scraper, corporate};
use crate::check_shutdown;
use anyhow::Result; use anyhow::{Result};
use web_scraper::config::Config; use web_scraper::config::Config;
use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}; use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers};
use scraper::webdriver::ChromeDriverPool; use scraper::webdriver::ChromeDriverPool;
use util::directories::DataPaths; use util::directories::DataPaths;
use util::{logger, opnv}; use util::{logger, opnv};
use std::fs::{OpenOptions};
use std::sync::Arc; use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use std::process::Command; use std::process::Command;
use std::time::{Duration, Instant};
#[tokio::main] // ============================================================================
async fn main() -> Result<()> { // HELPER FUNCTIONS - Extracted to reduce duplication
let output = if cfg!(target_os = "windows") { // ============================================================================
Command::new("cmd")
/// Start Docker Desktop on Windows
async fn start_docker_desktop() {
if cfg!(target_os = "windows") {
let _ = Command::new("cmd")
.args(["/C", "docker desktop start"]) .args(["/C", "docker desktop start"])
.output() .output();
.expect("failed to execute process") }
} else { }
Command::new("sh")
.arg("-c")
.arg("echo hello")
.output()
.expect("failed to execute process")
};
let _start_docker_desktop = output.stdout;
cleanup_all_proxy_containers().await.ok(); /// Shutdown ChromeDriver pool with error handling
async fn shutdown_chrome_pool(pool: &ChromeDriverPool) {
logger::log_info("Shutting down ChromeDriver pool...").await;
match pool.shutdown().await {
Ok(()) => logger::log_info("✓ ChromeDriver pool shut down successfully").await,
Err(e) => logger::log_error(&format!("✗ Pool shutdown error: {}", e)).await,
}
}
let config = match Config::load() { /// Shutdown Docker VPN proxy pool with error handling
Ok(cfg) => cfg, async fn shutdown_proxy_pool(proxy_pool: &DockerVpnProxyPool) {
Err(_) => { logger::log_info("Stopping Docker VPN proxy containers...").await;
eprintln!("Using default configuration"); match proxy_pool.shutdown().await {
Config::default() Ok(()) => logger::log_info("✓ All Docker VPN containers stopped").await,
Err(e) => logger::log_error(&format!("✗ Proxy shutdown error: {}", e)).await,
}
}
/// Force-kill Chrome and ChromeDriver processes (Windows only)
#[cfg(target_os = "windows")]
async fn force_kill_chrome_processes() {
logger::log_info("Force-killing any remaining Chrome processes...").await;
let _ = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chrome.exe"])
.output()
.await;
let _ = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chromedriver.exe"])
.output()
.await;
}
#[cfg(not(target_os = "windows"))]
async fn force_kill_chrome_processes() {
// No-op on non-Windows platforms
}
/// Verify Chrome processes are cleaned up (Windows only)
#[cfg(target_os = "windows")]
async fn verify_chrome_cleanup() {
if let Ok(output) = tokio::process::Command::new("tasklist")
.args(["/FI", "IMAGENAME eq chrome.exe"])
.output()
.await
{
let stdout = String::from_utf8_lossy(&output.stdout);
let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
if chrome_count > 0 {
logger::log_warn(&format!("⚠️ {} Chrome processes still running after cleanup!", chrome_count)).await;
} else {
logger::log_info("✓ All Chrome processes cleaned up").await;
} }
}; }
}
let paths = DataPaths::new(".")?; #[cfg(not(target_os = "windows"))]
async fn verify_chrome_cleanup() {
// No-op on non-Windows platforms
}
// Initialize monitoring system /// Complete cleanup sequence: shutdown pools, cleanup containers, kill processes
async fn perform_full_cleanup(
pool: &ChromeDriverPool,
proxy_pool: Option<&DockerVpnProxyPool>,
) {
shutdown_chrome_pool(pool).await;
if let Some(pp) = proxy_pool {
shutdown_proxy_pool(pp).await;
cleanup_all_proxy_containers().await.ok();
}
force_kill_chrome_processes().await;
}
/// Create temporary ChromeDriver pool, fetch VPN credentials, and cleanup
async fn fetch_vpn_credentials_with_temp_pool(
config: &Config,
paths: &DataPaths,
monitoring_handle: &monitoring::MonitoringHandle,
) -> Result<Option<Arc<DockerVpnProxyPool>>> {
logger::log_info("VPN Rotation Enabled Fetching latest VPNBook configs").await;
// Create temp pool
logger::log_info("Creating temporary ChromeDriver pool for VPN credential fetch...").await;
let temp_pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(
None,
config,
Some(monitoring_handle.clone())
).await?);
// Fetch credentials
logger::log_info("Fetching VPNBook credentials...").await;
let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?;
logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
// Cleanup temp pool
logger::log_info("Shutting down temporary pool...").await;
match temp_pool.shutdown().await {
Ok(()) => logger::log_info("✓ Temporary pool shut down successfully").await,
Err(e) => {
logger::log_error(&format!("✗ Temp pool shutdown error: {}", e)).await;
force_kill_chrome_processes().await;
}
}
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
// Count VPN servers and create proxy pool
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
.filter(|e| e.as_ref().unwrap().path().is_dir())
.count();
if server_count == 0 {
logger::log_warn("No VPN servers found continuing without VPN").await;
return Ok(None);
}
logger::log_info(&format!("Found {} VPN servers starting Docker proxy containers", server_count)).await;
let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
let proxy_pool = Arc::new(DockerVpnProxyPool::new(
paths.cache_openvpn_dir(),
username,
password,
number_proxy_instances
).await?);
logger::log_info(&format!("All {} Docker proxy containers started and ready", proxy_pool.num_proxies())).await;
// Emit proxy connection events
for i in 0..proxy_pool.num_proxies() {
if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
monitoring_handle.emit(monitoring::MonitoringEvent::ProxyConnected {
container_name: proxy_info.container_name.clone(),
ip_address: proxy_info.ip_address.clone(),
port: proxy_info.port,
});
}
}
Ok(Some(proxy_pool))
}
/// Initialize monitoring system
async fn initialize_monitoring(
config: &Config,
paths: &DataPaths,
) -> Result<(monitoring::MonitoringHandle, tokio::task::JoinHandle<()>)> {
let config_snapshot = ConfigSnapshot { let config_snapshot = ConfigSnapshot {
max_parallel_instances: config.max_parallel_instances, max_parallel_instances: config.max_parallel_instances,
max_tasks_per_instance: config.max_tasks_per_instance, max_tasks_per_instance: config.max_tasks_per_instance,
@@ -50,13 +186,12 @@ async fn main() -> Result<()> {
max_retry_attempts: config.max_retry_attempts, max_retry_attempts: config.max_retry_attempts,
}; };
let (monitoring_handle, _monitoring_task) = init_monitoring( let (monitoring_handle, monitoring_task) = init_monitoring(
config_snapshot, config_snapshot,
paths.logs_dir().to_path_buf(), paths.logs_dir().to_path_buf(),
3030, // Dashboard port 3030,
).await?; ).await?;
// Emit pool initialization event
monitoring_handle.emit(monitoring::MonitoringEvent::PoolInitialized { monitoring_handle.emit(monitoring::MonitoringEvent::PoolInitialized {
pool_size: config.max_parallel_instances, pool_size: config.max_parallel_instances,
with_proxy: config.enable_vpn_rotation, with_proxy: config.enable_vpn_rotation,
@@ -65,129 +200,160 @@ async fn main() -> Result<()> {
logger::log_info("Monitoring dashboard available at http://localhost:3030").await; logger::log_info("Monitoring dashboard available at http://localhost:3030").await;
Ok((monitoring_handle, monitoring_task))
}
/// Setup Ctrl+C handler for graceful shutdown
fn setup_shutdown_handler(
shutdown_flag: Arc<AtomicBool>,
pool: Arc<ChromeDriverPool>,
proxy_pool: Option<Arc<DockerVpnProxyPool>>,
) {
tokio::spawn(async move {
tokio::signal::ctrl_c().await.ok();
logger::log_info("Ctrl+C received shutting down gracefully...").await;
shutdown_flag.store(true, Ordering::SeqCst);
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
perform_full_cleanup(&pool, proxy_pool.as_deref()).await;
logger::log_info("Shutdown complete").await;
std::process::exit(0);
});
}
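/// Format an elapsed duration as DD::HH::MM::SS for the final runtime log line.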
fn format_duration(duration: Duration) -> String {
let total_seconds = duration.as_secs();
let days = total_seconds / 86400;
let hours = (total_seconds % 86400) / 3600;
let minutes = (total_seconds % 3600) / 60;
let seconds = total_seconds % 60;
format!("{:02}::{:02}::{:02}::{:02}", days, hours, minutes, seconds)
}
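/// Ensure state.jsonl exists in the integrity directory; opened with create+write,
/// so an existing file is never truncated.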
async fn create_state_file(paths: &DataPaths) -> Result<()> {
let integrity_path = paths.integrity_dir().join("state.jsonl");
// Use OpenOptions to create the file only if it doesn't exist
OpenOptions::new()
.create(true) // Create if it doesn't exist
.write(true) // Ensure we can write to the file
.open(&integrity_path)?;
logger::log_info(&format!("Checked or created file: {}", integrity_path.display())).await;
Ok(())
}
async fn visualize_checkpoint_dependencies(paths: &DataPaths) -> Result<()> {
// Add more detailed error handling
match StateManager::new(
paths.integrity_dir(),
).await {
Ok(manager) => {
logger::log_info("✓ Dependency configuration loaded successfully").await;
manager.print_dependency_graph();
let dot = manager.get_dependency_config().to_dot();
let dot_path = paths.integrity_dir().join("checkpoint_dependencies.dot");
std::fs::write(&dot_path, dot)?;
logger::log_info(&format!("✓ DOT file written to: {}", dot_path.display())).await;
Ok(())
}
Err(e) => {
logger::log_error(&format!("✗ Failed to load dependency config: {}", e)).await;
Err(e)
}
}
}
// ============================================================================
// MAIN FUNCTION - Simplified with extracted helpers
// ============================================================================
#[tokio::main]
async fn main() -> Result<()> {
// Initial setup
let start = Instant::now();
let paths = DataPaths::new(".")?;
start_docker_desktop().await;
cleanup_all_proxy_containers().await.ok();
create_state_file(&paths).await.ok();
visualize_checkpoint_dependencies(&paths).await.ok();
let config = Config::load().unwrap_or_else(|_| {
eprintln!("Using default configuration");
Config::default()
});
// Initialize monitoring
let (monitoring_handle, _monitoring_task) = initialize_monitoring(&config, &paths).await?;
// Initialize debug logger
logger::init_debug_logger(paths.logs_dir()).await.ok(); logger::init_debug_logger(paths.logs_dir()).await.ok();
logger::log_info("=== Event Backtest Engine Started ===").await; logger::log_info("=== Economic Webscraper Started ===").await;
logger::log_info(&format!( logger::log_info(&format!(
"Config → parallel_instances: {}, task_limit: {} vpn_rotation: {}", "Config → parallel_instances: {}, task_limit: {}, vpn_rotation: {}, proxy_instances_per_certificate: {:?}",
config.max_parallel_instances, config.max_parallel_instances,
config.max_tasks_per_instance, config.max_tasks_per_instance,
config.enable_vpn_rotation config.enable_vpn_rotation,
config.proxy_instances_per_certificate
)).await; )).await;
// Simple shutdown flag
let shutdown_flag = Arc::new(AtomicBool::new(false)); let shutdown_flag = Arc::new(AtomicBool::new(false));
// === Step 1: Fetch VPNBook configs === // Fetch VPN credentials and setup proxy pool if enabled
let proxy_pool: Option<Arc<DockerVpnProxyPool>> = if config.enable_vpn_rotation { let proxy_pool = if config.enable_vpn_rotation {
logger::log_info("VPN Rotation Enabled Fetching latest VPNBook configs").await; fetch_vpn_credentials_with_temp_pool(&config, &paths, &monitoring_handle).await?
let temp_pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(None, &config, Some(monitoring_handle.clone())).await?);
let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?;
logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
.filter(|e| e.as_ref().unwrap().path().is_dir())
.count();
if server_count == 0 {
logger::log_warn("No VPN servers found continuing without VPN").await;
None
} else {
logger::log_info(&format!("Found {} VPN servers starting Docker proxy containers", server_count)).await;
let pp = Arc::new(DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password).await?);
logger::log_info(&format!("All {} Docker proxy containers started and ready", pp.num_proxies())).await;
for i in 0..pp.num_proxies() {
if let Some(proxy_info) = pp.get_proxy_info(i) {
monitoring_handle.emit(monitoring::MonitoringEvent::ProxyConnected {
container_name: proxy_info.container_name.clone(),
ip_address: proxy_info.ip_address.clone(),
port: proxy_info.port,
});
}
}
Some(pp)
}
} else { } else {
logger::log_info("VPN rotation disabled using direct connection").await; logger::log_info("VPN rotation disabled using direct connection").await;
None None
}; };
// === Step 2: Initialize ChromeDriver pool === // Create main ChromeDriver pool
let pool_size = config.max_parallel_instances; logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", config.max_parallel_instances)).await;
let task_limit = config.max_tasks_per_instance;
logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", pool_size)).await; let pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(
proxy_pool.clone(),
&config,
Some(monitoring_handle.clone())
).await?);
let pool = Arc::new( logger::log_info(&format!("ChromeDriver pool ready with {} instances", config.max_parallel_instances)).await;
if task_limit > 0 {
ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await? // Setup Ctrl+C handler
} else { setup_shutdown_handler(
ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await? Arc::clone(&shutdown_flag),
} Arc::clone(&pool),
proxy_pool.clone(),
); );
logger::log_info(&format!("ChromeDriver pool ready with {} instances", pool_size)).await; // Run scraping jobs
check_shutdown!(&shutdown_flag);
// === Step 3: Ctrl+C handler ===
{
let shutdown_flag_clone = Arc::clone(&shutdown_flag);
let pool_clone = Arc::clone(&pool);
let proxy_clone = proxy_pool.clone();
tokio::spawn(async move {
tokio::signal::ctrl_c().await.ok();
logger::log_info("Ctrl+C received shutting down gracefully...").await;
// Set flag first
shutdown_flag_clone.store(true, Ordering::SeqCst);
// Wait a bit for tasks to notice
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
// Cleanup
if let Err(e) = (&*pool_clone).shutdown().await {
logger::log_error(&format!("Error during pool shutdown: {}", e)).await;
}
if let Some(pp) = proxy_clone {
if let Err(e) = pp.shutdown().await {
logger::log_warn(&format!("Failed to stop Docker containers: {}", e)).await;
} else {
logger::log_info("All Docker VPN containers stopped").await;
}
}
let _ = cleanup_all_proxy_containers().await;
std::process::exit(0);
});
}
// === Step 4: Run scraping jobs ===
logger::log_info("--- Starting ECONOMIC data update ---").await; logger::log_info("--- Starting ECONOMIC data update ---").await;
economic::run_full_update(&config, &pool).await?; economic::run_full_update(&config, &pool, &shutdown_flag).await?;
logger::log_info("Economic update completed").await; logger::log_info("Economic update completed").await;
if !shutdown_flag.load(Ordering::SeqCst) { check_shutdown!(&shutdown_flag);
logger::log_info("--- Starting CORPORATE data update ---").await;
corporate::run_full_update(&config, &pool, &shutdown_flag).await?;
logger::log_info("Corporate update completed").await;
}
// === Step 5: Final cleanup === logger::log_info("--- Starting CORPORATE data update ---").await;
if !shutdown_flag.load(Ordering::SeqCst) { corporate::run_full_update(&config, &pool, &shutdown_flag).await?;
logger::log_info("Shutting down ChromeDriver pool...").await; logger::log_info("Corporate update completed").await;
pool.shutdown().await?;
if let Some(pp) = proxy_pool { check_shutdown!(&shutdown_flag);
logger::log_info("Stopping Docker VPN proxy containers...").await;
pp.shutdown().await?;
cleanup_all_proxy_containers().await.ok();
}
logger::log_info("=== Application finished successfully ===").await; // Final cleanup if not already shutting down
} perform_full_cleanup(&pool, proxy_pool.as_deref()).await;
verify_chrome_cleanup().await;
logger::log_info(&format!("=== Application finished after {} ===", format_duration(start.elapsed()))).await;
logger::log_info("=== Application finished successfully ===").await;
Ok(()) Ok(())
} }

View File

@@ -250,6 +250,35 @@
text-transform: uppercase; text-transform: uppercase;
} }
/* Yahoo Stats */
.yahoo-stats-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
gap: 12px;
margin-top: 10px;
}
.yahoo-stat-box {
background: #2a3a4a;
padding: 15px;
border-radius: 5px;
text-align: center;
border-left: 4px solid #FF9800;
}
.yahoo-stat-value {
font-size: 28px;
font-weight: bold;
color: #FF9800;
margin-bottom: 5px;
}
.yahoo-stat-label {
font-size: 11px;
color: #aaa;
text-transform: uppercase;
}
/* Logs */ /* Logs */
.log-container { .log-container {
max-height: 300px; max-height: 300px;
@@ -339,6 +368,31 @@
.pulse { .pulse {
animation: pulse 2s infinite; animation: pulse 2s infinite;
} }
/* Yahoo Client Box */
.yahoo-client-box {
background: #2a3a4a;
border: 2px solid #FF9800;
border-radius: 5px;
padding: 12px;
display: flex;
gap: 0;
overflow: hidden;
}
.yahoo-client-side {
flex: 1;
padding: 12px;
}
.yahoo-client-side.left {
background: #3a4a5a;
border-right: 1px solid #555;
}
.yahoo-client-side.right {
background: #2a3a4a;
}
</style> </style>
</head> </head>
<body> <body>
@@ -363,6 +417,13 @@
<div class="instance-grid" id="instances"></div> <div class="instance-grid" id="instances"></div>
</div> </div>
<!-- Yahoo API Section -->
<div class="section">
<div class="section-title">📈 YAHOO API METRICS</div>
<div class="yahoo-stats-grid" id="yahoo-stats"></div>
<div class="instance-grid" id="yahoo-clients"></div>
</div>
<!-- Global Metrics Section --> <!-- Global Metrics Section -->
<div class="section"> <div class="section">
<div class="section-title">📊 GLOBAL METRICS</div> <div class="section-title">📊 GLOBAL METRICS</div>
@@ -432,6 +493,8 @@
updateConfig(state.config); updateConfig(state.config);
updateInstances(state.instances); updateInstances(state.instances);
updateGlobalStats(state.global); updateGlobalStats(state.global);
updateYahooStats(state.global);
updateYahooClients(state.yahoo_clients);
updateLogs(state.logs); updateLogs(state.logs);
} }
@@ -480,6 +543,10 @@
? ((inst.success_count / inst.total_requests) * 100).toFixed(1) ? ((inst.success_count / inst.total_requests) * 100).toFixed(1)
: '0.0'; : '0.0';
const yahooSuccessRate = inst.yahoo_requests > 0
? ((inst.yahoo_success / inst.yahoo_requests) * 100).toFixed(1)
: '0.0';
return ` return `
<div class="instance-box ${statusClass}"> <div class="instance-box ${statusClass}">
<div class="instance-side"> <div class="instance-side">
@@ -511,6 +578,16 @@
${successRate}% ${successRate}%
</span> </span>
</div> </div>
<div class="metric-row">
<span class="metric-label">Yahoo Requests</span>
<span class="metric-value">${inst.yahoo_requests}</span>
</div>
<div class="metric-row">
<span class="metric-label">Yahoo Rate</span>
<span class="metric-value ${yahooSuccessRate < 50 ? 'danger' : yahooSuccessRate < 80 ? 'warning' : ''}">
${yahooSuccessRate}%
</span>
</div>
<div class="metric-row"> <div class="metric-row">
<span class="metric-label">Last Activity</span> <span class="metric-label">Last Activity</span>
<span class="metric-value">${inst.last_activity}</span> <span class="metric-value">${inst.last_activity}</span>
@@ -556,6 +633,115 @@
}).join(''); }).join('');
} }
function updateYahooStats(global) {
const container = document.getElementById('yahoo-stats');
const yahooSuccessRate = global.total_yahoo_requests > 0
? ((global.successful_yahoo_requests / global.total_yahoo_requests) * 100).toFixed(1)
: '0.0';
container.innerHTML = `
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.total_yahoo_requests || 0}</div>
<div class="yahoo-stat-label">Total Requests</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${yahooSuccessRate}%</div>
<div class="yahoo-stat-label">Success Rate</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.successful_yahoo_requests || 0}</div>
<div class="yahoo-stat-label">Successful</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.failed_yahoo_requests || 0}</div>
<div class="yahoo-stat-label">Failed</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.yahoo_client_count || 0}</div>
<div class="yahoo-stat-label">Active Clients</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.yahoo_batch_requests || 0}</div>
<div class="yahoo-stat-label">Batch Requests</div>
</div>
<div class="yahoo-stat-box">
<div class="yahoo-stat-value">${global.yahoo_session_renewals || 0}</div>
<div class="yahoo-stat-label">Session Renewals</div>
</div>
`;
}
function updateYahooClients(yahooClients) {
const container = document.getElementById('yahoo-clients');
if (!yahooClients || yahooClients.length === 0) {
container.innerHTML = '<div style="text-align: center; padding: 40px; color: #666;">No Yahoo clients available</div>';
return;
}
container.innerHTML = yahooClients.map(client => {
const successRate = client.requests_total > 0
? ((client.requests_successful / client.requests_total) * 100).toFixed(1)
: '0.0';
return `
<div class="yahoo-client-box">
<div class="yahoo-client-side left">
<div class="side-header">
📊 Yahoo Client #${client.instance_id}
${client.has_proxy ? '🔗' : '🌐'}
</div>
<div class="metric-row">
<span class="metric-label">Total Requests</span>
<span class="metric-value">${client.requests_total}</span>
</div>
<div class="metric-row">
<span class="metric-label">Success / Fail</span>
<span class="metric-value">${client.requests_successful} / ${client.requests_failed}</span>
</div>
<div class="metric-row">
<span class="metric-label">Success Rate</span>
<span class="metric-value ${successRate < 50 ? 'danger' : successRate < 80 ? 'warning' : ''}">
${successRate}%
</span>
</div>
<div class="metric-row">
<span class="metric-label">Current / Max</span>
<span class="metric-value ${client.current_requests >= client.max_requests ? 'danger' : ''}">
${client.current_requests} / ${client.max_requests}
</span>
</div>
<div class="metric-row">
<span class="metric-label">Last Activity</span>
<span class="metric-value">${client.last_activity}</span>
</div>
</div>
<div class="yahoo-client-side right">
${client.proxy_info ? `
<div class="side-header">🔗 ${client.proxy_info.container_name}</div>
<div class="metric-row">
<span class="metric-label">IP Address</span>
<span class="metric-value">${client.proxy_info.ip_address}</span>
</div>
<div class="metric-row">
<span class="metric-label">Port</span>
<span class="metric-value">${client.proxy_info.port}</span>
</div>
<div class="metric-row">
<span class="metric-label">Status</span>
<span class="metric-value">${client.proxy_info.status}</span>
</div>
` : `
<div class="no-proxy">
${client.has_proxy ? '⚠️' : '🌐'}<br>
${client.has_proxy ? 'Proxy Not Connected' : 'Direct Connection'}
</div>
`}
</div>
</div>
`;
}).join('');
}
function updateGlobalStats(global) { function updateGlobalStats(global) {
const container = document.getElementById('global-stats'); const container = document.getElementById('global-stats');

View File

@@ -23,6 +23,11 @@ pub enum MonitoringEvent {
status: InstanceStatusChange, status: InstanceStatusChange,
}, },
InstanceSelected {
instance_id: usize,
half: usize,
},
// Task execution // Task execution
TaskStarted { TaskStarted {
instance_id: usize, instance_id: usize,
@@ -87,6 +92,45 @@ pub enum MonitoringEvent {
reason: String, reason: String,
}, },
// Yahoo API events
YahooRequestStarted {
instance_id: usize,
endpoint: String,
symbol: Option<String>,
},
YahooRequestCompleted {
instance_id: usize,
success: bool,
duration_ms: u64,
error: Option<String>,
},
YahooBatchRequestStarted {
count: usize,
symbols: Vec<String>,
endpoint: String,
},
YahooBatchRequestCompleted {
successful: usize,
failed: usize,
total: usize,
duration_ms: u64,
},
YahooClientCreated {
instance_id: usize,
has_proxy: bool,
max_requests: u32,
},
YahooClientReset {
instance_id: usize,
previous_requests: u32,
reason: String,
},
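    // Hypothetical usage sketch (not part of this diff): a Yahoo client holding a
    // MonitoringHandle would emit these variants around each call, for example:
    //   mon.emit(MonitoringEvent::YahooRequestStarted { instance_id, endpoint: "quote".into(), symbol: Some("AAPL".into()) });
    //   ... perform the HTTP request and measure its duration ...
    //   mon.emit(MonitoringEvent::YahooRequestCompleted { instance_id, success: true, duration_ms, error: None });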
// Logging // Logging
LogMessage { LogMessage {
level: LogLevel, level: LogLevel,

View File

@@ -9,6 +9,7 @@ pub struct DashboardState {
pub config: ConfigSnapshot, pub config: ConfigSnapshot,
pub instances: Vec<InstanceMetrics>, pub instances: Vec<InstanceMetrics>,
pub proxies: Vec<ProxyMetrics>, pub proxies: Vec<ProxyMetrics>,
pub yahoo_clients: Vec<YahooClientMetrics>,
pub global: GlobalMetrics, pub global: GlobalMetrics,
pub logs: Vec<LogEntry>, pub logs: Vec<LogEntry>,
} }
@@ -38,6 +39,14 @@ pub struct InstanceMetrics {
pub failure_count: usize, pub failure_count: usize,
pub connected_proxy: Option<ProxyInfo>, pub connected_proxy: Option<ProxyInfo>,
pub last_activity: String, // Timestamp pub last_activity: String, // Timestamp
pub yahoo_requests: usize,
pub yahoo_success: usize,
pub yahoo_failures: usize,
pub yahoo_success_rate: f64,
pub yahoo_current_requests: u32,
pub yahoo_max_requests: u32,
pub yahoo_last_endpoint: Option<String>,
pub yahoo_last_symbol: Option<String>,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -75,6 +84,20 @@ pub struct ProxyMetrics {
pub instances_using: Vec<usize>, pub instances_using: Vec<usize>,
} }
/// Metrics for a Yahoo client
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct YahooClientMetrics {
pub instance_id: usize,
pub requests_total: usize,
pub requests_successful: usize,
pub requests_failed: usize,
pub current_requests: u32,
pub max_requests: u32,
pub has_proxy: bool,
pub last_activity: String,
pub proxy_info: Option<ProxyInfo>,
}
/// Global pool metrics /// Global pool metrics
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GlobalMetrics { pub struct GlobalMetrics {
@@ -88,6 +111,13 @@ pub struct GlobalMetrics {
pub bot_detection_hits: usize, pub bot_detection_hits: usize,
pub proxy_failures: usize, pub proxy_failures: usize,
pub uptime_seconds: u64, pub uptime_seconds: u64,
pub total_yahoo_requests: usize,
pub successful_yahoo_requests: usize,
pub failed_yahoo_requests: usize,
pub yahoo_success_rate: f64,
pub yahoo_batch_requests: usize,
pub yahoo_session_renewals: usize,
pub yahoo_client_count: usize,
} }
/// Log entry for display in dashboard /// Log entry for display in dashboard
@@ -111,6 +141,7 @@ pub enum LogLevel {
pub struct MonitoringState { pub struct MonitoringState {
pub instances: HashMap<usize, InstanceState>, pub instances: HashMap<usize, InstanceState>,
pub proxies: HashMap<String, ProxyState>, pub proxies: HashMap<String, ProxyState>,
pub yahoo_clients: HashMap<usize, YahooClientState>,
pub global: GlobalState, pub global: GlobalState,
pub start_time: Instant, pub start_time: Instant,
} }
@@ -128,6 +159,13 @@ pub struct InstanceState {
pub failure_count: usize, pub failure_count: usize,
pub connected_proxy: Option<ProxyInfo>, pub connected_proxy: Option<ProxyInfo>,
pub last_activity: Instant, pub last_activity: Instant,
pub yahoo_requests: usize,
pub yahoo_success: usize,
pub yahoo_failures: usize,
pub yahoo_current_requests: u32,
pub yahoo_max_requests: u32,
pub yahoo_last_endpoint: Option<String>,
pub yahoo_last_symbol: Option<String>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -139,6 +177,19 @@ pub struct ProxyState {
pub instances_using: Vec<usize>, pub instances_using: Vec<usize>,
} }
#[derive(Debug, Clone)]
pub struct YahooClientState {
pub instance_id: usize,
pub requests_total: usize,
pub requests_successful: usize,
pub requests_failed: usize,
pub current_requests: u32,
pub max_requests: u32,
pub has_proxy: bool,
pub last_activity: Instant,
pub proxy_info: Option<ProxyInfo>,
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct GlobalState { pub struct GlobalState {
pub total_requests: usize, pub total_requests: usize,
@@ -149,6 +200,12 @@ pub struct GlobalState {
pub navigation_timeouts: usize, pub navigation_timeouts: usize,
pub bot_detection_hits: usize, pub bot_detection_hits: usize,
pub proxy_failures: usize, pub proxy_failures: usize,
pub total_yahoo_requests: usize,
pub successful_yahoo_requests: usize,
pub failed_yahoo_requests: usize,
pub yahoo_batch_requests: usize,
pub yahoo_session_renewals: usize,
pub yahoo_client_count: usize,
} }
impl MonitoringState { impl MonitoringState {
@@ -156,6 +213,7 @@ impl MonitoringState {
Self { Self {
instances: HashMap::new(), instances: HashMap::new(),
proxies: HashMap::new(), proxies: HashMap::new(),
yahoo_clients: HashMap::new(),
global: GlobalState { global: GlobalState {
total_requests: 0, total_requests: 0,
successful_requests: 0, successful_requests: 0,
@@ -165,6 +223,12 @@ impl MonitoringState {
navigation_timeouts: 0, navigation_timeouts: 0,
bot_detection_hits: 0, bot_detection_hits: 0,
proxy_failures: 0, proxy_failures: 0,
total_yahoo_requests: 0,
successful_yahoo_requests: 0,
failed_yahoo_requests: 0,
yahoo_batch_requests: 0,
yahoo_session_renewals: 0,
yahoo_client_count: 0,
}, },
start_time: Instant::now(), start_time: Instant::now(),
} }
@@ -175,18 +239,34 @@ impl MonitoringState {
let instances: Vec<InstanceMetrics> = self let instances: Vec<InstanceMetrics> = self
.instances .instances
.values() .values()
            .map(|inst| {
                let yahoo_success_rate = if inst.yahoo_success + inst.yahoo_failures > 0 {
                    (inst.yahoo_success as f64 / (inst.yahoo_success + inst.yahoo_failures) as f64) * 100.0
                } else {
                    0.0
                };
                InstanceMetrics {
                    id: inst.id,
                    status: inst.status.clone(),
                    current_task: inst.current_task.clone(),
                    tasks_current_session: inst.tasks_current_session,
                    tasks_max: inst.tasks_max,
                    session_requests: inst.session_requests,
                    total_requests: inst.total_requests,
                    success_count: inst.success_count,
                    failure_count: inst.failure_count,
                    connected_proxy: inst.connected_proxy.clone(),
                    last_activity: format_timestamp(inst.last_activity),
                    yahoo_requests: inst.yahoo_requests,
                    yahoo_success: inst.yahoo_success,
                    yahoo_failures: inst.yahoo_failures,
                    yahoo_success_rate,
                    yahoo_current_requests: inst.yahoo_current_requests,
                    yahoo_max_requests: inst.yahoo_max_requests,
                    yahoo_last_endpoint: inst.yahoo_last_endpoint.clone(),
                    yahoo_last_symbol: inst.yahoo_last_symbol.clone(),
                }
            })
            .collect();
@@ -202,12 +282,34 @@ impl MonitoringState {
}) })
.collect(); .collect();
let yahoo_clients: Vec<YahooClientMetrics> = self
.yahoo_clients
.values()
.map(|client| YahooClientMetrics {
instance_id: client.instance_id,
requests_total: client.requests_total,
requests_successful: client.requests_successful,
requests_failed: client.requests_failed,
current_requests: client.current_requests,
max_requests: client.max_requests,
has_proxy: client.has_proxy,
last_activity: format_timestamp(client.last_activity),
proxy_info: client.proxy_info.clone(),
})
.collect();
let success_rate = if self.global.total_requests > 0 { let success_rate = if self.global.total_requests > 0 {
(self.global.successful_requests as f64 / self.global.total_requests as f64) * 100.0 (self.global.successful_requests as f64 / self.global.total_requests as f64) * 100.0
} else { } else {
0.0 0.0
}; };
let yahoo_success_rate = if self.global.total_yahoo_requests > 0 {
(self.global.successful_yahoo_requests as f64 / self.global.total_yahoo_requests as f64) * 100.0
} else {
0.0
};
let global = GlobalMetrics { let global = GlobalMetrics {
total_requests: self.global.total_requests, total_requests: self.global.total_requests,
successful_requests: self.global.successful_requests, successful_requests: self.global.successful_requests,
@@ -219,12 +321,20 @@ impl MonitoringState {
bot_detection_hits: self.global.bot_detection_hits, bot_detection_hits: self.global.bot_detection_hits,
proxy_failures: self.global.proxy_failures, proxy_failures: self.global.proxy_failures,
uptime_seconds: self.start_time.elapsed().as_secs(), uptime_seconds: self.start_time.elapsed().as_secs(),
total_yahoo_requests: self.global.total_yahoo_requests,
successful_yahoo_requests: self.global.successful_yahoo_requests,
failed_yahoo_requests: self.global.failed_yahoo_requests,
yahoo_success_rate,
yahoo_batch_requests: self.global.yahoo_batch_requests,
yahoo_session_renewals: self.global.yahoo_session_renewals,
yahoo_client_count: self.global.yahoo_client_count,
}; };
DashboardState { DashboardState {
config, config,
instances, instances,
proxies, proxies,
yahoo_clients,
global, global,
logs, logs,
} }
@@ -233,7 +343,6 @@ impl MonitoringState {
fn format_timestamp(instant: Instant) -> String { fn format_timestamp(instant: Instant) -> String {
use chrono::Local; use chrono::Local;
// This is a placeholder - in real impl we'd track actual wall-clock time
Local::now().format("%H:%M:%S").to_string() Local::now().format("%H:%M:%S").to_string()
} }
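The removed placeholder comment above pointed out that this helper ignores the Instant argument and just prints the current wall-clock time. One possible fix, stated here only as an assumption and not as part of this diff, is to record wall-clock values directly instead of Instant:

// Hypothetical alternative, assuming callers store chrono::DateTime<Local> instead of Instant.
fn format_wallclock(ts: chrono::DateTime<chrono::Local>) -> String {
    ts.format("%H:%M:%S").to_string()
}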

View File

@@ -76,6 +76,13 @@ impl MonitoringService {
failure_count: 0, failure_count: 0,
connected_proxy: proxy.clone(), connected_proxy: proxy.clone(),
last_activity: Instant::now(), last_activity: Instant::now(),
yahoo_requests: 0,
yahoo_success: 0,
yahoo_failures: 0,
yahoo_current_requests: 0,
yahoo_max_requests: 0,
yahoo_last_endpoint: None,
yahoo_last_symbol: None,
}, },
); );
@@ -107,6 +114,10 @@ impl MonitoringService {
} }
} }
MonitoringEvent::InstanceSelected { instance_id, half } => {
self.log_info(format!("Instance #{} selected (half {})", instance_id, half)).await;
}
MonitoringEvent::TaskStarted { instance_id, url } => { MonitoringEvent::TaskStarted { instance_id, url } => {
let mut state = self.state.write().await; let mut state = self.state.write().await;
if let Some(inst) = state.instances.get_mut(&instance_id) { if let Some(inst) = state.instances.get_mut(&instance_id) {
@@ -189,9 +200,9 @@ impl MonitoringService {
if let Some(inst) = state.instances.get(&instance_id) { if let Some(inst) = state.instances.get(&instance_id) {
Some(SessionSummary { Some(SessionSummary {
instance_id, instance_id,
session_start: "N/A".to_string(), // We'd need to track this session_start: "N/A".to_string(),
session_end: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), session_end: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
duration_seconds: 0, // We'd need to track session start time duration_seconds: 0,
total_requests: old_request_count, total_requests: old_request_count,
successful_requests: inst.success_count, successful_requests: inst.success_count,
failed_requests: inst.failure_count, failed_requests: inst.failure_count,
@@ -279,6 +290,154 @@ impl MonitoringService {
self.log_info(format!("Pool rotation triggered: {}", reason)).await; self.log_info(format!("Pool rotation triggered: {}", reason)).await;
} }
// Yahoo API Events
MonitoringEvent::YahooRequestStarted { instance_id, endpoint, symbol } => {
let mut state = self.state.write().await;
// Update global Yahoo stats
state.global.total_yahoo_requests += 1;
// Update instance stats
if let Some(inst) = state.instances.get_mut(&instance_id) {
inst.yahoo_requests += 1;
inst.yahoo_current_requests += 1;
inst.yahoo_last_endpoint = Some(endpoint.clone());
inst.yahoo_last_symbol = symbol.clone();
inst.last_activity = Instant::now();
}
// Update Yahoo client stats
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
client.requests_total += 1;
client.current_requests += 1;
client.last_activity = Instant::now();
}
self.log_info(format!(
"YahooClient[{}] started request: {} {}",
instance_id,
endpoint,
symbol.unwrap_or_else(|| "search".to_string())
)).await;
}
MonitoringEvent::YahooRequestCompleted { instance_id, success, duration_ms, error } => {
let mut state = self.state.write().await;
// Update global Yahoo stats
if success {
state.global.successful_yahoo_requests += 1;
} else {
state.global.failed_yahoo_requests += 1;
}
// Update instance stats
if let Some(inst) = state.instances.get_mut(&instance_id) {
inst.yahoo_current_requests = inst.yahoo_current_requests.saturating_sub(1);
if success {
inst.yahoo_success += 1;
} else {
inst.yahoo_failures += 1;
}
inst.last_activity = Instant::now();
}
// Update Yahoo client stats
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
client.current_requests = client.current_requests.saturating_sub(1);
if success {
client.requests_successful += 1;
} else {
client.requests_failed += 1;
}
client.last_activity = Instant::now();
}
if success {
self.log_info(format!(
"YahooClient[{}] completed request in {}ms",
instance_id, duration_ms
)).await;
} else {
self.log_error(format!(
"YahooClient[{}] failed request in {}ms: {}",
instance_id,
duration_ms,
error.unwrap_or_else(|| "unknown error".to_string())
)).await;
}
}
MonitoringEvent::YahooBatchRequestStarted { count, symbols, endpoint } => {
let mut state = self.state.write().await;
state.global.yahoo_batch_requests += 1;
self.log_info(format!(
"Yahoo batch request started: {} symbols, endpoint: {}",
count, endpoint
)).await;
if !symbols.is_empty() {
self.log_debug(format!(
"Batch symbols: {}",
symbols.join(", ")
)).await;
}
}
MonitoringEvent::YahooBatchRequestCompleted { successful, failed, total, duration_ms } => {
let success_rate = if total > 0 {
(successful as f64 / total as f64) * 100.0
} else {
0.0
};
self.log_info(format!(
"Yahoo batch completed: {}/{} successful ({:.1}%) in {}ms",
successful, total, success_rate, duration_ms
)).await;
}
MonitoringEvent::YahooClientCreated { instance_id, has_proxy, max_requests } => {
let mut state = self.state.write().await;
state.global.yahoo_client_count += 1;
state.yahoo_clients.insert(
instance_id,
YahooClientState {
instance_id,
requests_total: 0,
requests_successful: 0,
requests_failed: 0,
current_requests: 0,
max_requests,
has_proxy,
last_activity: Instant::now(),
proxy_info: None,
},
);
self.log_info(format!(
"YahooClient[{}] created (proxy: {}, max requests: {})",
instance_id, has_proxy, max_requests
)).await;
}
MonitoringEvent::YahooClientReset { instance_id, previous_requests, reason } => {
let mut state = self.state.write().await;
state.global.yahoo_session_renewals += 1;
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
client.current_requests = 0;
client.last_activity = Instant::now();
}
self.log_info(format!(
"YahooClient[{}] reset (had {} requests, reason: {})",
instance_id, previous_requests, reason
)).await;
}
MonitoringEvent::LogMessage { level, message } => { MonitoringEvent::LogMessage { level, message } => {
match level { match level {
crate::monitoring::events::LogLevel::Info => self.log_info(message).await, crate::monitoring::events::LogLevel::Info => self.log_info(message).await,
@@ -313,6 +472,17 @@ impl MonitoringService {
}).await; }).await;
} }
async fn log_debug(&self, message: String) {
// Only log debug if DEBUG_LOGGING is enabled
if std::env::var("DEBUG_LOGGING").is_ok() {
self.add_log(LogEntry {
timestamp: Local::now().format("%H:%M:%S").to_string(),
level: super::metrics::LogLevel::Info,
message: format!("[DEBUG] {}", message),
}).await;
}
}
async fn add_log(&self, entry: LogEntry) { async fn add_log(&self, entry: LogEntry) {
let mut logs = self.logs.write().await; let mut logs = self.logs.write().await;
if logs.len() >= MAX_LOGS { if logs.len() >= MAX_LOGS {

View File

@@ -1,16 +1,26 @@
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use futures::future::join_all; use futures::future::join_all;
use std::{collections::HashSet, path::{Path, PathBuf}, sync::{Arc, RwLock}, time::Duration};
use tokio::{process::Command, time::{sleep}}; use tokio::{process::Command, time::{sleep}};
use walkdir::WalkDir; use walkdir::WalkDir;
pub struct DockerVpnProxyPool { pub struct DockerVpnProxyPool {
container_names: Vec<String>, container_names: Vec<String>,
proxy_ports: Vec<u16>, // e.g., [10801, 10802, ...] proxy_ports: Vec<u16>, // e.g., [10801, 10802, ...]
dead_proxies: Arc<RwLock<HashSet<usize>>>,
} }
impl DockerVpnProxyPool { impl DockerVpnProxyPool {
pub async fn new(
    ovpn_dir: &Path,
    username: String,
    password: String,
    instances_per_ovpn: usize,
) -> Result<Self> {
    if instances_per_ovpn == 0 {
        return Err(anyhow!("instances_per_ovpn must be at least 1"));
    }
// Count hostnames (subdirs in ovpn_dir) // Count hostnames (subdirs in ovpn_dir)
let hostnames: Vec<_> = std::fs::read_dir(ovpn_dir)? let hostnames: Vec<_> = std::fs::read_dir(ovpn_dir)?
.filter_map(Result::ok) .filter_map(Result::ok)
@@ -23,14 +33,21 @@ impl DockerVpnProxyPool {
return Err(anyhow!("No VPN hostnames found in {:?}", ovpn_dir)); return Err(anyhow!("No VPN hostnames found in {:?}", ovpn_dir));
} }
crate::util::logger::log_info(&format!("Found {} VPN hostnames", num_servers)).await; // Calculate total containers: hostnames × instances_per_ovpn
let total_containers = num_servers * instances_per_ovpn;
let mut container_names = Vec::with_capacity(num_servers); crate::util::logger::log_info(&format!(
let mut proxy_ports = Vec::with_capacity(num_servers); "Found {} VPN hostnames × {} instances = {} total containers",
num_servers, instances_per_ovpn, total_containers
)).await;
let mut container_names = Vec::with_capacity(total_containers);
let mut proxy_ports = Vec::with_capacity(total_containers);
let base_port: u16 = 10800; let base_port: u16 = 10800;
let mut port_counter = 0u16;
// === STEP 1: Start ALL containers first === // === STEP 1: Start ALL containers first ===
for (i, hostname) in hostnames.iter().enumerate() { for hostname in hostnames.iter() {
// Pick tcp443.ovpn if exists, else first .ovpn // Pick tcp443.ovpn if exists, else first .ovpn
let hostname_dir = ovpn_dir.join(hostname); let hostname_dir = ovpn_dir.join(hostname);
let mut ovpn_path: Option<PathBuf> = None; let mut ovpn_path: Option<PathBuf> = None;
@@ -48,48 +65,58 @@ impl DockerVpnProxyPool {
let ovpn_path = ovpn_path.ok_or_else(|| anyhow!("No .ovpn found for {}", hostname))?; let ovpn_path = ovpn_path.ok_or_else(|| anyhow!("No .ovpn found for {}", hostname))?;
let name = format!("vpn-proxy-{}", i); // Spawn multiple instances for this .ovpn file
let port = base_port + i as u16 + 1; for instance_num in 0..instances_per_ovpn {
let name = format!("vpn-proxy-{}-{}", hostname, instance_num);
let port = base_port + port_counter + 1;
port_counter += 1;
// Clean up any existing container with the same name // Clean up any existing container with the same name
let _ = Command::new("docker") let _ = Command::new("docker")
.args(["rm", "-f", &name]) .args(["rm", "-f", &name])
.status() .status()
.await; .await;
// Run Docker container // Run Docker container
let status = Command::new("docker") let status = Command::new("docker")
.args([ .args([
"run", "-d", "run", "-d",
"--name", &name, "--name", &name,
"--cap-add=NET_ADMIN", "--cap-add=NET_ADMIN",
"--device", "/dev/net/tun", "--device", "/dev/net/tun",
"--sysctl", "net.ipv4.ip_forward=1", "--sysctl", "net.ipv4.ip_forward=1",
"-v", &format!("{}:/vpn/config.ovpn", ovpn_path.display()), "-v", &format!("{}:/vpn/config.ovpn", ovpn_path.display()),
"-e", &format!("VPN_USERNAME={}", username), "-e", &format!("VPN_USERNAME={}", username),
"-e", &format!("VPN_PASSWORD={}", password), "-e", &format!("VPN_PASSWORD={}", password),
"-p", &format!("{}:1080", port), "-p", &format!("{}:1080", port),
"rust-vpn-proxy", "rust-vpn-proxy",
]) ])
.status() .status()
.await .await
.context("Failed to run Docker")?; .context("Failed to run Docker")?;
if !status.success() { if !status.success() {
return Err(anyhow!("Docker run failed for {}", name)); return Err(anyhow!("Docker run failed for {}", name));
}
crate::util::logger::log_info(&format!(
"Started container {} on port {} (using {})",
name, port, ovpn_path.file_name().unwrap().to_string_lossy()
)).await;
container_names.push(name);
proxy_ports.push(port);
} }
crate::util::logger::log_info(&format!("Started container {} on port {} (waiting for VPN...)", name, port)).await;
container_names.push(name);
proxy_ports.push(port);
} }
// Brief pause to let containers start // Brief pause to let containers start
sleep(Duration::from_secs(8)).await; sleep(Duration::from_secs(8)).await;
crate::util::logger::log_info(&format!("All {} containers started, beginning health checks...", container_names.len())).await; crate::util::logger::log_info(&format!(
"All {} containers started, beginning health checks...",
container_names.len()
)).await;
// === STEP 2: Test ALL proxies in parallel with 10-second intervals === // === STEP 2: Test ALL proxies in parallel ===
let results = Self::test_all_proxies_parallel(&container_names, &proxy_ports).await; let results = Self::test_all_proxies_parallel(&container_names, &proxy_ports).await;
// Filter out failed containers // Filter out failed containers
@@ -100,8 +127,10 @@ impl DockerVpnProxyPool {
for (i, (container_name, port)) in container_names.into_iter().zip(proxy_ports.into_iter()).enumerate() { for (i, (container_name, port)) in container_names.into_iter().zip(proxy_ports.into_iter()).enumerate() {
match &results[i] { match &results[i] {
Ok(Some(ip)) => { Ok(Some(ip)) => {
crate::util::logger::log_info(&format!("✓ Container {} on port {} ready with IP: {}", crate::util::logger::log_info(&format!(
container_name, port, ip)).await; "✓ Container {} on port {} ready with IP: {}",
container_name, port, ip
)).await;
working_containers.push(container_name); working_containers.push(container_name);
working_ports.push(port); working_ports.push(port);
} }
@@ -113,14 +142,15 @@ impl DockerVpnProxyPool {
.ok() .ok()
.and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into()); .and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into());
crate::util::logger::log_error(&format!("✗ Container {} on port {} ready but IP detection failed. Logs: {:?}", crate::util::logger::log_error(&format!(
container_name, port, logs)).await; "✗ Container {} on port {} ready but IP detection failed. Logs: {:?}",
container_name, port, logs
)).await;
failed_count += 1; failed_count += 1;
// Clean up failed container // Clean up failed container
let _ = Self::cleanup_container(&container_name).await; let _ = Self::cleanup_container(&container_name).await;
} }
Err(e) => { Err(e) => {
// Get container logs to debug
let logs = Command::new("docker") let logs = Command::new("docker")
.args(["logs", "--tail", "20", &container_name]) .args(["logs", "--tail", "20", &container_name])
.output() .output()
@@ -128,8 +158,10 @@ impl DockerVpnProxyPool {
.ok() .ok()
.and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into()); .and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into());
crate::util::logger::log_error(&format!("✗ Container {} on port {} failed: {}. Logs: {:?}", crate::util::logger::log_error(&format!(
container_name, port, e, logs)).await; "✗ Container {} on port {} failed: {}. Logs: {:?}",
container_name, port, e, logs
)).await;
failed_count += 1; failed_count += 1;
// Clean up failed container // Clean up failed container
let _ = Self::cleanup_container(&container_name).await; let _ = Self::cleanup_container(&container_name).await;
@@ -138,19 +170,25 @@ impl DockerVpnProxyPool {
} }
if working_containers.is_empty() { if working_containers.is_empty() {
return Err(anyhow!("All {} VPN proxy containers failed to start", num_servers)); return Err(anyhow!("All {} VPN proxy containers failed to start", total_containers));
} }
crate::util::logger::log_info(&format!("Started {}/{} VPN proxy containers successfully", crate::util::logger::log_info(&format!(
working_containers.len(), num_servers)).await; "Started {}/{} VPN proxy containers successfully ({} hostnames × {} instances)",
working_containers.len(), total_containers, num_servers, instances_per_ovpn
)).await;
if failed_count > 0 { if failed_count > 0 {
crate::util::logger::log_warn(&format!("{} containers failed and were cleaned up", failed_count)).await; crate::util::logger::log_warn(&format!(
"{} containers failed and were cleaned up",
failed_count
)).await;
} }
Ok(Self { Ok(Self {
container_names: working_containers, container_names: working_containers,
proxy_ports: working_ports, proxy_ports: working_ports,
dead_proxies: Arc::new(RwLock::new(HashSet::new())),
}) })
} }
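For illustration, a small sketch of how the naming and port scheme from the constructor above plays out; the hostnames and instance count below are made-up values, not taken from a real run:

fn example_layout() -> Vec<(String, u16)> {
    let hostnames = ["de-1", "us-1"];    // assumed hostnames
    let instances_per_ovpn = 2usize;     // e.g. PROXY_INSTANCES_PER_CERTIFICATE=2
    let base_port: u16 = 10800;
    let mut port_counter = 0u16;
    let mut out = Vec::new();
    for hostname in hostnames {
        for instance_num in 0..instances_per_ovpn {
            let name = format!("vpn-proxy-{}-{}", hostname, instance_num);
            let port = base_port + port_counter + 1;
            port_counter += 1;
            out.push((name, port)); // ("vpn-proxy-de-1-0", 10801), ("vpn-proxy-de-1-1", 10802), ...
        }
    }
    out
}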
@@ -319,7 +357,7 @@ impl DockerVpnProxyPool {
pub fn get_proxy_url(&self, index: usize) -> String { pub fn get_proxy_url(&self, index: usize) -> String {
let port = self.proxy_ports[index % self.proxy_ports.len()]; let port = self.proxy_ports[index % self.proxy_ports.len()];
format!("socks5://localhost:{}", port) format!("socks5h://localhost:{}", port)
} }
pub fn num_proxies(&self) -> usize { pub fn num_proxies(&self) -> usize {
@@ -361,6 +399,69 @@ impl DockerVpnProxyPool {
pub fn get_container_name(&self, index: usize) -> Option<String> { pub fn get_container_name(&self, index: usize) -> Option<String> {
self.container_names.get(index).cloned() self.container_names.get(index).cloned()
} }
// Get a healthy proxy URL (skips dead proxies)
pub async fn get_healthy_proxy_url(&self, start_index: usize) -> Option<(usize, String)> {
let dead = match self.dead_proxies.read() {
Ok(value) => value,
Err(_) => return None,
};
let total = self.proxy_ports.len();
// Try up to 'total' proxies starting from start_index
for attempt in 0..total {
let index = (start_index + attempt) % total;
// Skip if dead
if dead.contains(&index) {
continue;
}
let port = self.proxy_ports[index];
return Some((index, format!("socks5h://localhost:{}", port)));
}
None
}
// Mark a proxy as dead
pub async fn mark_proxy_dead(&self, index: usize) -> Option<bool> {
// Acquire lock, perform mutation, and get values for logging
let (port, remaining, total) = {
let mut dead = match self.dead_proxies.write() {
Ok(value) => value,
Err(_) => return None,
};
dead.insert(index);
let port = self.proxy_ports.get(index).copied().unwrap_or(0);
let remaining = self.proxy_ports.len() - dead.len();
let total = self.proxy_ports.len();
// Lock is automatically dropped here when the scope ends
(port, remaining, total)
};
// Now we can await without holding the lock
crate::util::logger::log_warn(&format!(
"⚠ Marked proxy {} (port {}) as DEAD ({}/{} proxies remaining)",
index,
port,
remaining,
total
)).await;
Some(true)
}
// Get count of healthy proxies
pub async fn num_healthy_proxies(&self) -> Option<usize> {
let dead = match self.dead_proxies.read() {
Ok(value) => value,
Err(_) => return None,
};
Some(self.proxy_ports.len() - dead.len())
}
} }
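A rough caller-side sketch of how get_healthy_proxy_url and mark_proxy_dead could be combined; the request helper below is a placeholder, not an existing function in this repo:

// Hypothetical failover wrapper: pick a healthy proxy, mark it dead on failure so
// the next call rotates past it.
async fn fetch_with_failover(pool: &DockerVpnProxyPool, start: usize) -> anyhow::Result<String> {
    let (index, proxy_url) = pool
        .get_healthy_proxy_url(start)
        .await
        .ok_or_else(|| anyhow::anyhow!("no healthy proxies left"))?;
    match try_request_via(&proxy_url).await {
        Ok(body) => Ok(body),
        Err(e) => {
            let _ = pool.mark_proxy_dead(index).await;
            Err(e)
        }
    }
}

// Placeholder for whatever HTTP call the caller actually makes through the proxy.
async fn try_request_via(_proxy_url: &str) -> anyhow::Result<String> {
    unimplemented!()
}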
pub async fn cleanup_all_proxy_containers() -> Result<()> { pub async fn cleanup_all_proxy_containers() -> Result<()> {

377
src/scraper/hard_reset.rs Normal file
View File

@@ -0,0 +1,377 @@
// src/scraper/hard_reset.rs - FIXED: Proper cleanup without Arc leaks
use std::sync::{Arc, atomic::{AtomicBool, AtomicUsize, Ordering}};
use crate::{ChromeDriverPool, Config, logger, scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}, util::directories::DataPaths};
/// Simple error counter for triggering hard resets
pub struct HardResetController {
consecutive_errors: AtomicUsize,
}
impl HardResetController {
pub fn new() -> Self {
Self {
consecutive_errors: AtomicUsize::new(0),
}
}
/// Record success - resets counter
pub fn record_success(&self) {
self.consecutive_errors.store(0, Ordering::SeqCst);
}
/// Record error - returns new count
pub fn record_error(&self) -> usize {
self.consecutive_errors.fetch_add(1, Ordering::SeqCst) + 1
}
/// Reset counter
pub fn reset(&self) {
self.consecutive_errors.store(0, Ordering::SeqCst);
}
/// Get current count
pub fn get_count(&self) -> usize {
self.consecutive_errors.load(Ordering::SeqCst)
}
}
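A minimal sketch of how this counter can drive a reset decision; the threshold is meant to mirror the HARD_RESET_ERROR_THRESHOLD constant declared later in webdriver.rs, and the surrounding control flow is assumed:

// Returns true when the caller should schedule perform_hard_reset() before the next batch.
fn on_task_result(controller: &HardResetController, ok: bool, threshold: usize) -> bool {
    if ok {
        controller.record_success();
        false
    } else {
        // record_error() returns the new consecutive-error count.
        controller.record_error() >= threshold
    }
}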
/// ✅ FIXED: Perform hard reset without Arc reference leaks
///
/// Key improvements:
/// 1. Don't clone old_pool - just shutdown through mutex guard
/// 2. Verify all processes killed before creating new pool
/// 3. Explicitly shutdown temp pools with error handling
/// 4. Add process counting/verification
pub async fn perform_hard_reset(
pool_mutex: &Arc<tokio::sync::Mutex<Arc<ChromeDriverPool>>>,
config: &Config,
paths: &DataPaths,
monitoring: &Option<crate::monitoring::MonitoringHandle>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<()> {
//let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
logger::log_error("🔴 STARTING HARD RESET SEQUENCE").await;
// Check if shutdown was requested
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown requested during hard reset, aborting").await;
return Ok(());
}
// ===== STEP 1: ACQUIRE POOL LOCK (NO CLONING!) =====
logger::log_info(" [1/12] Acquiring pool lock...").await;
let mut pool_guard = pool_mutex.lock().await;
// Get instance count before shutdown for verification
let old_instance_count = pool_guard.get_number_of_instances();
logger::log_info(&format!(" [1/12] Pool has {} instances", old_instance_count)).await;
// ===== STEP 2: SHUTDOWN OLD POOL (NO ARC CLONE!) =====
logger::log_info(" [2/12] Shutting down old pool (NO Arc clone)...").await;
// Shutdown through the Arc without cloning it
// This is safe because we hold the mutex lock
match pool_guard.shutdown().await {
Ok(()) => {
logger::log_info(" [2/12] ✓ Pool shutdown complete").await;
}
Err(e) => {
logger::log_error(&format!(" [2/12] ✗ Pool shutdown error: {}", e)).await;
// Continue anyway - we'll force-kill processes
}
}
// ===== STEP 3: FORCE-KILL ANY REMAINING CHROME PROCESSES =====
logger::log_info(" [3/12] Force-killing any remaining Chrome/ChromeDriver processes...").await;
#[cfg(target_os = "windows")]
{
// Kill all chrome.exe processes
let chrome_result = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chrome.exe"])
.output()
.await;
match chrome_result {
Ok(output) if output.status.success() => {
logger::log_info(" [3/12] ✓ Chrome processes killed").await;
}
_ => {
logger::log_info(" [3/12] ⊘ No Chrome processes found").await;
}
}
// Kill all chromedriver.exe processes
let chromedriver_result = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chromedriver.exe"])
.output()
.await;
match chromedriver_result {
Ok(output) if output.status.success() => {
logger::log_info(" [3/12] ✓ ChromeDriver processes killed").await;
}
_ => {
logger::log_info(" [3/12] ⊘ No ChromeDriver processes found").await;
}
}
}
#[cfg(not(target_os = "windows"))]
{
// Kill all chrome processes
let _ = tokio::process::Command::new("pkill")
.arg("chrome")
.output()
.await;
let _ = tokio::process::Command::new("pkill")
.arg("chromedriver")
.output()
.await;
logger::log_info(" [3/12] ✓ Force-killed Chrome/ChromeDriver").await;
}
// ===== STEP 4: SHUTDOWN PROXIES =====
logger::log_info(" [4/12] Shutting down proxy containers...").await;
cleanup_all_proxy_containers().await.ok();
// ===== STEP 5: WAIT FOR CLEANUP =====
logger::log_info(" [5/12] Waiting 30 seconds for cleanup...").await;
tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
// ===== STEP 6: VERIFY CLEANUP =====
logger::log_info(" [6/12] Verifying process cleanup...").await;
#[cfg(target_os = "windows")]
{
let check_chrome = tokio::process::Command::new("tasklist")
.args(["/FI", "IMAGENAME eq chrome.exe"])
.output()
.await;
if let Ok(output) = check_chrome {
let stdout = String::from_utf8_lossy(&output.stdout);
let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
if chrome_count > 0 {
logger::log_warn(&format!(" [6/12] ⚠️ {} Chrome processes still running!", chrome_count)).await;
} else {
logger::log_info(" [6/12] ✓ No Chrome processes running").await;
}
}
}
// Check shutdown again
if shutdown_flag.load(Ordering::SeqCst) {
logger::log_warn("Shutdown requested during cleanup, aborting reset").await;
return Ok(());
}
// ===== STEP 7: RECREATE PROXY POOL =====
logger::log_info(" [7/12] Recreating proxy pool...").await;
let new_proxy_pool = if config.enable_vpn_rotation {
match recreate_proxy_pool_with_fresh_credentials(config, paths, monitoring, shutdown_flag).await {
Ok(pool) => {
logger::log_info(&format!(
" [7/12] ✓ Proxy pool created with {} proxies",
pool.num_proxies()
)).await;
Some(pool)
}
Err(e) => {
logger::log_warn(&format!(
" [7/12] ⚠️ Proxy creation failed: {}. Continuing without proxies.",
e
)).await;
None
}
}
} else {
logger::log_info(" [7/12] ⊘ VPN rotation disabled, skipping proxy pool").await;
None
};
// ===== STEP 8: RECREATE CHROMEDRIVER POOL =====
logger::log_info(" [8/12] Recreating ChromeDriver pool...").await;
let new_pool = Arc::new(
ChromeDriverPool::new_with_proxy_and_task_limit(
new_proxy_pool,
config,
monitoring.clone(),
).await?
);
logger::log_info(&format!(
" [8/12] ✓ ChromeDriver pool created with {} instances",
new_pool.get_number_of_instances()
)).await;
// ===== STEP 9: RESET ERROR COUNTER =====
logger::log_info(" [9/12] Resetting error counter...").await;
new_pool.get_reset_controller().reset();
logger::log_info(" [9/12] ✓ Error counter cleared").await;
// ===== STEP 10: REPLACE POOL ATOMICALLY =====
logger::log_info(" [10/12] Activating new pool...").await;
*pool_guard = new_pool;
drop(pool_guard);
logger::log_info(" [10/12] ✓ New pool activated").await;
// ===== STEP 11: EMIT MONITORING EVENT =====
logger::log_info(" [11/12] Updating monitoring...").await;
if let Some(mon) = monitoring {
mon.emit(crate::monitoring::MonitoringEvent::PoolInitialized {
pool_size: config.max_parallel_instances,
with_proxy: config.enable_vpn_rotation,
with_rotation: config.max_tasks_per_instance > 0,
});
}
// ===== STEP 12: FINAL VERIFICATION =====
logger::log_info(" [12/12] Final verification...").await;
#[cfg(target_os = "windows")]
{
let check_chrome = tokio::process::Command::new("tasklist")
.args(["/FI", "IMAGENAME eq chrome.exe"])
.output()
.await;
if let Ok(output) = check_chrome {
let stdout = String::from_utf8_lossy(&output.stdout);
let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
logger::log_info(&format!(" [12/12] Chrome processes: {}", chrome_count)).await;
}
let check_chromedriver = tokio::process::Command::new("tasklist")
.args(["/FI", "IMAGENAME eq chromedriver.exe"])
.output()
.await;
if let Ok(output) = check_chromedriver {
let stdout = String::from_utf8_lossy(&output.stdout);
let chromedriver_count = stdout.lines().filter(|line| line.contains("chromedriver.exe")).count();
logger::log_info(&format!(" [12/12] ChromeDriver processes: {}", chromedriver_count)).await;
}
}
logger::log_info("✅ HARD RESET COMPLETE").await;
Ok(())
}
/// ✅ FIXED: Recreate proxy pool with temp pool that's properly shut down
async fn recreate_proxy_pool_with_fresh_credentials(
config: &Config,
paths: &DataPaths,
monitoring: &Option<crate::monitoring::MonitoringHandle>,
shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Arc<DockerVpnProxyPool>> {
let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
// Check shutdown
if shutdown_flag.load(Ordering::SeqCst) {
return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
}
logger::log_info(" [7.1] Creating temporary ChromeDriver pool for credential fetch...").await;
// Create temporary pool WITHOUT proxy
let temp_pool = Arc::new(
ChromeDriverPool::new_with_proxy_and_task_limit(
None, // No proxy for temp pool
config,
monitoring.clone(),
).await?
);
logger::log_info(" [7.2] Fetching fresh VPNBook credentials...").await;
// Fetch fresh VPNBook credentials
let (username, password, _files) = crate::util::opnv::fetch_vpnbook_configs(
&temp_pool,
paths.cache_dir()
).await?;
logger::log_info(&format!(" [7.3] Got credentials → User: {}", username)).await;
// ✅ FIXED: Properly shutdown temp pool with error handling
logger::log_info(" [7.4] Shutting down temporary pool...").await;
match temp_pool.shutdown().await {
Ok(()) => {
logger::log_info(" [7.4] ✓ Temp pool shut down successfully").await;
}
Err(e) => {
logger::log_error(&format!(" [7.4] ✗ Temp pool shutdown error: {}", e)).await;
// Force-kill processes as backup
#[cfg(target_os = "windows")]
{
let _ = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chrome.exe"])
.output()
.await;
let _ = tokio::process::Command::new("taskkill")
.args(["/F", "/IM", "chromedriver.exe"])
.output()
.await;
}
}
}
// Wait a moment for temp pool cleanup
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
// Check shutdown again
if shutdown_flag.load(Ordering::SeqCst) {
return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
}
// Check if we have VPN server configs
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
.filter(|e| e.as_ref().unwrap().path().is_dir())
.count();
if server_count == 0 {
return Err(anyhow::anyhow!("No VPN servers found after credential fetch"));
}
logger::log_info(&format!(
" [7.5] Found {} VPN servers → Creating proxy pool with {} instances per server...",
server_count,
number_proxy_instances
)).await;
// Create new proxy pool
let proxy_pool = Arc::new(
DockerVpnProxyPool::new(
paths.cache_openvpn_dir(),
username,
password,
number_proxy_instances,
).await?
);
logger::log_info(&format!(
" [7.6] ✓ Proxy pool ready with {} total proxies",
proxy_pool.num_proxies()
)).await;
// Emit proxy connected events for monitoring
if let Some(mon) = monitoring {
for i in 0..proxy_pool.num_proxies() {
if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
mon.emit(crate::monitoring::MonitoringEvent::ProxyConnected {
container_name: proxy_info.container_name.clone(),
ip_address: proxy_info.ip_address.clone(),
port: proxy_info.port,
});
}
}
}
Ok(proxy_pool)
}

View File

@@ -1,3 +1,6 @@
pub mod webdriver; pub mod webdriver;
pub mod docker_vpn_proxy; pub mod docker_vpn_proxy;
pub mod helpers; pub mod helpers;
pub mod hard_reset;
pub mod yahoo;
pub mod openfigi;

367
src/scraper/openfigi.rs Normal file
View File

@@ -0,0 +1,367 @@
// src/scraper/openfigi.rs - STREAMING VERSION
// Key changes: Never load entire GLEIF CSV or FIGI maps into memory
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::corporate::{types::*};
use reqwest::Client as HttpClient;
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::{json, Value};
use std::path::Path;
use tokio::time::{sleep, Duration};
use tokio::fs as tokio_fs;
use anyhow::{Context, anyhow};
#[derive(Clone)]
pub struct OpenFigiClient {
pub client: HttpClient,
pub has_key: bool,
}
impl OpenFigiClient {
pub async fn new() -> anyhow::Result<Self> {
let api_key = dotenvy::var("OPENFIGI_API_KEY").ok();
let has_key = api_key.is_some();
let mut builder = HttpClient::builder()
.user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
.timeout(Duration::from_secs(30));
if let Some(key) = &api_key {
let mut headers = HeaderMap::new();
headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
builder = builder.default_headers(headers);
}
let client = builder.build().context("Failed to build HTTP client")?;
logger::log_info(&format!("OpenFIGI client: {}",
if has_key { "with API key" } else { "no key" })).await;
Ok(Self { client, has_key })
}
pub async fn map_isins_to_figi_infos(&self, isins: &[String]) -> anyhow::Result<Vec<FigiData>> {
if isins.is_empty() {
return Ok(vec![]);
}
let mut all_figi_infos = Vec::new();
let chunk_size = if self.has_key { 100 } else { 5 };
let inter_sleep = if self.has_key {
Duration::from_millis(240)
} else {
Duration::from_millis(2400)
};
for chunk in isins.chunks(chunk_size) {
let jobs: Vec<Value> = chunk.iter()
.map(|isin| json!({
"idType": "ID_ISIN",
"idValue": isin,
}))
.collect();
let mut retry_count = 0;
let max_retries = 5;
let mut backoff_ms = 1000u64;
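            // Worked example of this backoff: delays of 1000 ms, 2000, 4000 and 8000 between
            // attempts, then the error is returned after max_retries (5); the 60 s cap below
            // only matters for longer retry chains.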
loop {
let resp_result = self.client
.post("https://api.openfigi.com/v3/mapping")
.header("Content-Type", "application/json")
.json(&jobs)
.send()
.await;
let resp = match resp_result {
Ok(r) => r,
Err(e) => {
retry_count += 1;
if retry_count >= max_retries {
let err_msg = format!("Failed to send mapping request after {} retries: {}", max_retries, e);
logger::log_error(&err_msg).await;
return Err(anyhow!(err_msg));
}
let warn_msg = format!("Transient error sending mapping request (attempt {}/{}): {}", retry_count, max_retries, e);
logger::log_warn(&warn_msg).await;
let retry_msg = format!(" Retrying in {}ms...", backoff_ms);
logger::log_info(&retry_msg).await;
sleep(Duration::from_millis(backoff_ms)).await;
backoff_ms = (backoff_ms * 2).min(60000); // Cap at 60s
continue;
}
};
let status = resp.status();
let headers = resp.headers().clone();
let body = resp.text().await?;
if status == 429 {
let reset_sec = headers
.get("ratelimit-reset")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(10);
sleep(Duration::from_secs(reset_sec.max(10))).await;
continue;
} else if !status.is_success() {
if status.is_server_error() && retry_count < max_retries {
retry_count += 1;
sleep(Duration::from_millis(backoff_ms)).await;
backoff_ms = (backoff_ms * 2).min(60000);
continue;
}
return Err(anyhow!("OpenFIGI error {}: {}", status, body));
}
let results: Vec<Value> = serde_json::from_str(&body)?;
for (isin, result) in chunk.iter().zip(results) {
if let Some(data) = result["data"].as_array() {
for item in data {
if let Some(figi) = item["figi"].as_str() {
all_figi_infos.push(FigiData {
isin: isin.clone(),
figi: figi.to_string(),
name: item["name"].as_str().unwrap_or("").to_string(),
ticker: item["ticker"].as_str().unwrap_or("").to_string(),
exch_code: item["exchCode"].as_str().unwrap_or("").to_string(),
composite_figi: item["compositeFIGI"].as_str().unwrap_or("").to_string(),
security_type: item["securityType"].as_str().unwrap_or("").to_string(),
market_sector: item["marketSector"].as_str().unwrap_or("").to_string(),
share_class_figi: item["shareClassFIGI"].as_str().unwrap_or("").to_string(),
security_type2: item["securityType2"].as_str().unwrap_or("").to_string(),
security_description: item["securityDescription"].as_str().unwrap_or("").to_string(),
});
}
}
}
}
break;
}
sleep(inter_sleep).await;
}
Ok(all_figi_infos)
}
}
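A short usage sketch for the client above; the ISINs are illustrative examples and OPENFIGI_API_KEY is read from the environment as in new():

async fn example_isin_mapping() -> anyhow::Result<()> {
    let client = OpenFigiClient::new().await?;
    // Example ISINs, purely for illustration.
    let isins = vec!["US0378331005".to_string(), "DE0007664039".to_string()];
    let figi_infos = client.map_isins_to_figi_infos(&isins).await?;
    for info in &figi_infos {
        println!("{} -> {} ({} / {})", info.isin, info.figi, info.ticker, info.exch_code);
    }
    Ok(())
}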
/// Fetches and caches the list of valid securityType values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_security_type(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
let cache_file = cache_dir.join("securityType.json");
if should_use_cache(&cache_file).await? {
logger::log_info(" Using cached securityType values").await;
return Ok(());
}
logger::log_info(" Fetching securityType values from OpenFIGI API...").await;
let resp = client.client
.get("https://api.openfigi.com/v3/mapping/values/securityType")
.send()
.await
.context("Failed to fetch securityType values")?;
handle_rate_limit(&resp).await?;
let values: Value = resp.json().await
.context("Failed to parse securityType response")?;
let json_str = serde_json::to_string_pretty(&values)?;
tokio_fs::write(&cache_file, json_str).await
.context("Failed to write securityType cache")?;
logger::log_info(" ✓ Cached securityType values").await;
sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;
Ok(())
}
/// Loads all OpenFIGI mapping value lists (marketSecDes, micCode, securityType).
///
/// This function fetches the available values for each mapping parameter from the OpenFIGI API
/// and caches them as JSON files in `data/openfigi/`. If the files already exist and are recent
/// (less than 30 days old), they are reused instead of re-fetching.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if API requests fail, JSON parsing fails, or file I/O fails.
pub async fn load_figi_type_lists(paths: &DataPaths) -> anyhow::Result<()> {
logger::log_info("Loading OpenFIGI mapping value lists...").await;
let cache_openfigi_dir = paths.cache_openfigi_dir();
tokio_fs::create_dir_all(cache_openfigi_dir).await
.context("Failed to create data/openfigi directory")?;
let client = OpenFigiClient::new().await?;
// Fetch each type list
get_figi_market_sec_des(&client, cache_openfigi_dir).await?;
get_figi_mic_code(&client, cache_openfigi_dir).await?;
get_figi_security_type(&client, cache_openfigi_dir).await?;
logger::log_info("OpenFIGI mapping value lists loaded successfully").await;
Ok(())
}
/// Fetches and caches the list of valid marketSecDes values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_market_sec_des(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
let cache_file = cache_dir.join("marketSecDes.json");
// Check if cache exists and is recent (< 30 days old)
if should_use_cache(&cache_file).await? {
logger::log_info(" Using cached marketSecDes values").await;
return Ok(());
}
logger::log_info(" Fetching marketSecDes values from OpenFIGI API...").await;
let resp = client.client
.get("https://api.openfigi.com/v3/mapping/values/marketSecDes")
.send()
.await
.context("Failed to fetch marketSecDes values")?;
handle_rate_limit(&resp).await?;
let values: Value = resp.json().await
.context("Failed to parse marketSecDes response")?;
// Save to cache
let json_str = serde_json::to_string_pretty(&values)?;
tokio_fs::write(&cache_file, json_str).await
.context("Failed to write marketSecDes cache")?;
logger::log_info(" ✓ Cached marketSecDes values").await;
// Respect rate limits
sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;
Ok(())
}
/// Fetches and caches the list of valid micCode values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_mic_code(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
let cache_file = cache_dir.join("micCode.json");
if should_use_cache(&cache_file).await? {
logger::log_info(" Using cached micCode values").await;
return Ok(());
}
logger::log_info(" Fetching micCode values from OpenFIGI API...").await;
let resp = client.client
.get("https://api.openfigi.com/v3/mapping/values/micCode")
.send()
.await
.context("Failed to fetch micCode values")?;
handle_rate_limit(&resp).await?;
let values: Value = resp.json().await
.context("Failed to parse micCode response")?;
let json_str = serde_json::to_string_pretty(&values)?;
tokio_fs::write(&cache_file, json_str).await
.context("Failed to write micCode cache")?;
logger::log_info(" ✓ Cached micCode values").await;
sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;
Ok(())
}
/// Handles rate limit responses from the OpenFIGI API.
///
/// If a 429 status is received, this function sleeps for the duration specified
/// in the `ratelimit-reset` header (or 10 seconds by default).
///
/// # Arguments
/// * `resp` - The HTTP response to check.
///
/// # Returns
/// Ok(()) if no rate limit, or after waiting for the reset period.
///
/// # Errors
/// Returns an error if the response status indicates a non-rate-limit error.
async fn handle_rate_limit(resp: &reqwest::Response) -> anyhow::Result<()> {
let status = resp.status();
if status == 429 {
let headers = resp.headers();
let reset_sec = headers
.get("ratelimit-reset")
.and_then(|v| v.to_str().ok())
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(10);
logger::log_info(&format!(" Rate limited—waiting {}s", reset_sec)).await;
sleep(std::time::Duration::from_secs(reset_sec.max(10))).await;
return Err(anyhow!("Rate limited, please retry"));
} else if status.is_client_error() || status.is_server_error() {
return Err(anyhow!("OpenFIGI API error: {}", status));
}
Ok(())
}
/// Checks if a cache file exists and is less than 30 days old.
///
/// # Arguments
/// * `path` - Path to the cache file.
///
/// # Returns
/// True if the cache should be used, false if it needs refreshing.
async fn should_use_cache(path: &Path) -> anyhow::Result<bool> {
if !path.exists() {
return Ok(false);
}
let metadata = tokio_fs::metadata(path).await?;
let modified = metadata.modified()?;
let age = modified.elapsed().unwrap_or(std::time::Duration::from_secs(u64::MAX));
// Cache is valid for 30 days
Ok(age < std::time::Duration::from_secs(30 * 24 * 60 * 60))
}

View File

@@ -1,5 +1,9 @@
// src/scraper/webdriver.rs // src/scraper/webdriver.rs
use super::helpers::*; use super::helpers::*;
use super::hard_reset::HardResetController;
use super::docker_vpn_proxy::DockerVpnProxyPool;
use crate::Config;
use crate::logger;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use fantoccini::{Client, ClientBuilder}; use fantoccini::{Client, ClientBuilder};
@@ -13,8 +17,6 @@ use tokio::process::{Child, Command};
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio::sync::{Mutex, Semaphore}; use tokio::sync::{Mutex, Semaphore};
use tokio::time::{sleep, timeout, Duration}; use tokio::time::{sleep, timeout, Duration};
use crate::scraper::docker_vpn_proxy::{DockerVpnProxyPool};
use crate::Config;
/// Manages a pool of ChromeDriver instances for parallel scraping with optional VPN binding. /// Manages a pool of ChromeDriver instances for parallel scraping with optional VPN binding.
pub struct ChromeDriverPool { pub struct ChromeDriverPool {
@@ -31,10 +33,16 @@ pub struct ChromeDriverPool {
min_request_interval_ms: u64, min_request_interval_ms: u64,
monitoring: Option<crate::monitoring::MonitoringHandle>, monitoring: Option<crate::monitoring::MonitoringHandle>,
hard_reset_controller: Arc<HardResetController>,
config: Arc<Config>,
} }
impl ChromeDriverPool { impl ChromeDriverPool {
/// Creates a new pool without any proxy (direct connection). /// When consecutive errors reach this value, execute() will return a special error
/// that signals the caller to trigger a hard reset
const HARD_RESET_ERROR_THRESHOLD: usize = 12;
/// Creates a new pool without any proxy (direct connection).
pub async fn _new(config: &Config, monitoring: Option<crate::monitoring::MonitoringHandle>,) -> Result<Self> { pub async fn _new(config: &Config, monitoring: Option<crate::monitoring::MonitoringHandle>,) -> Result<Self> {
Self::new_with_proxy_and_task_limit(None, config, monitoring).await Self::new_with_proxy_and_task_limit(None, config, monitoring).await
} }
@@ -85,6 +93,11 @@ impl ChromeDriverPool {
// Rotation is enabled when task limiting is active // Rotation is enabled when task limiting is active
let rotation_enabled = task_per_instance_limit > 0; let rotation_enabled = task_per_instance_limit > 0;
let half_size = if rotation_enabled {
(actual_pool_size + 1) / 2 // Round up for odd numbers
} else {
actual_pool_size
};
let mut instances = Vec::with_capacity(actual_pool_size); let mut instances = Vec::with_capacity(actual_pool_size);
@@ -105,8 +118,8 @@ impl ChromeDriverPool {
for i in 0..actual_pool_size { for i in 0..actual_pool_size {
// Pass the entire proxy_pool and the index // Pass the entire proxy_pool and the index
let instance = ChromeInstance::new( let instance = ChromeInstance::new(
proxy_pool.clone(), // Clone the Arc proxy_pool.clone(),
i, // This instance's proxy index i,
config, config,
monitoring.clone(), monitoring.clone(),
).await?; ).await?;
@@ -144,7 +157,7 @@ impl ChromeDriverPool {
mon.emit(crate::monitoring::MonitoringEvent::InstanceCreated { mon.emit(crate::monitoring::MonitoringEvent::InstanceCreated {
instance_id: i, instance_id: i,
max_tasks: guard.max_tasks_per_instance, max_tasks: guard.max_tasks_per_instance,
proxy: proxy_info.clone(), // ✅ Now includes actual proxy info proxy: proxy_info.clone(),
}); });
// Also emit ProxyConnected event if proxy exists // Also emit ProxyConnected event if proxy exists
@@ -162,15 +175,21 @@ impl ChromeDriverPool {
let min_request_interval_ms = config.min_request_interval_ms; let min_request_interval_ms = config.min_request_interval_ms;
let hard_reset_controller = Arc::new(HardResetController::new());
let config_clone = Arc::new(config.clone());
Ok(Self { Ok(Self {
instances, instances,
semaphore: Arc::new(Semaphore::new(actual_pool_size)), semaphore: Arc::new(Semaphore::new(half_size)),
proxy_pool, proxy_pool,
rotation_enabled, rotation_enabled,
next_instance: Arc::new(Mutex::new(0)), next_instance: Arc::new(Mutex::new(0)),
last_request_time: Arc::new(Mutex::new(Instant::now())), last_request_time: Arc::new(Mutex::new(Instant::now())),
min_request_interval_ms, min_request_interval_ms,
monitoring, monitoring,
hard_reset_controller,
config: config_clone,
}) })
} }
@@ -188,10 +207,8 @@ impl ChromeDriverPool {
if elapsed < self.min_request_interval_ms { if elapsed < self.min_request_interval_ms {
let wait_ms = self.min_request_interval_ms - elapsed; let wait_ms = self.min_request_interval_ms - elapsed;
drop(last_time); // Lock vor Sleep freigeben! drop(last_time);
sleep(Duration::from_millis(wait_ms)).await; sleep(Duration::from_millis(wait_ms)).await;
let mut last_time = self.last_request_time.lock().await; let mut last_time = self.last_request_time.lock().await;
*last_time = Instant::now(); *last_time = Instant::now();
} else { } else {
@@ -199,12 +216,20 @@ impl ChromeDriverPool {
} }
} }
let random_index = random_range(0, self.instances.len() as u64) as usize; let instance = if self.rotation_enabled {
// Index-Auswahl (vereinfacht, siehe unten für vollständige Rotation) self.select_instance_with_rotation().await?
let index = if self.rotation_enabled {
self.get_rotated_index().await?
} else { } else {
random_index self.select_instance_round_robin().await
};
{
let mut inst = instance.lock().await;
inst.increment_task_count();
}
let index: usize = {
let instances = &self.instances;
instances.iter().position(|inst| Arc::ptr_eq(inst, &instance)).unwrap_or(0)
}; };
if let Some(ref mon) = self.monitoring { if let Some(ref mon) = self.monitoring {
@@ -216,15 +241,10 @@ impl ChromeDriverPool {
instance_id: index, instance_id: index,
status: crate::monitoring::InstanceStatusChange::Active, status: crate::monitoring::InstanceStatusChange::Active,
}); });
} };
let instance = &self.instances[index];
let mut guard = instance.lock().await; let mut guard = instance.lock().await;
// NEU: Session mit automatischer Erneuerung holen!
let client = guard.get_or_renew_session().await?; let client = guard.get_or_renew_session().await?;
guard.increment_task_count();
let (task_count, session_requests) = guard.get_session_stats().await; let (task_count, session_requests) = guard.get_session_stats().await;
crate::util::logger::log_info(&format!( crate::util::logger::log_info(&format!(
@@ -232,17 +252,17 @@ impl ChromeDriverPool {
index, task_count, guard.max_tasks_per_instance, session_requests index, task_count, guard.max_tasks_per_instance, session_requests
)).await; )).await;
drop(guard); // Lock freigeben vor Navigation drop(guard);
let start_time = Instant::now(); let start_time = Instant::now();
// Navigation mit Timeout // Navigation with timeout
let navigation_result = timeout( let navigation_result = timeout(
Duration::from_secs(60), Duration::from_secs(60),
client.goto(&url) client.goto(&url)
).await; ).await;
match navigation_result { let result = match navigation_result {
Ok(Ok(_)) => { Ok(Ok(_)) => {
if let Some(ref mon) = self.monitoring { if let Some(ref mon) = self.monitoring {
mon.emit(crate::monitoring::MonitoringEvent::TaskCompleted { mon.emit(crate::monitoring::MonitoringEvent::TaskCompleted {
@@ -258,14 +278,111 @@ impl ChromeDriverPool {
} }
crate::util::logger::log_info(&format!("✓ Navigated to {}", url)).await; crate::util::logger::log_info(&format!("✓ Navigated to {}", url)).await;
// Parse-Funktion ausführen // Execute parse function
parse(client).await match parse(client).await {
Ok(data) => {
// SUCCESS: Record and log
let prev_count = self.hard_reset_controller.get_count();
self.hard_reset_controller.record_success();
if prev_count > 0 {
logger::log_info(&format!(
"✓ Success - reset counter cleared (was: {}/{})",
prev_count,
Self::HARD_RESET_ERROR_THRESHOLD
)).await;
}
Ok(data)
}
Err(e) => {
// PARSE ERROR: Record, check threshold, invalidate session
let error_count = self.hard_reset_controller.record_error();
{
let mut inst = instance.lock().await;
inst.invalidate_current_session().await;
}
// Enhanced logging with threshold status
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
logger::log_warn(&format!(
"Parse error. Reset counter: {}/{} ({:.0}%)",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD,
threshold_pct
)).await;
// Check if threshold reached
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
logger::log_error(&format!(
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
)).await;
return Err(anyhow!(
"HARD_RESET_REQUIRED: Parse failed: {}. Threshold reached ({}/{})",
e,
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
));
}
Err(anyhow!(
"Parse failed: {}. Hard reset at {}/{}",
e,
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
))
}
}
} }
Ok(Err(e)) => { Ok(Err(e)) => {
// ❌ NAVIGATION ERROR: Record, check threshold, invalidate session
crate::util::logger::log_error(&format!("Navigation failed: {}", e)).await; crate::util::logger::log_error(&format!("Navigation failed: {}", e)).await;
Err(anyhow!("Navigation failed: {}", e))
{
let mut inst = instance.lock().await;
inst.invalidate_current_session().await;
}
let error_count = self.hard_reset_controller.record_error();
// Enhanced logging
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
logger::log_warn(&format!(
"Navigation error. Reset counter: {}/{} ({:.0}%)",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD,
threshold_pct
)).await;
// Check if threshold reached
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
logger::log_error(&format!(
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
)).await;
return Err(anyhow!(
"HARD_RESET_REQUIRED: Navigation failed: {}. Threshold reached ({}/{})",
e,
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
));
}
Err(anyhow!(
"Navigation failed: {}. Hard reset at {}/{}",
e,
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
))
} }
Err(_) => { Err(_) => {
// ❌ TIMEOUT ERROR: Record, check threshold, invalidate session
if let Some(ref mon) = self.monitoring { if let Some(ref mon) = self.monitoring {
mon.emit(crate::monitoring::MonitoringEvent::NavigationTimeout { mon.emit(crate::monitoring::MonitoringEvent::NavigationTimeout {
instance_id: index, instance_id: index,
@@ -273,69 +390,178 @@ impl ChromeDriverPool {
}); });
} }
let error_count = self.hard_reset_controller.record_error();
crate::util::logger::log_error("Navigation timeout (60s)").await; crate::util::logger::log_error("Navigation timeout (60s)").await;
Err(anyhow!("Navigation timeout"))
{
let mut inst = instance.lock().await;
inst.invalidate_current_session().await;
}
// Enhanced logging
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
logger::log_warn(&format!(
"Timeout error. Reset counter: {}/{} ({:.0}%)",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD,
threshold_pct
)).await;
// Check if threshold reached
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
logger::log_error(&format!(
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
)).await;
return Err(anyhow!(
"HARD_RESET_REQUIRED: Navigation timeout. Threshold reached ({}/{})",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
));
}
Err(anyhow!(
"Navigation timeout. Hard reset at {}/{}",
error_count,
Self::HARD_RESET_ERROR_THRESHOLD
))
} }
};
{
let mut inst = instance.lock().await;
inst.task_count = inst.task_count.saturating_sub(1);
} }
result
} }
async fn get_rotated_index(&self) -> Result<usize> { /// Simple round-robin instance selection (no rotation)
let total = self.instances.len(); async fn select_instance_round_robin(&self) -> Arc<Mutex<ChromeInstance>> {
let half_size = total / 2; let mut next = self.next_instance.lock().await;
let index = *next;
*next = (*next + 1) % self.instances.len();
drop(next);
Arc::clone(&self.instances[index])
}
/// Round-robin with half-pool rotation
async fn select_instance_with_rotation(&self) -> Result<Arc<Mutex<ChromeInstance>>> {
let pool_size = self.instances.len();
let half_size = pool_size / 2;
if half_size == 0 { if half_size == 0 {
return Ok(0); // Pool zu klein für Rotation // Pool too small for rotation, fall back to simple round-robin
return Ok(self.select_instance_round_robin().await);
} }
let mut next_idx = self.next_instance.lock().await; let mut next = self.next_instance.lock().await;
let current_half_start = if *next_idx < half_size { 0 } else { half_size }; let current_half_start = (*next / half_size) * half_size;
let current_half_end = if *next_idx < half_size { half_size } else { total }; let current_half_end = (current_half_start + half_size).min(pool_size);
// Suche verfügbare Instanz in aktueller Hälfte // Try to find available instance in current half
for offset in 0..(current_half_end - current_half_start) { let mut attempts = 0;
let candidate_idx = current_half_start + ((*next_idx + offset) % half_size); let max_attempts = half_size * 2; // Try both halves
let instance = &self.instances[candidate_idx]; while attempts < max_attempts {
let guard = instance.lock().await; let index = current_half_start + (*next % half_size);
let instance = &self.instances[index];
if guard.max_tasks_per_instance == 0 || // Check if instance can accept more tasks
guard.task_count < guard.max_tasks_per_instance { let mut inst = instance.lock().await;
*next_idx = (candidate_idx + 1) % total; let can_accept = inst.get_task_count() < inst.max_tasks_per_instance;
drop(guard); drop(inst);
return Ok(candidate_idx);
if can_accept {
*next = (*next + 1) % pool_size;
drop(next);
if let Some(ref mon) = self.monitoring {
mon.emit(crate::monitoring::MonitoringEvent::InstanceSelected {
instance_id: index,
half: if index < half_size { 1 } else { 2 },
});
}
return Ok(Arc::clone(instance));
} }
// Current half saturated, try other half
if attempts == half_size - 1 {
logger::log_info("Current half saturated, rotating to other half").await;
*next = if current_half_start == 0 { half_size } else { 0 };
} else {
*next = (*next + 1) % pool_size;
}
attempts += 1;
} }
// Aktuelle Hälfte voll → Zur anderen wechseln drop(next);
crate::util::logger::log_info("Current half saturated, rotating to other half").await;
let new_half_start = if current_half_start == 0 { half_size } else { 0 }; // All instances saturated
let new_half_end = if current_half_start == 0 { total } else { half_size }; Err(anyhow!("All instances at task capacity"))
}
// Alte Hälfte zurücksetzen (für nächste Rotation) pub fn get_reset_controller(&self) -> Arc<HardResetController> {
for i in current_half_start..current_half_end { Arc::clone(&self.hard_reset_controller)
let mut instance = self.instances[i].lock().await; }
instance.reset_task_count();
}
*next_idx = new_half_start; /// Check if hard reset threshold has been reached
drop(next_idx); pub fn should_perform_hard_reset(&self) -> bool {
self.hard_reset_controller.get_count() >= Self::HARD_RESET_ERROR_THRESHOLD
}
Ok(new_half_start) /// Get current error count and threshold for monitoring
pub fn get_reset_status(&self) -> (usize, usize) {
(
self.hard_reset_controller.get_count(),
Self::HARD_RESET_ERROR_THRESHOLD
)
} }
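Hedged sketch of the caller side of this contract: the pool only embeds the `HARD_RESET_REQUIRED` marker in the error text and exposes `get_reset_status()` / `should_perform_hard_reset()`; the `scrape` closure and the rebuild step below are illustrative placeholders, not code from this diff.
async fn run_one<T, F, Fut>(pool: &ChromeDriverPool, scrape: F) -> anyhow::Result<Option<T>>
where
    F: FnOnce() -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<T>>,
{
    match scrape().await {
        Ok(value) => Ok(Some(value)),
        // The pool prefixes the error message once the consecutive-error threshold is hit
        Err(e) if e.to_string().starts_with("HARD_RESET_REQUIRED") => {
            let (count, threshold) = pool.get_reset_status();
            crate::util::logger::log_error(&format!("Hard reset required ({}/{})", count, threshold)).await;
            // The caller would tear the pool down and rebuild it here (omitted)
            Ok(None)
        }
        Err(e) => Err(e),
    }
}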
/// Gracefully shut down all ChromeDriver processes and Docker proxy containers. /// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
/// ✅ FIXED: Now with proper error propagation and Chrome process cleanup
pub async fn shutdown(&self) -> Result<()> { pub async fn shutdown(&self) -> Result<()> {
for inst in &self.instances { logger::log_info(&format!("Shutting down {} ChromeDriver instances...", self.instances.len())).await;
let mut shutdown_errors = Vec::new();
for (i, inst) in self.instances.iter().enumerate() {
logger::log_info(&format!(" Shutting down instance {}...", i)).await;
let mut guard = inst.lock().await; let mut guard = inst.lock().await;
guard.shutdown().await?; if let Err(e) = guard.shutdown().await {
logger::log_error(&format!(" ✗ Instance {} shutdown error: {}", i, e)).await;
shutdown_errors.push(format!("Instance {}: {}", i, e));
} else {
logger::log_info(&format!(" ✓ Instance {} shut down", i)).await;
}
} }
if let Some(pp) = &self.proxy_pool { if let Some(pp) = &self.proxy_pool {
pp.shutdown().await?; logger::log_info("Shutting down proxy pool...").await;
crate::util::logger::log_info("All Docker VPN proxy containers stopped").await; if let Err(e) = pp.shutdown().await {
logger::log_error(&format!("Proxy pool shutdown error: {}", e)).await;
shutdown_errors.push(format!("Proxy pool: {}", e));
} else {
logger::log_info("✓ Proxy pool shut down").await;
}
} }
if !shutdown_errors.is_empty() {
return Err(anyhow!(
"Pool shutdown completed with {} error(s): {}",
shutdown_errors.len(),
shutdown_errors.join("; ")
));
}
logger::log_info("✓ All ChromeDriver instances shut down successfully").await;
Ok(()) Ok(())
} }
@@ -356,6 +582,9 @@ impl ChromeDriverPool {
self.instances.len() self.instances.len()
} }
} }
pub fn get_proxy_pool(&self) -> Option<Arc<DockerVpnProxyPool>> {
self.proxy_pool.clone()
}
} }
/// Represents a single instance of chromedriver process, optionally bound to a VPN. /// Represents a single instance of chromedriver process, optionally bound to a VPN.
@@ -369,9 +598,9 @@ pub struct ChromeInstance {
current_session: Arc<Mutex<Option<Client>>>, // Current active session current_session: Arc<Mutex<Option<Client>>>, // Current active session
session_request_count: Arc<Mutex<usize>>, session_request_count: Arc<Mutex<usize>>,
max_requests_per_session: usize, // z.B. 25 max_requests_per_session: usize,
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Referernce to the proxy pool proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Reference to the proxy pool
current_proxy_index: Arc<Mutex<usize>>, // Current proxy index in use current_proxy_index: Arc<Mutex<usize>>, // Current proxy index in use
instance_id: usize, instance_id: usize,
@@ -408,15 +637,13 @@ impl ChromeInstance {
}) })
} }
pub async fn get_or_renew_session(&self) -> Result<Client> { pub async fn get_or_renew_session(&mut self) -> Result<Client> {
let mut session_opt = self.current_session.lock().await; let mut session_opt = self.current_session.lock().await;
let mut request_count = self.session_request_count.lock().await; let mut request_count = self.session_request_count.lock().await;
let old_request_count = *request_count; // Session renewal conditions:
// 1. No session exists
// Session erneuern wenn: // 2. Request limit reached
// 1. Keine Session vorhanden
// 2. Request-Limit erreicht
let needs_renewal = session_opt.is_none() || *request_count >= self.max_requests_per_session; let needs_renewal = session_opt.is_none() || *request_count >= self.max_requests_per_session;
if needs_renewal { if needs_renewal {
@@ -427,16 +654,22 @@ impl ChromeInstance {
}); });
} }
// Alte Session schließen // ✅ FIXED: Close old session with proper error handling
if let Some(old_session) = session_opt.take() { if let Some(old_session) = session_opt.take() {
crate::util::logger::log_info("Closing old session").await; crate::util::logger::log_info("Closing old session").await;
let _ = old_session.close().await;
// Kurze Pause zwischen Sessions // Try to close gracefully first
if let Err(e) = old_session.close().await {
logger::log_warn(&format!("Session close failed (may leave Chrome tabs open): {}", e)).await;
// Continue anyway - we'll force-kill if needed
}
// Brief pause between sessions
let random_delay = random_range(500, 1000); let random_delay = random_range(500, 1000);
sleep(Duration::from_millis(random_delay)).await; sleep(Duration::from_millis(random_delay)).await;
} }
// Neue Session mit frischem User-Agent erstellen // Create new session with fresh User-Agent
crate::util::logger::log_info(&format!( crate::util::logger::log_info(&format!(
"Creating new session (requests in last session: {})", "Creating new session (requests in last session: {})",
*request_count *request_count
@@ -476,29 +709,35 @@ impl ChromeInstance {
mon.emit(crate::monitoring::MonitoringEvent::SessionRenewed { mon.emit(crate::monitoring::MonitoringEvent::SessionRenewed {
instance_id: self.instance_id, instance_id: self.instance_id,
old_request_count: *request_count, old_request_count: *request_count,
reason: crate::monitoring::RenewalReason::RequestLimit, reason: reason,
new_proxy: new_proxy_info, new_proxy: new_proxy_info,
}); });
} }
Ok(new_session) Ok(new_session)
} else { } else {
// Existierende Session verwenden // Use existing session
*request_count += 1; *request_count += 1;
Ok(session_opt.as_ref().unwrap().clone()) Ok(session_opt.as_ref().unwrap().clone())
} }
} }
async fn create_fresh_session(&self) -> Result<Client> { async fn create_fresh_session(&self) -> Result<Client> {
// Hole aktuellen Proxy-URL ohne self zu mutieren
let proxy_url = if let Some(ref pool) = self.proxy_pool { let proxy_url = if let Some(ref pool) = self.proxy_pool {
let mut proxy_idx = self.current_proxy_index.lock().await; let mut proxy_idx = self.current_proxy_index.lock().await;
*proxy_idx = (*proxy_idx + 1) % pool.num_proxies(); let num_proxies = pool.num_proxies();
let url = pool.get_proxy_url(*proxy_idx);
crate::util::logger::log_info(&format!( // Round-robin through all proxies
"Using proxy {} for new session", let selected_proxy = *proxy_idx % num_proxies;
*proxy_idx *proxy_idx = (*proxy_idx + 1) % num_proxies;
let url = pool.get_proxy_url(selected_proxy);
logger::log_info(&format!(
"Instance {} creating session with proxy {}/{} (rotation)",
self.instance_id,
selected_proxy,
num_proxies
)).await; )).await;
Some(url) Some(url)
@@ -509,45 +748,39 @@ impl ChromeInstance {
let user_agent = Self::chrome_user_agent(); let user_agent = Self::chrome_user_agent();
let capabilities = self.chrome_args_with_ua(user_agent, &proxy_url); let capabilities = self.chrome_args_with_ua(user_agent, &proxy_url);
ClientBuilder::native() let client = ClientBuilder::native()
.capabilities(capabilities) .capabilities(capabilities)
.connect(&self.base_url) .connect(&self.base_url)
.await .await
.context("Failed to connect to ChromeDriver") .context("Failed to connect to ChromeDriver")?;
// ✅ NEW: Extract and store Chrome PID for cleanup
// Chrome process info can be extracted from session info if needed
// For now, we rely on killing the process tree
Ok(client)
} }
fn chrome_args_with_ua(&self, user_agent: &str, proxy_url: &Option<String>) -> Map<String, Value> { pub async fn invalidate_current_session(&self) {
let mut args = vec![ let mut session_opt = self.current_session.lock().await;
"--headless=new".to_string(),
"--disable-gpu".to_string(),
"--no-sandbox".to_string(),
"--disable-dev-shm-usage".to_string(),
"--disable-infobars".to_string(),
"--disable-extensions".to_string(),
"--disable-popup-blocking".to_string(),
"--disable-notifications".to_string(),
"--disable-autofill".to_string(),
"--disable-sync".to_string(),
"--disable-default-apps".to_string(),
"--disable-translate".to_string(),
"--disable-blink-features=AutomationControlled".to_string(),
format!("--user-agent={}", user_agent),
];
if let Some(proxy) = proxy_url { if let Some(old_session) = session_opt.take() {
args.push(format!("--proxy-server={}", proxy)); crate::util::logger::log_info(&format!(
"Invalidating broken session for instance {}",
self.instance_id
)).await;
// ✅ FIXED: Proper error handling instead of silent failure
if let Err(e) = old_session.close().await {
logger::log_warn(&format!(
"Failed to close broken session (Chrome tabs may remain): {}",
e
)).await;
}
} }
let caps = serde_json::json!({ let mut request_count = self.session_request_count.lock().await;
"goog:chromeOptions": { *request_count = 0;
"args": args,
"excludeSwitches": ["enable-logging", "enable-automation"],
"prefs": {
"profile.default_content_setting_values.notifications": 2
}
}
});
caps.as_object().cloned().unwrap()
} }
pub fn reset_task_count(&mut self) { pub fn reset_task_count(&mut self) {
@@ -567,17 +800,103 @@ impl ChromeInstance {
self.task_count self.task_count
} }
/// ✅ FIXED: Proper Chrome + ChromeDriver shutdown with process tree killing
pub async fn shutdown(&mut self) -> Result<()> { pub async fn shutdown(&mut self) -> Result<()> {
logger::log_info(&format!("Shutting down ChromeInstance {}...", self.instance_id)).await;
// Step 1: Close any active session to signal Chrome to close
{
let mut session_opt = self.current_session.lock().await;
if let Some(session) = session_opt.take() {
logger::log_info(" Closing active session...").await;
if let Err(e) = session.close().await {
logger::log_warn(&format!(" Session close failed: {}", e)).await;
}
}
}
// Step 2: Abort stderr logging task
if let Some(handle) = self.stderr_log.take() { if let Some(handle) = self.stderr_log.take() {
handle.abort(); handle.abort();
let _ = handle.await; let _ = handle.await;
} }
let _ = self.process.start_kill(); // Step 3: Get ChromeDriver PID before killing
let _ = self.process.wait().await; let chromedriver_pid = self.process.id();
logger::log_info(&format!(" ChromeDriver PID: {:?}", chromedriver_pid)).await;
// Step 4: Kill ChromeDriver and wait
if let Err(e) = self.process.start_kill() {
logger::log_warn(&format!(" Failed to kill ChromeDriver: {}", e)).await;
}
// Wait for ChromeDriver to exit (with timeout)
match timeout(Duration::from_secs(5), self.process.wait()).await {
Ok(Ok(status)) => {
logger::log_info(&format!(" ChromeDriver exited with status: {:?}", status)).await;
}
Ok(Err(e)) => {
logger::log_warn(&format!(" Error waiting for ChromeDriver: {}", e)).await;
}
Err(_) => {
logger::log_warn(" ChromeDriver didn't exit within 5s").await;
}
}
// Step 5: ✅ CRITICAL FIX: Force-kill Chrome process tree
// On Windows, Chrome doesn't die when ChromeDriver dies
if let Some(pid) = chromedriver_pid {
logger::log_info(&format!(" Force-killing Chrome process tree for PID {}...", pid)).await;
#[cfg(target_os = "windows")]
{
// Kill entire process tree on Windows
let _ = Command::new("taskkill")
.args(["/F", "/T", "/PID", &pid.to_string()])
.output()
.await;
// Also kill any remaining chrome.exe processes
let _ = Command::new("taskkill")
.args(["/F", "/IM", "chrome.exe"])
.output()
.await;
}
#[cfg(not(target_os = "windows"))]
{
// Kill process group on Unix
let _ = Command::new("pkill")
.args(["-P", &pid.to_string()])
.output()
.await;
}
logger::log_info(" ✓ Chrome process tree killed").await;
}
// Step 6: Wait a moment for processes to fully terminate
sleep(Duration::from_millis(500)).await;
logger::log_info(&format!("✓ ChromeInstance {} shut down", self.instance_id)).await;
Ok(()) Ok(())
} }
pub fn is_available(&self) -> bool {
if self.max_tasks_per_instance == 0 {
return true; // No limit
}
self.task_count < self.max_tasks_per_instance
}
pub fn tasks_remaining(&self) -> usize {
if self.max_tasks_per_instance == 0 {
return usize::MAX;
}
self.max_tasks_per_instance.saturating_sub(self.task_count)
}
/// Spawns the actual `chromedriver` binary and waits for it to become ready. /// Spawns the actual `chromedriver` binary and waits for it to become ready.
async fn spawn_chromedriver() -> Result<(String, Child, JoinHandle<()>)> { async fn spawn_chromedriver() -> Result<(String, Child, JoinHandle<()>)> {
let mut process = Command::new("chromedriver-win64/chromedriver.exe") let mut process = Command::new("chromedriver-win64/chromedriver.exe")
@@ -624,6 +943,40 @@ impl ChromeInstance {
Err(anyhow!("ChromeDriver failed to start within 30s")) Err(anyhow!("ChromeDriver failed to start within 30s"))
} }
fn chrome_args_with_ua(&self, user_agent: &str, proxy_url: &Option<String>) -> Map<String, Value> {
let mut args = vec![
"--headless=new".to_string(),
"--disable-gpu".to_string(),
"--no-sandbox".to_string(),
"--disable-dev-shm-usage".to_string(),
"--disable-infobars".to_string(),
"--disable-extensions".to_string(),
"--disable-popup-blocking".to_string(),
"--disable-notifications".to_string(),
"--disable-autofill".to_string(),
"--disable-sync".to_string(),
"--disable-default-apps".to_string(),
"--disable-translate".to_string(),
"--disable-blink-features=AutomationControlled".to_string(),
format!("--user-agent={}", user_agent),
];
if let Some(proxy) = proxy_url {
args.push(format!("--proxy-server={}", proxy));
}
let caps = serde_json::json!({
"goog:chromeOptions": {
"args": args,
"excludeSwitches": ["enable-logging", "enable-automation"],
"prefs": {
"profile.default_content_setting_values.notifications": 2
}
}
});
caps.as_object().cloned().unwrap()
}
pub fn chrome_user_agent() -> &'static str { pub fn chrome_user_agent() -> &'static str {
static UAS: &[&str] = &[ static UAS: &[&str] = &[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36",
@@ -636,6 +989,24 @@ impl ChromeInstance {
} }
} }
impl Drop for ChromeInstance {
fn drop(&mut self) {
// Signal both ChromeDriver and Chrome to terminate
let _ = self.process.start_kill();
// Also try to kill Chrome if we know the PID
if let Some(pid) = self.process.id() {
#[cfg(target_os = "windows")]
{
// Fire and forget - this is best-effort cleanup
let _ = std::process::Command::new("taskkill")
.args(["/F", "/T", "/PID", &pid.to_string()])
.output();
}
}
}
}
fn parse_chromedriver_address(line: &str) -> Option<String> { fn parse_chromedriver_address(line: &str) -> Option<String> {
if line.contains("Starting ChromeDriver") { if line.contains("Starting ChromeDriver") {
if let Some(port_str) = line.split("on port ").nth(1) { if let Some(port_str) = line.split("on port ").nth(1) {
@@ -656,14 +1027,6 @@ fn parse_chromedriver_address(line: &str) -> Option<String> {
None None
} }
impl Drop for ChromeInstance {
fn drop(&mut self) {
// Signal child to terminate. Do NOT block here; shutdown should be
// performed with the async `shutdown()` method when possible.
let _ = self.process.start_kill();
}
}
/// Simplified task execution - uses the pool pattern. /// Simplified task execution - uses the pool pattern.
pub struct ScrapeTask<T> { pub struct ScrapeTask<T> {
url: String, url: String,

1519
src/scraper/yahoo.rs Normal file

File diff suppressed because it is too large

View File

@@ -2,23 +2,26 @@ use std::path::{Path, PathBuf};
use std::fs; use std::fs;
/// Central configuration for all data paths /// Central configuration for all data paths
#[derive(Clone)]
pub struct DataPaths { pub struct DataPaths {
base_dir: PathBuf, base_dir: PathBuf,
data_dir: PathBuf, data_dir: PathBuf,
cache_dir: PathBuf, cache_dir: PathBuf,
logs_dir: PathBuf, logs_dir: PathBuf,
integrity_dir: PathBuf,
// Cache data subdirectories // Cache data subdirectories
cache_gleif_dir: PathBuf, cache_gleif_dir: PathBuf,
cache_openfigi_dir: PathBuf, cache_openfigi_dir: PathBuf,
cache_gleif_openfigi_map_dir: PathBuf, cache_gleif_openfigi_map_dir: PathBuf,
cache_openvpn_dir: PathBuf, cache_openvpn_dir: PathBuf,
// Figi Securities data subdirectories
figi_securities_dir: PathBuf,
// Economic data subdirectories // Economic data subdirectories
economic_events_dir: PathBuf, economic_events_dir: PathBuf,
economic_changes_dir: PathBuf, economic_changes_dir: PathBuf,
economic_currency_dir: PathBuf,
// Corporate data subdirectories // Corporate data subdirectories
corporate_events_dir: PathBuf, corporate_dir: PathBuf,
corporate_changes_dir: PathBuf,
corporate_prices_dir: PathBuf,
} }
impl DataPaths { impl DataPaths {
@@ -29,6 +32,7 @@ impl DataPaths {
let data_dir = base_dir.join("data"); let data_dir = base_dir.join("data");
let cache_dir = base_dir.join("cache"); let cache_dir = base_dir.join("cache");
let logs_dir = base_dir.join("logs"); let logs_dir = base_dir.join("logs");
let integrity_dir = base_dir.join("integrity");
// Cache subdirectories // Cache subdirectories
let cache_gleif_dir = cache_dir.join("gleif"); let cache_gleif_dir = cache_dir.join("gleif");
@@ -36,44 +40,47 @@ impl DataPaths {
let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi"); let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi");
let cache_openvpn_dir = cache_dir.join("openvpn"); let cache_openvpn_dir = cache_dir.join("openvpn");
// Figi Securities subdirectories
let figi_securities_dir = data_dir.join("figi_securities");
// Economic subdirectories // Economic subdirectories
let economic_events_dir = data_dir.join("economic").join("events"); let economic_events_dir = data_dir.join("economic").join("events");
let economic_changes_dir = economic_events_dir.join("changes"); let economic_changes_dir = economic_events_dir.join("changes");
let economic_currency_dir = data_dir.join("economic").join("currency");
// Corporate subdirectories // Corporate subdirectories
let corporate_dir = data_dir.join("corporate"); let corporate_dir = data_dir.join("corporate");
let corporate_events_dir = corporate_dir.join("events");
let corporate_changes_dir = corporate_events_dir.join("changes");
let corporate_prices_dir = corporate_dir.join("prices");
// Create all directories if they don't exist // Create all directories if they don't exist
fs::create_dir_all(&data_dir)?; fs::create_dir_all(&data_dir)?;
fs::create_dir_all(&cache_dir)?; fs::create_dir_all(&cache_dir)?;
fs::create_dir_all(&logs_dir)?; fs::create_dir_all(&logs_dir)?;
fs::create_dir_all(&integrity_dir)?;
fs::create_dir_all(&cache_gleif_dir)?; fs::create_dir_all(&cache_gleif_dir)?;
fs::create_dir_all(&cache_openfigi_dir)?; fs::create_dir_all(&cache_openfigi_dir)?;
fs::create_dir_all(&cache_gleif_openfigi_map_dir)?; fs::create_dir_all(&cache_gleif_openfigi_map_dir)?;
fs::create_dir_all(&cache_openvpn_dir)?; fs::create_dir_all(&cache_openvpn_dir)?;
fs::create_dir_all(&figi_securities_dir)?;
fs::create_dir_all(&economic_events_dir)?; fs::create_dir_all(&economic_events_dir)?;
fs::create_dir_all(&economic_changes_dir)?; fs::create_dir_all(&economic_changes_dir)?;
fs::create_dir_all(&corporate_events_dir)?; fs::create_dir_all(&economic_currency_dir)?;
fs::create_dir_all(&corporate_changes_dir)?; fs::create_dir_all(&corporate_dir)?;
fs::create_dir_all(&corporate_prices_dir)?;
Ok(Self { Ok(Self {
base_dir, base_dir,
data_dir, data_dir,
cache_dir, cache_dir,
logs_dir, logs_dir,
integrity_dir,
cache_gleif_dir, cache_gleif_dir,
cache_openfigi_dir, cache_openfigi_dir,
cache_gleif_openfigi_map_dir, cache_gleif_openfigi_map_dir,
cache_openvpn_dir, cache_openvpn_dir,
figi_securities_dir,
economic_events_dir, economic_events_dir,
economic_changes_dir, economic_changes_dir,
corporate_events_dir, economic_currency_dir,
corporate_changes_dir, corporate_dir,
corporate_prices_dir,
}) })
} }
@@ -89,6 +96,10 @@ impl DataPaths {
&self.cache_dir &self.cache_dir
} }
pub fn integrity_dir(&self) -> &Path {
&self.integrity_dir
}
pub fn logs_dir(&self) -> &Path { pub fn logs_dir(&self) -> &Path {
&self.logs_dir &self.logs_dir
} }
@@ -109,6 +120,10 @@ impl DataPaths {
&self.cache_openvpn_dir &self.cache_openvpn_dir
} }
pub fn figi_securities_dir(&self) -> &Path {
&self.figi_securities_dir
}
/// Get the economic events directory /// Get the economic events directory
pub fn economic_events_dir(&self) -> &Path { pub fn economic_events_dir(&self) -> &Path {
&self.economic_events_dir &self.economic_events_dir
@@ -119,19 +134,13 @@ impl DataPaths {
&self.economic_changes_dir &self.economic_changes_dir
} }
pub fn economic_currency_dir(&self) -> &Path {
&self.economic_currency_dir
}
/// Get the corporate events directory /// Get the corporate events directory
pub fn corporate_events_dir(&self) -> &Path { pub fn corporate_dir(&self) -> &Path {
&self.corporate_events_dir &self.corporate_dir
}
/// Get the corporate changes directory
pub fn corporate_changes_dir(&self) -> &Path {
&self.corporate_changes_dir
}
/// Get the corporate prices directory
pub fn corporate_prices_dir(&self) -> &Path {
&self.corporate_prices_dir
} }
/// Get a specific file path within data directory /// Get a specific file path within data directory
@@ -162,8 +171,5 @@ mod tests {
assert!(paths.logs_dir().exists()); assert!(paths.logs_dir().exists());
assert!(paths.economic_events_dir().exists()); assert!(paths.economic_events_dir().exists());
assert!(paths.economic_changes_dir().exists()); assert!(paths.economic_changes_dir().exists());
assert!(paths.corporate_events_dir().exists());
assert!(paths.corporate_changes_dir().exists());
assert!(paths.corporate_prices_dir().exists());
} }
} }

911
src/util/integrity.rs Normal file
View File

@@ -0,0 +1,911 @@
// src/util/integrity.rs
//! Content integrity and state lifecycle management module
//!
//! Features:
//! - File and directory hashing (SHA-256)
//! - Hash validation against content references
//! - State invalidation based on time or validation failures
//! - 3-stage data lifecycle: cache → data → storage
//! - Inline vs. external hash storage based on size
//! - Centralized dependency configuration (Single Source of Truth)
//! - Support for checkpoint groups and hierarchies
//! - Automatic transitive dependency resolution
//! - Cycle detection in dependency graph
use anyhow::{Context, Result, bail};
use chrono::{DateTime, Duration, Utc};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::io::{BufReader, Read};
use std::path::{Path, PathBuf};
use tokio::fs as async_fs;
use tokio::io::AsyncWriteExt;
// ============================================================================
// CONSTANTS
// ============================================================================
const INLINE_HASH_THRESHOLD: usize = 1024;
const HASH_STORAGE_DIR: &str = ".integrity_hashes";
const HASH_FILE_EXT: &str = ".hash";
const DEFAULT_DEPENDENCY_CONFIG: &str = "checkpoint_dependencies.toml";
// ============================================================================
// DEPENDENCY CONFIGURATION
// ============================================================================
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct DependencyConfig {
#[serde(default)]
pub checkpoints: HashMap<String, CheckpointConfig>,
#[serde(default)]
pub groups: HashMap<String, GroupConfig>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CheckpointConfig {
#[serde(default)]
pub description: String,
#[serde(default)]
pub depends_on: Vec<String>,
#[serde(default)]
pub group: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GroupConfig {
#[serde(default)]
pub description: String,
pub members: Vec<String>,
#[serde(default)]
pub depends_on: Vec<String>,
}
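For reference, a minimal sketch of the kind of `checkpoint_dependencies.toml` these structs deserialize; the checkpoint and group names are illustrative, not taken from this repository:
fn example_config() -> anyhow::Result<()> {
    // Hypothetical config: two checkpoints and one group, exercising depends_on,
    // group membership, and transitive resolution.
    let toml_src = r#"
        [checkpoints.collect_raw]
        description = "Scrape raw listings"

        [checkpoints.enrich]
        description = "Enrich raw listings"
        depends_on = ["collect_raw"]
        group = "pipeline"

        [groups.pipeline]
        description = "Main enrichment pipeline"
        members = ["enrich"]
        depends_on = ["collect_raw"]
    "#;
    let config: DependencyConfig = toml::from_str(toml_src)?;
    config.validate()?;
    // "enrich" resolves to its group dependency plus its direct dependency (deduplicated)
    assert_eq!(config.get_all_dependencies("enrich")?, vec!["collect_raw".to_string()]);
    Ok(())
}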
impl DependencyConfig {
/// Load from file or return empty config
pub async fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
let path = path.as_ref();
if !path.exists() {
return Ok(Self::default());
}
let content = async_fs::read_to_string(path).await
.with_context(|| format!("Failed to read: {}", path.display()))?;
let config: Self = toml::from_str(&content)
.context("Failed to parse dependency config")?;
config.validate()?;
Ok(config)
}
/// Validate configuration (checks for cycles and invalid references)
pub fn validate(&self) -> Result<()> {
// Check for cycles
for checkpoint in self.checkpoints.keys() {
self.detect_cycle(checkpoint)?;
}
// Validate group memberships
for (group_name, group) in &self.groups {
for member in &group.members {
if !self.checkpoints.contains_key(member) {
bail!("Group '{}' references unknown checkpoint: {}", group_name, member);
}
}
}
// Validate checkpoint group declarations
for (checkpoint_name, checkpoint) in &self.checkpoints {
if let Some(group_name) = &checkpoint.group {
let group = self.groups.get(group_name)
.ok_or_else(|| anyhow::anyhow!("Checkpoint '{}' references unknown group: {}", checkpoint_name, group_name))?;
if !group.members.contains(checkpoint_name) {
bail!("Checkpoint '{}' claims group '{}' but group doesn't list it",
checkpoint_name, group_name);
}
}
}
Ok(())
}
/// Detect cycles using DFS
fn detect_cycle(&self, start: &str) -> Result<()> {
let mut visited = HashSet::new();
let mut stack = HashSet::new();
self.dfs_cycle_check(start, &mut visited, &mut stack)
}
fn dfs_cycle_check(&self, node: &str, visited: &mut HashSet<String>, stack: &mut HashSet<String>) -> Result<()> {
if stack.contains(node) {
bail!("Cycle detected at checkpoint: {}", node);
}
if visited.contains(node) {
return Ok(());
}
visited.insert(node.to_string());
stack.insert(node.to_string());
if let Some(config) = self.checkpoints.get(node) {
for dep in &config.depends_on {
self.dfs_cycle_check(dep, visited, stack)?;
}
}
stack.remove(node);
Ok(())
}
/// Get all dependencies (including transitive and group dependencies)
pub fn get_all_dependencies(&self, checkpoint: &str) -> Result<Vec<String>> {
let mut deps = Vec::new();
let mut visited = HashSet::new();
self.collect_deps(checkpoint, &mut deps, &mut visited)?;
// Remove duplicates while preserving order
let mut seen = HashSet::new();
deps.retain(|d| seen.insert(d.clone()));
Ok(deps)
}
fn collect_deps(&self, node: &str, deps: &mut Vec<String>, visited: &mut HashSet<String>) -> Result<()> {
if visited.contains(node) {
return Ok(());
}
visited.insert(node.to_string());
let config = self.checkpoints.get(node)
.ok_or_else(|| anyhow::anyhow!("Unknown checkpoint: {}", node))?;
// Add group dependencies first
if let Some(group_name) = &config.group {
if let Some(group) = self.groups.get(group_name) {
for dep in &group.depends_on {
if !visited.contains(dep) {
deps.push(dep.clone());
self.collect_deps(dep, deps, visited)?;
}
}
}
}
// Add direct dependencies
for dep in &config.depends_on {
if !visited.contains(dep) {
deps.push(dep.clone());
self.collect_deps(dep, deps, visited)?;
}
}
Ok(())
}
/// Generate DOT format for visualization
pub fn to_dot(&self) -> String {
let mut dot = String::from("digraph Dependencies {\n rankdir=LR;\n node [shape=box];\n\n");
// Nodes
for (name, config) in &self.checkpoints {
let label = if config.description.is_empty() {
name.clone()
} else {
format!("{}\\n{}", name, config.description)
};
dot.push_str(&format!(" \"{}\" [label=\"{}\"];\n", name, label));
}
// Edges
dot.push_str("\n");
for (name, config) in &self.checkpoints {
// Group dependencies
if let Some(group_name) = &config.group {
if let Some(group) = self.groups.get(group_name) {
for dep in &group.depends_on {
dot.push_str(&format!(" \"{}\" -> \"{}\" [label=\"via {}\"];\n", name, dep, group_name));
}
}
}
// Direct dependencies
for dep in &config.depends_on {
dot.push_str(&format!(" \"{}\" -> \"{}\";\n", name, dep));
}
}
dot.push_str("}\n");
dot
}
}
// ============================================================================
// DATA STRUCTURES
// ============================================================================
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ContentReference {
File { path: PathBuf },
Directory {
path: PathBuf,
include_patterns: Option<Vec<String>>,
exclude_patterns: Option<Vec<String>>,
},
Composite { references: Vec<ContentReference> },
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(tag = "storage", rename_all = "lowercase")]
pub enum HashStorage {
Inline { hash: String },
External { hash_file: PathBuf },
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum DataStage {
Cache,
Data,
Storage,
}
impl DataStage {
pub fn default_ttl(&self) -> Duration {
match self {
Self::Cache => Duration::hours(24),
Self::Data => Duration::days(7),
Self::Storage => Duration::days(365),
}
}
pub fn revalidation_interval(&self) -> Duration {
match self {
Self::Cache => Duration::hours(6),
Self::Data => Duration::days(1),
Self::Storage => Duration::days(30),
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StateEntry {
pub step_name: String,
pub completed: bool,
pub completed_at: Option<DateTime<Utc>>,
pub content_reference: Option<ContentReference>,
pub content_hash: Option<HashStorage>,
pub data_stage: Option<DataStage>,
pub ttl_override: Option<Duration>,
pub last_validated_at: Option<DateTime<Utc>>,
pub validation_status: ValidationStatus,
#[serde(default)]
pub dependencies: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum ValidationStatus {
Unknown,
Valid,
Invalid { reason: String },
Expired,
DependencyFailed { failed_dependency: String },
}
// ============================================================================
// HASH COMPUTATION
// ============================================================================
/// Hash a single file using SHA-256
pub fn hash_file<P: AsRef<Path>>(path: P) -> Result<String> {
let path = path.as_ref();
let file = fs::File::open(path)
.with_context(|| format!("Failed to open: {}", path.display()))?;
let mut reader = BufReader::new(file);
let mut hasher = Sha256::new();
let mut buffer = [0u8; 8192];
loop {
let bytes_read = reader.read(&mut buffer)?;
if bytes_read == 0 { break; }
hasher.update(&buffer[..bytes_read]);
}
Ok(format!("{:x}", hasher.finalize()))
}
/// Hash a directory recursively
pub fn hash_directory<P: AsRef<Path>>(
path: P,
include_patterns: Option<&[String]>,
exclude_patterns: Option<&[String]>,
) -> Result<String> {
let path = path.as_ref();
if !path.is_dir() {
bail!("Not a directory: {}", path.display());
}
let mut files = Vec::new();
collect_files_recursive(path, &mut files, include_patterns, exclude_patterns)?;
files.sort();
if files.is_empty() {
return Ok(String::from("d41d8cd98f00b204e9800998ecf8427e")); // Sentinel for an empty directory (the well-known MD5 of the empty string, used only as a placeholder)
}
let mut hasher = Sha256::new();
for file_path in files {
let rel_path = file_path.strip_prefix(path)
.unwrap_or(&file_path)
.to_string_lossy();
hasher.update(rel_path.as_bytes());
hasher.update(hash_file(&file_path)?.as_bytes());
}
Ok(format!("{:x}", hasher.finalize()))
}
fn collect_files_recursive(
dir: &Path,
files: &mut Vec<PathBuf>,
include: Option<&[String]>,
exclude: Option<&[String]>,
) -> Result<()> {
for entry in fs::read_dir(dir)? {
let path = entry?.path();
// Skip hidden files
if path.file_name()
.and_then(|n| n.to_str())
.map_or(false, |n| n.starts_with('.')) {
continue;
}
if path.is_dir() {
collect_files_recursive(&path, files, include, exclude)?;
} else if path.is_file() && should_include(&path, include, exclude) {
files.push(path);
}
}
Ok(())
}
fn should_include(path: &Path, include: Option<&[String]>, exclude: Option<&[String]>) -> bool {
let path_str = path.to_string_lossy();
// Check exclusions first
if let Some(patterns) = exclude {
if patterns.iter().any(|p| glob_match(&path_str, p)) {
return false;
}
}
// Check inclusions
match include {
Some(patterns) => patterns.iter().any(|p| glob_match(&path_str, p)),
None => true,
}
}
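/// Minimal glob support: a pattern without '*' is a plain suffix match; with exactly
/// one '*', the text before it must occur somewhere in the path and the text after it
/// must end the path. Patterns with more than one '*' never match.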
fn glob_match(path: &str, pattern: &str) -> bool {
if pattern.contains('*') {
let parts: Vec<&str> = pattern.split('*').collect();
if parts.len() == 2 {
path.contains(parts[0]) && path.ends_with(parts[1])
} else {
false
}
} else {
path.ends_with(pattern)
}
}
/// Hash content based on reference type
pub fn hash_content_reference(reference: &ContentReference) -> Result<String> {
match reference {
ContentReference::File { path } => hash_file(path),
ContentReference::Directory { path, include_patterns, exclude_patterns } => {
hash_directory(path, include_patterns.as_deref(), exclude_patterns.as_deref())
}
ContentReference::Composite { references } => {
let mut hasher = Sha256::new();
for ref_item in references {
hasher.update(hash_content_reference(ref_item)?.as_bytes());
}
Ok(format!("{:x}", hasher.finalize()))
}
}
}
// ============================================================================
// HASH STORAGE
// ============================================================================
fn determine_storage(hash: &str, base_dir: &Path) -> HashStorage {
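// Note: a SHA-256 hex digest is 64 characters, far below INLINE_HASH_THRESHOLD,
// so single hashes land in the inline branch; the external file path only applies
// to much larger values.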
if hash.len() > INLINE_HASH_THRESHOLD {
let hash_dir = base_dir.join(HASH_STORAGE_DIR);
let hash_file = hash_dir.join(format!("{}{}", &hash[..16], HASH_FILE_EXT));
HashStorage::External { hash_file }
} else {
HashStorage::Inline { hash: hash.to_string() }
}
}
async fn store_hash(hash: &str, storage: &HashStorage) -> Result<()> {
if let HashStorage::External { hash_file } = storage {
if let Some(parent) = hash_file.parent() {
async_fs::create_dir_all(parent).await?;
}
async_fs::write(hash_file, hash.as_bytes()).await?;
}
Ok(())
}
async fn load_hash(storage: &HashStorage) -> Result<String> {
match storage {
HashStorage::Inline { hash } => Ok(hash.clone()),
HashStorage::External { hash_file } => {
Ok(async_fs::read_to_string(hash_file).await?.trim().to_string())
}
}
}
// ============================================================================
// VALIDATION
// ============================================================================
/// Validate a single state entry
async fn validate_entry(entry: &StateEntry) -> Result<ValidationStatus> {
// Check if completed
if !entry.completed {
return Ok(ValidationStatus::Unknown);
}
// Get content reference and hash
let (content_ref, hash_storage) = match (&entry.content_reference, &entry.content_hash) {
(Some(r), Some(h)) => (r, h),
_ => return Ok(ValidationStatus::Unknown),
};
// Load stored hash
let stored_hash = load_hash(hash_storage).await?;
// Compute current hash
let current_hash = match hash_content_reference(content_ref) {
Ok(h) => h,
Err(e) => return Ok(ValidationStatus::Invalid {
reason: format!("Failed to compute hash: {}", e)
}),
};
// Check hash match
if stored_hash != current_hash {
return Ok(ValidationStatus::Invalid { reason: "Hash mismatch".to_string() });
}
// Check TTL
if let Some(stage) = entry.data_stage {
let ttl = entry.ttl_override.unwrap_or_else(|| stage.default_ttl());
if let Some(completed_at) = entry.completed_at {
if Utc::now() - completed_at > ttl {
return Ok(ValidationStatus::Expired);
}
}
}
Ok(ValidationStatus::Valid)
}
/// Validate all entries with cascade invalidation
async fn validate_all_entries(entries: &mut HashMap<String, StateEntry>) -> Result<ValidationReport> {
let mut report = ValidationReport::default();
// Validate each entry
for (name, entry) in entries.iter_mut() {
let status = validate_entry(entry).await?;
entry.validation_status = status.clone();
entry.last_validated_at = Some(Utc::now());
match status {
ValidationStatus::Valid => report.valid_count += 1,
ValidationStatus::Invalid { .. } => {
report.invalid_count += 1;
report.invalid_entries.push(name.clone());
}
ValidationStatus::Expired => {
report.expired_count += 1;
report.expired_entries.push(name.clone());
}
ValidationStatus::Unknown => report.unknown_count += 1,
ValidationStatus::DependencyFailed { .. } => {}
}
}
// Cascade invalidation
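// Repeatedly propagate failures to dependents until a fixed point is reached: any entry
// whose dependency was invalidated becomes DependencyFailed, which can in turn
// invalidate its own dependents.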
let mut invalidated: HashSet<String> = report.invalid_entries.iter().cloned().collect();
loop {
let mut newly_invalidated = Vec::new();
for (name, entry) in entries.iter() {
if invalidated.contains(name) {
continue;
}
// Check if any dependency is invalidated
if let Some(failed_dep) = entry.dependencies.iter().find(|d| invalidated.contains(*d)) {
newly_invalidated.push((name.clone(), failed_dep.clone()));
}
}
if newly_invalidated.is_empty() {
break;
}
for (name, failed_dep) in newly_invalidated {
invalidated.insert(name.clone());
report.cascaded_invalidations.push(name.clone());
if let Some(entry) = entries.get_mut(&name) {
entry.validation_status = ValidationStatus::DependencyFailed { failed_dependency: failed_dep };
}
}
}
Ok(report)
}
#[derive(Debug, Default)]
pub struct ValidationReport {
pub valid_count: usize,
pub invalid_count: usize,
pub expired_count: usize,
pub unknown_count: usize,
pub invalid_entries: Vec<String>,
pub expired_entries: Vec<String>,
pub cascaded_invalidations: Vec<String>,
}
impl ValidationReport {
pub fn print_summary(&self) {
println!("=== Validation Report ===");
println!("Valid: {}", self.valid_count);
println!("Invalid: {}", self.invalid_count);
println!("Expired: {}", self.expired_count);
println!("Unknown: {}", self.unknown_count);
if !self.invalid_entries.is_empty() {
println!("\nInvalid entries:");
for entry in &self.invalid_entries {
println!(" - {}", entry);
}
}
if !self.expired_entries.is_empty() {
println!("\nExpired entries:");
for entry in &self.expired_entries {
println!(" - {}", entry);
}
}
if !self.cascaded_invalidations.is_empty() {
println!("\nCascaded invalidations:");
for entry in &self.cascaded_invalidations {
println!(" - {}", entry);
}
}
}
}
// ============================================================================
// STATE MANAGEMENT
// ============================================================================
/// State manager with centralized dependency configuration
///
/// # Orchestration: Shutdown Flag + State Management
///
/// ## Happy Path (Normal Completion)
/// 1. Work completes successfully
/// 2. Call `mark_valid()` on the step's StateEntry
/// 3. StateEntry saved with timestamp and valid hash
/// 4. On next run: skips already-completed step
///
/// ## Shutdown Path (Interrupted Work)
/// 1. Shutdown flag is set via Ctrl+C handler
/// 2. Long-running code checks: `if shutdown_flag.load(Ordering::SeqCst) { break }`
/// 3. Before returning, call `mark_invalid()`
/// 4. StateEntry saved with `completed: false` and ValidationStatus::Invalid
/// 5. On next run: retries invalid step
///
/// ## Usage Pattern
///
/// ```rust
/// let manager = StateManager::new(&paths.integrity_dir()).await?;
/// let content_ref = directory_reference(&output_dir, None, None);
/// let entry = manager.create_entry(step_name.to_string(), content_ref, DataStage::Data).await?;
///
/// loop {
///     if shutdown_flag.load(Ordering::SeqCst) {
///         manager.mark_invalid(entry, "invalid due to shutdown".to_string()).await?;
///         return Ok(());
///     }
///     // Do work...
/// }
///
/// // Completed successfully
/// manager.mark_valid(entry).await?;
/// ```
pub struct StateManager {
base_dir: PathBuf,
dependency_config: DependencyConfig,
}
impl StateManager {
/// Create new state manager and load dependency configuration
pub async fn new<P: AsRef<Path>>(base_dir: P) -> Result<Self> {
let base_dir = base_dir.as_ref().to_path_buf();
let config_path = base_dir.join(DEFAULT_DEPENDENCY_CONFIG);
let dependency_config = DependencyConfig::from_file(config_path).await?;
Ok(Self { base_dir, dependency_config })
}
/// Create with explicit dependency configuration
pub fn with_config<P: AsRef<Path>>(base_dir: P, dependency_config: DependencyConfig) -> Result<Self> {
dependency_config.validate()?;
Ok(Self {
base_dir: base_dir.as_ref().to_path_buf(),
dependency_config,
})
}
/// Get the dependency configuration
pub fn get_dependency_config(&self) -> &DependencyConfig {
&self.dependency_config
}
/// Load all state entries from state.jsonl
pub async fn load_entries(&self) -> Result<HashMap<String, StateEntry>> {
let state_file = self.base_dir.join("state.jsonl");
if !state_file.exists() {
return Ok(HashMap::new());
}
let content = async_fs::read_to_string(&state_file).await?;
let mut entries = HashMap::new();
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
if let Ok(entry) = serde_json::from_str::<StateEntry>(line) {
entries.insert(entry.step_name.clone(), entry);
}
}
Ok(entries)
}
/// Save all state entries to state.jsonl
pub async fn save_entries(&self, entries: &HashMap<String, StateEntry>) -> Result<()> {
// Ensure the state directory itself exists before writing state.jsonl
async_fs::create_dir_all(&self.base_dir).await?;
let mut file = async_fs::File::create(self.base_dir.join("state.jsonl")).await?;
for entry in entries.values() {
file.write_all((serde_json::to_string(&entry)? + "\n").as_bytes()).await?;
}
file.sync_all().await?;
Ok(())
}
/// Create an empty entry for a step (can be updated later)
///
/// Creates a placeholder entry that marks the step as incomplete and unknown,
/// allowing you to later mark it as valid or invalid via `mark_valid()` or `mark_invalid()`.
///
/// # Example
/// ```rust
/// let manager = StateManager::new(&paths.integrity_dir()).await?;
///
/// // Start tracking a long step
/// let content_ref = directory_reference(&output_dir, None, None);
/// let entry = manager.create_entry("long_operation".to_string(), content_ref, DataStage::Data).await?;
///
/// // Do work...
///
/// // Mark as valid when done
/// manager.mark_valid(entry).await?;
/// ```
pub async fn create_entry(&self, step_name: String, content_reference: ContentReference, data_stage: DataStage) -> Result<StateEntry> {
// Resolve dependencies from configuration
let dependencies = self.dependency_config
.get_all_dependencies(&step_name)
.unwrap_or_default();
// Create empty entry with Unknown status
let entry = StateEntry {
step_name: step_name.clone(),
completed: false,
completed_at: None,
content_reference: Some(content_reference),
content_hash: None,
data_stage: Some(data_stage),
ttl_override: None,
last_validated_at: Some(Utc::now()),
validation_status: ValidationStatus::Unknown,
dependencies,
};
// Update and save
let mut entries = self.load_entries().await?;
entries.insert(step_name, entry.clone());
self.save_entries(&entries).await?;
Ok(entry)
}
/// Mark a StateEntry as valid and save to disk
///
/// Updates the entry with:
/// - `completed: true`
/// - `completed_at: now`
/// - `validation_status: Valid`
/// - Computes and stores content hash
///
/// # Requires
/// - `entry.content_reference` must be `Some()`
/// - `entry.data_stage` must be `Some()`
pub async fn mark_valid(&self, mut entry: StateEntry) -> Result<StateEntry> {
// Get content reference and data stage (required)
let content_reference = entry.content_reference.as_ref()
.ok_or_else(|| anyhow::anyhow!("content_reference is required to mark entry valid"))?;
let data_stage = entry.data_stage
.ok_or_else(|| anyhow::anyhow!("data_stage is required to mark entry valid"))?;
// Compute and store hash
let hash = hash_content_reference(content_reference)?;
let storage = determine_storage(&hash, &self.base_dir);
store_hash(&hash, &storage).await?;
// Update entry
entry.completed = true;
entry.completed_at = Some(Utc::now());
entry.content_hash = Some(storage);
entry.data_stage = Some(data_stage);
entry.last_validated_at = Some(Utc::now());
entry.validation_status = ValidationStatus::Valid;
// Save
let mut entries = self.load_entries().await?;
entries.insert(entry.step_name.clone(), entry.clone());
self.save_entries(&entries).await?;
Ok(entry)
}
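A minimal sketch of the intended flow around `create_entry` and `mark_valid` (the step name and output path are hypothetical; `StateManager::new`, `file_reference`, and `DataStage::Data` are taken from this module):

```rust
// Hypothetical step: track one output file, then mark the step valid once it is written.
let manager = StateManager::new(&paths.integrity_dir()).await?;
let entry = manager
    .create_entry(
        "example_step".to_string(),                // hypothetical step name
        file_reference("data/example_step.jsonl"), // hypothetical output path
        DataStage::Data,
    )
    .await?;

// ... produce data/example_step.jsonl ...

// content_reference and data_stage are already Some(), so mark_valid can hash and persist.
let entry = manager.mark_valid(entry).await?;
assert!(entry.completed);
```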
/// Mark a StateEntry as invalid and save to disk
///
/// Updates the entry with:
/// - `completed: false`
/// - `completed_at: None`
/// - `validation_status: Invalid { reason }`
pub async fn mark_invalid(&self, mut entry: StateEntry, reason: String) -> Result<StateEntry> {
// Update entry
entry.completed = false;
entry.completed_at = None;
entry.last_validated_at = Some(Utc::now());
entry.validation_status = ValidationStatus::Invalid { reason };
// Save
let mut entries = self.load_entries().await?;
entries.insert(entry.step_name.clone(), entry.clone());
self.save_entries(&entries).await?;
Ok(entry)
}
/// Check if a step is valid and completed
pub async fn is_step_valid(&self, step_name: &str) -> Result<bool> {
let entries = self.load_entries().await?;
if let Some(entry) = entries.get(step_name) {
let status = validate_entry(entry).await?;
Ok(matches!(status, ValidationStatus::Valid))
} else {
Ok(false)
}
}
/// Run full validation on all entries
pub async fn validate_all(&self) -> Result<ValidationReport> {
let mut entries = self.load_entries().await?;
let report = validate_all_entries(&mut entries).await?;
self.save_entries(&entries).await?;
Ok(report)
}
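A short guard pattern a caller could build on these two methods (step name hypothetical):

```rust
// Skip a step that is already recorded and still validates; otherwise run it
// and re-validate the whole state file afterwards.
if manager.is_step_valid("example_step").await? {
    return Ok(());
}

// ... run the step and mark it valid or invalid as shown above ...

let _report = manager.validate_all().await?;
```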
/// Print dependency graph information
pub fn print_dependency_graph(&self) {
println!("=== Dependency Configuration ===");
println!("\nCheckpoints: {}", self.dependency_config.checkpoints.len());
println!("Groups: {}", self.dependency_config.groups.len());
println!("\n--- Checkpoints ---");
for (name, config) in &self.dependency_config.checkpoints {
println!("{}", name);
if !config.description.is_empty() {
println!(" Description: {}", config.description);
}
if let Some(group) = &config.group {
println!(" Group: {}", group);
}
if !config.depends_on.is_empty() {
println!(" Depends on: {}", config.depends_on.join(", "));
}
// Show resolved dependencies
if let Ok(all_deps) = self.dependency_config.get_all_dependencies(name) {
if !all_deps.is_empty() {
println!(" Resolved (including transitive): {}", all_deps.join(", "));
}
}
println!();
}
println!("\n--- Groups ---");
for (name, group) in &self.dependency_config.groups {
println!("{}", name);
if !group.description.is_empty() {
println!(" Description: {}", group.description);
}
println!(" Members: {}", group.members.join(", "));
if !group.depends_on.is_empty() {
println!(" Group dependencies: {}", group.depends_on.join(", "));
}
println!();
}
}
}
// ============================================================================
// HELPER FUNCTIONS
// ============================================================================
/// Create a simple file reference
pub fn file_reference<P: AsRef<Path>>(path: P) -> ContentReference {
ContentReference::File { path: path.as_ref().to_path_buf() }
}
/// Create a directory reference
pub fn directory_reference<P: AsRef<Path>>(
path: P,
include_patterns: Option<Vec<String>>,
exclude_patterns: Option<Vec<String>>,
) -> ContentReference {
ContentReference::Directory {
path: path.as_ref().to_path_buf(),
include_patterns,
exclude_patterns,
}
}
/// Create a composite reference
pub fn composite_reference(references: Vec<ContentReference>) -> ContentReference {
ContentReference::Composite { references }
}
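The three constructors compose; a small sketch with hypothetical paths and patterns:

```rust
// One composite reference covering a directory of JSONL files plus a summary file,
// so a single content hash tracks both.
let reference = composite_reference(vec![
    directory_reference(
        "data/securities",
        Some(vec!["*.jsonl".to_string()]), // include pattern (hypothetical)
        None,                              // no exclude patterns
    ),
    file_reference("data/securities/summary.json"),
]);
```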

View File

@@ -5,8 +5,6 @@ use tokio::sync::Mutex;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
static LOGGER: Lazy<Mutex<Option<DebugLogger>>> = Lazy::new(|| Mutex::new(None));
@@ -78,83 +76,3 @@ pub async fn log_warn(msg: &str) {
pub async fn log_error(msg: &str) {
    log_detailed("ERROR", msg).await;
}
struct PoolLogger {
file: std::fs::File,
log_path: PathBuf,
}
impl PoolLogger {
fn new(log_dir: &std::path::Path) -> std::io::Result<Self> {
fs::create_dir_all(log_dir)?;
let filename = format!("webdriver_{}.log", Local::now().format("%Y%m%d_%H%M%S"));
let log_path = log_dir.join(&filename);
let file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)?;
Ok(Self { file, log_path })
}
async fn log(&mut self, msg: &str) {
let line = format!("[{}] {}\n", Local::now().format("%H:%M:%S"), msg);
let _ = self.file.write_all(line.as_bytes());
let _ = self.file.flush();
println!("{}", line.trim_end());
}
}
pub struct PoolMetrics {
pub total_requests: Arc<AtomicUsize>,
pub successful_requests: Arc<AtomicUsize>,
pub failed_requests: Arc<AtomicUsize>,
pub session_renewals: Arc<AtomicUsize>,
pub rotation_events: Arc<AtomicUsize>,
pub retries: Arc<AtomicUsize>,
// IMPROVEMENT: New metrics for better monitoring
pub navigation_timeouts: Arc<AtomicUsize>,
pub bot_detection_hits: Arc<AtomicUsize>,
pub proxy_failures: Arc<AtomicUsize>,
}
impl PoolMetrics {
pub fn new() -> Self {
Self {
total_requests: Arc::new(AtomicUsize::new(0)),
successful_requests: Arc::new(AtomicUsize::new(0)),
failed_requests: Arc::new(AtomicUsize::new(0)),
session_renewals: Arc::new(AtomicUsize::new(0)),
rotation_events: Arc::new(AtomicUsize::new(0)),
retries: Arc::new(AtomicUsize::new(0)),
navigation_timeouts: Arc::new(AtomicUsize::new(0)),
bot_detection_hits: Arc::new(AtomicUsize::new(0)),
proxy_failures: Arc::new(AtomicUsize::new(0)),
}
}
pub async fn log_stats(&self) {
let total = self.total_requests.load(Ordering::Relaxed);
let success = self.successful_requests.load(Ordering::Relaxed);
// FIX: Prefix unused variable with underscore
let _failed = self.failed_requests.load(Ordering::Relaxed);
let renewals = self.session_renewals.load(Ordering::Relaxed);
let rotations = self.rotation_events.load(Ordering::Relaxed);
let retries = self.retries.load(Ordering::Relaxed);
let timeouts = self.navigation_timeouts.load(Ordering::Relaxed);
let bot_hits = self.bot_detection_hits.load(Ordering::Relaxed);
let proxy_fails = self.proxy_failures.load(Ordering::Relaxed);
let success_rate = if total > 0 {
(success as f64 / total as f64) * 100.0
} else {
0.0
};
crate::util::logger::log_info(&format!(
"Pool Metrics: {} total requests, {:.1}% success rate, {} renewals, {} rotations, {} retries, {} timeouts, {} bot detections, {} proxy failures",
total, success_rate, renewals, rotations, retries, timeouts, bot_hits, proxy_fails
)).await;
}
}

28
src/util/macros.rs Normal file
View File

@@ -0,0 +1,28 @@
// src/util/macros.rs
#[macro_export]
macro_rules! check_shutdown {
($shutdown_flag:expr) => {
if $shutdown_flag.load(std::sync::atomic::Ordering::SeqCst) {
logger::log_warn("Shutdown detected, stopping processes").await;
return Ok(());
}
};
}
/// Mark incomplete state on shutdown
/// Usage: mark_incomplete_on_shutdown!(&manager, "step_name", content_ref, DataStage::Data, &shutdown_flag);
#[macro_export]
macro_rules! mark_incomplete_on_shutdown {
($manager:expr, $step_name:expr, $content_ref:expr, $data_stage:expr, $shutdown_flag:expr) => {
if $shutdown_flag.load(std::sync::atomic::Ordering::SeqCst) {
$manager
.mark_incomplete(
$step_name.to_string(),
$content_ref,
$data_stage,
"Incomplete due to shutdown".to_string(),
)
.await?;
}
};
}
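A usage sketch for both macros inside a long-running async step (the surrounding function, step name, and content reference are hypothetical; `StateManager::mark_incomplete` is assumed to exist as referenced by the macro, and `logger` must be in scope because `check_shutdown!` calls `logger::log_warn`):

```rust
use std::sync::atomic::AtomicBool;

use crate::util::logger; // check_shutdown! expands to a call to logger::log_warn

async fn run_example_step(
    manager: &StateManager,
    shutdown_flag: &AtomicBool,
) -> anyhow::Result<()> {
    for _batch in 0..10 {
        // Returns Ok(()) early if a shutdown was requested.
        check_shutdown!(shutdown_flag);
        // ... process one batch ...
    }

    // Record an incomplete state if the flag was raised before the step could finish.
    mark_incomplete_on_shutdown!(
        manager,
        "example_step",                            // hypothetical step name
        file_reference("data/example_step.jsonl"), // hypothetical output
        DataStage::Data,
        shutdown_flag
    );
    Ok(())
}
```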

View File

@@ -2,3 +2,5 @@
pub mod logger;
pub mod directories;
pub mod opnv;
pub mod macros;
pub mod integrity;