added data streaming instead of laoding

implement vpn pool
capabable spawning multiple openvpn instances
2025-12-12 10:54:01 +01:00 · 2025-12-11 23:18:04 +01:00 · 2025-12-11 00:36:46 +01:00 · 2025-12-09 23:27:51 +01:00 · 2025-12-09 23:27:14 +01:00 · 2025-12-09 23:24:51 +01:00
30 changed files with 3731 additions and 987 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,48 @@
 # WebScraper Configuration File (.env)
 # ====================================
 # This file configures the behavior of the WebScraper application
 # Copy to .env and adjust values as needed
 # ===== ECONOMIC DATA =====
 # Start date for economic event scraping
 ECONOMIC_START_DATE=2007-02-13
 # How far into the future to look ahead for economic events (in months)
 ECONOMIC_LOOKAHEAD_MONTHS=3
 # ===== CORPORATE DATA =====
 # Start date for corporate earnings/data scraping
 CORPORATE_START_DATE=2010-01-01
 # ===== PERFORMANCE & CONCURRENCY =====
 # Maximum number of parallel ChromeDriver instances
 # Higher = more concurrent tasks, but higher resource usage
 MAX_PARALLEL_INSTANCES=3
 # Maximum tasks per ChromeDriver instance before recycling
 # 0 = unlimited (instance lives for entire application runtime)
 MAX_TASKS_PER_INSTANCE=0
 # ===== VPN ROTATION (ProtonVPN Integration) =====
 # Enable automatic VPN rotation between sessions?
 # If false, all traffic goes through system without VPN tunneling
 ENABLE_VPN_ROTATION=false
 # Comma-separated list of ProtonVPN servers to rotate through
 # Examples:
 #   "US-Free#1,US-Free#2,UK-Free#1"
 #   "US,UK,JP,DE,NL"
 # NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
 VPN_SERVERS=
 # Number of tasks per VPN session before rotating to new server/IP
 # 0 = rotate between economic and corporate phases (one phase = one IP)
 # 5 = rotate every 5 tasks
 # NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
 TASKS_PER_VPN_SESSION=0
 # ===== LOGGING =====
 # Set via RUST_LOG environment variable:
 #   RUST_LOG=info cargo run
 #   RUST_LOG=debug cargo run
 # Leave empty or unset for default logging level
--- a/.gitignore
+++ b/.gitignore
@@ -27,10 +27,17 @@ target/
 # /chromedriver-win64/*
-# data folders
+# data files
-/economic_events*
+**/*.json
-/economic_event_changes*
+**/*.jsonl
-/corporate_events*
+**/*.csv
-/corporate_prices*
+**/*.zip
-/corporate_event_changes*
+**/*.log
-/data*
+**/*.ovpn
 #/economic_events*
 #/economic_event_changes*
 #/corporate_events*
 #/corporate_prices*
 #/corporate_event_changes*
 #/data*
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -671,16 +671,19 @@ dependencies = [
 "fantoccini",
 "flate2",
 "futures",
 "once_cell",
 "rand 0.9.2",
 "rayon",
 "regex",
 "reqwest",
 "scraper",
 "serde",
 "serde_json",
 "tokio",
 "toml",
 "tracing",
 "tracing-subscriber",
 "url",
 "walkdir",
 "yfinance-rs",
 "zip",
 ]
@@ -2527,6 +2530,15 @@ version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
 [[package]]
 name = "same-file"
 version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
 dependencies = [
 "winapi-util",
 ]
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -2672,15 +2684,6 @@ dependencies = [
 "serde_core",
 ]
 [[package]]
 name = "serde_spanned"
 version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392"
 dependencies = [
 "serde_core",
 ]
 [[package]]
 name = "serde_urlencoded"
 version = "0.7.1"
@@ -3126,21 +3129,6 @@ dependencies = [
 "tokio",
 ]
 [[package]]
 name = "toml"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8"
 dependencies = [
 "indexmap",
 "serde_core",
 "serde_spanned",
 "toml_datetime",
 "toml_parser",
 "toml_writer",
 "winnow",
 ]
 [[package]]
 name = "toml_datetime"
 version = "0.7.3"
@@ -3171,12 +3159,6 @@ dependencies = [
 "winnow",
 ]
 [[package]]
 name = "toml_writer"
 version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
 [[package]]
 name = "tower"
 version = "0.5.2"
@@ -3390,6 +3372,16 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
 [[package]]
 name = "walkdir"
 version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
 dependencies = [
 "same-file",
 "winapi-util",
 ]
 [[package]]
 name = "want"
 version = "0.3.1"
@@ -3521,6 +3513,15 @@ dependencies = [
 "rustls-pki-types",
 ]
 [[package]]
 name = "winapi-util"
 version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
 "windows-sys 0.61.2",
 ]
 [[package]]
 name = "windows-core"
 version = "0.62.2"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "event_backtest_engine"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 authors = ["Your Name <you@example.com>"]
 description = "High-impact economic & corporate earnings data collector for short-event backtesting (overnight/weekend gaps)"
 license = "MIT OR Apache-2.0"
@@ -21,6 +21,7 @@ reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "
 scraper = "0.19"                    # HTML parsing for Yahoo earnings pages
 fantoccini = { version = "0.20", features = ["rustls-tls"] }  # Headless Chrome for finanzen.net
 yfinance-rs = "0.7.2"
 url = "2.5.7"
 # Serialization
 serde = { version = "1.0", features = ["derive"] }
@@ -29,12 +30,15 @@ csv = "1.3"
 zip = "6.0.0"
 flate2 = "1.1.5"
 # Formatting
 regex = "1.12.2"
 walkdir = "2"
 # Generating
 rand = "0.9.2"
 # Environment handling
 dotenvy = "0.15"
 toml = "0.9.8"
 # Date & time
 chrono = { version = "0.4", features = ["serde"] }
@@ -45,6 +49,7 @@ anyhow = "1.0"
 # Logging (optional but recommended)
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
 once_cell = "1.21.3"
 # Parallel processing (for batch tickers)
 futures = "0.3"
--- a/cache/openfigi/INFO.md
+++ b/cache/openfigi/INFO.md
@@ -0,0 +1,15 @@
 # Openfigi Data
 ## Market Security Description
 | Code       | Meaning                                                   |
 | ---------- | --------------------------------------------------------- |
 | **Comdty** | Commodity (e.g., oil, gold futures, physical commodities) |
 | **Corp**   | Corporate bond / corporate debt security                  |
 | **Curncy** | Currency or FX pair (e.g., EURUSD)                        |
 | **Equity** | Stocks / shares                                           |
 | **Govt**   | Government bond (Treasuries, Bunds, Gilts, etc.)          |
 | **Index**  | Market indices (S&P 500, DAX, NYSE Composite…)            |
 | **M-Mkt**  | Money market instruments (commercial paper, CDs, T-bills) |
 | **Mtge**   | Mortgage-backed securities (MBS)                          |
 | **Muni**   | Municipal bonds (US state/local government debt)          |
 | **Pfd**    | Preferred shares                                          |
--- a/check.txt
+++ b/check.txt
--- a/data/INFO.md
+++ b/data/INFO.md
@@ -0,0 +1,15 @@
 # Global Data Info
 ## Exchanges
 Source: Wikipedia
 ## Gleif
 Data Download [.zip] over Website
 ## OpenFigi
 Data Scraping over open API
 Api Key: .env
--- a/data/economic/INFO.md
+++ b/data/economic/INFO.md
@@ -0,0 +1,6 @@
 # Economic Info
 ## Sources
 * continents: finanzen.net
 * countries: finanzen.net
--- a/event_backtest_engine.exe
+++ b/event_backtest_engine.exe
--- a/src/config.rs
+++ b/src/config.rs
@@ -12,21 +12,40 @@ pub struct Config {
    pub economic_lookahead_months: u32,  // default: 3
    /// Maximum number of parallel scraping tasks (default: 10).
    /// This limits concurrency to protect system load and prevent website spamming.
-    #[serde(default = "default_max_parallel")]
+    #[serde(default = "default_max_parallel_instances")]
-    pub max_parallel_tasks: usize,
+    pub max_parallel_instances: usize,
    pub max_tasks_per_instance: usize,
    /// VPN rotation configuration
    /// If set to "true", enables automatic VPN rotation between sessions
    #[serde(default)]
    pub enable_vpn_rotation: bool,
    /// Number of tasks per session before rotating VPN
    /// If set to 0, rotates VPN between economic and corporate phases
    #[serde(default = "default_tasks_per_session")]
    pub tasks_per_vpn_session: usize,
 }
-fn default_max_parallel() -> usize {
+fn default_max_parallel_instances() -> usize {
    10
 }
 fn default_tasks_per_session() -> usize {
    0 // 0 = rotate between economic/corporate
 }
 impl Default for Config {
    fn default() -> Self {
        Self {
            economic_start_date: "2007-02-13".to_string(),
            corporate_start_date: "2010-01-01".to_string(),
            economic_lookahead_months: 3,
-            max_parallel_tasks: default_max_parallel(),
+            max_parallel_instances: default_max_parallel_instances(),
            max_tasks_per_instance: 0,
            enable_vpn_rotation: false,
            tasks_per_vpn_session: default_tasks_per_session(),
        }
    }
 }
@@ -59,16 +78,34 @@ impl Config {
            .parse()
            .context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;
-        let max_parallel_tasks: usize = dotenvy::var("MAX_PARALLEL_TASKS")
+        let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
            .unwrap_or_else(|_| "10".to_string())
            .parse()
-            .context("Failed to parse MAX_PARALLEL_TASKS as usize")?;
+            .context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;
        let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
            .unwrap_or_else(|_| "0".to_string())
            .parse()
            .context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;
        let enable_vpn_rotation = dotenvy::var("ENABLE_VPN_ROTATION")
            .unwrap_or_else(|_| "false".to_string())
            .parse::<bool>()
            .context("Failed to parse ENABLE_VPN_ROTATION as bool")?;
        let tasks_per_vpn_session: usize = dotenvy::var("TASKS_PER_VPN_SESSION")
            .unwrap_or_else(|_| "0".to_string())
            .parse()
            .context("Failed to parse TASKS_PER_VPN_SESSION as usize")?;
        Ok(Self {
            economic_start_date,
            corporate_start_date,
            economic_lookahead_months,
-            max_parallel_tasks,
+            max_parallel_instances,
            max_tasks_per_instance,
            enable_vpn_rotation,
            tasks_per_vpn_session,
        })
    }
--- a/src/corporate/aggregation.rs
+++ b/src/corporate/aggregation.rs
@@ -1,6 +1,7 @@
 // src/corporate/aggregation.rs
 use super::types::CompanyPrice;
 use super::storage::*;
 use crate::util::directories::DataPaths;
 use tokio::fs;
 use std::collections::HashMap;
@@ -16,8 +17,8 @@ struct DayData {
 }
 /// Aggregate price data from multiple exchanges, converting all to USD
-pub async fn aggregate_best_price_data(lei: &str) -> anyhow::Result<()> {
+pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
-    let company_dir = get_company_dir(lei);
+    let company_dir = get_company_dir(paths, lei);
    for timeframe in ["daily", "5min"].iter() {
        let source_dir = company_dir.join(timeframe);
--- a/src/corporate/openfigi.rs
+++ b/src/corporate/openfigi.rs
--- a/src/corporate/scraper.rs
+++ b/src/corporate/scraper.rs
@@ -1,7 +1,7 @@
 // src/corporate/scraper.rs
 use super::{types::*, helpers::*, openfigi::*};
 //use crate::corporate::openfigi::OpenFigiClient;
-use crate::{scraper::webdriver::*};
+use crate::{scraper::webdriver::*, util::directories::DataPaths, util::logger};
 use fantoccini::{Client, Locator};
 use scraper::{Html, Selector};
 use chrono::{DateTime, Duration, NaiveDate, Utc};
@@ -15,160 +15,6 @@ use anyhow::{anyhow, Result};
 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
 /// Discover all exchanges where this ISIN trades by querying Yahoo Finance and enriching with OpenFIGI API calls.
 /// 
 /// # Arguments
 /// * `isin` - The ISIN to search for.
 /// * `known_ticker` - A known ticker symbol for fallback or initial check.
 /// 
 /// # Returns
 /// A vector of FigiInfo structs containing enriched data from API calls.
 /// 
 /// # Errors
 /// Returns an error if HTTP requests fail, JSON parsing fails, or OpenFIGI API responds with an error.
 pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<FigiInfo>> {
    println!("    Discovering exchanges for ISIN {}", isin);
    let mut potential: Vec<(String, PrimaryInfo)> = Vec::new();
    // Try the primary ticker first
    if let Ok(info) = check_ticker_exists(known_ticker).await {
        potential.push((known_ticker.to_string(), info));
    }
    // Search for ISIN directly on Yahoo to find other listings
    let search_url = format!(
        "https://query2.finance.yahoo.com/v1/finance/search?q={}&quotesCount=20&newsCount=0",
        isin
    );
    let resp = HttpClient::new()
        .get(&search_url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await?;
    let json = resp.json::<Value>().await?;
    if let Some(quotes) = json["quotes"].as_array() {
        for quote in quotes {
            // First: filter by quoteType directly from search results (faster rejection)
            let quote_type = quote["quoteType"].as_str().unwrap_or("");
            if quote_type.to_uppercase() != "EQUITY" {
                continue; // Skip bonds, ETFs, mutual funds, options, etc.
            }
            if let Some(symbol) = quote["symbol"].as_str() {
                // Avoid duplicates
                if potential.iter().any(|(s, _)| s == symbol) {
                    continue;
                }
                // Double-check with full quote data (some search results are misleading)
                if let Ok(info) = check_ticker_exists(symbol).await {
                    potential.push((symbol.to_string(), info));
                }
            }
        }
    }
    if potential.is_empty() {
        return Ok(vec![]);
    }
    // Enrich with OpenFIGI API
    let client = OpenFigiClient::new()?;
    let mut discovered_figis = Vec::new();
    if !client.has_key() {
        // Fallback without API key - create FigiInfo with default/empty fields
        for (symbol, info) in potential {
            println!("      Found equity listing: {} on {} ({}) - no FIGI (fallback mode)", symbol, info.exchange_mic, info.currency);
            let figi_info = FigiInfo {
                isin: info.isin,
                figi: String::new(),
                name: info.name,
                ticker: symbol,
                mic_code: info.exchange_mic,
                currency: info.currency,
                compositeFIGI: String::new(),
                securityType: String::new(),
                marketSector: String::new(),
                shareClassFIGI: String::new(),
                securityType2: String::new(),
                securityDescription: String::new(),
            };
            discovered_figis.push(figi_info);
        }
        return Ok(discovered_figis);
    }
    // With API key, batch the mapping requests
    let chunk_size = 100;
    for chunk in potential.chunks(chunk_size) {
        let mut jobs = vec![];
        for (symbol, info) in chunk {
            jobs.push(json!({
                "idType": "TICKER",
                "idValue": symbol,
                "micCode": info.exchange_mic,
                "marketSecDes": "Equity",
            }));
        }
        let resp = client.get_figi_client()
            .post("https://api.openfigi.com/v3/mapping")
            .header("Content-Type", "application/json")
            .json(&jobs)
            .send()
            .await?;
        if !resp.status().is_success() {
            return Err(anyhow::anyhow!("OpenFIGI mapping failed with status: {}", resp.status()));
        }
        let parsed: Vec<Value> = resp.json().await?;
        for (i, item) in parsed.iter().enumerate() {
            let (symbol, info) = &chunk[i];
            if let Some(data) = item["data"].as_array() {
                if let Some(entry) = data.first() {
                    let market_sec = entry["marketSector"].as_str().unwrap_or("");
                    if market_sec != "Equity" {
                        continue;
                    }
                    println!("      Found equity listing: {} on {} ({}) - FIGI: {}", symbol, info.exchange_mic, info.currency, entry["figi"]);
                    let figi_info = FigiInfo {
                        isin: info.isin.clone(),
                        figi: entry["figi"].as_str().unwrap_or("").to_string(),
                        name: entry["name"].as_str().unwrap_or(&info.name).to_string(),
                        ticker: symbol.clone(),
                        mic_code: info.exchange_mic.clone(),
                        currency: info.currency.clone(),
                        compositeFIGI: entry["compositeFIGI"].as_str().unwrap_or("").to_string(),
                        securityType: entry["securityType"].as_str().unwrap_or("").to_string(),
                        marketSector: market_sec.to_string(),
                        shareClassFIGI: entry["shareClassFIGI"].as_str().unwrap_or("").to_string(),
                        securityType2: entry["securityType2"].as_str().unwrap_or("").to_string(),
                        securityDescription: entry["securityDescription"].as_str().unwrap_or("").to_string(),
                    };
                    discovered_figis.push(figi_info);
                } else {
                    println!("      No data returned for ticker {} on MIC {}", symbol, info.exchange_mic);
                }
            } else if let Some(error) = item["error"].as_str() {
                println!("      OpenFIGI error for ticker {}: {}", symbol, error);
            }
        }
        // Respect rate limit (6 seconds between requests with key)
        sleep(TokioDuration::from_secs(6)).await;
    }
    Ok(discovered_figis)
 }
 /// Check if a ticker exists on Yahoo Finance and return core metadata.
 ///
 /// This function calls the public Yahoo Finance quoteSummary endpoint and extracts:
@@ -190,7 +36,7 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
 /// - Not an equity (ETF, bond, etc.)
 /// - Missing critical fields
 /// - Network or JSON parsing errors
-pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
+/*pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
    let url = format!(
        "https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price%2CassetProfile",
        ticker
@@ -303,34 +149,7 @@ pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
        exchange_mic,
        currency,
    })
-}
+}*/
 /// Convert Yahoo's exchange name to MIC code (best effort)
 fn exchange_name_to_mic(name: &str) -> String {
    match name {
        "NMS" | "NasdaqGS" | "NASDAQ" => "XNAS",
        "NYQ" | "NYSE" => "XNYS",
        "LSE" | "London" => "XLON",
        "FRA" | "Frankfurt" | "GER" | "XETRA" => "XFRA",
        "PAR" | "Paris" => "XPAR",
        "AMS" | "Amsterdam" => "XAMS",
        "MIL" | "Milan" => "XMIL",
        "JPX" | "Tokyo" => "XJPX",
        "HKG" | "Hong Kong" => "XHKG",
        "SHH" | "Shanghai" => "XSHG",
        "SHZ" | "Shenzhen" => "XSHE",
        "TOR" | "Toronto" => "XTSE",
        "ASX" | "Australia" => "XASX",
        "SAU" | "Saudi" => "XSAU",
        "SWX" | "Switzerland" => "XSWX",
        "BSE" | "Bombay" => "XBSE",
        "NSE" | "NSI" => "XNSE",
        "TAI" | "Taiwan" => "XTAI",
        "SAO" | "Sao Paulo" => "BVMF",
        "MCE" | "Madrid" => "XMAD",
        _ => name, // Fallback to name itself
    }.to_string()
 }
 /// Fetches earnings events for a ticker using a dedicated ScrapeTask.
 ///
@@ -670,60 +489,164 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow
 pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
    let zip_path = "data/gleif/isin_lei.zip";
    let csv_path = "data/gleif/isin_lei.csv";
-    if let Err(e) = std::fs::create_dir_all("data") {
+    // Initialize DataPaths and create cache/gleif directory
-        println!("Failed to create data directory: {e}");
+    let paths = DataPaths::new(".")?;
    let gleif_cache_dir = paths.cache_gleif_dir();
    if let Err(e) = std::fs::create_dir_all(&gleif_cache_dir) {
        let msg = format!("Failed to create cache/gleif directory: {}", e);
        logger::log_error(&msg).await;
        println!("{}", msg);
        return Ok(None);
    }
-    // Download ZIP
+    logger::log_info("Corporate Scraper: Downloading ISIN/LEI mapping from GLEIF...").await;
-    let bytes = match reqwest::Client::builder()
+
    // Download ZIP and get the filename from Content-Disposition header
    let client = match reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .and_then(|c| Ok(c))
    {
-        Ok(client) => match client.get(url).send().await {
+        Ok(c) => c,
            Ok(resp) if resp.status().is_success() => match resp.bytes().await {
                Ok(b) => b,
                Err(e) => {
                    println!("Failed to read ZIP bytes: {e}");
                    return Ok(None);
                }
            },
            Ok(resp) => {
                println!("Server returned HTTP {}", resp.status());
                return Ok(None);
            }
            Err(e) => {
                println!("Failed to download ISIN/LEI ZIP: {e}");
                return Ok(None);
            }
        },
        Err(e) => {
-            println!("Failed to create HTTP client: {e}");
+            let msg = format!("Failed to create HTTP client: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
-    if let Err(e) = tokio::fs::write(zip_path, &bytes).await {
+    let resp = match client.get(url).send().await {
-        println!("Failed to write ZIP file: {e}");
+        Ok(r) if r.status().is_success() => r,
-        return Ok(None);
+        Ok(resp) => {
            let msg = format!("Server returned HTTP {}", resp.status());
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
        Err(e) => {
            let msg = format!("Failed to download ISIN/LEI ZIP: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
    // Extract filename from Content-Disposition header or use default
    let filename = resp
        .headers()
        .get("content-disposition")
        .and_then(|h| h.to_str().ok())
        .and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
        .unwrap_or_else(|| "isin_lei.zip".to_string());
    // Parse timestamp from filename and convert to DDMMYYYY format
    let parsed_filename = parse_gleif_filename(&filename);
    logger::log_info(&format!("Corporate Scraper: Downloaded file: {} -> {}", filename, parsed_filename)).await;
    // Determine date (DDMMYYYY) from parsed filename: "isin-lei-DDMMYYYY.csv"
    let mut date_str = String::new();
    if let Some(start_idx) = parsed_filename.find("isin-lei-") {
        let rest = &parsed_filename[start_idx + 9..];
        if rest.len() >= 8 {
            date_str = rest[0..8].to_string();
        }
    }
    // If we parsed a date, use/create a date folder under cache/gleif and operate inside it; otherwise use cache root.
    let date_dir = if !date_str.is_empty() {
        let p = gleif_cache_dir.join(&date_str);
        // Ensure the date folder exists (create if necessary)
        if let Err(e) = std::fs::create_dir_all(&p) {
            let msg = format!("Failed to create date directory {:?}: {}", p, e);
            logger::log_warn(&msg).await;
            None
        } else {
            Some(p)
        }
    } else {
        None
    };
    // Choose the directory where we'll look for existing files and where we'll save the new ones
    let target_dir = date_dir.clone().unwrap_or_else(|| gleif_cache_dir.to_path_buf());
    // If the date folder exists (or was created), prefer any *_clean.csv inside it and return that immediately
    if let Some(ref ddir) = date_dir {
        if let Ok(entries) = std::fs::read_dir(ddir) {
            for entry in entries.flatten() {
                if let Some(name) = entry.file_name().to_str() {
                    if name.to_lowercase().ends_with("_clean.csv") {
                        let path = ddir.join(name);
                        logger::log_info(&format!("Found existing clean GLEIF CSV: {}", path.display())).await;
                        return Ok(Some(path.to_string_lossy().to_string()));
                    }
                }
            }
        }
    }
    // If no clean file found in the date folder (or date folder doesn't exist), check whether the csv/zip already exist in the target dir
    let csv_candidate_name = parsed_filename.replace(".zip", ".csv");
    let csv_candidate = target_dir.join(&csv_candidate_name);
    let zip_candidate = target_dir.join(&parsed_filename);
    if csv_candidate.exists() {
        logger::log_info(&format!("Found existing GLEIF CSV: {}", csv_candidate.display())).await;
        return Ok(Some(csv_candidate.to_string_lossy().to_string()));
    }
    if zip_candidate.exists() {
        // If zip exists but csv does not, extract later; for now prefer returning csv path (may be created by extraction step)
        let inferred_csv = target_dir.join(csv_candidate_name);
        if inferred_csv.exists() {
            logger::log_info(&format!("Found existing extracted CSV next to ZIP: {}", inferred_csv.display())).await;
            return Ok(Some(inferred_csv.to_string_lossy().to_string()));
        }
        // otherwise we'll overwrite/extract into target_dir below
    }
    let bytes = match resp.bytes().await {
        Ok(b) => b,
        Err(e) => {
            let msg = format!("Failed to read ZIP bytes: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
    // Ensure target directory exists (create if it's the date folder and was absent earlier)
    if let Some(ref ddir) = date_dir {
        let _ = std::fs::create_dir_all(ddir);
    }
    let zip_path = target_dir.join(&parsed_filename);
    let csv_path = target_dir.join(parsed_filename.replace(".zip", ".csv"));
    if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
        let msg = format!("Failed to write ZIP file: {}", e);
        logger::log_error(&msg).await;
        println!("{}", msg);
        return Ok(None);
    }
    logger::log_info(&format!("Corporate Scraper: Saved ZIP to {:?}", zip_path)).await;
    // Extract CSV
-    let archive = match std::fs::File::open(zip_path)
+    let archive = match std::fs::File::open(&zip_path)
        .map(ZipArchive::new)
    {
        Ok(Ok(a)) => a,
        Ok(Err(e)) => {
-            println!("Invalid ZIP: {e}");
+            let msg = format!("Invalid ZIP: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
        Err(e) => {
-            println!("Cannot open ZIP file: {e}");
+            let msg = format!("Cannot open ZIP file: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
@@ -737,7 +660,9 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    }) {
        Some(i) => i,
        None => {
-            println!("ZIP did not contain a CSV file");
+            let msg = "ZIP did not contain a CSV file";
            logger::log_error(msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
@@ -745,23 +670,55 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let mut csv_file = match archive.by_index(idx) {
        Ok(f) => f,
        Err(e) => {
-            println!("Failed to read CSV entry: {e}");
+            let msg = format!("Failed to read CSV entry: {}", e);
            logger::log_error(&msg).await;
            println!("{}", msg);
            return Ok(None);
        }
    };
    let mut csv_bytes = Vec::new();
    if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
-        println!("Failed to extract CSV: {e}");
+        let msg = format!("Failed to extract CSV: {}", e);
        logger::log_error(&msg).await;
        return Ok(None);
    }
-    if let Err(e) = tokio::fs::write(csv_path, &csv_bytes).await {
+    if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
-        println!("Failed to save CSV file: {e}");
+        let msg = format!("Failed to save CSV file: {}", e);
        logger::log_error(&msg).await;
        return Ok(None);
    }
-    Ok(Some(csv_path.to_string()))
+    let msg = format!("✓ ISIN/LEI CSV extracted: {:?}", csv_path);
    logger::log_info(&msg).await;
    Ok(Some(csv_path.to_string_lossy().to_string()))
 }
 /// Parse GLEIF filename and convert timestamp to DDMMYYYY format
 /// Example: "isin-lei-20251124T080254.csv" -> "isin-lei-24112025.csv"
 fn parse_gleif_filename(filename: &str) -> String {
    // Try to find pattern: isin-lei-YYYYMMDDTHHMMSS.zip/csv
    if let Some(start_idx) = filename.find("isin-lei-") {
        let rest = &filename[start_idx + 9..]; // After "isin-lei-"
        // Extract the 8 digits (YYYYMMDD)
        if rest.len() >= 8 && rest[0..8].chars().all(|c| c.is_numeric()) {
            let date_part = &rest[0..8];
            // date_part is YYYYMMDD, convert to DDMMYYYY
            if date_part.len() == 8 {
                let year = &date_part[0..4];
                let month = &date_part[4..6];
                let day = &date_part[6..8];
                let extension = if filename.ends_with(".zip") { ".zip" } else { ".csv" };
                return format!("isin-lei-{}{}{}{}", day, month, year, extension);
            }
        }
    }
    // Fallback: return original filename if parsing fails
    filename.to_string()
 }
--- a/src/corporate/storage.rs
+++ b/src/corporate/storage.rs
@@ -1,20 +1,30 @@
 // src/corporate/storage.rs
 use super::{types::*, helpers::*};
-use crate::config;
+use crate::util::directories::DataPaths;
 use crate::util::logger;
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
 use chrono::{Datelike, NaiveDate};
-use std::collections::{HashMap, HashSet};
+use std::collections::{HashMap};
-use std::path::{Path, PathBuf};
+use std::path::{PathBuf, Path};
-pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
+const BATCH_SIZE: usize = 500; // Process 500 events at a time
-    let mut map = HashMap::new();
+
-    let dir = std::path::Path::new("corporate_events");
+/// Load events in streaming fashion to avoid memory buildup
 pub async fn load_existing_events_streaming(
    paths: &DataPaths,
    callback: impl Fn(CompanyEvent) -> anyhow::Result<()>
 ) -> anyhow::Result<usize> {
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
-        return Ok(map);
+        logger::log_info("Corporate Storage: No existing events directory found").await;
        return Ok(0);
    }
    let mut total = 0;
    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
@@ -22,51 +32,141 @@ pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEve
            if name.starts_with("events_") && name.len() == 17 {
                let content = fs::read_to_string(&path).await?;
                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
                for event in events {
-                    map.insert(event_key(&event), event);
+                    callback(event)?;
                    total += 1;
                }
                // Yield to prevent blocking
                tokio::task::yield_now().await;
            }
        }
    }
    logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
    Ok(total)
 }
 /// Build lightweight index of events instead of loading everything
 #[derive(Debug, Clone)]
 pub struct EventIndex {
    pub key: String,
    pub ticker: String,
    pub date: String,
    pub file_path: PathBuf,
 }
 pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventIndex>> {
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
        return Ok(Vec::new());
    }
    let mut index = Vec::new();
    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if name.starts_with("events_") && name.len() == 17 {
                let content = fs::read_to_string(&path).await?;
                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
                for event in events {
                    index.push(EventIndex {
                        key: event_key(&event),
                        ticker: event.ticker.clone(),
                        date: event.date.clone(),
                        file_path: path.clone(),
                    });
                }
            }
        }
    }
-    Ok(map)
+    
    logger::log_info(&format!("Corporate Storage: Built index with {} entries", index.len())).await;
    Ok(index)
 }
-pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
+/// Lookup specific event by loading only its file
-    let dir = std::path::Path::new("corporate_events");
+pub async fn lookup_event_by_key(
    key: &str,
    index: &[EventIndex]
 ) -> anyhow::Result<Option<CompanyEvent>> {
    let entry = index.iter().find(|e| e.key == key);
    if let Some(entry) = entry {
        let content = fs::read_to_string(&entry.file_path).await?;
        let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
        Ok(events.into_iter().find(|e| event_key(e) == key))
    } else {
        Ok(None)
    }
 }
 pub async fn save_optimized_events(
    paths: &DataPaths, 
    events: Vec<CompanyEvent> // Changed from HashMap to Vec
 ) -> anyhow::Result<()> {
    let dir = paths.corporate_events_dir();
    fs::create_dir_all(dir).await?;
    logger::log_info("Corporate Storage: Removing old event files...").await;
    let mut removed_count = 0;
    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
            fs::remove_file(&path).await?;
            removed_count += 1;
        }
    }
    logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;
-    let mut sorted: Vec<_> = events.into_values().collect();
+    let total_events = events.len();
-    sorted.sort_by_key(|e| (e.ticker.clone(), e.date.clone()));
+    let mut sorted = events;
    sorted.sort_by(|a, b| {
        a.ticker.cmp(&b.ticker)
            .then(a.date.cmp(&b.date))
    });
    // Process in batches to avoid memory buildup
    let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();
-    for e in sorted {
+    
-        if let Ok(d) = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d") {
+    for chunk in sorted.chunks(BATCH_SIZE) {
-            let key = format!("{}-{:02}", d.year(), d.month());
+        for e in chunk {
-            by_month.entry(key).or_default().push(e);
+            if let Ok(d) = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d") {
                let key = format!("{}-{:02}", d.year(), d.month());
                by_month.entry(key).or_default().push(e.clone());
            }
        }
        tokio::task::yield_now().await;
    }
    let total_months = by_month.len();
    for (month, list) in by_month {
        let path = dir.join(format!("events_{}.json", month));
        fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
        logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
    }
    logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
    Ok(())
 }
-pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()> {
+pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() { return Ok(()); }
+    if changes.is_empty() { 
-    let dir = std::path::Path::new("corporate_event_changes");
+        logger::log_info("Corporate Storage: No changes to save").await;
        return Ok(()); 
    }
    let dir = paths.corporate_changes_dir();
    fs::create_dir_all(dir).await?;
    logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;
    let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -81,14 +181,21 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()>
            let s = fs::read_to_string(&path).await?;
            serde_json::from_str(&s).unwrap_or_default()
        } else { vec![] };
-        all.extend(list);
+        all.extend(list.clone());
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
        logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
    }
    logger::log_info("Corporate Storage: All changes saved successfully").await;
    Ok(())
 }
-pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
+pub async fn save_prices_for_ticker(
-    let base_dir = Path::new("corporate_prices");
+    paths: &DataPaths, 
    ticker: &str, 
    timeframe: &str, 
    mut prices: Vec<CompanyPrice>
 ) -> anyhow::Result<()> {
    let base_dir = paths.corporate_prices_dir();
    let company_dir = base_dir.join(ticker.replace(".", "_"));
    let timeframe_dir = company_dir.join(timeframe);
@@ -102,35 +209,39 @@ pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: V
    Ok(())
 }
-pub fn get_company_dir(lei: &str) -> PathBuf {
+pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
-    PathBuf::from("corporate_prices").join(lei)
+    paths.corporate_prices_dir().join(lei)
 }
-pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
+pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
-    let base = get_company_dir(isin);
+    let base = get_company_dir(paths, isin);
-    let paths = [
+    let paths_to_create = [
        base.clone(),
        base.join("5min"),
        base.join("daily"),
        base.join("aggregated").join("5min"),
        base.join("aggregated").join("daily"),
    ];
-    for p in paths {
+    for p in paths_to_create {
        fs::create_dir_all(&p).await?;
    }
    Ok(())
 }
-pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
+pub async fn save_available_exchanges(
-    let dir = get_company_dir(isin);
+    paths: &DataPaths, 
    isin: &str, 
    exchanges: Vec<AvailableExchange>
 ) -> anyhow::Result<()> {
    let dir = get_company_dir(paths, isin);
    fs::create_dir_all(&dir).await?;
    let path = dir.join("available_exchanges.json");
    fs::write(&path, serde_json::to_string_pretty(&exchanges)?).await?;
    Ok(())
 }
-pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
+pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
-    let path = get_company_dir(lei).join("available_exchanges.json");
+    let path = get_company_dir(paths, lei).join("available_exchanges.json");
    if path.exists() {
        let content = fs::read_to_string(&path).await?;
        Ok(serde_json::from_str(&content)?)
@@ -140,13 +251,14 @@ pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<Available
 }
 pub async fn save_prices_by_source(
    paths: &DataPaths,
    lei: &str,
    source_ticker: &str,
    timeframe: &str,
    prices: Vec<CompanyPrice>,
 ) -> anyhow::Result<()> {
    let source_safe = source_ticker.replace(".", "_").replace("/", "_");
-    let dir = get_company_dir(lei).join(timeframe).join(&source_safe);
+    let dir = get_company_dir(paths, lei).join(timeframe).join(&source_safe);
    fs::create_dir_all(&dir).await?;
    let path = dir.join("prices.json");
    let mut prices = prices;
@@ -155,83 +267,73 @@ pub async fn save_prices_by_source(
    Ok(())
 }
-/// Update available_exchanges.json with fetch results
+/// Saves companies data to a JSONL file in streaming fashion
-pub async fn update_available_exchange(
+pub async fn save_companies_to_jsonl_streaming(
-    isin: &str,
+    paths: &DataPaths,
-    ticker: &str,
+    companies: &HashMap<String, HashMap<String, String>>,
    exchange_mic: &str,
    has_daily: bool,
    has_5min: bool,
 ) -> anyhow::Result<()> {
-    let mut exchanges = load_available_exchanges(isin).await?;
+    let file_path = paths.data_dir().join("companies.jsonl");
-    if let Some(entry) = exchanges.iter_mut().find(|e| e.ticker == ticker) {
+    logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;
-        // Update existing entry
+    
-        entry.record_success(has_daily, has_5min);
+    if let Some(parent) = file_path.parent() {
-    } else {
+        tokio::fs::create_dir_all(parent).await?;
        // Create new entry - need to get currency from somewhere
        // Try to infer from the ticker or use a default
        let currency = infer_currency_from_ticker(ticker);
        let mut new_entry = AvailableExchange::new(
            ticker.to_string(),
            exchange_mic.to_string(),
            currency,
        );
        new_entry.record_success(has_daily, has_5min);
        exchanges.push(new_entry);
    }
-    save_available_exchanges(isin, exchanges).await
+    let mut file = tokio::fs::File::create(&file_path).await?;
-}
+    let mut count = 0;
-/// Add a newly discovered exchange before fetching
+    // Process in batches
-/// 
+    for (name, securities) in companies.iter() {
-/// # Arguments
+        let line = serde_json::json!({
-/// * `isin` - The ISIN associated with the exchange.
+            "name": name,
-/// * `figi_info` - The FigiInfo containing ticker, mic_code, and currency.
+            "securities": securities
-/// 
+        });
-/// # Returns
+        file.write_all(line.to_string().as_bytes()).await?;
-/// Ok(()) on success.
+        file.write_all(b"\n").await?;
 /// 
 /// # Errors
 /// Returns an error if loading or saving available exchanges fails.
 pub async fn add_discovered_exchange(
    isin: &str,
    figi_info: &FigiInfo,
 ) -> anyhow::Result<()> {
    let mut exchanges = load_available_exchanges(isin).await?;
-    // Only add if not already present
+        count += 1;
-    if !exchanges.iter().any(|e| e.ticker == figi_info.ticker && e.exchange_mic == figi_info.mic_code) {
+        if count % 100 == 0 {
-        let new_entry = AvailableExchange::new(
+            tokio::task::yield_now().await;
-            figi_info.ticker.clone(),
+        }
            figi_info.mic_code.clone(),
            figi_info.currency.clone(),
        );
        exchanges.push(new_entry);
        save_available_exchanges(isin, exchanges).await?;
    }
    let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
    println!("{}", msg);
    logger::log_info(&msg).await;
    Ok(())
 }
-/// Infer currency from ticker suffix
+/// Load companies from JSONL in streaming fashion
-fn infer_currency_from_ticker(ticker: &str) -> String {
+pub async fn load_companies_from_jsonl_streaming(
-    if ticker.ends_with(".L") { return "GBP".to_string(); }
+    path: &Path,
-    if ticker.ends_with(".PA") { return "EUR".to_string(); }
+    callback: impl Fn(String, HashMap<String, String>) -> anyhow::Result<()>
-    if ticker.ends_with(".DE") { return "EUR".to_string(); }
+) -> anyhow::Result<usize> {
-    if ticker.ends_with(".AS") { return "EUR".to_string(); }
+    if !path.exists() {
-    if ticker.ends_with(".MI") { return "EUR".to_string(); }
+        return Ok(0);
-    if ticker.ends_with(".SW") { return "CHF".to_string(); }
+    }
    if ticker.ends_with(".T") { return "JPY".to_string(); }
    if ticker.ends_with(".HK") { return "HKD".to_string(); }
    if ticker.ends_with(".SS") { return "CNY".to_string(); }
    if ticker.ends_with(".SZ") { return "CNY".to_string(); }
    if ticker.ends_with(".TO") { return "CAD".to_string(); }
    if ticker.ends_with(".AX") { return "AUD".to_string(); }
    if ticker.ends_with(".SA") { return "BRL".to_string(); }
    if ticker.ends_with(".MC") { return "EUR".to_string(); }
    if ticker.ends_with(".BO") || ticker.ends_with(".NS") { return "INR".to_string(); }
-    "USD".to_string() // Default
+    let content = tokio::fs::read_to_string(path).await?;
    let mut count = 0;
    for line in content.lines() {
        if line.trim().is_empty() {
            continue;
        }
        let entry: serde_json::Value = serde_json::from_str(line)?;
        let name = entry["name"].as_str().unwrap_or("").to_string();
        let securities: HashMap<String, String> = serde_json::from_value(
            entry["securities"].clone()
        )?;
        callback(name, securities)?;
        count += 1;
        if count % 100 == 0 {
            tokio::task::yield_now().await;
        }
    }
    Ok(count)
 }
--- a/src/corporate/types.rs
+++ b/src/corporate/types.rs
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 // src/corporate/types.rs
 use std::collections::HashMap;
 use serde::{Deserialize, Serialize};
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -53,24 +52,19 @@ pub struct FigiInfo {
    pub figi: String,
    pub name: String,
    pub ticker: String,
-    pub mic_code: String,
+    pub exch_code: String,
-    pub currency: String,
+    #[serde(rename = "compositeFIGI")]
-    pub compositeFIGI: String,
+    pub composite_figi: String,
-    pub securityType: String,
+    #[serde(rename = "securityType")]
-    pub marketSector: String,
+    pub security_type: String,
-    pub shareClassFIGI: String,
+    #[serde(rename = "marketSector")]
-    pub securityType2: String,
+    pub market_sector: String,
-    pub securityDescription: String,
+    #[serde(rename = "shareClassFIGI")]
-}
+    pub share_class_figi: String,
-
+    #[serde(rename = "securityType2")]
-/// Company Meta Data
+    pub security_type2: String,
-/// # Attributes
+    #[serde(rename = "securityDescription")]
-/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
+    pub security_description: String,
 /// * figi: metadata with ISIN as key
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CompanyMetadata {
    pub lei: String,
    pub figi: Option<Vec<FigiInfo>>,
 }
 /// Company Info
@@ -85,6 +79,15 @@ pub struct CompanyInfo{
    pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo>
 }
 /// Company Meta Data
 /// # Attributes
 /// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
 /// * figi: metadata with ISIN as key
 /*#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CompanyMetadata {
    pub lei: String,
    pub figi: Option<Vec<FigiInfo>>,
 }*/
 /// Warrant Info
 /// 
@@ -115,13 +118,13 @@ pub struct OptionInfo {
    pub options: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
 }
-#[derive(Debug, Clone, Serialize, Deserialize)]
+/*#[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct PrimaryInfo {
    pub isin: String,
    pub name: String,
    pub exchange_mic: String,
    pub currency: String,
-}
+}*/
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AvailableExchange {
@@ -137,27 +140,3 @@ pub struct AvailableExchange {
    #[serde(default)]
    pub fetch_count: u32,                // How many times successfully fetched
 }
 impl AvailableExchange {
    pub fn new(ticker: String, exchange_mic: String, currency: String) -> Self {
        Self {
            exchange_mic,
            ticker,
            has_daily: false,
            has_5min: false,
            last_successful_fetch: None,
            currency,
            discovered_at: Some(chrono::Local::now().format("%Y-%m-%d").to_string()),
            fetch_count: 0,
        }
    }
    pub fn record_success(&mut self, has_daily: bool, has_5min: bool) {
        let today = chrono::Local::now().format("%Y-%m-%d").to_string();
        self.has_daily |= has_daily;
        self.has_5min |= has_5min;
        self.last_successful_fetch = Some(today);
        self.fetch_count += 1;
    }
 }
--- a/src/corporate/update.rs
+++ b/src/corporate/update.rs
@@ -1,104 +1,242 @@
 // src/corporate/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
 use crate::config::Config;
 use crate::util::directories::DataPaths;
 use crate::util::logger;
 use crate::scraper::webdriver::ChromeDriverPool;
 use chrono::Local;
 use std::collections::{HashMap};
 use std::sync::Arc;
-/// Main function: Full update for all companies (LEI-based) with optimized parallel execution.
+/// Main function: Full update for all companies with streaming to minimize memory usage
 ///
 /// This function coordinates the entire update process:
 /// - Loads GLEIF mappings
 /// - Builds FIGI-LEI map
 /// - Loads existing events
 /// - Processes each company: discovers exchanges via FIGI, fetches prices & earnings, aggregates data
 /// - Uses the provided shared ChromeDriver pool for efficient parallel scraping
 /// - Saves optimized events
 ///
 /// # Arguments
 /// * `config` - The application configuration.
 /// * `pool` - Shared pool of ChromeDriver instances for scraping.
 ///
 /// # Errors
 /// Returns an error if any step in the update process fails.
 pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
-    println!("=== Starting LEI-based corporate full update ===");
+    let msg = "=== Starting LEI-based corporate full update (STREAMING) ===";
    println!("{}", msg);
    logger::log_info(msg).await;
-    // 1. Load fresh GLEIF ISIN ↔ LEI mapping
+    let paths = DataPaths::new(".")?;
-    let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
+
-        Ok(map) => map,
+    // Step 1: Download/locate GLEIF CSV (don't load into memory yet)
-        Err(e) => {
+    logger::log_info("Corporate Update: Downloading/locating GLEIF CSV...").await;
-            eprintln!("Warning: Could not load GLEIF ISIN↔LEI mapping: {}", e);
+    let gleif_csv_path = match download_isin_lei_csv().await? {
-            HashMap::new()
+        Some(p) => {
            logger::log_info(&format!("Corporate Update: GLEIF CSV at: {}", p)).await;
            p
        }
        None => {
            logger::log_warn("Corporate Update: Could not obtain GLEIF CSV, continuing with limited data").await;
            return Ok(());
        }
    };
-    // 2. Load OpenFIGI mapping value lists (cached)
+    // Step 2: Load OpenFIGI type lists (small, cached)
    logger::log_info("Corporate Update: Loading OpenFIGI type lists...").await;
    if let Err(e) = load_figi_type_lists().await {
-        eprintln!("Warning: Could not load OpenFIGI type lists: {}", e);
+        logger::log_warn(&format!("Could not load OpenFIGI type lists: {}", e)).await;
    }
-    // 3. Build FIGI → LEI map
+    // Step 3: Process GLEIF → FIGI mapping in streaming fashion
-    // # Attributes
+    logger::log_info("Corporate Update: Building FIGI mappings (streaming)...").await;
-    // * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
+    
-    // * figi: metadata with ISIN as key
+    // Build LEI→ISINs map by streaming the CSV
-    let figi_to_lei:HashMap<String, Vec<FigiInfo>> = match build_lei_to_figi_infos(&lei_to_isins).await {
+    let mut lei_to_isins: HashMap<String, Vec<String>> = HashMap::new();
-        Ok(map) => map,
+    let mut lei_batch = Vec::new();
-        Err(e) => {
+    const LEI_BATCH_SIZE: usize = 1000;
-            eprintln!("Warning: Could not build FIGI→LEI map: {}", e);
+    
-            HashMap::new()
+    stream_gleif_csv(&gleif_csv_path, |lei, isin| {
        lei_to_isins.entry(lei.clone()).or_default().push(isin);
        lei_batch.push(lei);
        // Process in batches
        if lei_batch.len() >= LEI_BATCH_SIZE {
            lei_batch.clear();
        }
        Ok(())
    }).await?;
    logger::log_info(&format!("Corporate Update: Collected {} LEIs", lei_to_isins.len())).await;
    // Step 4: Build FIGI mappings in batches (process and save incrementally)
    logger::log_info("Corporate Update: Processing FIGI mappings in batches...").await;
    let figi_result = build_lei_to_figi_infos(&lei_to_isins, None).await;
    // Don't keep the full result in memory - it's already saved to JSONL files
    drop(figi_result);
    drop(lei_to_isins); // Release this too
    logger::log_info("Corporate Update: FIGI mappings saved to cache").await;
    // Step 5: Load or build securities (streaming from JSONL files)
    logger::log_info("Corporate Update: Building securities map (streaming)...").await;
    let dir = DataPaths::new(".")?;
    let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
    // Find the most recent date directory
    let date_dir = find_most_recent_date_dir(&map_cache_dir).await?;
    let (common_stocks, _warrants, _options) = if let Some(date_dir) = date_dir {
        logger::log_info(&format!("Using FIGI data from: {:?}", date_dir)).await;
        load_or_build_all_securities_streaming(&date_dir).await?
    } else {
        logger::log_warn("No FIGI date directory found, using empty maps").await;
        (HashMap::new(), HashMap::new(), HashMap::new())
    };
-    // 4. Load or build companies
+    logger::log_info(&format!("Corporate Update: Processing {} companies", common_stocks.len())).await;
    let mut companies = load_or_build_all_securities(&figi_to_lei).await?;
    println!("Processing {} companies", companies.0.len());
-    // 5. Load existing earnings events (for change detection)
+    // Step 6: Convert to simplified companies map and save incrementally
-    let today = Local::now().format("%Y-%m-%d").to_string();
+    logger::log_info("Corporate Update: Building companies JSONL (streaming)...").await;
    let mut existing_events = match load_existing_events().await {
        Ok(events) => events,
        Err(e) => {
            eprintln!("Warning: Could not load existing events: {}", e);
            HashMap::new()
        }
    };
-    // 5. Use the provided pool (no need to create a new one)
+    let companies_path = paths.data_dir().join("companies.jsonl");
    let pool_size = pool.get_number_of_instances(); // Use the size from the shared pool
-    // Process companies in parallel using the shared pool
+    // Create file and write incrementally
-    /*let results: Vec<_> = stream::iter(companies.into_iter())
+    if let Some(parent) = companies_path.parent() {
-        .map(|company| {
+        tokio::fs::create_dir_all(parent).await?;
-            let pool_clone = pool.clone();
+    }
-            async move {
+    
-                process_company_data(&company, &pool_clone, &mut existing_events).await
+    let mut file = tokio::fs::File::create(&companies_path).await?;
    let mut processed = 0;
    for (name, company_info) in common_stocks.iter() {
        let mut isin_ticker_pairs: HashMap<String, String> = HashMap::new();
        for figi_infos in company_info.securities.values() {
            for figi_info in figi_infos {
                if !figi_info.isin.is_empty() && !figi_info.ticker.is_empty() {
                    isin_ticker_pairs.insert(figi_info.isin.clone(), figi_info.ticker.clone());
                }
            }
        })
        .buffer_unordered(pool_size)
        .collect().await;
    // Handle results (e.g., collect changes)
    let mut all_changes = Vec::new();
    for result in results {
        if let Ok(ProcessResult { changes }) = result {
            all_changes.extend(changes);
        }
    }*/
-    save_optimized_events(existing_events).await?;
+        if !isin_ticker_pairs.is_empty() {
-    //save_changes(&all_changes).await?;
+            use tokio::io::AsyncWriteExt;
-    //println!("Corporate update complete — {} changes detected", all_changes.len());
+            let line = serde_json::json!({
                "name": name,
                "securities": isin_ticker_pairs
            });
            file.write_all(line.to_string().as_bytes()).await?;
            file.write_all(b"\n").await?;
            processed += 1;
            // Yield periodically
            if processed % 100 == 0 {
                tokio::task::yield_now().await;
                logger::log_info(&format!("Saved {} companies so far...", processed)).await;
            }
        }
    }
    logger::log_info(&format!("Corporate Update: Saved {} companies to JSONL", processed)).await;
    // Step 7: Process events in streaming fashion
    logger::log_info("Corporate Update: Processing events (streaming)...").await;
    let event_index = build_event_index(&paths).await?;
    logger::log_info(&format!("Corporate Update: Built index of {} events", event_index.len())).await;
    // For now, we just maintain the index
    // In a full implementation, you'd stream through tickers and update events
    // Step 8: Save any updates
    logger::log_info("Corporate Update: Finalizing...").await;
    let msg = "✓ Corporate update complete (streaming)";
    println!("{}", msg);
    logger::log_info(msg).await;
    Ok(())
 }
 /// Helper to find the most recent date directory in the FIGI cache
 async fn find_most_recent_date_dir(map_cache_dir: &std::path::Path) -> anyhow::Result<Option<std::path::PathBuf>> {
    if !map_cache_dir.exists() {
        return Ok(None);
    }
    let mut entries = tokio::fs::read_dir(map_cache_dir).await?;
    let mut dates = Vec::new();
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.is_dir() {
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                // Date format: DDMMYYYY
                if name.len() == 8 && name.chars().all(|c| c.is_numeric()) {
                    dates.push((name.to_string(), path));
                }
            }
        }
    }
    if dates.is_empty() {
        return Ok(None);
    }
    // Sort by date (DDMMYYYY format)
    dates.sort_by(|a, b| b.0.cmp(&a.0)); // Descending order
    Ok(Some(dates[0].1.clone()))
 }
 pub struct ProcessResult {
    pub changes: Vec<CompanyEventChange>,
 }
 /// Process events in batches to avoid memory buildup
 pub async fn process_events_streaming(
    index: &[EventIndex],
    new_events: &[CompanyEvent],
    today: &str,
 ) -> anyhow::Result<(Vec<CompanyEventChange>, Vec<CompanyEvent>)> {
    let mut all_changes = Vec::new();
    let mut final_events: HashMap<String, CompanyEvent> = HashMap::new();
    // Step 1: Load existing events in batches using the index
    logger::log_info("Loading existing events in batches...").await;
    let mut loaded_files = std::collections::HashSet::new();
    for entry in index {
        if loaded_files.contains(&entry.file_path) {
            continue;
        }
        let content = tokio::fs::read_to_string(&entry.file_path).await?;
        let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
        for e in events {
            final_events.insert(event_key(&e), e);
        }
        loaded_files.insert(entry.file_path.clone());
        if final_events.len() % 1000 == 0 {
            logger::log_info(&format!("Loaded {} events so far...", final_events.len())).await;
            tokio::task::yield_now().await;
        }
    }
    logger::log_info(&format!("Loaded {} existing events", final_events.len())).await;
    // Step 2: Process new events in batches
    for (idx, batch) in new_events.chunks(500).enumerate() {
        logger::log_info(&format!("Processing batch {} ({} events)", idx + 1, batch.len())).await;
        let batch_result = process_batch(batch, &mut final_events, today);
        all_changes.extend(batch_result.changes);
        tokio::task::yield_now().await;
    }
    let events_vec: Vec<CompanyEvent> = final_events.into_values().collect();
    Ok((all_changes, events_vec))
 }
 pub fn process_batch(
    new_events: &[CompanyEvent],
    existing: &mut HashMap<String, CompanyEvent>,
--- a/src/economic/scraper.rs
+++ b/src/economic/scraper.rs
@@ -7,39 +7,10 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");
 pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
    client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
    //dismiss_overlays(client).await?;
    /*if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
        tab.click().await?;
        println!("High importance tab selected");
        sleep(Duration::from_secs(2)).await;
    }*/
    Ok(())
 }
 /*pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
    for _ in 0..10 {
        let removed: bool = client
            .execute(
                r#"(() => {
                    const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
                    if (iframe && iframe.parentNode) {
                        iframe.parentNode.removeChild(iframe);
                        return true;
                    }
                    return false;
                })()"#,
                vec![],
            )
            .await?
            .as_bool()
            .unwrap_or(false);
        if removed { break; }
        sleep(Duration::from_millis(500)).await;
    }
    Ok(())
 }*/
 pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
    let script = format!(
        r#"
--- a/src/economic/storage.rs
+++ b/src/economic/storage.rs
@@ -1,12 +1,18 @@
 // src/economic/storage.rs
 use super::types::*;
 use super::helpers::*;
 use crate::util::directories::DataPaths;
 use crate::util::logger;
 use tokio::fs;
 use chrono::{NaiveDate, Datelike};
 use std::collections::HashMap;
 use serde_json;
-pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
+const CHUNK_SIZE: usize = 500; // Process 500 events at a time
-    let dir = std::path::Path::new("data/economic/events");
+const MAX_EVENTS_PER_FILE: usize = 3000;
 pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
    let dir = paths.economic_events_dir();
    let mut chunks = Vec::new();
    if dir.exists() {
@@ -16,83 +22,184 @@ pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
            if path.extension().map(|e| e == "json").unwrap_or(false) {
                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                    if name.starts_with("chunk_") {
-                        if let Some(content) = fs::read_to_string(&path).await.ok() {
+                        // Don't load the events here, just record the chunk info
-                            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content) {
+                        let start = name[6..16].to_string();
-                                let start = name[6..16].to_string();
+                        let end = name[17..27].to_string();
-                                let end = name[17..27].to_string();
+                        chunks.push(ChunkInfo { 
-                                chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: events.len() });
+                            start_date: start, 
-                            }
+                            end_date: end, 
-                        }
+                            path, 
                            event_count: 0 // We'll count later if needed
                        });
                    }
                }
            }
        }
    }
    chunks.sort_by_key(|c| c.start_date.clone());
    logger::log_info(&format!("Economic Storage: Found {} event chunks", chunks.len())).await;
    Ok(chunks)
 }
-pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMap<String, EconomicEvent>> {
+/// Stream events from a single chunk file
-    let mut map = HashMap::new();
+pub async fn stream_chunk_events(
-    for chunk in chunks {
+    chunk: &ChunkInfo,
-        let content = fs::read_to_string(&chunk.path).await?;
+    callback: impl Fn(EconomicEvent) -> anyhow::Result<()>
-        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
+) -> anyhow::Result<usize> {
-        for e in events {
+    let content = fs::read_to_string(&chunk.path).await?;
-            map.insert(event_key(&e), e);
+    let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
-        }
+    let count = events.len();
    for event in events {
        callback(event)?;
    }
-    Ok(map)
+    
    Ok(count)
 }
-pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
+/// Load events in batches to avoid memory explosion
-    let dir = std::path::Path::new("data/economic/events");
+pub async fn load_events_in_batches(
    chunks: &[ChunkInfo],
    batch_size: usize,
 ) -> anyhow::Result<impl Iterator<Item = (String, EconomicEvent)>> {
    let mut all_events = Vec::new();
    for chunk in chunks {
        logger::log_info(&format!("Loading chunk: {:?}", chunk.path.file_name())).await;
        let content = fs::read_to_string(&chunk.path).await?;
        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
        for e in events {
            all_events.push((event_key(&e), e));
        }
        // If we've accumulated enough, yield them
        if all_events.len() >= batch_size {
            break;
        }
    }
    logger::log_info(&format!("Loaded {} events in batch", all_events.len())).await;
    Ok(all_events.into_iter())
 }
 /// NEW: Build a lightweight index instead of loading all events
 #[derive(Debug, Clone)]
 pub struct EventIndex {
    pub key: String,
    pub identity_key: String,
    pub date: String,
    pub chunk_file: std::path::PathBuf,
 }
 pub async fn build_event_index(chunks: &[ChunkInfo]) -> anyhow::Result<Vec<EventIndex>> {
    let mut index = Vec::new();
    for chunk in chunks {
        logger::log_info(&format!("Indexing chunk: {:?}", chunk.path.file_name())).await;
        let content = fs::read_to_string(&chunk.path).await?;
        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
        for e in events {
            index.push(EventIndex {
                key: event_key(&e),
                identity_key: identity_key(&e),
                date: e.date.clone(),
                chunk_file: chunk.path.clone(),
            });
        }
    }
    logger::log_info(&format!("Built index with {} entries", index.len())).await;
    Ok(index)
 }
 /// NEW: Look up a specific event by loading only its chunk
 pub async fn lookup_event_by_key(key: &str, index: &[EventIndex]) -> anyhow::Result<Option<EconomicEvent>> {
    // Find which chunk contains this event
    let entry = index.iter().find(|e| e.key == key);
    if let Some(entry) = entry {
        // Load only that chunk
        let content = fs::read_to_string(&entry.chunk_file).await?;
        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
        // Find the specific event
        Ok(events.into_iter().find(|e| event_key(e) == key))
    } else {
        Ok(None)
    }
 }
 /// Save events in smaller, more manageable chunks
 pub async fn save_optimized_chunks(
    paths: &DataPaths, 
    events: Vec<EconomicEvent> // Changed from HashMap to Vec
 ) -> anyhow::Result<()> {
    let dir = paths.economic_events_dir();
    fs::create_dir_all(dir).await?;
-    // Delete all old chunk files to prevent duplicates and overlaps
+    logger::log_info("Economic Storage: Removing old chunk files...").await;
    println!("Removing old chunks...");
    let mut entries = fs::read_dir(dir).await?;
    let mut removed_count = 0;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if name.starts_with("chunk_") && path.extension().map(|e| e == "json").unwrap_or(false) {
                fs::remove_file(&path).await?;
                removed_count += 1;
            }
        }
    }
    logger::log_info(&format!("Economic Storage: Removed {} old chunk files", removed_count)).await;
-    let mut sorted: Vec<_> = events.into_values().collect();
+    let mut sorted = events;
-    sorted.sort_by_key(|e| e.date.clone());
+    sorted.sort_by(|a, b| a.date.cmp(&b.date));
-    let mut chunk: Vec<EconomicEvent> = Vec::new();
+    // Save in smaller chunks
-    const MAX_EVENTS_PER_CHUNK: usize = ( 30000 / 2 ) / 11;  // (30000 - 2) / 11 = 2727
+    let mut chunk_num = 0;
    for chunk in sorted.chunks(MAX_EVENTS_PER_FILE) {
        save_chunk_vec(chunk, dir, chunk_num).await?;
        chunk_num += 1;
-    for e in sorted {
+        // Allow other tasks to run
-        if !chunk.is_empty() && chunk.len() >= MAX_EVENTS_PER_CHUNK {
+        tokio::task::yield_now().await;
            save_chunk(&chunk, dir).await?;
            chunk.clear();
        }
        chunk.push(e);
    }
    if !chunk.is_empty() {
        save_chunk(&chunk, dir).await?;
    }
    logger::log_info(&format!("Economic Storage: Saved {} chunks to {:?}", chunk_num, dir)).await;
    Ok(())
 }
-async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> {
+async fn save_chunk_vec(events: &[EconomicEvent], dir: &std::path::Path, chunk_num: usize) -> anyhow::Result<()> {
-    let start = events.iter().map(|e| &e.date).min().unwrap().clone();
+    if events.is_empty() {
-    let end = events.iter().map(|e| &e.date).max().unwrap().clone();
+        return Ok(());
-    let path = dir.join(format!("chunk_{}_{}.json", start, end));
+    }
-    fs::write(&path, serde_json::to_string_pretty(events)?).await?;
+    
    let start = &events[0].date;
    let end = &events[events.len() - 1].date;
    let path = dir.join(format!("chunk_{:04}_{}_{}.json", chunk_num, start, end));
    // Write incrementally to avoid large memory allocation
    let json = serde_json::to_string_pretty(events)?;
    fs::write(&path, json).await?;
    logger::log_info(&format!("Economic Storage: Saved chunk {} - {} ({} events)", start, end, events.len())).await;
    Ok(())
 }
-pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
+pub async fn save_changes(paths: &DataPaths, changes: &[EventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() { return Ok(()); }
+    if changes.is_empty() { 
-    let dir = std::path::Path::new("economic_event_changes");
+        logger::log_info("Economic Storage: No changes to save").await;
        return Ok(()); 
    }
    let dir = paths.economic_changes_dir();
    fs::create_dir_all(dir).await?;
    logger::log_info(&format!("Economic Storage: Saving {} changes to {:?}", changes.len(), dir)).await;
    let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -107,8 +214,10 @@ pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
            let s = fs::read_to_string(&path).await?;
            serde_json::from_str(&s).unwrap_or_default()
        } else { vec![] };
-        all.extend(list);
+        all.extend(list.clone());
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
        logger::log_info(&format!("Economic Storage: Saved {} changes for month {}", list.len(), month)).await;
    }
    logger::log_info("Economic Storage: All changes saved successfully").await;
    Ok(())
 }
--- a/src/economic/update.rs
+++ b/src/economic/update.rs
@@ -1,70 +1,137 @@
 // src/economic/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*};
-use crate::{config::Config, scraper::webdriver::ScrapeTask};
+use crate::{config::Config, scraper::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger};
 use crate::scraper::webdriver::ChromeDriverPool;
 use chrono::{Local};
 use std::sync::Arc;
 use std::collections::HashMap;
-/// Runs the full update for economic data, using the provided ChromeDriver pool.
+/// Runs the full update for economic data using streaming to minimize memory usage
 ///
 /// # Arguments
 /// * `config` - The application configuration.
 /// * `pool` - Shared pool of ChromeDriver instances for scraping.
 ///
 /// # Errors
 /// Returns an error if scraping, loading, or saving fails.
 pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
    let paths = DataPaths::new(".")?;
    logger::log_info("Economic Update: Initializing...").await;
    let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
    let end_date = config.target_end_date();
-    let chunks = scan_existing_chunks().await?;
+    // Step 1: Build lightweight index instead of loading all events
-    let mut events = load_existing_events(&chunks).await?;
+    logger::log_info("Economic Update: Building event index...").await;
-    println!("Loaded {} events from {} chunks", events.len(), chunks.len());
+    let chunks = scan_existing_chunks(&paths).await?;
    let event_index = build_event_index(&chunks).await?;
-    let start_date = if events.is_empty() {
+    logger::log_info(&format!("Economic Update: Indexed {} events from {} chunks", 
        event_index.len(), chunks.len())).await;
    // Step 2: Determine start date
    let start_date = if event_index.is_empty() {
        logger::log_warn("Economic Update: No existing events found, starting from config date").await;
        config.economic_start_date.clone()
    } else if events.values().any(|e| e.date >= today_str) {
        today_str.clone()
    } else {
-        events.values()
+        // Find the latest date in the index
-            .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
+        let max_date = event_index.iter()
            .map(|e| &e.date)
            .max()
-            .and_then(|d| d.succ_opt())
+            .cloned()
-            .map(|d| d.format("%Y-%m-%d").to_string())
+            .unwrap_or(today_str.clone());
-            .unwrap_or(today_str.clone())
+        
        if max_date >= today_str {
            logger::log_info("Economic Update: Events exist for today, starting from today").await;
            today_str.clone()
        } else {
            let next = chrono::NaiveDate::parse_from_str(&max_date, "%Y-%m-%d")
                .ok()
                .and_then(|d| d.succ_opt())
                .map(|d| d.format("%Y-%m-%d").to_string())
                .unwrap_or(today_str.clone());
            logger::log_info(&format!("Economic Update: Resuming from: {}", next)).await;
            next
        }
    };
-    println!("Scraping economic events: {} → {}", start_date, end_date);
+    logger::log_info(&format!("Economic Update: Scraping events from {} → {}", start_date, end_date)).await;
-    // Pass the pool to the scraping function
+    // Step 3: Scrape new events in batches
-    let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;
+    let new_events = scrape_all_economic_events(&start_date, &end_date, pool).await?;
-    // Process all at once or in batches
+    logger::log_info(&format!("Economic Update: Scraped {} new events", new_events.len())).await;
    let result = process_batch(&new_events_all, &mut events, &today_str);
    let total_changes = result.changes.len();
    save_changes(&result.changes).await?;
-    save_optimized_chunks(events).await?;
+    // Step 4: Process events in streaming fashion
-    println!("Economic update complete — {} changes detected", total_changes);
+    let (changes, updated_events) = process_events_streaming(&chunks, &new_events, &today_str).await?;
    logger::log_info(&format!("Economic Update: Detected {} changes", changes.len())).await;
    if !changes.is_empty() {
        logger::log_info(&format!("Economic Update: Saving {} changes to log", changes.len())).await;
        save_changes(&paths, &changes).await?;
        logger::log_info("Economic Update: Changes saved successfully").await;
    }
    // Step 5: Save consolidated events
    logger::log_info(&format!("Economic Update: Saving {} total events to chunks", updated_events.len())).await;
    save_optimized_chunks(&paths, updated_events).await?;
    logger::log_info(&format!("✓ Economic update complete — {} changes detected", changes.len())).await;
    Ok(())
 }
-/// Scrapes all economic events from start to end date using a dedicated ScrapeTask with the provided pool.
+/// Process events using streaming to minimize memory usage
-///
+async fn process_events_streaming(
-/// This function creates a ScrapeTask to navigate to the Finanzen.net page, prepare it,
+    chunks: &[ChunkInfo],
-/// and then loop through date ranges to extract events.
+    new_events: &[EconomicEvent],
-///
+    today: &str,
-/// # Arguments
+) -> anyhow::Result<(Vec<EventChange>, Vec<EconomicEvent>)> {
-/// * `start` - Start date in YYYY-MM-DD.
+    let mut all_changes = Vec::new();
-/// * `end` - End date in YYYY-MM-DD.
+    let mut final_events: HashMap<String, EconomicEvent> = HashMap::new();
-/// * `pool` - Shared pool of ChromeDriver instances.
+    
-///
+    // Step 1: Load existing events in batches
-/// # Returns
+    logger::log_info("Processing existing events in batches...").await;
-/// A vector of all extracted EconomicEvent structs.
+    
-///
+    for chunk in chunks {
-/// # Errors
+        logger::log_info(&format!("Loading chunk: {:?}", chunk.path.file_name())).await;
-/// Returns an error if task execution fails or extraction issues occur.
+        
-pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<Vec<EconomicEvent>> {
+        let content = tokio::fs::read_to_string(&chunk.path).await?;
        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
        // Add to final events map
        for e in events {
            final_events.insert(event_key(&e), e);
        }
        // Clear memory periodically
        if final_events.len() > 10000 {
            logger::log_info(&format!("Loaded {} events so far...", final_events.len())).await;
        }
    }
    logger::log_info(&format!("Loaded {} existing events total", final_events.len())).await;
    // Step 2: Process new events in batches
    logger::log_info("Processing new events...").await;
    for (idx, batch) in new_events.chunks(500).enumerate() {
        logger::log_info(&format!("Processing batch {} ({} events)", idx + 1, batch.len())).await;
        let batch_result = process_batch(batch, &mut final_events, today);
        all_changes.extend(batch_result.changes);
        // Yield to prevent blocking
        tokio::task::yield_now().await;
    }
    logger::log_info(&format!("Processing complete. Total events: {}", final_events.len())).await;
    // Convert HashMap to Vec for saving
    let events_vec: Vec<EconomicEvent> = final_events.into_values().collect();
    Ok((all_changes, events_vec))
 }
 /// Scrapes all economic events from start to end date
 pub async fn scrape_all_economic_events(
    start: &str, 
    end: &str, 
    pool: &Arc<ChromeDriverPool>
 ) -> anyhow::Result<Vec<EconomicEvent>> {
    let url = "https://www.finanzen.net/termine/wirtschaftsdaten/".to_string();
    let start_clone = start.to_string();
    let end_clone = end.to_string();
@@ -78,9 +145,18 @@ pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<Chrom
            set_date_range(&client, &current, &end_clone).await?;
            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
            let new_events = extract_events(&client).await?;
-            if new_events.is_empty() { break; }
+            
            if new_events.is_empty() { 
                break; 
            }
            all_events.extend(new_events.clone());
            // Prevent memory buildup - process in chunks if too large
            if all_events.len() > 5000 {
                logger::log_info(&format!("Scraped {} events so far, continuing...", all_events.len())).await;
            }
            let next = new_events.iter()
                .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
                .max()
@@ -91,16 +167,17 @@ pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<Chrom
            if next > end_clone { break; }
            current = next;
        }
        Ok(all_events)
    });
    // Use the pool for execution
    task.execute_with_pool(pool).await
 }
 /// Process a batch of events and detect changes
 pub fn process_batch(
    new_events: &[EconomicEvent],
-    existing: &mut std::collections::HashMap<String, EconomicEvent>,
+    existing: &mut HashMap<String, EconomicEvent>,
    today: &str,
 ) -> ScrapeResult {
    let mut changes = Vec::new();
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -0,0 +1,15 @@
 // src/lib.rs
 //! Event Backtest Engine - Core Library
 //!
 //! Exposes all public modules for use in examples and tests
 pub mod config;
 pub mod scraper;
 pub mod util;
 // Re-export commonly used types for convenience
 pub use config::Config;
 pub use scraper::webdriver::{ChromeDriverPool, ChromeInstance, ScrapeTask};
 pub use util::directories::DataPaths;
 pub use util::logger;
 pub use util::opnv;
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,43 +1,169 @@
 // src/main.rs
-mod economic;
+
 mod corporate;
 mod config;
 mod corporate;
 mod economic;
 mod util;
 mod scraper;
 use anyhow::Result;
 use config::Config;
 use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers};
 use scraper::webdriver::ChromeDriverPool;
 use util::directories::DataPaths;
 use util::{logger, opnv};
 use std::sync::Arc;
-/// The entry point of the application.
+/// Application entry point
-///
+// src/main.rs
-/// This function loads the configuration, initializes a shared ChromeDriver pool,
+
-/// and sequentially runs the full updates for corporate and economic data.
+// ... existing imports ...
-/// Sequential execution helps prevent resource exhaustion from concurrent
+
 /// chromedriver instances and avoids spamming the target websites with too many requests.
 ///
 /// # Errors
 ///
 /// Returns an error if configuration loading fails, pool initialization fails,
 /// or if either update function encounters an issue (e.g., network errors,
 /// scraping failures, or chromedriver spawn failures like "program not found").
 #[tokio::main]
 async fn main() -> Result<()> {
    cleanup_all_proxy_containers().await.ok();
    // Load configuration from .env
    let config = Config::load().map_err(|err| {
-        println!("Failed to load Config .env: {}", err);
+        eprintln!("Failed to load config: {}", err);
        err
    })?;
-    // Initialize the shared ChromeDriver pool once
+    // Initialize paths and logger
-    let pool_size = config.max_parallel_tasks;
+    let paths = DataPaths::new(".")?;
-    let pool = Arc::new(ChromeDriverPool::new(pool_size).await?);
+    logger::init_debug_logger(paths.logs_dir()).await.ok();
    logger::log_info("=== Event Backtest Engine Started ===").await;
    logger::log_info(&format!(
        "Config → parallel_instances: {}, task_limit: {} vpn_rotation: {}",
        config.max_parallel_instances,
        config.max_tasks_per_instance,
        config.enable_vpn_rotation
    )).await;
-    // Run economic update first, passing the shared pool
+    // === Step 1: Fetch fresh VPNBook credentials and .ovpn files (if rotation enabled) ===
    let proxy_pool: Option<Arc<DockerVpnProxyPool>> = if config.enable_vpn_rotation {
        logger::log_info("VPN Rotation Enabled — Fetching latest VPNBook configs").await;
        // We only need 1 Chrome instance to scrape vpnbook.com (no proxy yet)
        let temp_pool = Arc::new(ChromeDriverPool::new(1).await?);
        let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?;
        logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
        // Count how many distinct servers (subfolders) we have in cache/openvpn/
        let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
            .filter(|e| e.as_ref().unwrap().path().is_dir())
            .count();
        if server_count == 0 {
            logger::log_warn("No VPN servers found — continuing without VPN").await;
            None
        } else {
            logger::log_info(&format!("Found {} VPN servers — starting Docker proxy containers", server_count)).await;
            let pp = Arc::new(
                DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password).await?
            );
            // Verify all proxies are working before proceeding
            logger::log_info("Verifying all proxy connections...").await;
            let mut all_working = true;
            for i in 0..pp.num_proxies() {
                match pp.test_proxy_connection(i).await {
                    Ok(ip) => {
                        logger::log_info(&format!("  Proxy {}: working with IP: {}", i + 1, ip)).await;
                    }
                    Err(e) => {
                        logger::log_error(&format!("  Proxy {}: FAILED - {}", i + 1, e)).await;
                        all_working = false;
                    }
                }
            }
            if !all_working {
                logger::log_warn("Some proxies failed, but continuing with working ones...").await;
            } else {
                logger::log_info("All proxies verified and ready!").await;
            }
            logger::log_info(&format!("All {} Docker proxy containers started and ready", pp.num_proxies())).await;
            Some(pp)
        }
    } else {
        logger::log_info("VPN rotation disabled — using direct connection").await;
        None
    };
    // === Step 2: Initialize the main ChromeDriver pool (with proxy if enabled) ===
    let pool_size = config.max_parallel_instances;
    let task_limit = config.max_tasks_per_instance;
    logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", pool_size)).await;
    let pool = Arc::new(
        if task_limit > 0 {
            ChromeDriverPool::new_with_proxy_and_task_limit(pool_size, proxy_pool.clone(), task_limit).await?
        } else {
            ChromeDriverPool::new_with_proxy(pool_size, proxy_pool.clone()).await?
        }
    );
    logger::log_info(&format!("ChromeDriver pool ready with {} instances", pool_size)).await;
    // === Step 3: Graceful Ctrl+C handler ===
    {
        let pool_clone = Arc::clone(&pool);
        let proxy_clone = proxy_pool.clone();
        tokio::spawn(async move {
            tokio::signal::ctrl_c().await.ok();
            logger::log_info("Ctrl+C received — shutting down gracefully...").await;
            // Now works: &*pool_clone derefs Arc → &ChromeDriverPool
            if let Err(e) = (&*pool_clone).shutdown().await {
                logger::log_error(&format!("Error during pool shutdown: {}", e)).await;
            }
            if let Some(pp) = proxy_clone {
                if let Err(e) = pp.shutdown().await {
                    logger::log_warn(&format!("Failed to stop Docker containers: {}", e)).await;
                } else {
                    logger::log_info("All Docker VPN containers stopped").await;
                }
            }
            let _ = cleanup_all_proxy_containers().await;
            std::process::exit(0);
        });
    }
    // === Step 4: Run the actual scraping jobs ===
    logger::log_info("--- Starting ECONOMIC data update ---").await;
    economic::run_full_update(&config, &pool).await?;
    logger::log_info("Economic update completed").await;
-    // Then run corporate update, passing the shared pool
+    logger::log_info("--- Starting CORPORATE data update ---").await;
    corporate::run_full_update(&config, &pool).await?;
    logger::log_info("Corporate update completed").await;
    // === Step 5: Final cleanup ===
    logger::log_info("Shutting down ChromeDriver pool...").await;
    pool.shutdown().await?;
    if let Some(pp) = proxy_pool {
        logger::log_info("Stopping Docker VPN proxy containers...").await;
        pp.shutdown().await?;
        // CLEANUP ANY LEFTOVER CONTAINERS FROM PREVIOUS RUNS
        cleanup_all_proxy_containers().await.ok();
    }
    logger::log_info("=== Application finished successfully ===").await;
    Ok(())
 }
 /* 
 memory allocation of 4294967296 bytes failed
 error: process didn't exit successfully: `target\debug\event_backtest_engine.exe` (exit code: 0xc0000409, STATUS_STACK_BUFFER_OVERRUN)
 */
--- a/src/scraper/docker_vpn_proxy.rs
+++ b/src/scraper/docker_vpn_proxy.rs
@@ -0,0 +1,407 @@
 use anyhow::{anyhow, Context, Result};
 use futures::future::join_all;
 use std::{path::{Path, PathBuf}, time::Duration};
 use tokio::{process::Command, time::{sleep}};
 use walkdir::WalkDir;
 pub struct DockerVpnProxyPool {
    container_names: Vec<String>,
    proxy_ports: Vec<u16>,  // e.g., [10801, 10802, ...]
 }
 impl DockerVpnProxyPool {
    pub async fn new(ovpn_dir: &Path, username: String, password: String) -> Result<Self> {
        // Count hostnames (subdirs in ovpn_dir)
        let hostnames: Vec<_> = std::fs::read_dir(ovpn_dir)?
            .filter_map(Result::ok)
            .filter(|e| e.path().is_dir())
            .map(|e| e.file_name().into_string().unwrap())
            .collect();
        let num_servers = hostnames.len();
        if num_servers == 0 {
            return Err(anyhow!("No VPN hostnames found in {:?}", ovpn_dir));
        }
        crate::util::logger::log_info(&format!("Found {} VPN hostnames", num_servers)).await;
        let mut container_names = Vec::with_capacity(num_servers);
        let mut proxy_ports = Vec::with_capacity(num_servers);
        let base_port: u16 = 10800;
        // === STEP 1: Start ALL containers first ===
        for (i, hostname) in hostnames.iter().enumerate() {
            // Pick tcp443.ovpn if exists, else first .ovpn
            let hostname_dir = ovpn_dir.join(hostname);
            let mut ovpn_path: Option<PathBuf> = None;
            for entry in WalkDir::new(&hostname_dir).max_depth(1) {
                let entry = entry?;
                if entry.path().extension().map_or(false, |ext| ext == "ovpn") {
                    if entry.file_name().to_str().unwrap_or("").contains("tcp443") {
                        ovpn_path = Some(entry.path().to_path_buf());
                        break;
                    } else if ovpn_path.is_none() {
                        ovpn_path = Some(entry.path().to_path_buf());
                    }
                }
            }
            let ovpn_path = ovpn_path.ok_or_else(|| anyhow!("No .ovpn found for {}", hostname))?;
            let name = format!("vpn-proxy-{}", i);
            let port = base_port + i as u16 + 1;
            // Clean up any existing container with the same name
            let _ = Command::new("docker")
                .args(["rm", "-f", &name])
                .status()
                .await;
            // Run Docker container
            let status = Command::new("docker")
                .args([
                    "run", "-d",
                    "--name", &name,
                    "--cap-add=NET_ADMIN",
                    "--device", "/dev/net/tun",
                    "--sysctl", "net.ipv4.ip_forward=1",
                    "-v", &format!("{}:/vpn/config.ovpn", ovpn_path.display()),
                    "-e", &format!("VPN_USERNAME={}", username),
                    "-e", &format!("VPN_PASSWORD={}", password),
                    "-p", &format!("{}:1080", port),
                    "rust-vpn-proxy",
                ])
                .status()
                .await
                .context("Failed to run Docker")?;
            if !status.success() {
                return Err(anyhow!("Docker run failed for {}", name));
            }
            crate::util::logger::log_info(&format!("Started container {} on port {} (waiting for VPN...)", name, port)).await;
            container_names.push(name);
            proxy_ports.push(port);
        }
        // Brief pause to let containers start
        sleep(Duration::from_secs(8)).await;
        crate::util::logger::log_info(&format!("All {} containers started, beginning health checks...", container_names.len())).await;
        // === STEP 2: Test ALL proxies in parallel with 10-second intervals ===
        let results = Self::test_all_proxies_parallel(&container_names, &proxy_ports).await;
        // Filter out failed containers
        let mut working_containers = Vec::new();
        let mut working_ports = Vec::new();
        let mut failed_count = 0;
        for (i, (container_name, port)) in container_names.into_iter().zip(proxy_ports.into_iter()).enumerate() {
            match &results[i] {
                Ok(Some(ip)) => {
                    crate::util::logger::log_info(&format!("✓ Container {} on port {} ready with IP: {}", 
                        container_name, port, ip)).await;
                    working_containers.push(container_name);
                    working_ports.push(port);
                }
                Ok(None) => {
                    crate::util::logger::log_warn(&format!("✓ Container {} on port {} ready but IP detection failed", 
                        container_name, port)).await;
                    working_containers.push(container_name);
                    working_ports.push(port);
                }
                Err(e) => {
                    // Get container logs to debug
                    let logs = Command::new("docker")
                        .args(["logs", "--tail", "20", &container_name])
                        .output()
                        .await
                        .ok()
                        .and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into());
                    crate::util::logger::log_error(&format!("✗ Container {} on port {} failed: {}. Logs: {:?}", 
                        container_name, port, e, logs)).await;
                    failed_count += 1;
                    // Clean up failed container
                    let _ = Self::cleanup_container(&container_name).await;
                }
            }
        }
        if working_containers.is_empty() {
            return Err(anyhow!("All {} VPN proxy containers failed to start", num_servers));
        }
        crate::util::logger::log_info(&format!("Started {}/{} VPN proxy containers successfully", 
            working_containers.len(), num_servers)).await;
        if failed_count > 0 {
            crate::util::logger::log_warn(&format!("{} containers failed and were cleaned up", failed_count)).await;
        }
        Ok(Self {
            container_names: working_containers,
            proxy_ports: working_ports,
        })
    }
    /// Test all proxies in parallel with 10-second intervals between tests
    async fn test_all_proxies_parallel(container_names: &[String], proxy_ports: &[u16]) -> Vec<Result<Option<String>>> {
        let mut tasks = Vec::new();
        for (_i, (container_name, port)) in container_names.iter().zip(proxy_ports.iter()).enumerate() {
            let name = container_name.clone();
            let port = *port;
            tasks.push(tokio::spawn(async move {
                // Try up to 6 times with 10-second intervals (total 60 seconds)
                for attempt in 1..=6 {
                    crate::util::logger::log_info(&format!("Testing proxy {} (port {}) - Attempt {}/6", 
                        name, port, attempt)).await;
                    match Self::test_single_proxy(port).await {
                        Ok(Some(ip)) => {
                            return Ok(Some(ip));
                        }
                        Ok(None) => {
                            // Connection works but IP detection failed
                            return Ok(None);
                        }
                        Err(e) if attempt < 6 => {
                            crate::util::logger::log_info(&format!("Attempt {}/6 for {}: {} - retrying in 10s", 
                                attempt, name, e)).await;
                            sleep(Duration::from_secs(10)).await;
                        }
                        Err(e) => {
                            return Err(anyhow!("Failed after 6 attempts: {}", e));
                        }
                    }
                }
                Err(anyhow!("Unexpected exit from retry loop"))
            }));
        }
        // Wait for all tasks to complete
        join_all(tasks)
            .await
            .into_iter()
            .map(|result| match result {
                Ok(inner) => inner,
                Err(e) => Err(anyhow!("Task panicked: {}", e)),
            })
            .collect()
    }
    /// Test a single proxy connection
    async fn test_single_proxy(port: u16) -> Result<Option<String>> {
        use std::io::{Read, Write};
        use std::net::TcpStream;
        use std::time::Duration as StdDuration;
        // First, test SOCKS5 handshake directly
        crate::util::logger::log_info(&format!("Testing SOCKS5 handshake on port {}...", port)).await;
        // Use spawn_blocking for synchronous I/O
        let test_result = tokio::task::spawn_blocking(move || {
            // Connect to SOCKS5 proxy
            let mut stream = match TcpStream::connect_timeout(
                &format!("127.0.0.1:{}", port).parse().unwrap(),
                StdDuration::from_secs(5)
            ) {
                Ok(stream) => stream,
                Err(e) => return Err(anyhow!("Failed to connect: {}", e)),
            };
            // Send SOCKS5 greeting: version 5, 1 method (no auth)
            let greeting: [u8; 3] = [0x05, 0x01, 0x00]; // SOCKS5, 1 method, no auth
            if let Err(e) = stream.write_all(&greeting) {
                return Err(anyhow!("Failed to send greeting: {}", e));
            }
            // Read response
            let mut response = [0u8; 2];
            if let Err(e) = stream.read_exact(&mut response) {
                return Err(anyhow!("Failed to read response: {}", e));
            }
            // Check response: should be [0x05, 0x00] for no auth required
            if response[0] != 0x05 || response[1] != 0x00 {
                return Err(anyhow!("Unexpected SOCKS5 response: {:?}", response));
            }
            Ok(())
        }).await;
        match test_result {
            Ok(Ok(())) => {
                crate::util::logger::log_info(&format!("✓ SOCKS5 proxy on port {} accepts connections", port)).await;
                // Try to get IP through proxy using curl (fallback method)
                let curl_result = tokio::process::Command::new("curl")
                    .args([
                        "-s",
                        "--socks5", &format!("localhost:{}", port),
                        "--max-time", "10",
                        "https://checkip.amazonaws.com"
                    ])
                    .output()
                    .await;
                match curl_result {
                    Ok(output) if output.status.success() => {
                        let ip = String::from_utf8_lossy(&output.stdout).trim().to_string();
                        if Self::is_valid_ip(&ip) {
                            crate::util::logger::log_info(&format!("✓ Got IP via proxy: {}", ip)).await;
                            return Ok(Some(ip));
                        } else {
                            crate::util::logger::log_info(&format!("✓ Proxy works, invalid IP format: {}", ip)).await;
                            return Ok(None);
                        }
                    }
                    _ => {
                        // Proxy accepts connections but curl failed - still acceptable
                        crate::util::logger::log_info(&format!("✓ Proxy accepts connections (curl test failed)")).await;
                        return Ok(None);
                    }
                }
            }
            Ok(Err(e)) => {
                return Err(anyhow!("SOCKS5 test failed: {}", e));
            }
            Err(e) => {
                return Err(anyhow!("Task failed: {}", e));
            }
        }
    }
    /// Clean up a failed container
    async fn cleanup_container(container_name: &str) -> Result<()> {
        let _ = Command::new("docker")
            .args(["stop", container_name])
            .status()
            .await;
        let _ = Command::new("docker")
            .args(["rm", container_name])
            .status()
            .await;
        Ok(())
    }
    fn is_valid_ip(ip: &str) -> bool {
        let parts: Vec<&str> = ip.split('.').collect();
        if parts.len() != 4 {
            return false;
        }
        for part in parts {
            if let Ok(num) = part.parse::<u8>() {
                if part != num.to_string() {
                    return false;
                }
            } else {
                return false;
            }
        }
        true
    }
    /// Test if a specific proxy is working
    pub async fn test_proxy_connection(&self, index: usize) -> Result<String> {
        let port = self.proxy_ports[index];
        let proxy_url = format!("socks5://localhost:{}", port);
        let client = reqwest::Client::builder()
            .proxy(reqwest::Proxy::all(&proxy_url)?)
            .timeout(Duration::from_secs(10))
            .build()?;
        let response = client.get("http://checkip.amazonaws.com")
            .send()
            .await?
            .text()
            .await?;
        Ok(response.trim().to_string())
    }
    pub fn get_proxy_url(&self, index: usize) -> String {
        let port = self.proxy_ports[index % self.proxy_ports.len()];
        format!("socks5://localhost:{}", port)
    }
    pub fn num_proxies(&self) -> usize {
        self.proxy_ports.len()
    }
    pub async fn shutdown(&self) -> Result<()> {
        crate::util::logger::log_info(&format!("Shutting down {} Docker proxy containers...", 
            self.container_names.len())).await;
        for name in &self.container_names {
            let _ = Command::new("docker")
                .args(["stop", name])
                .status()
                .await;
            let _ = Command::new("docker")
                .args(["rm", name])
                .status()
                .await;
        }
        Ok(())
    }
 }
 pub async fn cleanup_all_proxy_containers() -> Result<()> {
    // Step 1: List all container IDs that match our pattern
    let output = Command::new("docker")
        .args(["ps", "-a", "--format", "{{.ID}} {{.Names}} {{.Image}}"])
        .output()
        .await?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut containers_to_kill = Vec::new();
    for line in stdout.lines() {
        let parts: Vec<&str> = line.split_whitespace().collect();
        if parts.len() >= 2 {
            let name_or_id = parts[0];
            let name = parts[1];
            let image = if parts.len() >= 3 { parts[2] } else { "" };
            // Match by name prefix OR by image name
            if name.starts_with("vpn-proxy-") || image.contains("rust-vpn-proxy") {
                containers_to_kill.push(name_or_id.to_string());
            }
        }
    }
    if containers_to_kill.is_empty() {
        crate::util::logger::log_info("No old rust-vpn-proxy containers found").await;
        return Ok(());
    }
    // Step 2: Kill and remove them all at once
    let status = Command::new("docker")
        .arg("rm")
        .arg("-f")
        .args(&containers_to_kill)
        .status()
        .await?;
    if status.success() {
        crate::util::logger::log_info(&format!(
            "Successfully removed {} old rust-vpn-proxy container(s)",
            containers_to_kill.len()
        ))
        .await;
    } else {
        crate::util::logger::log_warn("Some containers may still remain (non-critical)").await;
    }
    Ok(())
 }
--- a/src/scraper/mod.rs
+++ b/src/scraper/mod.rs
@@ -1 +1,2 @@
 pub mod webdriver;
 pub mod docker_vpn_proxy;
--- a/src/scraper/webdriver.rs
+++ b/src/scraper/webdriver.rs
@@ -3,212 +3,269 @@
 use anyhow::{anyhow, Context, Result};
 use fantoccini::{Client, ClientBuilder};
 use serde_json::{Map, Value};
 use std::pin::Pin;
 use std::process::Stdio;
 use std::sync::Arc;
 use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::{Child, Command};
 use tokio::task::JoinHandle;
 use tokio::sync::{Mutex, Semaphore};
-use tokio::time::{Duration, sleep, timeout};
+use tokio::time::{sleep, timeout, Duration};
-use std::pin::Pin;
+use crate::scraper::docker_vpn_proxy::{DockerVpnProxyPool};
-/// Manages a pool of ChromeDriver instances for parallel scraping.
+/// Manages a pool of ChromeDriver instances for parallel scraping with optional VPN binding.
 /// 
 /// This struct maintains multiple ChromeDriver processes and allows controlled
 /// concurrent access via a semaphore. Instances are reused across tasks to avoid
 /// the overhead of spawning new processes.
 pub struct ChromeDriverPool {
    instances: Vec<Arc<Mutex<ChromeInstance>>>,
    semaphore: Arc<Semaphore>,
    /// Optional Docker-based proxy pool (one proxy per Chrome instance)
    proxy_pool: Option<Arc<DockerVpnProxyPool>>,
 }
 impl ChromeDriverPool {
-    /// Creates a new pool with the specified number of ChromeDriver instances.
+/// Creates a new pool without any proxy (direct connection).
    /// 
    /// # Arguments
    /// * `pool_size` - Number of concurrent ChromeDriver instances to maintain
    pub async fn new(pool_size: usize) -> Result<Self> {
        Self::new_with_proxy_and_task_limit(pool_size, None, 0).await
    }
    /// Creates a new pool with task-per-instance limit but no proxy.
    pub async fn _new_with_task_limit(pool_size: usize, max_tasks_per_instance: usize) -> Result<Self> {
        Self::new_with_proxy_and_task_limit(pool_size, None, max_tasks_per_instance).await
    }
    /// Creates a new pool where each Chrome instance uses a different SOCKS5 proxy from the Docker pool.
    pub async fn new_with_proxy(
        pool_size: usize,
        proxy_pool: Option<Arc<DockerVpnProxyPool>>,
    ) -> Result<Self> {
        Self::new_with_proxy_and_task_limit(pool_size, proxy_pool, 0).await
    }
    /// Full constructor: supports proxy + task limiting.
    pub async fn new_with_proxy_and_task_limit(
        pool_size: usize,
        proxy_pool: Option<Arc<DockerVpnProxyPool>>,
        max_tasks_per_instance: usize,
    ) -> Result<Self> {
        let mut instances = Vec::with_capacity(pool_size);
-        println!("Initializing ChromeDriver pool with {} instances...", pool_size);
+        crate::util::logger::log_info(&format!(
            "Initializing ChromeDriver pool with {} instances{}...",
            pool_size,
            if proxy_pool.is_some() { " (each using a unique Docker SOCKS5 proxy)" } else { "" }
        ))
        .await;
        for i in 0..pool_size {
-            match ChromeInstance::new().await {
+            let proxy_url = proxy_pool
-                Ok(instance) => {
+                .as_ref()
-                    println!("  ✓ Instance {} ready", i + 1);
+                .map(|pp| pp.get_proxy_url(i));
-                    instances.push(Arc::new(Mutex::new(instance)));
+
-                }
+            let instance = ChromeInstance::new(proxy_url, max_tasks_per_instance).await?;
-                Err(e) => {
+
-                    eprintln!("  ✗ Failed to create instance {}: {}", i + 1, e);
+            crate::util::logger::log_info(&format!("  Instance {} ready", i + 1)).await;
-                    // Clean up already created instances
+            instances.push(Arc::new(Mutex::new(instance)));
                    drop(instances);
                    return Err(e);
                }
            }
        }
        Ok(Self {
            instances,
            semaphore: Arc::new(Semaphore::new(pool_size)),
            proxy_pool,
        })
    }
-    /// Executes a scrape task using an available instance from the pool.
+    /// Execute a scraping task using an available instance from the pool.
    pub async fn execute<T, F, Fut>(&self, url: String, parse: F) -> Result<T>
    where
        T: Send + 'static,
        F: FnOnce(Client) -> Fut + Send + 'static,
-        Fut: std::future::Future<Output = Result<T>> + Send + 'static,
+        Fut: std::future::Future<Output = Result<T>> + Send,
    {
-        // Acquire semaphore permit
+        let _permit = self.semaphore.acquire().await.map_err(|_| anyhow!("Pool closed"))?;
        let _permit = self.semaphore.acquire().await
            .map_err(|_| anyhow!("Semaphore closed"))?;
-        // Find an available instance (round-robin or first available)
+        // Round-robin selection
-        let instance = self.instances[0].clone(); // Simple: use first, could be round-robin
+        let index = rand::random_range(..self.instances.len());
        let instance = self.instances[index].clone();
        let mut guard = instance.lock().await;
-        // Create a new session for this task
+        guard.increment_task_count();
        if guard.max_tasks_per_instance > 0 {
            crate::util::logger::log_info(&format!(
                "Instance task count: {}/{}",
                guard.get_task_count(),
                guard.max_tasks_per_instance
            ))
            .await;
        }
        let client = guard.new_session().await?;
-        // Release lock while we do the actual scraping
+        drop(guard); // release lock early
        drop(guard);
-        // Navigate and parse
+        crate::util::logger::log_info(&format!("Scraping {} ...", url)).await;
-        client.goto(&url).await.context("Failed to navigate")?;
+        client.goto(&url).await.context("Navigation failed")?;
-        let result = timeout(Duration::from_secs(60), parse(client))
+
        let result = timeout(Duration::from_secs(90), parse(client))
            .await
-            .context("Parse function timed out after 60s")??;
+            .context("Parse timeout")??;
        Ok(result)
    }
-    pub fn get_number_of_instances (&self) -> usize {
+    /// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
-      self.instances.len()
+    pub async fn shutdown(&self) -> Result<()> {
        for inst in &self.instances {
            let mut guard = inst.lock().await;
            guard.shutdown().await?;
        }
        if let Some(pp) = &self.proxy_pool {
            pp.shutdown().await?;
            crate::util::logger::log_info("All Docker VPN proxy containers stopped").await;
        }
        Ok(())
    }
    pub fn get_number_of_instances(&self) -> usize {
        self.instances.len()
    }
 }
-/// Represents a single instance of chromedriver process.
+/// Represents a single instance of chromedriver process, optionally bound to a VPN.
 pub struct ChromeInstance {
    process: Child,
    base_url: String,
    process: Child,
    stderr_log: Option<JoinHandle<()>>,
    task_count: usize,
    max_tasks_per_instance: usize,
    proxy_url: Option<String>,
 }
 impl ChromeInstance {
-/// Creates a new ChromeInstance by spawning chromedriver with random port.
+    pub async fn new(proxy_url: Option<String>, max_tasks_per_instance: usize) -> Result<Self> {
-    ///
+        let (base_url, process, stderr_handle) = Self::spawn_chromedriver().await?;
-    /// This spawns `chromedriver --port=0` to avoid port conflicts, reads stdout to extract
+
-    /// the listening address, and waits for the success message. If timeout occurs or
+        Ok(Self {
-    /// spawning fails, returns an error with context.
+            base_url,
-    ///
+            process,
-    /// # Errors
+            stderr_log: Some(stderr_handle),
-    ///
+            task_count: 0,
-    /// Returns an error if chromedriver fails to spawn (e.g., not in PATH, version mismatch),
+            max_tasks_per_instance,
-    /// if the process exits early, or if the address/success message isn't found within 30s.
+            proxy_url,
-    pub async fn new() -> Result<Self> {
+        })
-        let mut command = Command::new("chromedriver-win64/chromedriver.exe");
+    }
-        command
+
-            .arg("--port=0")  // Use random available port to support pooling
+    pub async fn new_session(&self) -> Result<Client> {
        ClientBuilder::native()
            .capabilities(self.chrome_args())
            .connect(&self.base_url)
            .await
            .context("Failed to connect to ChromeDriver")
    }
    pub fn increment_task_count(&mut self) {
        self.task_count +=  1;
    }
    pub fn get_task_count(&self) -> usize {
        self.task_count
    }
    pub async fn shutdown(&mut self) -> Result<()> {
        if let Some(handle) = self.stderr_log.take() {
            handle.abort();
            let _ = handle.await;
        }
        let _ = self.process.start_kill();
        let _ = self.process.wait().await;
        Ok(())
    }
    /// Spawns the actual `chromedriver` binary and waits for it to become ready.
    async fn spawn_chromedriver() -> Result<(String, Child, JoinHandle<()>)> {
        let mut process = Command::new("chromedriver-win64/chromedriver.exe")
            .arg("--port=0") // let OS choose free port
            .stdout(Stdio::piped())
-            .stderr(Stdio::piped());
+            .stderr(Stdio::piped())
        let mut process = command
            .spawn()
-            .context("Failed to spawn chromedriver. Ensure it's installed and in PATH.")?;
+            .context("Failed to start chromedriver. Is it in PATH?")?;
-        let mut stdout = BufReader::new(
+        let stdout = process.stdout.take().unwrap();
-            process.stdout.take().context("Failed to capture stdout")?
+        let stderr = process.stderr.take().unwrap();
        ).lines();
-        let mut stderr = BufReader::new(
+        let stdout_reader = BufReader::new(stdout);
-            process.stderr.take().context("Failed to capture stderr")?
+        let mut stdout_lines = stdout_reader.lines();
        ).lines();
-        let start_time = std::time::Instant::now();
+        let stderr_reader = BufReader::new(stderr);
-        let mut address: Option<String> = None;
+        let stderr_handle = tokio::spawn(async move {
-        let mut success = false;
+            let mut lines = stderr_reader.lines();
-
+            while let Ok(Some(line)) = lines.next_line().await {
-        // Log stderr in background for debugging
+                let t = line.trim();
-        tokio::spawn(async move {
+                if !t.is_empty() {
-            while let Ok(Some(line)) = stderr.next_line().await {
+                    let _ = crate::util::logger::log_info(&format!("ChromeDriver: {}", t)).await;
-                eprintln!("ChromeDriver stderr: {}", line);
+                }
            }
        });
-        // Wait for address and success (up to 30s)
+        let start = tokio::time::Instant::now();
-        while start_time.elapsed() < Duration::from_secs(30) {
+        let mut address: Option<String> = None;
-            if let Ok(Ok(Some(line))) =
+
-                timeout(Duration::from_secs(1), stdout.next_line()).await
+        while start.elapsed() < Duration::from_secs(30) {
-            {
+            if let Ok(Ok(Some(line))) = timeout(Duration::from_secs(1), stdout_lines.next_line()).await {
                if let Some(addr) = parse_chromedriver_address(&line) {
-                    address = Some(addr.to_string());
+                    address = Some(addr);
                }
-
+                if line.contains("ChromeDriver was started successfully") && address.is_some() {
-                if line.contains("ChromeDriver was started successfully") {
+                    return Ok((address.unwrap(), process, stderr_handle));
                    success = true;
                }
                if let (Some(addr), true) = (&address, success) {
                    return Ok(Self {
                        process,
                        base_url: addr.clone(),
                    });
                }
            }
            sleep(Duration::from_millis(100)).await;
        }
        // Cleanup on failure
        let _ = process.kill().await;
-        Err(anyhow!("Timeout: ChromeDriver did not start within 30 seconds. Check version match with Chrome browser and system resources."))
+        stderr_handle.abort();
        Err(anyhow!("ChromeDriver failed to start within 30s"))
    }
-    /// Creates a new browser session (client) from this ChromeDriver instance.
+    fn chrome_args(&self) -> Map<String, Value> {
-    /// Each session is independent and can be closed without affecting the driver.
+        let mut args = vec![
-    pub async fn new_session(&self) -> Result<Client> {
+            "--headless=new".to_string(),
-        ClientBuilder::native()
+            "--disable-gpu".to_string(),
-            .capabilities(Self::chrome_args())
+            "--no-sandbox".to_string(),
-            .connect(&self.base_url)
+            "--disable-dev-shm-usage".to_string(),
-            .await
+            "--disable-infobars".to_string(),
-            .context("Failed to create new session")
+            "--disable-extensions".to_string(),
-    }
+            "--disable-popup-blocking".to_string(),
-
+            "--disable-notifications".to_string(),
-    fn chrome_args() -> Map<String, Value> {
+            "--disable-logging".to_string(),
-        let args = serde_json::json!({
+            "--disable-autofill".to_string(),
            "--disable-sync".to_string(),
            "--disable-default-apps".to_string(),
            "--disable-translate".to_string(),
            "--window-size=1920,1080".to_string(),
            "--disable-blink-features=AutomationControlled".to_string(),
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36".to_string()
        ];
        if let Some(ref proxy) = self.proxy_url {
            let proxy = proxy.clone();
            let proxy_formatted = format!("--proxy-server={}", proxy);
            args.push(proxy_formatted);
        }
        let caps = serde_json::json!({
            "goog:chromeOptions": {
-                "args": [
+                "args": args,
                    "--headless=new",
                    "--disable-gpu",
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-infobars",
                    "--disable-extensions",
                    "--disable-popup-blocking",
                    "--disable-notifications",
                    "--disable-logging",
                    "--disable-autofill",
                    "--disable-features=TranslateUI,OptimizationGuideModelDownloading",
                    "--window-size=1920,1080",
                    "--disable-blink-features=AutomationControlled",
                    "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                ],
                "excludeSwitches": ["enable-logging", "enable-automation"],
                "useAutomationExtension": false,
                "prefs": {
                    "profile.default_content_setting_values.notifications": 2
                }
            }
        });
-        args.as_object()
+        caps.as_object().cloned().unwrap()
          .expect("Capabilities should be a JSON object")
          .clone()
    }
 }
 /// Parses the ChromeDriver address from a log line.
 ///
 /// Looks for the "Starting ChromeDriver ... on port XXXX" line and extracts the port.
 /// Returns `Some("http://localhost:XXXX")` if found, else `None`.
 fn parse_chromedriver_address(line: &str) -> Option<String> {
    if line.contains("Starting ChromeDriver") {
        if let Some(port_str) = line.split("on port ").nth(1) {
@@ -219,7 +276,6 @@ fn parse_chromedriver_address(line: &str) -> Option<String> {
            }
        }
    }
    // Fallback for other formats (e.g., explicit port mentions)
    for word in line.split_whitespace() {
        if let Ok(port) = word.trim_matches(|c: char| !c.is_numeric()).parse::<u16>() {
            if port > 1024 && port < 65535 && line.to_lowercase().contains("port") {
@@ -232,17 +288,18 @@ fn parse_chromedriver_address(line: &str) -> Option<String> {
 impl Drop for ChromeInstance {
    fn drop(&mut self) {
        // Signal child to terminate. Do NOT block here; shutdown should be
        // performed with the async `shutdown()` method when possible.
        let _ = self.process.start_kill();
        std::thread::sleep(std::time::Duration::from_millis(100));
    }
 }
-/// Simplified task execution - now uses the pool pattern.
+/// Simplified task execution - uses the pool pattern.
 /// 
 /// For backwards compatibility with existing code.
 pub struct ScrapeTask<T> {
    url: String,
-    parse: Box<dyn FnOnce(Client) -> Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send>,
+    parse: Box<
        dyn FnOnce(Client) -> Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send,
    >,
 }
 impl<T: Send + 'static> ScrapeTask<T> {
@@ -257,13 +314,11 @@ impl<T: Send + 'static> ScrapeTask<T> {
        }
    }
    /// Executes using a provided pool (more efficient for multiple tasks).
    pub async fn execute_with_pool(self, pool: &ChromeDriverPool) -> Result<T> {
        let url = self.url;
        let parse = self.parse;
-        pool.execute(url, move |client| async move {
+        pool.execute(url, move |client| async move { (parse)(client).await })
-            (parse)(client).await
+            .await
        }).await
    }
 }
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,22 +0,0 @@
 // src/util.rs   (or put it directly in main.rs if you prefer)
 use tokio::fs;
 use std::path::Path;
 /// Create the required data folders if they do not exist yet.
 pub async fn _ensure_data_dirs() -> anyhow::Result<()> {
    let dirs = [
        "economic_events",
        "economic_event_changes",
        "corporate_events",
        "corporate_prices",
        "data",
    ];
    for dir in dirs {
        let path = Path::new(dir);
        if !path.exists() {
            tokio::fs::create_dir_all(path).await?;
            println!("Created directory: {dir}");
        }
    }
    Ok(())
 }
--- a/src/util/directories.rs
+++ b/src/util/directories.rs
@@ -0,0 +1,169 @@
 use std::path::{Path, PathBuf};
 use std::fs;
 /// Central configuration for all data paths
 pub struct DataPaths {
  base_dir: PathBuf,
  data_dir: PathBuf,
  cache_dir: PathBuf,
  logs_dir: PathBuf,
  // Cache data subdirectories
  cache_gleif_dir: PathBuf,
  cache_openfigi_dir: PathBuf,
  cache_gleif_openfigi_map_dir: PathBuf,
  cache_openvpn_dir: PathBuf,
  // Economic data subdirectories
  economic_events_dir: PathBuf,
  economic_changes_dir: PathBuf,
  // Corporate data subdirectories
  corporate_events_dir: PathBuf,
  corporate_changes_dir: PathBuf,
  corporate_prices_dir: PathBuf,
 }
 impl DataPaths {
  /// Initialize paths from a base directory
  pub fn new(base_dir: impl AsRef<Path>) -> std::io::Result<Self> {
    let base_dir = base_dir.as_ref().to_path_buf();
    let data_dir = base_dir.join("data");
    let cache_dir = base_dir.join("cache");
    let logs_dir = base_dir.join("logs");
    // Cache subdirectories
    let cache_gleif_dir = cache_dir.join("gleif");
    let cache_openfigi_dir = cache_dir.join("openfigi");
    let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi");
    let cache_openvpn_dir = cache_dir.join("openvpn");
    // Economic subdirectories
    let economic_events_dir = data_dir.join("economic").join("events");
    let economic_changes_dir = economic_events_dir.join("changes");
    // Corporate subdirectories
    let corporate_dir = data_dir.join("corporate");
    let corporate_events_dir = corporate_dir.join("events");
    let corporate_changes_dir = corporate_events_dir.join("changes");
    let corporate_prices_dir = corporate_dir.join("prices");
    // Create all directories if they don't exist
    fs::create_dir_all(&data_dir)?;
    fs::create_dir_all(&cache_dir)?;
    fs::create_dir_all(&logs_dir)?;
    fs::create_dir_all(&cache_gleif_dir)?;
    fs::create_dir_all(&cache_openfigi_dir)?;
    fs::create_dir_all(&cache_gleif_openfigi_map_dir)?;
    fs::create_dir_all(&cache_openvpn_dir)?;
    fs::create_dir_all(&economic_events_dir)?;
    fs::create_dir_all(&economic_changes_dir)?;
    fs::create_dir_all(&corporate_events_dir)?;
    fs::create_dir_all(&corporate_changes_dir)?;
    fs::create_dir_all(&corporate_prices_dir)?;
    Ok(Self {
      base_dir,
      data_dir,
      cache_dir,
      logs_dir,
      cache_gleif_dir,
      cache_openfigi_dir,
      cache_gleif_openfigi_map_dir,
      cache_openvpn_dir,
      economic_events_dir,
      economic_changes_dir,
      corporate_events_dir,
      corporate_changes_dir,
      corporate_prices_dir,
    })
  }
  pub fn base_dir(&self) -> &Path {
    &self.base_dir
  }
  pub fn data_dir(&self) -> &Path {
    &self.data_dir
  }
  pub fn cache_dir(&self) -> &Path {
    &self.cache_dir
  }
  pub fn logs_dir(&self) -> &Path {
    &self.logs_dir
  }
  pub fn cache_gleif_dir(&self) -> &Path {
    &self.cache_gleif_dir
  }
  pub fn cache_openfigi_dir(&self) -> &Path {
    &self.cache_openfigi_dir
  }
  pub fn cache_gleif_openfigi_map_dir(&self) -> &Path {
    &self.cache_gleif_openfigi_map_dir
  }
  pub fn cache_openvpn_dir(&self) -> &Path {
    &self.cache_openvpn_dir
  }
  /// Get the economic events directory
  pub fn economic_events_dir(&self) -> &Path {
    &self.economic_events_dir
  }
  /// Get the economic changes directory
  pub fn economic_changes_dir(&self) -> &Path {
    &self.economic_changes_dir
  }
  /// Get the corporate events directory
  pub fn corporate_events_dir(&self) -> &Path {
    &self.corporate_events_dir
  }
  /// Get the corporate changes directory
  pub fn corporate_changes_dir(&self) -> &Path {
    &self.corporate_changes_dir
  }
  /// Get the corporate prices directory
  pub fn corporate_prices_dir(&self) -> &Path {
    &self.corporate_prices_dir
  }
  /// Get a specific file path within data directory
  pub fn data_file(&self, filename: &str) -> PathBuf {
    self.data_dir.join(filename)
  }
  /// Get a specific file path within cache directory
  pub fn cache_file(&self, filename: &str) -> PathBuf {
    self.cache_dir.join(filename)
  }
  /// Get a specific file path within logs directory
  pub fn log_file(&self, filename: &str) -> PathBuf {
    self.logs_dir.join(filename)
  }
 }
 #[cfg(test)]
 mod tests {
  use super::*;
  #[test]
  fn test_paths_creation() {
    let paths = DataPaths::new("./test_base").unwrap();
    assert!(paths.data_dir().exists());
    assert!(paths.cache_dir().exists());
    assert!(paths.logs_dir().exists());
    assert!(paths.economic_events_dir().exists());
    assert!(paths.economic_changes_dir().exists());
    assert!(paths.corporate_events_dir().exists());
    assert!(paths.corporate_changes_dir().exists());
    assert!(paths.corporate_prices_dir().exists());
  }
 }
--- a/src/util/logger.rs
+++ b/src/util/logger.rs
@@ -0,0 +1,78 @@
 // src/util/logger.rs
 use chrono::Local;
 use once_cell::sync::Lazy;
 use tokio::sync::Mutex;
 use std::fs::{self, OpenOptions};
 use std::io::Write;
 use std::path::PathBuf;
 static LOGGER: Lazy<Mutex<Option<DebugLogger>>> = Lazy::new(|| Mutex::new(None));
 pub struct DebugLogger {
    file: std::fs::File,
    log_path: PathBuf,
 }
 impl DebugLogger {
    fn new(log_dir: &std::path::Path) -> std::io::Result<Self> {
        fs::create_dir_all(log_dir)?;
        let filename = format!("backtest_{}.log", Local::now().format("%Y%m%d_%H%M%S"));
        let log_path = log_dir.join(&filename);
        let file = OpenOptions::new()
            .create(true)
            .append(true)
            .open(&log_path)?;
        Ok(Self { file, log_path })
    }
    async fn log(&mut self, msg: &str) {
        let line = format!("[{}] {}\n", Local::now().format("%H:%M:%S"), msg);
        let _ = self.file.write_all(line.as_bytes());
        let _ = self.file.flush();
        println!("{}", line.trim_end());
    }
 }
 pub async fn init_debug_logger(log_dir: &std::path::Path) -> Result<(), String> {
    let mut logger = LOGGER.lock().await;
    match DebugLogger::new(log_dir) {
        Ok(l) => {
            let log_path = l.log_path.clone();
            *logger = Some(l);
            println!("✓ Logger initialized at: {:?}", log_path);
            Ok(())
        }
        Err(e) => {
            let err_msg = format!("Failed to initialize logger: {}", e);
            eprintln!("{}", err_msg);
            Err(err_msg)
        }
    }
 }
 pub async fn log_message(msg: &str) {
    let mut logger = LOGGER.lock().await;
    if let Some(l) = logger.as_mut() {
        l.log(msg).await;
    } else {
        println!("[LOG] {}", msg);
    }
 }
 pub async fn log_detailed(level: &str, msg: &str) {
    let formatted = format!("[{}] {}", level, msg);
    log_message(&formatted).await;
 }
 pub async fn log_info(msg: &str) {
    log_detailed("INFO", msg).await;
 }
 pub async fn log_warn(msg: &str) {
    log_detailed("WARN", msg).await;
 }
 pub async fn log_error(msg: &str) {
    log_detailed("ERROR", msg).await;
 }
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -0,0 +1,4 @@
 // src/util/mod.rs
 pub mod logger;
 pub mod directories;
 pub mod opnv;
--- a/src/util/opnv.rs
+++ b/src/util/opnv.rs
@@ -0,0 +1,281 @@
 // src/scraper/opnv.rs
 //! Module for fetching, downloading, and extracting OpenVPN configurations from VPNBook.
 //!
 //! This module provides functionality to scrape the VPNBook free VPN page using
 //! a headless browser, handle potential consent popups, extract current credentials,
 //! collect download URLs for OpenVPN ZIP files, download them, and then extract
 //! the .ovpn files into a structured directory: cache/openvpn/<hostname>/<ovpn_filename>.
 //! It is designed to fetch the most recent data on every run, as credentials and
 //! server configurations change periodically.
 use anyhow::{anyhow, Context, Result};
 use fantoccini::{Client, Locator};
 use reqwest;
 use std::io::{Read};
 use std::path::{Path, PathBuf};
 use tokio::fs::File;
 use tokio::io::AsyncWriteExt;
 use url::Url;
 use zip::ZipArchive;
 use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask};
 use crate::util::{directories::DataPaths};
 /// Fetches, downloads, and extracts the latest OpenVPN configurations from VPNBook.
 ///
 /// This asynchronous function uses the provided `ChromeDriverPool` to scrape the
 /// VPNBook free VPN page. It dismisses any consent popup if present, extracts the
 /// current username and password, collects all OpenVPN ZIP download URLs, downloads
 /// the ZIP files temporarily, extracts the .ovpn files into the specified directory
 /// structure under `cache_dir`/openvpn/<hostname>/, and cleans up the ZIP files.
 ///
 /// The directory structure is: cache/openvpn/<hostname>/<ovpn_filename>, where
 /// <hostname> is derived from the ZIP filename (e.g., "ca149.vpnbook.com").
 ///
 /// The function ensures fresh data is fetched each time it runs, making it suitable
 /// for periodic updates where credentials may change.
 ///
 /// # Arguments
 ///
 /// * `pool` - A reference to the `ChromeDriverPool` for managing browser instances.
 /// * `cache_dir` - The path to the base cache directory. The OpenVPN files will be saved
 ///   under `cache_dir`/openvpn/<hostname>/.
 ///
 /// # Returns
 ///
 /// A `Result` containing a tuple with:
 /// - `String`: The scraped username.
 /// - `String`: The scraped password.
 /// - `Vec<PathBuf>`: Paths to the extracted .ovpn files.
 ///
 /// # Errors
 ///
 /// Returns an `anyhow::Error` if:
 /// - Navigation to the page fails.
 /// - The consent popup cannot be dismissed (if present).
 /// - Credentials cannot be parsed from the page.
 /// - Download URLs cannot be found or are invalid.
 /// - HTTP downloads fail or file writing errors occur.
 /// - ZIP extraction fails (e.g., invalid ZIP or I/O errors).
 ///
 /// # Dependencies
 ///
 /// This function requires the following crates (add to Cargo.toml if not present):
 /// - `anyhow` for error handling.
 /// - `fantoccini` for browser automation.
 /// - `reqwest` (with `tokio` features) for HTTP downloads.
 /// - `tokio` for asynchronous file operations.
 /// - `url` for URL manipulation.
 /// - `zip` for ZIP extraction.
 ///
 /// # Examples
 ///
 /// ```no_run
 /// use anyhow::Result;
 /// use event_backtest_engine::scraper::opnv::fetch_vpnbook_configs;
 /// use event_backtest_engine::scraper::webdriver::ChromeDriverPool;
 /// use std::path::Path;
 ///
 /// #[tokio::main]
 /// async fn main() -> Result<()> {
 ///     let pool = ChromeDriverPool::new(1).await?;
 ///     let (username, password, files) =
 ///         fetch_vpnbook_configs(&pool, Path::new("./cache")).await?;
 ///     println!("Username: {}, Password: {}", username, password);
 ///     for file in files {
 ///         println!("Extracted: {:?}", file);
 ///     }
 ///     Ok(())
 /// }
 /// ```
 pub async fn fetch_vpnbook_configs(
    pool: &ChromeDriverPool,
    cache_dir: &Path,
 ) -> Result<(String, String, Vec<PathBuf>)> {
    // Prepare the openvpn directory
    let dir = DataPaths::new(".")?;
    let vpn_dir = dir.cache_openvpn_dir();
    tokio::fs::create_dir_all(&vpn_dir)
        .await
        .context("Failed to create openvpn directory")?;
    // Temporary directory for ZIP downloads (under cache for consistency)
    let temp_dir = cache_dir.join("temp_vpn_zips");
    tokio::fs::create_dir_all(&temp_dir)
        .await
        .context("Failed to create temp directory")?;
    let url = "https://www.vpnbook.com/freevpn".to_string();
    // Define the scraping task
    let task = ScrapeTask::new(url, |client: Client| async move {
        // Attempt to dismiss consent popup if present
        let consent_selector = r#"body > div.fc-consent-root > div.fc-dialog-container > div.fc-dialog.fc-choice-dialog > div.fc-footer-buttons-container > div.fc-footer-buttons > button.fc-button.fc-cta-do-not-consent.fc-secondary-button > p"#;
        if let Ok(consent_elem) = client.find(Locator::Css(consent_selector)).await {
            consent_elem
                .click()
                .await
                .context("Failed to click consent dismissal button")?;
            // Brief delay to allow popup to close
            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
        }
        // Find all <code> elements
        let codes = client
            .find_all(Locator::Css("code"))
            .await
            .context("Failed to find code elements")?;
        if codes.len() < 2 {
            return Err(anyhow!("Insufficient code elements found for credentials"));
        }
        // The first <code> is username, second is password
        let username = codes[0]
            .text()
            .await
            .context("Failed to get username text")?;
        let password = codes[1]
            .text()
            .await
            .context("Failed to get password text")?;
        // Locate all download links for OpenVPN ZIP files
        let links = client
            .find_all(Locator::Css(r#"a[href^="/free-openvpn-account/"][download=""]"#))
            .await
            .context("Failed to find download links")?;
        // Collect relative hrefs
        let mut rel_urls = Vec::new();
        for link in links {
            if let Some(href) = link.attr("href").await.context("Failed to get href attribute")? {
                rel_urls.push(href);
            }
        }
        Ok((username, password, rel_urls))
    });
    // Execute the scraping task using the pool
    let (username, password, rel_urls) = task.execute_with_pool(pool).await?;
    // Base URL for resolving relative paths
    let base_url = Url::parse("https://www.vpnbook.com/")?;
    // Download each ZIP file to temp_dir
    let mut zip_paths = Vec::new();
    for rel in &rel_urls {
        let full_url = base_url.join(rel).context("Failed to join URL")?;
        let filename = rel
            .split('/')
            .last()
            .ok_or_else(|| anyhow!("Invalid filename in URL"))?
            .to_string();
        let out_path = temp_dir.join(&filename);
        // Perform HTTP GET request
        let resp = reqwest::get(full_url.clone())
            .await
            .with_context(|| format!("Failed to send download request for {}", full_url))?;
        if resp.status().is_success() {
            let bytes = resp
                .bytes()
                .await
                .context("Failed to read response bytes")?;
            // Write to file asynchronously
            let mut file = File::create(&out_path)
                .await
                .context("Failed to create output file")?;
            file.write_all(&bytes)
                .await
                .context("Failed to write to file")?;
            zip_paths.push(out_path);
        } else {
            return Err(anyhow!(
                "Download failed with status: {} for URL: {}",
                resp.status(),
                full_url
            ));
        }
    }
    // Now extract .ovpn files from each ZIP
    let mut extracted_paths = Vec::new();
    for zip_path in zip_paths {
        let hostname = get_hostname_from_zip_filename(
            zip_path.file_name().unwrap().to_str().unwrap(),
        );
        let hostname_dir = vpn_dir.join(&hostname);
        tokio::fs::create_dir_all(&hostname_dir)
            .await
            .context("Failed to create hostname directory")?;
        // Use spawn_blocking for sync ZIP operations
        let zip_path_clone = zip_path.clone();
        let hostname_dir_clone = hostname_dir.clone();
        let extract_result = tokio::task::spawn_blocking(move || {
            let file = std::fs::File::open(&zip_path_clone)
                .with_context(|| format!("Failed to open ZIP file: {:?}", zip_path_clone))?;
            let mut archive = ZipArchive::new(file)
                .with_context(|| format!("Failed to read ZIP archive: {:?}", zip_path_clone))?;
            let mut paths = Vec::new();
            for i in 0..archive.len() {
                let mut zip_file = archive.by_index(i)?;
                if zip_file.name().ends_with(".ovpn") {
                    // Get just the filename, stripping any path
                    let file_name = Path::new(zip_file.name()).file_name()
                        .ok_or_else(|| anyhow!("Invalid file name in ZIP: {}", zip_file.name()))?
                        .to_str()
                        .ok_or_else(|| anyhow!("Invalid UTF-8 in file name: {}", zip_file.name()))?
                        .to_string();
                    let target_path = hostname_dir_clone.join(file_name);
                    let mut content = Vec::new();
                    zip_file.read_to_end(&mut content)?;
                    std::fs::write(&target_path, &content)
                        .with_context(|| format!("Failed to write .ovpn file: {:?}", target_path))?;
                    paths.push(target_path);
                }
            }
            Ok::<Vec<PathBuf>, anyhow::Error>(paths)
        })
        .await
        .context("Spawn blocking failed")??;
        extracted_paths.extend(extract_result);
        // Clean up the ZIP file after extraction
        tokio::fs::remove_file(&zip_path)
            .await
            .context("Failed to remove temp ZIP file")?;
    }
    // Optional: Clean up temp_dir if empty
    let _ = tokio::fs::remove_dir(&temp_dir).await;
    Ok((username, password, extracted_paths))
 }
 /// Derives the hostname from the ZIP filename.
 ///
 /// For example, "vpnbook-openvpn-ca149.zip" -> "ca149.vpnbook.com"
 ///
 /// If the format doesn't match, returns "unknown.vpnbook.com".
 fn get_hostname_from_zip_filename(filename: &str) -> String {
    if filename.starts_with("vpnbook-openvpn-") && filename.ends_with(".zip") {
        let code = filename
            .strip_prefix("vpnbook-openvpn-")
            .unwrap()
            .strip_suffix(".zip")
            .unwrap();
        format!("{}.vpnbook.com", code)
    } else {
        "unknown.vpnbook.com".to_string()
    }
 }
--- a/test/vpn_integration_tests.rs
+++ b/test/vpn_integration_tests.rs
@@ -0,0 +1,379 @@
 // tests/vpn_integration_tests.rs
 //! Integration tests for VPN rotation system
 #[cfg(test)]
 mod vpn_tests {
    use event_backtest_engine::{
        scraper::{
            webdriver::ChromeDriverPool,
            vpn_manager::{VpnInstance, VpnPool},
        },
        util::{directories::DataPaths, opnv},
    };
    use std::path::PathBuf;
    use std::sync::Arc;
    /// Helper to create a test VPN instance without connecting
    fn create_test_vpn_instance() -> VpnInstance {
        VpnInstance::new(
            PathBuf::from("test.ovpn"),
            "testuser".to_string(),
            "testpass".to_string(),
        )
        .expect("Failed to create test VPN instance")
    }
    #[test]
    fn test_vpn_instance_creation() {
        let vpn = create_test_vpn_instance();
        assert_eq!(vpn.hostname(), "test");
        assert!(!vpn.is_healthy());
        assert!(vpn.external_ip().is_none());
    }
    #[test]
    fn test_vpn_task_counting() {
        let mut vpn = create_test_vpn_instance();
        // Should not rotate initially
        assert!(!vpn.increment_task_count(10));
        // Increment tasks
        for i in 1..10 {
            assert!(!vpn.increment_task_count(10), "Should not rotate at task {}", i);
        }
        // Should rotate at threshold
        assert!(vpn.increment_task_count(10), "Should rotate at task 10");
        // Reset and verify
        vpn.reset_task_count();
        assert!(!vpn.increment_task_count(10), "Should not rotate after reset");
    }
    #[test]
    fn test_vpn_task_counting_zero_threshold() {
        let mut vpn = create_test_vpn_instance();
        // With threshold=0, should never auto-rotate
        for _ in 0..100 {
            assert!(!vpn.increment_task_count(0));
        }
    }
    #[tokio::test]
    async fn test_chromedriver_pool_creation_no_vpn() {
        let result = ChromeDriverPool::new(2).await;
        match result {
            Ok(pool) => {
                assert_eq!(pool.get_number_of_instances(), 2);
                assert!(!pool.is_vpn_enabled());
            }
            Err(e) => {
                eprintln!("ChromeDriver pool creation failed (expected if chromedriver not installed): {}", e);
            }
        }
    }
    #[test]
    fn test_data_paths_creation() {
        let paths = DataPaths::new("./test_data").expect("Failed to create paths");
        assert!(paths.data_dir().exists());
        assert!(paths.cache_dir().exists());
        assert!(paths.logs_dir().exists());
        assert!(paths.cache_openvpn_dir().exists());
        // Cleanup
        let _ = std::fs::remove_dir_all("./test_data");
    }
    #[tokio::test]
    #[ignore] // This test requires actual network access and VPNBook availability
    async fn test_fetch_vpnbook_configs() {
        let paths = DataPaths::new(".").expect("Failed to create paths");
        // This test requires a ChromeDriver pool
        let pool_result = ChromeDriverPool::new(1).await;
        if pool_result.is_err() {
            eprintln!("Skipping VPNBook fetch test: ChromeDriver not available");
            return;
        }
        let pool = Arc::new(pool_result.unwrap());
        let result = opnv::fetch_vpnbook_configs(&pool, paths.cache_dir()).await;
        match result {
            Ok((username, password, files)) => {
                assert!(!username.is_empty(), "Username should not be empty");
                assert!(!password.is_empty(), "Password should not be empty");
                assert!(!files.is_empty(), "Should fetch at least one config file");
                println!("Fetched {} VPN configs", files.len());
                for file in &files {
                    assert!(file.exists(), "Config file should exist: {:?}", file);
                    assert_eq!(file.extension().and_then(|s| s.to_str()), Some("ovpn"));
                }
            }
            Err(e) => {
                eprintln!("VPNBook fetch failed (may be temporary): {}", e);
            }
        }
    }
    #[tokio::test]
    #[ignore] // Requires actual VPN configs and OpenVPN installation
    async fn test_vpn_pool_creation() {
        let paths = DataPaths::new(".").expect("Failed to create paths");
        // First fetch configs
        let pool_result = ChromeDriverPool::new(1).await;
        if pool_result.is_err() {
            eprintln!("Skipping VPN pool test: ChromeDriver not available");
            return;
        }
        let temp_pool = Arc::new(pool_result.unwrap());
        let fetch_result = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await;
        if fetch_result.is_err() {
            eprintln!("Skipping VPN pool test: Could not fetch configs");
            return;
        }
        let (username, password, _) = fetch_result.unwrap();
        // Create VPN pool
        let vpn_pool_result = VpnPool::new(
            paths.cache_openvpn_dir(),
            username,
            password,
            false,
            0,
        ).await;
        match vpn_pool_result {
            Ok(vpn_pool) => {
                assert!(vpn_pool.len() > 0, "VPN pool should have at least one instance");
                println!("Created VPN pool with {} instances", vpn_pool.len());
            }
            Err(e) => {
                eprintln!("VPN pool creation failed: {}", e);
            }
        }
    }
    #[tokio::test]
    #[ignore] // Full integration test - requires all components
    async fn test_full_vpn_integration() {
        let paths = DataPaths::new(".").expect("Failed to create paths");
        // Step 1: Create temp ChromeDriver pool for fetching
        let temp_pool = match ChromeDriverPool::new(1).await {
            Ok(p) => Arc::new(p),
            Err(e) => {
                eprintln!("Skipping integration test: ChromeDriver not available - {}", e);
                return;
            }
        };
        // Step 2: Fetch VPNBook configs
        let (username, password, files) = match opnv::fetch_vpnbook_configs(
            &temp_pool,
            paths.cache_dir()
        ).await {
            Ok(result) => result,
            Err(e) => {
                eprintln!("Skipping integration test: Config fetch failed - {}", e);
                return;
            }
        };
        assert!(!files.is_empty(), "Should have fetched configs");
        // Step 3: Create VPN pool
        let vpn_pool = match VpnPool::new(
            paths.cache_openvpn_dir(),
            username,
            password,
            true,
            5,
        ).await {
            Ok(pool) => Arc::new(pool),
            Err(e) => {
                eprintln!("Skipping integration test: VPN pool creation failed - {}", e);
                return;
            }
        };
        // Step 4: Connect one VPN
        let vpn_instance = vpn_pool.acquire().await.expect("Failed to acquire VPN");
        let connect_result = {
            let mut vpn = vpn_instance.lock().await;
            vpn.connect().await
        };
        match connect_result {
            Ok(_) => {
                let vpn = vpn_instance.lock().await;
                println!("✓ VPN connected: {} ({})", 
                    vpn.hostname(), 
                    vpn.external_ip().unwrap_or("unknown")
                );
                assert!(vpn.is_healthy());
                assert!(vpn.external_ip().is_some());
            }
            Err(e) => {
                eprintln!("VPN connection failed: {}", e);
            }
        }
        // Step 5: Create ChromeDriver pool with VPN
        let driver_pool_result = ChromeDriverPool::new_with_vpn(
            1,
            Some(vpn_pool.clone())
        ).await;
        match driver_pool_result {
            Ok(driver_pool) => {
                assert!(driver_pool.is_vpn_enabled());
                println!("✓ ChromeDriver pool created with VPN binding");
            }
            Err(e) => {
                eprintln!("ChromeDriver pool creation failed: {}", e);
            }
        }
        // Step 6: Cleanup
        vpn_pool.disconnect_all().await.expect("Failed to disconnect VPNs");
        println!("✓ Integration test complete");
    }
    #[test]
    fn test_hostname_extraction() {
        // Test the hostname extraction logic
        let test_cases = vec![
            ("test/ca149.vpnbook.com/config.ovpn", "ca149.vpnbook.com"),
            ("test/us1.vpnbook.com/config.ovpn", "us1.vpnbook.com"),
            ("test/de4.vpnbook.com/config.ovpn", "de4.vpnbook.com"),
        ];
        for (path, expected_hostname) in test_cases {
            let pb = PathBuf::from(path);
            let hostname = pb.parent()
                .and_then(|p| p.file_name())
                .and_then(|n| n.to_str())
                .unwrap_or("unknown");
            assert_eq!(hostname, expected_hostname);
        }
    }
    #[cfg(target_os = "windows")]
    #[test]
    fn test_forcebindip_manager_creation() {
        use event_backtest_engine::ForceBindIpManager;
        match ForceBindIpManager::new() {
            Ok(manager) => {
                println!("✓ ForceBindIP found at: {:?}", manager.path());
                assert!(manager.path().exists());
            }
            Err(e) => {
                eprintln!("ForceBindIP not found (expected in dev): {}", e);
            }
        }
    }
    #[cfg(target_os = "windows")]
    #[test]
    fn test_forcebindip_command_creation() {
        use event_backtest_engine::ForceBindIpManager;
        use std::path::Path;
        if let Ok(manager) = ForceBindIpManager::new() {
            let cmd = manager.create_bound_command(
                "192.168.1.100",
                Path::new("test.exe"),
                &["--arg1", "value1"],
            );
            let cmd_str = format!("{:?}", cmd);
            assert!(cmd_str.contains("192.168.1.100"));
            assert!(cmd_str.contains("test.exe"));
            println!("✓ ForceBindIP command created successfully");
        }
    }
    #[test]
    fn test_config_defaults() {
        use event_backtest_engine::Config;
        let config = Config::default();
        assert_eq!(config.economic_start_date, "2007-02-13");
        assert_eq!(config.corporate_start_date, "2010-01-01");
        assert_eq!(config.economic_lookahead_months, 3);
        assert_eq!(config.max_parallel_instances, 10);
        assert!(!config.enable_vpn_rotation);
        assert_eq!(config.tasks_per_vpn_session, 0);
    }
 }
 #[cfg(test)]
 mod benchmark_tests {
    use super::*;
    #[tokio::test]
    #[ignore] // Performance test
    async fn benchmark_vpn_rotation_overhead() {
        use std::time::Instant;
        // This test measures the overhead of VPN rotation
        let start = Instant::now();
        // Simulate rotation cycle
        // 1. Disconnect (instant)
        // 2. Wait 2 seconds
        // 3. Connect (5-10 seconds)
        // 4. Verify IP (1-2 seconds)
        tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
        let elapsed = start.elapsed();
        println!("Rotation cycle took: {:?}", elapsed);
        // Typical rotation should complete in under 15 seconds
        assert!(elapsed.as_secs() < 15);
    }
    #[tokio::test]
    #[ignore] // Performance test
    async fn benchmark_parallel_scraping() {
        // This test measures throughput with different parallelism levels
        // Results help tune MAX_PARALLEL_INSTANCES
        let configs = vec![1, 2, 3, 5, 10];
        for &pool_size in &configs {
            println!("Testing with {} parallel instances...", pool_size);
            // Would need actual scraping implementation here
            // For now, just verify pool creation time
            let start = std::time::Instant::now();
            let pool_result = event_backtest_engine::ChromeDriverPool::new(pool_size).await;
            if let Ok(_pool) = pool_result {
                let elapsed = start.elapsed();
                println!("  Pool initialization: {:?}", elapsed);
                // Pool creation should be fast (< 5 seconds per instance)
                assert!(elapsed.as_secs() < pool_size as u64 * 5);
            } else {
                eprintln!("  Skipped - ChromeDriver not available");
            }
        }
    }
 }
Author	SHA1	Message	Date
donpat1to	00c9d45642	added data streaming instead of laoding	2025-12-12 10:54:01 +01:00
donpat1to	1bda78897b	implement vpn pool	2025-12-11 23:18:04 +01:00
donpat1to	470f0922ed	capabable spawning multiple openvpn instances	2025-12-11 00:36:46 +01:00
donpat1to	c9da56e8e9	removed unused functions	2025-12-09 23:27:51 +01:00
donpat1to	dde859b071	removed unused imports	2025-12-09 23:27:14 +01:00
donpat1to	2416947e9d	getting vpn ovpn data	2025-12-09 23:24:51 +01:00
donpat1to	3ab5d0dcc3	added fetching openvpn packages	2025-12-09 19:51:11 +01:00
donpat1to	c2408d9a56	added gettin opnv setup files	2025-12-09 19:23:54 +01:00
donpat1to	f95e9e2427	commented unused function	2025-12-09 16:56:45 +01:00
donpat1to	c00bfd8687	removing not map-able LEIs	2025-12-07 17:38:32 +01:00
donpat1to	0f89c8c0ce	updated cache saving	2025-12-07 14:49:25 +01:00
donpat1to	a6823dc938	moved data capturing into cache folder	2025-12-05 22:32:42 +01:00
donpat1to	58a498e694	added logging	2025-12-05 21:20:12 +01:00
donpat1to	f7083bf9f0	changed noting	2025-12-04 23:55:52 +01:00
donpat1to	f05df0b5ee	updated .gitignore	2025-12-04 21:08:36 +01:00
`@@ -1 +1,2 @@`
	`pub mod webdriver;`	`pub mod webdriver;`
		`pub mod docker_vpn_proxy;`