Compare commits

...

12 Commits

Author SHA1 Message Date
c9da56e8e9 removed unused functions 2025-12-09 23:27:51 +01:00
dde859b071 removed unused imports 2025-12-09 23:27:14 +01:00
2416947e9d getting vpn ovpn data 2025-12-09 23:24:51 +01:00
3ab5d0dcc3 added fetching openvpn packages 2025-12-09 19:51:11 +01:00
c2408d9a56 added gettin opnv setup files 2025-12-09 19:23:54 +01:00
f95e9e2427 commented unused function 2025-12-09 16:56:45 +01:00
c00bfd8687 removing not map-able LEIs 2025-12-07 17:38:32 +01:00
0f89c8c0ce updated cache saving 2025-12-07 14:49:25 +01:00
a6823dc938 moved data capturing into cache folder 2025-12-05 22:32:42 +01:00
58a498e694 added logging 2025-12-05 21:20:12 +01:00
f7083bf9f0 changed noting 2025-12-04 23:55:52 +01:00
f05df0b5ee updated .gitignore 2025-12-04 21:08:36 +01:00
25 changed files with 1925 additions and 590 deletions

48
.env.example Normal file
View File

@@ -0,0 +1,48 @@
# WebScraper Configuration File (.env)
# ====================================
# This file configures the behavior of the WebScraper application
# Copy to .env and adjust values as needed
# ===== ECONOMIC DATA =====
# Start date for economic event scraping
ECONOMIC_START_DATE=2007-02-13
# How far into the future to look ahead for economic events (in months)
ECONOMIC_LOOKAHEAD_MONTHS=3
# ===== CORPORATE DATA =====
# Start date for corporate earnings/data scraping
CORPORATE_START_DATE=2010-01-01
# ===== PERFORMANCE & CONCURRENCY =====
# Maximum number of parallel ChromeDriver instances
# Higher = more concurrent tasks, but higher resource usage
MAX_PARALLEL_INSTANCES=3
# Maximum tasks per ChromeDriver instance before recycling
# 0 = unlimited (instance lives for entire application runtime)
MAX_TASKS_PER_INSTANCE=0
# ===== VPN ROTATION (ProtonVPN Integration) =====
# Enable automatic VPN rotation between sessions?
# If false, all traffic goes through the system connection without VPN tunneling
ENABLE_VPN_ROTATION=false
# Comma-separated list of ProtonVPN servers to rotate through
# Examples:
# "US-Free#1,US-Free#2,UK-Free#1"
# "US,UK,JP,DE,NL"
# NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
VPN_SERVERS=
# Number of tasks per VPN session before rotating to new server/IP
# 0 = rotate between economic and corporate phases (one phase = one IP)
# 5 = rotate every 5 tasks
# NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
TASKS_PER_VPN_SESSION=0
# ===== LOGGING =====
# Set via RUST_LOG environment variable:
# RUST_LOG=info cargo run
# RUST_LOG=debug cargo run
# Leave empty or unset for default logging level
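For reference, a minimal sketch of how a RUST_LOG-driven filter is typically wired up with tracing-subscriber (the crate and its env-filter feature are already declared in Cargo.toml). This is illustrative only and not necessarily how the repository's util::logger module is implemented:

// Illustrative sketch: standard tracing-subscriber setup that honours RUST_LOG.
use tracing_subscriber::EnvFilter;

fn init_tracing() {
    tracing_subscriber::fmt()
        // Fall back to "info" when RUST_LOG is empty or unset
        .with_env_filter(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
        .init();
}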

21
.gitignore vendored
View File

@@ -27,10 +27,17 @@ target/
# /chromedriver-win64/*
# data folders
/economic_events*
/economic_event_changes*
/corporate_events*
/corporate_prices*
/corporate_event_changes*
/data*
# data files
**/*.json
**/*.jsonl
**/*.csv
**/*.zip
**/*.log
**/*.ovpn
#/economic_events*
#/economic_event_changes*
#/corporate_events*
#/corporate_prices*
#/corporate_event_changes*
#/data*

3
Cargo.lock generated
View File

@@ -671,8 +671,10 @@ dependencies = [
"fantoccini",
"flate2",
"futures",
"once_cell",
"rand 0.9.2",
"rayon",
"regex",
"reqwest",
"scraper",
"serde",
@@ -681,6 +683,7 @@ dependencies = [
"toml",
"tracing",
"tracing-subscriber",
"url",
"yfinance-rs",
"zip",
]

View File

@@ -21,6 +21,7 @@ reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "
scraper = "0.19" # HTML parsing for Yahoo earnings pages
fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net
yfinance-rs = "0.7.2"
url = "2.5.7"
# Serialization
serde = { version = "1.0", features = ["derive"] }
@@ -29,6 +30,9 @@ csv = "1.3"
zip = "6.0.0"
flate2 = "1.1.5"
#
regex = "1.12.2"
# Generating
rand = "0.9.2"
@@ -45,6 +49,7 @@ anyhow = "1.0"
# Logging (optional but recommended)
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
once_cell = "1.21.3"
# Parallel processing (for batch tickers)
futures = "0.3"

15
cache/openfigi/INFO.md vendored Normal file
View File

@@ -0,0 +1,15 @@
# OpenFIGI Data
## Market Sector Description
| Code | Meaning |
| ---------- | --------------------------------------------------------- |
| **Comdty** | Commodity (e.g., oil, gold futures, physical commodities) |
| **Corp** | Corporate bond / corporate debt security |
| **Curncy** | Currency or FX pair (e.g., EURUSD) |
| **Equity** | Stocks / shares |
| **Govt** | Government bond (Treasuries, Bunds, Gilts, etc.) |
| **Index** | Market indices (S&P 500, DAX, NYSE Composite…) |
| **M-Mkt** | Money market instruments (commercial paper, CDs, T-bills) |
| **Mtge** | Mortgage-backed securities (MBS) |
| **Muni** | Municipal bonds (US state/local government debt) |
| **Pfd** | Preferred shares |
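As a quick illustration of how these market sector codes are used downstream (this changeset filters OpenFIGI mapping results to Equity only), a minimal sketch over raw OpenFIGI response entries; the helper name is hypothetical:

// Sketch: keep only Equity entries from an OpenFIGI mapping response,
// mirroring the marketSector filter used elsewhere in this changeset.
fn equities_only(entries: Vec<serde_json::Value>) -> Vec<serde_json::Value> {
    entries
        .into_iter()
        .filter(|e| e["marketSector"].as_str() == Some("Equity"))
        .collect()
}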

15
data/INFO.md Normal file
View File

@@ -0,0 +1,15 @@
# Global Data Info
## Exchanges
Source: Wikipedia
## GLEIF
Data downloaded as a .zip archive from the GLEIF website
## OpenFIGI
Data scraped via the public OpenFIGI API
API key: configured in .env
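A minimal sketch of reading the API key from .env with dotenvy, which the project already uses for configuration; the variable name OPENFIGI_API_KEY is an assumption for illustration only:

// Sketch only: the env var name OPENFIGI_API_KEY is assumed for illustration.
fn openfigi_api_key() -> Option<String> {
    // dotenvy::var reads from .env as well as the process environment
    dotenvy::var("OPENFIGI_API_KEY").ok().filter(|k| !k.is_empty())
}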

6
data/economic/INFO.md Normal file
View File

@@ -0,0 +1,6 @@
# Economic Info
## Sources
* continents: finanzen.net
* countries: finanzen.net

View File

@@ -12,21 +12,47 @@ pub struct Config {
pub economic_lookahead_months: u32, // default: 3
/// Maximum number of parallel scraping tasks (default: 10).
/// This limits concurrency to protect system load and prevent website spamming.
#[serde(default = "default_max_parallel")]
pub max_parallel_tasks: usize,
#[serde(default = "default_max_parallel_instances")]
pub max_parallel_instances: usize,
pub max_tasks_per_instance: usize,
/// VPN rotation configuration
/// If set to "true", enables automatic VPN rotation between sessions
#[serde(default)]
pub enable_vpn_rotation: bool,
/// Comma-separated list of VPN servers/country codes to rotate through.
/// Example: "US-Free#1,UK-Free#1,JP-Free#1" or "US,JP,DE"
/// If empty, VPN rotation is disabled.
#[serde(default)]
pub vpn_servers: String,
/// Number of tasks per session before rotating VPN
/// If set to 0, rotates VPN between economic and corporate phases
#[serde(default = "default_tasks_per_session")]
pub tasks_per_vpn_session: usize,
}
fn default_max_parallel() -> usize {
fn default_max_parallel_instances() -> usize {
10
}
fn default_tasks_per_session() -> usize {
0 // 0 = rotate between economic/corporate
}
impl Default for Config {
fn default() -> Self {
Self {
economic_start_date: "2007-02-13".to_string(),
corporate_start_date: "2010-01-01".to_string(),
economic_lookahead_months: 3,
max_parallel_tasks: default_max_parallel(),
max_parallel_instances: default_max_parallel_instances(),
max_tasks_per_instance: 0,
enable_vpn_rotation: false,
vpn_servers: String::new(),
tasks_per_vpn_session: default_tasks_per_session(),
}
}
}
@@ -59,16 +85,38 @@ impl Config {
.parse()
.context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;
let max_parallel_tasks: usize = dotenvy::var("MAX_PARALLEL_TASKS")
let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
.unwrap_or_else(|_| "10".to_string())
.parse()
.context("Failed to parse MAX_PARALLEL_TASKS as usize")?;
.context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;
let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
.unwrap_or_else(|_| "0".to_string())
.parse()
.context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;
let enable_vpn_rotation = dotenvy::var("ENABLE_VPN_ROTATION")
.unwrap_or_else(|_| "false".to_string())
.parse::<bool>()
.context("Failed to parse ENABLE_VPN_ROTATION as bool")?;
let vpn_servers = dotenvy::var("VPN_SERVERS")
.unwrap_or_else(|_| String::new());
let tasks_per_vpn_session: usize = dotenvy::var("TASKS_PER_VPN_SESSION")
.unwrap_or_else(|_| "0".to_string())
.parse()
.context("Failed to parse TASKS_PER_VPN_SESSION as usize")?;
Ok(Self {
economic_start_date,
corporate_start_date,
economic_lookahead_months,
max_parallel_tasks,
max_parallel_instances,
max_tasks_per_instance,
enable_vpn_rotation,
vpn_servers,
tasks_per_vpn_session,
})
}
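A short usage sketch of the updated Config as consumed in main.rs (error handling simplified; field names match the struct above):

// Sketch: load the configuration and read the new pool/VPN settings.
fn show_config() -> anyhow::Result<()> {
    let config = Config::load()?;
    println!(
        "instances: {}, tasks per instance: {}, vpn rotation: {}",
        config.max_parallel_instances, config.max_tasks_per_instance, config.enable_vpn_rotation
    );
    Ok(())
}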

View File

@@ -1,6 +1,7 @@
// src/corporate/aggregation.rs
use super::types::CompanyPrice;
use super::storage::*;
use crate::util::directories::DataPaths;
use tokio::fs;
use std::collections::HashMap;
@@ -16,8 +17,8 @@ struct DayData {
}
/// Aggregate price data from multiple exchanges, converting all to USD
pub async fn aggregate_best_price_data(lei: &str) -> anyhow::Result<()> {
let company_dir = get_company_dir(lei);
pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
let company_dir = get_company_dir(paths, lei);
for timeframe in ["daily", "5min"].iter() {
let source_dir = company_dir.join(timeframe);

File diff suppressed because it is too large

View File

@@ -1,7 +1,7 @@
// src/corporate/scraper.rs
use super::{types::*, helpers::*, openfigi::*};
//use crate::corporate::openfigi::OpenFigiClient;
use crate::{scraper::webdriver::*};
use crate::{scraper::webdriver::*, util::directories::DataPaths, util::logger};
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{DateTime, Duration, NaiveDate, Utc};
@@ -15,160 +15,6 @@ use anyhow::{anyhow, Result};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
/// Discover all exchanges where this ISIN trades by querying Yahoo Finance and enriching with OpenFIGI API calls.
///
/// # Arguments
/// * `isin` - The ISIN to search for.
/// * `known_ticker` - A known ticker symbol for fallback or initial check.
///
/// # Returns
/// A vector of FigiInfo structs containing enriched data from API calls.
///
/// # Errors
/// Returns an error if HTTP requests fail, JSON parsing fails, or OpenFIGI API responds with an error.
pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<FigiInfo>> {
println!(" Discovering exchanges for ISIN {}", isin);
let mut potential: Vec<(String, PrimaryInfo)> = Vec::new();
// Try the primary ticker first
if let Ok(info) = check_ticker_exists(known_ticker).await {
potential.push((known_ticker.to_string(), info));
}
// Search for ISIN directly on Yahoo to find other listings
let search_url = format!(
"https://query2.finance.yahoo.com/v1/finance/search?q={}&quotesCount=20&newsCount=0",
isin
);
let resp = HttpClient::new()
.get(&search_url)
.header("User-Agent", USER_AGENT)
.send()
.await?;
let json = resp.json::<Value>().await?;
if let Some(quotes) = json["quotes"].as_array() {
for quote in quotes {
// First: filter by quoteType directly from search results (faster rejection)
let quote_type = quote["quoteType"].as_str().unwrap_or("");
if quote_type.to_uppercase() != "EQUITY" {
continue; // Skip bonds, ETFs, mutual funds, options, etc.
}
if let Some(symbol) = quote["symbol"].as_str() {
// Avoid duplicates
if potential.iter().any(|(s, _)| s == symbol) {
continue;
}
// Double-check with full quote data (some search results are misleading)
if let Ok(info) = check_ticker_exists(symbol).await {
potential.push((symbol.to_string(), info));
}
}
}
}
if potential.is_empty() {
return Ok(vec![]);
}
// Enrich with OpenFIGI API
let client = OpenFigiClient::new()?;
let mut discovered_figis = Vec::new();
if !client.has_key() {
// Fallback without API key - create FigiInfo with default/empty fields
for (symbol, info) in potential {
println!(" Found equity listing: {} on {} ({}) - no FIGI (fallback mode)", symbol, info.exchange_mic, info.currency);
let figi_info = FigiInfo {
isin: info.isin,
figi: String::new(),
name: info.name,
ticker: symbol,
mic_code: info.exchange_mic,
currency: info.currency,
compositeFIGI: String::new(),
securityType: String::new(),
marketSector: String::new(),
shareClassFIGI: String::new(),
securityType2: String::new(),
securityDescription: String::new(),
};
discovered_figis.push(figi_info);
}
return Ok(discovered_figis);
}
// With API key, batch the mapping requests
let chunk_size = 100;
for chunk in potential.chunks(chunk_size) {
let mut jobs = vec![];
for (symbol, info) in chunk {
jobs.push(json!({
"idType": "TICKER",
"idValue": symbol,
"micCode": info.exchange_mic,
"marketSecDes": "Equity",
}));
}
let resp = client.get_figi_client()
.post("https://api.openfigi.com/v3/mapping")
.header("Content-Type", "application/json")
.json(&jobs)
.send()
.await?;
if !resp.status().is_success() {
return Err(anyhow::anyhow!("OpenFIGI mapping failed with status: {}", resp.status()));
}
let parsed: Vec<Value> = resp.json().await?;
for (i, item) in parsed.iter().enumerate() {
let (symbol, info) = &chunk[i];
if let Some(data) = item["data"].as_array() {
if let Some(entry) = data.first() {
let market_sec = entry["marketSector"].as_str().unwrap_or("");
if market_sec != "Equity" {
continue;
}
println!(" Found equity listing: {} on {} ({}) - FIGI: {}", symbol, info.exchange_mic, info.currency, entry["figi"]);
let figi_info = FigiInfo {
isin: info.isin.clone(),
figi: entry["figi"].as_str().unwrap_or("").to_string(),
name: entry["name"].as_str().unwrap_or(&info.name).to_string(),
ticker: symbol.clone(),
mic_code: info.exchange_mic.clone(),
currency: info.currency.clone(),
compositeFIGI: entry["compositeFIGI"].as_str().unwrap_or("").to_string(),
securityType: entry["securityType"].as_str().unwrap_or("").to_string(),
marketSector: market_sec.to_string(),
shareClassFIGI: entry["shareClassFIGI"].as_str().unwrap_or("").to_string(),
securityType2: entry["securityType2"].as_str().unwrap_or("").to_string(),
securityDescription: entry["securityDescription"].as_str().unwrap_or("").to_string(),
};
discovered_figis.push(figi_info);
} else {
println!(" No data returned for ticker {} on MIC {}", symbol, info.exchange_mic);
}
} else if let Some(error) = item["error"].as_str() {
println!(" OpenFIGI error for ticker {}: {}", symbol, error);
}
}
// Respect rate limit (6 seconds between requests with key)
sleep(TokioDuration::from_secs(6)).await;
}
Ok(discovered_figis)
}
/// Check if a ticker exists on Yahoo Finance and return core metadata.
///
/// This function calls the public Yahoo Finance quoteSummary endpoint and extracts:
@@ -190,7 +36,7 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
/// - Not an equity (ETF, bond, etc.)
/// - Missing critical fields
/// - Network or JSON parsing errors
pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
/*pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
let url = format!(
"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price%2CassetProfile",
ticker
@@ -303,34 +149,7 @@ pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
exchange_mic,
currency,
})
}
/// Convert Yahoo's exchange name to MIC code (best effort)
fn exchange_name_to_mic(name: &str) -> String {
match name {
"NMS" | "NasdaqGS" | "NASDAQ" => "XNAS",
"NYQ" | "NYSE" => "XNYS",
"LSE" | "London" => "XLON",
"FRA" | "Frankfurt" | "GER" | "XETRA" => "XFRA",
"PAR" | "Paris" => "XPAR",
"AMS" | "Amsterdam" => "XAMS",
"MIL" | "Milan" => "XMIL",
"JPX" | "Tokyo" => "XJPX",
"HKG" | "Hong Kong" => "XHKG",
"SHH" | "Shanghai" => "XSHG",
"SHZ" | "Shenzhen" => "XSHE",
"TOR" | "Toronto" => "XTSE",
"ASX" | "Australia" => "XASX",
"SAU" | "Saudi" => "XSAU",
"SWX" | "Switzerland" => "XSWX",
"BSE" | "Bombay" => "XBSE",
"NSE" | "NSI" => "XNSE",
"TAI" | "Taiwan" => "XTAI",
"SAO" | "Sao Paulo" => "BVMF",
"MCE" | "Madrid" => "XMAD",
_ => name, // Fallback to name itself
}.to_string()
}
}*/
/// Fetches earnings events for a ticker using a dedicated ScrapeTask.
///
@@ -670,60 +489,164 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow
pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
let zip_path = "data/gleif/isin_lei.zip";
let csv_path = "data/gleif/isin_lei.csv";
if let Err(e) = std::fs::create_dir_all("data") {
println!("Failed to create data directory: {e}");
// Initialize DataPaths and create cache/gleif directory
let paths = DataPaths::new(".")?;
let gleif_cache_dir = paths.cache_gleif_dir();
if let Err(e) = std::fs::create_dir_all(&gleif_cache_dir) {
let msg = format!("Failed to create cache/gleif directory: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
// Download ZIP
let bytes = match reqwest::Client::builder()
logger::log_info("Corporate Scraper: Downloading ISIN/LEI mapping from GLEIF...").await;
// Download ZIP and get the filename from Content-Disposition header
let client = match reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(std::time::Duration::from_secs(30))
.build()
.and_then(|c| Ok(c))
{
Ok(client) => match client.get(url).send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(b) => b,
Err(e) => {
println!("Failed to read ZIP bytes: {e}");
return Ok(None);
}
},
Ok(resp) => {
println!("Server returned HTTP {}", resp.status());
return Ok(None);
}
Err(e) => {
println!("Failed to download ISIN/LEI ZIP: {e}");
return Ok(None);
}
},
Ok(c) => c,
Err(e) => {
println!("Failed to create HTTP client: {e}");
let msg = format!("Failed to create HTTP client: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
};
if let Err(e) = tokio::fs::write(zip_path, &bytes).await {
println!("Failed to write ZIP file: {e}");
return Ok(None);
let resp = match client.get(url).send().await {
Ok(r) if r.status().is_success() => r,
Ok(resp) => {
let msg = format!("Server returned HTTP {}", resp.status());
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
Err(e) => {
let msg = format!("Failed to download ISIN/LEI ZIP: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
};
// Extract filename from Content-Disposition header or use default
let filename = resp
.headers()
.get("content-disposition")
.and_then(|h| h.to_str().ok())
.and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
.unwrap_or_else(|| "isin_lei.zip".to_string());
// Parse timestamp from filename and convert to DDMMYYYY format
let parsed_filename = parse_gleif_filename(&filename);
logger::log_info(&format!("Corporate Scraper: Downloaded file: {} -> {}", filename, parsed_filename)).await;
// Determine date (DDMMYYYY) from parsed filename: "isin-lei-DDMMYYYY.csv"
let mut date_str = String::new();
if let Some(start_idx) = parsed_filename.find("isin-lei-") {
let rest = &parsed_filename[start_idx + 9..];
if rest.len() >= 8 {
date_str = rest[0..8].to_string();
}
}
// If we parsed a date, use/create a date folder under cache/gleif and operate inside it; otherwise use cache root.
let date_dir = if !date_str.is_empty() {
let p = gleif_cache_dir.join(&date_str);
// Ensure the date folder exists (create if necessary)
if let Err(e) = std::fs::create_dir_all(&p) {
let msg = format!("Failed to create date directory {:?}: {}", p, e);
logger::log_warn(&msg).await;
None
} else {
Some(p)
}
} else {
None
};
// Choose the directory where we'll look for existing files and where we'll save the new ones
let target_dir = date_dir.clone().unwrap_or_else(|| gleif_cache_dir.to_path_buf());
// If the date folder exists (or was created), prefer any *_clean.csv inside it and return that immediately
if let Some(ref ddir) = date_dir {
if let Ok(entries) = std::fs::read_dir(ddir) {
for entry in entries.flatten() {
if let Some(name) = entry.file_name().to_str() {
if name.to_lowercase().ends_with("_clean.csv") {
let path = ddir.join(name);
logger::log_info(&format!("Found existing clean GLEIF CSV: {}", path.display())).await;
return Ok(Some(path.to_string_lossy().to_string()));
}
}
}
}
}
// If no clean file found in the date folder (or date folder doesn't exist), check whether the csv/zip already exist in the target dir
let csv_candidate_name = parsed_filename.replace(".zip", ".csv");
let csv_candidate = target_dir.join(&csv_candidate_name);
let zip_candidate = target_dir.join(&parsed_filename);
if csv_candidate.exists() {
logger::log_info(&format!("Found existing GLEIF CSV: {}", csv_candidate.display())).await;
return Ok(Some(csv_candidate.to_string_lossy().to_string()));
}
if zip_candidate.exists() {
// If zip exists but csv does not, extract later; for now prefer returning csv path (may be created by extraction step)
let inferred_csv = target_dir.join(csv_candidate_name);
if inferred_csv.exists() {
logger::log_info(&format!("Found existing extracted CSV next to ZIP: {}", inferred_csv.display())).await;
return Ok(Some(inferred_csv.to_string_lossy().to_string()));
}
// otherwise we'll overwrite/extract into target_dir below
}
let bytes = match resp.bytes().await {
Ok(b) => b,
Err(e) => {
let msg = format!("Failed to read ZIP bytes: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
};
// Ensure target directory exists (create if it's the date folder and was absent earlier)
if let Some(ref ddir) = date_dir {
let _ = std::fs::create_dir_all(ddir);
}
let zip_path = target_dir.join(&parsed_filename);
let csv_path = target_dir.join(parsed_filename.replace(".zip", ".csv"));
if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
let msg = format!("Failed to write ZIP file: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
logger::log_info(&format!("Corporate Scraper: Saved ZIP to {:?}", zip_path)).await;
// Extract CSV
let archive = match std::fs::File::open(zip_path)
let archive = match std::fs::File::open(&zip_path)
.map(ZipArchive::new)
{
Ok(Ok(a)) => a,
Ok(Err(e)) => {
println!("Invalid ZIP: {e}");
let msg = format!("Invalid ZIP: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
Err(e) => {
println!("Cannot open ZIP file: {e}");
let msg = format!("Cannot open ZIP file: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
};
@@ -737,7 +660,9 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
}) {
Some(i) => i,
None => {
println!("ZIP did not contain a CSV file");
let msg = "ZIP did not contain a CSV file";
logger::log_error(msg).await;
println!("{}", msg);
return Ok(None);
}
};
@@ -745,23 +670,55 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
let mut csv_file = match archive.by_index(idx) {
Ok(f) => f,
Err(e) => {
println!("Failed to read CSV entry: {e}");
let msg = format!("Failed to read CSV entry: {}", e);
logger::log_error(&msg).await;
println!("{}", msg);
return Ok(None);
}
};
let mut csv_bytes = Vec::new();
if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
println!("Failed to extract CSV: {e}");
let msg = format!("Failed to extract CSV: {}", e);
logger::log_error(&msg).await;
return Ok(None);
}
if let Err(e) = tokio::fs::write(csv_path, &csv_bytes).await {
println!("Failed to save CSV file: {e}");
if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
let msg = format!("Failed to save CSV file: {}", e);
logger::log_error(&msg).await;
return Ok(None);
}
Ok(Some(csv_path.to_string()))
let msg = format!("✓ ISIN/LEI CSV extracted: {:?}", csv_path);
logger::log_info(&msg).await;
Ok(Some(csv_path.to_string_lossy().to_string()))
}
/// Parse GLEIF filename and convert timestamp to DDMMYYYY format
/// Example: "isin-lei-20251124T080254.csv" -> "isin-lei-24112025.csv"
fn parse_gleif_filename(filename: &str) -> String {
// Try to find pattern: isin-lei-YYYYMMDDTHHMMSS.zip/csv
if let Some(start_idx) = filename.find("isin-lei-") {
let rest = &filename[start_idx + 9..]; // After "isin-lei-"
// Extract the 8 digits (YYYYMMDD)
if rest.len() >= 8 && rest[0..8].chars().all(|c| c.is_numeric()) {
let date_part = &rest[0..8];
// date_part is YYYYMMDD, convert to DDMMYYYY
if date_part.len() == 8 {
let year = &date_part[0..4];
let month = &date_part[4..6];
let day = &date_part[6..8];
let extension = if filename.ends_with(".zip") { ".zip" } else { ".csv" };
return format!("isin-lei-{}{}{}{}", day, month, year, extension);
}
}
}
// Fallback: return original filename if parsing fails
filename.to_string()
}
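Based on the doc comment above, a small test sketch of the filename conversion, covering both the timestamped and the fallback case:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn gleif_filename_is_converted_to_ddmmyyyy() {
        // Timestamped GLEIF name: YYYYMMDD is reordered to DDMMYYYY.
        assert_eq!(
            parse_gleif_filename("isin-lei-20251124T080254.csv"),
            "isin-lei-24112025.csv"
        );
        // Unrecognised names are returned unchanged.
        assert_eq!(parse_gleif_filename("download.zip"), "download.zip");
    }
}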

View File

@@ -1,20 +1,24 @@
// src/corporate/storage.rs
use super::{types::*, helpers::*};
use crate::config;
use crate::util::directories::DataPaths;
use crate::util::logger;
use tokio::fs;
use tokio::io::AsyncWriteExt;
use chrono::{Datelike, NaiveDate};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::collections::{HashMap};
use std::path::{PathBuf};
pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
pub async fn load_existing_events(paths: &DataPaths) -> anyhow::Result<HashMap<String, CompanyEvent>> {
let mut map = HashMap::new();
let dir = std::path::Path::new("corporate_events");
let dir = paths.corporate_events_dir();
if !dir.exists() {
logger::log_info("Corporate Storage: No existing events directory found").await;
return Ok(map);
}
let mut entries = fs::read_dir(dir).await?;
let mut loaded_count = 0;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
@@ -25,25 +29,32 @@ pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEve
for event in events {
map.insert(event_key(&event), event);
}
loaded_count += 1;
}
}
}
logger::log_info(&format!("Corporate Storage: Loaded {} events from {} files", map.len(), loaded_count)).await;
Ok(map)
}
pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
let dir = std::path::Path::new("corporate_events");
pub async fn save_optimized_events(paths: &DataPaths, events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
let dir = paths.corporate_events_dir();
fs::create_dir_all(dir).await?;
logger::log_info("Corporate Storage: Removing old event files...").await;
let mut removed_count = 0;
let mut entries = fs::read_dir(dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
fs::remove_file(&path).await?;
removed_count += 1;
}
}
logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;
let total_events = events.len();
let mut sorted: Vec<_> = events.into_values().collect();
sorted.sort_by_key(|e| (e.ticker.clone(), e.date.clone()));
@@ -55,18 +66,26 @@ pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> any
}
}
let total_months = by_month.len();
for (month, list) in by_month {
let path = dir.join(format!("events_{}.json", month));
fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
}
logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
Ok(())
}
pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()> {
if changes.is_empty() { return Ok(()); }
let dir = std::path::Path::new("corporate_event_changes");
pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
if changes.is_empty() {
logger::log_info("Corporate Storage: No changes to save").await;
return Ok(());
}
let dir = paths.corporate_changes_dir();
fs::create_dir_all(dir).await?;
logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;
let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
for c in changes {
if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -81,14 +100,16 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()>
let s = fs::read_to_string(&path).await?;
serde_json::from_str(&s).unwrap_or_default()
} else { vec![] };
all.extend(list);
all.extend(list.clone());
fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
}
logger::log_info("Corporate Storage: All changes saved successfully").await;
Ok(())
}
pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
let base_dir = Path::new("corporate_prices");
pub async fn save_prices_for_ticker(paths: &DataPaths, ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
let base_dir = paths.corporate_prices_dir();
let company_dir = base_dir.join(ticker.replace(".", "_"));
let timeframe_dir = company_dir.join(timeframe);
@@ -102,35 +123,35 @@ pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: V
Ok(())
}
pub fn get_company_dir(lei: &str) -> PathBuf {
PathBuf::from("corporate_prices").join(lei)
pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
paths.corporate_prices_dir().join(lei)
}
pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
let base = get_company_dir(isin);
let paths = [
pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
let base = get_company_dir(paths, isin);
let paths_to_create = [
base.clone(),
base.join("5min"),
base.join("daily"),
base.join("aggregated").join("5min"),
base.join("aggregated").join("daily"),
];
for p in paths {
for p in paths_to_create {
fs::create_dir_all(&p).await?;
}
Ok(())
}
pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
let dir = get_company_dir(isin);
pub async fn save_available_exchanges(paths: &DataPaths, isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
let dir = get_company_dir(paths, isin);
fs::create_dir_all(&dir).await?;
let path = dir.join("available_exchanges.json");
fs::write(&path, serde_json::to_string_pretty(&exchanges)?).await?;
Ok(())
}
pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
let path = get_company_dir(lei).join("available_exchanges.json");
pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
let path = get_company_dir(paths, lei).join("available_exchanges.json");
if path.exists() {
let content = fs::read_to_string(&path).await?;
Ok(serde_json::from_str(&content)?)
@@ -140,13 +161,14 @@ pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<Available
}
pub async fn save_prices_by_source(
paths: &DataPaths,
lei: &str,
source_ticker: &str,
timeframe: &str,
prices: Vec<CompanyPrice>,
) -> anyhow::Result<()> {
let source_safe = source_ticker.replace(".", "_").replace("/", "_");
let dir = get_company_dir(lei).join(timeframe).join(&source_safe);
let dir = get_company_dir(paths, lei).join(timeframe).join(&source_safe);
fs::create_dir_all(&dir).await?;
let path = dir.join("prices.json");
let mut prices = prices;
@@ -156,14 +178,15 @@ pub async fn save_prices_by_source(
}
/// Update available_exchanges.json with fetch results
pub async fn update_available_exchange(
/*pub async fn update_available_exchange(
paths: &DataPaths,
isin: &str,
ticker: &str,
exchange_mic: &str,
has_daily: bool,
has_5min: bool,
) -> anyhow::Result<()> {
let mut exchanges = load_available_exchanges(isin).await?;
let mut exchanges = load_available_exchanges(paths, isin).await?;
if let Some(entry) = exchanges.iter_mut().find(|e| e.ticker == ticker) {
// Update existing entry
@@ -181,39 +204,8 @@ pub async fn update_available_exchange(
exchanges.push(new_entry);
}
save_available_exchanges(isin, exchanges).await
}
/// Add a newly discovered exchange before fetching
///
/// # Arguments
/// * `isin` - The ISIN associated with the exchange.
/// * `figi_info` - The FigiInfo containing ticker, mic_code, and currency.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if loading or saving available exchanges fails.
pub async fn add_discovered_exchange(
isin: &str,
figi_info: &FigiInfo,
) -> anyhow::Result<()> {
let mut exchanges = load_available_exchanges(isin).await?;
// Only add if not already present
if !exchanges.iter().any(|e| e.ticker == figi_info.ticker && e.exchange_mic == figi_info.mic_code) {
let new_entry = AvailableExchange::new(
figi_info.ticker.clone(),
figi_info.mic_code.clone(),
figi_info.currency.clone(),
);
exchanges.push(new_entry);
save_available_exchanges(isin, exchanges).await?;
}
Ok(())
}
save_available_exchanges(paths, isin, exchanges).await
}*/
/// Infer currency from ticker suffix
fn infer_currency_from_ticker(ticker: &str) -> String {
@@ -234,4 +226,42 @@ fn infer_currency_from_ticker(ticker: &str) -> String {
if ticker.ends_with(".BO") || ticker.ends_with(".NS") { return "INR".to_string(); }
"USD".to_string() // Default
}
/// Saves companies data to a JSONL file.
///
/// # Arguments
/// * `paths` - Reference to DataPaths for directory management
/// * `companies` - HashMap of company names to their securities (ISIN, Ticker pairs)
///
/// # Errors
/// Returns an error if file operations or serialization fails.
pub async fn save_companies_to_jsonl(
paths: &DataPaths,
companies: &HashMap<String, HashMap<String, String>>,
) -> anyhow::Result<()> {
let file_path = paths.data_dir().join("companies.jsonl");
logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;
// Create parent directory if it doesn't exist
if let Some(parent) = file_path.parent() {
tokio::fs::create_dir_all(parent).await?;
}
let mut file = tokio::fs::File::create(&file_path).await?;
for (name, securities) in companies.iter() {
let line = serde_json::json!({
"name": name,
"securities": securities
});
file.write_all(line.to_string().as_bytes()).await?;
file.write_all(b"\n").await?;
}
let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
println!("{}", msg);
logger::log_info(&msg).await;
Ok(())
}
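For completeness, a hedged sketch of reading back the companies.jsonl written by save_companies_to_jsonl (one JSON object per line with "name" and "securities" keys); the helper name and error handling are illustrative:

// Sketch: read companies.jsonl line by line, matching the format written above.
async fn load_companies_jsonl(
    path: &std::path::Path,
) -> anyhow::Result<std::collections::HashMap<String, std::collections::HashMap<String, String>>> {
    let content = tokio::fs::read_to_string(path).await?;
    let mut companies = std::collections::HashMap::new();
    for line in content.lines().filter(|l| !l.trim().is_empty()) {
        let value: serde_json::Value = serde_json::from_str(line)?;
        let name = value["name"].as_str().unwrap_or_default().to_string();
        let securities = serde_json::from_value(value["securities"].clone()).unwrap_or_default();
        companies.insert(name, securities);
    }
    Ok(companies)
}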

View File

@@ -1,6 +1,5 @@
use std::collections::HashMap;
// src/corporate/types.rs
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -53,24 +52,19 @@ pub struct FigiInfo {
pub figi: String,
pub name: String,
pub ticker: String,
pub mic_code: String,
pub currency: String,
pub compositeFIGI: String,
pub securityType: String,
pub marketSector: String,
pub shareClassFIGI: String,
pub securityType2: String,
pub securityDescription: String,
}
/// Company Meta Data
/// # Attributes
/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
/// * figi: metadata with ISIN as key
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyMetadata {
pub lei: String,
pub figi: Option<Vec<FigiInfo>>,
pub exch_code: String,
#[serde(rename = "compositeFIGI")]
pub composite_figi: String,
#[serde(rename = "securityType")]
pub security_type: String,
#[serde(rename = "marketSector")]
pub market_sector: String,
#[serde(rename = "shareClassFIGI")]
pub share_class_figi: String,
#[serde(rename = "securityType2")]
pub security_type2: String,
#[serde(rename = "securityDescription")]
pub security_description: String,
}
/// Company Info
@@ -85,6 +79,15 @@ pub struct CompanyInfo{
pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo>
}
/// Company Meta Data
/// # Attributes
/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
/// * figi: metadata with ISIN as key
/*#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyMetadata {
pub lei: String,
pub figi: Option<Vec<FigiInfo>>,
}*/
/// Warrant Info
///
@@ -115,13 +118,13 @@ pub struct OptionInfo {
pub options: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
}
#[derive(Debug, Clone, Serialize, Deserialize)]
/*#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrimaryInfo {
pub isin: String,
pub name: String,
pub exchange_mic: String,
pub currency: String,
}
}*/
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableExchange {
@@ -136,28 +139,4 @@ pub struct AvailableExchange {
pub discovered_at: Option<String>, // When this exchange was first discovered
#[serde(default)]
pub fetch_count: u32, // How many times successfully fetched
}
impl AvailableExchange {
pub fn new(ticker: String, exchange_mic: String, currency: String) -> Self {
Self {
exchange_mic,
ticker,
has_daily: false,
has_5min: false,
last_successful_fetch: None,
currency,
discovered_at: Some(chrono::Local::now().format("%Y-%m-%d").to_string()),
fetch_count: 0,
}
}
pub fn record_success(&mut self, has_daily: bool, has_5min: bool) {
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
self.has_daily |= has_daily;
self.has_5min |= has_5min;
self.last_successful_fetch = Some(today);
self.fetch_count += 1;
}
}

View File

@@ -1,6 +1,8 @@
// src/corporate/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
use crate::config::Config;
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::scraper::webdriver::ChromeDriverPool;
use chrono::Local;
@@ -24,50 +26,109 @@ use std::sync::Arc;
/// # Errors
/// Returns an error if any step in the update process fails.
pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
println!("=== Starting LEI-based corporate full update ===");
let msg = "=== Starting LEI-based corporate full update ===";
println!("{}", msg);
logger::log_info(msg).await;
// Initialize paths
let paths = DataPaths::new(".")?;
// 1. Load fresh GLEIF ISIN ↔ LEI mapping
logger::log_info("Corporate Update: Loading GLEIF ISIN ↔ LEI mapping...").await;
let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
Ok(map) => map,
Ok(map) => {
let msg = format!("Corporate Update: Loaded GLEIF mapping with {} LEI entries", map.len());
println!("{}", msg);
logger::log_info(&msg).await;
map
}
Err(e) => {
eprintln!("Warning: Could not load GLEIF ISIN↔LEI mapping: {}", e);
let msg = format!("Corporate Update: Warning - Could not load GLEIF ISIN↔LEI mapping: {}", e);
eprintln!("{}", msg);
logger::log_warn(&msg).await;
HashMap::new()
}
};
// 2. Load OpenFIGI mapping value lists (cached)
logger::log_info("Corporate Update: Loading OpenFIGI type lists...").await;
if let Err(e) = load_figi_type_lists().await {
eprintln!("Warning: Could not load OpenFIGI type lists: {}", e);
let msg = format!("Corporate Update: Warning - Could not load OpenFIGI type lists: {}", e);
eprintln!("{}", msg);
logger::log_warn(&msg).await;
}
logger::log_info("Corporate Update: OpenFIGI type lists loaded").await;
// 3. Build FIGI → LEI map
// # Attributes
// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
// * figi: metadata with ISIN as key
let figi_to_lei:HashMap<String, Vec<FigiInfo>> = match build_lei_to_figi_infos(&lei_to_isins).await {
Ok(map) => map,
logger::log_info("Corporate Update: Building FIGI → LEI map...").await;
let figi_to_lei:HashMap<String, Vec<FigiInfo>> = match build_lei_to_figi_infos(&lei_to_isins, None).await {
Ok(map) => {
let msg = format!("Corporate Update: Built FIGI map with {} entries", map.len());
println!("{}", msg);
logger::log_info(&msg).await;
map
}
Err(e) => {
eprintln!("Warning: Could not build FIGI→LEI map: {}", e);
let msg = format!("Corporate Update: Warning - Could not build FIGI→LEI map: {}", e);
eprintln!("{}", msg);
logger::log_warn(&msg).await;
HashMap::new()
}
};
// 4. Load or build companies
let mut companies = load_or_build_all_securities(&figi_to_lei).await?;
println!("Processing {} companies", companies.0.len());
logger::log_info("Corporate Update: Loading/building company securities...").await;
let securities = load_or_build_all_securities(&figi_to_lei).await?;
let msg = format!("Corporate Update: Processing {} companies", securities.0.len());
println!("{}", msg);
logger::log_info(&msg).await;
// HashMap<Name, HashMap<ISIN, Ticker>> - unique pairs only
let companies: HashMap<String, HashMap<String, String>> = securities.0
.iter()
.fold(HashMap::new(), |mut acc, security| {
let mut isin_ticker_pairs: HashMap<String, String> = HashMap::new();
// Collect all unique ISIN-Ticker pairs
for figi_infos in security.1.securities.values() {
for figi_info in figi_infos {
if !figi_info.isin.is_empty() && !figi_info.ticker.is_empty() {
isin_ticker_pairs.insert(figi_info.isin.clone(), figi_info.ticker.clone());
}
}
}
// Only add if there are pairs
if !isin_ticker_pairs.is_empty() {
acc.insert(security.1.name.clone(), isin_ticker_pairs);
}
acc
});
logger::log_info(&format!("Corporate Update: Saving {} companies to JSONL", companies.len())).await;
save_companies_to_jsonl(&paths, &companies).await.expect("Failed to save companies List.");
logger::log_info("Corporate Update: Companies saved successfully").await;
// 5. Load existing earnings events (for change detection)
let today = Local::now().format("%Y-%m-%d").to_string();
let mut existing_events = match load_existing_events().await {
Ok(events) => events,
logger::log_info("Corporate Update: Loading existing events...").await;
let existing_events = match load_existing_events(&paths).await {
Ok(events) => {
let msg = format!("Corporate Update: Loaded {} existing events", events.len());
println!("{}", msg);
logger::log_info(&msg).await;
events
}
Err(e) => {
eprintln!("Warning: Could not load existing events: {}", e);
let msg = format!("Corporate Update: Warning - Could not load existing events: {}", e);
eprintln!("{}", msg);
logger::log_warn(&msg).await;
HashMap::new()
}
};
// 5. Use the provided pool (no need to create a new one)
let pool_size = pool.get_number_of_instances(); // Use the size from the shared pool
logger::log_info(&format!("Corporate Update: Using pool size: {}", pool_size)).await;
// Process companies in parallel using the shared pool
/*let results: Vec<_> = stream::iter(companies.into_iter())
@@ -88,10 +149,14 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
}
}*/
save_optimized_events(existing_events).await?;
logger::log_info(&format!("Corporate Update: Saving {} events to optimized storage", existing_events.len())).await;
save_optimized_events(&paths, existing_events).await?;
logger::log_info("Corporate Update: Events saved successfully").await;
//save_changes(&all_changes).await?;
//println!("Corporate update complete — {} changes detected", all_changes.len());
let msg = "✓ Corporate update complete";
println!("{}", msg);
logger::log_info(msg).await;
Ok(())
}

View File

@@ -7,39 +7,10 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");
pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
//dismiss_overlays(client).await?;
/*if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
tab.click().await?;
println!("High importance tab selected");
sleep(Duration::from_secs(2)).await;
}*/
Ok(())
}
/*pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
for _ in 0..10 {
let removed: bool = client
.execute(
r#"(() => {
const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
if (iframe && iframe.parentNode) {
iframe.parentNode.removeChild(iframe);
return true;
}
return false;
})()"#,
vec![],
)
.await?
.as_bool()
.unwrap_or(false);
if removed { break; }
sleep(Duration::from_millis(500)).await;
}
Ok(())
}*/
pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
let script = format!(
r#"

View File

@@ -1,12 +1,14 @@
// src/economic/storage.rs
use super::types::*;
use super::helpers::*;
use crate::util::directories::DataPaths;
use crate::util::logger;
use tokio::fs;
use chrono::{NaiveDate, Datelike};
use std::collections::HashMap;
pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
let dir = std::path::Path::new("data/economic/events");
pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
let dir = paths.economic_events_dir();
let mut chunks = Vec::new();
if dir.exists() {
@@ -29,6 +31,7 @@ pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
}
}
chunks.sort_by_key(|c| c.start_date.clone());
logger::log_info(&format!("Economic Storage: Scanned {} event chunks", chunks.len())).await;
Ok(chunks)
}
@@ -41,25 +44,28 @@ pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMa
map.insert(event_key(&e), e);
}
}
logger::log_info(&format!("Economic Storage: Loaded {} events from {} chunks", map.len(), chunks.len())).await;
Ok(map)
}
pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
let dir = std::path::Path::new("data/economic/events");
pub async fn save_optimized_chunks(paths: &DataPaths, events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
let dir = paths.economic_events_dir();
fs::create_dir_all(dir).await?;
// Delete all old chunk files to prevent duplicates and overlaps
println!("Removing old chunks...");
logger::log_info("Economic Storage: Removing old chunk files...").await;
let mut entries = fs::read_dir(dir).await?;
let mut removed_count = 0;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
if name.starts_with("chunk_") && path.extension().map(|e| e == "json").unwrap_or(false) {
fs::remove_file(&path).await?;
removed_count += 1;
}
}
}
logger::log_info(&format!("Economic Storage: Removed {} old chunk files", removed_count)).await;
let mut sorted: Vec<_> = events.into_values().collect();
sorted.sort_by_key(|e| e.date.clone());
@@ -77,6 +83,7 @@ pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> an
if !chunk.is_empty() {
save_chunk(&chunk, dir).await?;
}
logger::log_info(&format!("Economic Storage: Saved all event chunks to {:?}", dir)).await;
Ok(())
}
@@ -85,14 +92,20 @@ async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::
let end = events.iter().map(|e| &e.date).max().unwrap().clone();
let path = dir.join(format!("chunk_{}_{}.json", start, end));
fs::write(&path, serde_json::to_string_pretty(events)?).await?;
logger::log_info(&format!("Economic Storage: Saved chunk {} - {} ({} events)", start, end, events.len())).await;
Ok(())
}
pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
if changes.is_empty() { return Ok(()); }
let dir = std::path::Path::new("economic_event_changes");
pub async fn save_changes(paths: &DataPaths, changes: &[EventChange]) -> anyhow::Result<()> {
if changes.is_empty() {
logger::log_info("Economic Storage: No changes to save").await;
return Ok(());
}
let dir = paths.economic_changes_dir();
fs::create_dir_all(dir).await?;
logger::log_info(&format!("Economic Storage: Saving {} changes to {:?}", changes.len(), dir)).await;
let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
for c in changes {
if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -107,8 +120,10 @@ pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
let s = fs::read_to_string(&path).await?;
serde_json::from_str(&s).unwrap_or_default()
} else { vec![] };
all.extend(list);
all.extend(list.clone());
fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
logger::log_info(&format!("Economic Storage: Saved {} changes for month {}", list.len(), month)).await;
}
logger::log_info("Economic Storage: All changes saved successfully").await;
Ok(())
}

View File

@@ -1,7 +1,6 @@
// src/economic/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::{config::Config, scraper::webdriver::ScrapeTask};
use crate::scraper::webdriver::ChromeDriverPool;
use crate::{config::Config, scraper::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger};
use chrono::{Local};
use std::sync::Arc;
@@ -14,38 +13,69 @@ use std::sync::Arc;
/// # Errors
/// Returns an error if scraping, loading, or saving fails.
pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
let paths = DataPaths::new(".")?;
logger::log_info("Economic Update: Initializing...").await;
let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
let end_date = config.target_end_date();
let chunks = scan_existing_chunks().await?;
logger::log_info(&format!("Economic Update: Scanning existing chunks from {:?}", paths.economic_events_dir())).await;
let chunks = scan_existing_chunks(&paths).await?;
let mut events = load_existing_events(&chunks).await?;
println!("Loaded {} events from {} chunks", events.len(), chunks.len());
let msg = format!("Economic Update: Loaded {} events from {} chunks", events.len(), chunks.len());
println!("{}", msg);
logger::log_info(&msg).await;
let start_date = if events.is_empty() {
logger::log_warn("Economic Update: No existing events found, starting from config date").await;
config.economic_start_date.clone()
} else if events.values().any(|e| e.date >= today_str) {
logger::log_info("Economic Update: Events exist for today, starting from today").await;
today_str.clone()
} else {
events.values()
let next = events.values()
.filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
.max()
.and_then(|d| d.succ_opt())
.map(|d| d.format("%Y-%m-%d").to_string())
.unwrap_or(today_str.clone())
.unwrap_or(today_str.clone());
logger::log_info(&format!("Economic Update: Resuming from: {}", next)).await;
next
};
println!("Scraping economic events: {}{}", start_date, end_date);
let msg = format!("Economic Update: Scraping events from {}{}", start_date, end_date);
println!("{}", msg);
logger::log_info(&msg).await;
// Pass the pool to the scraping function
let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;
let msg = format!("Economic Update: Scraped {} new events", new_events_all.len());
println!("{}", msg);
logger::log_info(&msg).await;
// Process all at once or in batches
let result = process_batch(&new_events_all, &mut events, &today_str);
let total_changes = result.changes.len();
save_changes(&result.changes).await?;
let msg = format!("Economic Update: Detected {} changes", total_changes);
println!("{}", msg);
logger::log_info(&msg).await;
if total_changes > 0 {
logger::log_info(&format!("Economic Update: Saving {} changes to log", total_changes)).await;
save_changes(&paths, &result.changes).await?;
logger::log_info("Economic Update: Changes saved successfully").await;
}
save_optimized_chunks(events).await?;
println!("Economic update complete — {} changes detected", total_changes);
logger::log_info(&format!("Economic Update: Saving {} total events to chunks", events.len())).await;
save_optimized_chunks(&paths, events).await?;
let msg = format!("✓ Economic update complete — {} changes detected", total_changes);
println!("{}", msg);
logger::log_info(&msg).await;
Ok(())
}

8
src/lib.rs Normal file
View File

@@ -0,0 +1,8 @@
// src/lib.rs
//! Event Backtest Engine - Core Library
//!
//! Exposes all public modules for use in examples and tests
pub mod config;
pub mod scraper;
pub mod util;

View File

@@ -1,18 +1,21 @@
// src/main.rs
mod economic;
mod corporate;
mod config;
mod corporate;
mod economic;
mod util;
mod scraper;
use anyhow::Result;
use config::Config;
use scraper::webdriver::ChromeDriverPool;
use util::directories::DataPaths;
use util::{logger, opnv};
use std::sync::Arc;
/// The entry point of the application.
///
/// This function loads the configuration, initializes a shared ChromeDriver pool,
/// fetches the latest VPNBook OpenVPN configurations if VPN rotation is enabled,
/// and sequentially runs the full updates for corporate and economic data.
/// Sequential execution helps prevent resource exhaustion from concurrent
/// chromedriver instances and avoids spamming the target websites with too many requests.
@@ -20,8 +23,8 @@ use std::sync::Arc;
/// # Errors
///
/// Returns an error if configuration loading fails, pool initialization fails,
/// or if either update function encounters an issue (e.g., network errors,
/// scraping failures, or chromedriver spawn failures like "program not found").
/// VPN fetching fails (if enabled), or if either update function encounters an issue
/// (e.g., network errors, scraping failures, or chromedriver spawn failures like "program not found").
#[tokio::main]
async fn main() -> Result<()> {
let config = Config::load().map_err(|err| {
@@ -29,15 +32,48 @@ async fn main() -> Result<()> {
err
})?;
// Initialize paths
let paths = DataPaths::new(".")?;
// Initialize logger
logger::init_debug_logger(paths.logs_dir()).await.map_err(|e| {
anyhow::anyhow!("Logger initialization failed: {}", e)
})?;
logger::log_info("=== Application started ===").await;
logger::log_info(&format!("Config: economic_start_date={}, corporate_start_date={}, lookahead_months={}, max_parallel_instances={}, enable_vpn_rotation={}",
config.economic_start_date, config.corporate_start_date, config.economic_lookahead_months, config.max_parallel_instances, config.enable_vpn_rotation)).await;
// Initialize the shared ChromeDriver pool once
let pool_size = config.max_parallel_tasks;
let pool_size = config.max_parallel_instances;
logger::log_info(&format!("Initializing ChromeDriver pool with size: {}", pool_size)).await;
let pool = Arc::new(ChromeDriverPool::new(pool_size).await?);
logger::log_info("✓ ChromeDriver pool initialized successfully").await;
// Fetch VPNBook configs if VPN rotation is enabled
if config.enable_vpn_rotation {
logger::log_info("--- Fetching latest VPNBook OpenVPN configurations ---").await;
let (username, password, files) =
util::opnv::fetch_vpnbook_configs(&pool, paths.cache_dir()).await?;
logger::log_info(&format!("Fetched VPN username: {}, password: {}", username, password)).await;
for file in &files {
logger::log_info(&format!("Extracted OVPN: {:?}", file)).await;
}
// Optionally, store username/password for rotation use (e.g., in a file or global state)
// For now, just log them; extend as needed for rotation integration
}
// Run economic update first, passing the shared pool
logger::log_info("--- Starting economic data update ---").await;
economic::run_full_update(&config, &pool).await?;
logger::log_info("✓ Economic data update completed").await;
// Then run corporate update, passing the shared pool
logger::log_info("--- Starting corporate data update ---").await;
corporate::run_full_update(&config, &pool).await?;
logger::log_info("✓ Corporate data update completed").await;
logger::log_info("=== Application completed successfully ===").await;
Ok(())
}

View File

@@ -3,34 +3,38 @@
use anyhow::{anyhow, Context, Result};
use fantoccini::{Client, ClientBuilder};
use serde_json::{Map, Value};
use std::pin::Pin;
use std::process::Stdio;
use std::sync::Arc;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::{Child, Command};
use tokio::sync::{Mutex, Semaphore};
use tokio::time::{Duration, sleep, timeout};
use std::pin::Pin;
use tokio::time::{sleep, timeout, Duration};
/// Manages a pool of ChromeDriver instances for parallel scraping.
///
///
/// This struct maintains multiple ChromeDriver processes and allows controlled
/// concurrent access via a semaphore. Instances are reused across tasks to avoid
/// the overhead of spawning new processes.
pub struct ChromeDriverPool {
instances: Vec<Arc<Mutex<ChromeInstance>>>,
semaphore: Arc<Semaphore>,
tasks_per_instance: usize,
}
impl ChromeDriverPool {
/// Creates a new pool with the specified number of ChromeDriver instances.
///
///
/// # Arguments
/// * `pool_size` - Number of concurrent ChromeDriver instances to maintain
pub async fn new(pool_size: usize) -> Result<Self> {
let mut instances = Vec::with_capacity(pool_size);
println!("Initializing ChromeDriver pool with {} instances...", pool_size);
println!(
"Initializing ChromeDriver pool with {} instances...",
pool_size
);
for i in 0..pool_size {
match ChromeInstance::new().await {
Ok(instance) => {
@@ -45,10 +49,11 @@ impl ChromeDriverPool {
}
}
}
Ok(Self {
instances,
semaphore: Arc::new(Semaphore::new(pool_size)),
tasks_per_instance: 0,
})
}
@@ -60,7 +65,10 @@ impl ChromeDriverPool {
Fut: std::future::Future<Output = Result<T>> + Send + 'static,
{
// Acquire semaphore permit
let _permit = self.semaphore.acquire().await
let _permit = self
.semaphore
.acquire()
.await
.map_err(|_| anyhow!("Semaphore closed"))?;
// Find an available instance (round-robin or first available)
@@ -69,7 +77,7 @@ impl ChromeDriverPool {
// Create a new session for this task
let client = guard.new_session().await?;
// Release lock while we do the actual scraping
drop(guard);
@@ -82,8 +90,8 @@ impl ChromeDriverPool {
Ok(result)
}
pub fn get_number_of_instances (&self) -> usize {
self.instances.len()
pub fn get_number_of_instances(&self) -> usize {
self.instances.len()
}
}
@@ -94,7 +102,7 @@ pub struct ChromeInstance {
}
impl ChromeInstance {
/// Creates a new ChromeInstance by spawning chromedriver with random port.
/// Creates a new ChromeInstance by spawning chromedriver with random port.
///
/// This spawns `chromedriver --port=0` to avoid port conflicts, reads stdout to extract
/// the listening address, and waits for the success message. If timeout occurs or
@@ -107,7 +115,7 @@ impl ChromeInstance {
pub async fn new() -> Result<Self> {
let mut command = Command::new("chromedriver-win64/chromedriver.exe");
command
.arg("--port=0") // Use random available port to support pooling
.arg("--port=0") // Use random available port to support pooling
.stdout(Stdio::piped())
.stderr(Stdio::piped());
@@ -115,13 +123,11 @@ impl ChromeInstance {
.spawn()
.context("Failed to spawn chromedriver. Ensure it's installed and in PATH.")?;
let mut stdout = BufReader::new(
process.stdout.take().context("Failed to capture stdout")?
).lines();
let mut stdout =
BufReader::new(process.stdout.take().context("Failed to capture stdout")?).lines();
let mut stderr = BufReader::new(
process.stderr.take().context("Failed to capture stderr")?
).lines();
let mut stderr =
BufReader::new(process.stderr.take().context("Failed to capture stderr")?).lines();
let start_time = std::time::Instant::now();
let mut address: Option<String> = None;
@@ -136,9 +142,7 @@ impl ChromeInstance {
// Wait for address and success (up to 30s)
while start_time.elapsed() < Duration::from_secs(30) {
if let Ok(Ok(Some(line))) =
timeout(Duration::from_secs(1), stdout.next_line()).await
{
if let Ok(Ok(Some(line))) = timeout(Duration::from_secs(1), stdout.next_line()).await {
if let Some(addr) = parse_chromedriver_address(&line) {
address = Some(addr.to_string());
}
@@ -200,8 +204,8 @@ impl ChromeInstance {
}
});
args.as_object()
.expect("Capabilities should be a JSON object")
.clone()
.expect("Capabilities should be a JSON object")
.clone()
}
}
@@ -238,11 +242,13 @@ impl Drop for ChromeInstance {
}
/// Simplified task execution - now uses the pool pattern.
///
///
/// For backwards compatibility with existing code.
pub struct ScrapeTask<T> {
url: String,
parse: Box<dyn FnOnce(Client) -> Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send>,
parse: Box<
dyn FnOnce(Client) -> Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send,
>,
}
impl<T: Send + 'static> ScrapeTask<T> {
@@ -261,9 +267,8 @@ impl<T: Send + 'static> ScrapeTask<T> {
pub async fn execute_with_pool(self, pool: &ChromeDriverPool) -> Result<T> {
let url = self.url;
let parse = self.parse;
pool.execute(url, move |client| async move {
(parse)(client).await
}).await
pool.execute(url, move |client| async move { (parse)(client).await })
.await
}
}
}
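For callers that do not need the `ScrapeTask` wrapper, `execute` can be called directly. A minimal call-site sketch, assuming (as `execute_with_pool` suggests) that the pool navigates to `url` before handing the `Client` to the closure; the URL and selector are placeholders:

use anyhow::Result;
use fantoccini::{Client, Locator};
use crate::scraper::webdriver::ChromeDriverPool;

async fn scrape_page_heading(pool: &ChromeDriverPool) -> Result<String> {
    pool.execute("https://example.com".to_string(), |client: Client| async move {
        // The closure only extracts data; session setup and teardown stay in the pool.
        let heading = client.find(Locator::Css("h1")).await?;
        Ok(heading.text().await?)
    })
    .await
}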

View File

@@ -1,22 +0,0 @@
// src/util.rs (or put it directly in main.rs if you prefer)
use tokio::fs;
use std::path::Path;
/// Create the required data folders if they do not exist yet.
pub async fn _ensure_data_dirs() -> anyhow::Result<()> {
let dirs = [
"economic_events",
"economic_event_changes",
"corporate_events",
"corporate_prices",
"data",
];
for dir in dirs {
let path = Path::new(dir);
if !path.exists() {
tokio::fs::create_dir_all(path).await?;
println!("Created directory: {dir}");
}
}
Ok(())
}

171
src/util/directories.rs Normal file
View File

@@ -0,0 +1,171 @@
use std::path::{Path, PathBuf};
use std::fs;
use crate::util::opnv;
/// Central configuration for all data paths
pub struct DataPaths {
base_dir: PathBuf,
data_dir: PathBuf,
cache_dir: PathBuf,
logs_dir: PathBuf,
// Cache data subdirectories
cache_gleif_dir: PathBuf,
cache_openfigi_dir: PathBuf,
cache_gleif_openfigi_map_dir: PathBuf,
cache_openvpn_dir: PathBuf,
// Economic data subdirectories
economic_events_dir: PathBuf,
economic_changes_dir: PathBuf,
// Corporate data subdirectories
corporate_events_dir: PathBuf,
corporate_changes_dir: PathBuf,
corporate_prices_dir: PathBuf,
}
impl DataPaths {
/// Initialize paths from a base directory
pub fn new(base_dir: impl AsRef<Path>) -> std::io::Result<Self> {
let base_dir = base_dir.as_ref().to_path_buf();
let data_dir = base_dir.join("data");
let cache_dir = base_dir.join("cache");
let logs_dir = base_dir.join("logs");
// Cache subdirectories
let cache_gleif_dir = cache_dir.join("gleif");
let cache_openfigi_dir = cache_dir.join("openfigi");
let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi");
let cache_openvpn_dir = cache_dir.join("openvpn");
// Economic subdirectories
let economic_events_dir = data_dir.join("economic").join("events");
let economic_changes_dir = economic_events_dir.join("changes");
// Corporate subdirectories
let corporate_dir = data_dir.join("corporate");
let corporate_events_dir = corporate_dir.join("events");
let corporate_changes_dir = corporate_events_dir.join("changes");
let corporate_prices_dir = corporate_dir.join("prices");
// Create all directories if they don't exist
fs::create_dir_all(&data_dir)?;
fs::create_dir_all(&cache_dir)?;
fs::create_dir_all(&logs_dir)?;
fs::create_dir_all(&cache_gleif_dir)?;
fs::create_dir_all(&cache_openfigi_dir)?;
fs::create_dir_all(&cache_gleif_openfigi_map_dir)?;
fs::create_dir_all(&cache_openvpn_dir)?;
fs::create_dir_all(&economic_events_dir)?;
fs::create_dir_all(&economic_changes_dir)?;
fs::create_dir_all(&corporate_events_dir)?;
fs::create_dir_all(&corporate_changes_dir)?;
fs::create_dir_all(&corporate_prices_dir)?;
Ok(Self {
base_dir,
data_dir,
cache_dir,
logs_dir,
cache_gleif_dir,
cache_openfigi_dir,
cache_gleif_openfigi_map_dir,
cache_openvpn_dir,
economic_events_dir,
economic_changes_dir,
corporate_events_dir,
corporate_changes_dir,
corporate_prices_dir,
})
}
pub fn base_dir(&self) -> &Path {
&self.base_dir
}
pub fn data_dir(&self) -> &Path {
&self.data_dir
}
pub fn cache_dir(&self) -> &Path {
&self.cache_dir
}
pub fn logs_dir(&self) -> &Path {
&self.logs_dir
}
pub fn cache_gleif_dir(&self) -> &Path {
&self.cache_gleif_dir
}
pub fn cache_openfigi_dir(&self) -> &Path {
&self.cache_openfigi_dir
}
pub fn cache_gleif_openfigi_map_dir(&self) -> &Path {
&self.cache_gleif_openfigi_map_dir
}
pub fn cache_openvpn_dir(&self) -> &Path {
&self.cache_openvpn_dir
}
/// Get the economic events directory
pub fn economic_events_dir(&self) -> &Path {
&self.economic_events_dir
}
/// Get the economic changes directory
pub fn economic_changes_dir(&self) -> &Path {
&self.economic_changes_dir
}
/// Get the corporate events directory
pub fn corporate_events_dir(&self) -> &Path {
&self.corporate_events_dir
}
/// Get the corporate changes directory
pub fn corporate_changes_dir(&self) -> &Path {
&self.corporate_changes_dir
}
/// Get the corporate prices directory
pub fn corporate_prices_dir(&self) -> &Path {
&self.corporate_prices_dir
}
/// Get a specific file path within data directory
pub fn data_file(&self, filename: &str) -> PathBuf {
self.data_dir.join(filename)
}
/// Get a specific file path within cache directory
pub fn cache_file(&self, filename: &str) -> PathBuf {
self.cache_dir.join(filename)
}
/// Get a specific file path within logs directory
pub fn log_file(&self, filename: &str) -> PathBuf {
self.logs_dir.join(filename)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_paths_creation() {
let paths = DataPaths::new("./test_base").unwrap();
assert!(paths.data_dir().exists());
assert!(paths.cache_dir().exists());
assert!(paths.logs_dir().exists());
assert!(paths.economic_events_dir().exists());
assert!(paths.economic_changes_dir().exists());
assert!(paths.corporate_events_dir().exists());
assert!(paths.corporate_changes_dir().exists());
assert!(paths.corporate_prices_dir().exists());
}
}
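Outside the tests, the accessors compose naturally. A small usage sketch; the file names are placeholders:

use anyhow::Result;

fn resolve_paths() -> Result<()> {
    // DataPaths::new creates ./data, ./cache, ./logs and all nested
    // subdirectories on first use, so the returned paths always exist.
    let paths = DataPaths::new(".")?;
    let ovpn_dir = paths.cache_openvpn_dir();
    let events_file = paths.data_file("economic_events.jsonl");
    let log_file = paths.log_file("scraper.log");
    println!("{:?} {:?} {:?}", ovpn_dir, events_file, log_file);
    Ok(())
}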

78
src/util/logger.rs Normal file
View File

@@ -0,0 +1,78 @@
// src/util/logger.rs
use chrono::Local;
use once_cell::sync::Lazy;
use tokio::sync::Mutex;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::PathBuf;
static LOGGER: Lazy<Mutex<Option<DebugLogger>>> = Lazy::new(|| Mutex::new(None));
pub struct DebugLogger {
file: std::fs::File,
log_path: PathBuf,
}
impl DebugLogger {
fn new(log_dir: &std::path::Path) -> std::io::Result<Self> {
fs::create_dir_all(log_dir)?;
let filename = format!("backtest_{}.log", Local::now().format("%Y%m%d_%H%M%S"));
let log_path = log_dir.join(&filename);
let file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)?;
Ok(Self { file, log_path })
}
async fn log(&mut self, msg: &str) {
let line = format!("[{}] {}\n", Local::now().format("%H:%M:%S"), msg);
let _ = self.file.write_all(line.as_bytes());
let _ = self.file.flush();
println!("{}", line.trim_end());
}
}
pub async fn init_debug_logger(log_dir: &std::path::Path) -> Result<(), String> {
let mut logger = LOGGER.lock().await;
match DebugLogger::new(log_dir) {
Ok(l) => {
let log_path = l.log_path.clone();
*logger = Some(l);
println!("✓ Logger initialized at: {:?}", log_path);
Ok(())
}
Err(e) => {
let err_msg = format!("Failed to initialize logger: {}", e);
eprintln!("{}", err_msg);
Err(err_msg)
}
}
}
pub async fn log_message(msg: &str) {
let mut logger = LOGGER.lock().await;
if let Some(l) = logger.as_mut() {
l.log(msg).await;
} else {
println!("[LOG] {}", msg);
}
}
pub async fn log_detailed(level: &str, msg: &str) {
let formatted = format!("[{}] {}", level, msg);
log_message(&formatted).await;
}
pub async fn log_info(msg: &str) {
log_detailed("INFO", msg).await;
}
pub async fn log_warn(msg: &str) {
log_detailed("WARN", msg).await;
}
pub async fn log_error(msg: &str) {
log_detailed("ERROR", msg).await;
}
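A minimal wiring sketch, assuming the logs directory comes from `DataPaths` in directories.rs:

use crate::util::{directories::DataPaths, logger};

async fn setup_logging() -> anyhow::Result<()> {
    let paths = DataPaths::new(".")?;
    // init_debug_logger returns Result<(), String>, so map the error into anyhow.
    logger::init_debug_logger(paths.logs_dir())
        .await
        .map_err(anyhow::Error::msg)?;
    logger::log_info("logger ready").await;
    Ok(())
}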

4
src/util/mod.rs Normal file
View File

@@ -0,0 +1,4 @@
// src/util/mod.rs
pub mod logger;
pub mod directories;
pub mod opnv;

281
src/util/opnv.rs Normal file
View File

@@ -0,0 +1,281 @@
// src/util/opnv.rs
//! Module for fetching, downloading, and extracting OpenVPN configurations from VPNBook.
//!
//! This module provides functionality to scrape the VPNBook free VPN page using
//! a headless browser, handle potential consent popups, extract current credentials,
//! collect download URLs for OpenVPN ZIP files, download them, and then extract
//! the .ovpn files into a structured directory: cache/openvpn/<hostname>/<ovpn_filename>.
//! It is designed to fetch the most recent data on every run, as credentials and
//! server configurations change periodically.
use anyhow::{anyhow, Context, Result};
use fantoccini::{Client, Locator};
use reqwest;
use std::io::Read;
use std::path::{Path, PathBuf};
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use url::Url;
use zip::ZipArchive;
use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask};
use crate::util::directories::DataPaths;
/// Fetches, downloads, and extracts the latest OpenVPN configurations from VPNBook.
///
/// This asynchronous function uses the provided `ChromeDriverPool` to scrape the
/// VPNBook free VPN page. It dismisses any consent popup if present, extracts the
/// current username and password, collects all OpenVPN ZIP download URLs, downloads
/// the ZIP files to a temporary folder under `cache_dir`, extracts the .ovpn files into the
/// cache/openvpn/<hostname>/ directory resolved via `DataPaths`, and cleans up the ZIP files.
///
/// The directory structure is: cache/openvpn/<hostname>/<ovpn_filename>, where
/// <hostname> is derived from the ZIP filename (e.g., "ca149.vpnbook.com").
///
/// The function ensures fresh data is fetched each time it runs, making it suitable
/// for periodic updates where credentials may change.
///
/// # Arguments
///
/// * `pool` - A reference to the `ChromeDriverPool` for managing browser instances.
/// * `cache_dir` - The base cache directory, used for the temporary ZIP downloads. The extracted
///   .ovpn files are written to the cache/openvpn/<hostname>/ directory resolved via `DataPaths`.
///
/// # Returns
///
/// A `Result` containing a tuple with:
/// - `String`: The scraped username.
/// - `String`: The scraped password.
/// - `Vec<PathBuf>`: Paths to the extracted .ovpn files.
///
/// # Errors
///
/// Returns an `anyhow::Error` if:
/// - Navigation to the page fails.
/// - The consent popup cannot be dismissed (if present).
/// - Credentials cannot be parsed from the page.
/// - Download URLs cannot be found or are invalid.
/// - HTTP downloads fail or file writing errors occur.
/// - ZIP extraction fails (e.g., invalid ZIP or I/O errors).
///
/// # Dependencies
///
/// This function requires the following crates (add to Cargo.toml if not present):
/// - `anyhow` for error handling.
/// - `fantoccini` for browser automation.
/// - `reqwest` (with `tokio` features) for HTTP downloads.
/// - `tokio` for asynchronous file operations.
/// - `url` for URL manipulation.
/// - `zip` for ZIP extraction.
///
/// # Examples
///
/// ```no_run
/// use anyhow::Result;
/// use event_backtest_engine::util::opnv::fetch_vpnbook_configs;
/// use event_backtest_engine::scraper::webdriver::ChromeDriverPool;
/// use std::path::Path;
///
/// #[tokio::main]
/// async fn main() -> Result<()> {
/// let pool = ChromeDriverPool::new(1).await?;
/// let (username, password, files) =
/// fetch_vpnbook_configs(&pool, Path::new("./cache")).await?;
/// println!("Username: {}, Password: {}", username, password);
/// for file in files {
/// println!("Extracted: {:?}", file);
/// }
/// Ok(())
/// }
/// ```
pub async fn fetch_vpnbook_configs(
pool: &ChromeDriverPool,
cache_dir: &Path,
) -> Result<(String, String, Vec<PathBuf>)> {
// Prepare the openvpn directory
let dir = DataPaths::new(".")?;
let vpn_dir = dir.cache_openvpn_dir();
tokio::fs::create_dir_all(&vpn_dir)
.await
.context("Failed to create openvpn directory")?;
// Temporary directory for ZIP downloads (under cache for consistency)
let temp_dir = cache_dir.join("temp_vpn_zips");
tokio::fs::create_dir_all(&temp_dir)
.await
.context("Failed to create temp directory")?;
let url = "https://www.vpnbook.com/freevpn".to_string();
// Define the scraping task
let task = ScrapeTask::new(url, |client: Client| async move {
// Attempt to dismiss consent popup if present
let consent_selector = r#"body > div.fc-consent-root > div.fc-dialog-container > div.fc-dialog.fc-choice-dialog > div.fc-footer-buttons-container > div.fc-footer-buttons > button.fc-button.fc-cta-do-not-consent.fc-secondary-button > p"#;
if let Ok(consent_elem) = client.find(Locator::Css(consent_selector)).await {
consent_elem
.click()
.await
.context("Failed to click consent dismissal button")?;
// Brief delay to allow popup to close
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
// Find all <code> elements
let codes = client
.find_all(Locator::Css("code"))
.await
.context("Failed to find code elements")?;
if codes.len() < 2 {
return Err(anyhow!("Insufficient code elements found for credentials"));
}
// The first <code> is username, second is password
let username = codes[0]
.text()
.await
.context("Failed to get username text")?;
let password = codes[1]
.text()
.await
.context("Failed to get password text")?;
// Locate all download links for OpenVPN ZIP files
let links = client
.find_all(Locator::Css(r#"a[href^="/free-openvpn-account/"][download=""]"#))
.await
.context("Failed to find download links")?;
// Collect relative hrefs
let mut rel_urls = Vec::new();
for link in links {
if let Some(href) = link.attr("href").await.context("Failed to get href attribute")? {
rel_urls.push(href);
}
}
Ok((username, password, rel_urls))
});
// Execute the scraping task using the pool
let (username, password, rel_urls) = task.execute_with_pool(pool).await?;
// Base URL for resolving relative paths
let base_url = Url::parse("https://www.vpnbook.com/")?;
// Download each ZIP file to temp_dir
let mut zip_paths = Vec::new();
for rel in &rel_urls {
let full_url = base_url.join(rel).context("Failed to join URL")?;
let filename = rel
.split('/')
.last()
.ok_or_else(|| anyhow!("Invalid filename in URL"))?
.to_string();
let out_path = temp_dir.join(&filename);
// Perform HTTP GET request
let resp = reqwest::get(full_url.clone())
.await
.with_context(|| format!("Failed to send download request for {}", full_url))?;
if resp.status().is_success() {
let bytes = resp
.bytes()
.await
.context("Failed to read response bytes")?;
// Write to file asynchronously
let mut file = File::create(&out_path)
.await
.context("Failed to create output file")?;
file.write_all(&bytes)
.await
.context("Failed to write to file")?;
zip_paths.push(out_path);
} else {
return Err(anyhow!(
"Download failed with status: {} for URL: {}",
resp.status(),
full_url
));
}
}
// Now extract .ovpn files from each ZIP
let mut extracted_paths = Vec::new();
for zip_path in zip_paths {
let hostname = get_hostname_from_zip_filename(
zip_path.file_name().unwrap().to_str().unwrap(),
);
let hostname_dir = vpn_dir.join(&hostname);
tokio::fs::create_dir_all(&hostname_dir)
.await
.context("Failed to create hostname directory")?;
// Use spawn_blocking for sync ZIP operations
let zip_path_clone = zip_path.clone();
let hostname_dir_clone = hostname_dir.clone();
let extract_result = tokio::task::spawn_blocking(move || {
let file = std::fs::File::open(&zip_path_clone)
.with_context(|| format!("Failed to open ZIP file: {:?}", zip_path_clone))?;
let mut archive = ZipArchive::new(file)
.with_context(|| format!("Failed to read ZIP archive: {:?}", zip_path_clone))?;
let mut paths = Vec::new();
for i in 0..archive.len() {
let mut zip_file = archive.by_index(i)?;
if zip_file.name().ends_with(".ovpn") {
// Get just the filename, stripping any path
let file_name = Path::new(zip_file.name()).file_name()
.ok_or_else(|| anyhow!("Invalid file name in ZIP: {}", zip_file.name()))?
.to_str()
.ok_or_else(|| anyhow!("Invalid UTF-8 in file name: {}", zip_file.name()))?
.to_string();
let target_path = hostname_dir_clone.join(file_name);
let mut content = Vec::new();
zip_file.read_to_end(&mut content)?;
std::fs::write(&target_path, &content)
.with_context(|| format!("Failed to write .ovpn file: {:?}", target_path))?;
paths.push(target_path);
}
}
Ok::<Vec<PathBuf>, anyhow::Error>(paths)
})
.await
.context("Spawn blocking failed")??;
extracted_paths.extend(extract_result);
// Clean up the ZIP file after extraction
tokio::fs::remove_file(&zip_path)
.await
.context("Failed to remove temp ZIP file")?;
}
// Optional: Clean up temp_dir if empty
let _ = tokio::fs::remove_dir(&temp_dir).await;
Ok((username, password, extracted_paths))
}
/// Derives the hostname from the ZIP filename.
///
/// For example, "vpnbook-openvpn-ca149.zip" -> "ca149.vpnbook.com"
///
/// If the format doesn't match, returns "unknown.vpnbook.com".
fn get_hostname_from_zip_filename(filename: &str) -> String {
if filename.starts_with("vpnbook-openvpn-") && filename.ends_with(".zip") {
let code = filename
.strip_prefix("vpnbook-openvpn-")
.unwrap()
.strip_suffix(".zip")
.unwrap();
format!("{}.vpnbook.com", code)
} else {
"unknown.vpnbook.com".to_string()
}
}
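The filename-to-hostname mapping above is easy to pin down with a unit test; a small sketch, not part of the diff:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn hostname_is_derived_from_zip_name() {
        assert_eq!(
            get_hostname_from_zip_filename("vpnbook-openvpn-ca149.zip"),
            "ca149.vpnbook.com"
        );
        assert_eq!(
            get_hostname_from_zip_filename("unexpected.zip"),
            "unknown.vpnbook.com"
        );
    }
}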