added company mapping with Yahoo tickers

2025-12-14 16:48:02 +01:00
parent 00c9d45642
commit d744769138
12 changed files with 1507 additions and 2591 deletions
--- a/src/corporate/mod.rs
+++ b/src/corporate/mod.rs
@@ -7,5 +7,6 @@ pub mod helpers;
 pub mod aggregation;
 pub mod fx;
 pub mod openfigi;
+pub mod yahoo;

 pub use update::run_full_update;
--- a/src/corporate/openfigi.rs
+++ b/src/corporate/openfigi.rs
--- a/src/corporate/scraper.rs
+++ b/src/corporate/scraper.rs
@@ -1,318 +1,19 @@
 // src/corporate/scraper.rs
-use super::{types::*, helpers::*, openfigi::*};
+use super::{types::*};
 //use crate::corporate::openfigi::OpenFigiClient;
 use crate::{scraper::webdriver::*, util::directories::DataPaths, util::logger};
-use fantoccini::{Client, Locator};
+use fantoccini::{Client};
 use scraper::{Html, Selector};
 use chrono::{DateTime, Duration, NaiveDate, Utc};
 use tokio::{time::{Duration as TokioDuration, sleep}};
 use reqwest::Client as HttpClient;
 use serde_json::{json, Value};
 use zip::ZipArchive;
-use std::{collections::HashMap, sync::Arc};
+use std::{collections::HashMap};
 use std::io::{Read};
-use anyhow::{anyhow, Result};

 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

-/// Check if a ticker exists on Yahoo Finance and return core metadata.
-///
-/// This function calls the public Yahoo Finance quoteSummary endpoint and extracts:
-/// - ISIN (when available)
-/// - Company name
-/// - Exchange MIC code
-/// - Trading currency
-///
-/// It strictly filters to only accept **equity** securities.
-///
-/// # Arguments
-/// * `ticker` - The ticker symbol to validate (e.g., "AAPL", "7203.T", "BMW.DE")
-///
-/// # Returns
-/// `Ok(PrimaryInfo)` on success, `Err` if ticker doesn't exist, is not equity, or data is malformed.
-///
-/// # Errors
-/// - Ticker not found
-/// - Not an equity (ETF, bond, etc.)
-/// - Missing critical fields
-/// - Network or JSON parsing errors
-/*pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
-    let url = format!(
-        "https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price%2CassetProfile",
-        ticker
-    );
-
-    let resp = match HttpClient::new()
-        .get(&url)
-        .header("User-Agent", USER_AGENT)
-        .send()
-        .await
-    {
-        Ok(resp) => resp,
-        Err(err) => {
-            return Err(anyhow::anyhow!(
-                "Failed to reach Yahoo Finance for ticker {}: {}",
-                ticker,
-                err
-            ));
-        }
-    };
-
-    if !resp.status().is_success() {
-        return Err(anyhow::anyhow!("Yahoo returned HTTP {} for ticker {}", resp.status(), ticker));
-    }
-
-    let json: Value = match resp
-        .json()
-        .await {
-            Ok(resp) => resp,
-            Err(err) => {
-                return Err(anyhow::anyhow!(
-                    "Failed to parse JSON response from Yahoo Finance {}: {}",
-                    ticker,
-                    err
-                ));
-            }
-        };
-
-    let result_array = json["quoteSummary"]["result"]
-        .as_array()
-        .ok_or_else(|| anyhow::anyhow!("Missing 'quoteSummary.result' in response"))?;
-
-    if result_array.is_empty() || result_array[0].is_null() {
-        return Err(anyhow::anyhow!("No quote data returned for ticker {}", ticker));
-    }
-
-    let quote = &result_array[0]["price"];
-    let profile = &result_array[0]["assetProfile"];
-
-    // === 1. Must be EQUITY ===
-    let quote_type = quote["quoteType"]
-        .as_str()
-        .unwrap_or("")
-        .to_ascii_uppercase();
-
-    if quote_type != "EQUITY" {
-        println!("      → Skipping {} (quoteType: {})", ticker, quote_type);
-        return Err(anyhow::anyhow!("Not an equity security: {}", quote_type));
-    }
-
-    // === 2. Extract basic info ===
-    let long_name = quote["longName"]
-        .as_str()
-        .or_else(|| quote["shortName"].as_str())
-        .unwrap_or(ticker)
-        .trim()
-        .to_string();
-
-    let currency = quote["currency"]
-        .as_str()
-        .unwrap_or("USD")
-        .to_string();
-
-    let exchange_mic = quote["exchange"]
-        .as_str()
-        .unwrap_or("")
-        .to_string();
-
-    if exchange_mic.is_empty() {
-        return Err(anyhow::anyhow!("Missing exchange MIC for ticker {}", ticker));
-    }
-
-    // === 3. Extract ISIN (from assetProfile if available) ===
-    let isin = profile["isin"]
-        .as_str()
-        .and_then(|s| if s.len() == 12 && s.chars().all(|c| c.is_ascii_alphanumeric()) { Some(s) } else { None })
-        .unwrap_or("")
-        .to_ascii_uppercase();
-
-    // === 4. Final sanity check: reject obvious debt securities ===
-    let name_upper = long_name.to_ascii_uppercase();
-    if name_upper.contains(" BOND") ||
-       name_upper.contains(" NOTE") ||
-       name_upper.contains(" DEBENTURE") ||
-       name_upper.contains(" PREFERRED") && !name_upper.contains(" STOCK") {
-        return Err(anyhow::anyhow!("Security name suggests debt instrument: {}", long_name));
-    }
-
-    println!(
-        "      → Valid equity: {} | {} | {} | ISIN: {}",
-        ticker,
-        long_name,
-        exchange_mic,
-        if isin.is_empty() { "N/A" } else { &isin }
-    );
-
-    Ok(PrimaryInfo {
-        isin,
-        name: long_name,
-        exchange_mic,
-        currency,
-    })
-}*/
-
-/// Fetches earnings events for a ticker using a dedicated ScrapeTask.
-///
-/// This function creates and executes a ScrapeTask to navigate to the Yahoo Finance earnings calendar,
-/// reject cookies, and extract the events.
-///
-/// # Arguments
-/// * `ticker` - The stock ticker symbol.
-///
-/// # Returns
-/// A vector of CompanyEvent structs on success.
-///
-/// # Errors
-/// Returns an error if the task execution fails, e.g., chromedriver spawn or navigation issues.
-pub async fn fetch_earnings_with_pool(
-    ticker: &str,
-    pool: &Arc<ChromeDriverPool>,
-) -> anyhow::Result<Vec<CompanyEvent>> {
-    let ticker = ticker.to_string();
-    let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
-
-    let ticker_cloned = ticker.clone();
-
-    pool.execute(url, move |client| {
-        let ticker = ticker_cloned.clone();
-        Box::pin(async move {
-            reject_yahoo_cookies(&client).await?;
-            extract_earnings_events(&client, &ticker).await
-        })
-    }).await
-}
-
-/// Extracts earnings events from the currently loaded Yahoo Finance earnings calendar page.
-///
-/// This function assumes the client is already navigated to the correct URL (e.g., 
-/// https://finance.yahoo.com/calendar/earnings?symbol={ticker}) and cookies are handled.
-///
-/// It waits for the earnings table, extracts rows, parses cells into CompanyEvent structs,
-/// and handles date parsing, float parsing, and optional fields.
-///
-/// # Arguments
-/// * `client` - The fantoccini Client with the page loaded.
-/// * `ticker` - The stock ticker symbol for the events.
-///
-/// # Returns
-/// A vector of CompanyEvent on success.
-///
-/// # Errors
-/// Returns an error if:
-/// - Table or elements not found.
-/// - Date or float parsing fails.
-/// - WebDriver operations fail.
-///
-/// # Examples
-///
-/// ```no_run
-/// use fantoccini::Client;
-/// use crate::corporate::scraper::extract_earnings;
-///
-/// #[tokio::main]
-/// async fn main() -> Result<()> {
-///     // Assume client is set up and navigated
-///     let events = extract_earnings(&client, "AAPL").await?;
-///     Ok(())
-/// }
-/// ```
-pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> {
-    // Wait for the table to load
-    let table = client
-        .wait()
-        .for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
-        .await
-        .map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
-
-    // Find all rows in tbody
-    let rows = table
-        .find_all(Locator::Css("tbody tr"))
-        .await
-        .map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
-
-    let mut events = Vec::with_capacity(rows.len());
-
-    for row in rows {
-        let cells = row
-            .find_all(Locator::Css("td"))
-            .await
-            .map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
-
-        if cells.len() < 5 {
-            continue; // Skip incomplete rows
-        }
-
-        // Extract and parse date
-        let date_str = cells[0]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get date text: {}", e))?;
-        let date = parse_yahoo_date(&date_str)
-            .map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
-            .format("%Y-%m-%d")
-            .to_string();
-
-        // Extract time, replace "Time Not Supplied" with empty
-        let time = cells[1]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get time text: {}", e))?
-            .replace("Time Not Supplied", "");
-
-        // Extract period
-        let period = cells[2]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get period text: {}", e))?;
-
-        // Parse EPS forecast
-        let eps_forecast_str = cells[3]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
-        let eps_forecast = parse_float(&eps_forecast_str);
-
-        // Parse EPS actual
-        let eps_actual_str = cells[4]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
-        let eps_actual = parse_float(&eps_actual_str);
-
-        // Parse surprise % if available
-        let surprise_pct = if cells.len() > 5 {
-            let surprise_str = cells[5]
-                .text()
-                .await
-                .map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
-            parse_float(&surprise_str)
-        } else {
-            None
-        };
-
-        events.push(CompanyEvent {
-            ticker: ticker.to_string(),
-            date,
-            time,
-            period,
-            eps_forecast,
-            eps_actual,
-            revenue_forecast: None,
-            revenue_actual: None,
-            surprise_pct,
-            source: "Yahoo".to_string(),
-        });
-    }
-
-    if events.is_empty() {
-        eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
-    } else {
-        println!("Extracted {} earnings events for {}", events.len(), ticker);
-    }
-
-    Ok(events)
-}
-
 fn parse_price(v: Option<&Value>) -> f64 {
    v.and_then(|x| x.as_str())
        .and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
@@ -490,20 +191,17 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow
 pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";

-    // Initialize DataPaths and create cache/gleif directory
    let paths = DataPaths::new(".")?;
    let gleif_cache_dir = paths.cache_gleif_dir();
    
    if let Err(e) = std::fs::create_dir_all(&gleif_cache_dir) {
        let msg = format!("Failed to create cache/gleif directory: {}", e);
        logger::log_error(&msg).await;
-        println!("{}", msg);
        return Ok(None);
    }

-    logger::log_info("Corporate Scraper: Downloading ISIN/LEI mapping from GLEIF...").await;
+    logger::log_info("Downloading ISIN/LEI mapping from GLEIF...").await;

-    // Download ZIP and get the filename from Content-Disposition header
    let client = match reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .timeout(std::time::Duration::from_secs(30))
@@ -511,9 +209,7 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    {
        Ok(c) => c,
        Err(e) => {
-            let msg = format!("Failed to create HTTP client: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to create HTTP client: {}", e)).await;
            return Ok(None);
        }
    };
@@ -521,20 +217,15 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let resp = match client.get(url).send().await {
        Ok(r) if r.status().is_success() => r,
        Ok(resp) => {
-            let msg = format!("Server returned HTTP {}", resp.status());
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Server returned HTTP {}", resp.status())).await;
            return Ok(None);
        }
        Err(e) => {
-            let msg = format!("Failed to download ISIN/LEI ZIP: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to download: {}", e)).await;
            return Ok(None);
        }
    };

-    // Extract filename from Content-Disposition header or use default
    let filename = resp
        .headers()
        .get("content-disposition")
@@ -542,11 +233,10 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        .and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
        .unwrap_or_else(|| "isin_lei.zip".to_string());

-    // Parse timestamp from filename and convert to DDMMYYYY format
    let parsed_filename = parse_gleif_filename(&filename);
-    logger::log_info(&format!("Corporate Scraper: Downloaded file: {} -> {}", filename, parsed_filename)).await;
+    logger::log_info(&format!("Downloaded: {} -> {}", filename, parsed_filename)).await;

-    // Determine date (DDMMYYYY) from parsed filename: "isin-lei-DDMMYYYY.csv"
+    // Extract date from filename
    let mut date_str = String::new();
    if let Some(start_idx) = parsed_filename.find("isin-lei-") {
        let rest = &parsed_filename[start_idx + 9..];
@@ -555,13 +245,10 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        }
    }

-    // If we parsed a date, use/create a date folder under cache/gleif and operate inside it; otherwise use cache root.
    let date_dir = if !date_str.is_empty() {
        let p = gleif_cache_dir.join(&date_str);
-        // Ensure the date folder exists (create if necessary)
        if let Err(e) = std::fs::create_dir_all(&p) {
-            let msg = format!("Failed to create date directory {:?}: {}", p, e);
-            logger::log_warn(&msg).await;
+            logger::log_warn(&format!("Failed to create date directory: {}", e)).await;
            None
        } else {
            Some(p)
@@ -570,17 +257,16 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        None
    };

-    // Choose the directory where we'll look for existing files and where we'll save the new ones
    let target_dir = date_dir.clone().unwrap_or_else(|| gleif_cache_dir.to_path_buf());

-    // If the date folder exists (or was created), prefer any *_clean.csv inside it and return that immediately
+    // Check for existing clean CSV
    if let Some(ref ddir) = date_dir {
        if let Ok(entries) = std::fs::read_dir(ddir) {
            for entry in entries.flatten() {
                if let Some(name) = entry.file_name().to_str() {
                    if name.to_lowercase().ends_with("_clean.csv") {
                        let path = ddir.join(name);
-                        logger::log_info(&format!("Found existing clean GLEIF CSV: {}", path.display())).await;
+                        logger::log_info(&format!("Found existing clean CSV: {}", path.display())).await;
                        return Ok(Some(path.to_string_lossy().to_string()));
                    }
                }
@@ -588,71 +274,42 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        }
    }

-    // If no clean file found in the date folder (or date folder doesn't exist), check whether the csv/zip already exist in the target dir
-    let csv_candidate_name = parsed_filename.replace(".zip", ".csv");
-    let csv_candidate = target_dir.join(&csv_candidate_name);
-    let zip_candidate = target_dir.join(&parsed_filename);
-
+    let csv_candidate = target_dir.join(parsed_filename.replace(".zip", ".csv"));
    if csv_candidate.exists() {
-        logger::log_info(&format!("Found existing GLEIF CSV: {}", csv_candidate.display())).await;
+        logger::log_info(&format!("Found existing CSV: {}", csv_candidate.display())).await;
        return Ok(Some(csv_candidate.to_string_lossy().to_string()));
    }
-    if zip_candidate.exists() {
-        // If zip exists but csv does not, extract later; for now prefer returning csv path (may be created by extraction step)
-        let inferred_csv = target_dir.join(csv_candidate_name);
-        if inferred_csv.exists() {
-            logger::log_info(&format!("Found existing extracted CSV next to ZIP: {}", inferred_csv.display())).await;
-            return Ok(Some(inferred_csv.to_string_lossy().to_string()));
-        }
-        // otherwise we'll overwrite/extract into target_dir below
-    }

    let bytes = match resp.bytes().await {
        Ok(b) => b,
        Err(e) => {
-            let msg = format!("Failed to read ZIP bytes: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to read bytes: {}", e)).await;
            return Ok(None);
        }
    };
-    // Ensure target directory exists (create if it's the date folder and was absent earlier)
-    if let Some(ref ddir) = date_dir {
-        let _ = std::fs::create_dir_all(ddir);
-    }

    let zip_path = target_dir.join(&parsed_filename);
    let csv_path = target_dir.join(parsed_filename.replace(".zip", ".csv"));

    if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
-        let msg = format!("Failed to write ZIP file: {}", e);
-        logger::log_error(&msg).await;
-        println!("{}", msg);
+        logger::log_error(&format!("Failed to write ZIP: {}", e)).await;
        return Ok(None);
    }
-    logger::log_info(&format!("Corporate Scraper: Saved ZIP to {:?}", zip_path)).await;

-    // Extract CSV
-    let archive = match std::fs::File::open(&zip_path)
-        .map(ZipArchive::new)
-    {
+    // Extract CSV from ZIP
+    let archive = match std::fs::File::open(&zip_path).map(ZipArchive::new) {
        Ok(Ok(a)) => a,
        Ok(Err(e)) => {
-            let msg = format!("Invalid ZIP: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Invalid ZIP: {}", e)).await;
            return Ok(None);
        }
        Err(e) => {
-            let msg = format!("Cannot open ZIP file: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Cannot open ZIP: {}", e)).await;
            return Ok(None);
        }
    };

    let mut archive = archive;
-
    let idx = match (0..archive.len()).find(|&i| {
        archive.by_index(i)
            .map(|f| f.name().ends_with(".csv"))
@@ -660,9 +317,7 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    }) {
        Some(i) => i,
        None => {
-            let msg = "ZIP did not contain a CSV file";
-            logger::log_error(msg).await;
-            println!("{}", msg);
+            logger::log_error("ZIP contains no CSV").await;
            return Ok(None);
        }
    };
@@ -670,43 +325,32 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let mut csv_file = match archive.by_index(idx) {
        Ok(f) => f,
        Err(e) => {
-            let msg = format!("Failed to read CSV entry: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to read CSV: {}", e)).await;
            return Ok(None);
        }
    };

    let mut csv_bytes = Vec::new();
    if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
-        let msg = format!("Failed to extract CSV: {}", e);
-        logger::log_error(&msg).await;
+        logger::log_error(&format!("Failed to extract: {}", e)).await;
        return Ok(None);
    }

    if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
-        let msg = format!("Failed to save CSV file: {}", e);
-        logger::log_error(&msg).await;
+        logger::log_error(&format!("Failed to save CSV: {}", e)).await;
        return Ok(None);
    }

-    let msg = format!("✓ ISIN/LEI CSV extracted: {:?}", csv_path);
-    logger::log_info(&msg).await;
-    
+    logger::log_info(&format!("✓ CSV extracted: {:?}", csv_path)).await;
    Ok(Some(csv_path.to_string_lossy().to_string()))
 }

-/// Parse GLEIF filename and convert timestamp to DDMMYYYY format
-/// Example: "isin-lei-20251124T080254.csv" -> "isin-lei-24112025.csv"
 fn parse_gleif_filename(filename: &str) -> String {
-    // Try to find pattern: isin-lei-YYYYMMDDTHHMMSS.zip/csv
    if let Some(start_idx) = filename.find("isin-lei-") {
-        let rest = &filename[start_idx + 9..]; // After "isin-lei-"
+        let rest = &filename[start_idx + 9..];
        
-        // Extract the 8 digits (YYYYMMDD)
        if rest.len() >= 8 && rest[0..8].chars().all(|c| c.is_numeric()) {
            let date_part = &rest[0..8];
-            // date_part is YYYYMMDD, convert to DDMMYYYY
            if date_part.len() == 8 {
                let year = &date_part[0..4];
                let month = &date_part[4..6];
@@ -717,11 +361,9 @@ fn parse_gleif_filename(filename: &str) -> String {
        }
    }
    
-    // Fallback: return original filename if parsing fails
    filename.to_string()
 }

-
 pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
    // 1. Download + extract the CSV (this is now async)
    let csv_path = match download_isin_lei_csv().await? {
@@ -769,30 +411,4 @@ pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>>
    );

    Ok(map)
-}
-
-pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
-    for _ in 0..10 {
-        let clicked: bool = client
-            .execute(
-                r#"(() => {
-                    const btn = document.querySelector('#consent-page .reject-all');
-                    if (btn) {
-                        btn.click();
-                        return true;
-                    }
-                    return false;
-                })()"#,
-                vec![],
-            )
-            .await?
-            .as_bool()
-            .unwrap_or(false);
-
-        if clicked { break; }
-        sleep(TokioDuration::from_millis(500)).await;
-    }
-
-    println!("Rejected Yahoo cookies if button existed");
-    Ok(())
 }
--- a/src/corporate/storage.rs
+++ b/src/corporate/storage.rs
@@ -6,49 +6,12 @@ use crate::util::logger;
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
 use chrono::{Datelike, NaiveDate};
-use std::collections::{HashMap};
+use std::collections::HashMap;
 use std::path::{PathBuf, Path};

-const BATCH_SIZE: usize = 500; // Process 500 events at a time
+const BATCH_SIZE: usize = 500;

-/// Load events in streaming fashion to avoid memory buildup
-pub async fn load_existing_events_streaming(
-    paths: &DataPaths,
-    callback: impl Fn(CompanyEvent) -> anyhow::Result<()>
-) -> anyhow::Result<usize> {
-    let dir = paths.corporate_events_dir();
-    if !dir.exists() {
-        logger::log_info("Corporate Storage: No existing events directory found").await;
-        return Ok(0);
-    }
-
-    let mut total = 0;
-    let mut entries = fs::read_dir(dir).await?;
-    
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        if path.extension().and_then(|s| s.to_str()) == Some("json") {
-            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
-            if name.starts_with("events_") && name.len() == 17 {
-                let content = fs::read_to_string(&path).await?;
-                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
-                
-                for event in events {
-                    callback(event)?;
-                    total += 1;
-                }
-                
-                // Yield to prevent blocking
-                tokio::task::yield_now().await;
-            }
-        }
-    }
-    
-    logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
-    Ok(total)
-}
-
-/// Build lightweight index of events instead of loading everything
+/// Lightweight index entry - only metadata, no full event data
 #[derive(Debug, Clone)]
 pub struct EventIndex {
    pub key: String,
@@ -57,9 +20,11 @@ pub struct EventIndex {
    pub file_path: PathBuf,
 }

+/// Build index of all events without loading them into memory
 pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventIndex>> {
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
+        logger::log_info("Corporate Storage: No events directory found").await;
        return Ok(Vec::new());
    }

@@ -90,7 +55,7 @@ pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventInd
    Ok(index)
 }

-/// Lookup specific event by loading only its file
+/// Load specific event by key (only loads its file)
 pub async fn lookup_event_by_key(
    key: &str,
    index: &[EventIndex]
@@ -106,9 +71,48 @@ pub async fn lookup_event_by_key(
    }
 }

+/// Stream events file by file with callback
+pub async fn stream_events_with_callback<F>(
+    paths: &DataPaths,
+    mut callback: F
+) -> anyhow::Result<usize>
+where
+    F: FnMut(CompanyEvent) -> anyhow::Result<()>,
+{
+    let dir = paths.corporate_events_dir();
+    if !dir.exists() {
+        return Ok(0);
+    }
+
+    let mut total = 0;
+    let mut entries = fs::read_dir(dir).await?;
+    
+    while let Some(entry) = entries.next_entry().await? {
+        let path = entry.path();
+        if path.extension().and_then(|s| s.to_str()) == Some("json") {
+            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
+            if name.starts_with("events_") {
+                let content = fs::read_to_string(&path).await?;
+                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
+                
+                for event in events {
+                    callback(event)?;
+                    total += 1;
+                }
+                
+                tokio::task::yield_now().await;
+            }
+        }
+    }
+    
+    logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
+    Ok(total)
+}
+
+/// Save events organized by month (accepts Vec, not HashMap)
 pub async fn save_optimized_events(
    paths: &DataPaths, 
-    events: Vec<CompanyEvent> // Changed from HashMap to Vec
+    events: Vec<CompanyEvent>
 ) -> anyhow::Result<()> {
    let dir = paths.corporate_events_dir();
    fs::create_dir_all(dir).await?;
@@ -124,16 +128,14 @@ pub async fn save_optimized_events(
            removed_count += 1;
        }
    }
-    logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;
+    logger::log_info(&format!("Corporate Storage: Removed {} old files", removed_count)).await;

    let total_events = events.len();
    let mut sorted = events;
    sorted.sort_by(|a, b| {
-        a.ticker.cmp(&b.ticker)
-            .then(a.date.cmp(&b.date))
+        a.ticker.cmp(&b.ticker).then(a.date.cmp(&b.date))
    });

-    // Process in batches to avoid memory buildup
    let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();
    
    for chunk in sorted.chunks(BATCH_SIZE) {
@@ -146,27 +148,28 @@ pub async fn save_optimized_events(
        tokio::task::yield_now().await;
    }

-    let total_months = by_month.len();
    for (month, list) in by_month {
        let path = dir.join(format!("events_{}.json", month));
        fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
-        logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
+        logger::log_info(&format!("Saved {} events for month {}", list.len(), month)).await;
    }
    
-    logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
+    logger::log_info(&format!("Saved {} total events", total_events)).await;
    Ok(())
 }

-pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
+pub async fn save_changes(
+    paths: &DataPaths, 
+    changes: &[CompanyEventChange]
+) -> anyhow::Result<()> {
    if changes.is_empty() { 
        logger::log_info("Corporate Storage: No changes to save").await;
        return Ok(()); 
    }
+    
    let dir = paths.corporate_changes_dir();
    fs::create_dir_all(dir).await?;

-    logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;
-
    let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -180,12 +183,13 @@ pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) ->
        let mut all = if path.exists() {
            let s = fs::read_to_string(&path).await?;
            serde_json::from_str(&s).unwrap_or_default()
-        } else { vec![] };
+        } else { 
+            vec![] 
+        };
        all.extend(list.clone());
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
-        logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
    }
-    logger::log_info("Corporate Storage: All changes saved successfully").await;
+    
    Ok(())
 }

@@ -203,9 +207,7 @@ pub async fn save_prices_for_ticker(
    let path = timeframe_dir.join("prices.json");

    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-
-    let json = serde_json::to_string_pretty(&prices)?;
-    fs::write(&path, json).await?;
+    fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
    Ok(())
 }

@@ -240,7 +242,10 @@ pub async fn save_available_exchanges(
    Ok(())
 }

-pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
+pub async fn load_available_exchanges(
+    paths: &DataPaths, 
+    lei: &str
+) -> anyhow::Result<Vec<AvailableExchange>> {
    let path = get_company_dir(paths, lei).join("available_exchanges.json");
    if path.exists() {
        let content = fs::read_to_string(&path).await?;
@@ -267,15 +272,13 @@ pub async fn save_prices_by_source(
    Ok(())
 }

-/// Saves companies data to a JSONL file in streaming fashion
+/// Stream companies to JSONL incrementally
 pub async fn save_companies_to_jsonl_streaming(
    paths: &DataPaths,
-    companies: &HashMap<String, HashMap<String, String>>,
-) -> anyhow::Result<()> {
+    companies_iter: impl Iterator<Item = (String, HashMap<String, String>)>,
+) -> anyhow::Result<usize> {
    let file_path = paths.data_dir().join("companies.jsonl");
    
-    logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;
-    
    if let Some(parent) = file_path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }
@@ -283,32 +286,33 @@ pub async fn save_companies_to_jsonl_streaming(
    let mut file = tokio::fs::File::create(&file_path).await?;
    let mut count = 0;
    
-    // Process in batches
-    for (name, securities) in companies.iter() {
+    for (name, securities) in companies_iter {
        let line = serde_json::json!({
            "name": name,
            "securities": securities
        });
+        
        file.write_all(line.to_string().as_bytes()).await?;
        file.write_all(b"\n").await?;
-        
        count += 1;
+        
        if count % 100 == 0 {
            tokio::task::yield_now().await;
        }
    }
    
-    let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
-    println!("{}", msg);
-    logger::log_info(&msg).await;
-    Ok(())
+    logger::log_info(&format!("Saved {} companies to JSONL", count)).await;
+    Ok(count)
 }

-/// Load companies from JSONL in streaming fashion
-pub async fn load_companies_from_jsonl_streaming(
+/// Stream read companies from JSONL
+pub async fn stream_companies_from_jsonl<F>(
    path: &Path,
-    callback: impl Fn(String, HashMap<String, String>) -> anyhow::Result<()>
-) -> anyhow::Result<usize> {
+    mut callback: F
+) -> anyhow::Result<usize>
+where
+    F: FnMut(String, HashMap<String, String>) -> anyhow::Result<()>,
+{
    if !path.exists() {
        return Ok(0);
    }
--- a/src/corporate/types.rs
+++ b/src/corporate/types.rs
@@ -79,15 +79,11 @@ pub struct CompanyInfo{
    pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo>
 }

-/// Company Meta Data
-/// # Attributes
-/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
-/// * figi: metadata with ISIN as key
-/*#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyMetadata {
-    pub lei: String,
-    pub figi: Option<Vec<FigiInfo>>,
-}*/
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CompanyCrossPlatformInfo {
+    pub name: String,
+    pub isin_tickers_map: HashMap<String, Vec<String>>, // ISIN -> Tickers
+}

 /// Warrant Info
 /// 
@@ -118,14 +114,6 @@ pub struct OptionInfo {
    pub options: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
 }

-/*#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PrimaryInfo {
-    pub isin: String,
-    pub name: String,
-    pub exchange_mic: String,
-    pub currency: String,
-}*/
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AvailableExchange {
    pub exchange_mic: String,
--- a/src/corporate/update.rs
+++ b/src/corporate/update.rs
@@ -1,170 +1,274 @@
-// src/corporate/update.rs
-use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
+// src/corporate/update.rs - COMPLETE STREAMING VERSION
+
+use super::{scraper::*, storage::*, helpers::*, types::*, openfigi::*, yahoo::*};
 use crate::config::Config;
 use crate::util::directories::DataPaths;
 use crate::util::logger;
 use crate::scraper::webdriver::ChromeDriverPool;

 use chrono::Local;
-use std::collections::{HashMap};
+use std::collections::HashMap;
 use std::sync::Arc;

-/// Main function: Full update for all companies with streaming to minimize memory usage
-pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
-    let msg = "=== Starting LEI-based corporate full update (STREAMING) ===";
-    println!("{}", msg);
-    logger::log_info(msg).await;
+/// Main update function - runs the update steps sequentially, streaming where possible to limit memory usage
+pub async fn run_full_update(_config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
+    logger::log_info("=== Corporate Update (STREAMING MODE) ===").await;

    let paths = DataPaths::new(".")?;

-    // Step 1: Download/locate GLEIF CSV (don't load into memory yet)
-    logger::log_info("Corporate Update: Downloading/locating GLEIF CSV...").await;
+    // Step 1: Download GLEIF CSV (don't load into memory)
+    logger::log_info("Step 1: Downloading GLEIF CSV...").await;
    let gleif_csv_path = match download_isin_lei_csv().await? {
        Some(p) => {
-            logger::log_info(&format!("Corporate Update: GLEIF CSV at: {}", p)).await;
+            logger::log_info(&format!("  ✓ GLEIF CSV at: {}", p)).await;
            p
        }
        None => {
-            logger::log_warn("Corporate Update: Could not obtain GLEIF CSV, continuing with limited data").await;
+            logger::log_warn("  ✗ Could not obtain GLEIF CSV").await;
            return Ok(());
        }
    };

    // Step 2: Load OpenFIGI type lists (small, cached)
-    logger::log_info("Corporate Update: Loading OpenFIGI type lists...").await;
-    if let Err(e) = load_figi_type_lists().await {
-        logger::log_warn(&format!("Could not load OpenFIGI type lists: {}", e)).await;
+    logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
+    load_figi_type_lists().await.ok();
+    logger::log_info("  ✓ OpenFIGI metadata loaded").await;
+
+    // Step 3: Check mapping status and process only unmapped LEIs
+    logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await;
+    
+    let all_mapped = ensure_all_leis_mapped(&gleif_csv_path, None).await?;
+    
+    if !all_mapped {
+        logger::log_warn("  ⚠ Some LEIs failed to map - continuing with partial data").await;
+    } else {
+        logger::log_info("  ✓ All LEIs successfully mapped").await;
    }

-    // Step 3: Process GLEIF → FIGI mapping in streaming fashion
-    logger::log_info("Corporate Update: Building FIGI mappings (streaming)...").await;
+    // Step 4: Build securities from FIGI data (streaming)
+    logger::log_info("Step 4: Building securities map (streaming)...").await;
+    let date_dir = find_most_recent_figi_date_dir(&paths).await?;
    
-    // Build LEI→ISINs map by streaming the CSV
-    let mut lei_to_isins: HashMap<String, Vec<String>> = HashMap::new();
-    let mut lei_batch = Vec::new();
-    const LEI_BATCH_SIZE: usize = 1000;
-    
-    stream_gleif_csv(&gleif_csv_path, |lei, isin| {
-        lei_to_isins.entry(lei.clone()).or_default().push(isin);
-        lei_batch.push(lei);
-        
-        // Process in batches
-        if lei_batch.len() >= LEI_BATCH_SIZE {
-            lei_batch.clear();
-        }
-        
-        Ok(())
-    }).await?;
-    
-    logger::log_info(&format!("Corporate Update: Collected {} LEIs", lei_to_isins.len())).await;
-
-    // Step 4: Build FIGI mappings in batches (process and save incrementally)
-    logger::log_info("Corporate Update: Processing FIGI mappings in batches...").await;
-    let figi_result = build_lei_to_figi_infos(&lei_to_isins, None).await;
-    
-    // Don't keep the full result in memory - it's already saved to JSONL files
-    drop(figi_result);
-    drop(lei_to_isins); // Release this too
-    
-    logger::log_info("Corporate Update: FIGI mappings saved to cache").await;
-
-    // Step 5: Load or build securities (streaming from JSONL files)
-    logger::log_info("Corporate Update: Building securities map (streaming)...").await;
-    
-    let dir = DataPaths::new(".")?;
-    let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
-    
-    // Find the most recent date directory
-    let date_dir = find_most_recent_date_dir(&map_cache_dir).await?;
-    
-    let (common_stocks, _warrants, _options) = if let Some(date_dir) = date_dir {
-        logger::log_info(&format!("Using FIGI data from: {:?}", date_dir)).await;
-        load_or_build_all_securities_streaming(&date_dir).await?
+    if let Some(date_dir) = date_dir {
+        logger::log_info(&format!("  Using FIGI data from: {:?}", date_dir)).await;
+        build_securities_from_figi_streaming(&date_dir).await?;
+        logger::log_info("  ✓ Securities map updated").await;
    } else {
-        logger::log_warn("No FIGI date directory found, using empty maps").await;
-        (HashMap::new(), HashMap::new(), HashMap::new())
-    };
+        logger::log_warn("  ✗ No FIGI data directory found").await;
+    }

-    logger::log_info(&format!("Corporate Update: Processing {} companies", common_stocks.len())).await;
+    // Step 5: Build companies JSONL (streaming from securities)
+    logger::log_info("Step 5: Building companies.jsonl (streaming)...").await;
+    let count = build_companies_jsonl_streaming(&paths, pool).await?;
+    logger::log_info(&format!("  ✓ Saved {} companies", count)).await;

-    // Step 6: Convert to simplified companies map and save incrementally
-    logger::log_info("Corporate Update: Building companies JSONL (streaming)...").await;
+    // Step 6: Process events (using index, not full load)
+    logger::log_info("Step 6: Processing events (using index)...").await;
+    let _event_index = build_event_index(&paths).await?;
+    logger::log_info("  ✓ Event index built").await;
+
+    logger::log_info("✓ Corporate update complete").await;
+    Ok(())
+}
+
+/// Build companies.jsonl from the securities cache, merging into any existing file (incremental mode)
+async fn build_companies_jsonl_streaming(paths: &DataPaths, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<usize> {
+    let path = DataPaths::new(".")?;
+    let corporate_path = path.data_dir().join("corporate").join("by_name");
+    let securities_path = corporate_path.join("common_stocks.json");
+    
+    if !securities_path.exists() {
+        logger::log_warn("No common_stocks.json found").await;
+        return Ok(0);
+    }
+    
+    // Load securities
+    let content = tokio::fs::read_to_string(securities_path).await?;
+    let securities: HashMap<String, CompanyInfo> = serde_json::from_str(&content)?;
    
    let companies_path = paths.data_dir().join("companies.jsonl");
    
-    // Create file and write incrementally
    if let Some(parent) = companies_path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }
    
-    let mut file = tokio::fs::File::create(&companies_path).await?;
-    let mut processed = 0;
+    // Load existing companies into a map
+    let mut existing_companies: HashMap<String, CompanyCrossPlatformInfo> = HashMap::new();
    
-    for (name, company_info) in common_stocks.iter() {
-        let mut isin_ticker_pairs: HashMap<String, String> = HashMap::new();
+    if companies_path.exists() {
+        logger::log_info("Loading existing companies.jsonl...").await;
+        let existing_content = tokio::fs::read_to_string(&companies_path).await?;
+        for line in existing_content.lines() {
+            if line.trim().is_empty() {
+                continue;
+            }
+            match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
+                Ok(company) => {
+                    existing_companies.insert(company.name.clone(), company);
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Failed to parse existing company line: {}", e)).await;
+                }
+            }
+        }
+        logger::log_info(&format!("Loaded {} existing companies", existing_companies.len())).await;
+    }
+    
+    // Create temporary file for atomic write
+    let temp_path = companies_path.with_extension("jsonl.tmp");
+    let mut file = tokio::fs::File::create(&temp_path).await?;
+    let mut count = 0;
+    let mut updated_count = 0;
+    let mut new_count = 0;
+    
+    use tokio::io::AsyncWriteExt;
+    
+    for (name, company_info) in securities.iter() {
+        // Check if we already have this company
+        let existing_entry = existing_companies.remove(name);
+        let is_update = existing_entry.is_some();
+        
+        // Start with existing ISIN-ticker map or create new one
+        let mut isin_tickers_map: HashMap<String, Vec<String>> = 
+            existing_entry
+                .map(|e| e.isin_tickers_map)
+                .unwrap_or_default();
+        
+        // Step 1: Extract unique ISIN-ticker pairs from FigiInfo
+        let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
        
        for figi_infos in company_info.securities.values() {
            for figi_info in figi_infos {
-                if !figi_info.isin.is_empty() && !figi_info.ticker.is_empty() {
-                    isin_ticker_pairs.insert(figi_info.isin.clone(), figi_info.ticker.clone());
+                if !figi_info.isin.is_empty() {
+                    let tickers = unique_isin_ticker_pairs
+                        .entry(figi_info.isin.clone())
+                        .or_insert_with(Vec::new);
+                    
+                    // Add FIGI ticker if present and not duplicate
+                    if !figi_info.ticker.is_empty() && !tickers.contains(&figi_info.ticker) {
+                        tickers.push(figi_info.ticker.clone());
+                    }
                }
            }
        }
        
-        if !isin_ticker_pairs.is_empty() {
-            use tokio::io::AsyncWriteExt;
+        // Step 2: Merge FIGI tickers into main map
+        for (isin, figi_tickers) in unique_isin_ticker_pairs {
+            let tickers = isin_tickers_map
+                .entry(isin.clone())
+                .or_insert_with(Vec::new);
            
-            let line = serde_json::json!({
-                "name": name,
-                "securities": isin_ticker_pairs
-            });
+            // Add FIGI tickers that aren't already present
+            for figi_ticker in figi_tickers {
+                if !tickers.contains(&figi_ticker) {
+                    tickers.push(figi_ticker);
+                }
+            }
            
-            file.write_all(line.to_string().as_bytes()).await?;
+            // Step 3: Check if we need to fetch Yahoo ticker for this ISIN
+            let has_yahoo_ticker = tickers.iter().any(|t| t.starts_with("YAHOO:"));
+            
+            if !has_yahoo_ticker {
+                logger::log_info(&format!("Fetching Yahoo ticker for {} (ISIN: {})", name, isin)).await;
+                let yahoo_result = scrape_ticker_by_isin(pool, &isin).await;
+                
+                match yahoo_result {
+                    Ok(result) => {
+                        let log_msg = match &result {
+                            YahooTickerResult::Found(ticker) => 
+                                format!("✓ Found Yahoo ticker {} for ISIN {}", ticker, isin),
+                            YahooTickerResult::NoResults => 
+                                format!("○ No search results for ISIN {}", isin),
+                            YahooTickerResult::NotFound => 
+                                format!("○ Empty ticker result for ISIN {}", isin),
+                            YahooTickerResult::AmbiguousResults => 
+                                format!("⚠ Ambiguous results for ISIN {}", isin),
+                        };
+                        
+                        if result.is_found() {
+                            logger::log_info(&log_msg).await;
+                        } else {
+                            logger::log_warn(&log_msg).await;
+                        }
+                        
+                        tickers.push(result.to_tagged_string());
+                    },
+                    Err(e) => {
+                        logger::log_warn(&format!("✗ Yahoo lookup error for ISIN {}: {}", isin, e)).await;
+                        tickers.push("YAHOO:ERROR".to_string());
+                    }
+                }
+            } else {
+                logger::log_warn(&format!("Skipping Yahoo lookup for {} ISIN {} - already has Yahoo data", name, isin)).await;
+            }
+        }
+        
+        // Only write if we have ticker data
+        if !isin_tickers_map.is_empty() {
+            let company_entry = CompanyCrossPlatformInfo {
+                name: name.clone(),
+                isin_tickers_map,
+            };
+            
+            let line = serde_json::to_string(&company_entry)?;
+            
+            file.write_all(line.as_bytes()).await?;
            file.write_all(b"\n").await?;
-            processed += 1;
            
-            // Yield periodically
-            if processed % 100 == 0 {
+            // Flush after each write for crash safety
+            file.flush().await?;
+            
+            count += 1;
+            if is_update {
+                updated_count += 1;
+            } else {
+                new_count += 1;
+            }
+            
+            if count % 10 == 0 {
+                logger::log_info(&format!("Progress: {} companies ({} new, {} updated)", count, new_count, updated_count)).await;
                tokio::task::yield_now().await;
-                logger::log_info(&format!("Saved {} companies so far...", processed)).await;
            }
        }
    }
    
-    logger::log_info(&format!("Corporate Update: Saved {} companies to JSONL", processed)).await;
-
-    // Step 7: Process events in streaming fashion
-    logger::log_info("Corporate Update: Processing events (streaming)...").await;
+    // Write any remaining existing companies that weren't in securities
+    for (_name, company) in existing_companies {
+        let line = serde_json::to_string(&company)?;
+        file.write_all(line.as_bytes()).await?;
+        file.write_all(b"\n").await?;
+        file.flush().await?;
+        count += 1;
+        logger::log_warn(&format!("Preserved existing company: {}", _name)).await;
+    }
    
-    let event_index = build_event_index(&paths).await?;
-    logger::log_info(&format!("Corporate Update: Built index of {} events", event_index.len())).await;
+    // Ensure all data is written
+    file.sync_all().await?;
+    drop(file);
    
-    // For now, we just maintain the index
-    // In a full implementation, you'd stream through tickers and update events
+    // Atomic rename: replace old file with new one
+    tokio::fs::rename(&temp_path, &companies_path).await?;
    
-    // Step 8: Save any updates
-    logger::log_info("Corporate Update: Finalizing...").await;
-
-    let msg = "✓ Corporate update complete (streaming)";
-    println!("{}", msg);
-    logger::log_info(msg).await;
-    Ok(())
+    logger::log_info(&format!("✓ Completed: {} total companies ({} new, {} updated)", count, new_count, updated_count)).await;
+    
+    Ok(count)
 }

-/// Helper to find the most recent date directory in the FIGI cache
-async fn find_most_recent_date_dir(map_cache_dir: &std::path::Path) -> anyhow::Result<Option<std::path::PathBuf>> {
+/// Find most recent FIGI date directory
+async fn find_most_recent_figi_date_dir(paths: &DataPaths) -> anyhow::Result<Option<std::path::PathBuf>> {
+    let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
+    
    if !map_cache_dir.exists() {
        return Ok(None);
    }
    
-    let mut entries = tokio::fs::read_dir(map_cache_dir).await?;
+    let mut entries = tokio::fs::read_dir(&map_cache_dir).await?;
    let mut dates = Vec::new();
    
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.is_dir() {
            if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
-                // Date format: DDMMYYYY
                if name.len() == 8 && name.chars().all(|c| c.is_numeric()) {
                    dates.push((name.to_string(), path));
                }
@@ -176,67 +280,16 @@ async fn find_most_recent_date_dir(map_cache_dir: &std::path::Path) -> anyhow::R
        return Ok(None);
    }
    
-    // Sort by date (DDMMYYYY format)
-    dates.sort_by(|a, b| b.0.cmp(&a.0)); // Descending order
-    
+    dates.sort_by(|a, b| b.0.cmp(&a.0));
    Ok(Some(dates[0].1.clone()))
 }

+
+
 pub struct ProcessResult {
    pub changes: Vec<CompanyEventChange>,
 }

-/// Process events in batches to avoid memory buildup
-pub async fn process_events_streaming(
-    index: &[EventIndex],
-    new_events: &[CompanyEvent],
-    today: &str,
-) -> anyhow::Result<(Vec<CompanyEventChange>, Vec<CompanyEvent>)> {
-    let mut all_changes = Vec::new();
-    let mut final_events: HashMap<String, CompanyEvent> = HashMap::new();
-    
-    // Step 1: Load existing events in batches using the index
-    logger::log_info("Loading existing events in batches...").await;
-    
-    let mut loaded_files = std::collections::HashSet::new();
-    
-    for entry in index {
-        if loaded_files.contains(&entry.file_path) {
-            continue;
-        }
-        
-        let content = tokio::fs::read_to_string(&entry.file_path).await?;
-        let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
-        
-        for e in events {
-            final_events.insert(event_key(&e), e);
-        }
-        
-        loaded_files.insert(entry.file_path.clone());
-        
-        if final_events.len() % 1000 == 0 {
-            logger::log_info(&format!("Loaded {} events so far...", final_events.len())).await;
-            tokio::task::yield_now().await;
-        }
-    }
-    
-    logger::log_info(&format!("Loaded {} existing events", final_events.len())).await;
-    
-    // Step 2: Process new events in batches
-    for (idx, batch) in new_events.chunks(500).enumerate() {
-        logger::log_info(&format!("Processing batch {} ({} events)", idx + 1, batch.len())).await;
-        
-        let batch_result = process_batch(batch, &mut final_events, today);
-        all_changes.extend(batch_result.changes);
-        
-        tokio::task::yield_now().await;
-    }
-    
-    let events_vec: Vec<CompanyEvent> = final_events.into_values().collect();
-    
-    Ok((all_changes, events_vec))
-}
-
 pub fn process_batch(
    new_events: &[CompanyEvent],
    existing: &mut HashMap<String, CompanyEvent>,
@@ -253,7 +306,6 @@ pub fn process_batch(
            continue;
        }

-        // Check for time change on same date
        let date_key = format!("{}|{}", new.ticker, new.date);
        let mut found_old = None;
        for (k, e) in existing.iter() {
--- a/src/corporate/yahoo.rs
+++ b/src/corporate/yahoo.rs
@@ -0,0 +1,312 @@
+// src/corporate/yahoo.rs
+use super::{types::*, helpers::*};
+use crate::{scraper::webdriver::*, util::{directories::DataPaths}};
+use event_backtest_engine::logger;
+use fantoccini::{Client, Locator};
+use serde::{Deserialize, Serialize};
+use tokio::{time::{Duration as TokioDuration, sleep}};
+use std::{sync::Arc};
+use anyhow::{anyhow, Result};
+
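+// Module notes (plain comments so rustdoc does not attach them to the enum below):
+// - map existing companies to their Yahoo Finance tickers (looked up by ISIN)
+// - get historical stock price data: daily (xxxx - 2025) and hourly (last 30 days)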
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum YahooTickerResult {
+    Found(String),
+    NotFound,
+    NoResults,
+    AmbiguousResults,
+}
+
+impl YahooTickerResult {
+    pub fn to_tagged_string(&self) -> String {
+        match self {
+            YahooTickerResult::Found(ticker) => format!("YAHOO:{}", ticker),
+            YahooTickerResult::NotFound => "YAHOO:NOT_FOUND".to_string(),
+            YahooTickerResult::NoResults => "YAHOO:NO_RESULTS".to_string(),
+            YahooTickerResult::AmbiguousResults => "YAHOO:AMBIGUOUS".to_string(),
+        }
+    }
+
+    pub fn is_found(&self) -> bool {
+        matches!(self, YahooTickerResult::Found(_))
+    }
+
+    pub fn get_ticker(&self) -> Option<&str> {
+        match self {
+            YahooTickerResult::Found(ticker) => Some(ticker),
+            _ => None,
+        }
+    }
+}
+
+pub async fn scrape_ticker_by_isin(
+    pool: &Arc<ChromeDriverPool>,
+    isin: &str,
+) -> anyhow::Result<YahooTickerResult> {
+    let isin = isin.to_string();
+    pool.execute(format!("https://finance.yahoo.com/lookup?s={}", isin), move |client| {
+        let isin = isin.clone();
+        Box::pin(async move {
+            sleep(TokioDuration::from_millis(1000)).await;
+            reject_yahoo_cookies(&client).await?;
+            sleep(TokioDuration::from_millis(1000)).await;
+            extract_ticker_by_isin(&client, &isin).await
+        })
+    }).await
+}
+
+pub async fn extract_ticker_by_isin(
+    client: &Client,
+    _isin: &str,
+) -> Result<YahooTickerResult> {
+    // `_isin` is unused: the caller is expected to have opened https://finance.yahoo.com/lookup?s={isin} already.
+
+    // Check for "No results found" message
+    if client.find(Locator::Css(".noData")).await.is_ok() {
+        return Ok(YahooTickerResult::NoResults);
+    }
+
+    // Wait for results table
+    let table = match client
+        .wait()
+        .for_element(Locator::Css("table[data-test='lookup-table']"))
+        .await
+    {
+        Ok(t) => t,
+        Err(_) => return Ok(YahooTickerResult::NoResults),
+    };
+    
+    // Find first row
+    let first_row = match table
+        .find(Locator::Css("tbody tr"))
+        .await
+    {
+        Ok(row) => row,
+        Err(_) => return Ok(YahooTickerResult::NoResults),
+    };
+    
+    // Extract ticker from first cell
+    let ticker_cell = first_row
+        .find(Locator::Css("td:nth-child(1)"))
+        .await
+        .map_err(|e| anyhow!("Failed to find ticker cell: {}", e))?;
+    
+    let ticker = ticker_cell
+        .text()
+        .await
+        .map_err(|e| anyhow!("Failed to get ticker text: {}", e))?
+        .trim()
+        .to_string();
+    
+    if ticker.is_empty() {
+        Ok(YahooTickerResult::NotFound)
+    } else {
+        Ok(YahooTickerResult::Found(ticker))
+    }
+}
+
+pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::Result<Vec<String>> {
+    let corporate_path = paths.data_dir().join("corporate").join("by_name");
+    let companies_file = corporate_path.join("companies.jsonl");
+    let content = tokio::fs::read_to_string(companies_file).await?;
+    let mut tickers = Vec::new();
+    for line in content.lines() {
+        let company: CompanyCrossPlatformInfo = serde_json::from_str(line)?;
+        for (_isin, ticker_vec) in company.isin_tickers_map {
+            tickers.extend(ticker_vec);
+        }
+    }
+    Ok(tickers)
+}
+
+/// Fetches earnings events for a ticker through the shared `ChromeDriverPool`.
+///
+/// Passes the earnings-calendar URL to `pool.execute`, then rejects cookies and extracts the events.
+///
+/// # Arguments
+/// * `pool` - The ChromeDriver pool the task is executed on.
+/// * `ticker` - The stock ticker symbol.
+///
+/// # Returns
+/// A vector of CompanyEvent structs on success.
+///
+/// # Errors
+/// Returns an error if the task execution fails, e.g., chromedriver spawn or navigation issues.
+pub async fn fetch_earnings_with_pool(
+    pool: &Arc<ChromeDriverPool>,
+    ticker: &str,
+) -> anyhow::Result<Vec<CompanyEvent>> {
+    let ticker = ticker.to_string();
+    let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
+
+    let ticker_cloned = ticker.clone();
+
+    pool.execute(url, move |client| {
+        let ticker = ticker_cloned.clone();
+        Box::pin(async move {
+            reject_yahoo_cookies(&client).await?;
+            extract_earnings_events(&client, &ticker).await
+        })
+    }).await
+}
+
+/// Extracts earnings events from the currently loaded Yahoo Finance earnings calendar page.
+///
+/// This function assumes the client is already navigated to the correct URL (e.g., 
+/// https://finance.yahoo.com/calendar/earnings?symbol={ticker}) and cookies are handled.
+///
+/// It waits for the earnings table, extracts rows, parses cells into CompanyEvent structs,
+/// and handles date parsing, float parsing, and optional fields.
+///
+/// # Arguments
+/// * `client` - The fantoccini Client with the page loaded.
+/// * `ticker` - The stock ticker symbol for the events.
+///
+/// # Returns
+/// A vector of CompanyEvent on success.
+///
+/// # Errors
+/// Returns an error if:
+/// - Table or elements not found.
+/// - Date parsing fails (float fields go through `parse_float` without raising an error here).
+/// - WebDriver operations fail.
+///
+/// # Examples
+///
+/// ```no_run
+/// use fantoccini::Client;
+/// use crate::corporate::yahoo::extract_earnings_events;
+///
+/// #[tokio::main]
+/// async fn main() -> anyhow::Result<()> {
+///     // Assume client is set up and navigated
+///     let events = extract_earnings_events(&client, "AAPL").await?;
+///     Ok(())
+/// }
+/// ```
+pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> {
+    // Wait for the table to load
+    let table = client
+        .wait()
+        .for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
+        .await
+        .map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
+
+    // Find all rows in tbody
+    let rows = table
+        .find_all(Locator::Css("tbody tr"))
+        .await
+        .map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
+
+    let mut events = Vec::with_capacity(rows.len());
+
+    for row in rows {
+        let cells = row
+            .find_all(Locator::Css("td"))
+            .await
+            .map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
+
+        if cells.len() < 5 {
+            continue; // Skip incomplete rows
+        }
+
+        // Extract and parse date
+        let date_str = cells[0]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get date text: {}", e))?;
+        let date = parse_yahoo_date(&date_str)
+            .map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
+            .format("%Y-%m-%d")
+            .to_string();
+
+        // Extract time, replace "Time Not Supplied" with empty
+        let time = cells[1]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get time text: {}", e))?
+            .replace("Time Not Supplied", "");
+
+        // Extract period
+        let period = cells[2]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get period text: {}", e))?;
+
+        // Parse EPS forecast
+        let eps_forecast_str = cells[3]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
+        let eps_forecast = parse_float(&eps_forecast_str);
+
+        // Parse EPS actual
+        let eps_actual_str = cells[4]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
+        let eps_actual = parse_float(&eps_actual_str);
+
+        // Parse surprise % if available
+        let surprise_pct = if cells.len() > 5 {
+            let surprise_str = cells[5]
+                .text()
+                .await
+                .map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
+            parse_float(&surprise_str)
+        } else {
+            None
+        };
+
+        events.push(CompanyEvent {
+            ticker: ticker.to_string(),
+            date,
+            time,
+            period,
+            eps_forecast,
+            eps_actual,
+            revenue_forecast: None,
+            revenue_actual: None,
+            surprise_pct,
+            source: "Yahoo".to_string(),
+        });
+    }
+
+    if events.is_empty() {
+        eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
+    } else {
+        println!("Extracted {} earnings events for {}", events.len(), ticker);
+    }
+
+    Ok(events)
+}
+
+/// Clicks Yahoo's cookie-consent "reject all" button if present, polling up to 10 times (500 ms apart).
+async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
+    for _ in 0..10 {
+        let clicked: bool = client
+        .execute(
+            r#"(() => {
+            const btn = document.querySelector('#consent-page .reject-all');
+            if (btn) {
+                btn.click();
+                return true;
+            }
+            return false;
+            })()"#,
+            vec![],
+        )
+        .await?
+        .as_bool()
+        .unwrap_or(false);
+
+        if clicked { break; }
+        sleep(TokioDuration::from_millis(500)).await;
+    }
+
+    logger::log_info("Rejected Yahoo cookies if button existed").await;
+    Ok(())
+}
--- a/src/economic/storage.rs
+++ b/src/economic/storage.rs
@@ -116,7 +116,7 @@ pub async fn build_event_index(chunks: &[ChunkInfo]) -> anyhow::Result<Vec<Event
    Ok(index)
 }

-/// NEW: Look up a specific event by loading only its chunk
+/// Look up a specific event by loading only its chunk
 pub async fn lookup_event_by_key(key: &str, index: &[EventIndex]) -> anyhow::Result<Option<EconomicEvent>> {
    // Find which chunk contains this event
    let entry = index.iter().find(|e| e.key == key);
--- a/src/main.rs
+++ b/src/main.rs
@@ -14,11 +14,6 @@ use util::directories::DataPaths;
 use util::{logger, opnv};
 use std::sync::Arc;

-/// Application entry point
-// src/main.rs
-
-// ... existing imports ...
-
 #[tokio::main]
 async fn main() -> Result<()> {
    cleanup_all_proxy_containers().await.ok();
@@ -138,7 +133,7 @@ async fn main() -> Result<()> {
            std::process::exit(0);
        });
    }
-
+    
    // === Step 4: Run the actual scraping jobs ===
    logger::log_info("--- Starting ECONOMIC data update ---").await;
    economic::run_full_update(&config, &pool).await?;
@@ -161,9 +156,4 @@ async fn main() -> Result<()> {

    logger::log_info("=== Application finished successfully ===").await;
    Ok(())
-}
-
-/* 
-memory allocation of 4294967296 bytes failed
-error: process didn't exit successfully: `target\debug\event_backtest_engine.exe` (exit code: 0xc0000409, STATUS_STACK_BUFFER_OVERRUN)
-*/
+}