removed claudes md

added cross-compatibility between shutdown flag and state entries
removed crossplatformcompany from types
2026-01-15 00:23:29 +01:00 · 2026-01-15 00:22:55 +01:00 · 2026-01-14 14:49:00 +01:00 · 2026-01-14 14:28:16 +01:00 · 2026-01-12 23:03:01 +01:00 · 2026-01-12 22:23:34 +01:00
55 changed files with 15516 additions and 3415 deletions
--- a/.env.example
+++ b/.env.example
@@ -3,46 +3,40 @@
 # This file configures the behavior of the WebScraper application
 # Copy to .env and adjust values as needed

-# ===== ECONOMIC DATA =====
-# Start date for economic event scraping
+OPENFIGI_API_KEY=
+
+# Economic calendar start (usually the earliest available on finanzen.net)
 ECONOMIC_START_DATE=2007-02-13

-# How far into the future to look ahead for economic events (in months)
-ECONOMIC_LOOKAHEAD_MONTHS=3
-
-# ===== CORPORATE DATA =====
-# Start date for corporate earnings/data scraping
+# Corporate earnings & price history start
 CORPORATE_START_DATE=2010-01-01

-# ===== PERFORMANCE & CONCURRENCY =====
-# Maximum number of parallel ChromeDriver instances
-# Higher = more concurrent tasks, but higher resource usage
-MAX_PARALLEL_INSTANCES=3
+# How far into the future we scrape economic events (in months)
+ECONOMIC_LOOKAHEAD_MONTHS=3

-# Maximum tasks per ChromeDriver instance before recycling
-# 0 = unlimited (instance lives for entire application runtime)
-MAX_TASKS_PER_INSTANCE=0
+# Maximum number of parallel scraping tasks (default: 4)
+MAX_PARALLEL_INSTANCES=10

 # ===== VPN ROTATION (ProtonVPN Integration) =====
 # Enable automatic VPN rotation between sessions?
 # If false, all traffic goes through system without VPN tunneling
-ENABLE_VPN_ROTATION=false
-
-# Comma-separated list of ProtonVPN servers to rotate through
-# Examples:
-#   "US-Free#1,US-Free#2,UK-Free#1"
-#   "US,UK,JP,DE,NL"
-# NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
-VPN_SERVERS=
+ENABLE_VPN_ROTATION=true

 # Number of tasks per VPN session before rotating to new server/IP
 # 0 = rotate between economic and corporate phases (one phase = one IP)
 # 5 = rotate every 5 tasks
 # NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
-TASKS_PER_VPN_SESSION=0
+TASKS_PER_VPN_SESSION=50

 # ===== LOGGING =====
 # Set via RUST_LOG environment variable:
 #   RUST_LOG=info cargo run
 #   RUST_LOG=debug cargo run
 # Leave empty or unset for default logging level
+
+
+MAX_REQUESTS_PER_SESSION=25
+MIN_REQUEST_INTERVAL_MS=300
+MAX_RETRY_ATTEMPTS=3
+
+PROXY_INSTANCES_PER_CERTIFICATE=2
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,8 @@ target/
 **/*.zip
 **/*.log
 **/*.ovpn
+**/*.tmp
+**/*.txt

 #/economic_events*
 #/economic_event_changes*
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -110,6 +110,17 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "async-trait"
+version = "0.1.89"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.110",
+]
+
 [[package]]
 name = "atomic-waker"
 version = "1.1.2"
@@ -122,6 +133,64 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"

+[[package]]
+name = "axum"
+version = "0.7.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
+dependencies = [
+ "async-trait",
+ "axum-core",
+ "base64 0.22.1",
+ "bytes",
+ "futures-util",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.8.1",
+ "hyper-util",
+ "itoa",
+ "matchit",
+ "memchr",
+ "mime",
+ "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sha1",
+ "sync_wrapper",
+ "tokio",
+ "tokio-tungstenite 0.24.0",
+ "tower",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum-core"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199"
+dependencies = [
+ "async-trait",
+ "bytes",
+ "futures-util",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
 [[package]]
 name = "base64"
 version = "0.21.7"
@@ -660,34 +729,6 @@ dependencies = [
 "windows-sys 0.61.2",
 ]

-[[package]]
-name = "event_backtest_engine"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "chrono",
- "csv",
- "dotenvy",
- "fantoccini",
- "flate2",
- "futures",
- "once_cell",
- "rand 0.9.2",
- "rayon",
- "regex",
- "reqwest",
- "scraper",
- "serde",
- "serde_json",
- "tokio",
- "toml",
- "tracing",
- "tracing-subscriber",
- "url",
- "yfinance-rs",
- "zip",
-]
-
 [[package]]
 name = "fantoccini"
 version = "0.20.0"
@@ -1099,6 +1140,7 @@ dependencies = [
 "http 1.3.1",
 "http-body 1.0.1",
 "httparse",
+ "httpdate",
 "itoa",
 "pin-project-lite",
 "pin-utils",
@@ -1522,6 +1564,12 @@ dependencies = [
 "regex-automata",
 ]

+[[package]]
+name = "matchit"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
+
 [[package]]
 name = "memchr"
 version = "2.7.6"
@@ -2417,9 +2465,9 @@ checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"

 [[package]]
 name = "rustix"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
+checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
 dependencies = [
 "bitflags",
 "errno",
@@ -2530,6 +2578,15 @@ version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"

+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -2676,10 +2733,21 @@ dependencies = [
 ]

 [[package]]
-name = "serde_spanned"
-version = "1.0.3"
+name = "serde_path_to_error"
+version = "0.1.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e24345aa0fe688594e73770a5f6d1b216508b4f93484c0026d521acd30134392"
+checksum = "10a9ff822e371bb5403e391ecd83e182e0e77ba7f6fe0160b795797109d1b457"
+dependencies = [
+ "itoa",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "serde_spanned"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776"
 dependencies = [
 "serde_core",
 ]
@@ -2915,9 +2983,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"

 [[package]]
 name = "tempfile"
-version = "3.23.0"
+version = "3.24.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c"
 dependencies = [
 "fastrand",
 "getrandom 0.3.4",
@@ -3100,6 +3168,30 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tokio-tungstenite"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c83b561d025642014097b66e6c1bb422783339e0909e4429cde4749d1990bc38"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.21.0",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "edc5f74e248dc973e0dbb7b74c7e0d6fcc301c694ff50049504004ef4d0cdcd9"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite 0.24.0",
+]
+
 [[package]]
 name = "tokio-tungstenite"
 version = "0.28.0"
@@ -3113,7 +3205,7 @@ dependencies = [
 "rustls-pki-types",
 "tokio",
 "tokio-rustls 0.26.4",
- "tungstenite",
+ "tungstenite 0.28.0",
 ]

 [[package]]
@@ -3131,9 +3223,9 @@ dependencies = [

 [[package]]
 name = "toml"
-version = "0.9.8"
+version = "0.9.11+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0dc8b1fb61449e27716ec0e1bdf0f6b8f3e8f6b05391e8497b8b6d7804ea6d8"
+checksum = "f3afc9a848309fe1aaffaed6e1546a7a14de1f935dc9d89d32afd9a44bab7c46"
 dependencies = [
 "indexmap",
 "serde_core",
@@ -3146,9 +3238,9 @@ dependencies = [

 [[package]]
 name = "toml_datetime"
-version = "0.7.3"
+version = "0.7.5+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533"
+checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347"
 dependencies = [
 "serde_core",
 ]
@@ -3167,18 +3259,18 @@ dependencies = [

 [[package]]
 name = "toml_parser"
-version = "1.0.4"
+version = "1.0.6+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e"
+checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44"
 dependencies = [
 "winnow",
 ]

 [[package]]
 name = "toml_writer"
-version = "1.0.4"
+version = "1.0.6+spec-1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df8b2b54733674ad286d16267dcfc7a71ed5c776e4ac7aa3c3e2561f7c637bf2"
+checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607"

 [[package]]
 name = "tower"
@@ -3193,6 +3285,7 @@ dependencies = [
 "tokio",
 "tower-layer",
 "tower-service",
+ "tracing",
 ]

 [[package]]
@@ -3231,6 +3324,7 @@ version = "0.1.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
 dependencies = [
+ "log",
 "pin-project-lite",
 "tracing-attributes",
 "tracing-core",
@@ -3292,6 +3386,43 @@ version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"

+[[package]]
+name = "tungstenite"
+version = "0.21.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ef1a641ea34f399a848dea702823bbecfb4c486f911735368f1f137cb8257e1"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "data-encoding",
+ "http 1.3.1",
+ "httparse",
+ "log",
+ "rand 0.8.5",
+ "sha1",
+ "thiserror 1.0.69",
+ "url",
+ "utf-8",
+]
+
+[[package]]
+name = "tungstenite"
+version = "0.24.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "18e5b8366ee7a95b16d32197d0b2604b43a0be89dc5fac9f8e96ccafbaedda8a"
+dependencies = [
+ "byteorder",
+ "bytes",
+ "data-encoding",
+ "http 1.3.1",
+ "httparse",
+ "log",
+ "rand 0.8.5",
+ "sha1",
+ "thiserror 1.0.69",
+ "utf-8",
+]
+
 [[package]]
 name = "tungstenite"
 version = "0.28.0"
@@ -3353,6 +3484,12 @@ dependencies = [
 "serde",
 ]

+[[package]]
+name = "urlencoding"
+version = "2.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
+
 [[package]]
 name = "utf-8"
 version = "0.7.6"
@@ -3371,6 +3508,7 @@ version = "1.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
 dependencies = [
+ "getrandom 0.3.4",
 "js-sys",
 "wasm-bindgen",
 ]
@@ -3393,6 +3531,16 @@ version = "0.9.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"

+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
 [[package]]
 name = "want"
 version = "0.3.1"
@@ -3495,6 +3643,40 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "web_scraper"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "axum",
+ "chrono",
+ "csv",
+ "dotenvy",
+ "fantoccini",
+ "flate2",
+ "futures",
+ "once_cell",
+ "rand 0.9.2",
+ "rayon",
+ "regex",
+ "reqwest",
+ "scraper",
+ "serde",
+ "serde_json",
+ "sha2",
+ "tokio",
+ "tokio-tungstenite 0.21.0",
+ "toml",
+ "tracing",
+ "tracing-subscriber",
+ "url",
+ "urlencoding",
+ "uuid",
+ "walkdir",
+ "yfinance-rs",
+ "zip",
+]
+
 [[package]]
 name = "webdriver"
 version = "0.50.0"
@@ -3524,6 +3706,15 @@ dependencies = [
 "rustls-pki-types",
 ]

+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "windows-core"
 version = "0.62.2"
@@ -3800,7 +3991,7 @@ dependencies = [
 "serde_json",
 "thiserror 2.0.17",
 "tokio",
- "tokio-tungstenite",
+ "tokio-tungstenite 0.28.0",
 "url",
 ]

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
-name = "event_backtest_engine"
+name = "web_scraper"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 authors = ["Your Name <you@example.com>"]
 description = "High-impact economic & corporate earnings data collector for short-event backtesting (overnight/weekend gaps)"
 license = "MIT OR Apache-2.0"
@@ -17,11 +17,12 @@ categories = ["finance", "data-structures", "asynchronous"]
 tokio = { version = "1.38", features = ["full"] }

 # Web scraping & HTTP
-reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking"] }
+reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking", "socks", "cookies"] }
 scraper = "0.19"                    # HTML parsing for Yahoo earnings pages
 fantoccini = { version = "0.20", features = ["rustls-tls"] }  # Headless Chrome for finanzen.net
 yfinance-rs = "0.7.2"
 url = "2.5.7"
+urlencoding = "2.1"

 # Serialization
 serde = { version = "1.0", features = ["derive"] }
@@ -30,8 +31,9 @@ csv = "1.3"
 zip = "6.0.0"
 flate2 = "1.1.5"

-# 
+# Formatting
 regex = "1.12.2"
+walkdir = "2"

 # Generating
 rand = "0.9.2"
@@ -53,4 +55,15 @@ once_cell = "1.21.3"

 # Parallel processing (for batch tickers)
 futures = "0.3"
-rayon = "1.10"  # optional: for parallel price downloads
+rayon = "1.10"  # optional: for parallel price downloads
+
+# Web server for dashboard
+axum = { version = "0.7", features = ["ws"] }
+tokio-tungstenite = "0.21"  # For WebSocket support
+
+# tests
+#tempfile = "3.24.0"
+
+# data integrity
+sha2 = "0.10.9"
+uuid = { version = "1.0", features = ["v4", "v7"] }
--- a/README.md
+++ b/README.md
@@ -248,4 +248,8 @@ Der Scraper unterstützt 52 Länder und Regionen (siehe `countries.json`), darun
 ## chromedriver Download

 https://chromedriver.storage.googleapis.com/index.html
-https://googlechromelabs.github.io/chrome-for-testing/
+https://googlechromelabs.github.io/chrome-for-testing/
+
+## Graphviz.org Download
+
+https://graphviz.org/download/
--- a/data_updating_rule.md
+++ b/data_updating_rule.md
@@ -0,0 +1,25 @@
+# Abort-Safe Incremental JSONL Persistence Rule
+
+**Rule:** Persist state using an *append-only, fsync-backed JSONL log with atomic checkpoints*.
+
+**Requirements**
+- Write updates as **single-line JSON objects** (one logical mutation per line).
+- **Append only** (`O_APPEND`), never modify existing lines.
+- After each write batch, call **`fsync`** (or `File::sync_data`) before reporting success.
+- Treat a **line as committed only if it ends with `\n`**; ignore trailing partial lines on recovery.
+- Periodically create a **checkpoint**:
+  - Write full state to `state.tmp`
+  - `fsync`
+  - **Atomic rename** to `state.jsonl`
+- On startup:
+  - Load last checkpoint
+  - Replay log lines after it in order
+- On abort/panic/crash:
+  - No truncation
+  - Replay guarantees no data loss beyond last fsynced line
+
+**Outcome**
+- Crash/abort-safe
+- O(1) writes
+- Deterministic recovery
+- Minimal overhead
--- a/event_backtest_engine.exe
+++ b/event_backtest_engine.exe
--- a/integrity/checkpoint_dependencies.dot
+++ b/integrity/checkpoint_dependencies.dot
@@ -0,0 +1,28 @@
+digraph Dependencies {
+  rankdir=LR;
+  node [shape=box];
+
+  "yahoo_options_enrichment_complete" [label="yahoo_options_enrichment_complete
+Options data enriched for all companies"];
+  "yahoo_events_enrichment_complete" [label="yahoo_events_enrichment_complete
+Corporate events enriched for all companies"];
+  "yahoo_companies_cleansed_no_data" [label="yahoo_companies_cleansed_no_data
+Companies cleansed of data with no Yahoo results"];
+  "yahoo_chart_enrichment_complete" [label="yahoo_chart_enrichment_complete
+Chart data enriched for all companies"];
+  "enrichment_group" [label="enrichment_group
+Yahoo exchanges collected and validated"];
+  "yahoo_companies_cleansed_low_profile" [label="yahoo_companies_cleansed_low_profile
+Companies cleansed of low profile (insufficient market cap/price data)"];
+  "lei_figi_mapping_complete" [label="lei_figi_mapping_complete
+LEI-to-FIGI mappings from OpenFIGI API"];
+  "securities_data_complete" [label="securities_data_complete
+Securities data built from FIGI mappings"];
+
+  "yahoo_options_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
+  "yahoo_events_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
+  "yahoo_companies_cleansed_no_data" -> "securities_data_complete";
+  "yahoo_chart_enrichment_complete" -> "yahoo_companies_cleansed_low_profile" [label="via group enrichment_group"];
+  "yahoo_companies_cleansed_low_profile" -> "yahoo_companies_cleansed_no_data";
+  "securities_data_complete" -> "lei_figi_mapping_complete";
+}
--- a/integrity/checkpoint_dependencies.toml
+++ b/integrity/checkpoint_dependencies.toml
@@ -0,0 +1,61 @@
+# checkpoint_dependencies.toml - Complete configuration
+
+# ============================================================================
+# COLLECTION STAGE (No dependencies)
+# ============================================================================
+
+[checkpoints.lei_figi_mapping_complete]
+description = "LEI-to-FIGI mappings from OpenFIGI API"
+depends_on = []
+
+[checkpoints.securities_data_complete]
+description = "Securities data built from FIGI mappings"
+depends_on = ["lei_figi_mapping_complete"]
+
+# ============================================================================
+# CLEANSING STAGE (Depends on collection)
+# ============================================================================
+
+[checkpoints.yahoo_companies_cleansed_no_data]
+description = "Companies cleansed of data with no Yahoo results"
+depends_on = ["securities_data_complete"]
+
+[checkpoints.yahoo_companies_cleansed_low_profile]
+description = "Companies cleansed of low profile (insufficient market cap/price data)"
+depends_on = ["yahoo_companies_cleansed_no_data"]
+
+# ============================================================================
+# ENRICHMENT GROUP (All depend on cleansed companies)
+# ============================================================================
+
+[groups.enrichment_group]
+description = "Yahoo Finance enrichment functions"
+members = [
+    "yahoo_events_enrichment_complete",
+    "yahoo_options_enrichment_complete",
+    "yahoo_chart_enrichment_complete"
+]
+depends_on = ["yahoo_companies_cleansed_low_profile"]
+
+[checkpoints.yahoo_events_enrichment_complete]
+description = "Corporate events enriched for all companies"
+depends_on = []
+group = "enrichment_group"
+
+[checkpoints.yahoo_options_enrichment_complete]
+description = "Options data enriched for all companies"
+depends_on = []
+group = "enrichment_group"
+
+[checkpoints.yahoo_chart_enrichment_complete]
+description = "Chart data enriched for all companies"
+depends_on = []
+group = "enrichment_group"
+
+# ============================================================================
+# SECURITIES PROCESSING (Depends on LEI mapping)
+# ============================================================================
+
+[checkpoints.enrichment_group]
+description = "Yahoo exchanges collected and validated"
+depends_on = []
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,45 +1,35 @@
+// src/config.rs - FIXED VERSION
+
 use anyhow::{Context, Result};
 use chrono::{self};
 use serde::{Deserialize, Serialize};

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Config {
-    // Economic calendar start (usually the earliest available on finanzen.net)
-    pub economic_start_date: String,     // e.g. "2007-02-13"
-    // Corporate earnings & price history start
-    pub corporate_start_date: String,    // e.g. "2000-01-01" or "2010-01-01"
-    // How far into the future we scrape economic events
-    pub economic_lookahead_months: u32,  // default: 3
-    /// Maximum number of parallel scraping tasks (default: 10).
-    /// This limits concurrency to protect system load and prevent website spamming.
+    pub economic_start_date: String,
+    pub corporate_start_date: String,
+    pub economic_lookahead_months: u32,
+    
    #[serde(default = "default_max_parallel_instances")]
    pub max_parallel_instances: usize,

    pub max_tasks_per_instance: usize,

-    /// VPN rotation configuration
-    /// If set to "true", enables automatic VPN rotation between sessions
-    #[serde(default)]
+    #[serde(default = "default_enable_vpn_rotation")]
    pub enable_vpn_rotation: bool,

-    /// Comma-separated list of VPN servers/country codes to rotate through.
-    /// Example: "US-Free#1,UK-Free#1,JP-Free#1" or "US,JP,DE"
-    /// If empty, VPN rotation is disabled.
-    #[serde(default)]
-    pub vpn_servers: String,
+    // IMPROVEMENT: reduced defaults for less aggressive scraping
+    #[serde(default = "default_max_requests_per_session")]
+    pub max_requests_per_session: usize,
+    
+    #[serde(default = "default_min_request_interval_ms")]
+    pub min_request_interval_ms: u64,
+    
+    #[serde(default = "default_max_retry_attempts")]
+    pub max_retry_attempts: u32,

-    /// Number of tasks per session before rotating VPN
-    /// If set to 0, rotates VPN between economic and corporate phases
-    #[serde(default = "default_tasks_per_session")]
-    pub tasks_per_vpn_session: usize,
-}
-
-fn default_max_parallel_instances() -> usize {
-    10
-}
-
-fn default_tasks_per_session() -> usize {
-    0 // 0 = rotate between economic/corporate
+    #[serde(default = "default_proxy_instances_per_certificate")]
+    pub proxy_instances_per_certificate: Option<usize>,
 }

 impl Default for Config {
@@ -50,28 +40,40 @@ impl Default for Config {
            economic_lookahead_months: 3,
            max_parallel_instances: default_max_parallel_instances(),
            max_tasks_per_instance: 0,
+            max_requests_per_session: default_max_requests_per_session(),
+            min_request_interval_ms: default_min_request_interval_ms(),
+            max_retry_attempts: default_max_retry_attempts(),
            enable_vpn_rotation: false,
-            vpn_servers: String::new(),
-            tasks_per_vpn_session: default_tasks_per_session(),
+            proxy_instances_per_certificate: default_proxy_instances_per_certificate(),
        }
    }
 }

+fn default_enable_vpn_rotation() -> bool {
+    false
+}
+
+fn default_max_parallel_instances() -> usize {
+    4
+}
+
+fn default_max_requests_per_session() -> usize { 
+    10
+}
+
+fn default_min_request_interval_ms() -> u64 { 
+    1200
+}
+
+fn default_max_retry_attempts() -> u32 { 3 }
+
+fn default_proxy_instances_per_certificate() -> Option<usize> {
+    Some(1)
+}
+
 impl Config {
-    /// Loads the configuration from environment variables using dotenvy.
-    ///
-    /// This function loads a `.env` file if present (via `dotenvy::dotenv()`),
-    /// then retrieves each configuration value from environment variables.
-    /// If a variable is missing, it falls back to the default value.
-    /// Variable names are uppercase with underscores (e.g., ECONOMIC_START_DATE).
-    ///
-    /// # Returns
-    /// The loaded Config on success.
-    ///
-    /// # Errors
-    /// Returns an error if parsing fails (e.g., invalid integer for lookahead months).
+    /// Loads configuration from environment variables using dotenvy.
    pub fn load() -> Result<Self> {
-        // Load .env file if it exists; ignore if not found (dotenvy::dotenv returns Ok if no file)
        let _ = dotenvy::dotenv().context("Failed to load .env file (optional)")?;

        let economic_start_date = dotenvy::var("ECONOMIC_START_DATE")
@@ -85,13 +87,14 @@ impl Config {
            .parse()
            .context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;

+        // IMPROVEMENT: reduced defaults
        let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
-            .unwrap_or_else(|_| "10".to_string())
+            .unwrap_or_else(|_| "4".to_string())  // Geändert von 10
            .parse()
            .context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;

        let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
-            .unwrap_or_else(|_| "0".to_string())
+            .unwrap_or_else(|_| "5".to_string())  // Geändert von 0
            .parse()
            .context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;

@@ -100,13 +103,25 @@ impl Config {
            .parse::<bool>()
            .context("Failed to parse ENABLE_VPN_ROTATION as bool")?;

-        let vpn_servers = dotenvy::var("VPN_SERVERS")
-            .unwrap_or_else(|_| String::new());
-
-        let tasks_per_vpn_session: usize = dotenvy::var("TASKS_PER_VPN_SESSION")
-            .unwrap_or_else(|_| "0".to_string())
+        let max_requests_per_session: usize = dotenvy::var("MAX_REQUESTS_PER_SESSION")
+            .unwrap_or_else(|_| "10".to_string())  // Geändert von 25
            .parse()
-            .context("Failed to parse TASKS_PER_VPN_SESSION as usize")?;
+            .context("Failed to parse MAX_REQUESTS_PER_SESSION as usize")?;
+
+        let min_request_interval_ms: u64 = dotenvy::var("MIN_REQUEST_INTERVAL_MS")
+            .unwrap_or_else(|_| "1200".to_string())  // Geändert von 300
+            .parse()
+            .context("Failed to parse MIN_REQUEST_INTERVAL_MS as u64")?;
+
+        let max_retry_attempts: u32 = dotenvy::var("MAX_RETRY_ATTEMPTS")
+            .unwrap_or_else(|_| "3".to_string())
+            .parse()
+            .context("Failed to parse MAX_RETRY_ATTEMPTS as u32")?;
+
+        let proxy_instances_per_certificate: Option<usize> = match dotenvy::var("PROXY_INSTANCES_PER_CERTIFICATE") {
+            Ok(val) => Some(val.parse().context("Failed to parse PROXY_INSTANCES_PER_CERTIFICATE as usize")?),
+            Err(_) => Some(1),
+        };

        Ok(Self {
            economic_start_date,
@@ -115,8 +130,10 @@ impl Config {
            max_parallel_instances,
            max_tasks_per_instance,
            enable_vpn_rotation,
-            vpn_servers,
-            tasks_per_vpn_session,
+            max_requests_per_session,
+            min_request_interval_ms,
+            max_retry_attempts,
+            proxy_instances_per_certificate,
        })
    }

--- a/src/corporate/aggregation.rs
+++ b/src/corporate/aggregation.rs
@@ -1,195 +0,0 @@
-// src/corporate/aggregation.rs
-use super::types::CompanyPrice;
-use super::storage::*;
-use crate::util::directories::DataPaths;
-use tokio::fs;
-use std::collections::HashMap;
-
-#[derive(Debug)]
-struct DayData {
-    sources: Vec<(CompanyPrice, String)>, // (price, source_ticker)
-    total_volume: u64,
-    vwap: f64,
-    open: f64,
-    high: f64,
-    low: f64,
-    close: f64,
-}
-
-/// Aggregate price data from multiple exchanges, converting all to USD
-pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
-    let company_dir = get_company_dir(paths, lei);
-
-    for timeframe in ["daily", "5min"].iter() {
-        let source_dir = company_dir.join(timeframe);
-        if !source_dir.exists() {
-            continue;
-        }
-
-        let mut all_prices: Vec<(CompanyPrice, String)> = Vec::new();
-        let mut by_date_time: HashMap<String, DayData> = HashMap::new();
-
-        // Load all sources with their ticker names
-        let mut entries = tokio::fs::read_dir(&source_dir).await?;
-        let mut source_count = 0;
-        let mut sources_used = std::collections::HashSet::new();
-        
-        while let Some(entry) = entries.next_entry().await? {
-            let source_dir_path = entry.path();
-            if !source_dir_path.is_dir() { continue; }
-            
-            let source_ticker = source_dir_path
-                .file_name()
-                .and_then(|n| n.to_str())
-                .unwrap_or("unknown")
-                .to_string();
-            
-            let prices_path = source_dir_path.join("prices.json");
-            if !prices_path.exists() { continue; }
-
-            let content = tokio::fs::read_to_string(&prices_path).await?;
-            let mut prices: Vec<CompanyPrice> = serde_json::from_str(&content)?;
-            
-            if !prices.is_empty() {
-                sources_used.insert(source_ticker.clone());
-                source_count += 1;
-            }
-            
-            for price in prices {
-                all_prices.push((price, source_ticker.clone()));
-            }
-        }
-
-        if all_prices.is_empty() {
-            continue;
-        }
-
-        println!("    Aggregating from {} exchanges: {}", 
-            sources_used.len(),
-            sources_used.iter()
-                .map(|s| s.as_str())
-                .collect::<Vec<_>>()
-                .join(", ")
-        );
-
-        // Group by date + time (for 5min) or just date
-        for (p, source) in all_prices {
-            let key = if timeframe == &"5min" && !p.time.is_empty() {
-                format!("{}_{}", p.date, p.time)
-            } else {
-                p.date.clone()
-            };
-
-            // Convert to USD immediately
-            let usd_rate = super::fx::get_usd_rate(&p.currency).await.unwrap_or(1.0);
-            
-            let mut p_usd = p.clone();
-            p_usd.open *= usd_rate;
-            p_usd.high *= usd_rate;
-            p_usd.low *= usd_rate;
-            p_usd.close *= usd_rate;
-            p_usd.adj_close *= usd_rate;
-            p_usd.currency = "USD".to_string();
-
-            let entry = by_date_time.entry(key.clone()).or_insert(DayData {
-                sources: vec![],
-                total_volume: 0,
-                vwap: 0.0,
-                open: p_usd.open,
-                high: p_usd.high,
-                low: p_usd.low,
-                close: p_usd.close,
-            });
-
-            let volume = p.volume.max(1); // avoid div0
-            let vwap_contrib = p_usd.close * volume as f64;
-
-            entry.sources.push((p_usd.clone(), source));
-            entry.total_volume += volume;
-            entry.vwap += vwap_contrib;
-
-            // Use first open, last close, max high, min low
-            if entry.sources.len() == 1 {
-                entry.open = p_usd.open;
-            }
-            entry.close = p_usd.close;
-            entry.high = entry.high.max(p_usd.high);
-            entry.low = entry.low.min(p_usd.low);
-        }
-
-        // Finalize aggregated data
-        let mut aggregated: Vec<CompanyPrice> = Vec::new();
-
-        for (key, data) in by_date_time {
-            let vwap = data.vwap / data.total_volume as f64;
-
-            let (date, time) = if key.contains('_') {
-                let parts: Vec<&str> = key.split('_').collect();
-                (parts[0].to_string(), parts[1].to_string())
-            } else {
-                (key, "".to_string())
-            };
-
-            // Track which exchange contributed most volume
-            let best_source = data.sources.iter()
-                .max_by_key(|(p, _)| p.volume)
-                .map(|(_, src)| src.clone())
-                .unwrap_or_else(|| "unknown".to_string());
-
-            aggregated.push(CompanyPrice {
-                ticker: format!("{lei}@agg"), // Mark as aggregated
-                date,
-                time,
-                open: data.open,
-                high: data.high,
-                low: data.low,
-                close: data.close,
-                adj_close: vwap,
-                volume: data.total_volume,
-                currency: "USD".to_string(),
-            });
-        }
-
-        aggregated.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-
-        // Save aggregated result
-        let agg_dir = company_dir.join("aggregated").join(timeframe);
-        fs::create_dir_all(&agg_dir).await?;
-        let path = agg_dir.join("prices.json");
-        fs::write(&path, serde_json::to_string_pretty(&aggregated)?).await?;
-        
-        // Save aggregation metadata
-        let meta = AggregationMetadata {
-            lei: lei.to_string(),  // ← CHANGE THIS
-            timeframe: timeframe.to_string(),
-            sources: sources_used.into_iter().collect(),
-            total_bars: aggregated.len(),
-            date_range: (
-                aggregated.first().map(|p| p.date.clone()).unwrap_or_default(),
-                aggregated.last().map(|p| p.date.clone()).unwrap_or_default(),
-            ),
-            aggregated_at: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
-        };
-                
-        let meta_path = agg_dir.join("metadata.json");
-        fs::write(&meta_path, serde_json::to_string_pretty(&meta)?).await?;
-        
-        println!("    ✓ {} {} bars from {} sources (USD)", 
-            aggregated.len(), 
-            timeframe,
-            source_count
-        );
-    }
-
-    Ok(())
-}
-
-#[derive(Debug, serde::Serialize, serde::Deserialize)]
-struct AggregationMetadata {
-    lei: String,
-    timeframe: String,
-    sources: Vec<String>,
-    total_bars: usize,
-    date_range: (String, String),
-    aggregated_at: String,
-}
--- a/src/corporate/bond_processing.rs
+++ b/src/corporate/bond_processing.rs
@@ -0,0 +1,273 @@
+// src/corporate/bond_processing.rs
+// Bond-specific processing logic for corporate and government bonds
+
+use super::types::*;
+
+/// Parse bond details from ticker and security description
+///
+/// Fills a `BondDetails` with whatever can be recognised:
+/// - floating-rate flag: "Float", " F " or " V0 " in the ticker, or "Float" in the description
+/// - coupon rate and zero-coupon flag (not attempted for floating-rate bonds)
+/// - maturity date as `YYYY-MM-DD`, plus a whole-year tenor relative to the current UTC year
+/// - series identifier: the ticker token that follows the maturity date
+///
+/// Fields that cannot be determined stay `None` / `false`.
+///
+/// Examples:
+/// - "WTFC 4.3 01/12/26 0003" -> coupon: 4.3, maturity: 2026-01-12
+/// - "SLOVAK 1.5225 05/10/28 4Y" -> coupon: 1.5225, maturity: 2028-05-10
+/// - "SEK Float 06/30/34" -> floating rate, maturity: 2034-06-30
+/// - "GGB 0 10/15/42" -> zero coupon, maturity: 2042-10-15
+pub fn parse_bond_details(ticker: &str, security_description: &str) -> BondDetails {
+    let mut details = BondDetails {
+        coupon_rate: None,
+        maturity_date: None,
+        is_floating: false,
+        is_zero_coupon: false,
+        tenor_years: None,
+        series_identifier: None,
+    };
+
+    // Check for floating rate - look for "Float", " F ", " V0 " patterns
+    if ticker.contains("Float") || ticker.contains(" F ") || ticker.contains(" V0 ")
+        || security_description.contains("Float") {
+        details.is_floating = true;
+    }
+
+    // Parse coupon rate if not floating
+    if !details.is_floating {
+        if let Some(coupon) = extract_coupon_rate(ticker, security_description) {
+            details.coupon_rate = Some(coupon);
+            details.is_zero_coupon = coupon == 0.0;
+        }
+    }
+
+    // Parse maturity date
+    if let Some(maturity) = extract_maturity_date(ticker, security_description) {
+        // Tenor is deliberately coarse: maturity year minus the current year,
+        // clamped at zero for bonds that have already matured.
+        let maturity_year = maturity
+            .split('-')
+            .next()
+            .and_then(|year_str| year_str.parse::<i32>().ok());
+        if let Some(mat_year) = maturity_year {
+            let years_to_maturity = mat_year.saturating_sub(current_utc_year()).max(0) as u32;
+            details.tenor_years = Some(years_to_maturity);
+        }
+        details.maturity_date = Some(maturity);
+    }
+
+    // Extract series identifier
+    details.series_identifier = extract_series_identifier(ticker);
+
+    details
+}
+
+/// Current calendar year in UTC, derived from the system clock using std only.
+///
+/// Converts days since the Unix epoch to a proleptic Gregorian year
+/// (days-to-civil conversion). A clock set before the epoch is treated as 1970.
+fn current_utc_year() -> i32 {
+    let secs = std::time::SystemTime::now()
+        .duration_since(std::time::UNIX_EPOCH)
+        .map(|elapsed| elapsed.as_secs() as i64)
+        .unwrap_or(0);
+
+    let days = secs.div_euclid(86_400);
+    // Shift the epoch to 0000-03-01 so leap days fall at the end of the cycle year.
+    let shifted = days + 719_468;
+    let era = shifted.div_euclid(146_097);
+    let day_of_era = shifted.rem_euclid(146_097);
+    let year_of_era =
+        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
+    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
+    let shifted_month = (5 * day_of_year + 2) / 153;
+    // January and February belong to the following civil year in the shifted calendar.
+    let carry = if shifted_month >= 10 { 1 } else { 0 };
+
+    (year_of_era + era * 400 + carry) as i32
+}
+
+/// Extract the coupon rate from the combined ticker and description text
+///
+/// Tries, in order:
+/// 1. a fractional coupon such as "12 1/2" (delegated to `parse_fractional_coupon`)
+/// 2. a plain number in the range 0-20 that is directly followed by a date-like
+///    token (one containing '/' or at least 8 characters long)
+///
+/// Returns `None` when neither pattern matches.
+fn extract_coupon_rate(ticker: &str, description: &str) -> Option<f64> {
+    let combined = format!("{} {}", ticker, description);
+
+    parse_fractional_coupon(&combined).or_else(|| {
+        let tokens: Vec<&str> = combined.split_whitespace().collect();
+
+        // Walk adjacent token pairs: candidate rate, then the token after it.
+        tokens.windows(2).find_map(|pair| {
+            let rate = pair[0].parse::<f64>().ok()?;
+
+            // Sanity check: coupon rates are typically 0-20%
+            let plausible = rate >= 0.0 && rate <= 20.0;
+            let followed_by_date = pair[1].contains('/') || pair[1].len() >= 8;
+
+            if plausible && followed_by_date {
+                Some(rate)
+            } else {
+                None
+            }
+        })
+    })
+}
+
+/// Parse fractional coupon like "12 1/2" -> 12.5
+///
+/// Scans adjacent whitespace-separated tokens for a number followed by a
+/// fraction and returns `whole + numerator / denominator` for the first match.
+///
+/// A pair is accepted only when:
+/// - the whole part is a finite, non-negative number
+/// - the second token is exactly `N/D` with unsigned integer parts
+/// - neither part is zero-padded ("06/34" is a date fragment, not a fraction)
+/// - the fraction is proper (`0 < N < D`)
+///
+/// Returns `None` when no such pair exists.
+fn parse_fractional_coupon(text: &str) -> Option<f64> {
+    let tokens: Vec<&str> = text.split_whitespace().collect();
+
+    tokens.windows(2).find_map(|pair| {
+        let whole = pair[0].parse::<f64>().ok()?;
+        // `f64::from_str` also accepts "nan" / "inf"; those are never coupons.
+        if !whole.is_finite() || whole < 0.0 {
+            return None;
+        }
+
+        let (num_str, den_str) = pair[1].split_once('/')?;
+        // Zero-padded parts indicate MM/YY or MM/DD, not a coupon fraction.
+        if num_str.starts_with('0') || den_str.starts_with('0') {
+            return None;
+        }
+
+        // A second '/' (full date) makes the denominator fail to parse.
+        let numerator = num_str.parse::<u32>().ok()?;
+        let denominator = den_str.parse::<u32>().ok()?;
+        if numerator == 0 || numerator >= denominator {
+            return None;
+        }
+
+        Some(whole + f64::from(numerator) / f64::from(denominator))
+    })
+}
+
+/// Find the maturity date in the combined ticker and description text
+///
+/// Returns the first whitespace-separated token that `parse_date_pattern`
+/// accepts (e.g. "01/12/26", "06/30/2034"), already normalised to
+/// `YYYY-MM-DD`, or `None` if no token looks like a date.
+fn extract_maturity_date(ticker: &str, description: &str) -> Option<String> {
+    let combined = format!("{} {}", ticker, description);
+
+    combined
+        .split_whitespace()
+        .find_map(|token| parse_date_pattern(token))
+}
+
+/// Parse a `MM/DD/YY` or `MM/DD/YYYY` token into `YYYY-MM-DD`
+///
+/// Two-digit years are expanded with a pivot: `00..=50` -> 20xx, `51..=99` -> 19xx.
+///
+/// Returns `None` unless the token has exactly three '/'-separated parts that
+/// are all plain ASCII digits, the year has 2 or 4 digits, the month is 1-12,
+/// and the day exists in that month (leap years included).
+fn parse_date_pattern(s: &str) -> Option<String> {
+    let fields: Vec<&str> = s.split('/').collect();
+    if fields.len() != 3 {
+        return None;
+    }
+
+    // Digits only: rejects signs ("+5"), letters ("abcd") and empty parts.
+    let all_numeric = fields
+        .iter()
+        .all(|field| !field.is_empty() && field.bytes().all(|b| b.is_ascii_digit()));
+    if !all_numeric {
+        return None;
+    }
+
+    let month: u32 = fields[0].parse().ok()?;
+    let day: u32 = fields[1].parse().ok()?;
+
+    // Parse year - could be 2 or 4 digits
+    let year: u32 = match fields[2].len() {
+        2 => {
+            let yy: u32 = fields[2].parse().ok()?;
+            // Assume 20xx for values <= 50, 19xx for > 50
+            if yy <= 50 { 2000 + yy } else { 1900 + yy }
+        }
+        4 => fields[2].parse().ok()?,
+        _ => return None,
+    };
+
+    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
+    let days_in_month = match month {
+        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
+        4 | 6 | 9 | 11 => 30,
+        2 if is_leap_year => 29,
+        2 => 28,
+        _ => return None, // month outside 1-12
+    };
+
+    if day < 1 || day > days_in_month {
+        return None;
+    }
+
+    Some(format!("{:04}-{:02}-{:02}", year, month, day))
+}
+
+/// Extract the series identifier: the ticker token right after the date
+///
+/// A token counts as the date when it contains exactly two '/' characters.
+/// Returns `None` if there is no such token or nothing follows it.
+///
+/// Examples of identifiers: "0003", "4Y", "144A", "REGS", "MTN", "PSI", "CD"
+fn extract_series_identifier(ticker: &str) -> Option<String> {
+    let tokens: Vec<&str> = ticker.split_whitespace().collect();
+
+    // Each window is (candidate date, token that follows it).
+    tokens
+        .windows(2)
+        .find(|pair| pair[0].matches('/').count() == 2)
+        .map(|pair| pair[1].to_string())
+}
+
+/// Classify a government issuer by keywords in its name (case-insensitive)
+///
+/// Categories are tested in this order and the first hit wins:
+/// `"sovereign"`, `"municipal"`, `"state"`, `"agency"`; anything else is `"other"`.
+/// The order matters: "county council" is municipal, while a bare "county" is state.
+pub fn classify_government_issuer(name: &str) -> String {
+    let lowered = name.to_lowercase();
+    let mentions_any =
+        |keywords: &[&str]| keywords.iter().any(|keyword| lowered.contains(*keyword));
+
+    // Sovereign nations ("hellenic" covers Greece)
+    let is_sovereign = mentions_any(&["republic", "kingdom", "federal republic", "hellenic", "slovak"])
+        || lowered.ends_with(" govt")
+        || lowered.ends_with(" government");
+
+    let category = if is_sovereign {
+        "sovereign"
+    } else if mentions_any(&["kommune", "municipality", "city of", "town of", "county council"]) {
+        // Municipalities (Norwegian communes, cities, etc.)
+        "municipal"
+    } else if mentions_any(&["state of", "province", "region", "county"]) {
+        // States/Provinces/Regions
+        "state"
+    } else if mentions_any(&["export credit", "development bank", "housing", "akademiska", "byggdastofnun"]) {
+        // Government agencies/entities
+        "agency"
+    } else {
+        "other"
+    };
+
+    category.to_string()
+}
+
+/// Map a security type string to a simplified government bond category
+///
+/// Matching is case-insensitive and keyword based; the first matching rule wins:
+/// 1. contains "GLOBAL" -> "global"
+/// 2. contains "EURO" -> "eurodollar" if it mentions "DOLLAR" without "NON-DOLLAR", else "euro"
+/// 3. contains "YANKEE" -> "yankee"
+/// 4. contains "MTN" -> "mtn"
+/// 5. contains "DOMESTIC" -> "domestic"
+/// 6. otherwise -> "other"
+///
+/// # Examples
+/// - "DOMESTIC" -> "domestic"
+/// - "GLOBAL" -> "global"
+/// - "EURO NON-DOLLAR" -> "euro"
+/// - "DOMESTIC MTN" -> "mtn"
+pub fn classify_government_bond_type(security_type: &str) -> String {
+    let upper = security_type.to_uppercase();
+    let has = |needle: &str| upper.contains(needle);
+
+    let category = if has("GLOBAL") {
+        "global"
+    } else if has("EURO") {
+        if has("DOLLAR") && !has("NON-DOLLAR") {
+            "eurodollar"
+        } else {
+            "euro"
+        }
+    } else if has("YANKEE") {
+        "yankee"
+    } else if has("MTN") {
+        "mtn"
+    } else if has("DOMESTIC") {
+        "domestic"
+    } else {
+        "other"
+    };
+
+    category.to_string()
+}
--- a/src/corporate/checkpoint_helpers.rs
+++ b/src/corporate/checkpoint_helpers.rs
@@ -0,0 +1,215 @@
+// src/corporate/checkpoint_helpers.rs
+//! Shared helpers for checkpoint-based recovery and logging
+//! 
+//! This module extracts common patterns used across multiple update modules
+//! to reduce code duplication and improve maintainability.
+
+use super::types::CompanyData;
+use crate::util::logger;
+use std::collections::HashMap;
+use std::path::{Path};
+use tokio::fs::{File};
+use tokio::io::{AsyncWriteExt};
+use anyhow::Result;
+
+/// Rebuild the company map from a checkpoint file plus its update log
+///
+/// Both files are read as JSON Lines of `CompanyData`, keyed by company name:
+/// 1. the checkpoint (if present) is loaded first
+/// 2. the log (if present) is replayed on top, so log entries overwrite
+///    checkpoint entries with the same name
+///
+/// Blank lines and lines not ending in `}` are skipped silently; lines that
+/// fail to deserialize are skipped with a warning.
+///
+/// `checkpoint_desc` is only used in the log message.
+///
+/// # Errors
+/// Returns an error if an existing checkpoint or log file cannot be read.
+pub async fn load_checkpoint_with_log<P1, P2>(
+    checkpoint_path: P1,
+    log_path: P2,
+    checkpoint_desc: &str,
+) -> Result<HashMap<String, CompanyData>>
+where
+    P1: AsRef<Path>,
+    P2: AsRef<Path>,
+{
+    let checkpoint_file = checkpoint_path.as_ref();
+    let log_file = log_path.as_ref();
+
+    let mut merged: HashMap<String, CompanyData> = HashMap::new();
+
+    // Phase 1: base state from the checkpoint
+    if checkpoint_file.exists() {
+        logger::log_info(&format!("Loading checkpoint from {}...", checkpoint_desc)).await;
+        let snapshot = tokio::fs::read_to_string(checkpoint_file).await?;
+
+        for record in snapshot.lines() {
+            // A line that does not end in '}' is treated as incomplete
+            let complete = !record.trim().is_empty() && record.ends_with('}');
+            if !complete {
+                continue;
+            }
+
+            match serde_json::from_str::<CompanyData>(record) {
+                Ok(company) => {
+                    merged.insert(company.name.clone(), company);
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
+                }
+            }
+        }
+        logger::log_info(&format!("Loaded checkpoint with {} companies", merged.len())).await;
+    }
+
+    // Phase 2: replay the log over the base state
+    if log_file.exists() {
+        logger::log_info("Replaying update log...").await;
+        let journal = tokio::fs::read_to_string(log_file).await?;
+        let mut replayed = 0;
+
+        for record in journal.lines() {
+            let complete = !record.trim().is_empty() && record.ends_with('}');
+            if !complete {
+                continue;
+            }
+
+            match serde_json::from_str::<CompanyData>(record) {
+                Ok(company) => {
+                    merged.insert(company.name.clone(), company);
+                    replayed += 1;
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
+                }
+            }
+        }
+
+        if replayed > 0 {
+            logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
+        }
+    }
+
+    Ok(merged)
+}
+
+/// Write all companies to a fresh checkpoint and drop the update log
+///
+/// The companies are written as JSON Lines to a sibling file with the `tmp`
+/// extension, flushed and synced, and then renamed over the checkpoint path.
+/// Afterwards the log file is removed if it exists; a failure to remove it is
+/// ignored.
+///
+/// # Errors
+/// Returns an error if serialization, writing, syncing, or the rename fails.
+pub async fn consolidate_checkpoint<P1, P2>(
+    checkpoint_path: P1,
+    log_path: P2,
+    companies: &HashMap<String, CompanyData>,
+) -> Result<()>
+where
+    P1: AsRef<Path>,
+    P2: AsRef<Path>,
+{
+    let checkpoint_file = checkpoint_path.as_ref();
+    let log_file = log_path.as_ref();
+
+    logger::log_info("Consolidating update log into checkpoint...").await;
+
+    // Stage the new checkpoint next to the real one
+    let staging_path = checkpoint_file.with_extension("tmp");
+    let mut staging = File::create(&staging_path).await?;
+
+    for record in companies.values() {
+        let mut line = serde_json::to_string(record)?;
+        line.push('\n');
+        staging.write_all(line.as_bytes()).await?;
+    }
+
+    staging.flush().await?;
+    staging.sync_data().await?;
+    drop(staging);
+
+    // Swap the staged file into place
+    tokio::fs::rename(&staging_path, checkpoint_file).await?;
+
+    // Remove log after successful consolidation (best effort)
+    if log_file.exists() {
+        let _ = tokio::fs::remove_file(log_file).await;
+    }
+
+    logger::log_info(&format!("✓ Consolidated {} companies", companies.len())).await;
+
+    Ok(())
+}
+
+/// Report whether the log file has a non-zero size
+///
+/// Returns `false` when the file's metadata cannot be read (for example
+/// because the file does not exist).
+pub async fn log_has_content<P: AsRef<Path>>(log_path: P) -> bool {
+    match tokio::fs::metadata(log_path.as_ref()).await {
+        Ok(info) => info.len() > 0,
+        Err(_) => false,
+    }
+}
+
+/// Collect the names of companies already marked as enriched in a log file
+///
+/// The log is read as JSON Lines. An entry contributes its `company_name`
+/// when it has a string `company_name` and its `status` is exactly `"enriched"`,
+/// e.g. `{"company_name": "...", "status": "enriched", ...}`.
+///
+/// Blank lines and lines not ending in `}` are skipped silently; lines that are
+/// not valid JSON are skipped with a warning. A missing log file yields an
+/// empty set.
+///
+/// # Errors
+/// Returns an error if an existing log file cannot be read.
+pub async fn load_enrichment_progress<P>(
+    log_path: P,
+) -> Result<std::collections::HashSet<String>>
+where
+    P: AsRef<Path>,
+{
+    let log_file = log_path.as_ref();
+    let mut done = std::collections::HashSet::new();
+
+    if !log_file.exists() {
+        return Ok(done);
+    }
+
+    logger::log_info("Loading enrichment progress from log...").await;
+    let journal = tokio::fs::read_to_string(log_file).await?;
+
+    for record in journal.lines() {
+        // A line that does not end in '}' is treated as incomplete
+        if record.trim().is_empty() || !record.ends_with('}') {
+            continue;
+        }
+
+        let entry = match serde_json::from_str::<serde_json::Value>(record) {
+            Ok(value) => value,
+            Err(e) => {
+                logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
+                continue;
+            }
+        };
+
+        let company = entry.get("company_name").and_then(|v| v.as_str());
+        let status = entry.get("status").and_then(|v| v.as_str());
+
+        if let (Some(name), Some("enriched")) = (company, status) {
+            done.insert(name.to_string());
+        }
+    }
+
+    logger::log_info(&format!(
+        "Loaded {} enriched companies from log",
+        done.len()
+    )).await;
+
+    Ok(done)
+}
+
+/// Count company directories that contain `<data_type>/data.jsonl`
+///
+/// Looks at the immediate subdirectories of the corporate directory and counts
+/// those holding a `data.jsonl` file under the `data_type` subdirectory
+/// (e.g., "events", "options", "chart"). Returns 0 when the corporate
+/// directory does not exist.
+///
+/// # Errors
+/// Returns an error if the corporate directory cannot be listed.
+pub async fn count_enriched_companies(
+    paths: &crate::util::directories::DataPaths,
+    data_type: &str,
+) -> Result<usize> {
+    let root = paths.corporate_dir();
+
+    if !root.exists() {
+        return Ok(0);
+    }
+
+    let mut found = 0;
+    let mut listing = tokio::fs::read_dir(&root).await?;
+
+    while let Some(item) = listing.next_entry().await? {
+        let company_dir = item.path();
+        let has_data = company_dir.is_dir()
+            && company_dir.join(data_type).join("data.jsonl").exists();
+
+        if has_data {
+            found += 1;
+        }
+    }
+
+    Ok(found)
+}
--- a/src/corporate/collect_exchanges.rs
+++ b/src/corporate/collect_exchanges.rs
@@ -0,0 +1,720 @@
+// src/corporate/collect_exchanges.rs
+use crate::util::directories::DataPaths;
+use crate::util::integrity::{DataStage, StateEntry, StateManager, file_reference};
+use crate::util::logger;
+use crate::corporate::types::*;
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use tokio::fs;
+use tokio::io::AsyncWriteExt;
+
+/// Exchange information collected from company data
+///
+/// One value per exchange code. `collect_and_save_exchanges` builds a map of
+/// exchange code -> `ExchangeInfo` and reads it back from
+/// `yahoo_exchanges.json` using the renamed (camelCase) keys below.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ExchangeInfo {
+    /// Display name of the exchange; falls back to the exchange code when the
+    /// source data has no name.
+    #[serde(rename = "exchangeName")]
+    pub exchange_name: String,
+    /// Currency code reported by the first company seen on this exchange.
+    pub currency: String,
+    /// Currency symbol reported by the first company seen on this exchange.
+    #[serde(rename = "currencySymbol")]
+    pub currency_symbol: String,
+    /// Value of `exchangeDataDelayedBy` from the source data (0 when absent).
+    // NOTE(review): unit is not visible here - presumably minutes; confirm.
+    #[serde(rename = "exchangeDataDelayedBy")]
+    pub exchange_data_delayed_by: i64,
+    /// Saturating sum of the raw market caps of all companies on the exchange,
+    /// in whatever currency each company reported.
+    #[serde(rename = "totalMarketCap")]
+    pub total_market_cap: u64,
+    /// Sum of the per-company market caps after conversion to USD.
+    #[serde(rename = "totalMarketCapUSD")]
+    pub total_market_cap_usd: f64,
+    /// Names of the company directories assigned to this exchange.
+    pub companies: Vec<String>,
+}
+
+/// Minimal view of one record in a company's `core/data.jsonl`
+///
+/// Only the `modules.price` subtree is deserialized; every level is optional
+/// so records without it still parse.
+#[derive(Debug, Deserialize)]
+struct CompanyCoreData {
+    modules: Option<CoreModules>,
+}
+
+/// The `modules` object of a core record; only `price` is read.
+#[derive(Debug, Deserialize)]
+struct CoreModules {
+    price: Option<PriceModule>,
+}
+
+/// The `modules.price` object: exchange, currency and market cap fields.
+/// All fields are optional; callers supply defaults for missing values.
+#[derive(Debug, Deserialize)]
+struct PriceModule {
+    #[serde(rename = "exchangeName")]
+    exchange_name: Option<String>,
+    currency: Option<String>,
+    #[serde(rename = "currencySymbol")]
+    currency_symbol: Option<String>,
+    // Exchange code; used as the grouping key for exchanges.
+    exchange: Option<String>,
+    #[serde(rename = "exchangeDataDelayedBy")]
+    exchange_data_delayed_by: Option<i64>,
+    #[serde(rename = "marketCap")]
+    market_cap: Option<MarketCapData>,
+}
+
+/// The `marketCap` object; only its `raw` value is read.
+// `raw` is typed as u64, so a negative or fractional value makes typed
+// deserialization of the whole record fail.
+#[derive(Debug, Deserialize)]
+struct MarketCapData {
+    raw: Option<u64>,
+}
+
+/// Map a quoted currency code to its base currency and the divisor between them
+///
+/// Sub-unit codes are translated to their main unit together with the number
+/// of sub-units per main unit:
+/// - "GBp" (pence) -> ("GBP", 100.0)
+/// - "ZAc" (cents) -> ("ZAR", 100.0)
+///
+/// Every other code is returned unchanged with a divisor of 1.0.
+fn normalize_currency(currency: &str) -> (&str, f64) {
+    if currency == "GBp" {
+        return ("GBP", 100.0);
+    }
+    if currency == "ZAc" {
+        return ("ZAR", 100.0);
+    }
+    (currency, 1.0)
+}
+
+/// FX rate cache for currency conversion
+///
+/// Maps a currency code to the rate `to_usd` divides by, i.e. the rate is
+/// treated as units of that currency per one USD.
+// NOTE(review): confirm the chart data under economic/currency/<CODE> is quoted
+// in that direction; this file only shows how the number is used.
+struct FxRateCache {
+    rates: HashMap<String, f64>,
+}
+
+impl FxRateCache {
+    /// Create new FX rate cache by loading all currency charts
+    ///
+    /// Scans `<data_dir>/economic/currency/<CODE>/chart/data.jsonl`, using each
+    /// subdirectory name as the currency code. "USD" is always present with
+    /// rate 1.0. A missing currency directory yields a cache with only USD;
+    /// a currency whose chart cannot be loaded is skipped with a warning.
+    ///
+    /// # Errors
+    /// Returns an error if the currency directory exists but cannot be listed.
+    async fn new(paths: &DataPaths) -> anyhow::Result<Self> {
+        let mut rates = HashMap::new();
+        
+        // USD to USD is always 1.0
+        rates.insert("USD".to_string(), 1.0);
+        
+        let currency_dir = paths.data_dir().join("economic").join("currency");
+        
+        if !currency_dir.exists() {
+            logger::log_warn("  FX rates directory not found - will use default rates").await;
+            return Ok(Self { rates });
+        }
+        
+        let mut entries = fs::read_dir(&currency_dir).await?;
+        let mut loaded_count = 0;
+        
+        while let Some(entry) = entries.next_entry().await? {
+            let path = entry.path();
+            if !path.is_dir() {
+                continue;
+            }
+            
+            // Directory name doubles as the currency code; non-UTF-8 names are skipped
+            let currency_code = match path.file_name().and_then(|n| n.to_str()) {
+                Some(code) => code.to_string(),
+                None => continue,
+            };
+            
+            let chart_path = path.join("chart").join("data.jsonl");
+            
+            if !chart_path.exists() {
+                continue;
+            }
+            
+            // Load chart and get latest rate
+            match load_latest_fx_rate(&chart_path).await {
+                Ok(rate) => {
+                    rates.insert(currency_code.clone(), rate);
+                    loaded_count += 1;
+                }
+                Err(e) => {
+                    logger::log_warn(&format!(
+                        "  Failed to load FX rate for {}: {}",
+                        currency_code, e
+                    )).await;
+                }
+            }
+        }
+        
+        logger::log_info(&format!("  ✓ Loaded {} FX rates", loaded_count)).await;
+        
+        Ok(Self { rates })
+    }
+    
+    /// Convert amount from given currency to USD
+    ///
+    /// Sub-unit codes (GBp, ZAc) are first scaled to their base currency. The
+    /// base amount is then divided by the cached rate; when no positive rate is
+    /// cached, the value from `get_fallback_rate` is used instead (1.0 for
+    /// currencies it does not list).
+    fn to_usd(&self, amount: u64, currency: &str) -> f64 {
+        // Normalize currency and get conversion factor
+        // e.g., GBp -> (GBP, 100.0), ZAc -> (ZAR, 100.0)
+        let (normalized_currency, factor) = normalize_currency(currency);
+        
+        // First convert to base currency unit (e.g., pence to pounds)
+        let amount_in_base = amount as f64 / factor;
+        
+        if normalized_currency == "USD" {
+            return amount_in_base;
+        }
+        
+        // The rate is used as currency units per one USD:
+        // with EUR = 0.92 (1 USD = 0.92 EUR), EUR -> USD is EUR_amount / 0.92
+        match self.rates.get(normalized_currency) {
+            Some(&rate) if rate > 0.0 => {
+                amount_in_base / rate
+            }
+            _ => {
+                // Fallback: use approximate rates for common currencies
+                let fallback_rate = get_fallback_rate(normalized_currency);
+                amount_in_base / fallback_rate
+            }
+        }
+    }
+    
+    /// Get the cached rate for a currency, after normalizing sub-unit codes
+    ///
+    /// Returns `None` when no rate was loaded; the fallback table is not consulted.
+    fn get_rate(&self, currency: &str) -> Option<f64> {
+        let (normalized_currency, _) = normalize_currency(currency);
+        self.rates.get(normalized_currency).copied()
+    }
+}
+
+/// Read the most recent close price from an FX chart file
+///
+/// Only the first non-blank line of the file is used; it is parsed as a
+/// `ChartData` record and the last quote that has a close price is returned.
+///
+/// # Errors
+/// Fails when the file cannot be read, contains no non-blank line, the line is
+/// not valid `ChartData`, the record has no quotes, or no quote has a close.
+async fn load_latest_fx_rate(chart_path: &std::path::Path) -> anyhow::Result<f64> {
+    let raw = fs::read_to_string(chart_path).await?;
+
+    let first_record = raw
+        .lines()
+        .find(|candidate| !candidate.trim().is_empty())
+        .ok_or_else(|| anyhow::anyhow!("No data in chart file"))?;
+
+    let chart: ChartData = serde_json::from_str(first_record)?;
+
+    if chart.quotes.is_empty() {
+        return Err(anyhow::anyhow!("No quotes in chart data"));
+    }
+
+    // Walk backwards so the newest quote with a close wins
+    chart
+        .quotes
+        .iter()
+        .rev()
+        .find_map(|quote| quote.close)
+        .ok_or_else(|| anyhow::anyhow!("No valid close prices"))
+}
+
+/// Approximate fallback FX rate for a currency (as of 2024)
+///
+/// Values are units of the currency per one USD (e.g. EUR 0.92 means
+/// 1 USD = 0.92 EUR, JPY 150.0 means 1 USD = 150 JPY), which is the form
+/// `FxRateCache::to_usd` divides by. Currencies not listed return 1.0, i.e.
+/// they are treated as if at parity with USD.
+fn get_fallback_rate(currency: &str) -> f64 {
+    const FALLBACK_RATES: &[(&str, f64)] = &[
+        ("USD", 1.0),
+        ("EUR", 0.92),
+        ("GBP", 0.79),
+        ("JPY", 150.0),
+        ("CNY", 7.2),
+        ("RMB", 7.2),
+        ("CHF", 0.88),
+        ("AUD", 1.52),
+        ("CAD", 1.36),
+        ("HKD", 7.8),
+        ("SGD", 1.34),
+        ("SEK", 10.5),
+        ("NOK", 10.8),
+        ("DKK", 6.9),
+        ("PLN", 4.0),
+        ("CZK", 23.0),
+        ("TRY", 32.0),
+        ("ZAR", 18.5),
+        ("ILS", 3.7),
+        ("RON", 4.6),
+        ("KWD", 0.31),
+        ("TWD", 31.5),
+        ("ISK", 138.0),
+        ("NZD", 1.65),
+        ("MXN", 17.0),
+        ("BRL", 5.0),
+        ("INR", 83.0),
+        ("KRW", 1320.0),
+        ("THB", 35.0),
+        ("MYR", 4.6),
+        ("IDR", 15700.0),
+        ("PHP", 56.0),
+        ("VND", 24500.0),
+    ];
+
+    FALLBACK_RATES
+        .iter()
+        .find(|(code, _)| *code == currency)
+        .map(|(_, rate)| *rate)
+        // Default: assume similar to USD
+        .unwrap_or(1.0)
+}
+
+/// Collect all exchanges from company directories and create yahoo_exchanges.json
+/// 
+/// # Features
+/// - Iterates through all company directories
+/// - Extracts exchange data from core/data.jsonl
+/// - Groups companies by exchange
+/// - Sums up market caps for each exchange
+/// - Converts all market caps to USD using FX rates
+/// - Saves consolidated mapping to data/yahoo_exchanges.json
+/// - Handles missing or invalid data gracefully
+/// - Integrity tracking with content hash validation
+///
+/// # Returns
+/// The number of distinct exchanges. When the tracked step is already valid
+/// and the output file exists, the count is read from that file and nothing is
+/// recollected. Returns 0 when the corporate directory does not exist.
+///
+/// # Errors
+/// Fails if the state manager, directory listing, FX cache construction,
+/// reading/parsing of an existing output file, or saving the output fails.
+/// Per-company parse failures are logged and counted as skipped instead.
+pub async fn collect_and_save_exchanges(paths: &DataPaths) -> anyhow::Result<usize> {
+    let output_path = paths.data_dir().join("yahoo_exchanges.json");
+        
+    let manager = StateManager::new(paths.integrity_dir()).await?;
+    let step_name = "exchange_collection_complete";
+    
+    if manager.is_step_valid(step_name).await? {
+        logger::log_info("  Exchange collection already completed and valid").await;
+        
+        // Load and count exchanges
+        // (if the output file is missing, fall through and recollect)
+        if output_path.exists() {
+            let content = fs::read_to_string(&output_path).await?;
+            let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
+            logger::log_info(&format!("  ✓ Found {} valid exchanges", exchanges.len())).await;
+            return Ok(exchanges.len());
+        }
+    }
+    // State entry is created up front and only marked valid after a successful save
+    let entry = create_exchange_collection_state_entry(&manager, &output_path, step_name).await?;
+    logger::log_info("Collecting exchange information from company directories...").await;
+    
+    let corporate_dir = paths.corporate_dir();
+    
+    if !corporate_dir.exists() {
+        logger::log_warn("  Corporate directory does not exist").await;
+        return Ok(0);
+    }
+    
+    // Load FX rates for currency conversion
+    logger::log_info("Loading FX rates for currency conversion...").await;
+    let fx_cache = FxRateCache::new(paths).await?;
+    
+    // Map of exchange code -> ExchangeInfo
+    let mut exchanges: HashMap<String, ExchangeInfo> = HashMap::new();
+    
+    let mut entries = fs::read_dir(&corporate_dir).await?;
+    let mut processed_count = 0;
+    let mut skipped_count = 0;
+    
+    // Note: `entry` below shadows the state entry only inside this loop
+    while let Some(entry) = entries.next_entry().await? {
+        let company_path = entry.path();
+        
+        if !company_path.is_dir() {
+            continue;
+        }
+        
+        // The directory name is used as the company name
+        let company_name = match company_path.file_name().and_then(|n| n.to_str()) {
+            Some(name) => name.to_string(),
+            None => {
+                skipped_count += 1;
+                continue;
+            }
+        };
+        
+        // Read core/data.jsonl
+        let core_data_path = company_path.join("core").join("data.jsonl");
+        
+        if !core_data_path.exists() {
+            skipped_count += 1;
+            continue;
+        }
+        
+        // Parse core data
+        match extract_exchange_info(&core_data_path, &company_name).await {
+            Ok(Some((exchange_code, exchange_name, currency, currency_symbol, delay, market_cap))) => {
+                // Convert market cap to USD
+                let market_cap_usd = fx_cache.to_usd(market_cap, &currency);
+                
+                // Add or update exchange entry
+                // (name, currency, symbol and delay come from the first company seen)
+                exchanges
+                    .entry(exchange_code.clone())
+                    .and_modify(|info| {
+                        // Add company to existing exchange and sum market caps
+                        info.companies.push(company_name.clone());
+                        info.total_market_cap = info.total_market_cap.saturating_add(market_cap);
+                        info.total_market_cap_usd += market_cap_usd;
+                    })
+                    .or_insert_with(|| {
+                        // Create new exchange entry
+                        ExchangeInfo {
+                            exchange_name,
+                            currency,
+                            currency_symbol,
+                            exchange_data_delayed_by: delay,
+                            total_market_cap: market_cap,
+                            total_market_cap_usd: market_cap_usd,
+                            companies: vec![company_name.clone()],
+                        }
+                    });
+                
+                processed_count += 1;
+            }
+            Ok(None) => {
+                // No exchange data found
+                skipped_count += 1;
+            }
+            Err(e) => {
+                logger::log_warn(&format!(
+                    "  Failed to parse exchange data for {}: {}",
+                    company_name, e
+                )).await;
+                skipped_count += 1;
+            }
+        }
+        
+        // Progress logging every 100 companies
+        if (processed_count + skipped_count) % 100 == 0 {
+            logger::log_info(&format!(
+                "  Progress: {} companies processed, {} skipped",
+                processed_count, skipped_count
+            )).await;
+        }
+    }
+    
+    logger::log_info(&format!(
+        "  ✓ Collected data from {} companies ({} skipped)",
+        processed_count, skipped_count
+    )).await;
+    
+    logger::log_info(&format!(
+        "  ✓ Found {} unique exchanges",
+        exchanges.len()
+    )).await;
+    
+    // Sort companies within each exchange for consistency
+    for exchange_info in exchanges.values_mut() {
+        exchange_info.companies.sort();
+    }
+    
+    // Save to yahoo_exchanges.json
+    save_exchanges_json(&output_path, &exchanges).await?;
+    
+    logger::log_info(&format!(
+        "  ✓ Saved exchange mapping to {}",
+        output_path.display()
+    )).await;
+
+    // Uses the state entry created before collection started
+    manager.mark_valid(entry).await?;
+    logger::log_info("  ✓ Exchange collection marked as complete with integrity tracking").await;
+    
+    // Print summary statistics
+    print_exchange_statistics(&exchanges, &fx_cache).await;
+    
+    Ok(exchanges.len())
+}
+
+/// Build the integrity state entry for the exchange collection step
+///
+/// The entry is named `step_name`, references the output file
+/// (`yahoo_exchanges.json`) through `file_reference`, and is created at stage
+/// `DataStage::Data`. No dependencies on other tracked steps are passed.
+// NOTE(review): the previous comment stated a 7-day default TTL for
+// `DataStage::Data`; that is defined outside this file - confirm.
+///
+/// # Errors
+/// Propagates any failure from `StateManager::create_entry`.
+async fn create_exchange_collection_state_entry(
+    manager: &StateManager,
+    output_path: &std::path::Path,
+    step_name: &str,
+) -> anyhow::Result<StateEntry> {
+    // Content reference for the output file
+    let tracked_output = file_reference(output_path);
+
+    let state_entry = manager
+        .create_entry(step_name.to_string(), tracked_output, DataStage::Data)
+        .await?;
+
+    Ok(state_entry)
+}
+
+/// Extract exchange information from a company's core data file
+///
+/// The file is read as JSONL; only the first non-blank line is examined.
+///
+/// Returns `(exchange, exchange_name, currency, currency_symbol, delay, market_cap)`:
+/// - `Ok(Some(..))` when an exchange code could be extracted,
+/// - `Ok(None)` when the file has no non-blank line, or the typed record has no
+///   price module, no exchange, an empty exchange, or the placeholder code "CCC",
+/// - `Err(..)` when the file cannot be read, or the line fails typed parsing and
+///   the untyped fallback finds no usable exchange either.
+async fn extract_exchange_info(
+    core_data_path: &std::path::Path,
+    company_name: &str,
+) -> anyhow::Result<Option<(String, String, String, String, i64, u64)>> {
+    let content = fs::read_to_string(core_data_path).await?;
+
+    // Parse JSONL - should be single line, so only the first non-blank line is used
+    let line = match content.lines().find(|l| !l.trim().is_empty()) {
+        Some(line) => line,
+        None => return Ok(None),
+    };
+
+    let data = match serde_json::from_str::<CompanyCoreData>(line) {
+        Ok(data) => data,
+        Err(e) => {
+            // The typed schema rejected the line; salvage what we can from raw JSON.
+            // If that also yields nothing, report the original parse error.
+            return match exchange_info_from_untyped_json(line) {
+                Some(info) => Ok(Some(info)),
+                None => Err(anyhow::anyhow!(
+                    "Failed to parse core data for {}: {}",
+                    company_name,
+                    e
+                )),
+            };
+        }
+    };
+
+    // Extract from modules.price
+    let price_module = match data.modules.and_then(|m| m.price) {
+        Some(p) => p,
+        None => return Ok(None),
+    };
+
+    // Extract required fields
+    let exchange = match price_module.exchange {
+        Some(e) if !e.is_empty() => e,
+        _ => return Ok(None),
+    };
+
+    // Filter out invalid placeholder exchange codes
+    if exchange == "CCC" {
+        return Ok(None);
+    }
+
+    let exchange_name = price_module.exchange_name.unwrap_or_else(|| exchange.clone());
+    let currency = price_module.currency.unwrap_or_else(|| "USD".to_string());
+    let currency_symbol = price_module.currency_symbol.unwrap_or_else(|| "$".to_string());
+    let delay = price_module.exchange_data_delayed_by.unwrap_or(0);
+    let market_cap = price_module
+        .market_cap
+        .and_then(|mc| mc.raw)
+        .unwrap_or(0);
+
+    Ok(Some((
+        exchange,
+        exchange_name,
+        currency,
+        currency_symbol,
+        delay,
+        market_cap,
+    )))
+}
+
+/// Best-effort extraction of `modules.price` fields from a line that failed typed parsing.
+///
+/// Returns `None` when the line is not valid JSON, `modules.price.exchange` is missing,
+/// empty, or the placeholder "CCC". Numeric fields that are encoded as floats
+/// (e.g. `1.5e12`) are accepted and truncated; `as` casts saturate, so a negative
+/// market cap becomes 0 instead of wrapping.
+fn exchange_info_from_untyped_json(
+    line: &str,
+) -> Option<(String, String, String, String, i64, u64)> {
+    let json: serde_json::Value = serde_json::from_str(line).ok()?;
+    let price = json.get("modules")?.get("price")?;
+
+    let exchange = price.get("exchange")?.as_str()?;
+    if exchange.is_empty() || exchange == "CCC" {
+        return None;
+    }
+
+    // String field with a default, mirroring the typed path's defaults.
+    let text_or = |key: &str, default: &str| {
+        price
+            .get(key)
+            .and_then(|v| v.as_str())
+            .unwrap_or(default)
+            .to_string()
+    };
+
+    let exchange_name = text_or("exchangeName", exchange);
+    let currency = text_or("currency", "USD");
+    let currency_symbol = text_or("currencySymbol", "$");
+
+    // `as_i64`/`as_u64` return None for float-encoded numbers, so fall back to `as_f64`.
+    let delay = price
+        .get("exchangeDataDelayedBy")
+        .and_then(|v| v.as_i64().or_else(|| v.as_f64().map(|f| f as i64)))
+        .unwrap_or(0);
+
+    let market_cap = price
+        .get("marketCap")
+        .and_then(|mc| mc.get("raw"))
+        .and_then(|v| v.as_u64().or_else(|| v.as_f64().map(|f| f as u64)))
+        .unwrap_or(0);
+
+    Some((
+        exchange.to_string(),
+        exchange_name,
+        currency,
+        currency_symbol,
+        delay,
+        market_cap,
+    ))
+}
+
+/// Save exchanges map to JSON file with fsync
+///
+/// Entries are written in ascending exchange-code order so repeated runs over
+/// the same data produce byte-identical files. The content is first written to
+/// `<path with extension "json.tmp">`, synced to disk, and then renamed over
+/// `path` so readers never observe a partially written file.
+///
+/// Returns an error if serialization or any file-system step fails; in that
+/// case the temporary file may be left behind.
+async fn save_exchanges_json(
+    path: &std::path::Path,
+    exchanges: &HashMap<String, ExchangeInfo>,
+) -> anyhow::Result<()> {
+    // A BTreeMap iterates in key order, so serializing it yields sorted output.
+    // (Collecting sorted pairs back into a HashMap would discard the ordering.)
+    let json_content = {
+        let sorted_exchanges: std::collections::BTreeMap<&String, &ExchangeInfo> =
+            exchanges.iter().collect();
+        serde_json::to_string_pretty(&sorted_exchanges)?
+    };
+
+    // Write to temporary file first (atomic write pattern)
+    let tmp_path = path.with_extension("json.tmp");
+    let mut file = fs::File::create(&tmp_path).await?;
+    file.write_all(json_content.as_bytes()).await?;
+    file.write_all(b"\n").await?;
+    file.flush().await?;
+    file.sync_all().await?;
+    // Close the handle before renaming so no open descriptor outlives the move
+    drop(file);
+
+    // Atomic rename
+    fs::rename(&tmp_path, path).await?;
+
+    Ok(())
+}
+
+/// Format market cap as a human-readable string
+///
+/// Values are scaled to the largest unit they reach (T, B, M, K) and printed
+/// with two decimals; anything below 1,000 (including negatives) is printed
+/// unscaled with two decimals.
+fn format_market_cap(market_cap: f64) -> String {
+    // Largest unit first so the first threshold reached wins.
+    const SCALES: [(f64, &str); 4] = [
+        (1_000_000_000_000.0, "T"),
+        (1_000_000_000.0, "B"),
+        (1_000_000.0, "M"),
+        (1_000.0, "K"),
+    ];
+
+    for &(threshold, suffix) in SCALES.iter() {
+        if market_cap >= threshold {
+            return format!("{:.2}{}", market_cap / threshold, suffix);
+        }
+    }
+
+    format!("{:.2}", market_cap)
+}
+
+/// Print statistics about collected exchanges
+///
+/// Logs through `logger::log_info`: the top 20 exchanges ranked by USD market
+/// cap, the top 10 currencies ranked by aggregated USD market cap, the number
+/// of exchanges reporting a data delay (only when there is at least one), and
+/// the USD total across all exchanges.
+async fn print_exchange_statistics(exchanges: &HashMap<String, ExchangeInfo>, fx_cache: &FxRateCache) {
+    // FX annotation for one currency; `None` when the cache has no rate for it.
+    let fx_note = |currency: &String| -> Option<String> {
+        let (base_currency, factor) = normalize_currency(currency);
+        let rate = fx_cache.get_rate(currency)?;
+        let note = if factor > 1.0 {
+            // Show conversion for pence/cents
+            format!(
+                " (1 {} = {:.4} USD, {} {} = 1 {})",
+                base_currency, rate, factor as i32, currency, base_currency
+            )
+        } else {
+            format!(" (1 USD = {:.4} {})", rate, currency)
+        };
+        Some(note)
+    };
+
+    logger::log_info("Exchange Statistics (sorted by USD market cap):").await;
+
+    // Rank exchanges by USD market cap, largest first
+    let mut by_usd_cap: Vec<_> = exchanges.iter().collect();
+    by_usd_cap.sort_by(|lhs, rhs| {
+        rhs.1.total_market_cap_usd
+            .partial_cmp(&lhs.1.total_market_cap_usd)
+            .unwrap_or(std::cmp::Ordering::Equal)
+    });
+
+    logger::log_info("  Top 20 exchanges by total market cap (USD):").await;
+    for (rank, (code, info)) in by_usd_cap.iter().take(20).enumerate() {
+        let fx_text = fx_note(&info.currency)
+            .unwrap_or_else(|| format!(" (using fallback rate for {})", info.currency));
+        // USD exchanges need no FX annotation
+        let fx_suffix = if info.currency != "USD" { fx_text.as_str() } else { "" };
+
+        let line = format!(
+            "    {}. {} ({}) - ${} USD ({}{} {}) - {} companies{}",
+            rank + 1,
+            info.exchange_name,
+            code,
+            format_market_cap(info.total_market_cap_usd),
+            info.currency_symbol,
+            format_market_cap(info.total_market_cap as f64),
+            info.currency,
+            info.companies.len(),
+            fx_suffix
+        );
+        logger::log_info(&line).await;
+    }
+
+    // Aggregate company counts and USD market cap per currency
+    let mut currency_counts: HashMap<String, usize> = HashMap::new();
+    let mut currency_market_caps: HashMap<String, f64> = HashMap::new();
+    for info in exchanges.values() {
+        *currency_counts.entry(info.currency.clone()).or_insert(0) += info.companies.len();
+        *currency_market_caps.entry(info.currency.clone()).or_insert(0.0) += info.total_market_cap_usd;
+    }
+
+    // Rank currencies by their aggregated USD market cap, largest first
+    let mut currencies: Vec<_> = currency_counts.iter().collect();
+    currencies.sort_by(|lhs, rhs| {
+        let lhs_cap = currency_market_caps.get(lhs.0).unwrap_or(&0.0);
+        let rhs_cap = currency_market_caps.get(rhs.0).unwrap_or(&0.0);
+        rhs_cap.partial_cmp(lhs_cap).unwrap_or(std::cmp::Ordering::Equal)
+    });
+
+    logger::log_info("  Market cap by currency (USD equivalent):").await;
+    for (currency, count) in currencies.iter().take(10) {
+        let market_cap_usd = currency_market_caps.get(*currency).unwrap_or(&0.0);
+        let fx_text = fx_note(*currency).unwrap_or_else(|| format!(" (fallback)"));
+        let fx_suffix = if *currency != "USD" { fx_text.as_str() } else { "" };
+
+        let line = format!(
+            "    {}: {} companies, ${} USD{}",
+            currency,
+            count,
+            format_market_cap(*market_cap_usd),
+            fx_suffix
+        );
+        logger::log_info(&line).await;
+    }
+
+    // Delay statistics: only reported when at least one exchange is delayed
+    let delayed_count = exchanges
+        .values()
+        .filter(|info| info.exchange_data_delayed_by > 0)
+        .count();
+
+    if delayed_count > 0 {
+        logger::log_info(&format!(
+            "  Exchanges with data delay: {} (out of {})",
+            delayed_count,
+            exchanges.len()
+        )).await;
+    }
+
+    // Total market cap across all exchanges (in USD)
+    let total_market_cap_usd: f64 = exchanges.values()
+        .map(|info| info.total_market_cap_usd)
+        .sum();
+
+    logger::log_info(&format!(
+        "  Total market cap across all exchanges: ${} USD",
+        format_market_cap(total_market_cap_usd)
+    )).await;
+}
+
+/// Get exchange information for a specific exchange code
+///
+/// Reads `yahoo_exchanges.json` from the data directory and looks up
+/// `exchange_code`.
+///
+/// Returns `Ok(None)` when the file does not exist or the code is not present,
+/// and an error when the file cannot be read for any other reason or does not
+/// contain a valid exchanges map.
+pub async fn get_exchange_info(
+    paths: &DataPaths,
+    exchange_code: &str,
+) -> anyhow::Result<Option<ExchangeInfo>> {
+    let exchanges_path = paths.data_dir().join("yahoo_exchanges.json");
+
+    // Read directly and treat NotFound as "no data": this avoids a blocking
+    // `Path::exists()` call on the async runtime and the check-then-read race.
+    let content = match fs::read_to_string(&exchanges_path).await {
+        Ok(content) => content,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+        Err(e) => return Err(e.into()),
+    };
+
+    let mut exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
+
+    // The map is local, so the entry can be moved out instead of cloned
+    Ok(exchanges.remove(exchange_code))
+}
+
+/// List all available exchanges
+///
+/// Reads `yahoo_exchanges.json` from the data directory and returns its
+/// entries as `(code, info)` pairs sorted by exchange code.
+///
+/// Returns an empty list when the file does not exist, and an error when the
+/// file cannot be read for any other reason or does not contain a valid
+/// exchanges map.
+pub async fn list_all_exchanges(paths: &DataPaths) -> anyhow::Result<Vec<(String, ExchangeInfo)>> {
+    let exchanges_path = paths.data_dir().join("yahoo_exchanges.json");
+
+    // Read directly and treat NotFound as "no data": this avoids a blocking
+    // `Path::exists()` call on the async runtime and the check-then-read race.
+    let content = match fs::read_to_string(&exchanges_path).await {
+        Ok(content) => content,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
+        Err(e) => return Err(e.into()),
+    };
+
+    let exchanges: HashMap<String, ExchangeInfo> = serde_json::from_str(&content)?;
+
+    let mut exchange_list: Vec<_> = exchanges.into_iter().collect();
+    exchange_list.sort_by(|a, b| a.0.cmp(&b.0));
+
+    Ok(exchange_list)
+}
--- a/src/corporate/fx.rs
+++ b/src/corporate/fx.rs
@@ -1,51 +0,0 @@
-// src/corporate/fx.rs
-use std::collections::HashMap;
-use reqwest;
-use serde_json::Value;
-use tokio::fs;
-use std::path::Path;
-
-static FX_CACHE_PATH: &str = "fx_rates.json";
-
-pub async fn get_usd_rate(currency: &str) -> anyhow::Result<f64> {
-    if currency == "USD" {
-        return Ok(1.0);
-    }
-
-    let mut cache: HashMap<String, (f64, String)> = if Path::new(FX_CACHE_PATH).exists() {
-        let content = fs::read_to_string(FX_CACHE_PATH).await?;
-        serde_json::from_str(&content).unwrap_or_default()
-    } else {
-        HashMap::new()
-    };
-
-    let today = chrono::Local::now().format("%Y-%m-%d").to_string();
-    if let Some((rate, date)) = cache.get(currency) {
-        if date == &today {
-            return Ok(*rate);
-        }
-    }
-
-    let symbol = format!("{}USD=X", currency);
-    let url = format!("https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d", symbol);
-
-    let json: Value = reqwest::Client::new()
-        .get(&url)
-        .header("User-Agent", "Mozilla/5.0")
-        .send()
-        .await?
-        .json()
-        .await?;
-
-    let close = json["chart"]["result"][0]["meta"]["regularMarketPrice"]
-        .as_f64()
-        .or_else(|| json["chart"]["result"][0]["indicators"]["quote"][0]["close"][0].as_f64())
-        .unwrap_or(1.0);
-
-    let rate = if currency == "JPY" || currency == "KRW" { close } else { 1.0 / close }; // inverse pairs
-
-    cache.insert(currency.to_string(), (rate, today.clone()));
-    let _ = fs::write(FX_CACHE_PATH, serde_json::to_string_pretty(&cache)?).await;
-
-    Ok(rate)
-}
--- a/src/corporate/helpers.rs
+++ b/src/corporate/helpers.rs
@@ -1,20 +1,25 @@
 // src/corporate/helpers.rs
 use super::types::*;
-use chrono::{Local, NaiveDate};
-use std::collections::{HashMap, HashSet};
+use crate::util::directories::DataPaths;

-pub fn event_key(e: &CompanyEvent) -> String {
+use chrono::{Local, NaiveDate};
+use rand::rngs::StdRng;
+use rand::prelude::{Rng, SeedableRng, IndexedRandom};
+use tokio::fs;
+use anyhow::{anyhow};
+
+pub fn event_key(e: &CompanyEventData) -> String {
    format!("{}|{}|{}", e.ticker, e.date, e.time)
 }

-pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Vec<CompanyEventChange> {
+pub fn detect_changes(old: &CompanyEventData, new: &CompanyEventData, today: &str) -> Vec<CompanyEventChangeData> {
    let mut changes = Vec::new();
    let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();

    if new.date.as_str() <= today { return changes; }

    if old.time != new.time {
-        changes.push(CompanyEventChange {
+        changes.push(CompanyEventChangeData {
            ticker: new.ticker.clone(),
            date: new.date.clone(),
            field_changed: "time".to_string(),
@@ -25,7 +30,7 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
    }

    if old.eps_forecast != new.eps_forecast {
-        changes.push(CompanyEventChange {
+        changes.push(CompanyEventChangeData {
            ticker: new.ticker.clone(),
            date: new.date.clone(),
            field_changed: "eps_forecast".to_string(),
@@ -36,7 +41,7 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
    }

    if old.eps_actual != new.eps_actual {
-        changes.push(CompanyEventChange {
+        changes.push(CompanyEventChangeData {
            ticker: new.ticker.clone(),
            date: new.date.clone(),
            field_changed: "eps_actual".to_string(),
@@ -51,14 +56,6 @@ pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Ve
    changes
 }

-pub fn price_key(p: &CompanyPrice) -> String {
-    if p.time.is_empty() {
-        format!("{}|{}", p.ticker, p.date)
-    } else {
-        format!("{}|{}|{}", p.ticker, p.date, p.time)
-    }
-}
-
 pub fn parse_float(s: &str) -> Option<f64> {
    s.replace("--", "").replace(",", "").parse::<f64>().ok()
 }
@@ -67,4 +64,121 @@ pub fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    NaiveDate::parse_from_str(s, "%B %d, %Y")
        .or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y"))
        .map_err(|_| anyhow::anyhow!("Bad date: {s}"))
+}
+
+/// Send-safe random range
+///
+/// Returns a random value from the half-open range `min..max`, drawn from a
+/// `StdRng` seeded from the thread-local generator.
+///
+/// If the range is empty (`min >= max`) `min` is returned; sampling from an
+/// empty range would otherwise panic.
+pub fn random_range(min: u64, max: u64) -> u64 {
+    if min >= max {
+        return min;
+    }
+    let mut rng = StdRng::from_rng(&mut rand::rng());
+    rng.random_range(min..max)
+}
+
+/// Send-safe random choice
+///
+/// Returns a clone of one randomly selected element of `items`, using a
+/// `StdRng` seeded from the thread-local generator.
+///
+/// # Panics
+///
+/// Panics if `items` is empty, since there is nothing to return.
+pub fn choose_random<T: Clone>(items: &[T]) -> T {
+    let mut rng = StdRng::from_rng(&mut rand::rng());
+    items
+        .choose(&mut rng)
+        .expect("choose_random requires a non-empty slice")
+        .clone()
+}
+
+/// Extract first valid Yahoo ticker from company
+///
+/// Scans every ticker list in `company.isin_tickers_map` and returns the first
+/// entry of the form `YAHOO:<symbol>` with the prefix removed. The sentinel
+/// entries `YAHOO:NO_RESULTS` and `YAHOO:ERROR`, and a bare `YAHOO:` with no
+/// symbol, are skipped.
+///
+/// Returns `None` when the map is absent or holds no usable Yahoo ticker.
+/// NOTE(review): "first" follows the map's iteration order; if the map is a
+/// `HashMap` that order is arbitrary - confirm against `CompanyData`.
+pub fn extract_first_yahoo_ticker(company: &CompanyData) -> Option<String> {
+    company
+        .isin_tickers_map
+        .as_ref()?
+        .values()
+        .flatten()
+        // `strip_prefix` removes the prefix exactly once; `trim_start_matches`
+        // would also eat a symbol that itself begins with "YAHOO:".
+        .filter_map(|ticker| ticker.strip_prefix("YAHOO:"))
+        .find(|symbol| !symbol.is_empty() && *symbol != "NO_RESULTS" && *symbol != "ERROR")
+        .map(|symbol| symbol.to_string())
+}
+
+/// Sanitize company name for file system use
+///
+/// Replaces each of the characters `/ \ : * ? " < > |` with an underscore and
+/// leaves every other character untouched.
+pub fn sanitize_company_name(name: &str) -> String {
+    const RESERVED: [char; 9] = ['/', '\\', ':', '*', '?', '"', '<', '>', '|'];
+
+    name.chars()
+        .map(|ch| if RESERVED.contains(&ch) { '_' } else { ch })
+        .collect()
+}
+
+/// Load companies from JSONL file
+///
+/// Each non-blank line is parsed as one `CompanyData` record. Lines that fail
+/// to parse are skipped (best effort), and the number of skipped lines is
+/// reported once through `log_warn` so data loss does not go unnoticed.
+///
+/// Returns an error only when the file itself cannot be read.
+pub async fn load_companies_from_jsonl(
+    path: &std::path::Path
+) -> anyhow::Result<Vec<CompanyData>> {
+    let content = tokio::fs::read_to_string(path).await?;
+    let mut companies = Vec::new();
+    let mut skipped_lines = 0usize;
+
+    for line in content.lines().filter(|l| !l.trim().is_empty()) {
+        match serde_json::from_str::<CompanyData>(line) {
+            Ok(company) => companies.push(company),
+            Err(_) => skipped_lines += 1,
+        }
+    }
+
+    if skipped_lines > 0 {
+        crate::util::logger::log_warn(&format!(
+            "Skipped {} malformed line(s) while loading companies from {}",
+            skipped_lines,
+            path.display()
+        )).await;
+    }
+
+    Ok(companies)
+}
+
+/// Find the newest date-named directory in the GLEIF/OpenFIGI map cache.
+///
+/// A directory qualifies when its name is exactly eight ASCII digits
+/// (presumably `YYYYMMDD` - the digits are not validated as a date). Names are
+/// compared as strings, which for fixed-width digits equals numeric order.
+///
+/// Returns `Ok(None)` when the cache directory does not exist or contains no
+/// qualifying directory, and an error when the directory cannot be listed.
+pub async fn find_most_recent_figi_date_dir(paths: &DataPaths) -> anyhow::Result<Option<std::path::PathBuf>> {
+    let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
+
+    if !map_cache_dir.exists() {
+        return Ok(None);
+    }
+
+    let mut entries = tokio::fs::read_dir(&map_cache_dir).await?;
+    // Track only the best candidate instead of collecting and sorting them all
+    let mut newest: Option<(String, std::path::PathBuf)> = None;
+
+    while let Some(entry) = entries.next_entry().await? {
+        let path = entry.path();
+        if !path.is_dir() {
+            continue;
+        }
+
+        // `is_ascii_digit` (not `is_numeric`) so that `len() == 8` bytes really
+        // means eight digits; Unicode numerics such as '²' are rejected.
+        let name = match path.file_name().and_then(|n| n.to_str()) {
+            Some(name) if name.len() == 8 && name.bytes().all(|b| b.is_ascii_digit()) => {
+                name.to_string()
+            }
+            _ => continue,
+        };
+
+        let is_newer = match &newest {
+            Some((best, _)) => name > *best,
+            None => true,
+        };
+        if is_newer {
+            newest = Some((name, path));
+        }
+    }
+
+    Ok(newest.map(|(_, path)| path))
+}
+
+/// Resolve the GLEIF date to use.
+///
+/// When `gleif_date` is given it is returned unchanged. Otherwise the GLEIF
+/// cache directory is scanned for sub-directories named with exactly eight
+/// ASCII digits (presumably `YYYYMMDD` - not validated as a date) and the
+/// greatest name is returned.
+///
+/// Returns an error when the cache directory cannot be listed or contains no
+/// qualifying directory.
+pub async fn determine_gleif_date(
+    gleif_date: Option<&str>,
+    paths: &DataPaths,
+) -> anyhow::Result<String> {
+    if let Some(d) = gleif_date {
+        return Ok(d.to_string());
+    }
+
+    let gleif_dir = paths.cache_gleif_dir();
+    let mut entries = fs::read_dir(gleif_dir).await?;
+    // Track only the greatest name instead of collecting and sorting them all
+    let mut latest: Option<String> = None;
+
+    while let Some(entry) = entries.next_entry().await? {
+        let path = entry.path();
+        if !path.is_dir() {
+            continue;
+        }
+
+        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
+            // `is_ascii_digit` (not `is_numeric`) so that `len() == 8` bytes
+            // really means eight digits; Unicode numerics are rejected.
+            let is_date_name = name.len() == 8 && name.bytes().all(|b| b.is_ascii_digit());
+            let is_newer = match latest.as_deref() {
+                Some(best) => name > best,
+                None => true,
+            };
+            if is_date_name && is_newer {
+                latest = Some(name.to_string());
+            }
+        }
+    }
+
+    latest.ok_or_else(|| anyhow!("No GLEIF date found"))
 }
--- a/src/corporate/mod.rs
+++ b/src/corporate/mod.rs
@@ -2,10 +2,20 @@
 pub mod types;
 pub mod scraper;
 pub mod storage;
-pub mod update;
 pub mod helpers;
-pub mod aggregation;
-pub mod fx;
-pub mod openfigi;
+pub mod update_openfigi;
+pub mod yahoo_company_extraction;
+pub mod page_validation;
+pub mod checkpoint_helpers;
+
+// Corporate update modules
+pub mod update;
+pub mod update_companies;
+pub mod update_companies_cleanse;
+pub mod update_companies_enrich;
+
+pub mod collect_exchanges;
+pub mod bond_processing;
+pub mod option_processing;

 pub use update::run_full_update;
--- a/src/corporate/openfigi.rs
+++ b/src/corporate/openfigi.rs
--- a/src/corporate/option_processing.rs
+++ b/src/corporate/option_processing.rs
@@ -0,0 +1,54 @@
+/// Parse strike price from option ticker (e.g., "AAPL 150 CALL" -> 150.0)
+///
+/// The strike is the first whitespace-separated token that parses as a finite,
+/// positive number and is immediately followed by "CALL" or "PUT"
+/// (ASCII case-insensitive).
+///
+/// Returns `None` when no such token exists. Tokens such as "NAN" or "INF" are
+/// rejected even though `f64::from_str` accepts them.
+pub fn parse_strike_from_ticker(ticker: &str) -> Option<f64> {
+    let parts: Vec<&str> = ticker.split_whitespace().collect();
+
+    // Each window is (candidate strike, following word)
+    parts.windows(2).find_map(|pair| {
+        let strike = pair[0].parse::<f64>().ok()?;
+        let followed_by_side =
+            pair[1].eq_ignore_ascii_case("CALL") || pair[1].eq_ignore_ascii_case("PUT");
+
+        if followed_by_side && strike.is_finite() && strike > 0.0 {
+            Some(strike)
+        } else {
+            None
+        }
+    })
+}
+
+/// Parse expiration date from option ticker (e.g., "AAPL 150 CALL 01/17/25" -> timestamp)
+///
+/// Looks for the first whitespace-separated token that contains '/' , is at
+/// least eight bytes long, and parses as `MM/DD/YY` or `MM/DD/YYYY`.
+///
+/// Returns the Unix timestamp (seconds) of 16:00:00 on that date, interpreted
+/// as UTC, or `None` when no token parses as a date.
+pub fn parse_expiration_from_ticker(ticker: &str) -> Option<i64> {
+    // Two-digit year first: `%Y` would read "25" as the year 25, while `%y`
+    // rejects four-digit years because of the trailing digits.
+    const DATE_FORMATS: [&str; 2] = ["%m/%d/%y", "%m/%d/%Y"];
+
+    ticker
+        .split_whitespace()
+        .filter(|part| part.contains('/') && part.len() >= 8)
+        .find_map(|part| {
+            DATE_FORMATS
+                .iter()
+                .find_map(|fmt| chrono::NaiveDate::parse_from_str(part, fmt).ok())
+        })
+        .and_then(|date| date.and_hms_opt(16, 0, 0))
+        .map(|expiry| expiry.and_utc().timestamp())
+}
+
+/// Parse option name to extract underlying company, issuer, and option type
+///
+/// Returns `(underlying, issuer, option_type)`:
+/// - `underlying` is the trimmed text after the first " on " (ASCII
+///   case-insensitive), or the whole name when " on " does not occur,
+/// - `issuer` is always `None` (not extracted here),
+/// - `option_type` is "call" if the name contains "call", otherwise "put" if it
+///   contains "put", otherwise "unknown" (ASCII case-insensitive substring match).
+///
+/// Examples:
+/// - "December 25 Calls on ALPHA GA" -> ("ALPHA GA", None, "call")
+/// - "January 26 Puts on TESLA INC" -> ("TESLA INC", None, "put")
+pub fn parse_option_name(name: &str) -> (String, Option<String>, String) {
+    // ASCII-only uppercasing preserves byte length, so offsets found in
+    // `name_upper` are valid offsets into `name`. Full Unicode uppercasing can
+    // change the length (e.g. 'ŉ' -> "ʼN") and make the slice below wrong or panic.
+    let name_upper = name.to_ascii_uppercase();
+
+    // Detect option type
+    let option_type = if name_upper.contains("CALL") {
+        "call"
+    } else if name_upper.contains("PUT") {
+        "put"
+    } else {
+        "unknown"
+    }
+    .to_string();
+
+    // Try to extract underlying after "on"; fall back to the entire name
+    const SEPARATOR: &str = " ON ";
+    let underlying = match name_upper.find(SEPARATOR) {
+        Some(pos) => name[pos + SEPARATOR.len()..].trim().to_string(),
+        None => name.to_string(),
+    };
+
+    (underlying, None, option_type)
+}
--- a/src/corporate/page_validation.rs
+++ b/src/corporate/page_validation.rs
@@ -0,0 +1,180 @@
+// src/corporate/page_validation.rs
+//
+// Utilities to ensure page state is correct before extraction
+
+use anyhow::{anyhow, Result};
+use fantoccini::Client;
+use tokio::time::{sleep, Duration};
+
+/// Validates that the browser navigated to the expected URL
+///
+/// This prevents extracting data from a stale page when navigation fails silently.
+///
+/// The current URL is checked up to `max_attempts` times, pausing 500 ms
+/// between checks. Returns `Ok(())` as soon as the URL contains
+/// `expected_url_fragment`; otherwise returns an error naming the URL the
+/// browser reports after the final check. WebDriver errors are propagated.
+pub async fn verify_navigation(
+    client: &Client,
+    expected_url_fragment: &str,
+    max_attempts: u32,
+) -> Result<()> {
+    let mut attempt: u32 = 0;
+
+    while attempt < max_attempts {
+        attempt += 1;
+
+        let url_now = client.current_url().await?;
+        let current = url_now.as_str();
+
+        if current.contains(expected_url_fragment) {
+            crate::util::logger::log_info(&format!(
+                "✓ Navigation verified: {} (attempt {})",
+                current, attempt
+            )).await;
+            return Ok(());
+        }
+
+        // No warning or pause after the last check; fall through to the error
+        if attempt == max_attempts {
+            break;
+        }
+
+        crate::util::logger::log_warn(&format!(
+            "Navigation mismatch (attempt {}): expected '{}', got '{}'. Retrying...",
+            attempt, expected_url_fragment, current
+        )).await;
+        sleep(Duration::from_millis(500)).await;
+    }
+
+    // Report whatever URL the browser is on now
+    let final_url = client.current_url().await?;
+    Err(anyhow!(
+        "Navigation verification failed: expected URL containing '{}', but got '{}'",
+        expected_url_fragment,
+        final_url.as_str()
+    ))
+}
+
+/// Clears browser state by navigating to a blank page
+///
+/// Use this when a navigation fails or times out to ensure clean slate.
+/// Logs the action, loads `about:blank`, then waits 200 ms before returning.
+/// Fails only if the navigation to `about:blank` itself fails.
+pub async fn clear_browser_state(client: &Client) -> Result<()> {
+    // Short pause after loading the blank page so stale content is gone
+    const SETTLE_DELAY: Duration = Duration::from_millis(200);
+
+    crate::util::logger::log_info("Clearing browser state with about:blank").await;
+
+    client.goto("about:blank").await?;
+    sleep(SETTLE_DELAY).await;
+
+    Ok(())
+}
+
+/// Validates that expected content exists on the page before extraction
+///
+/// This adds an extra safety check that the page actually loaded.
+///
+/// Checks run in order and the first failing check aborts with an error that
+/// names the missing selector or text. Errors from script execution (for
+/// example an invalid CSS selector) are propagated.
+pub async fn verify_page_content(
+    client: &Client,
+    content_checks: Vec<ContentCheck>,
+) -> Result<()> {
+    for check in content_checks {
+        match check {
+            ContentCheck::ElementExists(selector) => {
+                // Pass the selector as a script argument rather than splicing it
+                // into the JavaScript source: quotes and backslashes in the
+                // selector can then neither break nor inject into the script.
+                let exists: bool = client
+                    .execute(
+                        "return !!document.querySelector(arguments[0]);",
+                        vec![serde_json::Value::String(selector.clone())],
+                    )
+                    .await?
+                    .as_bool()
+                    .unwrap_or(false);
+
+                if !exists {
+                    return Err(anyhow!(
+                        "Expected element '{}' not found on page",
+                        selector
+                    ));
+                }
+            }
+            ContentCheck::TextContains(text) => {
+                // A document without a body is treated as having no text
+                let script_result = client
+                    .execute(
+                        "return document.body ? document.body.innerText : '';",
+                        vec![],
+                    )
+                    .await?;
+                let page_text = script_result.as_str().unwrap_or("");
+
+                if !page_text.contains(text.as_str()) {
+                    return Err(anyhow!(
+                        "Expected text '{}' not found on page",
+                        text
+                    ));
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// A single condition checked by `verify_page_content`
+#[derive(Debug, Clone)]
+pub enum ContentCheck {
+    /// Passes when `document.querySelector` finds an element for this CSS selector
+    ElementExists(String),
+    /// Passes when `document.body.innerText` contains this text as a substring
+    TextContains(String),
+}
+
+/// Safe navigation wrapper that validates and clears state on failure
+///
+/// Navigates to `url` with a `timeout_secs` limit, then confirms (up to three
+/// checks) that the resulting URL contains `expected_url_fragment`.
+///
+/// On any failure - navigation error, timeout, or URL verification failure -
+/// the browser is sent to `about:blank` on a best-effort basis so later
+/// extraction cannot read a stale or wrong page, and an error is returned.
+pub async fn navigate_with_validation(
+    client: &Client,
+    url: &str,
+    expected_url_fragment: &str,
+    timeout_secs: u64,
+) -> Result<()> {
+    use tokio::time::timeout;
+
+    // Attempt navigation with timeout
+    let nav_result = timeout(
+        Duration::from_secs(timeout_secs),
+        client.goto(url)
+    ).await;
+
+    match nav_result {
+        Ok(Ok(_)) => {
+            // Navigation succeeded, verify we're on correct page
+            if let Err(e) = verify_navigation(client, expected_url_fragment, 3).await {
+                // The browser is on an unexpected page - clear it like the
+                // other failure paths do, then surface the original error
+                crate::util::logger::log_error(&format!(
+                    "Navigation verification failed: {}. Clearing browser state...",
+                    e
+                )).await;
+                clear_browser_state(client).await.ok(); // Best effort
+                return Err(e);
+            }
+            Ok(())
+        }
+        Ok(Err(e)) => {
+            // Navigation failed - clear state before returning error
+            crate::util::logger::log_error(&format!(
+                "Navigation failed: {}. Clearing browser state...",
+                e
+            )).await;
+            clear_browser_state(client).await.ok(); // Best effort
+            Err(anyhow!("Navigation failed: {}", e))
+        }
+        Err(_) => {
+            // Navigation timed out - clear state before returning error
+            crate::util::logger::log_error(&format!(
+                "Navigation timeout after {}s. Clearing browser state...",
+                timeout_secs
+            )).await;
+            clear_browser_state(client).await.ok(); // Best effort
+            Err(anyhow!("Navigation timeout"))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Each `ContentCheck` variant keeps the string it was built with.
+    #[test]
+    fn test_content_check_variants() {
+        let element_check = ContentCheck::ElementExists("table".to_string());
+        let text_check = ContentCheck::TextContains("Yahoo Finance".to_string());
+
+        assert!(matches!(
+            element_check,
+            ContentCheck::ElementExists(ref sel) if sel == "table"
+        ));
+        assert!(matches!(
+            text_check,
+            ContentCheck::TextContains(ref text) if text == "Yahoo Finance"
+        ));
+    }
+}
--- a/src/corporate/scraper.rs
+++ b/src/corporate/scraper.rs
@@ -1,478 +1,13 @@
 // src/corporate/scraper.rs
-use super::{types::*, helpers::*, openfigi::*};
-//use crate::corporate::openfigi::OpenFigiClient;
-use crate::{scraper::webdriver::*, util::directories::DataPaths, util::logger};
-use fantoccini::{Client, Locator};
+use crate::{util::directories::DataPaths, util::logger};
+use fantoccini::{Client};
 use scraper::{Html, Selector};
-use chrono::{DateTime, Duration, NaiveDate, Utc};
-use tokio::{time::{Duration as TokioDuration, sleep}};
-use reqwest::Client as HttpClient;
-use serde_json::{json, Value};
 use zip::ZipArchive;
-use std::{collections::HashMap, sync::Arc};
+use std::{collections::HashMap};
 use std::io::{Read};
-use anyhow::{anyhow, Result};

 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

-/// Check if a ticker exists on Yahoo Finance and return core metadata.
-///
-/// This function calls the public Yahoo Finance quoteSummary endpoint and extracts:
-/// - ISIN (when available)
-/// - Company name
-/// - Exchange MIC code
-/// - Trading currency
-///
-/// It strictly filters to only accept **equity** securities.
-///
-/// # Arguments
-/// * `ticker` - The ticker symbol to validate (e.g., "AAPL", "7203.T", "BMW.DE")
-///
-/// # Returns
-/// `Ok(PrimaryInfo)` on success, `Err` if ticker doesn't exist, is not equity, or data is malformed.
-///
-/// # Errors
-/// - Ticker not found
-/// - Not an equity (ETF, bond, etc.)
-/// - Missing critical fields
-/// - Network or JSON parsing errors
-/*pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
-    let url = format!(
-        "https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price%2CassetProfile",
-        ticker
-    );
-
-    let resp = match HttpClient::new()
-        .get(&url)
-        .header("User-Agent", USER_AGENT)
-        .send()
-        .await
-    {
-        Ok(resp) => resp,
-        Err(err) => {
-            return Err(anyhow::anyhow!(
-                "Failed to reach Yahoo Finance for ticker {}: {}",
-                ticker,
-                err
-            ));
-        }
-    };
-
-    if !resp.status().is_success() {
-        return Err(anyhow::anyhow!("Yahoo returned HTTP {} for ticker {}", resp.status(), ticker));
-    }
-
-    let json: Value = match resp
-        .json()
-        .await {
-            Ok(resp) => resp,
-            Err(err) => {
-                return Err(anyhow::anyhow!(
-                    "Failed to parse JSON response from Yahoo Finance {}: {}",
-                    ticker,
-                    err
-                ));
-            }
-        };
-
-    let result_array = json["quoteSummary"]["result"]
-        .as_array()
-        .ok_or_else(|| anyhow::anyhow!("Missing 'quoteSummary.result' in response"))?;
-
-    if result_array.is_empty() || result_array[0].is_null() {
-        return Err(anyhow::anyhow!("No quote data returned for ticker {}", ticker));
-    }
-
-    let quote = &result_array[0]["price"];
-    let profile = &result_array[0]["assetProfile"];
-
-    // === 1. Must be EQUITY ===
-    let quote_type = quote["quoteType"]
-        .as_str()
-        .unwrap_or("")
-        .to_ascii_uppercase();
-
-    if quote_type != "EQUITY" {
-        println!("      → Skipping {} (quoteType: {})", ticker, quote_type);
-        return Err(anyhow::anyhow!("Not an equity security: {}", quote_type));
-    }
-
-    // === 2. Extract basic info ===
-    let long_name = quote["longName"]
-        .as_str()
-        .or_else(|| quote["shortName"].as_str())
-        .unwrap_or(ticker)
-        .trim()
-        .to_string();
-
-    let currency = quote["currency"]
-        .as_str()
-        .unwrap_or("USD")
-        .to_string();
-
-    let exchange_mic = quote["exchange"]
-        .as_str()
-        .unwrap_or("")
-        .to_string();
-
-    if exchange_mic.is_empty() {
-        return Err(anyhow::anyhow!("Missing exchange MIC for ticker {}", ticker));
-    }
-
-    // === 3. Extract ISIN (from assetProfile if available) ===
-    let isin = profile["isin"]
-        .as_str()
-        .and_then(|s| if s.len() == 12 && s.chars().all(|c| c.is_ascii_alphanumeric()) { Some(s) } else { None })
-        .unwrap_or("")
-        .to_ascii_uppercase();
-
-    // === 4. Final sanity check: reject obvious debt securities ===
-    let name_upper = long_name.to_ascii_uppercase();
-    if name_upper.contains(" BOND") ||
-       name_upper.contains(" NOTE") ||
-       name_upper.contains(" DEBENTURE") ||
-       name_upper.contains(" PREFERRED") && !name_upper.contains(" STOCK") {
-        return Err(anyhow::anyhow!("Security name suggests debt instrument: {}", long_name));
-    }
-
-    println!(
-        "      → Valid equity: {} | {} | {} | ISIN: {}",
-        ticker,
-        long_name,
-        exchange_mic,
-        if isin.is_empty() { "N/A" } else { &isin }
-    );
-
-    Ok(PrimaryInfo {
-        isin,
-        name: long_name,
-        exchange_mic,
-        currency,
-    })
-}*/
-
-/// Fetches earnings events for a ticker using a dedicated ScrapeTask.
-///
-/// This function creates and executes a ScrapeTask to navigate to the Yahoo Finance earnings calendar,
-/// reject cookies, and extract the events.
-///
-/// # Arguments
-/// * `ticker` - The stock ticker symbol.
-///
-/// # Returns
-/// A vector of CompanyEvent structs on success.
-///
-/// # Errors
-/// Returns an error if the task execution fails, e.g., chromedriver spawn or navigation issues.
-pub async fn fetch_earnings_with_pool(
-    ticker: &str,
-    pool: &Arc<ChromeDriverPool>,
-) -> anyhow::Result<Vec<CompanyEvent>> {
-    let ticker = ticker.to_string();
-    let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
-
-    let ticker_cloned = ticker.clone();
-
-    pool.execute(url, move |client| {
-        let ticker = ticker_cloned.clone();
-        Box::pin(async move {
-            reject_yahoo_cookies(&client).await?;
-            extract_earnings_events(&client, &ticker).await
-        })
-    }).await
-}
-
-/// Extracts earnings events from the currently loaded Yahoo Finance earnings calendar page.
-///
-/// This function assumes the client is already navigated to the correct URL (e.g., 
-/// https://finance.yahoo.com/calendar/earnings?symbol={ticker}) and cookies are handled.
-///
-/// It waits for the earnings table, extracts rows, parses cells into CompanyEvent structs,
-/// and handles date parsing, float parsing, and optional fields.
-///
-/// # Arguments
-/// * `client` - The fantoccini Client with the page loaded.
-/// * `ticker` - The stock ticker symbol for the events.
-///
-/// # Returns
-/// A vector of CompanyEvent on success.
-///
-/// # Errors
-/// Returns an error if:
-/// - Table or elements not found.
-/// - Date or float parsing fails.
-/// - WebDriver operations fail.
-///
-/// # Examples
-///
-/// ```no_run
-/// use fantoccini::Client;
-/// use crate::corporate::scraper::extract_earnings;
-///
-/// #[tokio::main]
-/// async fn main() -> Result<()> {
-///     // Assume client is set up and navigated
-///     let events = extract_earnings(&client, "AAPL").await?;
-///     Ok(())
-/// }
-/// ```
-pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> {
-    // Wait for the table to load
-    let table = client
-        .wait()
-        .for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
-        .await
-        .map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
-
-    // Find all rows in tbody
-    let rows = table
-        .find_all(Locator::Css("tbody tr"))
-        .await
-        .map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
-
-    let mut events = Vec::with_capacity(rows.len());
-
-    for row in rows {
-        let cells = row
-            .find_all(Locator::Css("td"))
-            .await
-            .map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
-
-        if cells.len() < 5 {
-            continue; // Skip incomplete rows
-        }
-
-        // Extract and parse date
-        let date_str = cells[0]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get date text: {}", e))?;
-        let date = parse_yahoo_date(&date_str)
-            .map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
-            .format("%Y-%m-%d")
-            .to_string();
-
-        // Extract time, replace "Time Not Supplied" with empty
-        let time = cells[1]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get time text: {}", e))?
-            .replace("Time Not Supplied", "");
-
-        // Extract period
-        let period = cells[2]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get period text: {}", e))?;
-
-        // Parse EPS forecast
-        let eps_forecast_str = cells[3]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
-        let eps_forecast = parse_float(&eps_forecast_str);
-
-        // Parse EPS actual
-        let eps_actual_str = cells[4]
-            .text()
-            .await
-            .map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
-        let eps_actual = parse_float(&eps_actual_str);
-
-        // Parse surprise % if available
-        let surprise_pct = if cells.len() > 5 {
-            let surprise_str = cells[5]
-                .text()
-                .await
-                .map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
-            parse_float(&surprise_str)
-        } else {
-            None
-        };
-
-        events.push(CompanyEvent {
-            ticker: ticker.to_string(),
-            date,
-            time,
-            period,
-            eps_forecast,
-            eps_actual,
-            revenue_forecast: None,
-            revenue_actual: None,
-            surprise_pct,
-            source: "Yahoo".to_string(),
-        });
-    }
-
-    if events.is_empty() {
-        eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
-    } else {
-        println!("Extracted {} earnings events for {}", events.len(), ticker);
-    }
-
-    Ok(events)
-}
-
-fn parse_price(v: Option<&Value>) -> f64 {
-    v.and_then(|x| x.as_str())
-        .and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
-        .or_else(|| v.and_then(|x| x.as_f64()))
-        .unwrap_or(0.0)
-}
-
-fn parse_volume(v: Option<&Value>) -> u64 {
-    v.and_then(|x| x.as_str())
-        .and_then(|s| s.replace(',', "").parse::<u64>().ok())
-        .or_else(|| v.and_then(|x| x.as_u64()))
-        .unwrap_or(0)
-}
-
-pub async fn fetch_daily_price_history(
-    ticker: &str,
-    start_str: &str,
-    end_str: &str,
-) -> anyhow::Result<Vec<CompanyPrice>> {
-    let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
-    let end   = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1);
-
-    let mut all_prices = Vec::new();
-    let mut current = start;
-
-    while current < end {
-        let chunk_end = current + Duration::days(730);
-        let actual_end = chunk_end.min(end);
-
-        let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
-        let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
-
-        println!("    Fetching {ticker} {} → {}", current, actual_end - Duration::days(1));
-
-        let url = format!(
-            "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
-        );
-
-        let json: Value = HttpClient::new()
-            .get(&url)
-            .header("User-Agent", USER_AGENT)
-            .send()
-            .await?
-            .json()
-            .await?;
-
-        let result = &json["chart"]["result"][0];
-        let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
-        let quote = &result["indicators"]["quote"][0];
-        let meta = &result["meta"];
-        let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
-
-        let opens = quote["open"].as_array();
-        let highs = quote["high"].as_array();
-        let lows  = quote["low"].as_array();
-        let closes = quote["close"].as_array();
-        let adj_closes = result["indicators"]["adjclose"][0]["adjclose"].as_array()
-            .or_else(|| closes);
-        let volumes = quote["volume"].as_array();
-
-        for (i, ts_val) in timestamps.iter().enumerate() {
-            let ts = ts_val.as_i64().unwrap_or(0);
-            let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
-            let date_str = dt.format("%Y-%m-%d").to_string();
-
-            if date_str < start_str.to_string() || date_str > end_str.to_string() {
-                continue;
-            }
-
-            let open = parse_price(opens.and_then(|a| a.get(i)));
-            let high = parse_price(highs.and_then(|a| a.get(i)));
-            let low  = parse_price(lows.and_then(|a| a.get(i)));
-            let close = parse_price(closes.and_then(|a| a.get(i)));
-            let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
-            let volume = parse_volume(volumes.and_then(|a| a.get(i)));
-
-            all_prices.push(CompanyPrice {
-                ticker: ticker.to_string(),
-                date: date_str,
-                time: "".to_string(),
-                open,
-                high,
-                low,
-                close,
-                adj_close,
-                volume,
-                currency: currency.clone(),
-            });
-        }
-
-        sleep(TokioDuration::from_millis(200)).await;
-        current = actual_end;
-    }
-
-    all_prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-    all_prices.dedup_by(|a, b| a.date == b.date && a.time == b.time);
-
-    println!("    Got {} daily bars for {ticker}", all_prices.len());
-    Ok(all_prices)
-}
-
-pub async fn fetch_price_history_5min(
-    ticker: &str,
-    _start: &str,
-    _end: &str,
-) -> anyhow::Result<Vec<CompanyPrice>> {
-    let now = Utc::now().timestamp();
-    let period1 = now - 5184000;
-    let period2 = now;
-
-    let url = format!(
-        "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
-    );
-
-    let json: Value = HttpClient::new()
-        .get(&url)
-        .header("User-Agent", USER_AGENT)
-        .send()
-        .await?
-        .json()
-        .await?;
-
-    let result = &json["chart"]["result"][0];
-    let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
-    let quote = &result["indicators"]["quote"][0];
-    let meta = &result["meta"];
-    let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
-
-    let mut prices = Vec::new();
-
-    for (i, ts_val) in timestamps.iter().enumerate() {
-        let ts = ts_val.as_i64().unwrap_or(0);
-        let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
-        let date_str = dt.format("%Y-%m-%d").to_string();
-        let time_str = dt.format("%H:%M:%S").to_string();
-
-        let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i)));
-        let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i)));
-        let low  = parse_price(quote["low"].as_array().and_then(|a| a.get(i)));
-        let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i)));
-        let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i)));
-
-        prices.push(CompanyPrice {
-            ticker: ticker.to_string(),
-            date: date_str,
-            time: time_str,
-            open,
-            high,
-            low,
-            close,
-            adj_close: close,
-            volume,
-            currency: currency.clone(),
-        });
-    }
-
-    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-    Ok(prices)
-}
-
 /// Fetch the URL of the latest ISIN↔LEI mapping CSV from GLEIF
 /// Overengineered; we could just use the static URL, but this shows how to scrape if needed
 pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> {
@@ -490,20 +25,17 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow
 pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";

-    // Initialize DataPaths and create cache/gleif directory
    let paths = DataPaths::new(".")?;
    let gleif_cache_dir = paths.cache_gleif_dir();
    
    if let Err(e) = std::fs::create_dir_all(&gleif_cache_dir) {
        let msg = format!("Failed to create cache/gleif directory: {}", e);
        logger::log_error(&msg).await;
-        println!("{}", msg);
        return Ok(None);
    }

-    logger::log_info("Corporate Scraper: Downloading ISIN/LEI mapping from GLEIF...").await;
+    logger::log_info("Downloading ISIN/LEI mapping from GLEIF...").await;

-    // Download ZIP and get the filename from Content-Disposition header
    let client = match reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .timeout(std::time::Duration::from_secs(30))
@@ -511,9 +43,7 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    {
        Ok(c) => c,
        Err(e) => {
-            let msg = format!("Failed to create HTTP client: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to create HTTP client: {}", e)).await;
            return Ok(None);
        }
    };
@@ -521,20 +51,15 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let resp = match client.get(url).send().await {
        Ok(r) if r.status().is_success() => r,
        Ok(resp) => {
-            let msg = format!("Server returned HTTP {}", resp.status());
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Server returned HTTP {}", resp.status())).await;
            return Ok(None);
        }
        Err(e) => {
-            let msg = format!("Failed to download ISIN/LEI ZIP: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to download: {}", e)).await;
            return Ok(None);
        }
    };

-    // Extract filename from Content-Disposition header or use default
    let filename = resp
        .headers()
        .get("content-disposition")
@@ -542,11 +67,10 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        .and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
        .unwrap_or_else(|| "isin_lei.zip".to_string());

-    // Parse timestamp from filename and convert to DDMMYYYY format
    let parsed_filename = parse_gleif_filename(&filename);
-    logger::log_info(&format!("Corporate Scraper: Downloaded file: {} -> {}", filename, parsed_filename)).await;
+    logger::log_info(&format!("Downloaded: {} -> {}", filename, parsed_filename)).await;

-    // Determine date (DDMMYYYY) from parsed filename: "isin-lei-DDMMYYYY.csv"
+    // Extract date from filename
    let mut date_str = String::new();
    if let Some(start_idx) = parsed_filename.find("isin-lei-") {
        let rest = &parsed_filename[start_idx + 9..];
@@ -555,13 +79,10 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        }
    }

-    // If we parsed a date, use/create a date folder under cache/gleif and operate inside it; otherwise use cache root.
    let date_dir = if !date_str.is_empty() {
        let p = gleif_cache_dir.join(&date_str);
-        // Ensure the date folder exists (create if necessary)
        if let Err(e) = std::fs::create_dir_all(&p) {
-            let msg = format!("Failed to create date directory {:?}: {}", p, e);
-            logger::log_warn(&msg).await;
+            logger::log_warn(&format!("Failed to create date directory: {}", e)).await;
            None
        } else {
            Some(p)
@@ -570,17 +91,16 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        None
    };

-    // Choose the directory where we'll look for existing files and where we'll save the new ones
    let target_dir = date_dir.clone().unwrap_or_else(|| gleif_cache_dir.to_path_buf());

-    // If the date folder exists (or was created), prefer any *_clean.csv inside it and return that immediately
+    // Check for existing clean CSV
    if let Some(ref ddir) = date_dir {
        if let Ok(entries) = std::fs::read_dir(ddir) {
            for entry in entries.flatten() {
                if let Some(name) = entry.file_name().to_str() {
                    if name.to_lowercase().ends_with("_clean.csv") {
                        let path = ddir.join(name);
-                        logger::log_info(&format!("Found existing clean GLEIF CSV: {}", path.display())).await;
+                        logger::log_info(&format!("Found existing clean CSV: {}", path.display())).await;
                        return Ok(Some(path.to_string_lossy().to_string()));
                    }
                }
@@ -588,71 +108,42 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
        }
    }

-    // If no clean file found in the date folder (or date folder doesn't exist), check whether the csv/zip already exist in the target dir
-    let csv_candidate_name = parsed_filename.replace(".zip", ".csv");
-    let csv_candidate = target_dir.join(&csv_candidate_name);
-    let zip_candidate = target_dir.join(&parsed_filename);
-
+    let csv_candidate = target_dir.join(parsed_filename.replace(".zip", ".csv"));
    if csv_candidate.exists() {
-        logger::log_info(&format!("Found existing GLEIF CSV: {}", csv_candidate.display())).await;
+        logger::log_info(&format!("Found existing CSV: {}", csv_candidate.display())).await;
        return Ok(Some(csv_candidate.to_string_lossy().to_string()));
    }
-    if zip_candidate.exists() {
-        // If zip exists but csv does not, extract later; for now prefer returning csv path (may be created by extraction step)
-        let inferred_csv = target_dir.join(csv_candidate_name);
-        if inferred_csv.exists() {
-            logger::log_info(&format!("Found existing extracted CSV next to ZIP: {}", inferred_csv.display())).await;
-            return Ok(Some(inferred_csv.to_string_lossy().to_string()));
-        }
-        // otherwise we'll overwrite/extract into target_dir below
-    }

    let bytes = match resp.bytes().await {
        Ok(b) => b,
        Err(e) => {
-            let msg = format!("Failed to read ZIP bytes: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to read bytes: {}", e)).await;
            return Ok(None);
        }
    };
-    // Ensure target directory exists (create if it's the date folder and was absent earlier)
-    if let Some(ref ddir) = date_dir {
-        let _ = std::fs::create_dir_all(ddir);
-    }

    let zip_path = target_dir.join(&parsed_filename);
    let csv_path = target_dir.join(parsed_filename.replace(".zip", ".csv"));

    if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
-        let msg = format!("Failed to write ZIP file: {}", e);
-        logger::log_error(&msg).await;
-        println!("{}", msg);
+        logger::log_error(&format!("Failed to write ZIP: {}", e)).await;
        return Ok(None);
    }
-    logger::log_info(&format!("Corporate Scraper: Saved ZIP to {:?}", zip_path)).await;

-    // Extract CSV
-    let archive = match std::fs::File::open(&zip_path)
-        .map(ZipArchive::new)
-    {
+    // Extract CSV from ZIP
+    let archive = match std::fs::File::open(&zip_path).map(ZipArchive::new) {
        Ok(Ok(a)) => a,
        Ok(Err(e)) => {
-            let msg = format!("Invalid ZIP: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Invalid ZIP: {}", e)).await;
            return Ok(None);
        }
        Err(e) => {
-            let msg = format!("Cannot open ZIP file: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Cannot open ZIP: {}", e)).await;
            return Ok(None);
        }
    };

    let mut archive = archive;
-
    let idx = match (0..archive.len()).find(|&i| {
        archive.by_index(i)
            .map(|f| f.name().ends_with(".csv"))
@@ -660,9 +151,7 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    }) {
        Some(i) => i,
        None => {
-            let msg = "ZIP did not contain a CSV file";
-            logger::log_error(msg).await;
-            println!("{}", msg);
+            logger::log_error("ZIP contains no CSV").await;
            return Ok(None);
        }
    };
@@ -670,43 +159,32 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let mut csv_file = match archive.by_index(idx) {
        Ok(f) => f,
        Err(e) => {
-            let msg = format!("Failed to read CSV entry: {}", e);
-            logger::log_error(&msg).await;
-            println!("{}", msg);
+            logger::log_error(&format!("Failed to read CSV: {}", e)).await;
            return Ok(None);
        }
    };

    let mut csv_bytes = Vec::new();
    if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
-        let msg = format!("Failed to extract CSV: {}", e);
-        logger::log_error(&msg).await;
+        logger::log_error(&format!("Failed to extract: {}", e)).await;
        return Ok(None);
    }

    if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
-        let msg = format!("Failed to save CSV file: {}", e);
-        logger::log_error(&msg).await;
+        logger::log_error(&format!("Failed to save CSV: {}", e)).await;
        return Ok(None);
    }

-    let msg = format!("✓ ISIN/LEI CSV extracted: {:?}", csv_path);
-    logger::log_info(&msg).await;
-    
+    logger::log_info(&format!("✓ CSV extracted: {:?}", csv_path)).await;
    Ok(Some(csv_path.to_string_lossy().to_string()))
 }

-/// Parse GLEIF filename and convert timestamp to DDMMYYYY format
-/// Example: "isin-lei-20251124T080254.csv" -> "isin-lei-24112025.csv"
 fn parse_gleif_filename(filename: &str) -> String {
-    // Try to find pattern: isin-lei-YYYYMMDDTHHMMSS.zip/csv
    if let Some(start_idx) = filename.find("isin-lei-") {
-        let rest = &filename[start_idx + 9..]; // After "isin-lei-"
+        let rest = &filename[start_idx + 9..];
        
-        // Extract the 8 digits (YYYYMMDD)
        if rest.len() >= 8 && rest[0..8].chars().all(|c| c.is_numeric()) {
            let date_part = &rest[0..8];
-            // date_part is YYYYMMDD, convert to DDMMYYYY
            if date_part.len() == 8 {
                let year = &date_part[0..4];
                let month = &date_part[4..6];
@@ -717,11 +195,9 @@ fn parse_gleif_filename(filename: &str) -> String {
        }
    }
    
-    // Fallback: return original filename if parsing fails
    filename.to_string()
 }

-
 pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
    // 1. Download + extract the CSV (this is now async)
    let csv_path = match download_isin_lei_csv().await? {
@@ -769,30 +245,4 @@ pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>>
    );

    Ok(map)
-}
-
-pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
-    for _ in 0..10 {
-        let clicked: bool = client
-            .execute(
-                r#"(() => {
-                    const btn = document.querySelector('#consent-page .reject-all');
-                    if (btn) {
-                        btn.click();
-                        return true;
-                    }
-                    return false;
-                })()"#,
-                vec![],
-            )
-            .await?
-            .as_bool()
-            .unwrap_or(false);
-
-        if clicked { break; }
-        sleep(TokioDuration::from_millis(500)).await;
-    }
-
-    println!("Rejected Yahoo cookies if button existed");
-    Ok(())
 }
--- a/src/corporate/storage.rs
+++ b/src/corporate/storage.rs
@@ -1,267 +1,87 @@
 // src/corporate/storage.rs
-use super::{types::*, helpers::*};
 use crate::util::directories::DataPaths;
 use crate::util::logger;

-use tokio::fs;
 use tokio::io::AsyncWriteExt;
-use chrono::{Datelike, NaiveDate};
-use std::collections::{HashMap};
-use std::path::{PathBuf};
+use std::collections::HashMap;
+use std::path::{PathBuf, Path};

-pub async fn load_existing_events(paths: &DataPaths) -> anyhow::Result<HashMap<String, CompanyEvent>> {
-    let mut map = HashMap::new();
-    let dir = paths.corporate_events_dir();
-    if !dir.exists() {
-        logger::log_info("Corporate Storage: No existing events directory found").await;
-        return Ok(map);
-    }

-    let mut entries = fs::read_dir(dir).await?;
-    let mut loaded_count = 0;
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        if path.extension().and_then(|s| s.to_str()) == Some("json") {
-            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
-            if name.starts_with("events_") && name.len() == 17 {
-                let content = fs::read_to_string(&path).await?;
-                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
-                for event in events {
-                    map.insert(event_key(&event), event);
-                }
-                loaded_count += 1;
-            }
-        }
-    }
-    logger::log_info(&format!("Corporate Storage: Loaded {} events from {} files", map.len(), loaded_count)).await;
-    Ok(map)
+/// Lightweight index entry - only metadata, no full event data
+#[derive(Debug, Clone)]
+pub struct EventIndex {
+    pub key: String,
+    pub ticker: String,
+    pub date: String,
+    pub file_path: PathBuf,
 }

-pub async fn save_optimized_events(paths: &DataPaths, events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
-    let dir = paths.corporate_events_dir();
-    fs::create_dir_all(dir).await?;
-
-    logger::log_info("Corporate Storage: Removing old event files...").await;
-    let mut removed_count = 0;
-    let mut entries = fs::read_dir(dir).await?;
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
-        if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
-            fs::remove_file(&path).await?;
-            removed_count += 1;
-        }
-    }
-    logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;
-
-    let total_events = events.len();
-    let mut sorted: Vec<_> = events.into_values().collect();
-    sorted.sort_by_key(|e| (e.ticker.clone(), e.date.clone()));
-
-    let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();
-    for e in sorted {
-        if let Ok(d) = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d") {
-            let key = format!("{}-{:02}", d.year(), d.month());
-            by_month.entry(key).or_default().push(e);
-        }
-    }
-
-    let total_months = by_month.len();
-    for (month, list) in by_month {
-        let path = dir.join(format!("events_{}.json", month));
-        fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
-        logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
-    }
-    logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
-    Ok(())
-}
-
-pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() { 
-        logger::log_info("Corporate Storage: No changes to save").await;
-        return Ok(()); 
-    }
-    let dir = paths.corporate_changes_dir();
-    fs::create_dir_all(dir).await?;
-
-    logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;
-
-    let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
-    for c in changes {
-        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
-            let key = format!("{}-{:02}", d.year(), d.month());
-            by_month.entry(key).or_default().push(c.clone());
-        }
-    }
-
-    for (month, list) in by_month {
-        let path = dir.join(format!("changes_{}.json", month));
-        let mut all = if path.exists() {
-            let s = fs::read_to_string(&path).await?;
-            serde_json::from_str(&s).unwrap_or_default()
-        } else { vec![] };
-        all.extend(list.clone());
-        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
-        logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
-    }
-    logger::log_info("Corporate Storage: All changes saved successfully").await;
-    Ok(())
-}
-
-pub async fn save_prices_for_ticker(paths: &DataPaths, ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
-    let base_dir = paths.corporate_prices_dir();
-    let company_dir = base_dir.join(ticker.replace(".", "_"));
-    let timeframe_dir = company_dir.join(timeframe);
-
-    fs::create_dir_all(&timeframe_dir).await?;
-    let path = timeframe_dir.join("prices.json");
-
-    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-
-    let json = serde_json::to_string_pretty(&prices)?;
-    fs::write(&path, json).await?;
-    Ok(())
-}
-
-pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
-    paths.corporate_prices_dir().join(lei)
-}
-
-pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
-    let base = get_company_dir(paths, isin);
-    let paths_to_create = [
-        base.clone(),
-        base.join("5min"),
-        base.join("daily"),
-        base.join("aggregated").join("5min"),
-        base.join("aggregated").join("daily"),
-    ];
-    for p in paths_to_create {
-        fs::create_dir_all(&p).await?;
-    }
-    Ok(())
-}
-
-pub async fn save_available_exchanges(paths: &DataPaths, isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
-    let dir = get_company_dir(paths, isin);
-    fs::create_dir_all(&dir).await?;
-    let path = dir.join("available_exchanges.json");
-    fs::write(&path, serde_json::to_string_pretty(&exchanges)?).await?;
-    Ok(())
-}
-
-pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
-    let path = get_company_dir(paths, lei).join("available_exchanges.json");
-    if path.exists() {
-        let content = fs::read_to_string(&path).await?;
-        Ok(serde_json::from_str(&content)?)
-    } else {
-        Ok(vec![])
-    }
-}
-
-pub async fn save_prices_by_source(
+/// Stream companies to JSONL incrementally
+pub async fn save_companies_to_jsonl_streaming(
    paths: &DataPaths,
-    lei: &str,
-    source_ticker: &str,
-    timeframe: &str,
-    prices: Vec<CompanyPrice>,
-) -> anyhow::Result<()> {
-    let source_safe = source_ticker.replace(".", "_").replace("/", "_");
-    let dir = get_company_dir(paths, lei).join(timeframe).join(&source_safe);
-    fs::create_dir_all(&dir).await?;
-    let path = dir.join("prices.json");
-    let mut prices = prices;
-    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
-    fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
-    Ok(())
-}
-
-/// Update available_exchanges.json with fetch results
-/*pub async fn update_available_exchange(
-    paths: &DataPaths,
-    isin: &str,
-    ticker: &str,
-    exchange_mic: &str,
-    has_daily: bool,
-    has_5min: bool,
-) -> anyhow::Result<()> {
-    let mut exchanges = load_available_exchanges(paths, isin).await?;
-
-    if let Some(entry) = exchanges.iter_mut().find(|e| e.ticker == ticker) {
-        // Update existing entry
-        entry.record_success(has_daily, has_5min);
-    } else {
-        // Create new entry - need to get currency from somewhere
-        // Try to infer from the ticker or use a default
-        let currency = infer_currency_from_ticker(ticker);
-        let mut new_entry = AvailableExchange::new(
-            ticker.to_string(),
-            exchange_mic.to_string(),
-            currency,
-        );
-        new_entry.record_success(has_daily, has_5min);
-        exchanges.push(new_entry);
-    }
-
-    save_available_exchanges(paths, isin, exchanges).await
-}*/
-
-/// Infer currency from ticker suffix
-fn infer_currency_from_ticker(ticker: &str) -> String {
-    if ticker.ends_with(".L") { return "GBP".to_string(); }
-    if ticker.ends_with(".PA") { return "EUR".to_string(); }
-    if ticker.ends_with(".DE") { return "EUR".to_string(); }
-    if ticker.ends_with(".AS") { return "EUR".to_string(); }
-    if ticker.ends_with(".MI") { return "EUR".to_string(); }
-    if ticker.ends_with(".SW") { return "CHF".to_string(); }
-    if ticker.ends_with(".T") { return "JPY".to_string(); }
-    if ticker.ends_with(".HK") { return "HKD".to_string(); }
-    if ticker.ends_with(".SS") { return "CNY".to_string(); }
-    if ticker.ends_with(".SZ") { return "CNY".to_string(); }
-    if ticker.ends_with(".TO") { return "CAD".to_string(); }
-    if ticker.ends_with(".AX") { return "AUD".to_string(); }
-    if ticker.ends_with(".SA") { return "BRL".to_string(); }
-    if ticker.ends_with(".MC") { return "EUR".to_string(); }
-    if ticker.ends_with(".BO") || ticker.ends_with(".NS") { return "INR".to_string(); }
-    
-    "USD".to_string() // Default
-}
-
-/// Saves companies data to a JSONL file.
-///
-/// # Arguments
-/// * `paths` - Reference to DataPaths for directory management
-/// * `companies` - HashMap of company names to their securities (ISIN, Ticker pairs)
-///
-/// # Errors
-/// Returns an error if file operations or serialization fails.
-pub async fn save_companies_to_jsonl(
-    paths: &DataPaths,
-    companies: &HashMap<String, HashMap<String, String>>,
-) -> anyhow::Result<()> {
+    companies_iter: impl Iterator<Item = (String, HashMap<String, String>)>,
+) -> anyhow::Result<usize> {
    let file_path = paths.data_dir().join("companies.jsonl");
    
-    logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;
-    
-    // Create parent directory if it doesn't exist
    if let Some(parent) = file_path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }
    
    let mut file = tokio::fs::File::create(&file_path).await?;
+    let mut count = 0;
    
-    for (name, securities) in companies.iter() {
+    for (name, securities) in companies_iter {
        let line = serde_json::json!({
            "name": name,
            "securities": securities
        });
+        
        file.write_all(line.to_string().as_bytes()).await?;
        file.write_all(b"\n").await?;
+        count += 1;
+        
+        if count % 100 == 0 {
+            tokio::task::yield_now().await;
+        }
    }
    
-    let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
-    println!("{}", msg);
-    logger::log_info(&msg).await;
-    Ok(())
+    logger::log_info(&format!("Saved {} companies to JSONL", count)).await;
+    Ok(count)
+}
+
+/// Stream read companies from JSONL
+pub async fn stream_companies_from_jsonl<F>(
+    path: &Path,
+    mut callback: F
+) -> anyhow::Result<usize>
+where
+    F: FnMut(String, HashMap<String, String>) -> anyhow::Result<()>,
+{
+    if !path.exists() {
+        return Ok(0);
+    }
+    
+    let content = tokio::fs::read_to_string(path).await?;
+    let mut count = 0;
+    
+    for line in content.lines() {
+        if line.trim().is_empty() {
+            continue;
+        }
+        
+        let entry: serde_json::Value = serde_json::from_str(line)?;
+        let name = entry["name"].as_str().unwrap_or("").to_string();
+        let securities: HashMap<String, String> = serde_json::from_value(
+            entry["securities"].clone()
+        )?;
+        
+        callback(name, securities)?;
+        count += 1;
+        
+        if count % 100 == 0 {
+            tokio::task::yield_now().await;
+        }
+    }
+    
+    Ok(count)
 }
--- a/src/corporate/types.rs
+++ b/src/corporate/types.rs
@@ -2,42 +2,22 @@
 use std::collections::HashMap;
 use serde::{Deserialize, Serialize};

-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
-pub struct CompanyEvent {
-    pub ticker: String,
-    pub date: String,                    // YYYY-MM-DD
-    pub time: String,                    // "AMC", "BMO", "TAS", or ""
-    pub period: String,                  // "Q1 2025", "FY 2024"
-    pub eps_forecast: Option<f64>,
-    pub eps_actual: Option<f64>,
-    pub revenue_forecast: Option<f64>,
-    pub revenue_actual: Option<f64>,
-    pub surprise_pct: Option<f64>,       // (actual - forecast) / |forecast|
-    pub source: String,                  // "Yahoo"
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ChartData {
+    pub symbol: String,
+    pub quotes: Vec<Quote>,
+    pub timestamp: i64,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyPrice {
-    pub ticker: String,
-    pub date: String,                    // YYYY-MM-DD
-    pub time: String,                    // HH:MM:SS for intraday, "" for daily
-    pub open: f64,
-    pub high: f64,
-    pub low: f64,
-    pub close: f64,
-    pub adj_close: f64,
-    pub volume: u64,
-    pub currency: String,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyEventChange {
-    pub ticker: String,
-    pub date: String,
-    pub field_changed: String,           // "time", "eps_forecast", "eps_actual", "new_event"
-    pub old_value: String,
-    pub new_value: String,
-    pub detected_at: String,
+pub struct Quote {
+    pub timestamp: i64,
+    pub open: Option<f64>,
+    pub high: Option<f64>,
+    pub low: Option<f64>,
+    pub close: Option<f64>,
+    pub volume: Option<u64>,
+    pub adjusted_close: Option<f64>,
 }

 /// Figi Info based on API calls [https://www.openfigi.com/]
@@ -47,7 +27,7 @@ pub struct CompanyEventChange {
 /// # Comments
 /// Use Mapping the Object List onto Figi Properties
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FigiInfo {
+pub struct FigiData {
    pub isin: String,
    pub figi: String,
    pub name: String,
@@ -69,74 +49,144 @@ pub struct FigiInfo {

 /// Company Info
 /// # Attributes
-/// * Name as primary key (for one instition) -> might have to changed when first FigiInfo is coming in
+/// * Name as primary key (for one institution) -> might have to be changed when the first FigiData comes in
 /// * ISIN as the most liquid / preferred traded security (used for fallback)
 /// * securities: Grouped by ISIN, filtered for Common Stock only
+/// * isin_tickers_map: Map of ISINs to their associated tickers across platforms
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyInfo{
+pub struct CompanyData{
    pub name: String,
    pub primary_isin: String,
-    pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo>
+    pub securities: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData>
+    pub yahoo_company_data: Option<Vec<YahooCompanyData>>,
+    pub isin_tickers_map: Option<HashMap<String, Vec<String>>>, // ISIN -> Tickers
 }

-/// Company Meta Data
-/// # Attributes
-/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
-/// * figi: metadata with ISIN as key
-/*#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyMetadata {
-    pub lei: String,
-    pub figi: Option<Vec<FigiInfo>>,
-}*/
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YahooCompanyData {
+    pub ticker: String,
+    pub sector: Option<String>,
+    pub exchange: Option<String>,
+}

-/// Warrant Info
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WarrantData {
+    pub company_name: String, // key in CompanyData
+    pub warrants: HashMap<String, WarrantDetails>, // underlying company name -> Warrant
+}
+
+/// Warrant Data
 /// 
-/// Information for Warrant securities fetched out of Name in FigiInfo         
+/// Information for Warrant securities fetched out of Name in FigiData        
 /// example1: "name": "VONTOBE-PW26 LEONARDO SPA",
 /// issued by VONTOBEL Put Warrant for underlying company LEONARDO SPA
 /// example2: "BAYER H-CW25 L'OREAL",
 /// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct WarrantInfo {
-    pub underlying_company_name: String, // key in CompanyInfo, key for WarrantInfo
-    pub issuer_company_name: Option<String>, // key in CompanyInfo
+pub struct WarrantDetails {
+    pub company_name: String, // key in CompanyData, key for WarrantDetails
+    pub issuer_company_name: Option<String>, // key in CompanyData
    pub warrant_type: String, // "put" or "call"
-    pub warrants: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
+    pub warrants: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
 }

-/// Option Info
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OptionData {
+    pub company_name: String, // key in CompanyData
+    pub expiration_dates: Vec<i64>,
+    pub strikes: Vec<f64>,
+    pub option: Vec<OptionChain>,
+    pub timestamp: i64,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OptionChain {
+    pub expiration_date: i64,
+    pub calls: Vec<OptionContract>,
+    pub puts: Vec<OptionContract>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OptionContract {
+    pub strike: f64,
+    pub last_price: Option<f64>,
+    pub bid: Option<f64>,
+    pub ask: Option<f64>,
+    pub volume: Option<u64>,
+    pub open_interest: Option<u64>,
+    pub implied_volatility: Option<f64>,
+}
+
+/// Bond parsed details from ticker/description
 /// 
-/// Information for Option securities fetched out of Name in FigiInfo         
-/// example1: "name": "December 25 Calls on ALPHA GA",
-/// issued by NULL Call Option for underlying company ALPHA GA
-/// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL
+/// Parses bond information from ticker format:
+/// Corporate: "WTFC 4.3 01/12/26 0003"
+/// Government: "SLOVAK 1.5225 05/10/28 4Y"
 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct OptionInfo {
-    pub underlying_company_name: String, // key in CompanyInfo, key for OptionInfo
-    pub issuer_company_name: Option<String>, // key in CompanyInfo
-    pub option_type: String, // "put" or "call"
-    pub options: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
+pub struct BondDetails {
+    pub coupon_rate: Option<f64>,        // 4.3, 1.5225
+    pub maturity_date: Option<String>,   // "2026-01-12", "2028-05-10"
+    pub is_floating: bool,               // true if "Float" in description
+    pub is_zero_coupon: bool,            // true if coupon is 0
+    pub tenor_years: Option<u32>,        // Parsed from maturity or inferred
+    pub series_identifier: Option<String>, // "0003", "4Y", "144A", "REGS", etc.
 }

-/*#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PrimaryInfo {
-    pub isin: String,
-    pub name: String,
-    pub exchange_mic: String,
-    pub currency: String,
-}*/
+/// Corporate Bond Info
+/// 
+/// Information for corporate bonds grouped by issuer
+/// Example: "name": "LIBERTYVILLE BK & TRUST"
+/// ticker: "WTFC 4.3 01/12/26 0003"
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CorporateBondData {
+    pub underlying_company_name: String,             // key - company name issuing the bond
+    pub bonds: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
+    #[serde(skip_serializing_if = "HashMap::is_empty", default)]
+    pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
+}
+
+/// Government Bond Info
+/// 
+/// Information for government bonds grouped by issuer (country/municipality)
+/// Example: "name": "SLOVAK REPUBLIC"
+/// ticker: "SLOVAK 1.5225 05/10/28 4Y"
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GovernmentBondData {
+    pub issuer_name: String,             // key - government entity name
+    pub issuer_type: String,             // "sovereign", "municipal", "state", "province", etc.
+    pub bonds: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiData> (grouped by ISIN)
+    #[serde(skip_serializing_if = "HashMap::is_empty", default)]
+    pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
+}

 #[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AvailableExchange {
-    pub exchange_mic: String,
+pub struct ExchangeData {
+    pub mic: String,
    pub ticker: String,
-    pub has_daily: bool,
-    pub has_5min: bool,
-    pub last_successful_fetch: Option<String>, // YYYY-MM-DD
    #[serde(default)]
    pub currency: String,
-    #[serde(default)]
-    pub discovered_at: Option<String>,   // When this exchange was first discovered
-    #[serde(default)]
-    pub fetch_count: u32,                // How many times successfully fetched
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct CompanyEventData {
+    pub ticker: String,
+    pub date: String,                    // YYYY-MM-DD
+    pub time: String,                    // "AMC", "BMO", "TAS", or ""
+    pub period: String,                  // "Q1 2025", "FY 2024"
+    pub eps_forecast: Option<f64>,
+    pub eps_actual: Option<f64>,
+    pub revenue_forecast: Option<f64>,
+    pub revenue_actual: Option<f64>,
+    pub surprise_pct: Option<f64>,       // (actual - forecast) / |forecast|
+    pub source: String,                  // "Yahoo"
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CompanyEventChangeData {
+    pub ticker: String,
+    pub date: String,
+    pub field_changed: String,           // "time", "eps_forecast", "eps_actual", "new_event"
+    pub old_value: String,
+    pub new_value: String,
+    pub detected_at: String,
 }
--- a/src/corporate/update.rs
+++ b/src/corporate/update.rs
@@ -1,211 +1,129 @@
 // src/corporate/update.rs
-use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
+use super::{scraper::*, update_openfigi::*};
 use crate::config::Config;
+use crate::check_shutdown;
+use crate::corporate::update_companies::update_companies;
+use crate::corporate::update_companies_cleanse::{companies_yahoo_cleansed_low_profile, companies_yahoo_cleansed_no_data};
+use crate::corporate::update_companies_enrich::{enrich_companies_with_events, enrich_companies_with_chart, enrich_companies_with_option};
+use crate::corporate::collect_exchanges::collect_and_save_exchanges;
+use crate::economic::yahoo_update_forex::collect_fx_rates;
 use crate::util::directories::DataPaths;
 use crate::util::logger;
 use crate::scraper::webdriver::ChromeDriverPool;
+use crate::scraper::yahoo::{YahooClientPool};
+use crate::scraper::openfigi::load_figi_type_lists;

-use chrono::Local;
-use std::collections::{HashMap};
+use std::result::Result::Ok;
 use std::sync::Arc;
+use std::sync::atomic::{AtomicBool};

-/// Main function: Full update for all companies (LEI-based) with optimized parallel execution.
-///
-/// This function coordinates the entire update process:
-/// - Loads GLEIF mappings
-/// - Builds FIGI-LEI map
-/// - Loads existing events
-/// - Processes each company: discovers exchanges via FIGI, fetches prices & earnings, aggregates data
-/// - Uses the provided shared ChromeDriver pool for efficient parallel scraping
-/// - Saves optimized events
-///
-/// # Arguments
-/// * `config` - The application configuration.
-/// * `pool` - Shared pool of ChromeDriver instances for scraping.
-///
-/// # Errors
-/// Returns an error if any step in the update process fails.
-pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
-    let msg = "=== Starting LEI-based corporate full update ===";
-    println!("{}", msg);
-    logger::log_info(msg).await;
+/// Main corporate update entry point with shutdown awareness
+pub async fn run_full_update(
+    config: &Config, 
+    pool: &Arc<ChromeDriverPool>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<()> {
+    logger::log_info("=== Corporate Update ===").await;

-    // Initialize paths
    let paths = DataPaths::new(".")?;

-    // 1. Load fresh GLEIF ISIN ↔ LEI mapping
-    logger::log_info("Corporate Update: Loading GLEIF ISIN ↔ LEI mapping...").await;
-    let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
-        Ok(map) => {
-            let msg = format!("Corporate Update: Loaded GLEIF mapping with {} LEI entries", map.len());
-            println!("{}", msg);
-            logger::log_info(&msg).await;
-            map
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 1: Downloading GLEIF CSV...").await;
+    let gleif_csv_path = match download_isin_lei_csv().await? {
+        Some(p) => {
+            logger::log_info(&format!("  ✓ GLEIF CSV at: {}", p)).await;
+            p
        }
-        Err(e) => {
-            let msg = format!("Corporate Update: Warning - Could not load GLEIF ISIN↔LEI mapping: {}", e);
-            eprintln!("{}", msg);
-            logger::log_warn(&msg).await;
-            HashMap::new()
+        None => {
+            logger::log_warn("  ✗ Could not obtain GLEIF CSV").await;
+            return Ok(());
        }
    };

-    // 2. Load OpenFIGI mapping value lists (cached)
-    logger::log_info("Corporate Update: Loading OpenFIGI type lists...").await;
-    if let Err(e) = load_figi_type_lists().await {
-        let msg = format!("Corporate Update: Warning - Could not load OpenFIGI type lists: {}", e);
-        eprintln!("{}", msg);
-        logger::log_warn(&msg).await;
-    }
-    logger::log_info("Corporate Update: OpenFIGI type lists loaded").await;
+    check_shutdown!(shutdown_flag);

-    // 3. Build FIGI → LEI map
-    logger::log_info("Corporate Update: Building FIGI → LEI map...").await;
-    let figi_to_lei:HashMap<String, Vec<FigiInfo>> = match build_lei_to_figi_infos(&lei_to_isins, None).await {
-        Ok(map) => {
-            let msg = format!("Corporate Update: Built FIGI map with {} entries", map.len());
-            println!("{}", msg);
-            logger::log_info(&msg).await;
-            map
-        }
-        Err(e) => {
-            let msg = format!("Corporate Update: Warning - Could not build FIGI→LEI map: {}", e);
-            eprintln!("{}", msg);
-            logger::log_warn(&msg).await;
-            HashMap::new()
-        }
-    };
+    logger::log_info("Step 2: Loading OpenFIGI metadata...").await;
+    load_figi_type_lists(&paths).await.ok();
+    logger::log_info("  ✓ OpenFIGI metadata loaded").await;

-    // 4. Load or build companies
-    logger::log_info("Corporate Update: Loading/building company securities...").await;
-    let securities = load_or_build_all_securities(&figi_to_lei).await?;
-    let msg = format!("Corporate Update: Processing {} companies", securities.0.len());
-    println!("{}", msg);
-    logger::log_info(&msg).await;
+    check_shutdown!(shutdown_flag);

-    // HashMap<Name, HashMap<ISIN, Ticker>> - unique pairs only
-    let companies: HashMap<String, HashMap<String, String>> = securities.0
-        .iter()
-        .fold(HashMap::new(), |mut acc, security| {
-            let mut isin_ticker_pairs: HashMap<String, String> = HashMap::new();
-            
-            // Collect all unique ISIN-Ticker pairs
-            for figi_infos in security.1.securities.values() {
-                for figi_info in figi_infos {
-                    if !figi_info.isin.is_empty() && !figi_info.ticker.is_empty() {
-                        isin_ticker_pairs.insert(figi_info.isin.clone(), figi_info.ticker.clone());
-                    }
-                }
-            }
-            
-            // Only add if there are pairs
-            if !isin_ticker_pairs.is_empty() {
-                acc.insert(security.1.name.clone(), isin_ticker_pairs);
-            }
-            acc
-        });
+    logger::log_info("Step 3: Checking LEI-FIGI mapping status...").await;
+    let all_mapped = update_lei_mapping(&paths, &gleif_csv_path, None).await?;
    
-    logger::log_info(&format!("Corporate Update: Saving {} companies to JSONL", companies.len())).await;
-    save_companies_to_jsonl(&paths, &companies).await.expect("Failed to save companies List.");
-    logger::log_info("Corporate Update: Companies saved successfully").await;
+    if !all_mapped {
+        logger::log_warn("  ⚠ Some LEIs failed to map - continuing with partial data").await;
+    } else {
+        logger::log_info("  ✓ All LEIs successfully mapped").await;
+    }

-    // 5. Load existing earnings events (for change detection)
-    logger::log_info("Corporate Update: Loading existing events...").await;
-    let existing_events = match load_existing_events(&paths).await {
-        Ok(events) => {
-            let msg = format!("Corporate Update: Loaded {} existing events", events.len());
-            println!("{}", msg);
-            logger::log_info(&msg).await;
-            events
-        }
-        Err(e) => {
-            let msg = format!("Corporate Update: Warning - Could not load existing events: {}", e);
-            eprintln!("{}", msg);
-            logger::log_warn(&msg).await;
-            HashMap::new()
-        }
-    };
+    check_shutdown!(shutdown_flag);

-    // 5. Use the provided pool (no need to create a new one)
-    let pool_size = pool.get_number_of_instances(); // Use the size from the shared pool
-    logger::log_info(&format!("Corporate Update: Using pool size: {}", pool_size)).await;
+    logger::log_info("Step 4: Building securities map (streaming)...").await;
+    update_securities(&paths).await?;
+    logger::log_info("  ✓ Securities map updated").await;

-    // Process companies in parallel using the shared pool
-    /*let results: Vec<_> = stream::iter(companies.into_iter())
-        .map(|company| {
-            let pool_clone = pool.clone();
-            async move {
-                process_company_data(&company, &pool_clone, &mut existing_events).await
-            }
-        })
-        .buffer_unordered(pool_size)
-        .collect().await;
+    let paths = DataPaths::new(".")?;

-    // Handle results (e.g., collect changes)
-    let mut all_changes = Vec::new();
-    for result in results {
-        if let Ok(ProcessResult { changes }) = result {
-            all_changes.extend(changes);
-        }
-    }*/
+    check_shutdown!(shutdown_flag);

-    logger::log_info(&format!("Corporate Update: Saving {} events to optimized storage", existing_events.len())).await;
-    save_optimized_events(&paths, existing_events).await?;
-    logger::log_info("Corporate Update: Events saved successfully").await;
-    //save_changes(&all_changes).await?;
+    logger::log_info("Step 5: Building companies.jsonl with Yahoo Data...").await;
+    let count = update_companies(&paths, pool, shutdown_flag, config, &None).await?;
+    logger::log_info(&format!("  ✓ Saved {} companies", count)).await;

-    let msg = "✓ Corporate update complete";
-    println!("{}", msg);
-    logger::log_info(msg).await;
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 6: Cleansing companies with missing essential data...").await;
+    let cleansed_count = companies_yahoo_cleansed_no_data(&paths).await?;
+    logger::log_info(&format!("  ✓ {} companies found on Yahoo ready for further use in companies_yahoo.jsonl", cleansed_count)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    let proxy_pool = pool.get_proxy_pool()
+        .ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must be created with VPN proxy rotation enabled"))?;
+
+    logger::log_info("Creating YahooClientPool with proxy rotation...").await;
+    let yahoo_pool = Arc::new(YahooClientPool::new(proxy_pool, config, None).await?);
+    logger::log_info(&format!("✓ YahooClientPool ready with {} clients", yahoo_pool.num_clients().await)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 7: Cleansing companies with too low profile (with abort-safe persistence)...").await;
+    let cleansed_count = companies_yahoo_cleansed_low_profile(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
+    logger::log_info(&format!("  ✓ {} companies with sufficient profile ready for analytics", cleansed_count)).await;
+
+    check_shutdown!(shutdown_flag);
+    
+    logger::log_info("Step 8: Enriching companies with Yahoo Events (with abort-safe persistence)...").await;
+    let enriched_count = enrich_companies_with_events(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
+    logger::log_info(&format!("  ✓ {} companies enriched with event data", enriched_count)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 9: Enriching companies with Yahoo Options (with abort-safe persistence)...").await;
+    let options_count = enrich_companies_with_option(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
+    logger::log_info(&format!("  ✓ {} companies enriched with options data", options_count)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 10: Enriching companies with Yahoo Chart (with abort-safe persistence)...").await;
+    let chart_count = enrich_companies_with_chart(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
+    logger::log_info(&format!("  ✓ {} companies enriched with chart data", chart_count)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 11: Collecting FX rates...").await;
+    let fx_count = collect_fx_rates(&paths, config, yahoo_pool.clone(), shutdown_flag).await?;
+    logger::log_info(&format!("  ✓ Collected {} FX rates", fx_count)).await;
+
+    check_shutdown!(shutdown_flag);
+
+    logger::log_info("Step 12: Collecting exchange information...").await;
+    let exchange_count = collect_and_save_exchanges(&paths).await?;
+    logger::log_info(&format!("  ✓ Collected {} exchanges", exchange_count)).await;
+
+    logger::log_info("=== Corporate update complete === ").await;
    Ok(())
 }

-pub struct ProcessResult {
-    pub changes: Vec<CompanyEventChange>,
-}
-
-pub fn process_batch(
-    new_events: &[CompanyEvent],
-    existing: &mut HashMap<String, CompanyEvent>,
-    today: &str,
-) -> ProcessResult {
-    let mut changes = Vec::new();
-
-    for new in new_events {
-        let key = event_key(new);
-
-        if let Some(old) = existing.get(&key) {
-            changes.extend(detect_changes(old, new, today));
-            existing.insert(key, new.clone());
-            continue;
-        }
-
-        // Check for time change on same date
-        let date_key = format!("{}|{}", new.ticker, new.date);
-        let mut found_old = None;
-        for (k, e) in existing.iter() {
-            if format!("{}|{}", e.ticker, e.date) == date_key && k != &key {
-                found_old = Some((k.clone(), e.clone()));
-                break;
-            }
-        }
-
-        if let Some((old_key, old_event)) = found_old {
-            if new.date.as_str() > today {
-                changes.push(CompanyEventChange {
-                    ticker: new.ticker.clone(),
-                    date: new.date.clone(),
-                    field_changed: "time".to_string(),
-                    old_value: old_event.time.clone(),
-                    new_value: new.time.clone(),
-                    detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
-                });
-            }
-            existing.remove(&old_key);
-        }
-
-        existing.insert(key, new.clone());
-    }
-
-    ProcessResult { changes }
-}
--- a/src/corporate/update_companies.rs
+++ b/src/corporate/update_companies.rs
@@ -0,0 +1,907 @@
+// src/corporate/update_companies.rs
+use super::{types::*, yahoo_company_extraction::*, helpers::*};
+use crate::util::directories::DataPaths;
+use crate::util::integrity::{DataStage, StateManager, file_reference};
+use crate::util::logger;
+use crate::scraper::webdriver::ChromeDriverPool;
+use crate::scraper::hard_reset::perform_hard_reset;
+use crate::corporate::checkpoint_helpers;
+use crate::config::Config;
+
+use tokio::sync::mpsc;
+use tokio::io::AsyncWriteExt;
+use tokio::fs::OpenOptions;
+use tokio::time::sleep;
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::Duration;
+use futures::stream::{FuturesUnordered, StreamExt};
+use anyhow::{anyhow, Result};
+
+/// Commands consumed by the log writer task; every write to the update log and the checkpoint file goes through it
+enum LogCommand {
+    Write(CompanyData), // append this company as one JSON line to the update log
+    Checkpoint, // rewrite the checkpoint file from the in-memory map, then clear the log
+    Shutdown, // stop the writer loop
+}
+
+/// Result from processing a single company (presumably produced by `process_single_company_validated` - TODO confirm)
+struct CompanyProcessResult {
+    company: CompanyData, // processed record; this is what gets sent to the log writer
+    is_update: bool, // NOTE(review): not read anywhere in this part of the file - confirm it is still needed
+}
+
+/// Decide whether a company still has to go through Yahoo ticker resolution.
+///
+/// A company needs processing when any of the following holds:
+/// - it has no entry in `existing_companies` yet,
+/// - one of its non-empty ISINs (taken from `company_info.securities`) is
+///   missing from the stored `isin_tickers_map`, or that map is absent,
+/// - one of those ISINs has no ticker that starts with `YAHOO:` other than
+///   the `YAHOO:ERROR` marker.
+///
+/// Any other `YAHOO:`-prefixed ticker counts as resolved. A company that has
+/// an entry but no non-empty ISIN at all is treated as complete.
+///
+/// Returns `true` when the company must be (re)processed, `false` to skip it.
+fn company_needs_processing(
+    company_name: &str,
+    company_info: &CompanyData,
+    existing_companies: &HashMap<String, CompanyData>,
+) -> bool {
+    // Unknown company: nothing has been stored for it so far
+    let stored = match existing_companies.get(company_name) {
+        Some(entry) => entry,
+        None => return true,
+    };
+
+    // Ticker map of the stored entry; `None` makes every ISIN count as missing
+    let known_tickers = stored.isin_tickers_map.as_ref();
+
+    // Look at the ISIN of every FIGI record of every security. Repeated ISINs
+    // are simply checked again, which cannot change the verdict.
+    company_info
+        .securities
+        .values()
+        .flatten()
+        .map(|figi_info| &figi_info.isin)
+        .filter(|isin| !isin.is_empty())
+        .any(|isin| {
+            // Tickers recorded for this ISIN, if the ISIN is known at all
+            let recorded = known_tickers.and_then(|map| map.get(isin));
+
+            match recorded {
+                // ISIN not in the map (or no map at all): needs processing
+                None => true,
+                // ISIN known: needs processing unless a usable Yahoo ticker
+                // exists; the error marker means the lookup has to be retried
+                Some(tickers) => {
+                    let has_valid_yahoo = tickers.iter().any(|t| t.starts_with("YAHOO:") && t != "YAHOO:ERROR");
+                    !has_valid_yahoo
+                }
+            }
+        })
+}
+
+/// Builds companies.jsonl from the common-stocks securities with abort-safe log + checkpoint persistence and hard reset handling; returns the number of companies written by this run (the securities count if the step is already valid, 0 if common_stocks.jsonl is missing)
+pub async fn update_companies(
+    paths: &DataPaths, // resolves the FIGI securities, data and integrity directories
+    pool: &Arc<ChromeDriverPool>, // initial pool; kept behind pool_mutex so perform_hard_reset can work on it
+    shutdown_flag: &Arc<AtomicBool>, // polled after every finished task; when set the step is marked invalid
+    config: &Config, // forwarded to perform_hard_reset
+    monitoring: &Option<crate::monitoring::MonitoringHandle>, // forwarded to perform_hard_reset
+) -> anyhow::Result<usize> {
+    // Configuration constants
+    const CHECKPOINT_INTERVAL: usize = 50; // log writes before a checkpoint is requested
+    const FSYNC_BATCH_SIZE: usize = 10; // log writes before a forced fsync
+    const FSYNC_INTERVAL_SECS: u64 = 10; // max age of unsynced writes, checked on each write
+    const CONCURRENCY_LIMIT: usize = 100; // companies processed concurrently
+    
+    // Wrap pool in mutex for potential replacement
+    let pool_mutex = Arc::new(tokio::sync::Mutex::new(Arc::clone(pool)));
+    
+    // Synchronization for hard reset
+    let reset_in_progress = Arc::new(tokio::sync::Mutex::new(false));
+        
+    let securities_path = paths.figi_securities_dir();
+    let securities_checkpoint = securities_path.join("common_stocks.jsonl");
+    let securities_log = securities_path.join("common_stocks.log.jsonl");
+    
+    if !securities_checkpoint.exists() {
+        logger::log_warn("No common_stocks.jsonl found").await;
+        return Ok(0);
+    }
+    
+    // Load securities from checkpoint and replay log
+    logger::log_info("Loading common stocks from JSONL checkpoint and log...").await;
+    let securities = load_securities_from_jsonl(&securities_checkpoint, &securities_log).await?;
+    logger::log_info(&format!("Loaded {} companies from common stocks", securities.len())).await;
+    
+    let companies_path = paths.data_dir().join("companies.jsonl");
+    let log_path = paths.data_dir().join("companies_updates.log");
+    
+    if let Some(parent) = companies_path.parent() {
+        tokio::fs::create_dir_all(parent).await?;
+    }
+
+    let manager = StateManager::new(paths.integrity_dir()).await?;
+    let content_reference = file_reference(&companies_path);
+    let step_name = "corporate_companies_update";
+    let data_stage = DataStage::Data;
+
+    if manager.is_step_valid(step_name).await? {
+        logger::log_info("  Companies data already built and valid").await;
+        return Ok(securities.len());
+    }
+    logger::log_info("  Companies data incomplete or missing, proceeding with update").await;
+    let entry: crate::util::integrity::StateEntry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;
+
+    // === RECOVERY PHASE: Load checkpoint + replay log ===
+    let existing_companies = checkpoint_helpers::load_checkpoint_with_log(
+        &companies_path,
+        &log_path,
+        "companies.jsonl"
+    ).await?;
+    
+    // === SETUP LOG WRITER TASK ===
+    let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
+    
+    let log_file_init = OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&log_path)
+        .await?;
+    
+    let companies_path_clone = companies_path.clone();
+    let log_path_clone = log_path.clone();
+    let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
+    
+    // Clone the Arc for the writer task (Arc clone is cheap, just increments ref count)
+    let existing_companies_writer_for_task = Arc::clone(&existing_companies_writer);
+    
+    let write_tx_for_writer = write_tx.clone(); // NOTE: despite the name, this clone feeds the writer from the main loop; write_tx itself moves into the task
+    let writer_task = tokio::spawn(async move {
+        let mut log_file = log_file_init;
+        let mut writes_since_fsync = 0;
+        let mut last_fsync = std::time::Instant::now();
+        let mut updates_since_checkpoint = 0;
+        let mut count = 0;
+        let mut new_count = 0;
+        let mut updated_count = 0;
+        
+        while let Some(cmd) = write_rx.recv().await {
+            match cmd {
+                LogCommand::Write(company) => {
+                    // Write to log: record + newline in one write; an unserializable record is skipped instead of panicking the writer
+                    let mut line = match serde_json::to_string(&company) {
+                        Ok(json) => json,
+                        Err(e) => { logger::log_error(&format!("Failed to serialize company: {}", e)).await; continue; }
+                    };
+                    line.push('\n');
+                    if let Err(e) = log_file.write_all(line.as_bytes()).await {
+                        logger::log_error(&format!("Failed to write to log: {}", e)).await;
+                        break;
+                    }
+                    
+                    writes_since_fsync += 1;
+                    updates_since_checkpoint += 1;
+                    count += 1;
+                    
+                    // Update in-memory state
+                    let mut existing_companies = existing_companies_writer_for_task.lock().await;
+                    let is_update = existing_companies.contains_key(&company.name);
+                    existing_companies.insert(company.name.clone(), company);
+                    drop(existing_companies);
+                    
+                    if is_update {
+                        updated_count += 1;
+                    } else {
+                        new_count += 1;
+                    }
+                    
+                    // Batched + time-based fsync
+                    let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE 
+                        || last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
+                    
+                    if should_fsync {
+                        if let Err(e) = log_file.flush().await {
+                            logger::log_error(&format!("Failed to flush: {}", e)).await;
+                            break;
+                        }
+                        if let Err(e) = log_file.sync_data().await {
+                            logger::log_error(&format!("Failed to fsync: {}", e)).await;
+                            break;
+                        }
+                        writes_since_fsync = 0;
+                        last_fsync = std::time::Instant::now();
+                    }
+                }
+                LogCommand::Checkpoint => {
+                    if let Err(e) = log_file.flush().await {
+                        logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
+                        break;
+                    }
+                    if let Err(e) = log_file.sync_data().await {
+                        logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
+                        break;
+                    }
+                    
+                    let existing_companies = existing_companies_writer_for_task.lock().await;
+                    let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
+                    drop(existing_companies);
+                    
+                    let temp_path = companies_path_clone.with_extension("tmp");
+                    match tokio::fs::File::create(&temp_path).await {
+                        Ok(mut temp_file) => {
+                            let mut checkpoint_ok = true;
+                            for company in &companies_vec {
+                                if let Ok(line) = serde_json::to_string(company) {
+                                    if temp_file.write_all(line.as_bytes()).await.is_err() ||
+                                       temp_file.write_all(b"\n").await.is_err() {
+                                        checkpoint_ok = false;
+                                        break;
+                                    }
+                                }
+                            }
+                            
+                            if checkpoint_ok {
+                                if temp_file.flush().await.is_ok() && 
+                                   temp_file.sync_data().await.is_ok() {
+                                    drop(temp_file);
+                                    
+                                    if tokio::fs::rename(&temp_path, &companies_path_clone).await.is_ok() {
+                                        if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
+                                            logger::log_info(&format!(
+                                                "✓ Checkpoint created ({} companies), log cleared",
+                                                companies_vec.len()
+                                            )).await;
+                                            
+                                            if let Ok(new_log) = OpenOptions::new()
+                                                .create(true)
+                                                .append(true)
+                                                .open(&log_path_clone)
+                                                .await {
+                                                log_file = new_log;
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
+                        }
+                    }
+                    updates_since_checkpoint = 0;
+                }
+                LogCommand::Shutdown => {
+                    logger::log_info("Writer shutting down...").await;
+                    break;
+                }
+            }
+            
+            // Periodic checkpoint trigger; try_send so the sole consumer never blocks on its own full queue
+            if updates_since_checkpoint >= CHECKPOINT_INTERVAL && write_tx.try_send(LogCommand::Checkpoint).is_ok() {
+                updates_since_checkpoint = 0; // one request per interval instead of one per subsequent write
+            }
+        }
+        
+        // Final fsync
+        let _ = log_file.flush().await;
+        let _ = log_file.sync_data().await;
+        
+        logger::log_info(&format!(
+            "Writer finished: {} total ({} new, {} updated)",
+            count, new_count, updated_count
+        )).await;
+        
+        (count, new_count, updated_count)
+    });
+    
+    // === MAIN PROCESSING LOOP ===
+    let total = securities.len();
+    logger::log_info(&format!("Processing {} companies with concurrency limit {}", total, CONCURRENCY_LIMIT)).await;
+    
+    let mut tasks = FuturesUnordered::new();
+    
+    // Build initial pending list with proper filtering
+    let mut pending: Vec<(String, CompanyData)> = securities.iter()
+        .filter(|(name, info)| company_needs_processing(name, info, &existing_companies))
+        .map(|(name, info)| (name.clone(), info.clone()))
+        .collect();
+    
+    logger::log_info(&format!(
+        "Initial scan: {} companies need processing ({} already complete)",
+        pending.len(),
+        total - pending.len()
+    )).await;
+    
+    let mut processed = 0;
+    let mut hard_reset_count = 0;
+    let mut aborted = false; // set when the loop ends early (failed hard reset or dead writer)
+    // Spawn initial batch
+    for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
+        if let Some((name, company_info)) = pending.pop() {
+            let current_pool = {
+                let pool_guard = pool_mutex.lock().await;
+                Arc::clone(&*pool_guard)
+            };
+            
+            let existing = existing_companies_writer.lock().await.get(&name).cloned(); // live state, not the startup snapshot
+            let shutdown_flag_clone = Arc::clone(shutdown_flag);
+            
+            let task = tokio::spawn(async move {
+                process_single_company_validated(
+                    name,
+                    company_info,
+                    existing,
+                    &current_pool,
+                    &shutdown_flag_clone,
+                ).await
+            });
+            
+            tasks.push(task);
+        }
+    }
+    
+    // Process results and spawn new tasks
+    while let Some(task_result) = tasks.next().await {
+        // Check for shutdown
+        if shutdown_flag.load(Ordering::SeqCst) {
+            logger::log_warn("Shutdown signal received, stopping processing").await;
+            break;
+        }
+        
+        match task_result {
+            Ok(Ok(Some(result))) => {
+                // Success: send to writer; a closed channel means the writer has stopped, so further work would be lost
+                if write_tx_for_writer.send(LogCommand::Write(result.company)).await.is_err() { aborted = true; break; }
+                processed += 1;
+                
+                // Log progress every 100 companies
+                if processed % 100 == 0 {
+                    logger::log_info(&format!(
+                        "Progress: {}/{} companies processed ({} resets)", 
+                        processed, 
+                        total,
+                        hard_reset_count
+                    )).await;
+                }
+                
+                // Spawn next task if available
+                if let Some((name, company_info)) = pending.pop() {
+                    let current_pool = {
+                        let pool_guard = pool_mutex.lock().await;
+                        Arc::clone(&*pool_guard)
+                    };
+                    
+                    let existing = existing_companies_writer.lock().await.get(&name).cloned();
+                    let shutdown_flag_clone = Arc::clone(shutdown_flag);
+                    
+                    let task = tokio::spawn(async move {
+                        process_single_company_validated(
+                            name,
+                            company_info,
+                            existing,
+                            &current_pool,
+                            &shutdown_flag_clone,
+                        ).await
+                    });
+                    
+                    tasks.push(task);
+                }
+            }
+            Ok(Ok(None)) => {
+                // No result (shutdown or skip)
+                processed += 1;
+                
+                if let Some((name, company_info)) = pending.pop() {
+                    let current_pool = {
+                        let pool_guard = pool_mutex.lock().await;
+                        Arc::clone(&*pool_guard)
+                    };
+                    
+                    let existing = existing_companies_writer.lock().await.get(&name).cloned();
+                    let shutdown_flag_clone = Arc::clone(shutdown_flag);
+                    
+                    let task = tokio::spawn(async move {
+                        process_single_company_validated(
+                            name,
+                            company_info,
+                            existing,
+                            &current_pool,
+                            &shutdown_flag_clone,
+                        ).await
+                    });
+                    
+                    tasks.push(task);
+                }
+            }
+            Ok(Err(e)) => {
+                let error_msg = e.to_string();
+                
+                if error_msg.contains("HARD_RESET_REQUIRED") {
+                    // Check if reset already in progress (race condition protection)
+                    let mut reset_lock = reset_in_progress.lock().await;
+                    if *reset_lock {
+                        logger::log_info("Hard reset already in progress, skipping duplicate").await;
+                        processed += 1;
+                        continue;
+                    }
+                    *reset_lock = true;
+                    drop(reset_lock);  // Release lock during reset
+                    
+                    logger::log_error("🔴 HARD RESET THRESHOLD REACHED - INITIATING RESET SEQUENCE").await;
+                    logger::log_warn("Draining active tasks before hard reset...").await;
+                    
+                    // Save remaining pending count
+                    let remaining_count = pending.len();
+                    
+                    // Stop spawning new tasks
+                    pending.clear();
+                    
+                    // Wait for all active tasks to complete
+                    let mut drained = 0;
+                    while let Some(_) = tasks.next().await {
+                        drained += 1;
+                        if drained % 10 == 0 {
+                            logger::log_info(&format!("Drained {} tasks...", drained)).await;
+                        }
+                    }
+                    
+                    logger::log_info(&format!(
+                        "All tasks drained ({} active). {} companies need reprocessing.", 
+                        drained,
+                        remaining_count
+                    )).await;
+                    
+                    // Perform the actual hard reset
+                    match perform_hard_reset(&pool_mutex, config, paths, monitoring, shutdown_flag).await {
+                        Ok(()) => {
+                            logger::log_info("✅ Hard reset completed successfully").await;
+                            hard_reset_count += 1;
+                            
+                            // Reset the error counter
+                            {
+                                let pool_guard = pool_mutex.lock().await;
+                                let current_pool = Arc::clone(&*pool_guard);
+                                current_pool.get_reset_controller().reset();
+                            }
+                            logger::log_info("✓ Error counter cleared").await;
+                            
+                            // Rebuild pending list by checking which companies need processing
+                            logger::log_info("Rebuilding pending queue with proper Yahoo data checks...").await;
+                            
+                            // Get current state of written companies
+                            let current_existing = {
+                                let companies = existing_companies_writer.lock().await;
+                                companies.clone()
+                            };
+                            
+                            // Reload all securities from disk (checkpoint + log)
+                            logger::log_info("Reloading securities from JSONL...").await;
+                            let all_securities = load_securities_from_jsonl(&securities_checkpoint, &securities_log).await?;
+                            logger::log_info(&format!("Reloaded {} companies", all_securities.len())).await;
+                            
+                            // Build pending list: only companies that need processing
+                            pending = all_securities.iter()
+                                .filter(|(name, info)| company_needs_processing(name, info, &current_existing))
+                                .map(|(name, info)| (name.clone(), info.clone()))
+                                .collect();
+                            
+                            logger::log_info(&format!(
+                                "Restarting with {} remaining companies (out of {} total)", 
+                                pending.len(),
+                                total
+                            )).await;
+                            
+                            // Only continue if there's work to do
+                            if pending.is_empty() {
+                                logger::log_info("All companies have complete data, exiting").await;
+                                
+                                // Clear reset flag
+                                let mut reset_lock = reset_in_progress.lock().await;
+                                *reset_lock = false;
+                                drop(reset_lock);
+                                
+                                break;  // Exit main loop
+                            }
+                            
+                            // Respawn initial batch with NEW pool
+                            for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
+                                if let Some((name, company_info)) = pending.pop() {
+                                    let current_pool = {
+                                        let pool_guard = pool_mutex.lock().await;
+                                        Arc::clone(&*pool_guard)
+                                    };
+                                    
+                                    let existing = existing_companies_writer.lock().await.get(&name).cloned(); // retried companies must see what was already written
+                                    let shutdown_flag_clone = Arc::clone(shutdown_flag);
+                                    
+                                    let task = tokio::spawn(async move {
+                                        process_single_company_validated(
+                                            name,
+                                            company_info,
+                                            existing,
+                                            &current_pool,
+                                            &shutdown_flag_clone,
+                                        ).await
+                                    });
+                                    
+                                    tasks.push(task);
+                                }
+                            }
+                            
+                            // Clear reset flag
+                            let mut reset_lock = reset_in_progress.lock().await;
+                            *reset_lock = false;
+                            drop(reset_lock);
+                            
+                            // ✅ Continue processing (don't spawn duplicate task)
+                            continue;
+                        }
+                        Err(reset_err) => {
+                            logger::log_error(&format!("Hard reset failed: {}", reset_err)).await;
+                            
+                            // Clear reset flag
+                            let mut reset_lock = reset_in_progress.lock().await;
+                            *reset_lock = false;
+                            drop(reset_lock);
+                            
+                            aborted = true; // exit if hard reset fails; the run is incomplete and must not be marked valid
+                            break;
+                        }
+                    }
+                } else {
+                    // Regular error
+                    logger::log_warn(&format!("Company processing error: {}", error_msg)).await;
+                    processed += 1;
+                    
+                    // Spawn next task
+                    if let Some((name, company_info)) = pending.pop() {
+                        let current_pool = {
+                            let pool_guard = pool_mutex.lock().await;
+                            Arc::clone(&*pool_guard)
+                        };
+                        
+                        let existing = existing_companies_writer.lock().await.get(&name).cloned();
+                        let shutdown_flag_clone = Arc::clone(shutdown_flag);
+                        
+                        let task = tokio::spawn(async move {
+                            process_single_company_validated(
+                                name,
+                                company_info,
+                                existing,
+                                &current_pool,
+                                &shutdown_flag_clone,
+                            ).await
+                        });
+                        
+                        tasks.push(task);
+                    }
+                }
+            }
+            Err(e) => {
+                // Task panic
+                logger::log_error(&format!("Task panic: {}", e)).await;
+                processed += 1;
+                
+                // Spawn next task
+                if let Some((name, company_info)) = pending.pop() {
+                    let current_pool = {
+                        let pool_guard = pool_mutex.lock().await;
+                        Arc::clone(&*pool_guard)
+                    };
+                    
+                    let existing = existing_companies_writer.lock().await.get(&name).cloned();
+                    let shutdown_flag_clone = Arc::clone(shutdown_flag);
+                    
+                    let task = tokio::spawn(async move {
+                        process_single_company_validated(
+                            name,
+                            company_info,
+                            existing,
+                            &current_pool,
+                            &shutdown_flag_clone,
+                        ).await
+                    });
+                    
+                    tasks.push(task);
+                }
+            }
+        }
+    }
+    
+    logger::log_info("Main processing loop completed").await;
+    
+    // Signal writer to finish (a failed send means the writer already stopped on its own)
+    aborted |= write_tx_for_writer.send(LogCommand::Checkpoint).await.is_err();
+    let _ = write_tx_for_writer.send(LogCommand::Shutdown).await;
+    drop(write_tx_for_writer);
+    
+    // Wait for writer to finish
+    let (final_count, final_new, final_updated) = writer_task.await
+        .unwrap_or((0, 0, 0));
+    
+    logger::log_info(&format!(
+        "✅ Completed: {} total companies ({} new, {} updated, {} hard resets)", 
+        final_count, final_new, final_updated, hard_reset_count
+    )).await;
+
+    // Record the outcome for the entry created above (DataStage::Data, reference to
+    // companies.jsonl). The step is only marked valid when the run finished on its own:
+    // neither a shutdown request nor a failed hard reset / dead writer ended it early.
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn("Shutdown detected during company update - marking as invalid for retry").await;
+        manager.mark_invalid(
+            entry,
+            format!("Invalid: processed {} of {} companies before shutdown", final_count, total),
+        ).await?;
+    } else if aborted {
+        logger::log_warn("Company update aborted early - marking as invalid for retry").await;
+        manager.mark_invalid(entry, format!("Invalid: aborted after {} of {} companies", final_count, total)).await?;
+    } else {
+        // Only mark complete if we got here without shutdown or abort
+        manager.mark_valid(entry).await?;
+    }
+    
+    Ok(final_count)
+}
+
+/// Builds a name-keyed map of `CompanyData` by loading the checkpoint JSONL and then replaying the log JSONL on top of it; file read errors are returned, unparsable lines are logged and skipped.
+async fn load_securities_from_jsonl(
+    checkpoint_path: &std::path::Path,
+    log_path: &std::path::Path,
+) -> anyhow::Result<HashMap<String, CompanyData>> {
+    let mut securities: HashMap<String, CompanyData> = HashMap::new();
+    
+    // Load checkpoint first; a missing file is skipped rather than treated as an error
+    if checkpoint_path.exists() {
+        let content = tokio::fs::read_to_string(checkpoint_path).await?;
+        
+        for (line_num, line) in content.lines().enumerate() {
+            if line.trim().is_empty() || !line.ends_with('}') {
+                continue; // Skip blank lines and lines not ending in '}' (treated as incomplete records)
+            }
+            // Keyed by company name, so a later record with the same name replaces the earlier one
+            match serde_json::from_str::<CompanyData>(line) {
+                Ok(company_info) => {
+                    securities.insert(company_info.name.clone(), company_info);
+                }
+                Err(e) => {
+                    logger::log_warn(&format!(
+                        "Skipping invalid line {} in checkpoint: {}",
+                        line_num + 1, e
+                    )).await;
+                }
+            }
+        }
+    }
+    
+    // Replay log on top of the checkpoint (same-name entries from the log overwrite checkpoint entries)
+    if log_path.exists() {
+        let content = tokio::fs::read_to_string(log_path).await?;
+        
+        for (line_num, line) in content.lines().enumerate() {
+            if line.trim().is_empty() || !line.ends_with('}') {
+                continue; // Skip blank lines and lines not ending in '}' (treated as incomplete records)
+            }
+            // Reported line numbers are 1-based and counted within the log file only
+            match serde_json::from_str::<CompanyData>(line) {
+                Ok(company_info) => {
+                    securities.insert(company_info.name.clone(), company_info);
+                }
+                Err(e) => {
+                    logger::log_warn(&format!(
+                        "Skipping invalid line {} in log: {}",
+                        line_num + 1, e
+                    )).await;
+                }
+            }
+        }
+    }
+    
+    Ok(securities)
+}
+
+/// Scrape with retry, validation, and shutdown awareness
+async fn scrape_with_retry(
+    pool: &Arc<ChromeDriverPool>,
+    isin: &str,
+    max_retries: u32,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> Result<Option<YahooCompanyData>> {
+    let mut retries = 0;
+    
+    loop {
+        // Check shutdown before each attempt
+        if shutdown_flag.load(Ordering::SeqCst) {
+            return Err(anyhow!("Aborted due to shutdown"));
+        }
+
+        if pool.should_perform_hard_reset() {
+            logger::log_error("HARD_RESET_REQUIRED detected before scrape attempt").await;
+            return Err(anyhow!("HARD_RESET_REQUIRED"));
+        }
+        
+        match scrape_company_details_by_isin(pool, isin, shutdown_flag).await {
+            Ok(result) => return Ok(result),
+            Err(e) => {
+                // Check if this is a hard reset required error
+                let error_msg = e.to_string();
+                if error_msg.contains("HARD_RESET_REQUIRED") {
+                    logger::log_error(&format!(
+                        "Hard reset required error for ISIN {}, propagating immediately",
+                        isin
+                    )).await;
+                    return Err(e);  // Propagate immediately, don't retry
+                }
+
+                if retries >= max_retries {
+                    logger::log_error(&format!(
+                        "All {} retries exhausted for ISIN {}: {}",
+                        max_retries, isin, e
+                    )).await;
+                    return Err(e);
+                }
+                
+                let backoff_ms = 1000 * 2u64.pow(retries);
+                let jitter_ms = random_range(0, 500);
+                let total_delay = backoff_ms + jitter_ms;
+                
+                logger::log_warn(&format!(
+                    "Retry {}/{} for ISIN {} after {}ms: {}",
+                    retries + 1, max_retries, isin, total_delay, e
+                )).await;
+                
+                sleep(Duration::from_millis(total_delay)).await;
+                retries += 1;
+            }
+        }
+    }
+}
+
+/// Resolves Yahoo tickers for every ISIN of one company, reusing results from `existing_entry`; returns `Ok(None)` on shutdown or when the company has no ISINs, and an error containing `HARD_RESET_REQUIRED` when a hard reset is needed.
+async fn process_single_company_validated(
+    name: String,
+    company_info: CompanyData,
+    existing_entry: Option<CompanyData>,
+    pool: &Arc<ChromeDriverPool>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<Option<CompanyProcessResult>> {
+    // Check shutdown at start
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn(&format!("Shutdown detected, skipping company: {}", name)).await;
+        return Ok(None);
+    }
+    
+    let is_update = existing_entry.is_some();
+    // Seed the ISIN -> tickers map from the existing entry (if any) so earlier Yahoo results are reused
+    let mut isin_tickers_map: HashMap<String, Vec<String>> =
+        existing_entry
+            .as_ref()
+            .and_then(|e| e.isin_tickers_map.clone())
+            .unwrap_or_default();
+    
+    // Collect unique ISIN-ticker pairs
+    let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
+    
+    for figi_infos in company_info.securities.values() {
+        for figi_info in figi_infos {
+            if !figi_info.isin.is_empty() {
+                let tickers = unique_isin_ticker_pairs
+                    .entry(figi_info.isin.clone())
+                    .or_default();
+                
+                if !figi_info.ticker.is_empty() && !tickers.contains(&figi_info.ticker) {
+                    tickers.push(figi_info.ticker.clone());
+                }
+            }
+        }
+    }
+    
+    // Process each ISIN independently with per-ISIN status checking
+    for (isin, figi_tickers) in unique_isin_ticker_pairs {
+        // Check shutdown before each ISIN
+        if shutdown_flag.load(Ordering::SeqCst) {
+            logger::log_warn(&format!(
+                "Shutdown detected while processing company: {}",
+                name
+            )).await;
+            break;
+        }
+
+        let tickers = isin_tickers_map
+            .entry(isin.clone())
+            .or_default();
+        
+        for figi_ticker in figi_tickers {
+            if !tickers.contains(&figi_ticker) {
+                tickers.push(figi_ticker);
+            }
+        }
+        
+        // Check if THIS SPECIFIC ISIN has valid Yahoo data (not ERROR)
+        let has_valid_yahoo = tickers.iter().any(|t| {
+            t.starts_with("YAHOO:") && t != "YAHOO:ERROR"
+            // Note: YAHOO:NO_RESULTS is valid (legitimately not found)
+        });
+        
+        if !has_valid_yahoo {
+            logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
+            tickers.retain(|t| !t.starts_with("YAHOO:"));
+            
+            match scrape_with_retry(pool, &isin, 3, shutdown_flag).await {
+                Ok(Some(details)) => {
+                    logger::log_info(&format!(
+                        "✓ Found Yahoo ticker {} for ISIN {} (company: {})",
+                        details.ticker, isin, name
+                    )).await;
+                    
+                    tickers.push(format!("YAHOO:{}", details.ticker));
+                },
+                Ok(None) => {
+                    logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
+                    tickers.push("YAHOO:NO_RESULTS".to_string());
+                },
+                Err(e) => {
+                    if shutdown_flag.load(Ordering::SeqCst) {
+                        logger::log_warn(&format!("Shutdown during scrape for ISIN {}", isin)).await;
+                        break;
+                    }
+
+                    // Check if this is a hard reset required error
+                    let error_msg = e.to_string();
+                    if error_msg.contains("HARD_RESET_REQUIRED") {
+                        logger::log_error(&format!(
+                            "Hard reset required during ISIN {} processing, propagating error",
+                            isin
+                        )).await;
+                        return Err(e);  // ← CRITICAL: Propagate immediately
+                    }
+
+                    logger::log_warn(&format!(
+                        "✗ Yahoo lookup error for ISIN {} (company: {}): {}",
+                        isin, name, e
+                    )).await;
+                    
+                    // Mark this ISIN as failed to enable retry
+                    tickers.push("YAHOO:ERROR".to_string());
+                }
+            }
+        }
+    }
+    
+    // Final shutdown check before returning result
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn(&format!(
+            "Shutdown detected, discarding incomplete result for: {}",
+            name
+        )).await;
+        return Ok(None);
+    }
+
+    if pool.should_perform_hard_reset() {
+        logger::log_error("HARD_RESET_REQUIRED detected during company processing").await;
+        return Err(anyhow!("HARD_RESET_REQUIRED"));
+    }
+    // `name` and `company_info` are owned by this function, so their fields are moved into the result instead of cloned
+    if !isin_tickers_map.is_empty() {
+        let company_entry = CompanyData {
+            name,
+            primary_isin: company_info.primary_isin,
+            securities: company_info.securities,
+            yahoo_company_data: company_info.yahoo_company_data,
+            isin_tickers_map: Some(isin_tickers_map),
+        };
+        
+        Ok(Some(CompanyProcessResult {
+            company: company_entry,
+            is_update,
+        }))
+    } else {
+        logger::log_warn(&format!("No ISINs found for company: {}", name)).await;
+        Ok(None)
+    }
+}
--- a/src/corporate/update_companies_cleanse.rs
+++ b/src/corporate/update_companies_cleanse.rs
@@ -0,0 +1,911 @@
+// src/corporate/update_companies_cleanse.rs
+use super::{helpers::*, types::*};
+use crate::config::Config;
+use crate::corporate::checkpoint_helpers;
+use crate::util::directories::DataPaths;
+use crate::util::integrity::{DataStage, StateManager, file_reference};
+use crate::util::logger;
+use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule};
+
+use std::result::Result::Ok;
+use chrono::{Utc};
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use tokio::fs::{File, OpenOptions};
+use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
+use futures::stream::{FuturesUnordered, StreamExt};
+use tokio::sync::mpsc;
+
+/// Result of processing a single company
+#[derive(Debug, Clone)]
+pub enum CompanyProcessResult {
+    Valid(CompanyData), // presumably: company passed validation and carries the data to persist — confirm at the producer
+    FilteredLowCap { name: String, market_cap: f64 }, // presumably: dropped for a market cap below some threshold (unit/currency not visible here)
+    FilteredNoPrice { name: String }, // presumably: dropped because no price data was found — confirm at the producer
+    Failed { company: CompanyData, error: String, is_transient: bool }, // processing error; NOTE(review): how `is_transient` is used is not visible here
+}
+
+/// Represents a write command to be serialized through the log writer
+enum LogCommand {
+    Write(CompanyData), // append the company as one JSON line to the log and update the writer's in-memory map
+    Checkpoint, // flush + fsync the log, rewrite the checkpoint via temp file + rename, then remove and reopen the log
+    Shutdown, // leave the writer loop; the writer does a final flush/fsync before returning its counters
+}
+
+
+/// Copies companies.jsonl to companies_yahoo.jsonl keeping only companies with at least one real Yahoo ticker (a `YAHOO:` entry other than `YAHOO:NO_RESULTS` / `YAHOO:ERROR`); returns the number of companies kept, or 0 if the input file is missing.
+pub async fn companies_yahoo_cleansed_no_data(paths: &DataPaths) -> Result<usize, anyhow::Error> {
+    let data_path = paths.data_dir();
+    
+    let input_path = data_path.join("companies.jsonl");
+    let output_path = data_path.join("companies_yahoo.jsonl");
+    
+    if !input_path.exists() {
+        logger::log_warn("companies.jsonl not found, skipping cleansing").await;
+        return Ok(0);
+    }
+
+    let manager = StateManager::new(paths.integrity_dir()).await?;
+    let step_name = "yahoo_companies_cleansed_no_data";
+    let content_reference = file_reference(&output_path);
+    // Step already valid: skip the work and report the number of non-empty lines in the existing output
+    if manager.is_step_valid(step_name).await? {
+        let output_content = tokio::fs::read_to_string(&output_path).await?;
+        let count = output_content.lines()
+            .filter(|line| !line.trim().is_empty())
+            .count();
+        
+        logger::log_info(&format!("  ✓ Found {} companies in companies_yahoo.jsonl", count)).await;
+        return Ok(count);
+    }
+    let entry = manager.create_entry(
+        step_name.to_string(),
+        content_reference.clone(),
+        DataStage::Data,
+    ).await?;
+    
+    logger::log_info("  Cleansing companies with missing Yahoo data...").await;
+    
+    logger::log_info(&format!("  Reading from: {:?}", input_path)).await;
+    logger::log_info(&format!("  Writing to: {:?}", output_path)).await;
+    
+    let file = File::open(&input_path).await?;
+    let reader = BufReader::new(file);
+    let mut lines = reader.lines();
+    
+    let mut output_file = File::create(&output_path).await?;
+    let mut valid_count = 0;
+    let mut removed_count = 0;
+    let mut total_count = 0;
+    
+    while let Some(line) = lines.next_line().await? {
+        if line.trim().is_empty() {
+            continue;
+        }
+        // Counts every non-empty input line, including ones that fail to parse below
+        total_count += 1;
+        
+        let company: CompanyData = match serde_json::from_str(&line) {
+            Ok(c) => c,
+            Err(e) => {
+                logger::log_warn(&format!("  Failed to parse company on line {}: {}", total_count, e)).await;
+                continue;
+            }
+        };
+        // Keep the company if any ISIN has a YAHOO:-prefixed ticker that is not one of the two sentinel values
+        let has_valid_yahoo = company.isin_tickers_map
+            .as_ref()
+            .map(|map| {
+                map.values()
+                    .flatten()
+                    .any(|ticker| {
+                        ticker.starts_with("YAHOO:") 
+                        && ticker != "YAHOO:NO_RESULTS" 
+                        && ticker != "YAHOO:ERROR"
+                    })
+            })
+            .unwrap_or(false);
+        
+        if has_valid_yahoo {
+            let json_line = serde_json::to_string(&company)?;
+            output_file.write_all(json_line.as_bytes()).await?;
+            output_file.write_all(b"\n").await?;
+            valid_count += 1;
+        } else {
+            removed_count += 1;
+            if removed_count <= 5 { // only the first 5 removals are logged individually
+                logger::log_info(&format!("  Removed company '{}' (no valid Yahoo ticker)", company.name)).await;
+            }
+        }
+        
+        if total_count % 1000 == 0 {
+            logger::log_info(&format!("  Processed {} companies...", total_count)).await;
+        }
+    }
+    
+    output_file.flush().await?;
+    
+    logger::log_info(&format!(
+        "  ✓ Cleansing complete: {} total → {} valid, {} removed", 
+        total_count, valid_count, removed_count
+    )).await;
+    
+    // Track completion with:
+    // - Content reference: the output file companies_yahoo.jsonl (`content_reference` above)
+    // - Data stage: Data (7-day TTL by default)
+    // - Input: companies.jsonl (lines that fail to parse are warned about and not written)
+    manager.mark_valid(entry).await?;
+    
+    Ok(valid_count)
+}
+
+/// Yahoo Low Profile Cleansing WITH ABORT-SAFE INCREMENTAL PERSISTENCE
+/// 
+/// # Features
+/// - Graceful shutdown (abort-safe)
+/// - Task panic isolation (tasks fail independently)
+/// - Crash-safe persistence (checkpoint + log with fsync)
+/// - Smart skip logic (only process incomplete data)
+/// - Uses pending queue instead of retry mechanism
+/// - Reuses companies_updates.log for persistence
+/// 
+/// # Persistence Strategy
+/// - Checkpoint: companies_yahoo_cleaned.jsonl (atomic state)
+/// - Log: companies_updates.log (append-only updates)
+/// - On restart: Load checkpoint + replay log
+/// - Periodic checkpoints (every 50 companies)
+/// - Batched fsync (every 10 writes or 10 seconds)
+pub async fn companies_yahoo_cleansed_low_profile(
+    paths: &DataPaths,
+    _config: &Config,
+    yahoo_pool: Arc<YahooClientPool>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<usize> {
+    // Configuration constants
+    const CHECKPOINT_INTERVAL: usize = 50;
+    const FSYNC_BATCH_SIZE: usize = 10;
+    const FSYNC_INTERVAL_SECS: u64 = 10;
+    const CONCURRENCY_LIMIT: usize = 50; // Limit parallel validation tasks
+    
+    let data_path = paths.data_dir();
+    
+    // File paths (reusing companies_updates.log)
+    let input_path = data_path.join("companies_yahoo.jsonl");
+    let checkpoint_path = data_path.join("companies_yahoo_cleaned.jsonl");
+    let log_path = data_path.join("companies_updates.log");
+    
+    // Check input exists
+    if !input_path.exists() {
+        logger::log_warn("  companies_yahoo.jsonl not found, skipping low profile cleansing").await;
+        return Ok(0);
+    }
+
+    let manager = StateManager::new(paths.integrity_dir()).await?;
+    let step_name = "yahoo_companies_cleansed_low_profile";
+    let content_reference = file_reference(&checkpoint_path);
+    
+    if manager.is_step_valid(step_name).await? {
+        let checkpoint_content = tokio::fs::read_to_string(&checkpoint_path).await?;
+        let count = checkpoint_content.lines()
+            .filter(|line| !line.trim().is_empty())
+            .count();
+        
+        logger::log_info(&format!("  ✓ Found {} companies in companies_yahoo_cleaned.jsonl", count)).await;
+        return Ok(count);
+    }
+    let entry = manager.create_entry(
+        step_name.to_string(),
+        content_reference.clone(),
+        DataStage::Data,
+    ).await?;
+    
+    logger::log_info("  Cleansing companies with low Yahoo profile...").await;
+    
+    // === RECOVERY PHASE: Load checkpoint + replay log ===
+    let mut existing_companies: HashMap<String, CompanyData> = HashMap::new();
+    let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
+    
+    if checkpoint_path.exists() {
+        logger::log_info("Loading checkpoint from companies_yahoo_cleaned.jsonl...").await;
+        let checkpoint_content = tokio::fs::read_to_string(&checkpoint_path).await?;
+        
+        for line in checkpoint_content.lines() {
+            if line.trim().is_empty() || !line.ends_with('}') {
+                continue; // Skip incomplete lines
+            }
+            
+            match serde_json::from_str::<CompanyData>(line) {
+                Ok(company) => {
+                    processed_names.insert(company.name.clone());
+                    existing_companies.insert(company.name.clone(), company);
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
+                }
+            }
+        }
+        logger::log_info(&format!("Loaded checkpoint with {} companies", existing_companies.len())).await;
+    }
+    
+    if log_path.exists() {
+        logger::log_info("Replaying update log...").await;
+        let log_content = tokio::fs::read_to_string(&log_path).await?;
+        let mut replayed = 0;
+        
+        for line in log_content.lines() {
+            if line.trim().is_empty() || !line.ends_with('}') {
+                continue; // Skip incomplete lines
+            }
+            
+            match serde_json::from_str::<CompanyData>(line) {
+                Ok(company) => {
+                    processed_names.insert(company.name.clone());
+                    existing_companies.insert(company.name.clone(), company);
+                    replayed += 1;
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
+                }
+            }
+        }
+        if replayed > 0 {
+            logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
+        }
+    }
+    
+    // === LOAD INPUT COMPANIES ===
+    logger::log_info(&format!("Loading companies from: {:?}", input_path)).await;
+    let input_companies = load_companies_from_jsonl(&input_path).await?;
+    logger::log_info(&format!("Loaded {} companies from input", input_companies.len())).await;
+    
+    // === BUILD PENDING LIST (smart skip logic) ===
+    let mut pending: Vec<CompanyData> = input_companies
+        .into_iter()
+        .filter(|company| company_needs_processing(company, &existing_companies))
+        .collect();
+    
+    logger::log_info(&format!(
+        "Initial scan: {} companies need processing ({} already complete)",
+        pending.len(),
+        existing_companies.len()
+    )).await;
+    
+    // === CONSOLIDATE LOG BEFORE EARLY EXIT ===
+    if pending.is_empty() {
+        logger::log_info("  ✓ All companies already processed").await;
+        
+        // Consolidate log into checkpoint before exiting
+        if checkpoint_helpers::log_has_content(&log_path).await {
+            checkpoint_helpers::consolidate_checkpoint(&checkpoint_path, &log_path, &existing_companies).await?;
+        }
+        
+        return Ok(existing_companies.len());
+    }
+    
+    // === SETUP LOG WRITER TASK ===
+    let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
+    
+    let log_file_init = OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&log_path)
+        .await?;
+    
+    let checkpoint_path_clone = checkpoint_path.clone();
+    let log_path_clone = log_path.clone();
+    let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
+    let existing_companies_writer_for_task = Arc::clone(&existing_companies_writer);
+    
+    let write_tx_for_writer = write_tx.clone();
+    let writer_task = tokio::spawn(async move {
+        let mut log_file = log_file_init;
+        let mut writes_since_fsync = 0;
+        let mut last_fsync = std::time::Instant::now();
+        let mut updates_since_checkpoint = 0;
+        let mut count = 0;
+        let mut new_count = 0;
+        let mut updated_count = 0;
+        
+        while let Some(cmd) = write_rx.recv().await {
+            match cmd {
+                LogCommand::Write(company) => {
+                    // Write to log
+                    let line = serde_json::to_string(&company).unwrap();
+                    if let Err(e) = log_file.write_all(line.as_bytes()).await {
+                        logger::log_error(&format!("Failed to write to log: {}", e)).await;
+                        break;
+                    }
+                    if let Err(e) = log_file.write_all(b"\n").await {
+                        logger::log_error(&format!("Failed to write newline: {}", e)).await;
+                        break;
+                    }
+                    
+                    writes_since_fsync += 1;
+                    updates_since_checkpoint += 1;
+                    count += 1;
+                    
+                    // Update in-memory state
+                    let mut existing_companies = existing_companies_writer_for_task.lock().await;
+                    let is_update = existing_companies.contains_key(&company.name);
+                    existing_companies.insert(company.name.clone(), company);
+                    drop(existing_companies);
+                    
+                    if is_update {
+                        updated_count += 1;
+                    } else {
+                        new_count += 1;
+                    }
+                    
+                    // Batched + time-based fsync
+                    let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE 
+                        || last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
+                    
+                    if should_fsync {
+                        if let Err(e) = log_file.flush().await {
+                            logger::log_error(&format!("Failed to flush: {}", e)).await;
+                            break;
+                        }
+                        if let Err(e) = log_file.sync_data().await {
+                            logger::log_error(&format!("Failed to fsync: {}", e)).await;
+                            break;
+                        }
+                        writes_since_fsync = 0;
+                        last_fsync = std::time::Instant::now();
+                    }
+                }
+                LogCommand::Checkpoint => {
+                    if let Err(e) = log_file.flush().await {
+                        logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
+                        break;
+                    }
+                    if let Err(e) = log_file.sync_data().await {
+                        logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
+                        break;
+                    }
+                    
+                    let existing_companies = existing_companies_writer_for_task.lock().await;
+                    let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
+                    drop(existing_companies);
+                    
+                    let temp_path = checkpoint_path_clone.with_extension("tmp");
+                    match tokio::fs::File::create(&temp_path).await {
+                        Ok(mut temp_file) => {
+                            let mut checkpoint_ok = true;
+                            for company in &companies_vec {
+                                if let Ok(line) = serde_json::to_string(company) {
+                                    if temp_file.write_all(line.as_bytes()).await.is_err() ||
+                                       temp_file.write_all(b"\n").await.is_err() {
+                                        checkpoint_ok = false;
+                                        break;
+                                    }
+                                }
+                            }
+                            
+                            if checkpoint_ok {
+                                if temp_file.flush().await.is_ok() && 
+                                   temp_file.sync_data().await.is_ok() {
+                                    drop(temp_file);
+                                    
+                                    if tokio::fs::rename(&temp_path, &checkpoint_path_clone).await.is_ok() {
+                                        if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
+                                            logger::log_info(&format!(
+                                                "✓ Checkpoint created ({} companies), log cleared",
+                                                companies_vec.len()
+                                            )).await;
+                                            
+                                            if let Ok(new_log) = OpenOptions::new()
+                                                .create(true)
+                                                .append(true)
+                                                .open(&log_path_clone)
+                                                .await {
+                                                log_file = new_log;
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        Err(e) => {
+                            logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
+                        }
+                    }
+                    updates_since_checkpoint = 0;
+                }
+                LogCommand::Shutdown => {
+                    logger::log_info("Writer shutting down...").await;
+                    break;
+                }
+            }
+            
+            // Periodic checkpoint trigger
+            if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
+                let _ = write_tx_for_writer.send(LogCommand::Checkpoint).await;
+            }
+        }
+        
+        // Final fsync
+        let _ = log_file.flush().await;
+        let _ = log_file.sync_data().await;
+        
+        logger::log_info(&format!(
+            "Writer finished: {} total ({} new, {} updated)",
+            count, new_count, updated_count
+        )).await;
+        
+        (count, new_count, updated_count)
+    });
+    
+    // Wrap paths in Arc for safe sharing across tasks
+    let paths = Arc::new((*paths).clone());
+
+    // === MAIN PROCESSING LOOP WITH TASK PANIC ISOLATION ===
+    let total = pending.len();
+    let mut tasks = FuturesUnordered::new();
+    
+    // Counters
+    let processed = Arc::new(AtomicUsize::new(0));
+    let valid_count = Arc::new(AtomicUsize::new(0));
+    let filtered_low_cap = Arc::new(AtomicUsize::new(0));
+    let filtered_no_price = Arc::new(AtomicUsize::new(0));
+    let failed_count = Arc::new(AtomicUsize::new(0));
+    
+    // Spawn initial batch
+    for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
+        if let Some(company) = pending.pop() {
+            spawn_validation_task(
+                company,
+                &yahoo_pool,
+                &paths,
+                &write_tx,
+                shutdown_flag,
+                &processed,
+                &valid_count,
+                &filtered_low_cap,
+                &filtered_no_price,
+                &failed_count,
+                total,
+                &mut tasks,
+            );
+        }
+    }
+    
+    // Process results and spawn new tasks (with task panic isolation)
+    while let Some(task_result) = tasks.next().await {
+        // Check for shutdown
+        if shutdown_flag.load(Ordering::SeqCst) {
+            logger::log_warn("Shutdown signal received, stopping processing").await;
+            break;
+        }
+        
+        match task_result {
+            Ok(Ok(_)) => {
+                // Success - spawn next task
+                if let Some(company) = pending.pop() {
+                    spawn_validation_task(
+                        company,
+                        &yahoo_pool,
+                        &paths,
+                        &write_tx,
+                        shutdown_flag,
+                        &processed,
+                        &valid_count,
+                        &filtered_low_cap,
+                        &filtered_no_price,
+                        &failed_count,
+                        total,
+                        &mut tasks,
+                    );
+                }
+            }
+            Ok(Err(e)) => {
+                // Processing error
+                logger::log_error(&format!("Company processing error: {}", e)).await;
+                
+                if let Some(company) = pending.pop() {
+                    spawn_validation_task(
+                        company,
+                        &yahoo_pool,
+                        &paths,
+                        &write_tx,
+                        shutdown_flag,
+                        &processed,
+                        &valid_count,
+                        &filtered_low_cap,
+                        &filtered_no_price,
+                        &failed_count,
+                        total,
+                        &mut tasks,
+                    );
+                }
+            }
+            Err(e) => {
+                // Task panic (isolated - doesn't crash entire process)
+                logger::log_error(&format!("Task panic: {}", e)).await;
+                
+                if let Some(company) = pending.pop() {
+                    spawn_validation_task(
+                        company,
+                        &yahoo_pool,
+                        &paths,
+                        &write_tx,
+                        shutdown_flag,
+                        &processed,
+                        &valid_count,
+                        &filtered_low_cap,
+                        &filtered_no_price,
+                        &failed_count,
+                        total,
+                        &mut tasks,
+                    );
+                }
+            }
+        }
+    }
+    
+    logger::log_info("Main processing loop completed").await;
+    
+    // Signal writer to finish
+    let _ = write_tx.send(LogCommand::Checkpoint).await;
+    let _ = write_tx.send(LogCommand::Shutdown).await;
+    drop(write_tx);
+    
+    // Wait for writer to finish
+    let (final_count, final_new, final_updated) = writer_task.await
+        .unwrap_or((0, 0, 0));
+    
+    let final_valid = valid_count.load(Ordering::SeqCst);
+    let final_filtered_low_cap = filtered_low_cap.load(Ordering::SeqCst);
+    let final_filtered_no_price = filtered_no_price.load(Ordering::SeqCst);
+    let final_failed = failed_count.load(Ordering::SeqCst);
+    
+    logger::log_info(&format!(
+        "✅ Completed: {} total companies ({} new, {} updated)", 
+        final_count, final_new, final_updated
+    )).await;
+    logger::log_info(&format!(
+        "   Valid: {}, Filtered (low cap): {}, Filtered (no price): {}, Failed: {}",
+        final_valid, final_filtered_low_cap, final_filtered_no_price, final_failed
+    )).await;
+    
+    // === VERIFY AND RECREATE FINAL OUTPUT ===
+    logger::log_info("Verifying final output integrity...").await;
+    
+    let final_companies_map = existing_companies_writer.lock().await;
+    let expected_count = final_companies_map.len();
+    
+    // Always write final consolidated checkpoint
+    let temp_checkpoint = checkpoint_path.with_extension("tmp");
+    let mut temp_file = File::create(&temp_checkpoint).await?;
+    
+    for company in final_companies_map.values() {
+        let json_line = serde_json::to_string(company)?;
+        temp_file.write_all(json_line.as_bytes()).await?;
+        temp_file.write_all(b"\n").await?;
+    }
+    
+    temp_file.flush().await?;
+    temp_file.sync_data().await?;
+    drop(temp_file);
+    
+    tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
+    drop(final_companies_map);
+    
+    // Clear log since everything is in checkpoint
+    if log_path.exists() {
+        tokio::fs::remove_file(&log_path).await.ok();
+    }
+    
+    logger::log_info(&format!("✓ Final output: {} companies in {:?}", expected_count, checkpoint_path)).await;
+    
+    // Shutdown Yahoo pool
+    yahoo_pool.shutdown().await?;
+    
+    // Track completion with:
+    // - Content reference: All event directories
+    // - Data stage: Data (7-day TTL by default)
+    // - Dependencies: Depends on cleaned companies data
+    if !shutdown_flag.load(Ordering::SeqCst) {
+        manager.mark_valid(entry).await?;
+    }
+    
+    Ok(final_count)
+}
+
+/// Spawns one background validation task for `company` and pushes its join
+/// handle onto `tasks`.
+///
+/// The spawned task returns `Ok(None)` immediately when `shutdown_flag` is
+/// already set. Otherwise it runs `process_company_with_validation`, forwards
+/// a valid company to the writer through `write_tx`, bumps the counter that
+/// matches the outcome, and logs a progress line every 100 processed
+/// companies.
+///
+/// # Arguments
+/// * `company` - company to validate; moved into the task.
+/// * `yahoo_pool`, `paths` - shared handles cloned into the task.
+/// * `write_tx` - channel to the writer; receives `LogCommand::Write` for
+///   every valid company.
+/// * `shutdown_flag` - checked once, before any work starts.
+/// * `processed`, `valid_count`, `filtered_low_cap`, `filtered_no_price`,
+///   `failed_count` - shared outcome counters.
+/// * `total` - number of companies overall, used only in the progress line.
+/// * `tasks` - collection that receives the new join handle.
+fn spawn_validation_task(
+    company: CompanyData,
+    yahoo_pool: &Arc<YahooClientPool>,
+    paths: &Arc<DataPaths>,
+    write_tx: &mpsc::Sender<LogCommand>,
+    shutdown_flag: &Arc<AtomicBool>,
+    processed: &Arc<AtomicUsize>,
+    valid_count: &Arc<AtomicUsize>,
+    filtered_low_cap: &Arc<AtomicUsize>,
+    filtered_no_price: &Arc<AtomicUsize>,
+    failed_count: &Arc<AtomicUsize>,
+    total: usize,
+    tasks: &mut FuturesUnordered<tokio::task::JoinHandle<anyhow::Result<Option<()>>>>,
+) {
+    let yahoo_pool_clone = Arc::clone(yahoo_pool);
+    let paths_clone = Arc::clone(paths);
+    let shutdown_flag_clone = Arc::clone(shutdown_flag);
+    let write_tx_clone = write_tx.clone();
+    let processed_clone = Arc::clone(processed);
+    let valid_count_clone = Arc::clone(valid_count);
+    let filtered_low_cap_clone = Arc::clone(filtered_low_cap);
+    let filtered_no_price_clone = Arc::clone(filtered_no_price);
+    let failed_count_clone = Arc::clone(failed_count);
+
+    let task = tokio::spawn(async move {
+        // Check shutdown at start
+        if shutdown_flag_clone.load(Ordering::SeqCst) {
+            return Ok::<_, anyhow::Error>(None);
+        }
+
+        let result = process_company_with_validation(
+            &company,
+            &yahoo_pool_clone,
+            &*paths_clone,
+        ).await;
+
+        match result {
+            CompanyProcessResult::Valid(validated_company) => {
+                // Only count the company as valid once the writer has accepted
+                // it; a closed channel means it will never reach the output.
+                if write_tx_clone.send(LogCommand::Write(validated_company)).await.is_ok() {
+                    valid_count_clone.fetch_add(1, Ordering::SeqCst);
+                } else {
+                    failed_count_clone.fetch_add(1, Ordering::SeqCst);
+                    logger::log_warn(&format!(
+                        "  Writer channel closed - validated company '{}' was not persisted",
+                        company.name
+                    )).await;
+                }
+            }
+            CompanyProcessResult::FilteredLowCap { name, market_cap } => {
+                // Use the value returned by fetch_add: a separate load() could
+                // observe increments made by other tasks in between.
+                let seen = filtered_low_cap_clone.fetch_add(1, Ordering::SeqCst) + 1;
+                if seen <= 10 {
+                    logger::log_info(&format!("  Filtered {} - low market cap: {:.0} EUR", name, market_cap)).await;
+                }
+            }
+            CompanyProcessResult::FilteredNoPrice { name } => {
+                let seen = filtered_no_price_clone.fetch_add(1, Ordering::SeqCst) + 1;
+                if seen <= 10 {
+                    logger::log_info(&format!("  Filtered {} - no recent price data", name)).await;
+                }
+            }
+            CompanyProcessResult::Failed { company: failed_company, error, is_transient: _ } => {
+                failed_count_clone.fetch_add(1, Ordering::SeqCst);
+                logger::log_warn(&format!("  Failed to process '{}': {}", failed_company.name, error)).await;
+            }
+        }
+
+        // Progress reporting
+        let current = processed_clone.fetch_add(1, Ordering::SeqCst) + 1;
+        if current % 100 == 0 {
+            logger::log_info(&format!(
+                "Progress: {}/{} ({} valid, {} low cap, {} no price, {} failed)",
+                current, total,
+                valid_count_clone.load(Ordering::SeqCst),
+                filtered_low_cap_clone.load(Ordering::SeqCst),
+                filtered_no_price_clone.load(Ordering::SeqCst),
+                failed_count_clone.load(Ordering::SeqCst)
+            )).await;
+        }
+
+        Ok(None::<()>)
+    });
+
+    tasks.push(task);
+}
+
+/// Validates one company against Yahoo data and classifies the outcome.
+///
+/// Steps, in order: resolve the first Yahoo ticker, fetch the core quote
+/// summary modules, require a market cap of at least 100,000,000 (as computed
+/// by `extract_market_cap`), require recent price activity, then persist the
+/// summary via `save_company_core_data`.
+///
+/// # Returns
+/// * `Valid` with a clone of `company` when every check passes. A failed save
+///   is only logged and does not change the result.
+/// * `FilteredLowCap` / `FilteredNoPrice` when a check rejects the company.
+/// * `Failed` when no ticker exists (`is_transient: false`) or an API call
+///   errors (`is_transient` decided by `is_transient_error`).
+async fn process_company_with_validation(
+    company: &CompanyData,
+    yahoo_pool: &Arc<YahooClientPool>,
+    paths: &DataPaths,
+) -> CompanyProcessResult {
+    // Market-cap floor below which a company is filtered out.
+    const MIN_MARKET_CAP: f64 = 100_000_000.0;
+
+    // Without a Yahoo ticker nothing can be fetched, so this failure is permanent.
+    let ticker = if let Some(symbol) = extract_first_yahoo_ticker(company) {
+        symbol
+    } else {
+        return CompanyProcessResult::Failed {
+            company: company.clone(),
+            error: "No valid Yahoo ticker found".to_string(),
+            is_transient: false,
+        };
+    };
+
+    // Core quote-summary modules for the ticker.
+    let summary = match yahoo_pool
+        .get_quote_summary(&ticker, &QuoteSummaryModule::core_modules())
+        .await
+    {
+        Ok(fetched) => fetched,
+        Err(fetch_err) => {
+            let detail = fetch_err.to_string();
+            return CompanyProcessResult::Failed {
+                company: company.clone(),
+                is_transient: is_transient_error(&detail),
+                error: format!("API error fetching summary: {}", detail),
+            };
+        }
+    };
+
+    // Market-cap gate.
+    let market_cap = extract_market_cap(&summary);
+    if market_cap < MIN_MARKET_CAP {
+        return CompanyProcessResult::FilteredLowCap {
+            name: company.name.clone(),
+            market_cap,
+        };
+    }
+
+    // Price-activity gate.
+    match check_recent_price_activity(yahoo_pool, &ticker).await {
+        Ok(true) => {}
+        Ok(false) => {
+            return CompanyProcessResult::FilteredNoPrice {
+                name: company.name.clone(),
+            };
+        }
+        Err(history_err) => {
+            let detail = history_err.to_string();
+            return CompanyProcessResult::Failed {
+                company: company.clone(),
+                is_transient: is_transient_error(&detail),
+                error: format!("API error fetching price history: {}", detail),
+            };
+        }
+    }
+
+    // Persisting the summary is best-effort: a failure is logged, not propagated.
+    if let Some(save_err) = save_company_core_data(paths, &company.name, &summary).await.err() {
+        logger::log_warn(&format!(
+            "  Failed to save core data for {}: {}",
+            company.name, save_err
+        )).await;
+    }
+
+    CompanyProcessResult::Valid(company.clone())
+}
+
+/// Determine if an error is transient (should retry) or permanent (skip)
+fn is_transient_error(error: &str) -> bool {
+    let error_lower = error.to_lowercase();
+    
+    // Transient errors (network, rate limiting, timeouts)
+    let transient_patterns = [
+        "timeout",
+        "timed out",
+        "connection",
+        "network",
+        "rate limit",
+        "too many requests",
+        "429",
+        "503",
+        "502",
+        "500",
+        "temporarily",
+        "unavailable",
+    ];
+    
+    for pattern in &transient_patterns {
+        if error_lower.contains(pattern) {
+            return true;
+        }
+    }
+    
+    // Permanent errors (invalid ticker, no data, parsing errors)
+    let permanent_patterns = [
+        "404",
+        "not found",
+        "invalid",
+        "no data",
+        "parse error",
+        "400",
+        "401",
+        "403",
+    ];
+    
+    for pattern in &permanent_patterns {
+        if error_lower.contains(pattern) {
+            return false;
+        }
+    }
+    
+    // Default: treat unknown errors as transient (safer to retry)
+    true
+}
+
+/// Reads the market cap from the `price` module of a quote summary and
+/// converts it to EUR with fixed, hard-coded rates.
+///
+/// Returns `0.0` when the `price` module is absent or `marketCap.raw` is not a
+/// number. A missing currency is treated as `"USD"`, and any currency without
+/// its own rate uses the USD rate.
+fn extract_market_cap(summary: &crate::scraper::yahoo::QuoteSummary) -> f64 {
+    let price = if let Some(module) = summary.modules.get("price") {
+        module
+    } else {
+        return 0.0;
+    };
+
+    let raw_cap = price
+        .get("marketCap")
+        .and_then(|cap| cap.get("raw"))
+        .and_then(|raw| raw.as_f64())
+        .unwrap_or(0.0);
+
+    let currency = price
+        .get("currency")
+        .and_then(|code| code.as_str())
+        .unwrap_or("USD");
+
+    // EUR value of one unit of the reported currency.
+    let eur_per_unit = match currency {
+        "EUR" => 1.0,
+        "GBP" => 1.17,
+        "JPY" => 0.0061,
+        "CHF" => 1.05,
+        // "USD" and every unrecognised currency share the USD rate.
+        _ => 0.92,
+    };
+
+    raw_cap * eur_per_unit
+}
+
+/// Reports whether `ticker` has recent daily price data.
+///
+/// Requests `"1d"` chart data for the last 60 days and returns `Ok(true)` when
+/// at least one quote exists and the newest quote is no older than 365 days.
+/// Errors from the chart request are propagated.
+///
+/// NOTE(review): the fetch window is 60 days while the freshness threshold is
+/// one year. If the API honours the requested range, the threshold can never
+/// reject a non-empty result - confirm which bound is intended.
+async fn check_recent_price_activity(
+    yahoo_pool: &Arc<YahooClientPool>,
+    ticker: &str,
+) -> anyhow::Result<bool> {
+    const SECONDS_PER_DAY: i64 = 24 * 60 * 60;
+
+    let now = Utc::now().timestamp();
+    let freshness_cutoff = now - 365 * SECONDS_PER_DAY;
+    let window_start = now - 60 * SECONDS_PER_DAY;
+
+    let chart = yahoo_pool
+        .get_chart_data(ticker, "1d", window_start, now)
+        .await?;
+
+    // `max()` is `None` exactly when there are no quotes at all.
+    let newest_quote = chart.quotes.iter().map(|quote| quote.timestamp).max();
+
+    Ok(match newest_quote {
+        Some(timestamp) => timestamp >= freshness_cutoff,
+        None => false,
+    })
+}
+
+/// Writes a company's quote summary as a single JSON line to
+/// `<corporate_dir>/<sanitized name>/core/data.jsonl`.
+///
+/// The directory is created if needed and an existing file is overwritten.
+/// Returns an error if directory creation, serialization or any file
+/// operation fails.
+async fn save_company_core_data(
+    paths: &DataPaths,
+    company_name: &str,
+    summary: &crate::scraper::yahoo::QuoteSummary,
+) -> anyhow::Result<()> {
+    use tokio::fs;
+
+    let core_dir = paths
+        .corporate_dir()
+        .join(&sanitize_company_name(company_name))
+        .join("core");
+    fs::create_dir_all(&core_dir).await?;
+
+    // One record per line: serialized summary followed by a newline.
+    let mut record = serde_json::to_string(summary)?;
+    record.push('\n');
+
+    let mut output = fs::File::create(&core_dir.join("data.jsonl")).await?;
+    output.write_all(record.as_bytes()).await?;
+    output.flush().await?;
+
+    Ok(())
+}
+
+
+
+
+
+/// Returns `true` when `company` still has to be validated, i.e. its name is
+/// not yet a key of `existing_companies`.
+fn company_needs_processing(
+    company: &CompanyData,
+    existing_companies: &HashMap<String, CompanyData>,
+) -> bool {
+    // Anything already present in the cleaned output is skipped.
+    existing_companies.get(&company.name).is_none()
+}
--- a/src/corporate/update_companies_enrich.rs
+++ b/src/corporate/update_companies_enrich.rs
--- a/src/corporate/update_openfigi.rs
+++ b/src/corporate/update_openfigi.rs
--- a/src/corporate/yahoo_company_extraction.js
+++ b/src/corporate/yahoo_company_extraction.js
@@ -0,0 +1,229 @@
+// yahoo_company_extraction.js
+// JavaScript extraction script for Yahoo Finance company details
+// Used to extract ticker, sector, and exchange from Yahoo Finance search results
+// Only ticker is mandatory - sector and exchange are optional fields
+
+// Example selectors:
+// with results:
+// document.querySelector("#main-content-wrapper > section > section.container.yf-1omxedn > div.tableContainer.yf-1omxedn > div > table")
+// document.querySelector("#\\30  > td:nth-child(1) > span > div > a")
+// document.querySelector("#\\30  > td:nth-child(2) > span > div")
+// document.querySelector("#\\30  > td:nth-child(3) > span > div")
+// document.querySelector("#\\30  > td:nth-child(4) > span > div > a")
+// document.querySelector("#\\30  > td:nth-child(5) > span > div")
+// document.querySelector("#\\30  > td:nth-child(6) > span > div")
+// row with no result:
+// document.querySelector("#\\32  > td:nth-child(4) > span > p")
+// no results:
+// document.querySelector("#main-content-wrapper > section > div.noData.yf-1omxedn")
+
+// Using a wrapper to ensure the result is properly captured
+var extractionResult = (function() {
+    try {
+        // Check for "No results found" message using very flexible selector
+        const noDataElement = document.querySelector('[class*="noData"]') ||
+                              document.querySelector('[class*="error"]') ||
+                              (document.body.innerText && document.body.innerText.includes('No results'));
+        if (noDataElement) {
+            return { status: 'no_results', ticker: null, sector: null, exchange: null };
+        }
+
+        // Find the results table using most flexible selector possible
+        // Try multiple strategies to find the table
+        const table = document.querySelector('table') ||
+                      document.querySelector('[role="table"]') ||
+                      document.querySelector('.table') ||
+                      document.querySelector('#main-content-wrapper > section > section[class*="container"] > div[class*="tableContainer"] > div > table');
+        if (!table) {
+            return { status: 'no_results', ticker: null, sector: null, exchange: null };
+        }
+
+        // Find all rows in tbody
+        const allRows = table.querySelectorAll('tbody tr');
+        if (!allRows || allRows.length === 0) {
+            return { status: 'no_results', ticker: null, sector: null, exchange: null };
+        }
+
+        // Helper function to safely extract text content
+        function extractText(element) {
+            if (!element) return '';
+            const text = element.textContent.trim();
+            return text;
+        }
+
+        // Helper function to check if a cell actually contains data
+        // Multiple indicators are used to determine if data is present
+        function hasValidData(cellElement) {
+            if (!cellElement) return false;
+
+            // Indicator 1: Check if the cell contains a <p> tag (Yahoo uses this for "no data")
+            const pTag = cellElement.querySelector('p');
+            if (pTag) return false;
+
+            // Indicator 2: Check the direct child structure
+            // Valid data cells have: td > span > div or td > span > div > a
+            // Invalid data cells have: td > span > p
+            const span = cellElement.querySelector('span');
+            if (span) {
+                const directChildren = Array.from(span.children);
+                // If the only or first child is a <p>, it's likely "no data"
+                if (directChildren.length > 0 && directChildren[0].tagName === 'P') {
+                    return false;
+                }
+            }
+
+            // Indicator 3: Check text content
+            const text = extractText(cellElement);
+            if (!text) return false;
+            const normalized = text.toLowerCase().trim();
+            
+            // Common "no data" indicators
+            const noDataIndicators = [
+                '-',
+                'n/a',
+                'na',
+                'none',
+                'not available',
+                'no data',
+                '--',
+                '—', // em dash
+                '–', // en dash
+            ];
+            
+            if (noDataIndicators.includes(normalized)) {
+                return false;
+            }
+
+            // Indicator 4: Check for common CSS classes that indicate empty state
+            const classIndicators = ['empty', 'no-data', 'na', 'null', 'undefined'];
+            const classList = cellElement.className || '';
+            for (const indicator of classIndicators) {
+                if (classList.includes(indicator)) {
+                    return false;
+                }
+            }
+
+            // Indicator 5: Check if cell has an anchor tag (usually indicates real data)
+            const hasLink = cellElement.querySelector('a') !== null;
+            
+            // Indicator 6: Check if there's actual substantial content
+            // If text is very short (1-2 chars) and not alphanumeric, it's likely not real data
+            if (text.length <= 2 && !/[a-zA-Z0-9]/.test(text)) {
+                return false;
+            }
+
+            // If we passed all checks, consider it valid data
+            return true;
+        }
+
+        // Helper function to extract and normalize data from a cell
+        function extractCellData(cellElement) {
+            if (!cellElement) return null;
+            if (!hasValidData(cellElement)) return null;
+            
+            const text = extractText(cellElement);
+            return text || null;
+        }
+
+        // Helper function to extract and normalize data from a row
+        function extractRowData(row) {
+            // Extract ticker from column 1 (td:nth-child(1))
+            const tickerCell = row.querySelector('td:nth-child(1)');
+            const ticker = extractCellData(tickerCell);
+
+            // Extract sector from column 4 (td:nth-child(4))
+            const sectorCell = row.querySelector('td:nth-child(4)');
+            const sector = extractCellData(sectorCell);
+
+            // Extract exchange from column 6 (td:nth-child(6))
+            const exchangeCell = row.querySelector('td:nth-child(6)');
+            const exchange = extractCellData(exchangeCell);
+
+            return { ticker, sector, exchange };
+        }
+
+        // Helper function to count non-null fields (data completeness counter)
+        function countValidFields(data) {
+            let count = 0;
+            if (data.ticker) count++;
+            if (data.sector) count++;
+            if (data.exchange) count++;
+            return count;
+        }
+
+        // Helper function to score a row (prioritize rows with more complete data)
+        function scoreRow(data) {
+            let score = 0;
+            
+            // Ticker is mandatory and gets highest weight
+            if (data.ticker) score += 100;
+            
+            // Sector and exchange are nice-to-have
+            if (data.sector) score += 10;
+            if (data.exchange) score += 10;
+            
+            return score;
+        }
+
+        // Extract data from all rows and find the one with most complete data
+        let bestRow = null;
+        let maxScore = -1;
+        let rowIndex = 0;
+
+        for (const row of allRows) {
+            const data = extractRowData(row);
+            const score = scoreRow(data);
+            
+            // Select row with highest score (most complete data)
+            // If tied, first row wins
+            if (score > maxScore) {
+                bestRow = data;
+                maxScore = score;
+                bestRow.rowIndex = rowIndex;
+                bestRow.validFieldCount = countValidFields(data);
+                bestRow.score = score;
+            }
+            
+            rowIndex++;
+        }
+
+        // Ticker is mandatory - return error status if not found
+        if (!bestRow || !bestRow.ticker) {
+            return { 
+                status: 'error', 
+                error_message: 'No ticker found in any row',
+                ticker: null, 
+                sector: null, 
+                exchange: null 
+            };
+        }
+
+        // Return success with ticker (mandatory) and optional sector/exchange
+        // Include metadata about which row was selected and how many valid fields it had
+        return {
+            status: 'found',
+            ticker: bestRow.ticker,
+            sector: bestRow.sector,
+            exchange: bestRow.exchange,
+            metadata: {
+                selectedRowIndex: bestRow.rowIndex,
+                validFieldCount: bestRow.validFieldCount,
+                score: bestRow.score,
+                totalRows: allRows.length
+            }
+        };
+
+    } catch (error) {
+        // Only catch unexpected errors during extraction
+        return {
+            status: 'error',
+            error_message: error.toString(),
+            ticker: null,
+            sector: null,
+            exchange: null
+        };
+    }
+})();
+
+// Return the result explicitly
+return extractionResult;
--- a/src/corporate/yahoo_company_extraction.rs
+++ b/src/corporate/yahoo_company_extraction.rs
@@ -0,0 +1,468 @@
+// src/corporate/yahoo_company_extraction.rs
+use super::{types::*, helpers::*, page_validation::*};
+use crate::{scraper::webdriver::*, util::{directories::DataPaths}};
+use crate::logger;
+use fantoccini::{Client, Locator};
+use rand::Rng;
+use serde::{Deserialize, Serialize};
+use tokio::time::{Duration as TokioDuration, sleep, timeout};
+use std::{sync::Arc, sync::atomic::{AtomicBool, Ordering}};
+use anyhow::{anyhow, Result};
+
+const YAHOO_COMPANY_EXTRACTION_JS: &str = include_str!("yahoo_company_extraction.js");
+
+/// Outcome of a Yahoo ticker lookup.
+///
+/// Each variant maps to a `YAHOO:`-prefixed tag via `to_tagged_string`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum YahooTickerResult {
+    /// A ticker was found; the payload is the ticker symbol.
+    Found(String),
+    /// Tagged as `YAHOO:NOT_FOUND`.
+    NotFound,
+    /// Tagged as `YAHOO:NO_RESULTS`.
+    NoResults,
+    /// Tagged as `YAHOO:AMBIGUOUS`.
+    AmbiguousResults,
+}
+
+/// Diagnostics that the extraction script attaches to a `found` result.
+///
+/// Field names are deserialized from their camelCase JSON keys
+/// (`selectedRowIndex`, `validFieldCount`, `totalRows`).
+#[derive(Debug, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct ExtractionMetadata {
+    /// Zero-based index of the table row the script selected.
+    pub selected_row_index: usize,
+    /// How many of ticker/sector/exchange were present in that row.
+    pub valid_field_count: usize,
+    /// Number of rows found in the results table.
+    pub total_rows: usize,
+}
+
+/// Deserialized return value of the `yahoo_company_extraction.js` script.
+#[derive(Debug, Deserialize)]
+pub struct ExtractionResult {
+    // Status string set by the script: "found", "no_results" or "error".
+    status: String,
+    // Ticker symbol; the script returns null unless status is "found".
+    ticker: Option<String>,
+    // Optional sector text (null when the cell held no valid data).
+    sector: Option<String>,
+    // Optional exchange text (null when the cell held no valid data).
+    exchange: Option<String>,
+    // Only present in the script's "error" results.
+    #[serde(default)]
+    error_message: Option<String>,
+    // Only present in the script's "found" results.
+    #[serde(default)]
+    metadata: Option<ExtractionMetadata>,
+}
+
+impl YahooTickerResult {
+    /// Renders the result as a `YAHOO:`-prefixed tag: the ticker itself for
+    /// `Found`, otherwise `NOT_FOUND`, `NO_RESULTS` or `AMBIGUOUS`.
+    pub fn to_tagged_string(&self) -> String {
+        let suffix = match self {
+            Self::Found(ticker) => ticker.as_str(),
+            Self::NotFound => "NOT_FOUND",
+            Self::NoResults => "NO_RESULTS",
+            Self::AmbiguousResults => "AMBIGUOUS",
+        };
+        format!("YAHOO:{}", suffix)
+    }
+
+    /// Returns `true` only for the `Found` variant.
+    pub fn is_found(&self) -> bool {
+        self.get_ticker().is_some()
+    }
+
+    /// Returns the ticker for `Found`, `None` for every other variant.
+    pub fn get_ticker(&self) -> Option<&str> {
+        if let Self::Found(ticker) = self {
+            Some(ticker.as_str())
+        } else {
+            None
+        }
+    }
+}
+
+/// Scrape company details for an ISIN from the Yahoo Finance lookup page,
+/// with navigation/content validation and shutdown support.
+///
+/// # Arguments
+/// * `pool` - ChromeDriver pool that runs the browser task via `execute`.
+/// * `isin` - ISIN used as the `s` query parameter of the lookup URL.
+/// * `shutdown_flag` - checked before starting and at several points inside
+///   the browser task.
+///
+/// # Returns
+/// * `Ok(None)` when shutdown is already requested before the task starts;
+///   otherwise whatever `extract_company_details_validated` returns.
+///
+/// # Errors
+/// * `"HARD_RESET_REQUIRED"` when the pool reports a pending hard reset.
+/// * `"Task aborted due to shutdown"` when shutdown is seen inside the task.
+/// * `"Page content not ready"` when neither a table nor a no-data marker is
+///   found on the page.
+/// * Any error from cookie rejection, navigation verification, script
+///   execution or extraction.
+pub async fn scrape_company_details_by_isin(
+    pool: &Arc<ChromeDriverPool>,
+    isin: &str,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<Option<YahooCompanyData>> {
+    // Check shutdown before starting
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn(&format!("Shutdown detected, skipping ISIN: {}", isin)).await;
+        return Ok(None);
+    }
+
+    // Refuse to start new work while the pool asks for a hard reset
+    if pool.should_perform_hard_reset() {
+        logger::log_warn("HARD_RESET_REQUIRED detected before starting ISIN scrape").await;
+        return Err(anyhow!("HARD_RESET_REQUIRED"));
+    }
+    
+    // Owned copies so the task closure does not borrow from the caller
+    let isin_owned = isin.to_string();
+    let shutdown_clone = Arc::clone(shutdown_flag);
+    let url = format!("https://finance.yahoo.com/lookup/?s={}", isin);
+    
+    pool.execute(url.clone(), move |client| {
+        let isin = isin_owned.clone();
+        let shutdown = shutdown_clone.clone();
+        
+        Box::pin(async move {
+            // Check shutdown during task execution
+            if shutdown.load(Ordering::SeqCst) {
+                return Err(anyhow!("Task aborted due to shutdown"));
+            }
+            
+            // Random delay (800..1500 ms, upper bound exclusive)
+            let delay = rand::rng().random_range(800..1500);
+            sleep(TokioDuration::from_millis(delay)).await;
+            
+            // Reject cookies
+            reject_yahoo_cookies(&client).await?;
+            
+            // Check shutdown again
+            if shutdown.load(Ordering::SeqCst) {
+                return Err(anyhow!("Task aborted due to shutdown"));
+            }
+            
+            // CRITICAL: Validate navigation succeeded
+            // NOTE(review): meaning of the third argument (5) is defined by
+            // `verify_navigation` - presumably a retry count or timeout; confirm.
+            let expected_fragment = format!("lookup/?s={}", isin);
+            match verify_navigation(&client, &expected_fragment, 5).await {
+                Ok(_) => {
+                    logger::log_info(&format!("✓ Navigation validated for ISIN: {}", isin)).await;
+                }
+                Err(e) => {
+                    logger::log_error(&format!(
+                        "Navigation verification failed for ISIN {}: {}",
+                        isin, e
+                    )).await;
+                    // Clear browser state before returning error
+                    // (best-effort: a failure of the cleanup itself is ignored)
+                    clear_browser_state(&client).await.ok();
+                    return Err(e);
+                }
+            }
+            
+            // Additional content validation - look for table or noData element anywhere on page
+            // A non-boolean script result is treated as "not ready".
+            let page_ready: bool = client
+                .execute(
+                    r#"
+                    // Try multiple selector strategies
+                    const table = document.querySelector('table') ||
+                                  document.querySelector('[role="table"]') ||
+                                  document.querySelector('.table');
+                    const noData = document.querySelector('[class*="noData"]') ||
+                                   document.querySelector('[class*="error"]') ||
+                                   document.body.innerText.includes('No results');
+                    const hasContent = !!(table || noData);
+                    console.log('Page ready check - table:', !!table, 'noData:', !!noData, 'hasContent:', hasContent);
+                    return hasContent;
+                    "#,
+                    vec![],
+                )
+                .await?
+                .as_bool()
+                .unwrap_or(false);
+            
+            if !page_ready {
+                logger::log_error(&format!(
+                    "Page content not ready for ISIN {} - neither table nor no-data element found",
+                    isin
+                )).await;
+                // Best-effort cleanup before reporting the failure
+                clear_browser_state(&client).await.ok();
+                return Err(anyhow!("Page content not ready"));
+            }
+            
+            logger::log_info(&format!("✓ Page content validated for ISIN: {}", isin)).await;
+            
+            // Check shutdown before extraction
+            if shutdown.load(Ordering::SeqCst) {
+                return Err(anyhow!("Task aborted due to shutdown"));
+            }
+            
+            // Random delay before extraction (800..1500 ms, upper bound exclusive)
+            let delay = rand::rng().random_range(800..1500);
+            sleep(TokioDuration::from_millis(delay)).await;
+            
+            // Now safe to extract
+            extract_company_details_validated(&client, &isin).await
+        })
+    }).await
+}
+
+/// Runs `extract_company_details` after confirming the browser is still on a
+/// URL that contains `isin`, and logs the outcome.
+///
+/// # Errors
+/// Returns `"URL mismatch - possible stale page"` (after a best-effort browser
+/// state reset) when the current URL does not contain `isin`; otherwise
+/// propagates errors from reading the URL or from the extraction itself.
+async fn extract_company_details_validated(
+    client: &Client,
+    isin: &str,
+) -> Result<Option<YahooCompanyData>> {
+    // Guard against a stale page: the lookup URL must still carry the ISIN.
+    let url_now = client.current_url().await?;
+    let url_text = url_now.as_str();
+    if !url_text.contains(isin) {
+        logger::log_error(&format!(
+            "URL mismatch before extraction: expected ISIN '{}' in URL, got '{}'",
+            isin,
+            url_text
+        )).await;
+        clear_browser_state(client).await.ok();
+        return Err(anyhow!("URL mismatch - possible stale page"));
+    }
+
+    let outcome = extract_company_details(client, isin).await?;
+
+    // Report what the extraction produced.
+    let summary_line = match outcome.as_ref() {
+        Some(details) => format!(
+            "✓ Extracted ticker '{}' for ISIN {} (sector: {:?}, exchange: {:?})",
+            details.ticker, isin, details.sector, details.exchange
+        ),
+        None => format!(
+            "No ticker found for ISIN {} (legitimately not found)",
+            isin
+        ),
+    };
+    logger::log_info(&summary_line).await;
+
+    Ok(outcome)
+}
+
+/// Extracts ticker, sector and exchange from the Yahoo Finance page currently loaded in `client`.
+///
+/// The page is first polled from JavaScript until a table or a "no data"/error element
+/// shows up, then `YAHOO_COMPANY_EXTRACTION_JS` is executed and its JSON result is
+/// interpreted by its `status` field.
+///
+/// # Arguments
+/// * `client` - WebDriver client; this function does not navigate, it only runs scripts.
+/// * `_isin` - Currently unused.
+///
+/// # Returns
+/// * `Ok(Some(..))` when the script reports status `"found"` together with a ticker.
+/// * `Ok(None)` for status `"no_results"` and for any status not listed in the match below.
+///
+/// # Errors
+/// Fails when the 30 s wait times out, a script call fails, the script returns null or
+/// a value that does not deserialize into `ExtractionResult`, the status is `"found"`
+/// without a ticker, or the status is `"error"`.
+pub async fn extract_company_details(
+    client: &Client,
+    _isin: &str,
+) -> Result<Option<YahooCompanyData>> {
+    // Wait for page to load - look for either the table or the no-data element using simple selectors
+    let wait_result: Result<Result<bool, anyhow::Error>> = timeout(
+        TokioDuration::from_secs(30),
+        async {
+            // NOTE(review): 60 polls x 500 ms of sleep already add up to the 30 s budget,
+            // so the outer timeout presumably fires before this loop can fall through to
+            // `Ok(false)` - confirm whether the "attempting extraction anyway" branch is reachable.
+            for _ in 0..60 {
+                let has_content: bool = client
+                    .execute(
+                        r#"
+                        // Use flexible selectors that don't depend on exact DOM structure
+                        const table = document.querySelector('table') ||
+                                      document.querySelector('[role="table"]') ||
+                                      document.querySelector('.table');
+                        const noData = document.querySelector('[class*="noData"]') ||
+                                       document.querySelector('[class*="error"]');
+                        const hasContent = !!(table || noData);
+                        return hasContent;
+                        "#,
+                        vec![],
+                    )
+                    .await
+                    .map_err(|e| anyhow!("Execute error: {}", e))?
+                    .as_bool()
+                    // A non-boolean script result is treated as "no content yet"
+                    .unwrap_or(false);
+
+                if has_content {
+                    return Ok(true);
+                }
+
+                sleep(TokioDuration::from_millis(500)).await;
+            }
+            Ok(false)
+        },
+    )
+    .await
+    .map_err(|_| anyhow!("Timeout waiting for Yahoo Finance page to load"));
+    
+    // Outer layer: timeout elapsed or not; inner layer: result of the polling loop
+    match wait_result {
+        Err(_) => {
+            return Err(anyhow!("Timeout waiting for Yahoo Finance page to load"));
+        },
+        Ok(Err(e)) => {
+            return Err(anyhow!("Error checking page content: {}", e));
+        },
+        Ok(Ok(false)) => {
+            logger::log_warn("Page content not found after waiting, attempting extraction anyway").await;
+        },
+        Ok(Ok(true)) => {
+            logger::log_info("Page content detected, proceeding with extraction").await;
+        }
+    }
+    
+    // Execute the JavaScript extraction script
+    let result = client.execute(YAHOO_COMPANY_EXTRACTION_JS, vec![]).await?;
+    
+    // Log the raw result for debugging
+    logger::log_info(&format!("JavaScript extraction raw result: {:?}", result)).await;
+    
+    // Check if result is null
+    if result.is_null() {
+        return Err(anyhow!("JavaScript returned null - page may not be fully loaded or script failed"));
+    }
+    
+    // Parse the JSON result (the value is cloned so the raw result can still be quoted in the error)
+    let extraction: ExtractionResult = serde_json::from_value(result.clone())
+        .map_err(|e| {
+            let result_str = serde_json::to_string_pretty(&result).unwrap_or_else(|_| format!("{:?}", result));
+            anyhow!("Failed to parse extraction result: {}. Raw result: {}", e, result_str)
+        })?;
+    
+    match extraction.status.as_str() {
+        "found" => {
+            if let Some(ticker) = extraction.ticker {
+                // Row-selection metadata is optional and only used for logging
+                if let Some(ref metadata) = extraction.metadata {
+                    logger::log_info(&format!(
+                        "Selected row {} with {} valid fields out of {} total rows",
+                        metadata.selected_row_index,
+                        metadata.valid_field_count,
+                        metadata.total_rows
+                    )).await;
+                }
+                
+                Ok(Some(YahooCompanyData {
+                    ticker,
+                    sector: extraction.sector,
+                    exchange: extraction.exchange,
+                }))
+            } else {
+                Err(anyhow!("Status 'found' but no ticker present"))
+            }
+        },
+        "no_results" => Ok(None),
+        "error" => {
+            let error_msg = extraction.error_message.unwrap_or_else(|| "Unknown error".to_string());
+            Err(anyhow!("JavaScript extraction error: {}", error_msg))
+        },
+        // Any unrecognized status is handled like "no_results"
+        _ => Ok(None),
+    }
+}
+
+/// Collects every ticker listed in `corporate/by_name/companies.jsonl` under the data directory.
+///
+/// The file is read as JSON Lines: each non-blank line is deserialized into a
+/// `CompanyData`, and all ticker lists found in its `isin_tickers_map` are appended
+/// to the result in file order. Companies without a map contribute nothing.
+/// Duplicates are not removed.
+///
+/// # Errors
+/// Returns an error if the file cannot be read or if a non-blank line is not a
+/// valid `CompanyData` record.
+pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::Result<Vec<String>> {
+    let corporate_path = paths.data_dir().join("corporate").join("by_name");
+    let companies_file = corporate_path.join("companies.jsonl");
+    let content = tokio::fs::read_to_string(companies_file).await?;
+
+    let mut tickers = Vec::new();
+    // Blank or whitespace-only lines are not records; handing them to serde_json
+    // would fail with an EOF error and abort the whole load.
+    for line in content.lines().map(str::trim).filter(|l| !l.is_empty()) {
+        let company: CompanyData = serde_json::from_str(line)?;
+        if let Some(isin_tickers_map) = company.isin_tickers_map {
+            for (_isin, ticker_vec) in isin_tickers_map {
+                tickers.extend(ticker_vec);
+            }
+        }
+    }
+    Ok(tickers)
+}
+
+/// Fetches the Yahoo Finance earnings calendar for `ticker` through the shared driver pool.
+///
+/// Builds the earnings-calendar URL for the symbol and passes it to `pool.execute`
+/// together with a task that first tries to reject the cookie banner and then
+/// extracts the earnings rows from the page.
+pub async fn fetch_earnings_with_pool(
+    pool: &Arc<ChromeDriverPool>,
+    ticker: &str,
+) -> anyhow::Result<Vec<CompanyEventData>> {
+    let earnings_url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
+    let symbol = ticker.to_owned();
+
+    pool.execute(earnings_url, move |client| {
+        // The future needs its own owned copy of the symbol.
+        let symbol = symbol.clone();
+        Box::pin(async move {
+            reject_yahoo_cookies(&client).await?;
+            extract_earnings_events(&client, &symbol).await
+        })
+    }).await
+}
+
+/// Scrapes the earnings table of the Yahoo Finance calendar page currently loaded in `client`.
+///
+/// Waits for `table[data-test="cal-table"]`, then turns each `tbody tr` row with at
+/// least five cells into a `CompanyEventData` tagged with `ticker` and source "Yahoo".
+/// Cells are read in this order: date, time, period, EPS forecast, EPS actual and,
+/// if a sixth cell exists, surprise %. Revenue fields are always `None`.
+///
+/// # Errors
+/// Returns an error if the table, rows or cells cannot be located, a cell's text
+/// cannot be read, or a date fails `parse_yahoo_date`. Any such failure aborts the
+/// whole extraction; rows collected up to that point are not returned.
+pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEventData>> {
+    // Wait for the table to load
+    let table = client
+        .wait()
+        .for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
+        .await
+        .map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
+
+    // Find all rows in tbody
+    let rows = table
+        .find_all(Locator::Css("tbody tr"))
+        .await
+        .map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
+
+    let mut events = Vec::with_capacity(rows.len());
+
+    for row in rows {
+        let cells = row
+            .find_all(Locator::Css("td"))
+            .await
+            .map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
+
+        // Indices 0..=4 are read unconditionally below, so shorter rows cannot be used
+        if cells.len() < 5 {
+            continue; // Skip incomplete rows
+        }
+
+        // Extract and parse date, normalizing it to YYYY-MM-DD
+        let date_str = cells[0]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get date text: {}", e))?;
+        let date = parse_yahoo_date(&date_str)
+            .map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
+            .format("%Y-%m-%d")
+            .to_string();
+
+        // Extract time, replace "Time Not Supplied" with empty
+        let time = cells[1]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get time text: {}", e))?
+            .replace("Time Not Supplied", "");
+
+        // Extract period
+        let period = cells[2]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get period text: {}", e))?;
+
+        // Parse EPS forecast
+        let eps_forecast_str = cells[3]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
+        let eps_forecast = parse_float(&eps_forecast_str);
+
+        // Parse EPS actual
+        let eps_actual_str = cells[4]
+            .text()
+            .await
+            .map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
+        let eps_actual = parse_float(&eps_actual_str);
+
+        // Parse surprise % if available (only present when the row has a sixth cell)
+        let surprise_pct = if cells.len() > 5 {
+            let surprise_str = cells[5]
+                .text()
+                .await
+                .map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
+            parse_float(&surprise_str)
+        } else {
+            None
+        };
+
+        // Revenue figures are not read from this table, so they stay unset
+        events.push(CompanyEventData {
+            ticker: ticker.to_string(),
+            date,
+            time,
+            period,
+            eps_forecast,
+            eps_actual,
+            revenue_forecast: None,
+            revenue_actual: None,
+            surprise_pct,
+            source: "Yahoo".to_string(),
+        });
+    }
+
+    // An empty result is reported as a warning, not as an error
+    if events.is_empty() {
+        logger::log_warn(&format!("Warning: No earnings events extracted for ticker {}", ticker)).await;
+    } else {
+        logger::log_info(&format!("Extracted {} earnings events for {}", events.len(), ticker)).await;
+    }
+
+    Ok(events)
+}
+
+/// Best-effort dismissal of Yahoo's cookie-consent page.
+///
+/// Makes up to 10 attempts, 500 ms apart, to click the `.reject-all` button inside
+/// `#consent-page`, and stops as soon as a click happened. A button that never
+/// appears is not an error; only a failing script execution is propagated.
+async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
+    let mut attempts_left = 10;
+    while attempts_left > 0 {
+        attempts_left -= 1;
+
+        let outcome = client
+            .execute(
+            r#"(() => {
+            const btn = document.querySelector('#consent-page .reject-all');
+            if (btn) {
+                btn.click();
+                return true;
+            }
+            return false;
+            })()"#,
+                vec![],
+            )
+            .await?;
+
+        // Anything other than a boolean `true` counts as "button not clicked".
+        if outcome.as_bool().unwrap_or(false) {
+            break;
+        }
+        sleep(TokioDuration::from_millis(500)).await;
+    }
+
+    logger::log_info("Rejected Yahoo cookies if button existed").await;
+    Ok(())
+}
--- a/src/economic/mod.rs
+++ b/src/economic/mod.rs
@@ -2,7 +2,9 @@
 pub mod types;
 pub mod scraper;
 pub mod storage;
-pub mod update;
 pub mod helpers;

+pub mod update;
+pub mod yahoo_update_forex;
+
 pub use update::run_full_update;
--- a/src/economic/scraper.rs
+++ b/src/economic/scraper.rs
@@ -1,5 +1,6 @@
 // src/economic/scraper.rs
 use super::types::{EconomicEvent};
+use crate::logger;
 use fantoccini::Client;
 use tokio::time::{sleep, Duration};

@@ -7,7 +8,30 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");

+/// Navigates `client` to the finanzen.net economic calendar and then runs
+/// `dismiss_overlays` on the loaded page.
+///
+/// # Errors
+/// Propagates a failed navigation or a failed overlay-removal script.
 pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
    client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
+    dismiss_overlays(client).await?;
+    Ok(())
+}

+/// Removes the "Contentpass First Layer" iframe from the current page, if present.
+///
+/// Tries up to 10 times, 500 ms apart, to find the iframe by its `title` attribute
+/// and detach it from its parent node; stops after the first successful removal.
+/// Returns `Ok(())` even when the iframe never appears.
+///
+/// # Errors
+/// Propagates a failed script execution.
+pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
+    for _ in 0..10 {
+        let removed: bool = client
+            .execute(
+                r#"(() => {
+                    const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
+                    if (iframe && iframe.parentNode) {
+                        iframe.parentNode.removeChild(iframe);
+                        return true;
+                    }
+                    return false;
+                })()"#,
+                vec![],
+            )
+            .await?
+            .as_bool()
+            // A non-boolean script result is treated as "nothing removed"
+            .unwrap_or(false);
+        if removed { break; }
+        sleep(Duration::from_millis(500)).await;
+    }
    Ok(())
 }

@@ -49,6 +73,6 @@ pub async fn extract_events(client: &Client) -> anyhow::Result<Vec<EconomicEvent
            });
        }
    }
-    println!("Extracted {} high-impact events", events.len());
+    logger::log_info(&format!("Extracted {} high-impact events", events.len())).await;
    Ok(events)
 }
--- a/src/economic/storage.rs
+++ b/src/economic/storage.rs
@@ -6,6 +6,9 @@ use crate::util::logger;
 use tokio::fs;
 use chrono::{NaiveDate, Datelike};
 use std::collections::HashMap;
+use serde_json;
+
+const MAX_EVENTS_PER_FILE: usize = 3000;

 pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
    let dir = paths.economic_events_dir();
@@ -18,37 +21,122 @@ pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<Chunk
            if path.extension().map(|e| e == "json").unwrap_or(false) {
                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                    if name.starts_with("chunk_") {
-                        if let Some(content) = fs::read_to_string(&path).await.ok() {
-                            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content) {
-                                let start = name[6..16].to_string();
-                                let end = name[17..27].to_string();
-                                chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: events.len() });
-                            }
-                        }
+                        // Don't load the events here, just record the chunk info
+                        let start = name[6..16].to_string();
+                        let end = name[17..27].to_string();
+                        chunks.push(ChunkInfo { 
+                            start_date: start, 
+                            end_date: end, 
+                            path, 
+                            event_count: 0 // We'll count later if needed
+                        });
                    }
                }
            }
        }
    }
    chunks.sort_by_key(|c| c.start_date.clone());
-    logger::log_info(&format!("Economic Storage: Scanned {} event chunks", chunks.len())).await;
+    logger::log_info(&format!("Economic Storage: Found {} event chunks", chunks.len())).await;
    Ok(chunks)
 }

-pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMap<String, EconomicEvent>> {
-    let mut map = HashMap::new();
-    for chunk in chunks {
-        let content = fs::read_to_string(&chunk.path).await?;
-        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
-        for e in events {
-            map.insert(event_key(&e), e);
-        }
+/// Feeds every event of a single chunk file to `callback`, in file order.
+///
+/// Note that the chunk is not streamed from disk: the whole file is read and
+/// deserialized into a `Vec<EconomicEvent>` before the first callback runs.
+///
+/// # Returns
+/// The number of events contained in the chunk.
+///
+/// # Errors
+/// Fails if the file cannot be read or parsed, or as soon as `callback` returns
+/// an error (remaining events are then not visited).
+pub async fn stream_chunk_events(
+    chunk: &ChunkInfo,
+    callback: impl Fn(EconomicEvent) -> anyhow::Result<()>
+) -> anyhow::Result<usize> {
+    let content = fs::read_to_string(&chunk.path).await?;
+    let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
+    let count = events.len();
+    
+    for event in events {
+        callback(event)?;
    }
-    logger::log_info(&format!("Economic Storage: Loaded {} events from {} chunks", map.len(), chunks.len())).await;
-    Ok(map)
+    
+    Ok(count)
 }

-pub async fn save_optimized_chunks(paths: &DataPaths, events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
+/// Loads events from the leading chunks until at least `batch_size` events are collected.
+///
+/// Chunks are read whole, in slice order. After each chunk the running total is
+/// compared with `batch_size`, and loading stops once it has been reached. Chunks
+/// after that point are not read, and this function offers no way to resume from
+/// them: the returned iterator covers only this first batch. Each item is the
+/// pair `(event_key(&event), event)`.
+///
+/// # Errors
+/// Fails if a chunk file cannot be read or parsed.
+pub async fn load_events_in_batches(
+    chunks: &[ChunkInfo],
+    batch_size: usize,
+) -> anyhow::Result<impl Iterator<Item = (String, EconomicEvent)>> {
+    let mut keyed_events: Vec<(String, EconomicEvent)> = Vec::new();
+
+    for chunk in chunks {
+        logger::log_info(&format!("Loading chunk: {:?}", chunk.path.file_name())).await;
+
+        let raw = fs::read_to_string(&chunk.path).await?;
+        let parsed: Vec<EconomicEvent> = serde_json::from_str(&raw)?;
+        keyed_events.extend(parsed.into_iter().map(|event| (event_key(&event), event)));
+
+        // The cut-off is checked only after a complete chunk has been appended.
+        if keyed_events.len() >= batch_size {
+            break;
+        }
+    }
+
+    logger::log_info(&format!("Loaded {} events in batch", keyed_events.len())).await;
+    Ok(keyed_events.into_iter())
+}
+
+/// One entry of the event index: the keys and date of a stored event plus the
+/// chunk file it was read from, without the event payload itself.
+#[derive(Debug, Clone)]
+pub struct EventIndex {
+    // Value of `event_key` for the indexed event
+    pub key: String,
+    // Value of `identity_key` for the indexed event
+    pub identity_key: String,
+    // Copy of the event's `date` field
+    pub date: String,
+    // Path of the chunk file that contains the event
+    pub chunk_file: std::path::PathBuf,
+}
+
+/// Builds an `EventIndex` entry for every event stored in `chunks`.
+///
+/// Each chunk file is read and deserialized in turn; only the keys, the date and
+/// the chunk path of every event are kept. Entries appear in chunk order, then
+/// in file order.
+///
+/// # Errors
+/// Fails if a chunk file cannot be read or parsed.
+pub async fn build_event_index(chunks: &[ChunkInfo]) -> anyhow::Result<Vec<EventIndex>> {
+    let mut entries: Vec<EventIndex> = Vec::new();
+
+    for chunk in chunks {
+        logger::log_info(&format!("Indexing chunk: {:?}", chunk.path.file_name())).await;
+
+        let raw = fs::read_to_string(&chunk.path).await?;
+        let parsed: Vec<EconomicEvent> = serde_json::from_str(&raw)?;
+
+        entries.extend(parsed.iter().map(|event| EventIndex {
+            key: event_key(event),
+            identity_key: identity_key(event),
+            date: event.date.clone(),
+            chunk_file: chunk.path.clone(),
+        }));
+    }
+
+    logger::log_info(&format!("Built index with {} entries", entries.len())).await;
+    Ok(entries)
+}
+
+/// Fetches the event with the given `key`, reading only the chunk the index points to.
+///
+/// The index is searched linearly for the first entry whose `key` matches. If there
+/// is none, `Ok(None)` is returned without touching the disk. Otherwise that entry's
+/// chunk file is loaded and the first event whose `event_key` equals `key` is returned.
+///
+/// # Errors
+/// Fails if the chunk file cannot be read or parsed.
+pub async fn lookup_event_by_key(key: &str, index: &[EventIndex]) -> anyhow::Result<Option<EconomicEvent>> {
+    let hit = match index.iter().find(|candidate| candidate.key == key) {
+        Some(hit) => hit,
+        None => return Ok(None),
+    };
+
+    let raw = fs::read_to_string(&hit.chunk_file).await?;
+    let stored: Vec<EconomicEvent> = serde_json::from_str(&raw)?;
+
+    Ok(stored.into_iter().find(|event| event_key(event) == key))
+}
+
+/// Save events in smaller, more manageable chunks
+pub async fn save_optimized_chunks(
+    paths: &DataPaths, 
+    events: Vec<EconomicEvent> // Changed from HashMap to Vec
+) -> anyhow::Result<()> {
    let dir = paths.economic_events_dir();
    fs::create_dir_all(dir).await?;

@@ -67,31 +155,36 @@ pub async fn save_optimized_chunks(paths: &DataPaths, events: HashMap<String, Ec
    }
    logger::log_info(&format!("Economic Storage: Removed {} old chunk files", removed_count)).await;

-    let mut sorted: Vec<_> = events.into_values().collect();
-    sorted.sort_by_key(|e| e.date.clone());
+    let mut sorted = events;
+    sorted.sort_by(|a, b| a.date.cmp(&b.date));

-    let mut chunk: Vec<EconomicEvent> = Vec::new();
-    const MAX_EVENTS_PER_CHUNK: usize = ( 30000 / 2 ) / 11;  // (30000 - 2) / 11 = 2727
-
-    for e in sorted {
-        if !chunk.is_empty() && chunk.len() >= MAX_EVENTS_PER_CHUNK {
-            save_chunk(&chunk, dir).await?;
-            chunk.clear();
-        }
-        chunk.push(e);
+    // Save in smaller chunks
+    let mut chunk_num = 0;
+    for chunk in sorted.chunks(MAX_EVENTS_PER_FILE) {
+        save_chunk_vec(chunk, dir, chunk_num).await?;
+        chunk_num += 1;
+        
+        // Allow other tasks to run
+        tokio::task::yield_now().await;
    }
-    if !chunk.is_empty() {
-        save_chunk(&chunk, dir).await?;
-    }
-    logger::log_info(&format!("Economic Storage: Saved all event chunks to {:?}", dir)).await;
+    
+    logger::log_info(&format!("Economic Storage: Saved {} chunks to {:?}", chunk_num, dir)).await;
    Ok(())
 }

-async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> {
-    let start = events.iter().map(|e| &e.date).min().unwrap().clone();
-    let end = events.iter().map(|e| &e.date).max().unwrap().clone();
-    let path = dir.join(format!("chunk_{}_{}.json", start, end));
-    fs::write(&path, serde_json::to_string_pretty(events)?).await?;
+/// Writes one slice of date-sorted events to its own chunk file in `dir`.
+///
+/// The file is named `chunk_<start>_<end>_<NNNN>.json`, where `<start>`/`<end>` are
+/// the dates of the first and last event in the slice and `<NNNN>` is the
+/// zero-padded `chunk_num`. An empty slice writes nothing.
+///
+/// # Errors
+/// Fails if the events cannot be serialized or the file cannot be written.
+async fn save_chunk_vec(events: &[EconomicEvent], dir: &std::path::Path, chunk_num: usize) -> anyhow::Result<()> {
+    if events.is_empty() {
+        return Ok(());
+    }
+    
+    let start = &events[0].date;
+    let end = &events[events.len() - 1].date;
+    // The sequence number goes last: `scan_existing_chunks` reads the start and end
+    // dates from fixed offsets right after the `chunk_` prefix, so putting the number
+    // first would make it parse garbage. As a suffix it still keeps names unique.
+    let path = dir.join(format!("chunk_{}_{}_{:04}.json", start, end, chunk_num));
+    
+    // The whole slice is serialized into one in-memory string before it is written
+    let json = serde_json::to_string_pretty(events)?;
+    fs::write(&path, json).await?;
+    
    logger::log_info(&format!("Economic Storage: Saved chunk {} - {} ({} events)", start, end, events.len())).await;
    Ok(())
 }
--- a/src/economic/update.rs
+++ b/src/economic/update.rs
@@ -1,18 +1,13 @@
 // src/economic/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*};
+use crate::check_shutdown;
 use crate::{config::Config, scraper::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger};
 use chrono::{Local};
-use std::sync::Arc;
+use std::sync::{Arc, atomic::{AtomicBool}};
+use std::collections::HashMap;

-/// Runs the full update for economic data, using the provided ChromeDriver pool.
-///
-/// # Arguments
-/// * `config` - The application configuration.
-/// * `pool` - Shared pool of ChromeDriver instances for scraping.
-///
-/// # Errors
-/// Returns an error if scraping, loading, or saving fails.
-pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
+/// Runs the full update for economic data using streaming to minimize memory usage
+pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>, shutdown_flag: &Arc<AtomicBool>) -> anyhow::Result<()> {
    let paths = DataPaths::new(".")?;

    logger::log_info("Economic Update: Initializing...").await;
@@ -20,81 +15,132 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
    let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
    let end_date = config.target_end_date();

-    logger::log_info(&format!("Economic Update: Scanning existing chunks from {:?}", paths.economic_events_dir())).await;
-    let chunks = scan_existing_chunks(&paths).await?;
-    let mut events = load_existing_events(&chunks).await?;
-    
-    let msg = format!("Economic Update: Loaded {} events from {} chunks", events.len(), chunks.len());
-    println!("{}", msg);
-    logger::log_info(&msg).await;
+    logger::log_info("=== Economic Update ===").await;

-    let start_date = if events.is_empty() {
-        logger::log_warn("Economic Update: No existing events found, starting from config date").await;
+    check_shutdown!(shutdown_flag);
+
+    // Step 1: Build lightweight index instead of loading all events
+    logger::log_info("Step 1: Building event index...").await;
+    let chunks = scan_existing_chunks(&paths).await?;
+    let event_index = build_event_index(&chunks).await?;
+    logger::log_info(&format!("  Economic Update: Indexed {} events from {} chunks", 
+        event_index.len(), chunks.len())).await;
+
+    check_shutdown!(shutdown_flag);
+
+    // Step 2: Determine start date
+    let start_date = if event_index.is_empty() {
+        logger::log_warn("Step 2: No existing events found, starting from config date").await;
        config.economic_start_date.clone()
-    } else if events.values().any(|e| e.date >= today_str) {
-        logger::log_info("Economic Update: Events exist for today, starting from today").await;
-        today_str.clone()
    } else {
-        let next = events.values()
-            .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
+        // Find the latest date in the index
+        let max_date = event_index.iter()
+            .map(|e| &e.date)
            .max()
-            .and_then(|d| d.succ_opt())
-            .map(|d| d.format("%Y-%m-%d").to_string())
+            .cloned()
            .unwrap_or(today_str.clone());
-        logger::log_info(&format!("Economic Update: Resuming from: {}", next)).await;
-        next
+        
+        if max_date >= today_str {
+            logger::log_info("  Events exist for today, starting from today").await;
+            today_str.clone()
+        } else {
+            let next = chrono::NaiveDate::parse_from_str(&max_date, "%Y-%m-%d")
+                .ok()
+                .and_then(|d| d.succ_opt())
+                .map(|d| d.format("%Y-%m-%d").to_string())
+                .unwrap_or(today_str.clone());
+            logger::log_info(&format!("  Resuming from: {}", next)).await;
+            next
+        }
    };

-    let msg = format!("Economic Update: Scraping events from {} → {}", start_date, end_date);
-    println!("{}", msg);
-    logger::log_info(&msg).await;
+    check_shutdown!(shutdown_flag);

-    // Pass the pool to the scraping function
-    let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;
-    
-    let msg = format!("Economic Update: Scraped {} new events", new_events_all.len());
-    println!("{}", msg);
-    logger::log_info(&msg).await;
+    // Step 3: Scrape new events in batches
+    logger::log_info(&format!("Step 3: Scraping events from {} → {}", start_date, end_date)).await;
+    let new_events = scrape_all_economic_events(&start_date, &end_date, pool).await?;
+    logger::log_info(&format!("  Scraped {} new events", new_events.len())).await;

-    // Process all at once or in batches
-    let result = process_batch(&new_events_all, &mut events, &today_str);
-    let total_changes = result.changes.len();
-    
-    let msg = format!("Economic Update: Detected {} changes", total_changes);
-    println!("{}", msg);
-    logger::log_info(&msg).await;
-    
-    if total_changes > 0 {
-        logger::log_info(&format!("Economic Update: Saving {} changes to log", total_changes)).await;
-        save_changes(&paths, &result.changes).await?;
-        logger::log_info("Economic Update: Changes saved successfully").await;
+    check_shutdown!(shutdown_flag);
+
+    // Step 4: Process events in streaming fashion
+    logger::log_info(&format!("Step 4: Detecting changes")).await;
+    let (changes, updated_events) = process_events_streaming(&chunks, &new_events, &today_str).await?;
+    logger::log_info(&format!("  Detected {} changes", changes.len())).await;
+    if !changes.is_empty() {
+        logger::log_info(&format!("  Saving {} changes to log", changes.len())).await;
+        save_changes(&paths, &changes).await?;
+        logger::log_info("  Changes saved successfully").await;
    }

-    logger::log_info(&format!("Economic Update: Saving {} total events to chunks", events.len())).await;
-    save_optimized_chunks(&paths, events).await?;
-    
-    let msg = format!("✓ Economic update complete — {} changes detected", total_changes);
-    println!("{}", msg);
-    logger::log_info(&msg).await;
+    check_shutdown!(shutdown_flag);
+
+    // Step 5: Save consolidated events
+    logger::log_info(&format!("Step 5: Saving {} total events to chunks", updated_events.len())).await;
+    save_optimized_chunks(&paths, updated_events).await?;
+    logger::log_info(&format!("  ✓ Economic update complete — {} changes detected", changes.len())).await;
+
    Ok(())
 }

-/// Scrapes all economic events from start to end date using a dedicated ScrapeTask with the provided pool.
-///
-/// This function creates a ScrapeTask to navigate to the Finanzen.net page, prepare it,
-/// and then loop through date ranges to extract events.
-///
-/// # Arguments
-/// * `start` - Start date in YYYY-MM-DD.
-/// * `end` - End date in YYYY-MM-DD.
-/// * `pool` - Shared pool of ChromeDriver instances.
-///
-/// # Returns
-/// A vector of all extracted EconomicEvent structs.
-///
-/// # Errors
-/// Returns an error if task execution fails or extraction issues occur.
-pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<Vec<EconomicEvent>> {
+/// Merges the events stored in `chunks` with freshly scraped `new_events` and reports the changes.
+///
+/// Despite the name, all stored events are held in memory at once: every chunk is
+/// read in full and inserted into a single `HashMap` keyed by `event_key` (a later
+/// event with the same key replaces an earlier one). The new events are then passed
+/// to `process_batch` in slices of 500, which updates the map and yields the changes.
+///
+/// # Returns
+/// The collected changes and the final set of events (in unspecified order).
+///
+/// # Errors
+/// Fails if a chunk file cannot be read or parsed.
+async fn process_events_streaming(
+    chunks: &[ChunkInfo],
+    new_events: &[EconomicEvent],
+    today: &str,
+) -> anyhow::Result<(Vec<EventChange>, Vec<EconomicEvent>)> {
+    let mut merged: HashMap<String, EconomicEvent> = HashMap::new();
+    let mut detected = Vec::new();
+
+    // Phase 1: pull every stored event into the map
+    logger::log_info("Processing existing events in batches...").await;
+    for chunk in chunks {
+        logger::log_info(&format!("Loading chunk: {:?}", chunk.path.file_name())).await;
+
+        let raw = tokio::fs::read_to_string(&chunk.path).await?;
+        let stored: Vec<EconomicEvent> = serde_json::from_str(&raw)?;
+        merged.extend(stored.into_iter().map(|event| (event_key(&event), event)));
+
+        // Progress report only; nothing is evicted from the map here
+        if merged.len() > 10000 {
+            logger::log_info(&format!("Loaded {} events so far...", merged.len())).await;
+        }
+    }
+    logger::log_info(&format!("Loaded {} existing events total", merged.len())).await;
+
+    // Phase 2: apply the scraped events slice by slice
+    logger::log_info("Processing new events...").await;
+    let mut batch_no = 0;
+    for batch in new_events.chunks(500) {
+        batch_no += 1;
+        logger::log_info(&format!("Processing batch {} ({} events)", batch_no, batch.len())).await;
+
+        let outcome = process_batch(batch, &mut merged, today);
+        detected.extend(outcome.changes);
+
+        // Give other tasks a chance to run between slices
+        tokio::task::yield_now().await;
+    }
+    logger::log_info(&format!("Processing complete. Total events: {}", merged.len())).await;
+
+    // The caller saves from a Vec, so the keys are dropped here
+    let all_events: Vec<EconomicEvent> = merged.into_values().collect();
+    Ok((detected, all_events))
+}
+
+/// Scrapes all economic events from start to end date
+pub async fn scrape_all_economic_events(
+    start: &str, 
+    end: &str, 
+    pool: &Arc<ChromeDriverPool>
+) -> anyhow::Result<Vec<EconomicEvent>> {
    let url = "https://www.finanzen.net/termine/wirtschaftsdaten/".to_string();
    let start_clone = start.to_string();
    let end_clone = end.to_string();
@@ -108,9 +154,18 @@ pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<Chrom
            set_date_range(&client, &current, &end_clone).await?;
            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
            let new_events = extract_events(&client).await?;
-            if new_events.is_empty() { break; }
+            
+            if new_events.is_empty() { 
+                break; 
+            }
+            
            all_events.extend(new_events.clone());

+            // Prevent memory buildup - process in chunks if too large
+            if all_events.len() > 5000 {
+                logger::log_info(&format!("Scraped {} events so far, continuing...", all_events.len())).await;
+            }
+
            let next = new_events.iter()
                .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
                .max()
@@ -121,22 +176,23 @@ pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<Chrom
            if next > end_clone { break; }
            current = next;
        }
+        
        Ok(all_events)
    });

-    // Use the pool for execution
    task.execute_with_pool(pool).await
 }

+/// Process a batch of events and detect changes
 pub fn process_batch(
    new_events: &[EconomicEvent],
-    existing: &mut std::collections::HashMap<String, EconomicEvent>,
+    existing: &mut HashMap<String, EconomicEvent>,
    today: &str,
 ) -> ScrapeResult {
    let mut changes = Vec::new();
    let mut removed = std::collections::HashSet::new();

-    let identity_map = build_identity_lookup(existing);
+    //let identity_map = build_identity_lookup(existing);
    let date_map = build_date_event_lookup(existing);

    for new in new_events {
--- a/src/economic/yahoo_update_forex.rs
+++ b/src/economic/yahoo_update_forex.rs
@@ -0,0 +1,477 @@
+// src/economic/yahoo_update_forex.rs
+use crate::config::Config;
+use crate::util::directories::DataPaths;
+use crate::util::integrity::{DataStage, StateManager, directory_reference};
+use crate::util::logger;
+use crate::scraper::yahoo::{YahooClientPool};
+use crate::corporate::types::*;
+
+use std::result::Result::Ok;
+use chrono::{TimeZone, Utc};
+use std::collections::HashSet;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use tokio::fs::{OpenOptions};
+use tokio::io::{AsyncWriteExt};
+use futures::stream::{FuturesUnordered, StreamExt};
+use serde_json::json;
+use tokio::sync::mpsc;
+
+/// A currency quoted against USD, together with the Yahoo symbol used to fetch it.
+#[derive(Debug, Clone)]
+struct CurrencyPair {
+    // Currency code of the quoted currency
+    code: String,           // e.g., "EUR", "JPY"
+    // Human-readable currency name
+    name: String,           // e.g., "Euro", "Japanese Yen"
+    // Symbol derived from `code` in `CurrencyPair::new`
+    yahoo_symbol: String,   // e.g., "USDEUR=X", "USDJPY=X"
+}
+
+impl CurrencyPair {
+    /// Builds a pair from a currency `code` and display `name`.
+    ///
+    /// The Yahoo symbol is derived from the code as `USD<code>=X`.
+    fn new(code: &str, name: &str) -> Self {
+        let yahoo_symbol = format!("USD{}=X", code);
+
+        Self {
+            code: code.to_owned(),
+            name: name.to_owned(),
+            yahoo_symbol,
+        }
+    }
+}
+
+/// Returns the currency pairs to fetch, each quoted against USD as base currency.
+///
+/// The result preserves the order of the table below.
+fn get_currency_pairs() -> Vec<CurrencyPair> {
+    // (currency code, display name)
+    const CURRENCIES: &[(&str, &str)] = &[
+        ("EUR", "Euro"),
+        ("TRY", "Turkish Lira"),
+        ("CHF", "Swiss Franc"),
+        ("SEK", "Swedish Krona"),
+        ("TWD", "New Taiwan Dollar"),
+        ("AUD", "Australian Dollar"),
+        ("GBP", "British Pound"),       // formerly listed as GBp
+        ("NOK", "Norwegian Krone"),
+        ("CAD", "Canadian Dollar"),
+        ("CZK", "Czech Koruna"),
+        ("SGD", "Singapore Dollar"),
+        ("ISK", "Icelandic Króna"),
+        ("ZAR", "South African Rand"),  // formerly listed as ZAc
+        ("JPY", "Japanese Yen"),
+        ("PLN", "Polish Złoty"),
+        ("DKK", "Danish Krone"),
+        ("HKD", "Hong Kong Dollar"),
+        ("ILS", "Israeli Shekel"),      // formerly listed as ILA
+        ("RON", "Romanian Leu"),
+        ("KWD", "Kuwaiti Dinar"),       // formerly listed as KWF
+    ];
+
+    CURRENCIES
+        .iter()
+        .map(|&(code, name)| CurrencyPair::new(code, name))
+        .collect()
+}
+
+/// Collect daily Yahoo foreign-exchange charts for every pair returned by
+/// `get_currency_pairs`, WITH ABORT-SAFE INCREMENTAL PERSISTENCE
+///
+/// # Features
+/// - Graceful shutdown: `shutdown_flag` is checked after every finished task
+/// - Task panic isolation: each currency runs in its own spawned task; a task that
+///   ends with a join error is logged and counted as failed
+/// - Smart skip logic: currencies already logged as "collected" are not fetched again
+/// - Uses a pending queue instead of a retry mechanism (a failed currency is not
+///   retried within the same run)
+///
+/// # Persistence Strategy
+/// - Progress log: `<data_dir>/fx_rates_updates.log` (append-only JSON lines)
+/// - On restart: replay the log and skip every currency with status "collected"
+/// - Periodic checkpoints (forced fsync after every 10 finished tasks)
+/// - Batched fsync (after 5 writes, or on the first write after 10 seconds)
+///
+/// # Returns
+/// Number of successfully collected currencies, including those recovered from the
+/// progress log. When the step is already marked valid, the number of currency
+/// directories that contain chart data.
+///
+/// # Errors
+/// Fails when the state manager cannot be created, queried or updated, or when an
+/// existing progress log cannot be read. Failures of individual currencies are only
+/// logged and counted.
+pub async fn collect_fx_rates(
+    paths: &DataPaths,
+    _config: &Config,
+    yahoo_pool: Arc<YahooClientPool>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<usize> {
+    // Configuration constants
+    const CHECKPOINT_INTERVAL: usize = 10;
+    const FSYNC_BATCH_SIZE: usize = 5;
+    const FSYNC_INTERVAL_SECS: u64 = 10;
+    const CONCURRENCY_LIMIT: usize = 10; // Limit parallel fetch tasks
+
+    let data_path = paths.data_dir();
+
+    // File paths
+    let output_path = data_path.join("economic").join("currency");
+    let log_path = data_path.join("fx_rates_updates.log");
+
+    let manager = StateManager::new(paths.integrity_dir()).await?;
+    let step_name = "yahoo_fx_rate_collection_completed";
+    let content_reference = directory_reference(&output_path,
+        Some(vec![
+            "*/chart/*.jsonl".to_string(),      // Any chart file below a currency directory
+            "*/chart/data.jsonl".to_string(),   // Specific pattern (more precise)
+        ]),
+        Some(vec![
+            "*.log".to_string(),                 // Exclude log files
+            "*.tmp".to_string(),                 // Exclude temp files
+            "*.bak".to_string(),                 // Exclude backup files
+        ]),
+    );
+
+    if manager.is_step_valid(step_name).await? {
+        logger::log_info("  FX rates collection already completed").await;
+        let count = count_collected_currencies(paths).await?;
+        logger::log_info(&format!("  ✓ Found {} currencies with chart data", count)).await;
+        return Ok(count);
+    }
+    let entry = manager.create_entry(
+        step_name.to_string(),
+        content_reference.clone(),
+        DataStage::Data,
+    ).await?;
+
+    logger::log_info("  Updating missing forex data...").await;
+
+    // === RECOVERY PHASE: Track collected currencies ===
+    let mut collected_currencies: HashSet<String> = HashSet::new();
+
+    if log_path.exists() {
+        logger::log_info("Loading FX rates collection progress from log...").await;
+        let log_content = tokio::fs::read_to_string(&log_path).await?;
+
+        for line in log_content.lines() {
+            if line.trim().is_empty() || !line.ends_with('}') {
+                continue; // Skip incomplete lines
+            }
+
+            match serde_json::from_str::<serde_json::Value>(line) {
+                Ok(record) => {
+                    if let Some(code) = record.get("currency_code").and_then(|v| v.as_str()) {
+                        if record.get("status").and_then(|v| v.as_str()) == Some("collected") {
+                            collected_currencies.insert(code.to_string());
+                        }
+                    }
+                }
+                Err(e) => {
+                    logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
+                }
+            }
+        }
+        logger::log_info(&format!("Loaded {} collected currencies from log", collected_currencies.len())).await;
+    }
+
+    // Get all currency pairs
+    let currency_pairs = get_currency_pairs();
+    let total_currencies = currency_pairs.len();
+    logger::log_info(&format!("Found {} currency pairs to collect", total_currencies)).await;
+
+    // Filter currencies that need collection
+    let pending_pairs: Vec<CurrencyPair> = currency_pairs
+        .into_iter()
+        .filter(|pair| !collected_currencies.contains(&pair.code))
+        .collect();
+
+    let pending_count = pending_pairs.len();
+    logger::log_info(&format!(
+        "  {} already collected, {} pending",
+        collected_currencies.len(),
+        pending_count
+    )).await;
+
+    if pending_count == 0 {
+        logger::log_info("  ✓ All currencies already collected").await;
+        manager.mark_valid(entry).await?;
+        return Ok(collected_currencies.len());
+    }
+
+    // === PROCESSING PHASE: Collect FX rates ===
+
+    // Shared counters (currencies recovered from the log count as processed + succeeded)
+    let processed_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
+    let success_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
+    let failed_count = Arc::new(AtomicUsize::new(0));
+
+    // Log writer channel with batching and fsync
+    let (log_tx, mut log_rx) = mpsc::channel::<LogCommand>(1000);
+
+    // Spawn log writer task
+    let log_writer_handle = {
+        let log_path = log_path.clone();
+        let processed_count = Arc::clone(&processed_count);
+
+        tokio::spawn(async move {
+            let mut log_file = OpenOptions::new()
+                .create(true)
+                .append(true)
+                .open(&log_path)
+                .await
+                .expect("Failed to open log file");
+
+            let mut write_count = 0;
+            let mut last_fsync = tokio::time::Instant::now();
+
+            while let Some(cmd) = log_rx.recv().await {
+                match cmd {
+                    LogCommand::Write(entry) => {
+                        let json_line = serde_json::to_string(&entry).expect("Serialization failed");
+                        log_file.write_all(json_line.as_bytes()).await.expect("Write failed");
+                        log_file.write_all(b"\n").await.expect("Write failed");
+
+                        write_count += 1;
+
+                        // Batched fsync (the time limit is only evaluated when a write arrives)
+                        if write_count >= FSYNC_BATCH_SIZE
+                            || last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS
+                        {
+                            log_file.flush().await.expect("Flush failed");
+                            log_file.sync_all().await.expect("Fsync failed");
+                            write_count = 0;
+                            last_fsync = tokio::time::Instant::now();
+                        }
+                    }
+                    LogCommand::Checkpoint => {
+                        // Force fsync on checkpoint
+                        log_file.flush().await.expect("Flush failed");
+                        log_file.sync_all().await.expect("Fsync failed");
+                        write_count = 0;
+                        last_fsync = tokio::time::Instant::now();
+
+                        let current = processed_count.load(Ordering::SeqCst);
+                        logger::log_info(&format!(
+                            "  Checkpoint: {}/{} currencies processed",
+                            current, total_currencies
+                        )).await;
+                    }
+                    LogCommand::Shutdown => {
+                        // Final fsync before shutdown
+                        log_file.flush().await.expect("Flush failed");
+                        log_file.sync_all().await.expect("Fsync failed");
+                        break;
+                    }
+                }
+            }
+        })
+    };
+
+    // Process currencies concurrently with task panic isolation
+    let mut tasks = FuturesUnordered::new();
+    let mut pending_iter = pending_pairs.into_iter();
+    let semaphore = Arc::new(tokio::sync::Semaphore::new(CONCURRENCY_LIMIT));
+
+    // Single place that wires a pending pair to a spawned collection task
+    let spawn_task = |pair: CurrencyPair| {
+        spawn_collection_task(
+            pair,
+            Arc::clone(&yahoo_pool),
+            paths.clone(),
+            Arc::clone(&processed_count),
+            Arc::clone(&success_count),
+            Arc::clone(&failed_count),
+            log_tx.clone(),
+            Arc::clone(&semaphore),
+            Arc::clone(shutdown_flag),
+        )
+    };
+
+    // Initial batch of tasks
+    for pair in pending_iter.by_ref().take(CONCURRENCY_LIMIT) {
+        tasks.push(spawn_task(pair));
+    }
+
+    // Process tasks as they complete and spawn new ones
+    let mut checkpoint_counter = 0;
+    while let Some(join_result) = tasks.next().await {
+        // A join error means the task never reported its own result (e.g. it panicked),
+        // so account for it here instead of dropping it silently
+        if let Err(e) = join_result {
+            failed_count.fetch_add(1, Ordering::SeqCst);
+            logger::log_warn(&format!("  ✗ FX collection task ended abnormally: {}", e)).await;
+        }
+
+        // Check for shutdown (tasks that are still running are not awaited)
+        if shutdown_flag.load(Ordering::SeqCst) {
+            logger::log_warn("Shutdown signal received, stopping FX collection").await;
+            break;
+        }
+
+        // Spawn new task if more pending
+        if let Some(pair) = pending_iter.next() {
+            tasks.push(spawn_task(pair));
+        }
+
+        // Periodic checkpoint
+        checkpoint_counter += 1;
+        if checkpoint_counter % CHECKPOINT_INTERVAL == 0 {
+            let _ = log_tx.send(LogCommand::Checkpoint).await;
+        }
+    }
+
+    // Signal shutdown to log writer (the send fails only if the writer is already gone)
+    let _ = log_tx.send(LogCommand::Shutdown).await;
+
+    // Wait for log writer to finish and surface a writer panic instead of hiding it
+    if let Err(e) = log_writer_handle.await {
+        logger::log_error(&format!("FX progress log writer ended abnormally: {}", e)).await;
+    }
+
+    // Final statistics
+    let final_success = success_count.load(Ordering::SeqCst);
+    let final_failed = failed_count.load(Ordering::SeqCst);
+
+    logger::log_info(&format!(
+        "  FX collection complete: {} succeeded, {} failed",
+        final_success, final_failed
+    )).await;
+
+    // Mark as complete if not shutdown
+    if !shutdown_flag.load(Ordering::SeqCst) {
+        manager.mark_valid(entry).await?;
+    }
+    Ok(final_success)
+}
+
+/// Spawns one Tokio task that collects the chart for `pair`.
+///
+/// The task waits for a semaphore permit, returns without doing anything when the
+/// shutdown flag is already set, otherwise runs `collect_currency_chart`, updates the
+/// shared counters and sends one JSON record with status "collected" or "failed" to
+/// the log channel. A panic inside the task surfaces as an error on the returned
+/// join handle.
+fn spawn_collection_task(
+    pair: CurrencyPair,
+    yahoo_pool: Arc<YahooClientPool>,
+    paths: DataPaths,
+    processed_count: Arc<AtomicUsize>,
+    success_count: Arc<AtomicUsize>,
+    failed_count: Arc<AtomicUsize>,
+    log_tx: mpsc::Sender<LogCommand>,
+    semaphore: Arc<tokio::sync::Semaphore>,
+    shutdown_flag: Arc<AtomicBool>,
+) -> tokio::task::JoinHandle<()> {
+    tokio::spawn(async move {
+        // The permit is held until the task ends
+        let _permit = semaphore.acquire().await.expect("Semaphore closed");
+
+        // Nothing is fetched or logged once shutdown has been requested
+        if shutdown_flag.load(Ordering::SeqCst) {
+            return;
+        }
+
+        let outcome = collect_currency_chart(&pair, &yahoo_pool, &paths).await;
+        processed_count.fetch_add(1, Ordering::SeqCst);
+
+        let status = if let Err(e) = outcome {
+            failed_count.fetch_add(1, Ordering::SeqCst);
+            logger::log_warn(&format!(
+                "  ✗ Failed to collect {} ({}): {}",
+                pair.code, pair.name, e
+            )).await;
+            "failed"
+        } else {
+            success_count.fetch_add(1, Ordering::SeqCst);
+            logger::log_info(&format!(
+                "  ✓ Collected {} ({})",
+                pair.code, pair.name
+            )).await;
+            "collected"
+        };
+
+        // One progress record per processed currency
+        let record = json!({
+            "currency_code": pair.code,
+            "currency_name": pair.name,
+            "yahoo_symbol": pair.yahoo_symbol,
+            "status": status,
+            "timestamp": Utc::now().to_rfc3339(),
+        });
+
+        // A closed channel is ignored
+        let _ = log_tx.send(LogCommand::Write(record)).await;
+    })
+}
+
+/// Fetches the daily chart of one currency pair from Yahoo and stores it on disk.
+///
+/// The requested range runs from 2000-01-01 00:00:00 UTC up to the current time.
+/// Returns an error when the request fails, when Yahoo delivers no quotes, or when
+/// the chart cannot be saved.
+async fn collect_currency_chart(
+    pair: &CurrencyPair,
+    yahoo_pool: &Arc<YahooClientPool>,
+    paths: &DataPaths,
+) -> anyhow::Result<()> {
+    // Requested time range as Unix timestamps
+    let range_end = Utc::now().timestamp();
+    let range_start = Utc
+        .with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
+        .unwrap()
+        .timestamp();
+
+    // "1d" = daily interval
+    let chart_data = yahoo_pool
+        .get_chart_data(&pair.yahoo_symbol, "1d", range_start, range_end)
+        .await?;
+
+    // An empty chart is treated as a failure
+    if chart_data.quotes.is_empty() {
+        anyhow::bail!(
+            "No chart data available for {} ({})",
+            pair.code,
+            pair.yahoo_symbol
+        );
+    }
+
+    // Persist below the currency's own directory
+    save_currency_chart(paths, &pair.code, &chart_data).await
+}
+
+/// Save currency chart data to `<data_dir>/economic/currency/<code>/chart/data.jsonl`.
+///
+/// The chart is serialized as a single JSON line. It is first written and fsynced to
+/// `data.jsonl.tmp` in the same directory and then renamed over `data.jsonl`, so an
+/// interrupted write never leaves a truncated `data.jsonl` behind.
+///
+/// # Errors
+/// Fails when the directory cannot be created, the chart cannot be serialized, or
+/// any write, sync or rename step fails.
+async fn save_currency_chart(
+    paths: &DataPaths,
+    currency_code: &str,
+    chart_data: &ChartData,
+) -> anyhow::Result<()> {
+    use tokio::fs;
+
+    // Create directory structure: data/economic/currency/{code}/chart/
+    let economic_dir = paths.data_dir().join("economic");
+    let currency_dir = economic_dir.join("currency").join(currency_code);
+    let chart_dir = currency_dir.join("chart");
+
+    fs::create_dir_all(&chart_dir).await?;
+
+    let data_path = chart_dir.join("data.jsonl");
+    let tmp_path = chart_dir.join("data.jsonl.tmp");
+    let json_line = serde_json::to_string(chart_data)?;
+
+    // Write the complete content to the temporary file first; the handle is dropped
+    // at the end of this scope, before the rename
+    {
+        let mut file = fs::File::create(&tmp_path).await?;
+        file.write_all(json_line.as_bytes()).await?;
+        file.write_all(b"\n").await?;
+        file.flush().await?;
+        file.sync_all().await?; // Ensure data is persisted
+    }
+
+    // Replace the previous chart file only once the new content is complete
+    fs::rename(&tmp_path, &data_path).await?;
+
+    Ok(())
+}
+
+/// Counts the currency directories that contain a `chart/data.jsonl` file.
+///
+/// Looks at the direct children of `<data_dir>/economic/currency`; returns 0 when
+/// that directory does not exist.
+async fn count_collected_currencies(paths: &DataPaths) -> anyhow::Result<usize> {
+    let currency_root = paths.data_dir().join("economic").join("currency");
+    if !currency_root.exists() {
+        return Ok(0);
+    }
+
+    let mut found = 0;
+    let mut listing = tokio::fs::read_dir(&currency_root).await?;
+
+    while let Some(item) = listing.next_entry().await? {
+        let candidate = item.path();
+        // Only sub-directories that hold the chart file are counted
+        if candidate.is_dir() && candidate.join("chart").join("data.jsonl").exists() {
+            found += 1;
+        }
+    }
+
+    Ok(found)
+}
+
+/// Commands handled by the log writer task spawned in `collect_fx_rates`.
+enum LogCommand {
+    // Append the JSON value as one line to the progress log
+    Write(serde_json::Value),
+    // Flush + fsync the log immediately and report the processed count
+    Checkpoint,
+    // Flush + fsync the log one last time and stop the writer task
+    Shutdown,
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,4 +5,17 @@

 pub mod config;
 pub mod scraper;
-pub mod util;
+pub mod util;
+pub mod monitoring;
+pub mod economic;
+pub mod corporate;
+
+// Re-export commonly used types for convenience
+pub use monitoring::{init_monitoring, ConfigSnapshot, MonitoringEvent};
+pub use config::Config;
+pub use scraper::webdriver::{ChromeDriverPool, ChromeInstance, ScrapeTask};
+pub use util::logger;
+pub use util::macros;
+pub use scraper::yahoo::{
+    YahooClient, YahooClientPool, QuoteSummaryModule, QuoteSummary, SearchResult
+};
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,79 +1,359 @@
-// src/main.rs
-mod config;
-mod corporate;
-mod economic;
-mod util;
-mod scraper;
-
-use anyhow::Result;
-use config::Config;
+use web_scraper::util::integrity::StateManager;
+// src/main.rs - Cleaned up version with extracted helpers
+use web_scraper::{*, scraper, corporate};
+use crate::check_shutdown;
+use anyhow::{Result};
+use web_scraper::config::Config;
+use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers};
 use scraper::webdriver::ChromeDriverPool;
 use util::directories::DataPaths;
 use util::{logger, opnv};
+use std::fs::{OpenOptions};
 use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::process::Command;
+use std::time::{Duration, Instant};

-/// The entry point of the application.
-///
-/// This function loads the configuration, initializes a shared ChromeDriver pool,
-/// fetches the latest VPNBook OpenVPN configurations if VPN rotation is enabled,
-/// and sequentially runs the full updates for corporate and economic data.
-/// Sequential execution helps prevent resource exhaustion from concurrent
-/// chromedriver instances and avoids spamming the target websites with too many requests.
-///
-/// # Errors
-///
-/// Returns an error if configuration loading fails, pool initialization fails,
-/// VPN fetching fails (if enabled), or if either update function encounters an issue
-/// (e.g., network errors, scraping failures, or chromedriver spawn failures like "program not found").
-#[tokio::main]
-async fn main() -> Result<()> {
-    let config = Config::load().map_err(|err| {
-        println!("Failed to load Config .env: {}", err);
-        err
-    })?;
+// ============================================================================
+// HELPER FUNCTIONS - Extracted to reduce duplication
+// ============================================================================

-    // Initialize paths
-    let paths = DataPaths::new(".")?;
-    
-    // Initialize logger
-    logger::init_debug_logger(paths.logs_dir()).await.map_err(|e| {
-        anyhow::anyhow!("Logger initialization failed: {}", e)
-    })?;
+/// Asks Docker Desktop to start (Windows only; does nothing on other platforms).
+///
+/// The command is run through `cmd /C` and its outcome is ignored.
+async fn start_docker_desktop() {
+    if !cfg!(target_os = "windows") {
+        return;
+    }
+
+    // Best effort: neither a launch failure nor the command output is inspected
+    let _ = Command::new("cmd")
+        .args(["/C", "docker desktop start"])
+        .output();
+}

-    logger::log_info("=== Application started ===").await;
-    logger::log_info(&format!("Config: economic_start_date={}, corporate_start_date={}, lookahead_months={}, max_parallel_instances={}, enable_vpn_rotation={}", 
-        config.economic_start_date, config.corporate_start_date, config.economic_lookahead_months, config.max_parallel_instances, config.enable_vpn_rotation)).await;
+/// Shuts the ChromeDriver pool down and logs the outcome; errors are logged, not returned.
+async fn shutdown_chrome_pool(pool: &ChromeDriverPool) {
+    logger::log_info("Shutting down ChromeDriver pool...").await;
+
+    if let Err(e) = pool.shutdown().await {
+        logger::log_error(&format!("✗ Pool shutdown error: {}", e)).await;
+        return;
+    }
+
+    logger::log_info("✓ ChromeDriver pool shut down successfully").await;
+}

-    // Initialize the shared ChromeDriver pool once
-    let pool_size = config.max_parallel_instances;
-    logger::log_info(&format!("Initializing ChromeDriver pool with size: {}", pool_size)).await;
-    
-    let pool = Arc::new(ChromeDriverPool::new(pool_size).await?);
-    logger::log_info("✓ ChromeDriver pool initialized successfully").await;
+/// Stops the Docker VPN proxy pool and logs the outcome; errors are logged, not returned.
+async fn shutdown_proxy_pool(proxy_pool: &DockerVpnProxyPool) {
+    logger::log_info("Stopping Docker VPN proxy containers...").await;
+
+    let outcome = proxy_pool.shutdown().await;
+    if let Err(e) = outcome {
+        logger::log_error(&format!("✗ Proxy shutdown error: {}", e)).await;
+    } else {
+        logger::log_info("✓ All Docker VPN containers stopped").await;
+    }
+}

-    // Fetch VPNBook configs if VPN rotation is enabled
-    if config.enable_vpn_rotation {
-        logger::log_info("--- Fetching latest VPNBook OpenVPN configurations ---").await;
-        let (username, password, files) = 
-            util::opnv::fetch_vpnbook_configs(&pool, paths.cache_dir()).await?;
-        logger::log_info(&format!("Fetched VPN username: {}, password: {}", username, password)).await;
-        for file in &files {
-            logger::log_info(&format!("Extracted OVPN: {:?}", file)).await;
+/// Force-kills every `chrome.exe` and `chromedriver.exe` process via `taskkill` (Windows only).
+///
+/// The result of each `taskkill` call is ignored.
+#[cfg(target_os = "windows")]
+async fn force_kill_chrome_processes() {
+    logger::log_info("Force-killing any remaining Chrome processes...").await;
+
+    // Browser first, then the driver
+    for image in ["chrome.exe", "chromedriver.exe"] {
+        let _ = tokio::process::Command::new("taskkill")
+            .args(["/F", "/IM", image])
+            .output()
+            .await;
+    }
+}
+
+/// Non-Windows variant: intentionally does nothing.
+#[cfg(not(target_os = "windows"))]
+async fn force_kill_chrome_processes() {
+    // No-op on non-Windows platforms
+}
+
+/// Checks via `tasklist` whether any `chrome.exe` process is still listed (Windows only).
+///
+/// Logs a warning with the number of matching lines, or a success message when there
+/// is none. Nothing is logged when `tasklist` itself cannot be run.
+#[cfg(target_os = "windows")]
+async fn verify_chrome_cleanup() {
+    let tasklist = tokio::process::Command::new("tasklist")
+        .args(["/FI", "IMAGENAME eq chrome.exe"])
+        .output()
+        .await;
+
+    let output = match tasklist {
+        Ok(output) => output,
+        Err(_) => return,
+    };
+
+    let listing = String::from_utf8_lossy(&output.stdout);
+    let remaining = listing
+        .lines()
+        .filter(|line| line.contains("chrome.exe"))
+        .count();
+
+    if remaining == 0 {
+        logger::log_info("✓ All Chrome processes cleaned up").await;
+    } else {
+        logger::log_warn(&format!("⚠️ {} Chrome processes still running after cleanup!", remaining)).await;
+    }
+}
+
+/// Non-Windows variant: intentionally does nothing.
+#[cfg(not(target_os = "windows"))]
+async fn verify_chrome_cleanup() {
+    // No-op on non-Windows platforms
+}
+
+/// Runs the complete cleanup sequence in order:
+/// 1. shut down the ChromeDriver pool,
+/// 2. if a proxy pool is given, stop it and clean up all proxy containers,
+/// 3. force-kill any remaining Chrome processes.
+async fn perform_full_cleanup(
+    pool: &ChromeDriverPool,
+    proxy_pool: Option<&DockerVpnProxyPool>,
+) {
+    shutdown_chrome_pool(pool).await;
+
+    match proxy_pool {
+        Some(pp) => {
+            shutdown_proxy_pool(pp).await;
+            // A failed container cleanup is ignored
+            cleanup_all_proxy_containers().await.ok();
+        }
+        None => {}
+    }
+
+    force_kill_chrome_processes().await;
+}
+
+/// Fetch VPNBook credentials through a temporary ChromeDriver pool, then start the
+/// Docker VPN proxy pool.
+///
+/// The temporary pool is always shut down before this function returns, including
+/// when fetching the credentials fails.
+///
+/// # Returns
+/// - `Ok(Some(pool))` with the started proxy pool
+/// - `Ok(None)` when the OpenVPN cache directory contains no server directory
+///
+/// # Errors
+/// Fails when the temporary pool cannot be created, the credentials cannot be
+/// fetched, the OpenVPN cache directory cannot be read, or the proxy pool cannot be
+/// started.
+async fn fetch_vpn_credentials_with_temp_pool(
+    config: &Config,
+    paths: &DataPaths,
+    monitoring_handle: &monitoring::MonitoringHandle,
+) -> Result<Option<Arc<DockerVpnProxyPool>>> {
+    logger::log_info("VPN Rotation Enabled – Fetching latest VPNBook configs").await;
+
+    // Create temp pool
+    logger::log_info("Creating temporary ChromeDriver pool for VPN credential fetch...").await;
+    let temp_pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(
+        None,
+        config,
+        Some(monitoring_handle.clone())
+    ).await?);
+
+    // Fetch credentials; the result is unwrapped only after the temporary pool has
+    // been shut down, so a failed fetch does not leave the pool running
+    logger::log_info("Fetching VPNBook credentials...").await;
+    let fetch_result = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await;
+    if let Ok((username, _, _)) = &fetch_result {
+        logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
+    }
+
+    // Cleanup temp pool
+    logger::log_info("Shutting down temporary pool...").await;
+    match temp_pool.shutdown().await {
+        Ok(()) => logger::log_info("✓ Temporary pool shut down successfully").await,
+        Err(e) => {
+            logger::log_error(&format!("✗ Temp pool shutdown error: {}", e)).await;
+            force_kill_chrome_processes().await;
+        }
+    }
+
+    let (username, password, _files) = fetch_result?;
+
+    tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
+
+    // Count VPN servers (one directory per server); unreadable entries are skipped
+    // instead of panicking
+    let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
+        .filter_map(|entry| entry.ok())
+        .filter(|entry| entry.path().is_dir())
+        .count();
+
+    if server_count == 0 {
+        logger::log_warn("No VPN servers found – continuing without VPN").await;
+        return Ok(None);
+    }
+
+    logger::log_info(&format!("Found {} VPN servers – starting Docker proxy containers", server_count)).await;
+
+    // One proxy instance per certificate unless configured otherwise
+    let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
+    let proxy_pool = Arc::new(DockerVpnProxyPool::new(
+        paths.cache_openvpn_dir(),
+        username,
+        password,
+        number_proxy_instances
+    ).await?);
+
+    logger::log_info(&format!("All {} Docker proxy containers started and ready", proxy_pool.num_proxies())).await;
+
+    // Emit proxy connection events
+    for i in 0..proxy_pool.num_proxies() {
+        if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
+            monitoring_handle.emit(monitoring::MonitoringEvent::ProxyConnected {
+                container_name: proxy_info.container_name.clone(),
+                ip_address: proxy_info.ip_address.clone(),
+                port: proxy_info.port,
+            });
        }
-        // Optionally, store username/password for rotation use (e.g., in a file or global state)
-        // For now, just log them; extend as needed for rotation integration
    }

-    // Run economic update first, passing the shared pool
-    logger::log_info("--- Starting economic data update ---").await;
-    economic::run_full_update(&config, &pool).await?;
-    logger::log_info("✓ Economic data update completed").await;
+    Ok(Some(proxy_pool))
+}

-    // Then run corporate update, passing the shared pool
-    logger::log_info("--- Starting corporate data update ---").await;
-    corporate::run_full_update(&config, &pool).await?;
-    logger::log_info("✓ Corporate data update completed").await;
+/// Starts the monitoring system and announces the pool configuration.
+///
+/// Builds a `ConfigSnapshot` from `config`, calls `init_monitoring` with the logs
+/// directory and port 3030, emits a `PoolInitialized` event and logs the dashboard
+/// address.
+///
+/// Returns the monitoring handle together with the join handle of the monitoring task.
+async fn initialize_monitoring(
+    config: &Config,
+    paths: &DataPaths,
+) -> Result<(monitoring::MonitoringHandle, tokio::task::JoinHandle<()>)> {
+    let snapshot = ConfigSnapshot {
+        max_parallel_instances: config.max_parallel_instances,
+        max_tasks_per_instance: config.max_tasks_per_instance,
+        enable_vpn_rotation: config.enable_vpn_rotation,
+        max_requests_per_session: config.max_requests_per_session,
+        min_request_interval_ms: config.min_request_interval_ms,
+        max_retry_attempts: config.max_retry_attempts,
+    };
+    let logs_dir = paths.logs_dir().to_path_buf();
+
+    let (handle, task) = init_monitoring(snapshot, logs_dir, 3030).await?;
+
+    // Instance rotation is reported as enabled when a per-instance task limit is set
+    let pool_event = monitoring::MonitoringEvent::PoolInitialized {
+        pool_size: config.max_parallel_instances,
+        with_proxy: config.enable_vpn_rotation,
+        with_rotation: config.max_tasks_per_instance > 0,
+    };
+    handle.emit(pool_event);
+
+    logger::log_info("Monitoring dashboard available at http://localhost:3030").await;
+
+    Ok((handle, task))
+}
+
+/// Setup Ctrl+C handler for graceful shutdown
+///
+/// Spawns a background task that waits for Ctrl+C. Once the signal arrives it sets
+/// `shutdown_flag`, waits two seconds, runs the full cleanup and exits the process
+/// with status 0.
+///
+/// If the Ctrl+C listener cannot be installed, the error is logged and the task ends
+/// without touching the flag or the pools; a listener failure must not be treated as
+/// a received signal.
+fn setup_shutdown_handler(
+    shutdown_flag: Arc<AtomicBool>,
+    pool: Arc<ChromeDriverPool>,
+    proxy_pool: Option<Arc<DockerVpnProxyPool>>,
+) {
+    tokio::spawn(async move {
+        if let Err(e) = tokio::signal::ctrl_c().await {
+            logger::log_error(&format!("Failed to listen for Ctrl+C: {}", e)).await;
+            return;
+        }
+        logger::log_info("Ctrl+C received – shutting down gracefully...").await;
+
+        shutdown_flag.store(true, Ordering::SeqCst);
+        // Short pause after raising the flag, before the pools are torn down
+        tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
+
+        perform_full_cleanup(&pool, proxy_pool.as_deref()).await;
+
+        logger::log_info("Shutdown complete").await;
+        std::process::exit(0);
+    });
+}
+
+/// Formats a duration as `DD::HH::MM::SS` (whole seconds; every field is zero-padded
+/// to at least two digits).
+fn format_duration(duration: Duration) -> String {
+    const SECS_PER_MINUTE: u64 = 60;
+    const SECS_PER_HOUR: u64 = 3600;
+    const SECS_PER_DAY: u64 = 86400;
+
+    let total = duration.as_secs();
+
+    let seconds = total % SECS_PER_MINUTE;
+    let minutes = total % SECS_PER_HOUR / SECS_PER_MINUTE;
+    let hours = total % SECS_PER_DAY / SECS_PER_HOUR;
+    let days = total / SECS_PER_DAY;
+
+    format!("{:02}::{:02}::{:02}::{:02}", days, hours, minutes, seconds)
+}
+
+/// Makes sure `state.jsonl` exists in the integrity directory.
+///
+/// The file is opened for writing and created when missing; existing content is not
+/// touched. Returns an error when the file cannot be opened or created.
+async fn create_state_file(paths: &DataPaths) -> Result<()> {
+    let state_file = paths.integrity_dir().join("state.jsonl");
+
+    // create + write without truncate: an existing file is opened as it is
+    let mut options = OpenOptions::new();
+    options.create(true).write(true);
+    options.open(&state_file)?;
+
+    logger::log_info(&format!("Checked or created file: {}", state_file.display())).await;
+
+    Ok(())
+}
+
+/// Prints the checkpoint dependency graph and writes it as a DOT file.
+///
+/// Creates a `StateManager` for the integrity directory, prints its dependency graph
+/// and stores the DOT representation in `checkpoint_dependencies.dot` inside that
+/// directory. A failure to create the manager is logged and returned.
+async fn visualize_checkpoint_dependencies(paths: &DataPaths) -> Result<()> {
+    let manager = match StateManager::new(paths.integrity_dir()).await {
+        Ok(manager) => manager,
+        Err(e) => {
+            logger::log_error(&format!("✗ Failed to load dependency config: {}", e)).await;
+            return Err(e);
+        }
+    };
+
+    logger::log_info("✓ Dependency configuration loaded successfully").await;
+    manager.print_dependency_graph();
+
+    let dot = manager.get_dependency_config().to_dot();
+    let dot_path = paths.integrity_dir().join("checkpoint_dependencies.dot");
+    std::fs::write(&dot_path, dot)?;
+
+    logger::log_info(&format!("✓ DOT file written to: {}", dot_path.display())).await;
+    Ok(())
+}
+
+// ============================================================================
+// MAIN FUNCTION - Simplified with extracted helpers
+// ============================================================================
+
+/// Application entry point.
+///
+/// Sequence: best-effort environment preparation (Docker Desktop, stale proxy
+/// containers, state file, dependency graph), configuration, monitoring, logger,
+/// optional VPN proxy pool, main ChromeDriver pool, Ctrl+C handler, economic update,
+/// corporate update, final cleanup.
+///
+/// A configuration that cannot be loaded is reported on stderr together with the
+/// reason, and the default configuration is used instead.
+///
+/// # Errors
+/// Fails when the data paths, the monitoring system, the VPN proxy pool, the
+/// ChromeDriver pool or one of the two update runs fails.
+#[tokio::main]
+async fn main() -> Result<()> {
+    // Initial setup
+    let start = Instant::now();
+    let paths = DataPaths::new(".")?;
+
+    // Best-effort preparation steps: their errors are ignored
+    start_docker_desktop().await;
+    cleanup_all_proxy_containers().await.ok();
+    create_state_file(&paths).await.ok();
+    visualize_checkpoint_dependencies(&paths).await.ok();
+
+    let config = Config::load().unwrap_or_else(|err| {
+        // Say why the fallback is used instead of dropping the error
+        eprintln!("Failed to load configuration: {}", err);
+        eprintln!("Using default configuration");
+        Config::default()
+    });
+
+    // Initialize monitoring
+    let (monitoring_handle, _monitoring_task) = initialize_monitoring(&config, &paths).await?;
+
+    // Initialize debug logger
+    logger::init_debug_logger(paths.logs_dir()).await.ok();
+    logger::log_info("=== Economic Webscraper Started ===").await;
+    logger::log_info(&format!(
+        "Config → parallel_instances: {}, task_limit: {}, vpn_rotation: {}, proxy_instances_per_certificate: {:?}",
+        config.max_parallel_instances,
+        config.max_tasks_per_instance,
+        config.enable_vpn_rotation,
+        config.proxy_instances_per_certificate
+    )).await;
+
+    let shutdown_flag = Arc::new(AtomicBool::new(false));
+
+    // Fetch VPN credentials and setup proxy pool if enabled
+    let proxy_pool = if config.enable_vpn_rotation {
+        fetch_vpn_credentials_with_temp_pool(&config, &paths, &monitoring_handle).await?
+    } else {
+        logger::log_info("VPN rotation disabled – using direct connection").await;
+        None
+    };
+
+    // Create main ChromeDriver pool
+    logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", config.max_parallel_instances)).await;
+
+    let pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(
+        proxy_pool.clone(),
+        &config,
+        Some(monitoring_handle.clone())
+    ).await?);
+
+    logger::log_info(&format!("ChromeDriver pool ready with {} instances", config.max_parallel_instances)).await;
+
+    // Setup Ctrl+C handler
+    setup_shutdown_handler(
+        Arc::clone(&shutdown_flag),
+        Arc::clone(&pool),
+        proxy_pool.clone(),
+    );
+
+    // Run scraping jobs
+    // NOTE(review): `check_shutdown!` is defined elsewhere; presumably it leaves `main`
+    // early once the flag is set - confirm against the macro definition
+    check_shutdown!(&shutdown_flag);
+
+    logger::log_info("--- Starting ECONOMIC data update ---").await;
+    economic::run_full_update(&config, &pool, &shutdown_flag).await?;
+    logger::log_info("Economic update completed").await;
+
+    check_shutdown!(&shutdown_flag);
+
+    logger::log_info("--- Starting CORPORATE data update ---").await;
+    corporate::run_full_update(&config, &pool, &shutdown_flag).await?;
+    logger::log_info("Corporate update completed").await;
+
+    check_shutdown!(&shutdown_flag);
+
+    // Final cleanup if not already shutting down
+    perform_full_cleanup(&pool, proxy_pool.as_deref()).await;
+    verify_chrome_cleanup().await;
+
+    logger::log_info(&format!("=== Application finished after {} ===", format_duration(start.elapsed()))).await;
+
+    logger::log_info("=== Application finished successfully ===").await;

-    logger::log_info("=== Application completed successfully ===").await;
    Ok(())
 }
--- a/src/monitoring/dashboard.html
+++ b/src/monitoring/dashboard.html
@@ -0,0 +1,830 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Scraper Monitoring Dashboard</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: 'Courier New', monospace;
+            background: #1a1a1a;
+            color: #f0f0f0;
+            padding: 20px;
+            font-size: 13px;
+        }
+
+        .header {
+            text-align: center;
+            padding: 20px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            border-radius: 8px;
+            margin-bottom: 20px;
+        }
+
+        .header h1 {
+            font-size: 28px;
+            margin-bottom: 5px;
+        }
+
+        .header .uptime {
+            font-size: 14px;
+            opacity: 0.9;
+        }
+
+        .section {
+            background: #2a2a2a;
+            border: 2px solid #444;
+            padding: 15px;
+            margin-bottom: 20px;
+            border-radius: 5px;
+        }
+
+        .section-title {
+            font-size: 16px;
+            font-weight: bold;
+            margin-bottom: 12px;
+            padding-bottom: 8px;
+            border-bottom: 2px solid #667eea;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        /* Config Section */
+        .config-grid {
+            display: grid;
+            grid-template-columns: repeat(3, 1fr);
+            gap: 15px;
+        }
+
+        .config-item {
+            background: #333;
+            padding: 12px;
+            border-radius: 4px;
+            border-left: 3px solid #667eea;
+        }
+
+        .config-label {
+            color: #888;
+            font-size: 11px;
+            text-transform: uppercase;
+            margin-bottom: 5px;
+        }
+
+        .config-value {
+            color: #4CAF50;
+            font-size: 18px;
+            font-weight: bold;
+        }
+
+        /* Instance Grid */
+        .instance-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(450px, 1fr));
+            gap: 15px;
+            margin-top: 10px;
+        }
+
+        .instance-box {
+            background: #333;
+            border: 2px solid #555;
+            border-radius: 5px;
+            padding: 0;
+            display: flex;
+            gap: 0;
+            overflow: hidden;
+            transition: border-color 0.3s;
+        }
+
+        .instance-box.status-idle {
+            border-color: #666;
+        }
+
+        .instance-box.status-active {
+            border-color: #4CAF50;
+            box-shadow: 0 0 10px rgba(76, 175, 80, 0.3);
+        }
+
+        .instance-box.status-renewing {
+            border-color: #FF9800;
+            box-shadow: 0 0 10px rgba(255, 152, 0, 0.3);
+        }
+
+        .instance-box.status-error {
+            border-color: #f44336;
+            box-shadow: 0 0 10px rgba(244, 67, 54, 0.3);
+        }
+
+        .instance-side,
+        .proxy-side {
+            flex: 1;
+            padding: 12px;
+        }
+
+        .instance-side {
+            background: #3a3a3a;
+            border-right: 1px solid #555;
+        }
+
+        .proxy-side {
+            background: #2a3a4a;
+        }
+
+        .side-header {
+            font-weight: bold;
+            font-size: 14px;
+            margin-bottom: 10px;
+            padding-bottom: 5px;
+            border-bottom: 1px solid #555;
+            display: flex;
+            align-items: center;
+            gap: 5px;
+        }
+
+        .status-badge {
+            display: inline-block;
+            padding: 2px 8px;
+            border-radius: 3px;
+            font-size: 11px;
+            font-weight: bold;
+            text-transform: uppercase;
+        }
+
+        .status-badge.idle {
+            background: #666;
+            color: #fff;
+        }
+
+        .status-badge.active {
+            background: #4CAF50;
+            color: #fff;
+        }
+
+        .status-badge.renewing {
+            background: #FF9800;
+            color: #fff;
+        }
+
+        .status-badge.error {
+            background: #f44336;
+            color: #fff;
+        }
+
+        .metric-row {
+            display: flex;
+            justify-content: space-between;
+            padding: 4px 0;
+            font-size: 12px;
+            border-bottom: 1px solid #444;
+        }
+
+        .metric-row:last-child {
+            border-bottom: none;
+        }
+
+        .metric-label {
+            color: #888;
+        }
+
+        .metric-value {
+            color: #4CAF50;
+            font-weight: bold;
+        }
+
+        .metric-value.warning {
+            color: #FF9800;
+        }
+
+        .metric-value.danger {
+            color: #f44336;
+        }
+
+        .current-url {
+            margin-top: 8px;
+            padding-top: 8px;
+            border-top: 1px solid #555;
+            font-size: 11px;
+            color: #aaa;
+            word-wrap: break-word;
+        }
+
+        .no-proxy {
+            text-align: center;
+            color: #666;
+            padding: 30px 10px;
+            font-style: italic;
+        }
+
+        /* Global Stats */
+        .stats-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
+            gap: 12px;
+            margin-top: 10px;
+        }
+
+        .stat-box {
+            background: #333;
+            padding: 15px;
+            border-radius: 5px;
+            text-align: center;
+            border-left: 4px solid #667eea;
+        }
+
+        .stat-value {
+            font-size: 28px;
+            font-weight: bold;
+            color: #4CAF50;
+            margin-bottom: 5px;
+        }
+
+        .stat-label {
+            font-size: 11px;
+            color: #888;
+            text-transform: uppercase;
+        }
+
+        /* Yahoo Stats */
+        .yahoo-stats-grid {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+            gap: 12px;
+            margin-top: 10px;
+        }
+
+        .yahoo-stat-box {
+            background: #2a3a4a;
+            padding: 15px;
+            border-radius: 5px;
+            text-align: center;
+            border-left: 4px solid #FF9800;
+        }
+
+        .yahoo-stat-value {
+            font-size: 28px;
+            font-weight: bold;
+            color: #FF9800;
+            margin-bottom: 5px;
+        }
+
+        .yahoo-stat-label {
+            font-size: 11px;
+            color: #aaa;
+            text-transform: uppercase;
+        }
+
+        /* Logs */
+        .log-container {
+            max-height: 300px;
+            overflow-y: auto;
+            background: #1a1a1a;
+            padding: 10px;
+            border-radius: 4px;
+            font-size: 12px;
+        }
+
+        .log-container::-webkit-scrollbar {
+            width: 8px;
+        }
+
+        .log-container::-webkit-scrollbar-track {
+            background: #2a2a2a;
+        }
+
+        .log-container::-webkit-scrollbar-thumb {
+            background: #667eea;
+            border-radius: 4px;
+        }
+
+        .log-entry {
+            padding: 4px 0;
+            border-bottom: 1px solid #333;
+            display: flex;
+            gap: 10px;
+        }
+
+        .log-entry:last-child {
+            border-bottom: none;
+        }
+
+        .log-time {
+            color: #666;
+            font-weight: bold;
+            min-width: 70px;
+        }
+
+        .log-message {
+            flex: 1;
+        }
+
+        .log-message.info {
+            color: #4CAF50;
+        }
+
+        .log-message.warn {
+            color: #FF9800;
+        }
+
+        .log-message.error {
+            color: #f44336;
+        }
+
+        .connection-status {
+            position: fixed;
+            top: 20px;
+            right: 20px;
+            padding: 8px 15px;
+            border-radius: 20px;
+            font-size: 12px;
+            font-weight: bold;
+            z-index: 1000;
+        }
+
+        .connection-status.connected {
+            background: #4CAF50;
+            color: white;
+        }
+
+        .connection-status.disconnected {
+            background: #f44336;
+            color: white;
+        }
+
+        @keyframes pulse {
+            0%, 100% {
+                opacity: 1;
+            }
+            50% {
+                opacity: 0.5;
+            }
+        }
+
+        .pulse {
+            animation: pulse 2s infinite;
+        }
+
+        /* Yahoo Client Box */
+        .yahoo-client-box {
+            background: #2a3a4a;
+            border: 2px solid #FF9800;
+            border-radius: 5px;
+            padding: 12px;
+            display: flex;
+            gap: 0;
+            overflow: hidden;
+        }
+
+        .yahoo-client-side {
+            flex: 1;
+            padding: 12px;
+        }
+
+        .yahoo-client-side.left {
+            background: #3a4a5a;
+            border-right: 1px solid #555;
+        }
+
+        .yahoo-client-side.right {
+            background: #2a3a4a;
+        }
+    </style>
+</head>
+<body>
+    <div class="connection-status" id="connection-status">
+        Connecting...
+    </div>
+
+    <div class="header">
+        <h1>🚀 Scraper Monitoring Dashboard</h1>
+        <div class="uptime" id="uptime">Uptime: Loading...</div>
+    </div>
+
+    <!-- Config Section -->
+    <div class="section">
+        <div class="section-title">⚙️ CONFIGURATION</div>
+        <div class="config-grid" id="config"></div>
+    </div>
+
+    <!-- Pool Status Section -->
+    <div class="section">
+        <div class="section-title">🔧 POOL STATUS</div>
+        <div class="instance-grid" id="instances"></div>
+    </div>
+
+    <!-- Yahoo API Section -->
+    <div class="section">
+        <div class="section-title">📈 YAHOO API METRICS</div>
+        <div class="yahoo-stats-grid" id="yahoo-stats"></div>
+        <div class="instance-grid" id="yahoo-clients"></div>
+    </div>
+
+    <!-- Global Metrics Section -->
+    <div class="section">
+        <div class="section-title">📊 GLOBAL METRICS</div>
+        <div class="stats-grid" id="global-stats"></div>
+    </div>
+
+    <!-- Logs Section -->
+    <div class="section">
+        <div class="section-title">📝 RECENT LOGS</div>
+        <div class="log-container" id="logs"></div>
+    </div>
+
+    <script>
+        let ws = null;
+        let reconnectInterval = null;
+
+        function connect() {
+            ws = new WebSocket('ws://' + window.location.host + '/ws');
+
+            ws.onopen = () => {
+                console.log('WebSocket connected');
+                updateConnectionStatus(true);
+                if (reconnectInterval) {
+                    clearInterval(reconnectInterval);
+                    reconnectInterval = null;
+                }
+            };
+
+            ws.onmessage = (event) => {
+                try {
+                    const state = JSON.parse(event.data);
+                    updateDashboard(state);
+                } catch (error) {
+                    console.error('Failed to parse message:', error);
+                }
+            };
+
+            ws.onclose = () => {
+                console.log('WebSocket disconnected');
+                updateConnectionStatus(false);
+                // Attempt to reconnect every 3 seconds
+                if (!reconnectInterval) {
+                    reconnectInterval = setInterval(() => {
+                        console.log('Attempting to reconnect...');
+                        connect();
+                    }, 3000);
+                }
+            };
+
+            ws.onerror = (error) => {
+                console.error('WebSocket error:', error);
+            };
+        }
+
+        function updateConnectionStatus(connected) {
+            const status = document.getElementById('connection-status');
+            if (connected) {
+                status.textContent = '● Connected';
+                status.className = 'connection-status connected';
+            } else {
+                status.textContent = '● Disconnected';
+                status.className = 'connection-status disconnected pulse';
+            }
+        }
+
+        function updateDashboard(state) {
+            updateConfig(state.config);
+            updateInstances(state.instances);
+            updateGlobalStats(state.global);
+            updateYahooStats(state.global);
+            updateYahooClients(state.yahoo_clients);
+            updateLogs(state.logs);
+        }
+
+        /**
+         * Render the configuration tiles into the #config grid.
+         *
+         * Replaces the container's innerHTML with six tiles built from `config`:
+         * max_parallel_instances, max_tasks_per_instance, enable_vpn_rotation,
+         * max_requests_per_session, min_request_interval_ms and max_retry_attempts.
+         * Values are interpolated into the markup as-is (no HTML escaping).
+         *
+         * @param {Object} config - the `config` member of the dashboard state.
+         */
+        function updateConfig(config) {
+            const container = document.getElementById('config');
+            container.innerHTML = `
+                <div class="config-item">
+                    <div class="config-label">Parallel Instances</div>
+                    <div class="config-value">${config.max_parallel_instances}</div>
+                </div>
+                <div class="config-item">
+                    <div class="config-label">Tasks per Instance</div>
+                    <div class="config-value">${config.max_tasks_per_instance || 'Unlimited'}</div>
+                </div>
+                <div class="config-item">
+                    <div class="config-label">VPN Rotation</div>
+                    <div class="config-value">${config.enable_vpn_rotation ? '✓ Enabled' : '✗ Disabled'}</div>
+                </div>
+                <div class="config-item">
+                    <div class="config-label">Requests per Session</div>
+                    <div class="config-value">${config.max_requests_per_session}</div>
+                </div>
+                <div class="config-item">
+                    <div class="config-label">Min Request Interval</div>
+                    <div class="config-value">${config.min_request_interval_ms}ms</div>
+                </div>
+                <div class="config-item">
+                    <div class="config-label">Max Retry Attempts</div>
+                    <div class="config-value">${config.max_retry_attempts}</div>
+                </div>
+            `;
+        }
+
+        function updateInstances(instances) {
+            const container = document.getElementById('instances');
+            if (!instances || instances.length === 0) {
+                container.innerHTML = '<div style="text-align: center; padding: 40px; color: #666;">No instances available</div>';
+                return;
+            }
+
+            container.innerHTML = instances.map(inst => {
+                const statusClass = `status-${inst.status}`;
+                const proxy = inst.connected_proxy;
+
+                const successRate = inst.total_requests > 0 
+                    ? ((inst.success_count / inst.total_requests) * 100).toFixed(1)
+                    : '0.0';
+
+                const yahooSuccessRate = inst.yahoo_requests > 0
+                    ? ((inst.yahoo_success / inst.yahoo_requests) * 100).toFixed(1)
+                    : '0.0';
+
+                return `
+                    <div class="instance-box ${statusClass}">
+                        <div class="instance-side">
+                            <div class="side-header">
+                                🖥️ Instance #${inst.id}
+                                <span class="status-badge ${inst.status}">${inst.status}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Current Tasks</span>
+                                <span class="metric-value ${inst.tasks_current_session >= inst.tasks_max ? 'warning' : ''}">
+                                    ${inst.tasks_current_session}/${inst.tasks_max}
+                                </span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Session Requests</span>
+                                <span class="metric-value">${inst.session_requests}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Total Requests</span>
+                                <span class="metric-value">${inst.total_requests}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Success / Fail</span>
+                                <span class="metric-value">${inst.success_count} / ${inst.failure_count}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Success Rate</span>
+                                <span class="metric-value ${successRate < 50 ? 'danger' : successRate < 80 ? 'warning' : ''}">
+                                    ${successRate}%
+                                </span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Yahoo Requests</span>
+                                <span class="metric-value">${inst.yahoo_requests}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Yahoo Rate</span>
+                                <span class="metric-value ${yahooSuccessRate < 50 ? 'danger' : yahooSuccessRate < 80 ? 'warning' : ''}">
+                                    ${yahooSuccessRate}%
+                                </span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Last Activity</span>
+                                <span class="metric-value">${inst.last_activity}</span>
+                            </div>
+                            ${inst.current_task ? `
+                            <div class="current-url">
+                                <strong>Current URL:</strong><br>
+                                ${escapeHtml(inst.current_task)}
+                            </div>
+                            ` : ''}
+                        </div>
+
+                        ${proxy ? `
+                        <div class="proxy-side">
+                            <div class="side-header">
+                                📡 ${proxy.container_name}
+                                <span class="status-badge ${proxy.status}">${proxy.status}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">IP Address</span>
+                                <span class="metric-value">${proxy.ip_address}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Port</span>
+                                <span class="metric-value">${proxy.port}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Status</span>
+                                <span class="metric-value">${proxy.status}</span>
+                            </div>
+                        </div>
+                        ` : `
+                        <div class="proxy-side">
+                            <div class="no-proxy">
+                                🌐<br>
+                                Direct Connection<br>
+                                (No Proxy)
+                            </div>
+                        </div>
+                        `}
+                    </div>
+                `;
+            }).join('');
+        }
+
+        /**
+         * Render the aggregate Yahoo API tiles into the #yahoo-stats grid.
+         *
+         * The success rate is successful_yahoo_requests / total_yahoo_requests as
+         * a percentage with one decimal, or '0.0' when the total is not above
+         * zero. Every counter falls back to 0 when it is missing or falsy.
+         *
+         * @param {Object} global - the `global` member of the dashboard state.
+         */
+        function updateYahooStats(global) {
+            const container = document.getElementById('yahoo-stats');
+            const yahooSuccessRate = global.total_yahoo_requests > 0 
+                ? ((global.successful_yahoo_requests / global.total_yahoo_requests) * 100).toFixed(1)
+                : '0.0';
+            
+            container.innerHTML = `
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.total_yahoo_requests || 0}</div>
+                    <div class="yahoo-stat-label">Total Requests</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${yahooSuccessRate}%</div>
+                    <div class="yahoo-stat-label">Success Rate</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.successful_yahoo_requests || 0}</div>
+                    <div class="yahoo-stat-label">Successful</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.failed_yahoo_requests || 0}</div>
+                    <div class="yahoo-stat-label">Failed</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.yahoo_client_count || 0}</div>
+                    <div class="yahoo-stat-label">Active Clients</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.yahoo_batch_requests || 0}</div>
+                    <div class="yahoo-stat-label">Batch Requests</div>
+                </div>
+                <div class="yahoo-stat-box">
+                    <div class="yahoo-stat-value">${global.yahoo_session_renewals || 0}</div>
+                    <div class="yahoo-stat-label">Session Renewals</div>
+                </div>
+            `;
+        }
+
+        function updateYahooClients(yahooClients) {
+            const container = document.getElementById('yahoo-clients');
+            if (!yahooClients || yahooClients.length === 0) {
+                container.innerHTML = '<div style="text-align: center; padding: 40px; color: #666;">No Yahoo clients available</div>';
+                return;
+            }
+            
+            container.innerHTML = yahooClients.map(client => {
+                const successRate = client.requests_total > 0 
+                    ? ((client.requests_successful / client.requests_total) * 100).toFixed(1)
+                    : '0.0';
+                    
+                return `
+                    <div class="yahoo-client-box">
+                        <div class="yahoo-client-side left">
+                            <div class="side-header">
+                                📊 Yahoo Client #${client.instance_id}
+                                ${client.has_proxy ? '🔗' : '🌐'}
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Total Requests</span>
+                                <span class="metric-value">${client.requests_total}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Success / Fail</span>
+                                <span class="metric-value">${client.requests_successful} / ${client.requests_failed}</span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Success Rate</span>
+                                <span class="metric-value ${successRate < 50 ? 'danger' : successRate < 80 ? 'warning' : ''}">
+                                    ${successRate}%
+                                </span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Current / Max</span>
+                                <span class="metric-value ${client.current_requests >= client.max_requests ? 'danger' : ''}">
+                                    ${client.current_requests} / ${client.max_requests}
+                                </span>
+                            </div>
+                            <div class="metric-row">
+                                <span class="metric-label">Last Activity</span>
+                                <span class="metric-value">${client.last_activity}</span>
+                            </div>
+                        </div>
+                        <div class="yahoo-client-side right">
+                            ${client.proxy_info ? `
+                                <div class="side-header">🔗 ${client.proxy_info.container_name}</div>
+                                <div class="metric-row">
+                                    <span class="metric-label">IP Address</span>
+                                    <span class="metric-value">${client.proxy_info.ip_address}</span>
+                                </div>
+                                <div class="metric-row">
+                                    <span class="metric-label">Port</span>
+                                    <span class="metric-value">${client.proxy_info.port}</span>
+                                </div>
+                                <div class="metric-row">
+                                    <span class="metric-label">Status</span>
+                                    <span class="metric-value">${client.proxy_info.status}</span>
+                                </div>
+                            ` : `
+                                <div class="no-proxy">
+                                    ${client.has_proxy ? '⚠️' : '🌐'}<br>
+                                    ${client.has_proxy ? 'Proxy Not Connected' : 'Direct Connection'}
+                                </div>
+                            `}
+                        </div>
+                    </div>
+                `;
+            }).join('');
+        }
+
+        /**
+         * Render the global counters into #global-stats and refresh the header uptime.
+         *
+         * Sets the #uptime text from formatUptime(global.uptime_seconds), then
+         * replaces the grid with nine stat boxes. `success_rate` is formatted with
+         * toFixed(1), so it must be a number; a missing value throws here.
+         *
+         * @param {Object} global - the `global` member of the dashboard state.
+         */
+        function updateGlobalStats(global) {
+            const container = document.getElementById('global-stats');
+            
+            const uptime = document.getElementById('uptime');
+            uptime.textContent = `Uptime: ${formatUptime(global.uptime_seconds)}`;
+
+            container.innerHTML = `
+                <div class="stat-box">
+                    <div class="stat-value">${global.total_requests}</div>
+                    <div class="stat-label">Total Requests</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.success_rate.toFixed(1)}%</div>
+                    <div class="stat-label">Success Rate</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.successful_requests}</div>
+                    <div class="stat-label">Successful</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.failed_requests}</div>
+                    <div class="stat-label">Failed</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.session_renewals}</div>
+                    <div class="stat-label">Session Renewals</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.rotation_events}</div>
+                    <div class="stat-label">Rotation Events</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.navigation_timeouts}</div>
+                    <div class="stat-label">Timeouts</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.bot_detection_hits}</div>
+                    <div class="stat-label">Bot Detection</div>
+                </div>
+                <div class="stat-box">
+                    <div class="stat-value">${global.proxy_failures}</div>
+                    <div class="stat-label">Proxy Failures</div>
+                </div>
+            `;
+        }
+
+        function updateLogs(logs) {
+            const container = document.getElementById('logs');
+            const wasScrolledToBottom = container.scrollHeight - container.scrollTop === container.clientHeight;
+
+            container.innerHTML = logs.map(log => `
+                <div class="log-entry">
+                    <span class="log-time">${log.timestamp}</span>
+                    <span class="log-message ${log.level}">${escapeHtml(log.message)}</span>
+                </div>
+            `).join('');
+
+            // Auto-scroll to bottom if user was already at bottom
+            if (wasScrolledToBottom) {
+                container.scrollTop = container.scrollHeight;
+            }
+        }
+
+        function formatUptime(seconds) {
+            const hours = Math.floor(seconds / 3600);
+            const minutes = Math.floor((seconds % 3600) / 60);
+            const secs = seconds % 60;
+            return `${hours}h ${minutes}m ${secs}s`;
+        }
+
+        function escapeHtml(text) {
+            const map = {
+                '&': '&amp;',
+                '<': '&lt;',
+                '>': '&gt;',
+                '"': '&quot;',
+                "'": '&#039;'
+            };
+            return text.replace(/[&<>"']/g, m => map[m]);
+        }
+
+        // Initialize connection
+        connect();
+    </script>
+</body>
+</html>
--- a/src/monitoring/events.rs
+++ b/src/monitoring/events.rs
@@ -0,0 +1,173 @@
+// src/monitoring/events.rs
+use super::metrics::ProxyInfo;
+
+/// Events emitted by the scraper system
+///
+/// Each variant is a plain data record (the enum derives `Debug` and `Clone`).
+/// Where and when each event is emitted is not visible from this file; the
+/// per-variant notes below describe only the payload.
+#[derive(Debug, Clone)]
+pub enum MonitoringEvent {
+    // Pool initialization
+    /// Pool setup report: the pool size plus the proxy and rotation flags.
+    PoolInitialized {
+        pool_size: usize,
+        with_proxy: bool,
+        with_rotation: bool,
+    },
+    
+    // Instance lifecycle
+    /// A new instance, identified by `instance_id`, with its `max_tasks` value
+    /// and an optional proxy description.
+    InstanceCreated {
+        instance_id: usize,
+        max_tasks: usize,
+        proxy: Option<ProxyInfo>,
+    },
+    
+    /// An instance changed to the given [`InstanceStatusChange`].
+    InstanceStatusChanged {
+        instance_id: usize,
+        status: InstanceStatusChange,
+    },
+
+    /// An instance was selected.
+    // NOTE(review): the meaning of `half` is not visible here — confirm with the emitter.
+    InstanceSelected {
+        instance_id: usize,
+        half: usize,
+    },
+    
+    // Task execution
+    /// A task for `url` started on an instance.
+    TaskStarted {
+        instance_id: usize,
+        url: String,
+    },
+    
+    /// A task finished; `error` optionally carries a message.
+    /// `duration_ms` is presumably milliseconds — TODO confirm.
+    TaskCompleted {
+        instance_id: usize,
+        success: bool,
+        duration_ms: u64,
+        error: Option<String>,
+    },
+    
+    /// Navigation to `url` timed out on an instance.
+    NavigationTimeout {
+        instance_id: usize,
+        url: String,
+    },
+    
+    /// Bot detection was triggered for `url` on an instance.
+    BotDetectionTriggered {
+        instance_id: usize,
+        url: String,
+    },
+    
+    // Session management
+    /// A session started on an instance, optionally through a proxy.
+    SessionStarted {
+        instance_id: usize,
+        proxy: Option<ProxyInfo>,
+    },
+    
+    /// A session was renewed: the request count before renewal, the
+    /// [`RenewalReason`], and the proxy used afterwards (if any).
+    SessionRenewed {
+        instance_id: usize,
+        old_request_count: usize,
+        reason: RenewalReason,
+        new_proxy: Option<ProxyInfo>,
+    },
+    
+    /// The session request counter of an instance changed to `new_count`.
+    SessionRequestIncremented {
+        instance_id: usize,
+        new_count: usize,
+    },
+    
+    // Proxy events
+    /// A proxy container is connected at `ip_address:port`.
+    ProxyConnected {
+        container_name: String,
+        ip_address: String,
+        port: u16,
+    },
+    
+    /// A proxy container failed, with an error description.
+    ProxyFailed {
+        container_name: String,
+        error: String,
+    },
+    
+    /// An instance switched proxies; `old_proxy` is `None` when there was no
+    /// previous one recorded.
+    ProxyRotated {
+        instance_id: usize,
+        old_proxy: Option<String>,
+        new_proxy: String,
+    },
+    
+    // Pool rotation events
+    /// A rotation was triggered, with a free-form reason.
+    RotationTriggered {
+        reason: String,
+    },
+    
+    // Yahoo API events
+    /// A Yahoo request to `endpoint` started, optionally for one `symbol`.
+    YahooRequestStarted {
+        instance_id: usize,
+        endpoint: String,
+        symbol: Option<String>,
+    },
+    
+    /// A Yahoo request finished; `error` optionally carries a message.
+    /// `duration_ms` is presumably milliseconds — TODO confirm.
+    YahooRequestCompleted {
+        instance_id: usize,
+        success: bool,
+        duration_ms: u64,
+        error: Option<String>,
+    },
+    
+    /// A batch Yahoo request to `endpoint` started for the listed symbols.
+    YahooBatchRequestStarted {
+        count: usize,
+        symbols: Vec<String>,
+        endpoint: String,
+    },
+    
+    /// A batch Yahoo request finished, with success/failure/total counts.
+    YahooBatchRequestCompleted {
+        successful: usize,
+        failed: usize,
+        total: usize,
+        duration_ms: u64,
+    },
+    
+    /// A Yahoo client was created for an instance.
+    YahooClientCreated {
+        instance_id: usize,
+        has_proxy: bool,
+        max_requests: u32,
+    },
+    
+    /// A Yahoo client was reset; carries its request count before the reset.
+    YahooClientReset {
+        instance_id: usize,
+        previous_requests: u32,
+        reason: String,
+    },
+    
+    // Logging
+    /// A log line with its severity.
+    LogMessage {
+        level: LogLevel,
+        message: String,
+    },
+}
+
+/// Status carried by `MonitoringEvent::InstanceStatusChanged`.
+///
+/// `Error` carries a message string; the other variants carry no data.
+#[derive(Debug, Clone)]
+pub enum InstanceStatusChange {
+    Idle,
+    Active,
+    Renewing,
+    Error(String),
+}
+
+/// Reason attached to `MonitoringEvent::SessionRenewed`.
+///
+/// The `Display` impl in this file renders each variant as a lowercase
+/// snake_case label (`task_limit`, `request_limit`, `error`, `manual`).
+#[derive(Debug, Clone)]
+pub enum RenewalReason {
+    TaskLimit,
+    RequestLimit,
+    Error,
+    Manual,
+}
+
+/// Severity carried by `MonitoringEvent::LogMessage`.
+#[derive(Debug, Clone)]
+pub enum LogLevel {
+    Info,
+    Warn,
+    Error,
+}
+
+impl std::fmt::Display for RenewalReason {
+    /// Writes the lowercase snake_case label of the renewal reason.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let label = match self {
+            Self::TaskLimit => "task_limit",
+            Self::RequestLimit => "request_limit",
+            Self::Error => "error",
+            Self::Manual => "manual",
+        };
+        f.write_str(label)
+    }
+}
--- a/src/monitoring/logger.rs
+++ b/src/monitoring/logger.rs
@@ -0,0 +1,103 @@
+// src/monitoring/logger.rs
+use super::metrics::SessionSummary;
+use chrono::Local;
+use std::path::PathBuf;
+use tokio::fs::OpenOptions;
+use tokio::io::AsyncWriteExt;
+use tokio::sync::Mutex;
+
+/// Logs session summaries to JSONL files, one file per local calendar day
+/// (`sessions_YYYYMMDD.jsonl` inside `log_dir`).
+pub struct SessionLogger {
+    log_dir: PathBuf,
+    /// Open log file together with the `%Y%m%d` stamp its name was built
+    /// from; `None` until the first write and after a failed write.
+    file: Mutex<Option<(String, tokio::fs::File)>>,
+}
+
+impl SessionLogger {
+    /// Create a logger that writes into `log_dir`. Nothing is created on
+    /// disk until the first summary is logged.
+    pub fn new(log_dir: PathBuf) -> Self {
+        Self {
+            log_dir,
+            file: Mutex::new(None),
+        }
+    }
+
+    /// Log a completed session summary.
+    ///
+    /// Best effort: a failure is reported on stderr and not propagated.
+    pub async fn log_session(&self, summary: &SessionSummary) {
+        if let Err(e) = self.write_session(summary).await {
+            eprintln!("Failed to log session: {}", e);
+        }
+    }
+
+    /// Append `summary` as one JSON line to today's session file.
+    ///
+    /// The cached handle is reused while the local date is unchanged and is
+    /// reopened when the date rolls over, so a long-running process does not
+    /// keep appending to the previous day's file.
+    ///
+    /// # Errors
+    /// Returns serialization, directory-creation, open and write errors.
+    async fn write_session(&self, summary: &SessionSummary) -> anyhow::Result<()> {
+        // Serialize first so a serialization error cannot leave a partial line.
+        let mut line = serde_json::to_string(summary)?;
+        line.push('\n');
+
+        let today = Local::now().format("%Y%m%d").to_string();
+        let mut file_guard = self.file.lock().await;
+
+        // (Re)open when nothing is cached or the cached file belongs to another day.
+        let needs_open = match file_guard.as_ref() {
+            Some((opened_for, _)) => *opened_for != today,
+            None => true,
+        };
+        if needs_open {
+            tokio::fs::create_dir_all(&self.log_dir).await?;
+            let filepath = self.log_dir.join(format!("sessions_{}.jsonl", today));
+            let file = OpenOptions::new()
+                .create(true)
+                .append(true)
+                .open(&filepath)
+                .await?;
+            *file_guard = Some((today, file));
+        }
+
+        let outcome = match file_guard.as_mut() {
+            Some((_, file)) => Self::append_line(file, line.as_bytes()).await,
+            None => Ok(()),
+        };
+        if outcome.is_err() {
+            // Drop the handle so the next call reopens the file instead of
+            // reusing one that just failed.
+            *file_guard = None;
+        }
+        outcome?;
+        Ok(())
+    }
+
+    /// Write `bytes` to `file` and flush them.
+    async fn append_line(file: &mut tokio::fs::File, bytes: &[u8]) -> std::io::Result<()> {
+        file.write_all(bytes).await?;
+        file.flush().await
+    }
+}
+
+/// Appends metrics snapshots to per-day JSONL files.
+pub struct MetricsLogger {
+    log_dir: PathBuf,
+}
+
+impl MetricsLogger {
+    /// Build a logger rooted at `log_dir`; nothing is created on disk yet.
+    pub fn new(log_dir: PathBuf) -> Self {
+        MetricsLogger { log_dir }
+    }
+
+    /// Append one snapshot line for `state` to `metrics_YYYYMMDD.jsonl`.
+    ///
+    /// The line holds a local timestamp, the global metrics, and the number
+    /// of instances and proxies. The directory is created on demand and the
+    /// file is opened in append mode on every call.
+    ///
+    /// # Errors
+    /// Returns directory-creation, open, serialization and write errors.
+    pub async fn log_metrics(&self, state: &super::metrics::DashboardState) -> anyhow::Result<()> {
+        let target = self
+            .log_dir
+            .join(format!("metrics_{}.jsonl", Local::now().format("%Y%m%d")));
+
+        tokio::fs::create_dir_all(&self.log_dir).await?;
+
+        let mut options = OpenOptions::new();
+        options.create(true).append(true);
+        let mut out = options.open(&target).await?;
+
+        let taken_at = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
+        let payload = serde_json::json!({
+            "timestamp": taken_at,
+            "global": state.global,
+            "instance_count": state.instances.len(),
+            "proxy_count": state.proxies.len(),
+        });
+
+        let encoded = serde_json::to_string(&payload)?;
+        out.write_all(encoded.as_bytes()).await?;
+        out.write_all(b"\n").await?;
+        out.flush().await?;
+
+        Ok(())
+    }
+}
--- a/src/monitoring/metrics.rs
+++ b/src/monitoring/metrics.rs
@@ -0,0 +1,361 @@
+// src/monitoring/metrics.rs
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::time::Instant;
+
+/// Complete dashboard state sent to web clients
+///
+/// Assembled by `MonitoringState::to_dashboard_state`: `config` and `logs`
+/// are supplied by the caller, the remaining fields are derived from the
+/// internal monitoring state.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DashboardState {
+    pub config: ConfigSnapshot,
+    pub instances: Vec<InstanceMetrics>,
+    pub proxies: Vec<ProxyMetrics>,
+    pub yahoo_clients: Vec<YahooClientMetrics>,
+    pub global: GlobalMetrics,
+    pub logs: Vec<LogEntry>,
+}
+
+/// Snapshot of configuration settings
+///
+/// Passed through unchanged into `DashboardState::config`; this file does
+/// not interpret the individual values.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ConfigSnapshot {
+    pub max_parallel_instances: usize,
+    pub max_tasks_per_instance: usize,
+    pub enable_vpn_rotation: bool,
+    pub max_requests_per_session: usize,
+    pub min_request_interval_ms: u64,
+    pub max_retry_attempts: u32,
+}
+
+/// Metrics for a single ChromeDriver instance
+///
+/// Serializable view of `InstanceState`, produced by
+/// `MonitoringState::to_dashboard_state`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct InstanceMetrics {
+    pub id: usize,
+    pub status: InstanceStatus,
+    pub current_task: Option<String>,
+    pub tasks_current_session: usize,
+    pub tasks_max: usize,
+    pub session_requests: usize,
+    pub total_requests: usize,
+    pub success_count: usize,
+    pub failure_count: usize,
+    pub connected_proxy: Option<ProxyInfo>,
+    pub last_activity: String, // Time string produced by `format_timestamp` ("%H:%M:%S")
+    pub yahoo_requests: usize,
+    pub yahoo_success: usize,
+    pub yahoo_failures: usize,
+    /// Percentage: `yahoo_success / (yahoo_success + yahoo_failures) * 100`,
+    /// or `0.0` when both counters are zero.
+    pub yahoo_success_rate: f64,
+    pub yahoo_current_requests: u32,
+    pub yahoo_max_requests: u32,
+    pub yahoo_last_endpoint: Option<String>,
+    pub yahoo_last_symbol: Option<String>,
+}
+
+/// Status of an instance as shown on the dashboard.
+///
+/// Serialized in lowercase (`rename_all = "lowercase"`), e.g. `idle`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum InstanceStatus {
+    Idle,
+    Active,
+    Renewing,
+    Error,
+}
+
+/// Information about a proxy connection
+///
+/// Attached to instances and Yahoo clients as the proxy they are connected
+/// through; `container_name` is also the key of the proxy map in
+/// `MonitoringState`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ProxyInfo {
+    pub container_name: String,
+    pub ip_address: String,
+    pub port: u16,
+    pub status: ProxyStatus,
+}
+
+/// Connection status of a proxy.
+///
+/// Serialized in lowercase (`connected` / `disconnected`).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ProxyStatus {
+    Connected,
+    Disconnected,
+}
+
+/// Metrics for a proxy
+///
+/// Serializable field-for-field copy of `ProxyState`, produced by
+/// `MonitoringState::to_dashboard_state`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ProxyMetrics {
+    pub container_name: String,
+    pub ip_address: String,
+    pub port: u16,
+    pub status: ProxyStatus,
+    /// Ids of the instances recorded as using this proxy.
+    pub instances_using: Vec<usize>,
+}
+
+/// Metrics for a Yahoo client
+///
+/// Serializable view of `YahooClientState`, produced by
+/// `MonitoringState::to_dashboard_state`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct YahooClientMetrics {
+    pub instance_id: usize,
+    pub requests_total: usize,
+    pub requests_successful: usize,
+    pub requests_failed: usize,
+    pub current_requests: u32,
+    pub max_requests: u32,
+    pub has_proxy: bool,
+    /// Time string produced by `format_timestamp` ("%H:%M:%S").
+    pub last_activity: String,
+    pub proxy_info: Option<ProxyInfo>,
+}
+
+/// Global pool metrics
+///
+/// Counters are copied from `GlobalState`; the two rates and the uptime are
+/// computed when the dashboard state is built.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GlobalMetrics {
+    pub total_requests: usize,
+    pub successful_requests: usize,
+    pub failed_requests: usize,
+    /// Percentage: `successful_requests / total_requests * 100`, or `0.0`
+    /// when `total_requests` is zero.
+    pub success_rate: f64,
+    pub session_renewals: usize,
+    pub rotation_events: usize,
+    pub navigation_timeouts: usize,
+    pub bot_detection_hits: usize,
+    pub proxy_failures: usize,
+    /// Whole seconds elapsed since `MonitoringState::start_time`.
+    pub uptime_seconds: u64,
+    pub total_yahoo_requests: usize,
+    pub successful_yahoo_requests: usize,
+    pub failed_yahoo_requests: usize,
+    /// Percentage: `successful_yahoo_requests / total_yahoo_requests * 100`,
+    /// or `0.0` when `total_yahoo_requests` is zero.
+    pub yahoo_success_rate: f64,
+    pub yahoo_batch_requests: usize,
+    pub yahoo_session_renewals: usize,
+    pub yahoo_client_count: usize,
+}
+
+/// Log entry for display in dashboard
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LogEntry {
+    /// Pre-formatted time string; the format is chosen by whoever builds the entry.
+    pub timestamp: String,
+    pub level: LogLevel,
+    pub message: String,
+}
+
+/// Severity of a dashboard `LogEntry`.
+///
+/// Serialized in lowercase (`info`, `warn`, `error`).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum LogLevel {
+    Info,
+    Warn,
+    Error,
+}
+
+/// Internal state tracked by monitoring service
+///
+/// Uses `Instant` for activity times, so it is not serializable; use
+/// `to_dashboard_state` to obtain the serializable view.
+#[derive(Debug, Clone)]
+pub struct MonitoringState {
+    /// Per-instance state, keyed by instance id.
+    pub instances: HashMap<usize, InstanceState>,
+    /// Per-proxy state, keyed by container name.
+    pub proxies: HashMap<String, ProxyState>,
+    /// Per-Yahoo-client state, keyed by instance id.
+    pub yahoo_clients: HashMap<usize, YahooClientState>,
+    pub global: GlobalState,
+    /// Set when the state is created; the basis for `uptime_seconds`.
+    pub start_time: Instant,
+}
+
+/// Internal per-instance state; the non-serializable counterpart of
+/// `InstanceMetrics`.
+#[derive(Debug, Clone)]
+pub struct InstanceState {
+    pub id: usize,
+    pub status: InstanceStatus,
+    /// URL of the task in progress; cleared when the task completes.
+    pub current_task: Option<String>,
+    /// Reset to zero when a session starts or is renewed.
+    pub tasks_current_session: usize,
+    pub tasks_max: usize,
+    /// Reset to zero when a session starts or is renewed.
+    pub session_requests: usize,
+    /// Incremented once per completed task.
+    pub total_requests: usize,
+    pub success_count: usize,
+    pub failure_count: usize,
+    pub connected_proxy: Option<ProxyInfo>,
+    /// Refreshed to `Instant::now()` by the event handlers that touch this instance.
+    pub last_activity: Instant,
+    pub yahoo_requests: usize,
+    pub yahoo_success: usize,
+    pub yahoo_failures: usize,
+    /// Incremented when a Yahoo request starts and decremented (saturating)
+    /// when one completes.
+    pub yahoo_current_requests: u32,
+    pub yahoo_max_requests: u32,
+    pub yahoo_last_endpoint: Option<String>,
+    pub yahoo_last_symbol: Option<String>,
+}
+
+/// Internal per-proxy state; copied field-for-field into `ProxyMetrics`.
+#[derive(Debug, Clone)]
+pub struct ProxyState {
+    pub container_name: String,
+    pub ip_address: String,
+    pub port: u16,
+    pub status: ProxyStatus,
+    /// Ids of the instances recorded as using this proxy.
+    ///
+    /// NOTE(review): the `InstanceCreated` handler in `service.rs` seeds a
+    /// new entry with `vec![instance_id]` and then pushes the same id again,
+    /// so an id can appear twice here. Confirm and deduplicate.
+    pub instances_using: Vec<usize>,
+}
+
+/// Internal per-Yahoo-client state; the non-serializable counterpart of
+/// `YahooClientMetrics`.
+#[derive(Debug, Clone)]
+pub struct YahooClientState {
+    pub instance_id: usize,
+    pub requests_total: usize,
+    pub requests_successful: usize,
+    pub requests_failed: usize,
+    /// Incremented when a request starts and decremented (saturating) when
+    /// one completes.
+    pub current_requests: u32,
+    pub max_requests: u32,
+    pub has_proxy: bool,
+    pub last_activity: Instant,
+    pub proxy_info: Option<ProxyInfo>,
+}
+
+/// Pool-wide counters; all start at zero in `MonitoringState::new`.
+#[derive(Debug, Clone)]
+pub struct GlobalState {
+    /// Incremented when a task starts. The per-instance `total_requests` is
+    /// instead incremented when a task completes.
+    pub total_requests: usize,
+    pub successful_requests: usize,
+    pub failed_requests: usize,
+    pub session_renewals: usize,
+    pub rotation_events: usize,
+    pub navigation_timeouts: usize,
+    pub bot_detection_hits: usize,
+    pub proxy_failures: usize,
+    /// Incremented when a Yahoo request starts.
+    pub total_yahoo_requests: usize,
+    pub successful_yahoo_requests: usize,
+    pub failed_yahoo_requests: usize,
+    pub yahoo_batch_requests: usize,
+    pub yahoo_session_renewals: usize,
+    pub yahoo_client_count: usize,
+}
+
+impl MonitoringState {
+    /// Create an empty state: no instances, proxies or Yahoo clients, every
+    /// global counter at zero, and the uptime clock started now.
+    pub fn new() -> Self {
+        Self {
+            instances: HashMap::new(),
+            proxies: HashMap::new(),
+            yahoo_clients: HashMap::new(),
+            global: GlobalState {
+                total_requests: 0,
+                successful_requests: 0,
+                failed_requests: 0,
+                session_renewals: 0,
+                rotation_events: 0,
+                navigation_timeouts: 0,
+                bot_detection_hits: 0,
+                proxy_failures: 0,
+                total_yahoo_requests: 0,
+                successful_yahoo_requests: 0,
+                failed_yahoo_requests: 0,
+                yahoo_batch_requests: 0,
+                yahoo_session_renewals: 0,
+                yahoo_client_count: 0,
+            },
+            start_time: Instant::now(),
+        }
+    }
+
+    /// `part / whole` as a percentage, or `0.0` when `whole` is zero.
+    fn percentage(part: usize, whole: usize) -> f64 {
+        if whole > 0 {
+            (part as f64 / whole as f64) * 100.0
+        } else {
+            0.0
+        }
+    }
+
+    /// Convert internal state to dashboard state for web clients.
+    ///
+    /// `config` and `logs` are embedded as given. Rows are returned in a
+    /// stable order (instances and Yahoo clients by id, proxies by container
+    /// name) because `HashMap` iteration order is unspecified and would
+    /// otherwise let dashboard rows change position between refreshes.
+    pub fn to_dashboard_state(&self, config: ConfigSnapshot, logs: Vec<LogEntry>) -> DashboardState {
+        let mut instances: Vec<InstanceMetrics> = self
+            .instances
+            .values()
+            .map(|inst| InstanceMetrics {
+                id: inst.id,
+                status: inst.status.clone(),
+                current_task: inst.current_task.clone(),
+                tasks_current_session: inst.tasks_current_session,
+                tasks_max: inst.tasks_max,
+                session_requests: inst.session_requests,
+                total_requests: inst.total_requests,
+                success_count: inst.success_count,
+                failure_count: inst.failure_count,
+                connected_proxy: inst.connected_proxy.clone(),
+                last_activity: format_timestamp(inst.last_activity),
+                yahoo_requests: inst.yahoo_requests,
+                yahoo_success: inst.yahoo_success,
+                yahoo_failures: inst.yahoo_failures,
+                // Rate over finished requests only (successes + failures).
+                yahoo_success_rate: Self::percentage(
+                    inst.yahoo_success,
+                    inst.yahoo_success + inst.yahoo_failures,
+                ),
+                yahoo_current_requests: inst.yahoo_current_requests,
+                yahoo_max_requests: inst.yahoo_max_requests,
+                yahoo_last_endpoint: inst.yahoo_last_endpoint.clone(),
+                yahoo_last_symbol: inst.yahoo_last_symbol.clone(),
+            })
+            .collect();
+        instances.sort_by_key(|inst| inst.id);
+
+        let mut proxies: Vec<ProxyMetrics> = self
+            .proxies
+            .values()
+            .map(|proxy| ProxyMetrics {
+                container_name: proxy.container_name.clone(),
+                ip_address: proxy.ip_address.clone(),
+                port: proxy.port,
+                status: proxy.status.clone(),
+                instances_using: proxy.instances_using.clone(),
+            })
+            .collect();
+        proxies.sort_by(|a, b| a.container_name.cmp(&b.container_name));
+
+        let mut yahoo_clients: Vec<YahooClientMetrics> = self
+            .yahoo_clients
+            .values()
+            .map(|client| YahooClientMetrics {
+                instance_id: client.instance_id,
+                requests_total: client.requests_total,
+                requests_successful: client.requests_successful,
+                requests_failed: client.requests_failed,
+                current_requests: client.current_requests,
+                max_requests: client.max_requests,
+                has_proxy: client.has_proxy,
+                last_activity: format_timestamp(client.last_activity),
+                proxy_info: client.proxy_info.clone(),
+            })
+            .collect();
+        yahoo_clients.sort_by_key(|client| client.instance_id);
+
+        let global = GlobalMetrics {
+            total_requests: self.global.total_requests,
+            successful_requests: self.global.successful_requests,
+            failed_requests: self.global.failed_requests,
+            success_rate: Self::percentage(
+                self.global.successful_requests,
+                self.global.total_requests,
+            ),
+            session_renewals: self.global.session_renewals,
+            rotation_events: self.global.rotation_events,
+            navigation_timeouts: self.global.navigation_timeouts,
+            bot_detection_hits: self.global.bot_detection_hits,
+            proxy_failures: self.global.proxy_failures,
+            uptime_seconds: self.start_time.elapsed().as_secs(),
+            total_yahoo_requests: self.global.total_yahoo_requests,
+            successful_yahoo_requests: self.global.successful_yahoo_requests,
+            failed_yahoo_requests: self.global.failed_yahoo_requests,
+            yahoo_success_rate: Self::percentage(
+                self.global.successful_yahoo_requests,
+                self.global.total_yahoo_requests,
+            ),
+            yahoo_batch_requests: self.global.yahoo_batch_requests,
+            yahoo_session_renewals: self.global.yahoo_session_renewals,
+            yahoo_client_count: self.global.yahoo_client_count,
+        };
+
+        DashboardState {
+            config,
+            instances,
+            proxies,
+            yahoo_clients,
+            global,
+            logs,
+        }
+    }
+}
+
+/// Render the wall-clock time at which `instant` occurred as local `HH:MM:SS`.
+///
+/// An `Instant` is monotonic and has no calendar meaning, so the time is
+/// reconstructed as "now minus the time elapsed since `instant`". The
+/// previous version ignored `instant` and always returned the current time.
+fn format_timestamp(instant: Instant) -> String {
+    use chrono::{Duration, Local};
+
+    // `from_std` fails only for durations too large for chrono; fall back to
+    // a zero age (i.e. "now") in that case.
+    let age = Duration::from_std(instant.elapsed()).unwrap_or_else(|_| Duration::zero());
+    (Local::now() - age).format("%H:%M:%S").to_string()
+}
+
+/// Session completion summary for logging
+///
+/// Serialized as one JSON line per session by `SessionLogger`.
+#[derive(Debug, Clone, Serialize)]
+pub struct SessionSummary {
+    pub instance_id: usize,
+    /// NOTE(review): the `SessionRenewed` handler currently writes `"N/A"`
+    /// here and `0` into `duration_seconds`.
+    pub session_start: String,
+    pub session_end: String,
+    pub duration_seconds: u64,
+    pub total_requests: usize,
+    /// NOTE(review): filled from the instance's `success_count` /
+    /// `failure_count`, which the visible handlers never reset on renewal, so
+    /// these look like lifetime totals rather than per-session values. Confirm.
+    pub successful_requests: usize,
+    pub failed_requests: usize,
+    pub proxy_info: Option<ProxyInfo>,
+    pub renewal_reason: String, // "task_limit", "request_limit", "error", "manual"
+}
--- a/src/monitoring/mod.rs
+++ b/src/monitoring/mod.rs
@@ -0,0 +1,78 @@
+// src/monitoring/mod.rs
+//! Monitoring system for tracking scraper performance and health
+//!
+//! This module provides:
+//! - Real-time metrics collection
+//! - Web-based dashboard
+//! - Session logging
+//! - Minimal performance overhead
+
+pub mod metrics;
+pub mod events;
+pub mod service;
+pub mod webserver;
+pub mod logger;
+
+pub use events::{MonitoringEvent,RenewalReason, InstanceStatusChange};
+pub use metrics::{ConfigSnapshot, ProxyInfo, ProxyStatus};
+pub use service::{MonitoringService, MonitoringHandle};
+pub use webserver::WebServer;
+
+use std::path::PathBuf;
+use std::sync::Arc;
+use tokio::sync::{mpsc, RwLock};
+
+/// Initialize the complete monitoring system
+///
+/// Creates the event channel, builds the `MonitoringService`, spawns its
+/// event loop and the dashboard web server, and returns the handle used to
+/// emit events together with the `JoinHandle` of the event-loop task.
+///
+/// The web-server task is spawned detached; its errors are only printed to
+/// stderr. The visible body has no failing path, so the `Result` is always
+/// `Ok`.
+///
+/// NOTE(review): the event-loop task swaps the real service out of
+/// `service_arc` and leaves a placeholder behind (zeroed config, empty log
+/// directory, and a receiver whose sender is dropped immediately). The
+/// `WebServer` holds the same `Arc`, so after the swap it reads the
+/// placeholder, not the running service. Whether the dashboard still sees
+/// live data depends on `WebServer` internals not shown here. Confirm.
+pub async fn init_monitoring(
+    config_snapshot: ConfigSnapshot,
+    log_dir: PathBuf,
+    dashboard_port: u16,
+) -> anyhow::Result<(MonitoringHandle, tokio::task::JoinHandle<()>)> {
+    // Create channel for events
+    let (tx, rx) = mpsc::unbounded_channel();
+
+    // Create monitoring service
+    let service = MonitoringService::new(config_snapshot, rx, log_dir);
+    let service_arc = Arc::new(RwLock::new(service));
+
+    // Start monitoring service task
+    let service_clone = Arc::clone(&service_arc);
+    let monitoring_task = tokio::spawn(async move {
+        println!("🚀 MONITORING TASK STARTED!");
+        // Take ownership of the service: `run` consumes `self`, so the real
+        // service is moved out from behind the lock and a placeholder is
+        // left in its place.
+        let mut service = {
+            let mut guard = service_clone.write().await;
+            std::mem::replace(
+                &mut *guard,
+                MonitoringService::new(
+                    ConfigSnapshot {
+                        max_parallel_instances: 0,
+                        max_tasks_per_instance: 0,
+                        enable_vpn_rotation: false,
+                        max_requests_per_session: 0,
+                        min_request_interval_ms: 0,
+                        max_retry_attempts: 0,
+                    },
+                    mpsc::unbounded_channel().1,
+                    PathBuf::new(),
+                ),
+            )
+        };
+        
+        println!("✅ ABOUT TO RUN SERVICE!");
+        service.run().await;
+    });
+
+    // Start web server (detached: the JoinHandle is dropped)
+    let webserver = WebServer::new(Arc::clone(&service_arc), dashboard_port);
+    tokio::spawn(async move {
+        if let Err(e) = webserver.run().await {
+            eprintln!("Web server error: {}", e);
+        }
+    });
+
+    // Create handle for emitting events
+    let handle = MonitoringHandle::new(tx);
+
+    Ok((handle, monitoring_task))
+}
--- a/src/monitoring/service.rs
+++ b/src/monitoring/service.rs
@@ -0,0 +1,511 @@
+// src/monitoring/service.rs
+use super::events::*;
+use super::metrics::*;
+use super::logger::SessionLogger;
+use std::collections::VecDeque;
+use std::sync::Arc;
+use std::time::Instant;
+use tokio::sync::{mpsc, RwLock};
+use chrono::Local;
+
+const MAX_LOGS: usize = 100;
+
+/// Monitoring service that collects events and maintains state
+pub struct MonitoringService {
+    /// Counters and per-instance/proxy/client state, mutated by the event handlers.
+    state: Arc<RwLock<MonitoringState>>,
+    /// Configuration snapshot cloned into every `DashboardState`.
+    config: ConfigSnapshot,
+    /// Log entries shown on the dashboard; allocated with capacity `MAX_LOGS`.
+    logs: Arc<RwLock<VecDeque<LogEntry>>>,
+    /// Writes a `SessionSummary` when a session is renewed.
+    session_logger: Arc<SessionLogger>,
+    /// Receiving end of the event channel drained by `run`.
+    event_rx: mpsc::UnboundedReceiver<MonitoringEvent>,
+}
+
+impl MonitoringService {
+    /// Build a service with empty state and an empty log buffer sized for
+    /// `MAX_LOGS` entries; session summaries are written under `log_dir`.
+    pub fn new(
+        config: ConfigSnapshot,
+        event_rx: mpsc::UnboundedReceiver<MonitoringEvent>,
+        log_dir: std::path::PathBuf,
+    ) -> Self {
+        let state = Arc::new(RwLock::new(MonitoringState::new()));
+        let logs = Arc::new(RwLock::new(VecDeque::with_capacity(MAX_LOGS)));
+        let session_logger = Arc::new(SessionLogger::new(log_dir));
+
+        MonitoringService {
+            state,
+            config,
+            logs,
+            session_logger,
+            event_rx,
+        }
+    }
+
+    /// Snapshot the current state for web clients.
+    ///
+    /// Takes read locks on the state and then on the log buffer, and returns
+    /// a `DashboardState` holding a copy of the config and of every buffered
+    /// log entry.
+    pub async fn get_dashboard_state(&self) -> DashboardState {
+        let state_guard = self.state.read().await;
+        let log_guard = self.logs.read().await;
+
+        let buffered_logs: Vec<LogEntry> = log_guard.iter().cloned().collect();
+        state_guard.to_dashboard_state(self.config.clone(), buffered_logs)
+    }
+
+    /// Main event processing loop.
+    ///
+    /// Consumes the service and handles events one at a time until the
+    /// channel yields `None`.
+    pub async fn run(mut self) {
+        loop {
+            match self.event_rx.recv().await {
+                Some(event) => self.process_event(event).await,
+                None => break,
+            }
+        }
+    }
+
+    async fn process_event(&self, event: MonitoringEvent) {
+        match event {
+            MonitoringEvent::PoolInitialized { pool_size, with_proxy, with_rotation } => {
+                self.log_info(format!(
+                    "Pool initialized: {} instances, proxy={}, rotation={}",
+                    pool_size, with_proxy, with_rotation
+                )).await;
+            }
+
+            MonitoringEvent::InstanceCreated { instance_id, max_tasks, proxy } => {
+                let mut state = self.state.write().await;
+                state.instances.insert(
+                    instance_id,
+                    InstanceState {
+                        id: instance_id,
+                        status: InstanceStatus::Idle,
+                        current_task: None,
+                        tasks_current_session: 0,
+                        tasks_max: max_tasks,
+                        session_requests: 0,
+                        total_requests: 0,
+                        success_count: 0,
+                        failure_count: 0,
+                        connected_proxy: proxy.clone(),
+                        last_activity: Instant::now(),
+                        yahoo_requests: 0,
+                        yahoo_success: 0,
+                        yahoo_failures: 0,
+                        yahoo_current_requests: 0,
+                        yahoo_max_requests: 0,
+                        yahoo_last_endpoint: None,
+                        yahoo_last_symbol: None,
+                    },
+                );
+
+                if let Some(proxy_info) = proxy {
+                    state.proxies.entry(proxy_info.container_name.clone()).or_insert_with(|| {
+                        ProxyState {
+                            container_name: proxy_info.container_name.clone(),
+                            ip_address: proxy_info.ip_address.clone(),
+                            port: proxy_info.port,
+                            status: ProxyStatus::Connected,
+                            instances_using: vec![instance_id],
+                        }
+                    }).instances_using.push(instance_id);
+                }
+
+                self.log_info(format!("Instance #{} created", instance_id)).await;
+            }
+
+            MonitoringEvent::InstanceStatusChanged { instance_id, status } => {
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.status = match status {
+                        InstanceStatusChange::Idle => InstanceStatus::Idle,
+                        InstanceStatusChange::Active => InstanceStatus::Active,
+                        InstanceStatusChange::Renewing => InstanceStatus::Renewing,
+                        InstanceStatusChange::Error(_) => InstanceStatus::Error,
+                    };
+                    inst.last_activity = Instant::now();
+                }
+            }
+
+            MonitoringEvent::InstanceSelected { instance_id, half } => {
+                self.log_info(format!("Instance #{} selected (half {})", instance_id, half)).await;
+            }
+
+            MonitoringEvent::TaskStarted { instance_id, url } => {
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.status = InstanceStatus::Active;
+                    inst.current_task = Some(url.clone());
+                    inst.last_activity = Instant::now();
+                }
+                state.global.total_requests += 1;
+
+                self.log_info(format!("Instance #{} started task: {}", instance_id, url)).await;
+            }
+
+            MonitoringEvent::TaskCompleted { instance_id, success, duration_ms, error } => {
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.current_task = None;
+                    inst.status = InstanceStatus::Idle;
+                    inst.total_requests += 1;
+                    inst.last_activity = Instant::now();
+
+                    if success {
+                        inst.success_count += 1;
+                        state.global.successful_requests += 1;
+                    } else {
+                        inst.failure_count += 1;
+                        state.global.failed_requests += 1;
+                    }
+                }
+
+                if success {
+                    self.log_info(format!(
+                        "Instance #{} completed task in {}ms",
+                        instance_id, duration_ms
+                    )).await;
+                } else {
+                    self.log_error(format!(
+                        "Instance #{} failed task: {}",
+                        instance_id,
+                        error.unwrap_or_else(|| "unknown error".to_string())
+                    )).await;
+                }
+            }
+
+            MonitoringEvent::NavigationTimeout { instance_id, url } => {
+                let mut state = self.state.write().await;
+                state.global.navigation_timeouts += 1;
+                
+                self.log_warn(format!(
+                    "Instance #{} navigation timeout: {}",
+                    instance_id, url
+                )).await;
+            }
+
+            MonitoringEvent::BotDetectionTriggered { instance_id, url } => {
+                let mut state = self.state.write().await;
+                state.global.bot_detection_hits += 1;
+                
+                self.log_warn(format!(
+                    "Instance #{} bot detection triggered: {}",
+                    instance_id, url
+                )).await;
+            }
+
+            MonitoringEvent::SessionStarted { instance_id, proxy } => {
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.session_requests = 0;
+                    inst.tasks_current_session = 0;
+                    inst.connected_proxy = proxy;
+                    inst.last_activity = Instant::now();
+                }
+
+                self.log_info(format!("Instance #{} started new session", instance_id)).await;
+            }
+
+            MonitoringEvent::SessionRenewed { instance_id, old_request_count, reason, new_proxy } => {
+                // Log the completed session
+                let session_summary = {
+                    let state = self.state.read().await;
+                    if let Some(inst) = state.instances.get(&instance_id) {
+                        Some(SessionSummary {
+                            instance_id,
+                            session_start: "N/A".to_string(),
+                            session_end: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
+                            duration_seconds: 0,
+                            total_requests: old_request_count,
+                            successful_requests: inst.success_count,
+                            failed_requests: inst.failure_count,
+                            proxy_info: inst.connected_proxy.clone(),
+                            renewal_reason: reason.to_string(),
+                        })
+                    } else {
+                        None
+                    }
+                };
+
+                if let Some(summary) = session_summary {
+                    self.session_logger.log_session(&summary).await;
+                }
+
+                // Update state for new session
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.session_requests = 0;
+                    inst.tasks_current_session = 0;
+                    inst.connected_proxy = new_proxy;
+                    inst.last_activity = Instant::now();
+                }
+                state.global.session_renewals += 1;
+
+                self.log_info(format!(
+                    "Instance #{} renewed session (reason: {}, {} requests)",
+                    instance_id, reason, old_request_count
+                )).await;
+            }
+
+            MonitoringEvent::SessionRequestIncremented { instance_id, new_count } => {
+                let mut state = self.state.write().await;
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.session_requests = new_count;
+                    inst.last_activity = Instant::now();
+                }
+            }
+
+            MonitoringEvent::ProxyConnected { container_name, ip_address, port } => {
+                let mut state = self.state.write().await;
+                state.proxies.insert(
+                    container_name.clone(),
+                    ProxyState {
+                        container_name: container_name.clone(),
+                        ip_address: ip_address.clone(),
+                        port,
+                        status: ProxyStatus::Connected,
+                        instances_using: vec![],
+                    },
+                );
+
+                self.log_info(format!(
+                    "Proxy {} connected: {}:{}",
+                    container_name, ip_address, port
+                )).await;
+            }
+
+            MonitoringEvent::ProxyFailed { container_name, error } => {
+                let mut state = self.state.write().await;
+                if let Some(proxy) = state.proxies.get_mut(&container_name) {
+                    proxy.status = ProxyStatus::Disconnected;
+                }
+                state.global.proxy_failures += 1;
+
+                self.log_error(format!(
+                    "Proxy {} failed: {}",
+                    container_name, error
+                )).await;
+            }
+
+            MonitoringEvent::ProxyRotated { instance_id, old_proxy, new_proxy } => {
+                self.log_info(format!(
+                    "Instance #{} rotated proxy: {} -> {}",
+                    instance_id,
+                    old_proxy.unwrap_or_else(|| "none".to_string()),
+                    new_proxy
+                )).await;
+            }
+
+            MonitoringEvent::RotationTriggered { reason } => {
+                let mut state = self.state.write().await;
+                state.global.rotation_events += 1;
+
+                self.log_info(format!("Pool rotation triggered: {}", reason)).await;
+            }
+
+            // Yahoo API Events
+            MonitoringEvent::YahooRequestStarted { instance_id, endpoint, symbol } => {
+                let mut state = self.state.write().await;
+                
+                // Update global Yahoo stats
+                state.global.total_yahoo_requests += 1;
+                
+                // Update instance stats
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.yahoo_requests += 1;
+                    inst.yahoo_current_requests += 1;
+                    inst.yahoo_last_endpoint = Some(endpoint.clone());
+                    inst.yahoo_last_symbol = symbol.clone();
+                    inst.last_activity = Instant::now();
+                }
+                
+                // Update Yahoo client stats
+                if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
+                    client.requests_total += 1;
+                    client.current_requests += 1;
+                    client.last_activity = Instant::now();
+                }
+                
+                self.log_info(format!(
+                    "YahooClient[{}] started request: {} {}",
+                    instance_id,
+                    endpoint,
+                    symbol.unwrap_or_else(|| "search".to_string())
+                )).await;
+            }
+            
+            MonitoringEvent::YahooRequestCompleted { instance_id, success, duration_ms, error } => {
+                let mut state = self.state.write().await;
+                
+                // Update global Yahoo stats
+                if success {
+                    state.global.successful_yahoo_requests += 1;
+                } else {
+                    state.global.failed_yahoo_requests += 1;
+                }
+                
+                // Update instance stats
+                if let Some(inst) = state.instances.get_mut(&instance_id) {
+                    inst.yahoo_current_requests = inst.yahoo_current_requests.saturating_sub(1);
+                    if success {
+                        inst.yahoo_success += 1;
+                    } else {
+                        inst.yahoo_failures += 1;
+                    }
+                    inst.last_activity = Instant::now();
+                }
+                
+                // Update Yahoo client stats
+                if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
+                    client.current_requests = client.current_requests.saturating_sub(1);
+                    if success {
+                        client.requests_successful += 1;
+                    } else {
+                        client.requests_failed += 1;
+                    }
+                    client.last_activity = Instant::now();
+                }
+                
+                if success {
+                    self.log_info(format!(
+                        "YahooClient[{}] completed request in {}ms",
+                        instance_id, duration_ms
+                    )).await;
+                } else {
+                    self.log_error(format!(
+                        "YahooClient[{}] failed request in {}ms: {}",
+                        instance_id,
+                        duration_ms,
+                        error.unwrap_or_else(|| "unknown error".to_string())
+                    )).await;
+                }
+            }
+            
+            MonitoringEvent::YahooBatchRequestStarted { count, symbols, endpoint } => {
+                let mut state = self.state.write().await;
+                state.global.yahoo_batch_requests += 1;
+                
+                self.log_info(format!(
+                    "Yahoo batch request started: {} symbols, endpoint: {}",
+                    count, endpoint
+                )).await;
+                
+                if !symbols.is_empty() {
+                    self.log_debug(format!(
+                        "Batch symbols: {}",
+                        symbols.join(", ")
+                    )).await;
+                }
+            }
+            
+            MonitoringEvent::YahooBatchRequestCompleted { successful, failed, total, duration_ms } => {
+                let success_rate = if total > 0 {
+                    (successful as f64 / total as f64) * 100.0
+                } else {
+                    0.0
+                };
+                
+                self.log_info(format!(
+                    "Yahoo batch completed: {}/{} successful ({:.1}%) in {}ms",
+                    successful, total, success_rate, duration_ms
+                )).await;
+            }
+            
+            MonitoringEvent::YahooClientCreated { instance_id, has_proxy, max_requests } => {
+                let mut state = self.state.write().await;
+                state.global.yahoo_client_count += 1;
+                
+                state.yahoo_clients.insert(
+                    instance_id,
+                    YahooClientState {
+                        instance_id,
+                        requests_total: 0,
+                        requests_successful: 0,
+                        requests_failed: 0,
+                        current_requests: 0,
+                        max_requests,
+                        has_proxy,
+                        last_activity: Instant::now(),
+                        proxy_info: None,
+                    },
+                );
+                
+                self.log_info(format!(
+                    "YahooClient[{}] created (proxy: {}, max requests: {})",
+                    instance_id, has_proxy, max_requests
+                )).await;
+            }
+            
+            MonitoringEvent::YahooClientReset { instance_id, previous_requests, reason } => {
+                let mut state = self.state.write().await;
+                state.global.yahoo_session_renewals += 1;
+                
+                if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
+                    client.current_requests = 0;
+                    client.last_activity = Instant::now();
+                }
+                
+                self.log_info(format!(
+                    "YahooClient[{}] reset (had {} requests, reason: {})",
+                    instance_id, previous_requests, reason
+                )).await;
+            }
+
+            MonitoringEvent::LogMessage { level, message } => {
+                match level {
+                    crate::monitoring::events::LogLevel::Info => self.log_info(message).await,
+                    crate::monitoring::events::LogLevel::Warn => self.log_warn(message).await,
+                    crate::monitoring::events::LogLevel::Error => self.log_error(message).await,
+                }
+            }
+        }
+    }
+
+    /// Append `message` to the in-memory log buffer at `Info` level.
+    async fn log_info(&self, message: String) {
+        self.record_log_entry(super::metrics::LogLevel::Info, message).await;
+    }
+
+    /// Append `message` to the in-memory log buffer at `Warn` level.
+    async fn log_warn(&self, message: String) {
+        self.record_log_entry(super::metrics::LogLevel::Warn, message).await;
+    }
+
+    /// Append `message` to the in-memory log buffer at `Error` level.
+    async fn log_error(&self, message: String) {
+        self.record_log_entry(super::metrics::LogLevel::Error, message).await;
+    }
+
+    /// Append a debug message, but only when the `DEBUG_LOGGING` environment
+    /// variable is set.
+    ///
+    /// The entry is stored at `Info` level with a `[DEBUG] ` prefix; no separate
+    /// debug level is used here.
+    async fn log_debug(&self, message: String) {
+        // Only log debug if DEBUG_LOGGING is enabled
+        if std::env::var("DEBUG_LOGGING").is_ok() {
+            self.record_log_entry(
+                super::metrics::LogLevel::Info,
+                format!("[DEBUG] {}", message),
+            ).await;
+        }
+    }
+
+    /// Shared implementation of the `log_*` helpers: stamps the entry with the
+    /// current local time (`HH:MM:SS`) and hands it to `add_log`.
+    async fn record_log_entry(&self, level: super::metrics::LogLevel, message: String) {
+        self.add_log(LogEntry {
+            timestamp: Local::now().format("%H:%M:%S").to_string(),
+            level,
+            message,
+        }).await;
+    }
+
+    /// Push `entry` onto the log buffer, discarding the oldest entry first when
+    /// the buffer already holds `MAX_LOGS` entries or more.
+    async fn add_log(&self, entry: LogEntry) {
+        let mut buffer = self.logs.write().await;
+        let is_full = buffer.len() >= MAX_LOGS;
+        if is_full {
+            // Make room: the front of the queue is the oldest entry.
+            buffer.pop_front();
+        }
+        buffer.push_back(entry);
+    }
+}
+
+/// Cloneable sending side of the monitoring event channel.
+#[derive(Clone)]
+pub struct MonitoringHandle {
+    tx: mpsc::UnboundedSender<MonitoringEvent>,
+}
+
+impl MonitoringHandle {
+    /// Wrap an existing unbounded sender in a handle.
+    pub fn new(tx: mpsc::UnboundedSender<MonitoringEvent>) -> Self {
+        MonitoringHandle { tx }
+    }
+
+    /// Queue `event` for the monitoring side without waiting.
+    ///
+    /// A failed send is deliberately discarded: monitoring must never block or
+    /// fail the application code that emits events.
+    pub fn emit(&self, event: MonitoringEvent) {
+        self.tx.send(event).ok();
+    }
+}
--- a/src/monitoring/webserver.rs
+++ b/src/monitoring/webserver.rs
@@ -0,0 +1,77 @@
+// src/monitoring/webserver.rs
+use super::service::MonitoringService;
+use axum::{
+    extract::{
+        ws::{Message, WebSocket, WebSocketUpgrade},
+        State,
+    },
+    response::{Html, IntoResponse, Response},
+    routing::get,
+    Router,
+};
+use std::sync::Arc;
+use tokio::sync::RwLock;
+use tokio::time::{interval, Duration};
+
+const UPDATE_INTERVAL_MS: u64 = 1000; // 1 second updates
+
+/// HTTP + WebSocket front end that serves the monitoring dashboard.
+pub struct WebServer {
+    service: Arc<RwLock<MonitoringService>>,
+    port: u16,
+}
+
+impl WebServer {
+    /// Create a server that will expose `service` on `port`.
+    pub fn new(service: Arc<RwLock<MonitoringService>>, port: u16) -> Self {
+        WebServer { service, port }
+    }
+
+    /// Bind the listener and serve until the server stops or fails.
+    ///
+    /// Routes: `/` serves the dashboard page, `/ws` upgrades to the WebSocket
+    /// that streams dashboard state. The listener binds to `0.0.0.0`, i.e. all
+    /// interfaces, even though the printed hint mentions `localhost`.
+    ///
+    /// Returns an error if binding the port or serving fails.
+    pub async fn run(self) -> anyhow::Result<()> {
+        let port = self.port;
+
+        let routes = Router::new()
+            .route("/", get(dashboard_handler))
+            .route("/ws", get(websocket_handler))
+            .with_state(self.service);
+
+        let bind_addr = format!("0.0.0.0:{}", port);
+        println!("📊 Dashboard available at: http://localhost:{}", port);
+
+        let listener = tokio::net::TcpListener::bind(&bind_addr).await?;
+        axum::serve(listener, routes).await?;
+
+        Ok(())
+    }
+}
+
+/// Serve the dashboard page; `dashboard.html` is embedded into the binary at
+/// compile time via `include_str!`.
+async fn dashboard_handler() -> impl IntoResponse {
+    let page: &'static str = include_str!("dashboard.html");
+    Html(page)
+}
+
+/// Upgrade the request to a WebSocket and hand the connection, together with
+/// the shared monitoring service, to `handle_socket`.
+async fn websocket_handler(
+    ws: WebSocketUpgrade,
+    State(service): State<Arc<RwLock<MonitoringService>>>,
+) -> Response {
+    let on_connected = move |socket| handle_socket(socket, service);
+    ws.on_upgrade(on_connected)
+}
+
+/// Push the dashboard state to one WebSocket client as JSON text, once every
+/// `UPDATE_INTERVAL_MS` milliseconds.
+///
+/// The loop ends when sending fails or when the state cannot be serialized.
+/// Nothing is ever read from the socket.
+async fn handle_socket(mut socket: WebSocket, service: Arc<RwLock<MonitoringService>>) {
+    let period = Duration::from_millis(UPDATE_INTERVAL_MS);
+    let mut ticker = interval(period);
+
+    loop {
+        ticker.tick().await;
+
+        // Hold the service read lock only while the snapshot is being built.
+        let guard = service.read().await;
+        let snapshot = guard.get_dashboard_state().await;
+        drop(guard);
+
+        let payload = match serde_json::to_string(&snapshot) {
+            Ok(json) => json,
+            Err(e) => {
+                eprintln!("Failed to serialize dashboard state: {}", e);
+                break;
+            }
+        };
+
+        if socket.send(Message::Text(payload)).await.is_err() {
+            break; // Client disconnected
+        }
+    }
+}
--- a/src/scraper/docker_vpn_proxy.rs
+++ b/src/scraper/docker_vpn_proxy.rs
@@ -0,0 +1,516 @@
+use anyhow::{anyhow, Context, Result};
+use futures::future::join_all;
+use std::{collections::HashSet, path::{Path, PathBuf}, sync::{Arc, RwLock}, time::Duration};
+use tokio::{process::Command, time::{sleep}};
+use walkdir::WalkDir;
+
+/// Pool of Docker containers that each expose a SOCKS5 proxy on a local port.
+///
+/// `container_names` and `proxy_ports` are parallel vectors: entry `i` of one
+/// belongs to entry `i` of the other.
+pub struct DockerVpnProxyPool {
+    /// Names of the containers kept in the pool (`vpn-proxy-<hostname>-<n>`).
+    container_names: Vec<String>,
+    /// Host ports published for each container's port 1080.
+    proxy_ports: Vec<u16>,  // e.g., [10801, 10802, ...]
+    /// Indices into `proxy_ports` that were reported dead via `mark_proxy_dead`.
+    /// This is a `std::sync::RwLock`, so `read()`/`write()` return `Err` when poisoned.
+    dead_proxies: Arc<RwLock<HashSet<usize>>>,
+}
+
+impl DockerVpnProxyPool {
+    /// Host ports are allocated upwards from `PROXY_BASE_PORT + 1`.
+    const PROXY_BASE_PORT: u16 = 10800;
+
+    /// Start `instances_per_ovpn` proxy containers for every hostname directory
+    /// in `ovpn_dir`, health-check them, and build a pool from those that pass.
+    ///
+    /// Steps:
+    /// 1. Every subdirectory of `ovpn_dir` is treated as one VPN hostname.
+    ///    Directory names that are not valid UTF-8 are skipped.
+    /// 2. For each hostname an `.ovpn` file is chosen (a name containing
+    ///    `tcp443` wins, otherwise the first one found) and the containers are
+    ///    started from the `rust-vpn-proxy` image.
+    /// 3. After an 8 second pause all proxies are tested in parallel. A
+    ///    container is kept only when the test yields an exit IP; the others
+    ///    are logged (with the tail of their Docker logs) and removed.
+    ///
+    /// # Errors
+    /// Returns an error when
+    /// * `instances_per_ovpn` is 0,
+    /// * `ovpn_dir` cannot be read or contains no usable hostname directory,
+    /// * more containers are requested than ports exist above `PROXY_BASE_PORT`,
+    /// * a hostname directory has no `.ovpn` file or `docker run` fails — in
+    ///   that case every container started so far is removed again,
+    /// * no container passes the health check.
+    pub async fn new(
+        ovpn_dir: &Path,
+        username: String,
+        password: String,
+        instances_per_ovpn: usize,
+    ) -> Result<Self> {
+        if instances_per_ovpn == 0 {
+            return Err(anyhow!("instances_per_ovpn must be at least 1"));
+        }
+
+        // Count hostnames (subdirs in ovpn_dir). A non-UTF-8 name cannot be used
+        // in a container name, so it is skipped instead of panicking.
+        let hostnames: Vec<String> = std::fs::read_dir(ovpn_dir)?
+            .filter_map(Result::ok)
+            .filter(|e| e.path().is_dir())
+            .filter_map(|e| e.file_name().into_string().ok())
+            .collect();
+
+        let num_servers = hostnames.len();
+        if num_servers == 0 {
+            return Err(anyhow!("No VPN hostnames found in {:?}", ovpn_dir));
+        }
+
+        // Calculate total containers: hostnames × instances_per_ovpn
+        let total_containers = num_servers
+            .checked_mul(instances_per_ovpn)
+            .ok_or_else(|| anyhow!("Too many VPN proxy containers requested"))?;
+
+        // Every container needs its own u16 host port above the base port.
+        let available_ports = usize::from(u16::MAX - Self::PROXY_BASE_PORT);
+        if total_containers > available_ports {
+            return Err(anyhow!(
+                "{} containers requested but only {} ports are available above {}",
+                total_containers, available_ports, Self::PROXY_BASE_PORT
+            ));
+        }
+
+        crate::util::logger::log_info(&format!(
+            "Found {} VPN hostnames × {} instances = {} total containers",
+            num_servers, instances_per_ovpn, total_containers
+        )).await;
+
+        // === STEP 1: Start ALL containers first ===
+        let mut started: Vec<(String, u16)> = Vec::with_capacity(total_containers);
+        if let Err(e) = Self::start_containers(
+            ovpn_dir,
+            &hostnames,
+            &username,
+            &password,
+            instances_per_ovpn,
+            &mut started,
+        ).await {
+            // Do not leave half a pool running when start-up is aborted.
+            for (name, _) in &started {
+                let _ = Command::new("docker")
+                    .args(["rm", "-f", name.as_str()])
+                    .status()
+                    .await;
+            }
+            return Err(e);
+        }
+        let (container_names, proxy_ports): (Vec<String>, Vec<u16>) =
+            started.into_iter().unzip();
+
+        // Brief pause to let containers start
+        sleep(Duration::from_secs(8)).await;
+        crate::util::logger::log_info(&format!(
+            "All {} containers started, beginning health checks...", 
+            container_names.len()
+        )).await;
+
+        // === STEP 2: Test ALL proxies in parallel ===
+        let results = Self::test_all_proxies_parallel(&container_names, &proxy_ports).await;
+
+        // Filter out failed containers
+        let mut working_containers = Vec::new();
+        let mut working_ports = Vec::new();
+        let mut failed_count = 0;
+
+        let candidates = container_names.into_iter().zip(proxy_ports);
+        for (result, (container_name, port)) in results.iter().zip(candidates) {
+            let failure = match result {
+                Ok(Some(ip)) => {
+                    crate::util::logger::log_info(&format!(
+                        "✓ Container {} on port {} ready with IP: {}", 
+                        container_name, port, ip
+                    )).await;
+                    working_containers.push(container_name);
+                    working_ports.push(port);
+                    continue;
+                }
+                Ok(None) => "ready but IP detection failed".to_string(),
+                Err(e) => format!("failed: {}", e),
+            };
+
+            // Capture the tail of the container log before the container is removed.
+            let logs = Command::new("docker")
+                .args(["logs", "--tail", "20", container_name.as_str()])
+                .output()
+                .await
+                .ok()
+                .map(|output| String::from_utf8_lossy(&output.stdout).to_string());
+
+            crate::util::logger::log_error(&format!(
+                "✗ Container {} on port {} {}. Logs: {:?}", 
+                container_name, port, failure, logs
+            )).await;
+            failed_count += 1;
+            // Clean up failed container
+            let _ = Self::cleanup_container(&container_name).await;
+        }
+
+        if working_containers.is_empty() {
+            return Err(anyhow!("All {} VPN proxy containers failed to start", total_containers));
+        }
+
+        crate::util::logger::log_info(&format!(
+            "Started {}/{} VPN proxy containers successfully ({} hostnames × {} instances)", 
+            working_containers.len(), total_containers, num_servers, instances_per_ovpn
+        )).await;
+
+        if failed_count > 0 {
+            crate::util::logger::log_warn(&format!(
+                "{} containers failed and were cleaned up", 
+                failed_count
+            )).await;
+        }
+
+        Ok(Self {
+            container_names: working_containers,
+            proxy_ports: working_ports,
+            dead_proxies: Arc::new(RwLock::new(HashSet::new())),
+        })
+    }
+
+    /// Start every container for `new`, recording `(name, host port)` in `started`.
+    ///
+    /// A container is recorded *before* `docker run` is invoked, so when this
+    /// returns an error `started` lists everything the caller has to remove,
+    /// including the container whose start failed.
+    async fn start_containers(
+        ovpn_dir: &Path,
+        hostnames: &[String],
+        username: &str,
+        password: &str,
+        instances_per_ovpn: usize,
+        started: &mut Vec<(String, u16)>,
+    ) -> Result<()> {
+        for hostname in hostnames {
+            // Pick tcp443.ovpn if exists, else first .ovpn
+            let hostname_dir = ovpn_dir.join(hostname);
+            let mut ovpn_path: Option<PathBuf> = None;
+            for entry in WalkDir::new(&hostname_dir).max_depth(1) {
+                let entry = entry?;
+                if entry.path().extension().map_or(false, |ext| ext == "ovpn") {
+                    if entry.file_name().to_str().unwrap_or("").contains("tcp443") {
+                        ovpn_path = Some(entry.path().to_path_buf());
+                        break;
+                    } else if ovpn_path.is_none() {
+                        ovpn_path = Some(entry.path().to_path_buf());
+                    }
+                }
+            }
+
+            let ovpn_path = ovpn_path.ok_or_else(|| anyhow!("No .ovpn found for {}", hostname))?;
+            let ovpn_label = ovpn_path
+                .file_name()
+                .map(|n| n.to_string_lossy().into_owned())
+                .unwrap_or_default();
+
+            // Spawn multiple instances for this .ovpn file
+            for instance_num in 0..instances_per_ovpn {
+                let name = format!("vpn-proxy-{}-{}", hostname, instance_num);
+                // The caller verified that the total fits into the u16 port range.
+                let port = Self::PROXY_BASE_PORT + 1 + started.len() as u16;
+
+                // Clean up any existing container with the same name
+                let _ = Command::new("docker")
+                    .args(["rm", "-f", name.as_str()])
+                    .status()
+                    .await;
+
+                started.push((name.clone(), port));
+
+                // Run Docker container. The credentials are placed in the docker
+                // client's environment and forwarded by name (`-e VAR`), so the
+                // password never appears on the command line / process list.
+                let status = Command::new("docker")
+                    .env("VPN_USERNAME", username)
+                    .env("VPN_PASSWORD", password)
+                    .args([
+                        "run", "-d",
+                        "--name", name.as_str(),
+                        "--cap-add=NET_ADMIN",
+                        "--device", "/dev/net/tun",
+                        "--sysctl", "net.ipv4.ip_forward=1",
+                        "-v", &format!("{}:/vpn/config.ovpn", ovpn_path.display()),
+                        "-e", "VPN_USERNAME",
+                        "-e", "VPN_PASSWORD",
+                        "-p", &format!("{}:1080", port),
+                        "rust-vpn-proxy",
+                    ])
+                    .status()
+                    .await
+                    .context("Failed to run Docker")?;
+
+                if !status.success() {
+                    return Err(anyhow!("Docker run failed for {}", name));
+                }
+
+                crate::util::logger::log_info(&format!(
+                    "Started container {} on port {} (using {})", 
+                    name, port, ovpn_label
+                )).await;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Health-check all proxies concurrently, one spawned task per container.
+    ///
+    /// Each task makes up to 6 attempts with a 10 second pause between them
+    /// (about 60 seconds in total). An attempt is retried both when the SOCKS5
+    /// test fails and when the proxy answers but no exit IP could be determined
+    /// yet, so a tunnel that is still connecting gets time to come up.
+    ///
+    /// Returns one entry per input pair, in input order:
+    /// * `Ok(Some(ip))` – proxy works and reported this exit IP,
+    /// * `Ok(None)` – proxy accepted connections but no IP was obtained on any attempt,
+    /// * `Err(_)` – the SOCKS5 test failed on every attempt, or the task panicked.
+    async fn test_all_proxies_parallel(container_names: &[String], proxy_ports: &[u16]) -> Vec<Result<Option<String>>> {
+        const MAX_ATTEMPTS: u32 = 6;
+        const RETRY_DELAY: Duration = Duration::from_secs(10);
+
+        let mut tasks = Vec::with_capacity(container_names.len());
+
+        for (container_name, port) in container_names.iter().zip(proxy_ports.iter()) {
+            let name = container_name.clone();
+            let port = *port;
+
+            tasks.push(tokio::spawn(async move {
+                for attempt in 1..=MAX_ATTEMPTS {
+                    crate::util::logger::log_info(&format!("Testing proxy {} (port {}) - Attempt {}/{}", 
+                        name, port, attempt, MAX_ATTEMPTS)).await;
+
+                    let is_last_attempt = attempt == MAX_ATTEMPTS;
+                    match Self::test_single_proxy(port).await {
+                        Ok(Some(ip)) => {
+                            return Ok(Some(ip));
+                        }
+                        Ok(None) if is_last_attempt => {
+                            // Connection works but IP detection never succeeded
+                            return Ok(None);
+                        }
+                        Ok(None) => {
+                            // The listener is up but the exit IP is not available yet;
+                            // wait and test again instead of giving up immediately.
+                            crate::util::logger::log_info(&format!(
+                                "Attempt {}/{} for {}: proxy reachable but no exit IP yet - retrying in 10s", 
+                                attempt, MAX_ATTEMPTS, name)).await;
+                            sleep(RETRY_DELAY).await;
+                        }
+                        Err(e) if is_last_attempt => {
+                            return Err(anyhow!("Failed after {} attempts: {}", MAX_ATTEMPTS, e));
+                        }
+                        Err(e) => {
+                            crate::util::logger::log_info(&format!("Attempt {}/{} for {}: {} - retrying in 10s", 
+                                attempt, MAX_ATTEMPTS, name, e)).await;
+                            sleep(RETRY_DELAY).await;
+                        }
+                    }
+                }
+                Err(anyhow!("Unexpected exit from retry loop"))
+            }));
+        }
+
+        // Wait for all tasks to complete
+        join_all(tasks)
+            .await
+            .into_iter()
+            .map(|result| match result {
+                Ok(inner) => inner,
+                Err(e) => Err(anyhow!("Task panicked: {}", e)),
+            })
+            .collect()
+    }
+
+    /// Test the SOCKS5 proxy listening on `127.0.0.1:<port>`.
+    ///
+    /// 1. Performs a SOCKS5 greeting (version 5, one method, "no auth") on a
+    ///    blocking socket and expects the reply `[0x05, 0x00]`. Connect, read and
+    ///    write are each limited to 5 seconds so an unresponsive proxy cannot
+    ///    block the worker thread forever.
+    /// 2. Asks `https://checkip.amazonaws.com` through the proxy with `curl`
+    ///    (10 second limit) to learn the exit IP.
+    ///
+    /// Returns
+    /// * `Ok(Some(ip))` when the handshake works and curl returned a valid IPv4 address,
+    /// * `Ok(None)` when the handshake works but curl failed or printed something else,
+    /// * `Err(_)` when the handshake fails or the blocking task could not be joined.
+    async fn test_single_proxy(port: u16) -> Result<Option<String>> {
+        use std::io::{Read, Write};
+        use std::net::{SocketAddr, TcpStream};
+        use std::time::Duration as StdDuration;
+
+        // First, test SOCKS5 handshake directly
+        crate::util::logger::log_info(&format!("Testing SOCKS5 handshake on port {}...", port)).await;
+
+        // Use spawn_blocking for synchronous I/O
+        let handshake = tokio::task::spawn_blocking(move || -> Result<()> {
+            let io_timeout = StdDuration::from_secs(5);
+            let addr = SocketAddr::from(([127, 0, 0, 1], port));
+
+            // Connect to SOCKS5 proxy
+            let mut stream = TcpStream::connect_timeout(&addr, io_timeout)
+                .map_err(|e| anyhow!("Failed to connect: {}", e))?;
+
+            // Without these a proxy that accepts but never replies would hang here.
+            stream
+                .set_read_timeout(Some(io_timeout))
+                .map_err(|e| anyhow!("Failed to set read timeout: {}", e))?;
+            stream
+                .set_write_timeout(Some(io_timeout))
+                .map_err(|e| anyhow!("Failed to set write timeout: {}", e))?;
+
+            // Send SOCKS5 greeting: version 5, 1 method (no auth)
+            let greeting: [u8; 3] = [0x05, 0x01, 0x00];
+            stream
+                .write_all(&greeting)
+                .map_err(|e| anyhow!("Failed to send greeting: {}", e))?;
+
+            // Read response
+            let mut response = [0u8; 2];
+            stream
+                .read_exact(&mut response)
+                .map_err(|e| anyhow!("Failed to read response: {}", e))?;
+
+            // Check response: should be [0x05, 0x00] for no auth required
+            if response != [0x05, 0x00] {
+                return Err(anyhow!("Unexpected SOCKS5 response: {:?}", response));
+            }
+
+            Ok(())
+        }).await;
+
+        match handshake {
+            Ok(Ok(())) => {}
+            Ok(Err(e)) => return Err(anyhow!("SOCKS5 test failed: {}", e)),
+            Err(e) => return Err(anyhow!("Task failed: {}", e)),
+        }
+
+        crate::util::logger::log_info(&format!("✓ SOCKS5 proxy on port {} accepts connections", port)).await;
+
+        // Try to get IP through proxy using curl (fallback method)
+        // NOTE(review): `--socks5` resolves the host name locally, while the proxy
+        // URLs handed out by this pool use `socks5h` (remote resolution) — confirm
+        // whether `--socks5-hostname` would be the more faithful check.
+        let curl_result = tokio::process::Command::new("curl")
+            .args([
+                "-s",
+                "--socks5", &format!("localhost:{}", port),
+                "--max-time", "10",
+                "https://checkip.amazonaws.com"
+            ])
+            .output()
+            .await;
+
+        match curl_result {
+            Ok(output) if output.status.success() => {
+                let ip = String::from_utf8_lossy(&output.stdout).trim().to_string();
+                if Self::is_valid_ip(&ip) {
+                    crate::util::logger::log_info(&format!("✓ Got IP via proxy: {}", ip)).await;
+                    Ok(Some(ip))
+                } else {
+                    crate::util::logger::log_info(&format!("✓ Proxy works, invalid IP format: {}", ip)).await;
+                    Ok(None)
+                }
+            }
+            _ => {
+                // Proxy accepts connections but curl failed - still acceptable
+                crate::util::logger::log_info("✓ Proxy accepts connections (curl test failed)").await;
+                Ok(None)
+            }
+        }
+    }
+
+    /// Stop and then remove the named container.
+    ///
+    /// Both Docker invocations are best effort: their outcome is ignored and
+    /// the function always returns `Ok(())`.
+    async fn cleanup_container(container_name: &str) -> Result<()> {
+        for action in ["stop", "rm"] {
+            let _ = Command::new("docker")
+                .args([action, container_name])
+                .status()
+                .await;
+        }
+
+        Ok(())
+    }
+
+    /// Return `true` when `ip` is a dotted-quad IPv4 address in canonical form:
+    /// exactly four parts, each a `u8` written without sign or leading zeros.
+    fn is_valid_ip(ip: &str) -> bool {
+        let octets: Vec<&str> = ip.split('.').collect();
+
+        octets.len() == 4
+            && octets.iter().all(|octet| {
+                // Round-tripping through `u8` rejects forms such as "01" or "+1".
+                octet
+                    .parse::<u8>()
+                    .map_or(false, |value| value.to_string() == *octet)
+            })
+    }
+
+    /// Build the `socks5h://` URL of the proxy selected by `index`.
+    ///
+    /// `index` wraps around the number of proxies, so any value is accepted.
+    pub fn get_proxy_url(&self, index: usize) -> String {
+        let slot = index % self.proxy_ports.len();
+        let port = self.proxy_ports[slot];
+        format!("socks5h://localhost:{}", port)
+    }
+
+    /// Number of proxies in the pool, including any that were marked dead.
+    pub fn num_proxies(&self) -> usize {
+        let ports = &self.proxy_ports;
+        ports.len()
+    }
+
+    /// Stop and remove every container of this pool, one after another.
+    ///
+    /// Failures of individual Docker commands are ignored; the function always
+    /// returns `Ok(())`.
+    pub async fn shutdown(&self) -> Result<()> {
+        let container_count = self.container_names.len();
+        crate::util::logger::log_info(&format!("Shutting down {} Docker proxy containers...", 
+            container_count)).await;
+
+        for name in self.container_names.iter() {
+            for action in ["stop", "rm"] {
+                let _ = Command::new("docker")
+                    .args([action, name.as_str()])
+                    .status()
+                    .await;
+            }
+        }
+        Ok(())
+    }
+
+    /// Describe the proxy at `index` for the monitoring dashboard.
+    ///
+    /// Returns `None` when `index` is out of range. The reported address is
+    /// always `127.0.0.1` and the status is always `Connected`; the dead-proxy
+    /// set is not consulted here.
+    pub fn get_proxy_info(&self, index: usize) -> Option<crate::monitoring::ProxyInfo> {
+        let container_name = self.container_names.get(index)?.clone();
+
+        Some(crate::monitoring::ProxyInfo {
+            container_name,
+            ip_address: String::from("127.0.0.1"), // SOCKS5 proxy on localhost
+            port: self.proxy_ports[index],
+            status: crate::monitoring::ProxyStatus::Connected,
+        })
+    }
+
+    /// Name of the container at `index`, or `None` when `index` is out of range.
+    pub fn get_container_name(&self, index: usize) -> Option<String> {
+        match self.container_names.get(index) {
+            Some(name) => Some(name.clone()),
+            None => None,
+        }
+    }
+
+    /// Find the first proxy, searching circularly from `start_index`, that has
+    /// not been marked dead.
+    ///
+    /// Returns the proxy's index together with its `socks5h://` URL, or `None`
+    /// when every proxy is dead, the pool is empty, or the dead-proxy lock is
+    /// poisoned.
+    pub async fn get_healthy_proxy_url(&self, start_index: usize) -> Option<(usize, String)> {
+        let dead = match self.dead_proxies.read() {
+            Ok(guard) => guard,
+            Err(_) => return None,
+        };
+        let total = self.proxy_ports.len();
+
+        // Visit each slot at most once, wrapping around the end of the list.
+        let first_alive = (0..total)
+            .map(|offset| (start_index + offset) % total)
+            .find(|candidate| !dead.contains(candidate));
+
+        let choice = match first_alive {
+            Some(index) => {
+                let port = self.proxy_ports[index];
+                Some((index, format!("socks5h://localhost:{}", port)))
+            }
+            None => None,
+        };
+        choice
+    }
+    
+    /// Mark the proxy at `index` as dead so `get_healthy_proxy_url` skips it.
+    ///
+    /// Returns
+    /// * `Some(true)` when the proxy is now recorded as dead,
+    /// * `Some(false)` when `index` does not refer to a proxy of this pool
+    ///   (nothing is recorded, so the healthy count cannot be corrupted),
+    /// * `None` when the dead-proxy lock is poisoned.
+    pub async fn mark_proxy_dead(&self, index: usize) -> Option<bool> {
+        let total = self.proxy_ports.len();
+
+        // Reject unknown indices: recording them would let the dead set grow
+        // beyond the pool size and underflow the "remaining" arithmetic.
+        let port = match self.proxy_ports.get(index) {
+            Some(port) => *port,
+            None => {
+                crate::util::logger::log_warn(&format!(
+                    "⚠ Ignoring request to mark unknown proxy {} as DEAD (pool has {} proxies)",
+                    index,
+                    total
+                )).await;
+                return Some(false);
+            }
+        };
+
+        // Acquire lock, perform mutation, and get values for logging
+        let remaining = {
+            let mut dead = match self.dead_proxies.write() {
+                Ok(value) => value,
+                Err(_) => return None,
+            };
+            dead.insert(index);
+
+            // Lock is automatically dropped here when the scope ends
+            total.saturating_sub(dead.len())
+        };
+
+        // Now we can await without holding the lock
+        crate::util::logger::log_warn(&format!(
+            "⚠ Marked proxy {} (port {}) as DEAD ({}/{} proxies remaining)",
+            index,
+            port,
+            remaining,
+            total
+        )).await;
+
+        Some(true)
+    }
+    
+    /// Number of proxies that have not been marked dead.
+    ///
+    /// Only dead indices that refer to an existing proxy are counted, so the
+    /// subtraction cannot underflow even if an out-of-range index was recorded.
+    /// Returns `None` when the dead-proxy lock is poisoned.
+    pub async fn num_healthy_proxies(&self) -> Option<usize> {
+        let dead = match self.dead_proxies.read() {
+            Ok(value) => value,
+            Err(_) => return None,
+        };
+        let total = self.proxy_ports.len();
+        let dead_in_pool = dead.iter().filter(|&&index| index < total).count();
+        Some(total - dead_in_pool)
+    }
+}
+
+/// Force-remove every Docker container left over from earlier runs.
+///
+/// A container qualifies when its name starts with `vpn-proxy-` or its image
+/// name contains `rust-vpn-proxy`. Running and stopped containers are both
+/// considered (`docker ps -a`).
+///
+/// Returns an error only when a Docker command cannot be spawned; a failing
+/// `docker rm` is merely logged as a warning.
+pub async fn cleanup_all_proxy_containers() -> Result<()> {
+    // Step 1: List id, name and image of every container
+    let listing = Command::new("docker")
+        .args(["ps", "-a", "--format", "{{.ID}} {{.Names}} {{.Image}}"])
+        .output()
+        .await?;
+    let listing_text = String::from_utf8_lossy(&listing.stdout);
+
+    let containers_to_kill: Vec<String> = listing_text
+        .lines()
+        .filter_map(|row| {
+            let mut columns = row.split_whitespace();
+            let id = columns.next()?;
+            let name = columns.next()?;
+            let image = columns.next().unwrap_or("");
+
+            // Match by name prefix OR by image name
+            let is_ours = name.starts_with("vpn-proxy-") || image.contains("rust-vpn-proxy");
+            if is_ours {
+                Some(id.to_string())
+            } else {
+                None
+            }
+        })
+        .collect();
+
+    if containers_to_kill.is_empty() {
+        crate::util::logger::log_info("No old rust-vpn-proxy containers found").await;
+        return Ok(());
+    }
+
+    // Step 2: Kill and remove them all at once
+    let removal = Command::new("docker")
+        .arg("rm")
+        .arg("-f")
+        .args(&containers_to_kill)
+        .status()
+        .await?;
+
+    if !removal.success() {
+        crate::util::logger::log_warn("Some containers may still remain (non-critical)").await;
+        return Ok(());
+    }
+
+    crate::util::logger::log_info(&format!(
+        "Successfully removed {} old rust-vpn-proxy container(s)",
+        containers_to_kill.len()
+    ))
+    .await;
+
+    Ok(())
+}
--- a/src/scraper/hard_reset.rs
+++ b/src/scraper/hard_reset.rs
@@ -0,0 +1,377 @@
+// src/scraper/hard_reset.rs - FIXED: Proper cleanup without Arc leaks
+use std::sync::{Arc, atomic::{AtomicBool, AtomicUsize, Ordering}};
+
+use crate::{ChromeDriverPool, Config, logger, scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}, util::directories::DataPaths};
+
+/// Counter of consecutive errors, used to decide when a hard reset should be triggered.
+pub struct HardResetController {
+    consecutive_errors: AtomicUsize,
+}
+
+impl HardResetController {
+    /// Creates a controller whose error streak starts at zero.
+    pub fn new() -> Self {
+        Self { consecutive_errors: AtomicUsize::new(0) }
+    }
+
+    /// Notes a success, which ends the current error streak.
+    pub fn record_success(&self) {
+        self.reset();
+    }
+
+    /// Notes a failure and returns the streak length including it.
+    pub fn record_error(&self) -> usize {
+        let before = self.consecutive_errors.fetch_add(1, Ordering::SeqCst);
+        before + 1
+    }
+
+    /// Sets the streak back to zero.
+    pub fn reset(&self) {
+        self.consecutive_errors.store(0, Ordering::SeqCst);
+    }
+
+    /// Returns the current streak length.
+    pub fn get_count(&self) -> usize {
+        self.consecutive_errors.load(Ordering::SeqCst)
+    }
+}
+
+/// Tears down the current ChromeDriver pool and swaps in a freshly built one.
+///
+/// The pool mutex is held from step 1 until the new pool is stored in step 10
+/// (or until an early return). The old pool is shut down through the guard,
+/// then Chrome/ChromeDriver processes are force-killed and proxy containers
+/// removed before rebuilding. Returns `Ok(())` without rebuilding if
+/// `shutdown_flag` is set; ChromeDriver pool creation errors are propagated.
+pub async fn perform_hard_reset(
+    pool_mutex: &Arc<tokio::sync::Mutex<Arc<ChromeDriverPool>>>,
+    config: &Config,
+    paths: &DataPaths,
+    monitoring: &Option<crate::monitoring::MonitoringHandle>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<()> {
+    // NOTE: the pool mutex acquired in step 1 stays locked until the new pool is activated.
+    logger::log_error("🔴 STARTING HARD RESET SEQUENCE").await;
+    
+    // Check if shutdown was requested
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn("Shutdown requested during hard reset, aborting").await;
+        return Ok(());
+    }
+    
+    // ===== STEP 1: ACQUIRE POOL LOCK (NO CLONING!) =====
+    logger::log_info("  [1/12] Acquiring pool lock...").await;
+    let mut pool_guard = pool_mutex.lock().await;
+    
+    // Get instance count before shutdown for verification
+    let old_instance_count = pool_guard.get_number_of_instances();
+    logger::log_info(&format!("  [1/12] Pool has {} instances", old_instance_count)).await;
+    
+    // ===== STEP 2: SHUTDOWN OLD POOL (NO ARC CLONE!) =====
+    logger::log_info("  [2/12] Shutting down old pool (NO Arc clone)...").await;
+    
+    // Shutdown through the Arc without cloning it
+    // This is safe because we hold the mutex lock
+    match pool_guard.shutdown().await {
+        Ok(()) => {
+            logger::log_info("  [2/12] ✓ Pool shutdown complete").await;
+        }
+        Err(e) => {
+            logger::log_error(&format!("  [2/12] ✗ Pool shutdown error: {}", e)).await;
+            // Continue anyway - we'll force-kill processes
+        }
+    }
+    
+    // ===== STEP 3: FORCE-KILL ANY REMAINING CHROME PROCESSES =====
+    logger::log_info("  [3/12] Force-killing any remaining Chrome/ChromeDriver processes...").await;
+    
+    #[cfg(target_os = "windows")]
+    {
+        // Kill all chrome.exe processes
+        let chrome_result = tokio::process::Command::new("taskkill")
+            .args(["/F", "/IM", "chrome.exe"])
+            .output()
+            .await;
+        
+        match chrome_result {
+            Ok(output) if output.status.success() => {
+                logger::log_info("  [3/12] ✓ Chrome processes killed").await;
+            }
+            _ => {
+                logger::log_info("  [3/12] ⊘ No Chrome processes found").await;
+            }
+        }
+        
+        // Kill all chromedriver.exe processes
+        let chromedriver_result = tokio::process::Command::new("taskkill")
+            .args(["/F", "/IM", "chromedriver.exe"])
+            .output()
+            .await;
+        
+        match chromedriver_result {
+            Ok(output) if output.status.success() => {
+                logger::log_info("  [3/12] ✓ ChromeDriver processes killed").await;
+            }
+            _ => {
+                logger::log_info("  [3/12] ⊘ No ChromeDriver processes found").await;
+            }
+        }
+    }
+    
+    #[cfg(not(target_os = "windows"))]
+    {
+        // Kill all chrome processes
+        let _ = tokio::process::Command::new("pkill")
+            .arg("chrome")
+            .output()
+            .await;
+        
+        let _ = tokio::process::Command::new("pkill")
+            .arg("chromedriver")
+            .output()
+            .await;
+        
+        logger::log_info("  [3/12] ✓ Force-killed Chrome/ChromeDriver").await;
+    }
+    
+    // ===== STEP 4: SHUTDOWN PROXIES =====
+    logger::log_info("  [4/12] Shutting down proxy containers...").await;
+    cleanup_all_proxy_containers().await.ok(); // best-effort: a cleanup error is discarded
+    
+    // ===== STEP 5: WAIT FOR CLEANUP =====
+    logger::log_info("  [5/12] Waiting 30 seconds for cleanup...").await;
+    tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
+    
+    // ===== STEP 6: VERIFY CLEANUP =====
+    logger::log_info("  [6/12] Verifying process cleanup...").await;
+    // The check itself is only compiled on Windows; other targets just log the step
+    #[cfg(target_os = "windows")]
+    {
+        let check_chrome = tokio::process::Command::new("tasklist")
+            .args(["/FI", "IMAGENAME eq chrome.exe"])
+            .output()
+            .await;
+        
+        if let Ok(output) = check_chrome {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
+            
+            if chrome_count > 0 {
+                logger::log_warn(&format!("  [6/12] ⚠️  {} Chrome processes still running!", chrome_count)).await;
+            } else {
+                logger::log_info("  [6/12] ✓ No Chrome processes running").await;
+            }
+        }
+    }
+    
+    // Check shutdown again; on abort the mutex keeps the already shut-down old pool
+    if shutdown_flag.load(Ordering::SeqCst) {
+        logger::log_warn("Shutdown requested during cleanup, aborting reset").await;
+        return Ok(());
+    }
+    
+    // ===== STEP 7: RECREATE PROXY POOL =====
+    logger::log_info("  [7/12] Recreating proxy pool...").await;
+    let new_proxy_pool = if config.enable_vpn_rotation {
+        match recreate_proxy_pool_with_fresh_credentials(config, paths, monitoring, shutdown_flag).await {
+            Ok(pool) => {
+                logger::log_info(&format!(
+                    "  [7/12] ✓ Proxy pool created with {} proxies",
+                    pool.num_proxies()
+                )).await;
+                Some(pool)
+            }
+            Err(e) => {
+                logger::log_warn(&format!(
+                    "  [7/12] ⚠️  Proxy creation failed: {}. Continuing without proxies.",
+                    e
+                )).await;
+                None
+            }
+        }
+    } else {
+        logger::log_info("  [7/12] ⊘ VPN rotation disabled, skipping proxy pool").await;
+        None
+    };
+    
+    // ===== STEP 8: RECREATE CHROMEDRIVER POOL =====
+    logger::log_info("  [8/12] Recreating ChromeDriver pool...").await;
+    let new_pool = Arc::new(
+        ChromeDriverPool::new_with_proxy_and_task_limit(
+            new_proxy_pool,
+            config,
+            monitoring.clone(),
+        ).await?
+    );
+    
+    logger::log_info(&format!(
+        "  [8/12] ✓ ChromeDriver pool created with {} instances", 
+        new_pool.get_number_of_instances()
+    )).await;
+    
+    // ===== STEP 9: RESET ERROR COUNTER =====
+    logger::log_info("  [9/12] Resetting error counter...").await;
+    new_pool.get_reset_controller().reset();
+    logger::log_info("  [9/12] ✓ Error counter cleared").await;
+    
+    // ===== STEP 10: REPLACE POOL ATOMICALLY =====
+    logger::log_info("  [10/12] Activating new pool...").await;
+    *pool_guard = new_pool;
+    drop(pool_guard);
+    logger::log_info("  [10/12] ✓ New pool activated").await;
+    
+    // ===== STEP 11: EMIT MONITORING EVENT =====
+    logger::log_info("  [11/12] Updating monitoring...").await;
+    if let Some(mon) = monitoring {
+        mon.emit(crate::monitoring::MonitoringEvent::PoolInitialized {
+            pool_size: config.max_parallel_instances,
+            with_proxy: config.enable_vpn_rotation,
+            with_rotation: config.max_tasks_per_instance > 0,
+        });
+    }
+    
+    // ===== STEP 12: FINAL VERIFICATION =====
+    logger::log_info("  [12/12] Final verification...").await;
+    
+    #[cfg(target_os = "windows")]
+    {
+        let check_chrome = tokio::process::Command::new("tasklist")
+            .args(["/FI", "IMAGENAME eq chrome.exe"])
+            .output()
+            .await;
+        
+        if let Ok(output) = check_chrome {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
+            logger::log_info(&format!("  [12/12] Chrome processes: {}", chrome_count)).await;
+        }
+        
+        let check_chromedriver = tokio::process::Command::new("tasklist")
+            .args(["/FI", "IMAGENAME eq chromedriver.exe"])
+            .output()
+            .await;
+        
+        if let Ok(output) = check_chromedriver {
+            let stdout = String::from_utf8_lossy(&output.stdout);
+            let chromedriver_count = stdout.lines().filter(|line| line.contains("chromedriver.exe")).count();
+            logger::log_info(&format!("  [12/12] ChromeDriver processes: {}", chromedriver_count)).await;
+        }
+    }
+    
+    logger::log_info("✅ HARD RESET COMPLETE").await;
+    
+    Ok(())
+}
+
+/// Builds a new `DockerVpnProxyPool` from freshly fetched VPNBook credentials.
+///
+/// A temporary proxy-less ChromeDriver pool performs the fetch and is shut down
+/// afterwards even when the fetch fails. Errors: shutdown requested, pool or
+/// fetch failure, no VPN server directory, or proxy pool creation failure.
+async fn recreate_proxy_pool_with_fresh_credentials(
+    config: &Config,
+    paths: &DataPaths,
+    monitoring: &Option<crate::monitoring::MonitoringHandle>,
+    shutdown_flag: &Arc<AtomicBool>,
+) -> anyhow::Result<Arc<DockerVpnProxyPool>> {
+    let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
+
+    if shutdown_flag.load(Ordering::SeqCst) {
+        return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
+    }
+
+    logger::log_info("    [7.1] Creating temporary ChromeDriver pool for credential fetch...").await;
+    // The temporary pool is created WITHOUT a proxy (first argument `None`)
+    let temp_pool = Arc::new(
+        ChromeDriverPool::new_with_proxy_and_task_limit(None, config, monitoring.clone()).await?
+    );
+
+    logger::log_info("    [7.2] Fetching fresh VPNBook credentials...").await;
+    // No `?` here: an early return would skip the temp pool shutdown below
+    let fetch_result = crate::util::opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await;
+    match &fetch_result {
+        Ok((username, _, _)) => {
+            logger::log_info(&format!("    [7.3] Got credentials → User: {}", username)).await;
+        }
+        Err(e) => {
+            logger::log_error(&format!("    [7.3] ✗ Credential fetch failed: {}", e)).await;
+        }
+    }
+
+    logger::log_info("    [7.4] Shutting down temporary pool...").await;
+    match temp_pool.shutdown().await {
+        Ok(()) => {
+            logger::log_info("    [7.4] ✓ Temp pool shut down successfully").await;
+        }
+        Err(e) => {
+            logger::log_error(&format!("    [7.4] ✗ Temp pool shutdown error: {}", e)).await;
+            // Force-kill processes as backup
+            #[cfg(target_os = "windows")]
+            {
+                let _ = tokio::process::Command::new("taskkill")
+                    .args(["/F", "/IM", "chrome.exe"])
+                    .output()
+                    .await;
+                let _ = tokio::process::Command::new("taskkill")
+                    .args(["/F", "/IM", "chromedriver.exe"])
+                    .output()
+                    .await;
+            }
+            #[cfg(not(target_os = "windows"))]
+            {
+                let _ = tokio::process::Command::new("pkill").arg("chrome").output().await;
+                let _ = tokio::process::Command::new("pkill").arg("chromedriver").output().await;
+            }
+        }
+    }
+
+    // Propagate a fetch failure only now that the temp pool has been shut down
+    let (username, password, _files) = fetch_result?;
+
+    // Wait a moment for temp pool cleanup
+    tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
+
+    if shutdown_flag.load(Ordering::SeqCst) {
+        return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
+    }
+
+    // Count VPN server directories; unreadable entries are skipped instead of panicking
+    let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
+        .filter_map(|entry| entry.ok())
+        .filter(|entry| entry.path().is_dir())
+        .count();
+
+    if server_count == 0 {
+        return Err(anyhow::anyhow!("No VPN servers found after credential fetch"));
+    }
+
+    logger::log_info(&format!(
+        "    [7.5] Found {} VPN servers → Creating proxy pool with {} instances per server...",
+        server_count,
+        number_proxy_instances
+    )).await;
+
+    let proxy_pool = Arc::new(
+        DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password, number_proxy_instances).await?
+    );
+
+    logger::log_info(&format!(
+        "    [7.6] ✓ Proxy pool ready with {} total proxies",
+        proxy_pool.num_proxies()
+    )).await;
+
+    // Emit proxy connected events for monitoring
+    if let Some(mon) = monitoring {
+        for i in 0..proxy_pool.num_proxies() {
+            if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
+                mon.emit(crate::monitoring::MonitoringEvent::ProxyConnected {
+                    container_name: proxy_info.container_name.clone(),
+                    ip_address: proxy_info.ip_address.clone(),
+                    port: proxy_info.port,
+                });
+            }
+        }
+    }
+
+    Ok(proxy_pool)
+}
--- a/src/scraper/helpers.rs
+++ b/src/scraper/helpers.rs
@@ -0,0 +1,14 @@
+use rand::rngs::StdRng;
+use rand::prelude::{Rng, SeedableRng, IndexedRandom};
+
+/// Send-safe random value in `[min, max)`; returns `min` if the range is empty.
+pub fn random_range(min: u64, max: u64) -> u64 {
+    if min >= max { return min; } // `Rng::random_range` panics on an empty range
+    StdRng::from_rng(&mut rand::rng()).random_range(min..max)
+}
+
+/// Send-safe random pick: clones one randomly chosen element; panics if `items` is empty.
+pub fn choose_random<T: Clone>(items: &[T]) -> T {
+    let mut seeded = StdRng::from_rng(&mut rand::rng());
+    items.choose(&mut seeded).cloned().unwrap()
+}
--- a/src/scraper/mod.rs
+++ b/src/scraper/mod.rs
@@ -1 +1,6 @@
 pub mod webdriver;
+pub mod docker_vpn_proxy;
+pub mod helpers;
+pub mod hard_reset;
+pub mod yahoo;
+pub mod openfigi;
--- a/src/scraper/openfigi.rs
+++ b/src/scraper/openfigi.rs
@@ -0,0 +1,367 @@
+// src/scraper/openfigi.rs - STREAMING VERSION
+// Key changes: Never load entire GLEIF CSV or FIGI maps into memory
+
+use crate::util::directories::DataPaths;
+use crate::util::logger;
+use crate::corporate::{types::*};
+use reqwest::Client as HttpClient;
+use reqwest::header::{HeaderMap, HeaderValue};
+use serde_json::{json, Value};
+use std::path::Path;
+use tokio::time::{sleep, Duration};
+use tokio::fs as tokio_fs;
+use anyhow::{Context, anyhow};
+
+/// HTTP client for the OpenFIGI v3 API, optionally authenticated with an API key.
+#[derive(Clone)]
+pub struct OpenFigiClient {
+    pub client: HttpClient,
+    /// True only when a non-empty `OPENFIGI_API_KEY` was found.
+    pub has_key: bool,
+}
+
+impl OpenFigiClient {
+    /// Builds the client; the API key is read from `OPENFIGI_API_KEY` via `dotenvy`.
+    pub async fn new() -> anyhow::Result<Self> {
+        // A blank value (e.g. an unfilled `OPENFIGI_API_KEY=` line) counts as "no key";
+        // otherwise an empty key header would be sent with the keyed batch size and pacing.
+        let api_key = dotenvy::var("OPENFIGI_API_KEY").ok()
+            .map(|key| key.trim().to_string()).filter(|key| !key.is_empty());
+        let has_key = api_key.is_some();
+
+        let mut builder = HttpClient::builder()
+            .user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
+            .timeout(Duration::from_secs(30));
+
+        if let Some(key) = &api_key {
+            let mut headers = HeaderMap::new();
+            headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
+            builder = builder.default_headers(headers);
+        }
+
+        let client = builder.build().context("Failed to build HTTP client")?;
+        logger::log_info(&format!("OpenFIGI client: {}", if has_key { "with API key" } else { "no key" })).await;
+
+        Ok(Self { client, has_key })
+    }
+
+    /// Maps ISINs to FIGI records through `POST /v3/mapping`, one batch at a time.
+    ///
+    /// Batches hold 100 ISINs with a key and 5 without. Send failures and 5xx responses
+    /// are retried with exponential backoff; a 429 sleeps for `ratelimit-reset` (at least
+    /// 10s) and retries without limit. Other statuses and unparsable bodies are errors.
+    pub async fn map_isins_to_figi_infos(&self, isins: &[String]) -> anyhow::Result<Vec<FigiData>> {
+        if isins.is_empty() {
+            return Ok(vec![]);
+        }
+
+        let mut all_figi_infos = Vec::new();
+        let chunk_size = if self.has_key { 100 } else { 5 };
+        let inter_sleep = Duration::from_millis(if self.has_key { 240 } else { 2400 });
+
+        for chunk in isins.chunks(chunk_size) {
+            let jobs: Vec<Value> = chunk.iter()
+                .map(|isin| json!({ "idType": "ID_ISIN", "idValue": isin }))
+                .collect();
+
+            let mut retry_count = 0;
+            let max_retries = 5;
+            let mut backoff_ms = 1000u64;
+
+            loop {
+                let resp_result = self.client
+                    .post("https://api.openfigi.com/v3/mapping")
+                    .header("Content-Type", "application/json")
+                    .json(&jobs)
+                    .send()
+                    .await;
+
+                let resp = match resp_result {
+                    Ok(r) => r,
+                    Err(e) => {
+                        retry_count += 1;
+                        if retry_count >= max_retries {
+                            let err_msg = format!("Failed to send mapping request after {} retries: {}", max_retries, e);
+                            logger::log_error(&err_msg).await;
+                            return Err(anyhow!(err_msg));
+                        }
+                        let warn_msg = format!("Transient error sending mapping request (attempt {}/{}): {}", retry_count, max_retries, e);
+                        logger::log_warn(&warn_msg).await;
+                        logger::log_info(&format!("  Retrying in {}ms...", backoff_ms)).await;
+                        sleep(Duration::from_millis(backoff_ms)).await;
+                        backoff_ms = (backoff_ms * 2).min(60000); // Cap at 60s
+                        continue;
+                    }
+                };
+
+                let status = resp.status();
+                let headers = resp.headers().clone();
+                let body = resp.text().await?;
+
+                if status == 429 {
+                    let reset_sec = headers
+                        .get("ratelimit-reset")
+                        .and_then(|v| v.to_str().ok())
+                        .and_then(|s| s.parse::<u64>().ok())
+                        .unwrap_or(10);
+                    sleep(Duration::from_secs(reset_sec.max(10))).await;
+                    continue;
+                } else if !status.is_success() {
+                    if status.is_server_error() && retry_count < max_retries {
+                        retry_count += 1;
+                        sleep(Duration::from_millis(backoff_ms)).await;
+                        backoff_ms = (backoff_ms * 2).min(60000);
+                        continue;
+                    }
+                    return Err(anyhow!("OpenFIGI error {}: {}", status, body));
+                }
+
+                let results: Vec<Value> = serde_json::from_str(&body)?;
+                for (isin, result) in chunk.iter().zip(results) {
+                    if let Some(data) = result["data"].as_array() {
+                        for item in data {
+                            if let Some(figi) = item["figi"].as_str() {
+                                all_figi_infos.push(FigiData {
+                                    isin: isin.clone(),
+                                    figi: figi.to_string(),
+                                    name: item["name"].as_str().unwrap_or("").to_string(),
+                                    ticker: item["ticker"].as_str().unwrap_or("").to_string(),
+                                    exch_code: item["exchCode"].as_str().unwrap_or("").to_string(),
+                                    composite_figi: item["compositeFIGI"].as_str().unwrap_or("").to_string(),
+                                    security_type: item["securityType"].as_str().unwrap_or("").to_string(),
+                                    market_sector: item["marketSector"].as_str().unwrap_or("").to_string(),
+                                    share_class_figi: item["shareClassFIGI"].as_str().unwrap_or("").to_string(),
+                                    security_type2: item["securityType2"].as_str().unwrap_or("").to_string(),
+                                    security_description: item["securityDescription"].as_str().unwrap_or("").to_string(),
+                                });
+                            }
+                        }
+                    }
+                }
+                break;
+            }
+
+            sleep(inter_sleep).await;
+        }
+
+        Ok(all_figi_infos)
+    }
+}
+
+/// Ensures `securityType.json` in `cache_dir` holds the valid securityType values.
+///
+/// A cache file accepted by `should_use_cache` is left untouched; otherwise the
+/// list is downloaded from the OpenFIGI API and written as pretty-printed JSON.
+///
+/// # Arguments
+/// * `client` - OpenFIGI client used for the request and for pacing.
+/// * `cache_dir` - Directory that holds the cached JSON file.
+///
+/// # Errors
+/// Fails if the cache check, the request, JSON parsing, or the file write fails.
+async fn get_figi_security_type(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
+    let target = cache_dir.join("securityType.json");
+    let cache_is_fresh = should_use_cache(&target).await?;
+    if cache_is_fresh {
+        logger::log_info("  Using cached securityType values").await;
+        return Ok(());
+    }
+
+    logger::log_info("  Fetching securityType values from OpenFIGI API...").await;
+
+    let request = client.client.get("https://api.openfigi.com/v3/mapping/values/securityType");
+    let response = request.send().await
+        .context("Failed to fetch securityType values")?;
+
+    // Any HTTP error status (including 429) aborts here via `?`
+    handle_rate_limit(&response).await?;
+
+    let payload: Value = response.json().await
+        .context("Failed to parse securityType response")?;
+
+    let pretty = serde_json::to_string_pretty(&payload)?;
+    tokio_fs::write(&target, pretty).await
+        .context("Failed to write securityType cache")?;
+
+    logger::log_info("  ✓ Cached securityType values").await;
+
+    // Pause before the next API call; a client with a key waits less
+    let pause_ms = if client.has_key { 240 } else { 2400 };
+    sleep(Duration::from_millis(pause_ms)).await;
+    Ok(())
+}
+
+
+/// Loads all OpenFIGI mapping value lists (marketSecDes, micCode, securityType).
+///
+/// Each list is fetched from the OpenFIGI API and cached as a JSON file in the
+/// directory returned by `paths.cache_openfigi_dir()`. Files that already exist
+/// and are less than 30 days old are reused instead of being fetched again.
+///
+/// # Returns
+/// Ok(()) on success.
+///
+/// # Errors
+/// Returns an error if API requests fail, JSON parsing fails, or file I/O fails.
+pub async fn load_figi_type_lists(paths: &DataPaths) -> anyhow::Result<()> {
+    logger::log_info("Loading OpenFIGI mapping value lists...").await;
+
+    let cache_openfigi_dir = paths.cache_openfigi_dir();
+    tokio_fs::create_dir_all(cache_openfigi_dir).await
+        .with_context(|| format!("Failed to create OpenFIGI cache directory {}", cache_openfigi_dir.display()))?;
+
+    let client = OpenFigiClient::new().await?;
+
+    // Fetch each type list
+    get_figi_market_sec_des(&client, cache_openfigi_dir).await?;
+    get_figi_mic_code(&client, cache_openfigi_dir).await?;
+    get_figi_security_type(&client, cache_openfigi_dir).await?;
+
+    logger::log_info("OpenFIGI mapping value lists loaded successfully").await;
+
+    Ok(())
+}
+
+/// Ensures `marketSecDes.json` in `cache_dir` holds the valid marketSecDes values.
+///
+/// A cache file accepted by `should_use_cache` is left untouched; otherwise the
+/// list is downloaded from the OpenFIGI API and written as pretty-printed JSON.
+///
+/// # Arguments
+/// * `client` - OpenFIGI client used for the request and for pacing.
+/// * `cache_dir` - Directory that holds the cached JSON file.
+///
+/// # Errors
+/// Fails if the cache check, the request, JSON parsing, or the file write fails.
+async fn get_figi_market_sec_des(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
+    let target = cache_dir.join("marketSecDes.json");
+
+    // Reuse the cache file when `should_use_cache` accepts it
+    let cache_is_fresh = should_use_cache(&target).await?;
+    if cache_is_fresh {
+        logger::log_info("  Using cached marketSecDes values").await;
+        return Ok(());
+    }
+
+    logger::log_info("  Fetching marketSecDes values from OpenFIGI API...").await;
+
+    let request = client.client.get("https://api.openfigi.com/v3/mapping/values/marketSecDes");
+    let response = request.send().await
+        .context("Failed to fetch marketSecDes values")?;
+
+    // Any HTTP error status (including 429) aborts here via `?`
+    handle_rate_limit(&response).await?;
+
+    let payload: Value = response.json().await
+        .context("Failed to parse marketSecDes response")?;
+
+    // Persist the parsed list as pretty-printed JSON
+    let pretty = serde_json::to_string_pretty(&payload)?;
+    tokio_fs::write(&target, pretty).await
+        .context("Failed to write marketSecDes cache")?;
+
+    logger::log_info("  ✓ Cached marketSecDes values").await;
+
+    // Pause before the next API call; a client with a key waits less
+    let pause_ms = if client.has_key { 240 } else { 2400 };
+    sleep(Duration::from_millis(pause_ms)).await;
+    Ok(())
+}
+
+/// Ensures `micCode.json` in `cache_dir` holds the valid micCode values.
+///
+/// A cache file accepted by `should_use_cache` is left untouched; otherwise the
+/// list is downloaded from the OpenFIGI API and written as pretty-printed JSON.
+///
+/// # Arguments
+/// * `client` - OpenFIGI client used for the request and for pacing.
+/// * `cache_dir` - Directory that holds the cached JSON file.
+///
+/// # Errors
+/// Fails if the cache check, the request, JSON parsing, or the file write fails.
+async fn get_figi_mic_code(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
+    let target = cache_dir.join("micCode.json");
+    let cache_is_fresh = should_use_cache(&target).await?;
+    if cache_is_fresh {
+        logger::log_info("  Using cached micCode values").await;
+        return Ok(());
+    }
+
+    logger::log_info("  Fetching micCode values from OpenFIGI API...").await;
+
+    let request = client.client.get("https://api.openfigi.com/v3/mapping/values/micCode");
+    let response = request.send().await
+        .context("Failed to fetch micCode values")?;
+
+    // Any HTTP error status (including 429) aborts here via `?`
+    handle_rate_limit(&response).await?;
+
+    let payload: Value = response.json().await
+        .context("Failed to parse micCode response")?;
+
+    let pretty = serde_json::to_string_pretty(&payload)?;
+    tokio_fs::write(&target, pretty).await
+        .context("Failed to write micCode cache")?;
+
+    logger::log_info("  ✓ Cached micCode values").await;
+
+    // Pause before the next API call; a client with a key waits less
+    let pause_ms = if client.has_key { 240 } else { 2400 };
+    sleep(Duration::from_millis(pause_ms)).await;
+    Ok(())
+}
+
+/// Handles rate limit responses from the OpenFIGI API.
+///
+/// If a 429 status is received, this function sleeps for the duration specified
+/// in the `ratelimit-reset` header (or 10 seconds by default).
+///
+/// # Arguments
+/// * `resp` - The HTTP response to check.
+///
+/// # Returns
+/// Ok(()) if no rate limit, or after waiting for the reset period.
+///
+/// # Errors
+/// Returns an error if the response status indicates a non-rate-limit error.
+async fn handle_rate_limit(resp: &reqwest::Response) -> anyhow::Result<()> {
+    let status = resp.status();
+    
+    if status == 429 {
+        let headers = resp.headers();
+        let reset_sec = headers
+            .get("ratelimit-reset")
+            .and_then(|v| v.to_str().ok())
+            .and_then(|s| s.parse::<u64>().ok())
+            .unwrap_or(10);
+        
+        logger::log_info(&format!("  Rate limited—waiting {}s", reset_sec)).await;
+        sleep(std::time::Duration::from_secs(reset_sec.max(10))).await;
+        
+        return Err(anyhow!("Rate limited, please retry"));
+    } else if status.is_client_error() || status.is_server_error() {
+        return Err(anyhow!("OpenFIGI API error: {}", status));
+    }
+    
+    Ok(())
+}
+
+/// Checks if a cache file exists and is less than 30 days old.
+///
+/// The file is inspected with a single async metadata lookup; a file that is
+/// missing or unreadable is reported as "needs refreshing", not as an error.
+///
+/// # Returns
+/// True if the cache should be used, false if it needs refreshing.
+async fn should_use_cache(path: &Path) -> anyhow::Result<bool> {
+    const MAX_AGE: std::time::Duration = std::time::Duration::from_secs(30 * 24 * 60 * 60);
+
+    // One lookup replaces the blocking `exists()` + `metadata()` pair and its race
+    let metadata = match tokio_fs::metadata(path).await {
+        Ok(metadata) => metadata,
+        Err(_) => return Ok(false),
+    };
+    // A modification time in the future makes `elapsed()` fail; treat that as stale
+    let age = metadata.modified()?.elapsed().unwrap_or(std::time::Duration::MAX);
+    Ok(age < MAX_AGE)
+}
--- a/src/scraper/webdriver.rs
+++ b/src/scraper/webdriver.rs
--- a/src/scraper/yahoo.rs
+++ b/src/scraper/yahoo.rs
--- a/src/util/directories.rs
+++ b/src/util/directories.rs
@@ -1,26 +1,27 @@
 use std::path::{Path, PathBuf};
 use std::fs;

-use crate::util::opnv;
-
 /// Central configuration for all data paths
+#[derive(Clone)]
 pub struct DataPaths {
  base_dir: PathBuf,
  data_dir: PathBuf,
  cache_dir: PathBuf,
  logs_dir: PathBuf,
+  integrity_dir: PathBuf,
  // Cache data subdirectories
  cache_gleif_dir: PathBuf,
  cache_openfigi_dir: PathBuf,
  cache_gleif_openfigi_map_dir: PathBuf,
  cache_openvpn_dir: PathBuf,
+  // Figi Securities data subdirectories
+  figi_securities_dir: PathBuf,
  // Economic data subdirectories
  economic_events_dir: PathBuf,
  economic_changes_dir: PathBuf,
+  economic_currency_dir: PathBuf,
  // Corporate data subdirectories
-  corporate_events_dir: PathBuf,
-  corporate_changes_dir: PathBuf,
-  corporate_prices_dir: PathBuf,
+  corporate_dir: PathBuf,
 }

 impl DataPaths {
@@ -31,6 +32,7 @@ impl DataPaths {
    let data_dir = base_dir.join("data");
    let cache_dir = base_dir.join("cache");
    let logs_dir = base_dir.join("logs");
+    let integrity_dir = base_dir.join("integrity");
    
    // Cache subdirectories
    let cache_gleif_dir = cache_dir.join("gleif");
@@ -38,44 +40,47 @@ impl DataPaths {
    let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi");
    let cache_openvpn_dir = cache_dir.join("openvpn");

+    // Figi Securities subdirectories
+    let figi_securities_dir = data_dir.join("figi_securities");
+
    // Economic subdirectories
    let economic_events_dir = data_dir.join("economic").join("events");
    let economic_changes_dir = economic_events_dir.join("changes");
+    let economic_currency_dir = data_dir.join("economic").join("currency");
    
    // Corporate subdirectories
    let corporate_dir = data_dir.join("corporate");
-    let corporate_events_dir = corporate_dir.join("events");
-    let corporate_changes_dir = corporate_events_dir.join("changes");
-    let corporate_prices_dir = corporate_dir.join("prices");
    
    // Create all directories if they don't exist
    fs::create_dir_all(&data_dir)?;
    fs::create_dir_all(&cache_dir)?;
    fs::create_dir_all(&logs_dir)?;
+    fs::create_dir_all(&integrity_dir)?;
    fs::create_dir_all(&cache_gleif_dir)?;
    fs::create_dir_all(&cache_openfigi_dir)?;
    fs::create_dir_all(&cache_gleif_openfigi_map_dir)?;
    fs::create_dir_all(&cache_openvpn_dir)?;
+    fs::create_dir_all(&figi_securities_dir)?;
    fs::create_dir_all(&economic_events_dir)?;
    fs::create_dir_all(&economic_changes_dir)?;
-    fs::create_dir_all(&corporate_events_dir)?;
-    fs::create_dir_all(&corporate_changes_dir)?;
-    fs::create_dir_all(&corporate_prices_dir)?;
+    fs::create_dir_all(&economic_currency_dir)?;
+    fs::create_dir_all(&corporate_dir)?;
    
    Ok(Self {
      base_dir,
      data_dir,
      cache_dir,
      logs_dir,
+      integrity_dir,
      cache_gleif_dir,
      cache_openfigi_dir,
      cache_gleif_openfigi_map_dir,
      cache_openvpn_dir,
+      figi_securities_dir,
      economic_events_dir,
      economic_changes_dir,
-      corporate_events_dir,
-      corporate_changes_dir,
-      corporate_prices_dir,
+      economic_currency_dir,
+      corporate_dir,
    })
  }
  
@@ -90,6 +95,10 @@ impl DataPaths {
  pub fn cache_dir(&self) -> &Path {
    &self.cache_dir
  }
+
+  /// Get the integrity directory
+  pub fn integrity_dir(&self) -> &Path {
+    &self.integrity_dir
+  }
  
  pub fn logs_dir(&self) -> &Path {
    &self.logs_dir
@@ -111,6 +120,10 @@ impl DataPaths {
    &self.cache_openvpn_dir
  }

+  /// Get the FIGI securities directory
+  pub fn figi_securities_dir(&self) -> &Path {
+    &self.figi_securities_dir
+  }
+
  /// Get the economic events directory
  pub fn economic_events_dir(&self) -> &Path {
    &self.economic_events_dir
@@ -120,20 +133,14 @@ impl DataPaths {
  pub fn economic_changes_dir(&self) -> &Path {
    &self.economic_changes_dir
  }
-  
+
+  /// Get the economic currency directory
+  pub fn economic_currency_dir(&self) -> &Path {
+    &self.economic_currency_dir
+  }
+    
-  /// Get the corporate events directory
+  /// Get the corporate directory
-  pub fn corporate_events_dir(&self) -> &Path {
-    &self.corporate_events_dir
-  }
-  
-  /// Get the corporate changes directory
-  pub fn corporate_changes_dir(&self) -> &Path {
-    &self.corporate_changes_dir
-  }
-  
-  /// Get the corporate prices directory
-  pub fn corporate_prices_dir(&self) -> &Path {
-    &self.corporate_prices_dir
+  pub fn corporate_dir(&self) -> &Path {
+    &self.corporate_dir
  }
  
  /// Get a specific file path within data directory
@@ -164,8 +171,5 @@ mod tests {
    assert!(paths.logs_dir().exists());
    assert!(paths.economic_events_dir().exists());
    assert!(paths.economic_changes_dir().exists());
-    assert!(paths.corporate_events_dir().exists());
-    assert!(paths.corporate_changes_dir().exists());
-    assert!(paths.corporate_prices_dir().exists());
  }
 }
--- a/src/util/integrity.rs
+++ b/src/util/integrity.rs
@@ -0,0 +1,911 @@
+// src/util/integrity.rs
+//! Content integrity and state lifecycle management module
+//! 
+//! Features:
+//! - File and directory hashing (SHA-256)
+//! - Hash validation against content references
+//! - State invalidation based on time or validation failures
+//! - 3-stage data lifecycle: cache → data → storage
+//! - Inline vs. external hash storage based on size
+//! - Centralized dependency configuration (Single Source of Truth)
+//! - Support for checkpoint groups and hierarchies
+//! - Automatic transitive dependency resolution
+//! - Cycle detection in dependency graph
+
+use anyhow::{Context, Result, bail};
+use chrono::{DateTime, Duration, Utc};
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+use std::collections::{HashMap, HashSet};
+use std::fs;
+use std::io::{BufReader, Read};
+use std::path::{Path, PathBuf};
+use tokio::fs as async_fs;
+use tokio::io::AsyncWriteExt;
+
+// ============================================================================
+// CONSTANTS
+// ============================================================================
+
+const INLINE_HASH_THRESHOLD: usize = 1024;
+const HASH_STORAGE_DIR: &str = ".integrity_hashes";
+const HASH_FILE_EXT: &str = ".hash";
+const DEFAULT_DEPENDENCY_CONFIG: &str = "checkpoint_dependencies.toml";
+
+// ============================================================================
+// DEPENDENCY CONFIGURATION
+// ============================================================================
+
+/// Checkpoint dependency configuration, loaded from TOML by `from_file`.
+///
+/// Both tables default to empty when they are absent from the input.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct DependencyConfig {
+    /// Checkpoint definitions keyed by checkpoint name.
+    #[serde(default)]
+    pub checkpoints: HashMap<String, CheckpointConfig>,
+    /// Group definitions keyed by group name.
+    #[serde(default)]
+    pub groups: HashMap<String, GroupConfig>,
+}
+
+/// Configuration of a single checkpoint.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CheckpointConfig {
+    /// Free-text description; used in `to_dot` labels and the printed graph.
+    #[serde(default)]
+    pub description: String,
+    /// Names of the checkpoints this checkpoint depends on directly.
+    #[serde(default)]
+    pub depends_on: Vec<String>,
+    /// Optional group name. `validate` requires the group to exist and to
+    /// list this checkpoint among its members.
+    #[serde(default)]
+    pub group: Option<String>,
+}
+
+/// A named group of checkpoints that share common dependencies.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct GroupConfig {
+    /// Free-text description; used in the printed graph.
+    #[serde(default)]
+    pub description: String,
+    /// Names of the member checkpoints (required, no serde default).
+    pub members: Vec<String>,
+    /// Checkpoints that every checkpoint declaring this group depends on
+    /// (see `collect_deps`).
+    #[serde(default)]
+    pub depends_on: Vec<String>,
+}
+
+impl DependencyConfig {
+    /// Read a dependency configuration from `path`.
+    ///
+    /// A missing file is not an error: the default (empty) configuration is
+    /// returned. An existing file is parsed as TOML and validated before it
+    /// is handed back.
+    ///
+    /// # Errors
+    /// Fails when the file cannot be read, cannot be parsed, or does not pass
+    /// `validate`.
+    pub async fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
+        let config_path = path.as_ref();
+
+        if config_path.exists() {
+            let raw = async_fs::read_to_string(config_path)
+                .await
+                .with_context(|| format!("Failed to read: {}", config_path.display()))?;
+
+            let parsed: Self = toml::from_str(&raw)
+                .context("Failed to parse dependency config")?;
+
+            parsed.validate()?;
+            Ok(parsed)
+        } else {
+            Ok(Self::default())
+        }
+    }
+    
+    /// Validate configuration (checks for cycles and invalid references)
+    ///
+    /// Checks, in order:
+    /// 1. no checkpoint can reach a `depends_on` cycle,
+    /// 2. every checkpoint and group `depends_on` entry names a known checkpoint,
+    /// 3. every group member is a known checkpoint,
+    /// 4. every checkpoint that declares a group is listed by that group.
+    ///
+    /// # Errors
+    /// Returns an error describing the first violation found.
+    pub fn validate(&self) -> Result<()> {
+        // Check for cycles
+        for checkpoint in self.checkpoints.keys() {
+            self.detect_cycle(checkpoint)?;
+        }
+
+        // Validate dependency targets. A dangling target would otherwise only
+        // surface later as an "Unknown checkpoint" error while resolving
+        // dependencies, where callers may discard it.
+        for (checkpoint_name, checkpoint) in &self.checkpoints {
+            for dep in &checkpoint.depends_on {
+                if !self.checkpoints.contains_key(dep) {
+                    bail!("Checkpoint '{}' depends on unknown checkpoint: {}", checkpoint_name, dep);
+                }
+            }
+        }
+        for (group_name, group) in &self.groups {
+            for dep in &group.depends_on {
+                if !self.checkpoints.contains_key(dep) {
+                    bail!("Group '{}' depends on unknown checkpoint: {}", group_name, dep);
+                }
+            }
+        }
+
+        // Validate group memberships
+        for (group_name, group) in &self.groups {
+            for member in &group.members {
+                if !self.checkpoints.contains_key(member) {
+                    bail!("Group '{}' references unknown checkpoint: {}", group_name, member);
+                }
+            }
+        }
+
+        // Validate checkpoint group declarations
+        for (checkpoint_name, checkpoint) in &self.checkpoints {
+            if let Some(group_name) = &checkpoint.group {
+                let group = self.groups.get(group_name)
+                    .ok_or_else(|| anyhow::anyhow!("Checkpoint '{}' references unknown group: {}", checkpoint_name, group_name))?;
+
+                if !group.members.contains(checkpoint_name) {
+                    bail!("Checkpoint '{}' claims group '{}' but group doesn't list it",
+                          checkpoint_name, group_name);
+                }
+            }
+        }
+
+        Ok(())
+    }
+    
+    /// Detect cycles using DFS
+    ///
+    /// Starts a fresh depth-first search at `start` with empty bookkeeping
+    /// sets and returns an error if a `depends_on` cycle is reachable from it.
+    fn detect_cycle(&self, start: &str) -> Result<()> {
+        let mut visited = HashSet::new();
+        let mut stack = HashSet::new();
+        self.dfs_cycle_check(start, &mut visited, &mut stack)
+    }
+    
+    /// Depth-first walk over `depends_on` edges starting at `node`.
+    ///
+    /// `stack` holds the nodes on the current recursion path; meeting one of
+    /// them again means a cycle. `visited` holds every node already entered,
+    /// so shared sub-graphs are walked only once. Nodes without a checkpoint
+    /// definition are treated as leaves.
+    fn dfs_cycle_check(&self, node: &str, visited: &mut HashSet<String>, stack: &mut HashSet<String>) -> Result<()> {
+        if stack.contains(node) {
+            bail!("Cycle detected at checkpoint: {}", node);
+        }
+        // `insert` reports `false` when the node had been entered before.
+        if !visited.insert(node.to_string()) {
+            return Ok(());
+        }
+        stack.insert(node.to_string());
+
+        let direct_deps = self.checkpoints
+            .get(node)
+            .into_iter()
+            .flat_map(|checkpoint| checkpoint.depends_on.iter());
+        for dep in direct_deps {
+            self.dfs_cycle_check(dep, visited, stack)?;
+        }
+
+        stack.remove(node);
+        Ok(())
+    }
+    
+    /// Resolve every dependency of `checkpoint`: direct, transitive and those
+    /// inherited from its group.
+    ///
+    /// The result lists each name once, in discovery order, and never
+    /// contains `checkpoint` itself.
+    ///
+    /// # Errors
+    /// Fails when `checkpoint` or any reachable dependency is not a known
+    /// checkpoint.
+    pub fn get_all_dependencies(&self, checkpoint: &str) -> Result<Vec<String>> {
+        let mut collected = Vec::new();
+        let mut entered = HashSet::new();
+        self.collect_deps(checkpoint, &mut collected, &mut entered)?;
+
+        // Keep only the first occurrence of each name, preserving order.
+        let mut emitted = HashSet::new();
+        let mut unique = Vec::with_capacity(collected.len());
+        for name in collected {
+            if emitted.insert(name.clone()) {
+                unique.push(name);
+            }
+        }
+
+        Ok(unique)
+    }
+    
+    /// Recursive worker behind `get_all_dependencies`.
+    ///
+    /// Appends to `deps` every not-yet-visited dependency of `node`, group
+    /// dependencies first and direct dependencies second, descending into
+    /// each one right after it is recorded. `visited` prevents re-entering a
+    /// node.
+    fn collect_deps(&self, node: &str, deps: &mut Vec<String>, visited: &mut HashSet<String>) -> Result<()> {
+        // `insert` reports `false` when the node had been entered before.
+        if !visited.insert(node.to_string()) {
+            return Ok(());
+        }
+
+        let config = match self.checkpoints.get(node) {
+            Some(found) => found,
+            None => bail!("Unknown checkpoint: {}", node),
+        };
+
+        // Dependencies inherited from the declared group (if it exists) ...
+        let inherited = config.group
+            .as_ref()
+            .and_then(|group_name| self.groups.get(group_name))
+            .into_iter()
+            .flat_map(|group| group.depends_on.iter());
+
+        // ... followed by the checkpoint's own direct dependencies.
+        for dep in inherited.chain(config.depends_on.iter()) {
+            if visited.contains(dep) {
+                continue;
+            }
+            deps.push(dep.clone());
+            self.collect_deps(dep, deps, visited)?;
+        }
+
+        Ok(())
+    }
+    
+    /// Generate DOT format for visualization
+    ///
+    /// Emits one node per checkpoint (labelled with its name and, when
+    /// present, its description) and one edge per dependency. Edges inherited
+    /// from a group carry a `via <group>` label. Checkpoints are emitted in
+    /// name order so the output is stable across runs, and double quotes in
+    /// names, descriptions and group names are escaped so they cannot
+    /// terminate a quoted DOT string early.
+    pub fn to_dot(&self) -> String {
+        // Escape the one character that would end a quoted DOT string.
+        fn escape(raw: &str) -> String {
+            raw.replace('"', "\\\"")
+        }
+
+        let mut dot = String::from("digraph Dependencies {\n  rankdir=LR;\n  node [shape=box];\n\n");
+
+        // HashMap iteration order is unspecified; sort by name for stable output.
+        let mut checkpoints: Vec<(&String, &CheckpointConfig)> = self.checkpoints.iter().collect();
+        checkpoints.sort_by(|a, b| a.0.cmp(b.0));
+
+        // Nodes
+        for (name, config) in &checkpoints {
+            let label = if config.description.is_empty() {
+                escape(name)
+            } else {
+                format!("{}\\n{}", escape(name), escape(&config.description))
+            };
+            dot.push_str(&format!("  \"{}\" [label=\"{}\"];\n", escape(name), label));
+        }
+
+        // Edges
+        dot.push('\n');
+        for (name, config) in &checkpoints {
+            // Group dependencies
+            if let Some(group_name) = &config.group {
+                if let Some(group) = self.groups.get(group_name) {
+                    for dep in &group.depends_on {
+                        dot.push_str(&format!(
+                            "  \"{}\" -> \"{}\" [label=\"via {}\"];\n",
+                            escape(name), escape(dep), escape(group_name)
+                        ));
+                    }
+                }
+            }
+
+            // Direct dependencies
+            for dep in &config.depends_on {
+                dot.push_str(&format!("  \"{}\" -> \"{}\";\n", escape(name), escape(dep)));
+            }
+        }
+
+        dot.push_str("}\n");
+        dot
+    }
+}
+
+// ============================================================================
+// DATA STRUCTURES
+// ============================================================================
+
+/// Description of the on-disk content a state entry refers to.
+///
+/// Serialized with an internal `type` tag whose values are the lowercase
+/// variant names.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ContentReference {
+    /// A single file; hashed with `hash_file`.
+    File { path: PathBuf },
+    /// A directory tree; hashed with `hash_directory` using the optional
+    /// include/exclude patterns.
+    Directory { 
+        path: PathBuf,
+        include_patterns: Option<Vec<String>>,
+        exclude_patterns: Option<Vec<String>>,
+    },
+    /// Several references whose hashes are combined in list order.
+    Composite { references: Vec<ContentReference> },
+}
+
+/// Where the hash belonging to a state entry is kept.
+///
+/// Serialized with an internal `storage` tag whose values are the lowercase
+/// variant names.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(tag = "storage", rename_all = "lowercase")]
+pub enum HashStorage {
+    /// The hash string is embedded directly in the entry.
+    Inline { hash: String },
+    /// The hash string is stored in the separate file `hash_file`.
+    External { hash_file: PathBuf },
+}
+
+/// Stage of the cache → data → storage lifecycle an entry belongs to.
+///
+/// The stage selects the default TTL and revalidation interval (see the
+/// `impl` block). Serialized as the lowercase variant name.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
+#[serde(rename_all = "lowercase")]
+pub enum DataStage {
+    Cache,
+    Data,
+    Storage,
+}
+
+impl DataStage {
+    /// Default time-to-live for entries of this stage: 24 hours for `Cache`,
+    /// 7 days for `Data`, 365 days for `Storage`.
+    pub fn default_ttl(&self) -> Duration {
+        match *self {
+            DataStage::Cache => Duration::hours(24),
+            DataStage::Data => Duration::days(7),
+            DataStage::Storage => Duration::days(365),
+        }
+    }
+
+    /// Revalidation interval for this stage: 6 hours for `Cache`, 1 day for
+    /// `Data`, 30 days for `Storage`.
+    pub fn revalidation_interval(&self) -> Duration {
+        match *self {
+            DataStage::Cache => Duration::hours(6),
+            DataStage::Data => Duration::days(1),
+            DataStage::Storage => Duration::days(30),
+        }
+    }
+}
+
+/// One record of `state.jsonl`, describing the state of a single step.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct StateEntry {
+    /// Step name; also the key under which the entry is stored.
+    pub step_name: String,
+    /// `true` once `mark_valid` ran; `false` after `create_entry` / `mark_invalid`.
+    pub completed: bool,
+    /// Completion time set by `mark_valid`; the TTL check is measured from it.
+    pub completed_at: Option<DateTime<Utc>>,
+    /// The content whose hash is tracked.
+    pub content_reference: Option<ContentReference>,
+    /// The hash recorded by `mark_valid` (inline or in an external file).
+    pub content_hash: Option<HashStorage>,
+    /// Lifecycle stage; supplies the default TTL.
+    pub data_stage: Option<DataStage>,
+    /// When set, replaces the stage's default TTL during validation.
+    pub ttl_override: Option<Duration>,
+    /// Time of the most recent status update or validation.
+    pub last_validated_at: Option<DateTime<Utc>>,
+    /// Outcome of the most recent validation.
+    pub validation_status: ValidationStatus,
+    /// Names of the steps this entry depends on; used for cascade invalidation.
+    #[serde(default)]
+    pub dependencies: Vec<String>,
+}
+
+/// Result of validating a `StateEntry`.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ValidationStatus {
+    /// Not validated: the step is incomplete or lacks a reference/hash.
+    Unknown,
+    /// The stored hash matches the content and the TTL has not elapsed.
+    Valid,
+    /// Validation failed, or the step was explicitly marked invalid.
+    Invalid { reason: String },
+    /// The hash matches but the TTL has elapsed.
+    Expired,
+    /// A dependency of this entry was invalidated.
+    DependencyFailed { failed_dependency: String },
+}
+
+// ============================================================================
+// HASH COMPUTATION
+// ============================================================================
+
+/// Compute the SHA-256 digest of a file's contents.
+///
+/// The file is streamed in 8 KiB chunks, so it is never held in memory as a
+/// whole. Returns the digest as a lowercase hexadecimal string.
+///
+/// # Errors
+/// Fails when the file cannot be opened or read.
+pub fn hash_file<P: AsRef<Path>>(path: P) -> Result<String> {
+    let target = path.as_ref();
+    let handle = fs::File::open(target)
+        .with_context(|| format!("Failed to open: {}", target.display()))?;
+
+    let mut source = BufReader::new(handle);
+    let mut digest = Sha256::new();
+    let mut chunk = [0u8; 8192];
+
+    // A read of zero bytes signals end of file.
+    let mut filled = source.read(&mut chunk)?;
+    while filled > 0 {
+        digest.update(&chunk[..filled]);
+        filled = source.read(&mut chunk)?;
+    }
+
+    Ok(format!("{:x}", digest.finalize()))
+}
+
+/// Hash a directory recursively
+///
+/// Collects the files below `path` (hidden entries are skipped and the
+/// optional include/exclude patterns applied by `collect_files_recursive`),
+/// sorts them, and feeds each file's path relative to `path` followed by its
+/// SHA-256 file hash into one SHA-256 digest.
+///
+/// A directory without matching files yields the SHA-256 digest of empty
+/// input, so every return value is a 64-character SHA-256 hex string.
+///
+/// # Errors
+/// Fails when `path` is not a directory or when any file or directory below
+/// it cannot be read.
+pub fn hash_directory<P: AsRef<Path>>(
+    path: P,
+    include_patterns: Option<&[String]>,
+    exclude_patterns: Option<&[String]>,
+) -> Result<String> {
+    let path = path.as_ref();
+    if !path.is_dir() {
+        bail!("Not a directory: {}", path.display());
+    }
+
+    let mut files = Vec::new();
+    collect_files_recursive(path, &mut files, include_patterns, exclude_patterns)?;
+    // Sorting makes the digest independent of directory iteration order.
+    files.sort();
+
+    // With no files the loop body never runs and the digest of empty input
+    // is returned; no special-case constant is needed.
+    let mut hasher = Sha256::new();
+    for file_path in files {
+        let rel_path = file_path.strip_prefix(path)
+            .unwrap_or(&file_path)
+            .to_string_lossy();
+        hasher.update(rel_path.as_bytes());
+        hasher.update(hash_file(&file_path)?.as_bytes());
+    }
+
+    Ok(format!("{:x}", hasher.finalize()))
+}
+
+/// Append every regular file below `dir` that passes the filters to `files`.
+///
+/// Entries whose name starts with `.` are skipped entirely (for directories
+/// this skips the whole subtree). Sub-directories are descended into; files
+/// are kept when `should_include` accepts them.
+fn collect_files_recursive(
+    dir: &Path,
+    files: &mut Vec<PathBuf>,
+    include: Option<&[String]>,
+    exclude: Option<&[String]>,
+) -> Result<()> {
+    for entry in fs::read_dir(dir)? {
+        let candidate = entry?.path();
+
+        // Names that are not valid UTF-8 are not treated as hidden.
+        let is_hidden = match candidate.file_name().and_then(|name| name.to_str()) {
+            Some(name) => name.starts_with('.'),
+            None => false,
+        };
+        if is_hidden {
+            continue;
+        }
+
+        if candidate.is_dir() {
+            collect_files_recursive(&candidate, files, include, exclude)?;
+            continue;
+        }
+
+        if candidate.is_file() && should_include(&candidate, include, exclude) {
+            files.push(candidate);
+        }
+    }
+    Ok(())
+}
+
+/// Decide whether `path` passes the include/exclude filters.
+///
+/// A match against any exclude pattern rejects the path. Otherwise the path
+/// is accepted when no include list is given, or when it matches at least
+/// one include pattern.
+fn should_include(path: &Path, include: Option<&[String]>, exclude: Option<&[String]>) -> bool {
+    let candidate = path.to_string_lossy();
+    let matches_any = |patterns: &[String]| {
+        patterns.iter().any(|pattern| glob_match(&candidate, pattern))
+    };
+
+    // Exclusions take precedence over inclusions.
+    let excluded = exclude.map_or(false, |patterns| matches_any(patterns));
+    if excluded {
+        return false;
+    }
+
+    include.map_or(true, |patterns| matches_any(patterns))
+}
+
+/// Minimal wildcard matcher used for include/exclude patterns.
+///
+/// * A pattern without `*` matches when `path` ends with it.
+/// * In a pattern with `*`, each `*` matches any (possibly empty) run of
+///   characters. The text after the last `*` must be a suffix of `path`; the
+///   literal pieces before it must occur in `path` in order and without
+///   overlapping each other or the suffix. The pattern is not anchored at
+///   the start of `path`, so `data/*.json` matches `/x/data/a.json`.
+///
+/// Any number of `*` is supported.
+fn glob_match(path: &str, pattern: &str) -> bool {
+    if !pattern.contains('*') {
+        return path.ends_with(pattern);
+    }
+
+    let parts: Vec<&str> = pattern.split('*').collect();
+    // `split` always yields at least one piece, so both arms are reachable
+    // only with a non-empty `parts`.
+    let (suffix, leading) = match parts.split_last() {
+        Some(split) => split,
+        None => return false,
+    };
+
+    if !path.ends_with(suffix) {
+        return false;
+    }
+
+    // Search the leading pieces left to right in what remains in front of
+    // the suffix; consuming the text as we go rules out overlaps.
+    let mut remaining = &path[..path.len() - suffix.len()];
+    for piece in leading {
+        match remaining.find(piece) {
+            Some(position) => remaining = &remaining[position + piece.len()..],
+            None => return false,
+        }
+    }
+
+    true
+}
+
+/// Compute the hash of whatever a `ContentReference` points at.
+///
+/// Files and directories are delegated to `hash_file` / `hash_directory`.
+/// A composite hashes each child in list order and feeds the resulting hex
+/// strings into one SHA-256 digest.
+pub fn hash_content_reference(reference: &ContentReference) -> Result<String> {
+    match reference {
+        ContentReference::Composite { references } => {
+            let mut combined = Sha256::new();
+            for child in references {
+                let child_hash = hash_content_reference(child)?;
+                combined.update(child_hash.as_bytes());
+            }
+            Ok(format!("{:x}", combined.finalize()))
+        }
+        ContentReference::Directory { path, include_patterns, exclude_patterns } => {
+            let include = include_patterns.as_deref();
+            let exclude = exclude_patterns.as_deref();
+            hash_directory(path, include, exclude)
+        }
+        ContentReference::File { path } => hash_file(path),
+    }
+}
+
+// ============================================================================
+// HASH STORAGE
+// ============================================================================
+
+/// Choose inline or external storage for `hash`.
+///
+/// Strings of at most `INLINE_HASH_THRESHOLD` bytes are stored inline.
+/// Longer ones are assigned a file under `<base_dir>/.integrity_hashes/`
+/// named after the first 16 bytes of the hash plus the `.hash` extension.
+/// This function only picks the location; `store_hash` does the writing.
+///
+/// NOTE(review): a SHA-256 hex digest is 64 characters, so hashes produced
+/// by this module appear to always take the inline path — confirm whether
+/// the external branch is still needed.
+fn determine_storage(hash: &str, base_dir: &Path) -> HashStorage {
+    if hash.len() <= INLINE_HASH_THRESHOLD {
+        return HashStorage::Inline { hash: hash.to_string() };
+    }
+
+    let file_name = format!("{}{}", &hash[..16], HASH_FILE_EXT);
+    let hash_file = base_dir.join(HASH_STORAGE_DIR).join(file_name);
+    HashStorage::External { hash_file }
+}
+
+/// Persist `hash` when `storage` is external; inline storage needs no I/O.
+///
+/// The parent directory of the hash file is created if necessary.
+async fn store_hash(hash: &str, storage: &HashStorage) -> Result<()> {
+    let hash_file = match storage {
+        HashStorage::External { hash_file } => hash_file,
+        // Inline hashes travel with the entry itself.
+        HashStorage::Inline { .. } => return Ok(()),
+    };
+
+    if let Some(dir) = hash_file.parent() {
+        async_fs::create_dir_all(dir).await?;
+    }
+    async_fs::write(hash_file, hash.as_bytes()).await?;
+    Ok(())
+}
+
+/// Return the hash string held by `storage`.
+///
+/// Inline hashes are cloned; external hashes are read from their file with
+/// surrounding whitespace trimmed.
+async fn load_hash(storage: &HashStorage) -> Result<String> {
+    match storage {
+        HashStorage::External { hash_file } => {
+            let raw = async_fs::read_to_string(hash_file).await?;
+            Ok(raw.trim().to_string())
+        }
+        HashStorage::Inline { hash } => Ok(hash.clone()),
+    }
+}
+
+// ============================================================================
+// VALIDATION
+// ============================================================================
+
+/// Validate a single state entry
+///
+/// Returns:
+/// * `Unknown` when the step is not completed or has no content
+///   reference / stored hash,
+/// * `Invalid` when the stored hash cannot be loaded, the current hash
+///   cannot be computed, or the two differ,
+/// * `Expired` when the hashes match but the TTL (override or stage default),
+///   measured from `completed_at`, has elapsed,
+/// * `Valid` otherwise.
+///
+/// Problems with an individual entry are reported through the returned
+/// status rather than as `Err`, so one broken entry cannot abort validation
+/// of the remaining ones.
+async fn validate_entry(entry: &StateEntry) -> Result<ValidationStatus> {
+    // Check if completed
+    if !entry.completed {
+        return Ok(ValidationStatus::Unknown);
+    }
+
+    // Get content reference and hash
+    let (content_ref, hash_storage) = match (&entry.content_reference, &entry.content_hash) {
+        (Some(r), Some(h)) => (r, h),
+        _ => return Ok(ValidationStatus::Unknown),
+    };
+
+    // Load stored hash. A missing or unreadable hash file invalidates this
+    // entry only, mirroring how hash computation failures are handled below.
+    let stored_hash = match load_hash(hash_storage).await {
+        Ok(h) => h,
+        Err(e) => return Ok(ValidationStatus::Invalid {
+            reason: format!("Failed to load stored hash: {}", e)
+        }),
+    };
+
+    // Compute current hash
+    let current_hash = match hash_content_reference(content_ref) {
+        Ok(h) => h,
+        Err(e) => return Ok(ValidationStatus::Invalid {
+            reason: format!("Failed to compute hash: {}", e)
+        }),
+    };
+
+    // Check hash match
+    if stored_hash != current_hash {
+        return Ok(ValidationStatus::Invalid { reason: "Hash mismatch".to_string() });
+    }
+
+    // Check TTL
+    if let Some(stage) = entry.data_stage {
+        let ttl = entry.ttl_override.unwrap_or_else(|| stage.default_ttl());
+        if let Some(completed_at) = entry.completed_at {
+            if Utc::now() - completed_at > ttl {
+                return Ok(ValidationStatus::Expired);
+            }
+        }
+    }
+
+    Ok(ValidationStatus::Valid)
+}
+
+/// Validate all entries with cascade invalidation
+///
+/// Pass 1 validates every entry on its own, stores the resulting status and
+/// validation time on the entry, and tallies the report counters.
+///
+/// Pass 2 repeatedly marks every entry that depends on an invalidated entry
+/// as `DependencyFailed` until no further entry is affected. Only `Invalid`
+/// entries seed the cascade; `Expired` ones do not. An entry that had been
+/// counted as valid is removed from `valid_count` when the cascade
+/// overwrites it, so the counter reflects the final statuses.
+async fn validate_all_entries(entries: &mut HashMap<String, StateEntry>) -> Result<ValidationReport> {
+    let mut report = ValidationReport::default();
+
+    // Validate each entry
+    for (name, entry) in entries.iter_mut() {
+        let status = validate_entry(entry).await?;
+        entry.validation_status = status.clone();
+        entry.last_validated_at = Some(Utc::now());
+
+        match status {
+            ValidationStatus::Valid => report.valid_count += 1,
+            ValidationStatus::Invalid { .. } => {
+                report.invalid_count += 1;
+                report.invalid_entries.push(name.clone());
+            }
+            ValidationStatus::Expired => {
+                report.expired_count += 1;
+                report.expired_entries.push(name.clone());
+            }
+            ValidationStatus::Unknown => report.unknown_count += 1,
+            ValidationStatus::DependencyFailed { .. } => {}
+        }
+    }
+
+    // Cascade invalidation
+    let mut invalidated: HashSet<String> = report.invalid_entries.iter().cloned().collect();
+
+    loop {
+        let mut newly_invalidated = Vec::new();
+
+        for (name, entry) in entries.iter() {
+            if invalidated.contains(name) {
+                continue;
+            }
+
+            // Check if any dependency is invalidated
+            if let Some(failed_dep) = entry.dependencies.iter().find(|d| invalidated.contains(*d)) {
+                newly_invalidated.push((name.clone(), failed_dep.clone()));
+            }
+        }
+
+        // Fixed point reached: nothing new was invalidated in this round.
+        if newly_invalidated.is_empty() {
+            break;
+        }
+
+        for (name, failed_dep) in newly_invalidated {
+            invalidated.insert(name.clone());
+            report.cascaded_invalidations.push(name.clone());
+
+            if let Some(entry) = entries.get_mut(&name) {
+                // The entry is no longer valid once a dependency failed.
+                if entry.validation_status == ValidationStatus::Valid {
+                    report.valid_count = report.valid_count.saturating_sub(1);
+                }
+                entry.validation_status = ValidationStatus::DependencyFailed { failed_dependency: failed_dep };
+            }
+        }
+    }
+
+    Ok(report)
+}
+
+/// Summary produced by validating all state entries.
+#[derive(Debug, Default)]
+pub struct ValidationReport {
+    /// Number of entries that validated as `Valid`.
+    pub valid_count: usize,
+    /// Number of entries that validated as `Invalid`.
+    pub invalid_count: usize,
+    /// Number of entries that validated as `Expired`.
+    pub expired_count: usize,
+    /// Number of entries that validated as `Unknown`.
+    pub unknown_count: usize,
+    /// Names of the `Invalid` entries.
+    pub invalid_entries: Vec<String>,
+    /// Names of the `Expired` entries.
+    pub expired_entries: Vec<String>,
+    /// Names of entries invalidated because a dependency was invalidated.
+    pub cascaded_invalidations: Vec<String>,
+}
+
+impl ValidationReport {
+    /// Print the counters to stdout, followed by one bulleted section for
+    /// each non-empty name list (invalid, expired, cascaded).
+    pub fn print_summary(&self) {
+        println!("=== Validation Report ===");
+        println!("Valid:   {}", self.valid_count);
+        println!("Invalid: {}", self.invalid_count);
+        println!("Expired: {}", self.expired_count);
+        println!("Unknown: {}", self.unknown_count);
+
+        // Heading (including its leading blank line) paired with the names
+        // listed beneath it.
+        let sections = [
+            ("\nInvalid entries:", self.invalid_entries.as_slice()),
+            ("\nExpired entries:", self.expired_entries.as_slice()),
+            ("\nCascaded invalidations:", self.cascaded_invalidations.as_slice()),
+        ];
+
+        for (heading, names) in sections.iter() {
+            if names.is_empty() {
+                continue;
+            }
+            println!("{}", heading);
+            for name in names.iter() {
+                println!("  - {}", name);
+            }
+        }
+    }
+}
+
+// ============================================================================
+// STATE MANAGEMENT
+// ============================================================================
+
+/// State manager with centralized dependency configuration
+/// 
+/// # Orchestration: Shutdown Flag + State Management
+/// 
+/// ## Happy Path (Normal Completion)
+/// 1. Call `create_entry()` before the work starts (status `Unknown`, `completed: false`)
+/// 2. Work completes successfully
+/// 3. Call `mark_valid()` with the entry
+/// 4. StateEntry saved with `completed: true`, timestamp and content hash
+/// 5. On next run: callers can consult `is_step_valid()` to skip the step
+/// 
+/// ## Shutdown Path (Interrupted Work)  
+/// 1. Shutdown flag is set via Ctrl+C handler
+/// 2. Long-running code checks: `if shutdown_flag.load(Ordering::SeqCst) { break }`
+/// 3. Before returning, call `mark_invalid()` with the entry and a reason
+/// 4. StateEntry saved with `completed: false` and ValidationStatus::Invalid
+/// 5. On next run: `is_step_valid()` reports `false`, so the step can be retried
+/// 
+/// ## Usage Pattern
+/// 
+/// ```rust,ignore
+/// let manager = StateManager::new(paths.integrity_dir()).await?;
+/// let content_ref = directory_reference(&output_dir, None, None);
+/// let entry = manager
+///     .create_entry(step_name.to_string(), content_ref, DataStage::Data)
+///     .await?;
+/// 
+/// loop {
+///     if shutdown_flag.load(Ordering::SeqCst) {
+///         manager.mark_invalid(entry, "invalid due to shutdown".to_string()).await?;
+///         return Ok(());
+///     }
+///     // Do work... and `break` once finished
+/// }
+/// 
+/// // Completed successfully
+/// manager.mark_valid(entry).await?;
+/// ```
+pub struct StateManager {
+    // Directory that holds `state.jsonl` and the dependency configuration file.
+    base_dir: PathBuf,
+    // Dependency configuration used to resolve the dependencies of new entries.
+    dependency_config: DependencyConfig,
+}
+
+impl StateManager {
+    /// Build a manager rooted at `base_dir`.
+    ///
+    /// The dependency configuration is read from
+    /// `<base_dir>/checkpoint_dependencies.toml`; when that file is missing
+    /// an empty configuration is used.
+    pub async fn new<P: AsRef<Path>>(base_dir: P) -> Result<Self> {
+        let base_dir = base_dir.as_ref().to_path_buf();
+        let dependency_config =
+            DependencyConfig::from_file(base_dir.join(DEFAULT_DEPENDENCY_CONFIG)).await?;
+
+        Ok(Self { base_dir, dependency_config })
+    }
+    
+    /// Build a manager from an already constructed dependency configuration.
+    ///
+    /// The configuration is validated first; an invalid one is rejected.
+    pub fn with_config<P: AsRef<Path>>(base_dir: P, dependency_config: DependencyConfig) -> Result<Self> {
+        dependency_config.validate()?;
+
+        let base_dir = base_dir.as_ref().to_path_buf();
+        Ok(Self { base_dir, dependency_config })
+    }
+    
+    /// Get the dependency configuration
+    ///
+    /// Returns a shared reference to the configuration this manager was
+    /// created with.
+    pub fn get_dependency_config(&self) -> &DependencyConfig {
+        &self.dependency_config
+    }
+    
+    /// Load all state entries from state.jsonl
+    ///
+    /// Returns an empty map when the file does not exist. Each non-blank line
+    /// is parsed as one JSON `StateEntry`; when a step name occurs more than
+    /// once, the last occurrence wins.
+    ///
+    /// Lines that fail to parse are skipped so that one corrupt record does
+    /// not make the whole state unreadable, but a warning is written to
+    /// stderr: a skipped record is lost for good the next time the state is
+    /// saved.
+    pub async fn load_entries(&self) -> Result<HashMap<String, StateEntry>> {
+        let state_file = self.base_dir.join("state.jsonl");
+        if !state_file.exists() {
+            return Ok(HashMap::new());
+        }
+
+        let content = async_fs::read_to_string(&state_file).await?;
+        let mut entries = HashMap::new();
+
+        for (index, line) in content.lines().enumerate() {
+            if line.trim().is_empty() {
+                continue;
+            }
+            match serde_json::from_str::<StateEntry>(line) {
+                Ok(entry) => {
+                    entries.insert(entry.step_name.clone(), entry);
+                }
+                Err(e) => {
+                    eprintln!(
+                        "Warning: skipping malformed state entry at {}:{}: {}",
+                        state_file.display(),
+                        index + 1,
+                        e
+                    );
+                }
+            }
+        }
+
+        Ok(entries)
+    }
+    
+    /// Save all state entries to state.jsonl
+    ///
+    /// Ensures the manager's base directory exists, then rewrites
+    /// `<base_dir>/state.jsonl` with one JSON object per line and syncs the
+    /// file to disk. The line order follows the map's iteration order.
+    pub async fn save_entries(&self, entries: &HashMap<String, StateEntry>) -> Result<()> {
+        // The state file lives inside `base_dir`, so that directory itself
+        // (not merely its parent) has to exist before the file is created.
+        async_fs::create_dir_all(&self.base_dir).await?;
+
+        let mut file = async_fs::File::create(self.base_dir.join("state.jsonl")).await?;
+
+        for entry in entries.values() {
+            let mut line = serde_json::to_string(entry)?;
+            line.push('\n');
+            file.write_all(line.as_bytes()).await?;
+        }
+
+        file.sync_all().await?;
+        Ok(())
+    }
+    
+    /// Register a step as started and persist a placeholder entry for it.
+    ///
+    /// The new entry is incomplete (`completed: false`), carries the given
+    /// content reference and data stage, has no hash yet and has status
+    /// `Unknown`. Its dependencies are resolved from the dependency
+    /// configuration; a step the configuration cannot resolve simply gets no
+    /// dependencies. An existing entry with the same name is replaced.
+    ///
+    /// Finish the step with `mark_valid()` or `mark_invalid()`.
+    ///
+    /// # Example
+    /// ```rust,ignore
+    /// let manager = StateManager::new(paths.integrity_dir()).await?;
+    ///
+    /// // Start tracking a long step
+    /// let entry = manager
+    ///     .create_entry("long_operation".to_string(), content_ref, DataStage::Data)
+    ///     .await?;
+    ///
+    /// // Do work...
+    ///
+    /// // Mark as valid when done
+    /// manager.mark_valid(entry).await?;
+    /// ```
+    pub async fn create_entry(&self, step_name: String, content_reference: ContentReference, data_stage: DataStage) -> Result<StateEntry> {
+        // Resolution failures are deliberately mapped to "no dependencies".
+        let dependencies = match self.dependency_config.get_all_dependencies(&step_name) {
+            Ok(resolved) => resolved,
+            Err(_) => Vec::new(),
+        };
+
+        let placeholder = StateEntry {
+            step_name: step_name.clone(),
+            completed: false,
+            completed_at: None,
+            content_reference: Some(content_reference),
+            content_hash: None,
+            data_stage: Some(data_stage),
+            ttl_override: None,
+            last_validated_at: Some(Utc::now()),
+            validation_status: ValidationStatus::Unknown,
+            dependencies,
+        };
+
+        // Read-modify-write of the whole state file.
+        let mut all_entries = self.load_entries().await?;
+        all_entries.insert(step_name, placeholder.clone());
+        self.save_entries(&all_entries).await?;
+
+        Ok(placeholder)
+    }
+    
+    /// Record successful completion of a step and persist it.
+    ///
+    /// Hashes the referenced content, stores the hash (inline or external),
+    /// then sets `completed: true`, `completed_at` and `last_validated_at` to
+    /// the current time and the status to `Valid`. The updated entry replaces
+    /// any entry of the same name in `state.jsonl` and is returned.
+    ///
+    /// # Errors
+    /// Fails when `entry.content_reference` or `entry.data_stage` is `None`,
+    /// when the content cannot be hashed, or when the state cannot be written.
+    pub async fn mark_valid(&self, mut entry: StateEntry) -> Result<StateEntry> {
+        // Both fields are required; check them before any hashing happens.
+        let content_reference = match entry.content_reference.as_ref() {
+            Some(reference) => reference,
+            None => bail!("content_reference is required to mark entry valid"),
+        };
+        let data_stage = match entry.data_stage {
+            Some(stage) => stage,
+            None => bail!("data_stage is required to mark entry valid"),
+        };
+
+        let hash = hash_content_reference(content_reference)?;
+        let storage = determine_storage(&hash, &self.base_dir);
+        store_hash(&hash, &storage).await?;
+
+        entry.completed = true;
+        entry.completed_at = Some(Utc::now());
+        entry.content_hash = Some(storage);
+        entry.data_stage = Some(data_stage);
+        entry.last_validated_at = Some(Utc::now());
+        entry.validation_status = ValidationStatus::Valid;
+
+        // Read-modify-write of the whole state file.
+        let mut all_entries = self.load_entries().await?;
+        all_entries.insert(entry.step_name.clone(), entry.clone());
+        self.save_entries(&all_entries).await?;
+
+        Ok(entry)
+    }
+    
+    /// Record that a step did not complete and persist it.
+    ///
+    /// Sets `completed: false`, clears `completed_at`, stamps
+    /// `last_validated_at` with the current time and sets the status to
+    /// `Invalid { reason }`; all other fields are kept. The updated entry
+    /// replaces any entry of the same name in `state.jsonl` and is returned.
+    pub async fn mark_invalid(&self, entry: StateEntry, reason: String) -> Result<StateEntry> {
+        let entry = StateEntry {
+            completed: false,
+            completed_at: None,
+            last_validated_at: Some(Utc::now()),
+            validation_status: ValidationStatus::Invalid { reason },
+            ..entry
+        };
+
+        // Read-modify-write of the whole state file.
+        let mut all_entries = self.load_entries().await?;
+        all_entries.insert(entry.step_name.clone(), entry.clone());
+        self.save_entries(&all_entries).await?;
+
+        Ok(entry)
+    }
+    
+    /// Report whether `step_name` has an entry that currently validates as
+    /// `Valid`.
+    ///
+    /// The entry is re-validated on every call (hash comparison and TTL
+    /// check); the result is not written back. A step without an entry is
+    /// reported as not valid. Only the entry itself is checked here, not its
+    /// dependencies.
+    pub async fn is_step_valid(&self, step_name: &str) -> Result<bool> {
+        let all_entries = self.load_entries().await?;
+
+        match all_entries.get(step_name) {
+            None => Ok(false),
+            Some(entry) => {
+                let status = validate_entry(entry).await?;
+                Ok(status == ValidationStatus::Valid)
+            }
+        }
+    }
+    
+    /// Run full validation on all entries
+    ///
+    /// Loads every entry, validates them (including cascade invalidation),
+    /// writes the updated statuses back to `state.jsonl` and returns the
+    /// resulting report.
+    pub async fn validate_all(&self) -> Result<ValidationReport> {
+        let mut entries = self.load_entries().await?;
+        let report = validate_all_entries(&mut entries).await?;
+        self.save_entries(&entries).await?;
+        Ok(report)
+    }
+    
+    /// Print a human-readable dump of the dependency configuration to stdout.
+    ///
+    /// Lists every checkpoint with its description, group, direct
+    /// dependencies and resolved dependencies, followed by every group with
+    /// its description, members and group dependencies. Empty fields are
+    /// omitted, except that a group's member line is always printed.
+    pub fn print_dependency_graph(&self) {
+        let config = &self.dependency_config;
+
+        println!("=== Dependency Configuration ===");
+        println!("\nCheckpoints: {}", config.checkpoints.len());
+        println!("Groups: {}", config.groups.len());
+
+        println!("\n--- Checkpoints ---");
+        for (name, checkpoint) in config.checkpoints.iter() {
+            println!("{}", name);
+            if !checkpoint.description.is_empty() {
+                println!("  Description: {}", checkpoint.description);
+            }
+            if let Some(group) = checkpoint.group.as_ref() {
+                println!("  Group: {}", group);
+            }
+            if !checkpoint.depends_on.is_empty() {
+                println!("  Depends on: {}", checkpoint.depends_on.join(", "));
+            }
+
+            // Resolution errors are ignored here; only a successful,
+            // non-empty result is shown.
+            match config.get_all_dependencies(name) {
+                Ok(resolved) if !resolved.is_empty() => {
+                    println!("  Resolved (including transitive): {}", resolved.join(", "));
+                }
+                _ => {}
+            }
+            println!();
+        }
+
+        println!("\n--- Groups ---");
+        for (name, group) in config.groups.iter() {
+            println!("{}", name);
+            if !group.description.is_empty() {
+                println!("  Description: {}", group.description);
+            }
+            println!("  Members: {}", group.members.join(", "));
+            if !group.depends_on.is_empty() {
+                println!("  Group dependencies: {}", group.depends_on.join(", "));
+            }
+            println!();
+        }
+    }
+}
+
+// ============================================================================
+// HELPER FUNCTIONS
+// ============================================================================
+
+/// Create a simple file reference
+///
+/// Wraps `path` in `ContentReference::File`. The path is copied as given;
+/// the file system is not touched.
+pub fn file_reference<P: AsRef<Path>>(path: P) -> ContentReference {
+    ContentReference::File { path: path.as_ref().to_path_buf() }
+}
+
+/// Create a directory reference
+///
+/// Wraps `path` and the optional include/exclude pattern lists in
+/// `ContentReference::Directory`. The path is copied as given; the file
+/// system is not touched.
+pub fn directory_reference<P: AsRef<Path>>(
+    path: P,
+    include_patterns: Option<Vec<String>>,
+    exclude_patterns: Option<Vec<String>>,
+) -> ContentReference {
+    ContentReference::Directory {
+        path: path.as_ref().to_path_buf(),
+        include_patterns,
+        exclude_patterns,
+    }
+}
+
+/// Create a composite reference
+///
+/// Wraps the given references in `ContentReference::Composite`. Their order
+/// is kept and matters, because `hash_content_reference` combines the child
+/// hashes in list order.
+pub fn composite_reference(references: Vec<ContentReference>) -> ContentReference {
+    ContentReference::Composite { references }
+}
--- a/src/util/macros.rs
+++ b/src/util/macros.rs
@@ -0,0 +1,28 @@
+// src/util/macros.rs
+/// Return early with `Ok(())` when shutdown has been requested.
+///
+/// `$shutdown_flag` must provide `load(Ordering) -> bool` (e.g. an
+/// `AtomicBool`, or a reference / `Arc` to one). If the flag is set, a warning
+/// is logged and the expansion executes `return Ok(());`; otherwise nothing
+/// happens and execution continues after the macro.
+///
+/// Call-site requirements (the expansion is resolved where the macro is used):
+/// - an async context, because the log call is `.await`ed;
+/// - an enclosing function or closure whose return type accepts `Ok(())`;
+/// - a `logger` module providing `log_warn` must be in scope.
+///
+/// Usage: check_shutdown!(shutdown_flag);
+#[macro_export]
+macro_rules! check_shutdown {
+    ($shutdown_flag:expr) => {
+        if $shutdown_flag.load(std::sync::atomic::Ordering::SeqCst) {
+            // NOTE(review): `logger` is looked up at the call site, not here.
+            // A `$crate::...` path would remove that requirement — confirm the
+            // module's path from the crate root before changing it.
+            logger::log_warn("Shutdown detected, stopping processes").await;
+            return Ok(());
+        }
+    };
+}
+
+/// Mark a step's state as incomplete when shutdown has been requested.
+///
+/// If `$shutdown_flag.load(SeqCst)` is true, this calls
+/// `$manager.mark_incomplete(..)` with `$step_name.to_string()`, `$content_ref`,
+/// `$data_stage` and the reason `"Incomplete due to shutdown"`, awaits it and
+/// propagates a failure with `?`. If the flag is not set, nothing happens.
+/// The macro itself never returns from the caller on success.
+///
+/// The expansion is an `if` without `else` (type `()`) and the error is already
+/// propagated inside it, so invoke the macro as a plain statement; do not
+/// append `?` to the invocation. It must be used in an async context whose
+/// return type supports `?` on the result of `mark_incomplete`.
+///
+/// Usage: mark_incomplete_on_shutdown!(&manager, "step_name", content_ref, DataStage::Data, &shutdown_flag);
+#[macro_export]
+macro_rules! mark_incomplete_on_shutdown {
+    ($manager:expr, $step_name:expr, $content_ref:expr, $data_stage:expr, $shutdown_flag:expr) => {
+        if $shutdown_flag.load(std::sync::atomic::Ordering::SeqCst) {
+            $manager
+                .mark_incomplete(
+                    $step_name.to_string(),
+                    $content_ref,
+                    $data_stage,
+                    "Incomplete due to shutdown".to_string(),
+                )
+                .await?;
+        }
+    };
+}
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,4 +1,6 @@
 // src/util/mod.rs
 pub mod logger;
 pub mod directories;
-pub mod opnv;
+pub mod opnv;
+pub mod macros;
+pub mod integrity;
Author	SHA1	Message	Date
donpat1to	eff1412c0f	removed claudes md	2026-01-15 00:23:29 +01:00
donpat1to	75ab1969c7	added cross compatiblity between shutdown flag and state entries	2026-01-15 00:22:55 +01:00
donpat1to	f4b20f824d	removed crossplatformcompany from types	2026-01-14 14:49:00 +01:00
donpat1to	93fbefc9d4	removed id creation on scrape	2026-01-14 14:28:16 +01:00
donpat1to	4ea0c78d3d	added ids for companies	2026-01-12 23:03:01 +01:00
donpat1to	1d025a04ce	updated securities directory	2026-01-12 22:23:34 +01:00
donpat1to	98e1bca12f	moved helper functions into helpers.rs	2026-01-12 22:06:13 +01:00
donpat1to	29d8f1d89e	moved structs to types.rs	2026-01-12 18:50:44 +01:00
donpat1to	c0c9bc0ed9	added bond extraction from figi	2026-01-12 15:58:06 +01:00
donpat1to	659757482d	öi	2026-01-12 01:01:19 +01:00
donpat1to	bd74f36f4c	added integrity dir for set data collection; one state.jsonl	2026-01-11 16:57:36 +01:00
donpat1to	e6f8393660	merged enriching functions into one module	2026-01-11 14:24:18 +01:00
donpat1to	aff340ee2f	migrated checkpoint handling in integrity.rs to ssot principle	2026-01-11 13:05:31 +01:00
donpat1to	0487c2ec49	changed file names for openfigi	2026-01-11 12:21:10 +01:00
donpat1to	04f4b0d0c4	added integrity check to openfigi functions	2026-01-11 00:06:25 +01:00
donpat1to	6f05dc8c99	added integrity check to forex and exchange collection functiosn	2026-01-10 19:46:21 +01:00
donpat1to	ac1345798d	added integrity check to cleanse functions	2026-01-10 18:42:39 +01:00
donpat1to	766eb803f1	added integrity check to enrichment functions	2026-01-10 17:40:16 +01:00
donpat1to	151c96e35f	working code :)	2026-01-10 15:11:06 +01:00
donpat1to	ae1876b014	cleaned up main	2026-01-10 00:30:59 +01:00
donpat1to	c86d828940	cleaned up main	2026-01-10 00:30:42 +01:00
donpat1to	c6d301d434	added helper functions to reduce bloat	2026-01-09 21:24:18 +01:00
donpat1to	ba841248f0	cleaned up update.rs eco and corp	2026-01-09 19:52:26 +01:00
donpat1to	8dd75f7bdf	added yahoo exchange extraction	2026-01-09 19:09:42 +01:00
donpat1to	ea128f6187	added options chart enrichment	2026-01-08 11:35:25 +01:00
donpat1to	1720716144	added event enrichment	2026-01-08 00:35:10 +01:00
donpat1to	f9ce5bad99	fixed yahoo api calls for cleansing low profile data	2026-01-06 00:15:57 +01:00
donpat1to	fc25f32cbc	fixed yahoo api calls for cleansing low profile data	2026-01-06 00:15:46 +01:00
donpat1to	3d16475b79	readded yahoo	2026-01-05 17:00:42 +01:00
donpat1to	86944a9c58	cleaned yahoo hits	2025-12-24 00:00:21 +01:00
donpat1to	f9f09d0291	added working hard reset	2025-12-23 15:07:40 +01:00
donpat1to	fb0876309f	added hard reset for navigation timeout after 3 hours	2025-12-22 00:31:28 +01:00
donpat1to	c01b47000f	removed serial data scraping for yahoo tickers	2025-12-19 16:58:22 +01:00
donpat1to	5e81959322	updated yahoo company extraction js to get the most data rich row	2025-12-19 14:43:36 +01:00
donpat1to	b366f366e6	added atomic writer action for ctr c abort	2025-12-19 14:12:56 +01:00
donpat1to	cd91de253b	added pool rotation to chromedriver pool	2025-12-18 15:59:56 +01:00
donpat1to	c51b36c125	added session detection with requests per task	2025-12-18 14:01:51 +01:00
donpat1to	9c66f0d361	added parallelized scraping instances for company yahoo ticker seeding	2025-12-18 13:05:23 +01:00
donpat1to	d26e833d93	added update_rule for incremental change	2025-12-15 23:47:28 +01:00
donpat1to	d744769138	added companie mapping with yahoo tickers	2025-12-14 16:48:02 +01:00
donpat1to	00c9d45642	added data streaming instead of laoding	2025-12-12 10:54:01 +01:00
donpat1to	1bda78897b	implement vpn pool	2025-12-11 23:18:04 +01:00
donpat1to	470f0922ed	capabable spawning multiple openvpn instances	2025-12-11 00:36:46 +01:00