Add corporate data to the webscraper

This commit is contained in:
2025-11-21 00:17:59 +01:00
parent 0ea3fcc3b5
commit 9d0d15f3f8
18 changed files with 2128 additions and 970 deletions

View File

@@ -1,930 +1,51 @@
use chrono::{Datelike, Local, NaiveDate};
// src/main.rs
mod economic;
mod corporate;
mod config;
mod util;
use fantoccini::{ClientBuilder, Locator};
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use std::{
collections::{HashMap, HashSet},
path::PathBuf,
process::Command,
};
use tokio::{
fs, signal,
time::{Duration, sleep},
};
/// One calendar entry scraped from the finanzen.net economic-data table.
/// Serialized to/from the JSON chunk files in `economic_events/`.
/// Field names are part of the on-disk JSON schema — do not rename.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
struct EconomicEvent {
    country: String,
    date: String,        // "YYYY-MM-DD"; may be empty if the page's date header failed to parse
    time: String,        // time as displayed on the page
    event: String,       // event name as displayed
    actual: String,      // released value; empty until published
    forecast: String,
    previous: String,
    importance: String,  // always "High" — only 3-yellow-star rows are extracted
    description: String, // optional detail paragraph from the row below the event
}
/// Audit record for one detected difference between a stored event and a
/// freshly scraped one. Appended to monthly JSON files in
/// `economic_event_changes/`.
#[derive(Debug, Serialize, Deserialize, Clone)]
struct EventChange {
    date: String,          // event date the change applies to ("YYYY-MM-DD")
    event: String,
    country: String,
    change_type: String, // date | time | forecast | previous | actual | description | newly_added
    field_changed: String, // usually equal to change_type; "new_event" for newly_added
    old_value: String,     // empty for newly_added
    new_value: String,
    detected_at: String,   // local timestamp "YYYY-MM-DD HH:MM:SS"
}
/// Metadata for one on-disk chunk file `economic_events/chunk_<start>_<end>.json`.
#[derive(Debug)]
struct ChunkInfo {
    start_date: String, // earliest event date in the chunk ("YYYY-MM-DD", from the filename)
    end_date: String,   // latest event date in the chunk (from the filename)
    path: PathBuf,
    event_count: usize, // number of deserialized events in the file
}
/// Outcome of one scraping pass: the tracked change records plus the storage
/// keys of entries that were dropped because the event moved to a new time.
#[derive(Debug)]
struct ScrapeResult {
    changes: Vec<EventChange>,
    removed_keys: HashSet<String>, // Keys of events that were removed (rescheduled)
}
/// Launch the bundled ChromeDriver binary listening on `port`.
///
/// # Panics
/// Panics if `chromedriver-win64/chromedriver.exe` cannot be spawned
/// (missing binary, bad permissions, wrong platform).
fn start_chromedriver(port: u16) -> std::process::Child {
    let port_flag = format!("--port={}", port);
    Command::new("chromedriver-win64/chromedriver.exe")
        .arg(port_flag)
        .spawn()
        .expect("Failed to start ChromeDriver")
}
/// Remove the "Contentpass First Layer" consent iframe if it is present.
///
/// Polls the DOM up to 10 times with 500 ms pauses and stops as soon as the
/// overlay was found and detached. The overlay being absent is not an error.
async fn dismiss_overlays(client: &fantoccini::Client) -> anyhow::Result<()> {
    let remove_overlay_js = r#"(() => {
const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
if (iframe && iframe.parentNode) {
iframe.parentNode.removeChild(iframe);
return true;
}
return false;
})()"#;
    for _attempt in 0..10 {
        let was_removed = client
            .execute(remove_overlay_js, vec![])
            .await?
            .as_bool()
            .unwrap_or(false);
        if was_removed {
            return Ok(());
        }
        sleep(Duration::from_millis(500)).await;
    }
    Ok(())
}
async fn extract_all_data_via_js(
client: &fantoccini::Client,
) -> anyhow::Result<Vec<EconomicEvent>> {
let extraction_script = r#"
const events = [];
let currentDate = '';
const rows = document.querySelectorAll('#TeletraderForm table tbody tr');
for (let i = 0; i < rows.length; i++) {
const row = rows[i];
const cells = row.querySelectorAll('td');
if (cells.length === 1 && cells[0].colSpan === 9) {
const dateText = cells[0].textContent.trim();
const monthMap = {
'Januar': '01', 'Februar': '02', 'März': '03', 'April': '04',
'Mai': '05', 'Juni': '06', 'Juli': '07', 'August': '08',
'September': '09', 'Oktober': '10', 'November': '11', 'Dezember': '12'
};
const dateParts = dateText.match(/(\d{1,2})\.\s+([a-zA-ZäöüßÄÖÜ]+)\s+(\d{4})/);
if (dateParts) {
const day = dateParts[1].padStart(2, '0');
const germanMonth = dateParts[2];
const year = dateParts[3];
const month = monthMap[germanMonth] || '01';
currentDate = `${year}-${month}-${day}`;
} else {
currentDate = '';
}
continue;
}
if (cells.length >= 8) {
const time = cells[0]?.textContent?.trim() || '';
const country = cells[2]?.textContent?.trim() || '';
const eventName = cells[4]?.textContent?.trim() || '';
if (!time || !country || !eventName) continue;
const importanceCell = cells[3];
const yellowStarCount = importanceCell?.querySelectorAll('.icon--star.font-color-yellow').length || 0;
if (yellowStarCount === 3) {
let description = '';
if (i + 1 < rows.length) {
const nextRow = rows[i + 1];
const nextCells = nextRow.querySelectorAll('td');
if (nextCells.length === 1 || nextCells[0].colSpan === 8) {
const descPara = nextRow.querySelector('p');
if (descPara) {
description = descPara.textContent?.trim() || '';
}
}
}
events.push({
country: country,
date: currentDate,
time: time,
event: eventName,
actual: cells[7]?.textContent?.trim() || '',
forecast: cells[6]?.textContent?.trim() || '',
previous: cells[5]?.textContent?.trim() || '',
importance: 'High',
description: description
});
}
}
}
return events;
"#;
let result = client.execute(extraction_script, vec![]).await?;
if let Some(events_array) = result.as_array() {
let mut events = Vec::new();
for event_value in events_array {
if let Some(event_obj) = event_value.as_object() {
let event = EconomicEvent {
country: event_obj
.get("country")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
date: event_obj
.get("date")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
time: event_obj
.get("time")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
event: event_obj
.get("event")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
actual: event_obj
.get("actual")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
forecast: event_obj
.get("forecast")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
previous: event_obj
.get("previous")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
importance: event_obj
.get("importance")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
description: event_obj
.get("description")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string(),
};
events.push(event);
}
}
println!("Extracted {} events (3 YELLOW stars ONLY)", events.len());
return Ok(events);
}
Ok(vec![])
}
/// Fill the calendar's from/to date inputs with `start` and `end` and fire
/// `input`/`change` events so the page reacts, then wait 1 s for the table
/// to refresh. The script returns whether both inputs were found, but that
/// result is intentionally ignored.
async fn set_date_range(client: &fantoccini::Client, start: &str, end: &str) -> anyhow::Result<()> {
    let fill_dates_js = format!(
        r#"
(() => {{
const fromInput = document.querySelector('#dtTeletraderFromDate');
const toInput = document.querySelector('#dtTeletraderEndDate');
if (fromInput) {{
fromInput.value = '{from_date}';
fromInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
fromInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
}}
if (toInput) {{
toInput.value = '{to_date}';
toInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
toInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
}}
return !!fromInput && !!toInput;
}})()
"#,
        from_date = start,
        to_date = end
    );
    client.execute(&fill_dates_js, vec![]).await?;
    sleep(Duration::from_millis(1000)).await;
    Ok(())
}
/// Parse a `"YYYY-MM-DD"` string (surrounding whitespace ignored) into a
/// [`NaiveDate`]; returns `None` for malformed input.
fn parse_date(date: &str) -> Option<NaiveDate> {
    let cleaned = date.trim();
    NaiveDate::parse_from_str(cleaned, "%Y-%m-%d").ok()
}
/// Return the day after the latest parseable event date, formatted
/// `"YYYY-MM-DD"` — i.e. where the next scraping window should begin.
///
/// Improvements over the original: uses `Iterator::max` (O(n)) instead of
/// collect + sort, and turns the two `unwrap()` panic paths into errors.
///
/// # Errors
/// Fails when no event carries a parseable date, or when the successor day
/// would overflow the calendar (practically unreachable).
fn calculate_next_start_date(events: &[EconomicEvent]) -> anyhow::Result<String> {
    let latest = events
        .iter()
        .filter_map(|e| parse_date(&e.date))
        .max()
        .ok_or_else(|| anyhow::anyhow!("No parseable dates found"))?;
    let next = latest
        .succ_opt()
        .ok_or_else(|| anyhow::anyhow!("Date overflow computing successor of {}", latest))?;
    Ok(next.format("%Y-%m-%d").to_string())
}
/// Storage key: date|time|event (for exact occurrence deduplication)
fn event_lookup_key(event: &EconomicEvent) -> String {
    [
        event.date.as_str(),
        event.time.as_str(),
        event.event.as_str(),
    ]
    .join("|")
}
/// Identity key: country|event|date (to distinguish recurring monthly/quarterly events)
/// This prevents treating December and January releases of the same recurring event as reschedules
fn event_identity_key(event: &EconomicEvent) -> String {
    [
        event.country.as_str(),
        event.event.as_str(),
        event.date.as_str(),
    ]
    .join("|")
}
/// Compare two events and detect changes in future data
///
/// Only events dated strictly after `now` ("YYYY-MM-DD") are examined; past
/// or same-day events yield no changes. Each differing field among
/// actual/forecast/previous/description produces one [`EventChange`] stamped
/// with the current local time.
fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, now: &str) -> Vec<EventChange> {
    let detected_at = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
    println!(
        "🔍 Checking event: {} on {} (now: {})",
        new.event, new.date, now
    );
    // Only track changes for future events.
    if new.date.as_str() <= now {
        println!(" ⏭️ Skipped: Event is in the past/today");
        return Vec::new();
    }
    println!(" 📅 Event is in the future - checking for changes...");
    // (label, old value, new value) — label serves as both change_type and
    // field_changed, exactly as in the original tuples.
    let tracked: [(&str, &String, &String); 4] = [
        ("actual", &old.actual, &new.actual),
        ("forecast", &old.forecast, &new.forecast),
        ("previous", &old.previous, &new.previous),
        ("description", &old.description, &new.description),
    ];
    let mut found = Vec::new();
    for (field, before, after) in tracked {
        if before == after {
            continue;
        }
        println!(
            " 📝 CHANGE DETECTED in '{}': '{}' -> '{}'",
            field, before, after
        );
        found.push(EventChange {
            date: new.date.clone(),
            event: new.event.clone(),
            country: new.country.clone(),
            change_type: field.to_string(),
            field_changed: field.to_string(),
            old_value: before.to_string(),
            new_value: after.to_string(),
            detected_at: detected_at.clone(),
        });
    }
    if found.is_empty() {
        println!(" ✅ No changes detected");
    } else {
        println!(" 🎯 Total changes: {}", found.len());
    }
    found
}
/// Build an identity lookup keyed by `event_identity_key` (country|event|date),
/// mapping to `(storage_lookup_key, event)`.
///
/// NOTE(review): the old doc claimed this finds the "most recent occurrence" —
/// it does not. When two stored events share an identity, whichever is
/// visited last during HashMap iteration wins, and that order is arbitrary.
/// The sole caller (`scrape_and_update`) currently never reads this map's
/// result, so the nondeterminism is latent — confirm before relying on it.
fn build_identity_lookup(
    events: &HashMap<String, EconomicEvent>,
) -> HashMap<String, (String, EconomicEvent)> {
    let mut identity_map: HashMap<String, (String, EconomicEvent)> = HashMap::new();
    for (lookup_key, event) in events {
        let identity = event_identity_key(event);
        // Overwrites any earlier entry with the same identity (arbitrary order).
        identity_map.insert(identity, (lookup_key.clone(), event.clone()));
    }
    identity_map
}
/// Build a separate lookup for detecting time-only changes (same date, different time)
fn build_date_event_lookup(
events: &HashMap<String, EconomicEvent>,
) -> HashMap<String, Vec<(String, EconomicEvent)>> {
let mut date_event_map: HashMap<String, Vec<(String, EconomicEvent)>> = HashMap::new();
for (lookup_key, event) in events {
let key = format!("{}|{}|{}", event.country, event.event, event.date);
date_event_map.entry(key).or_default().push((lookup_key.clone(), event.clone()));
}
date_event_map
}
/// Scan the economic_events directory for existing chunks
///
/// Looks for files named `chunk_<start>_<end>.json`, validates that each one
/// deserializes as a `Vec<EconomicEvent>`, and returns their metadata sorted
/// by start date. Creates the directory (returning no chunks) when it does
/// not exist yet. Unreadable or malformed files are silently skipped.
async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
    let events_dir = PathBuf::from("economic_events");
    if !events_dir.exists() {
        fs::create_dir_all(&events_dir).await?;
        println!("📁 Created economic_events directory");
        return Ok(vec![]);
    }
    let mut chunks = Vec::new();
    let mut entries = fs::read_dir(&events_dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
                // The filename itself encodes the date range: chunk_<start>_<end>.
                if let Some(dates) = filename.strip_prefix("chunk_") {
                    let parts: Vec<&str> = dates.split('_').collect();
                    if parts.len() == 2 {
                        if let Ok(content) = fs::read_to_string(&path).await {
                            // Only count files whose contents actually parse;
                            // a corrupt file is ignored rather than fatal.
                            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content)
                            {
                                chunks.push(ChunkInfo {
                                    start_date: parts[0].to_string(),
                                    end_date: parts[1].to_string(),
                                    path: path.clone(),
                                    event_count: events.len(),
                                });
                            }
                        }
                    }
                }
            }
        }
    }
    // Lexicographic sort works because the dates are ISO "YYYY-MM-DD".
    chunks.sort_by(|a, b| a.start_date.cmp(&b.start_date));
    if !chunks.is_empty() {
        println!("\n📊 Found {} existing chunks:", chunks.len());
        for chunk in &chunks {
            println!(
                "{} to {} ({} events)",
                chunk.start_date, chunk.end_date, chunk.event_count
            );
        }
    } else {
        println!("🔭 No existing chunks found");
    }
    Ok(chunks)
}
/// Calculate target end date: first day of month, 3 months from now
fn calculate_target_end_date() -> String {
    let today = Local::now().naive_local().date();
    // Advance three calendar months, rolling the year when needed.
    let mut year = today.year();
    let mut month = today.month() + 3;
    if month > 12 {
        month -= 12;
        year += 1;
    }
    NaiveDate::from_ymd_opt(year, month, 1)
        .unwrap()
        .format("%Y-%m-%d")
        .to_string()
}
/// Load all events from existing chunks into a HashMap
///
/// Keys are `event_lookup_key` strings (date|time|event). Files that fail
/// to read or deserialize are skipped silently; later chunks overwrite
/// earlier entries that share a key.
async fn load_existing_events(
    chunks: &[ChunkInfo],
) -> anyhow::Result<HashMap<String, EconomicEvent>> {
    let mut event_map = HashMap::new();
    for chunk in chunks {
        let Ok(content) = fs::read_to_string(&chunk.path).await else {
            continue;
        };
        let Ok(parsed) = serde_json::from_str::<Vec<EconomicEvent>>(&content) else {
            continue;
        };
        for event in parsed {
            event_map.insert(event_lookup_key(&event), event);
        }
    }
    println!("📥 Loaded {} events from existing chunks", event_map.len());
    Ok(event_map)
}
/// Save or append changes to monthly change files
///
/// Changes are grouped by the month of their event date and appended to
/// `economic_event_changes/event_changes_<MM>_<YYYY>.json`. Changes whose
/// date cannot be parsed are dropped; a corrupt existing file is treated as
/// empty rather than aborting the run.
///
/// Fix: each monthly file was previously read and deserialized TWICE (once
/// to count, once for the contents) — it is now read once.
async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
    if changes.is_empty() {
        println!(" No changes to save");
        return Ok(());
    }
    println!("\n💾 Saving {} changes...", changes.len());
    let changes_dir = PathBuf::from("economic_event_changes");
    fs::create_dir_all(&changes_dir).await?;
    // Group changes by month of the event date.
    let mut changes_by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
    for change in changes {
        if let Some(date) = parse_date(&change.date) {
            let month_key = format!("{:02}_{}", date.month(), date.year());
            changes_by_month
                .entry(month_key)
                .or_default()
                .push(change.clone());
        }
    }
    println!("📂 Grouped into {} month(s)", changes_by_month.len());
    // Append each month's changes to its file.
    for (month_key, month_changes) in changes_by_month {
        let filename = format!("event_changes_{}.json", month_key);
        let filepath = changes_dir.join(&filename);
        // Load existing changes once; a missing or corrupt file counts as empty.
        let mut all_changes = if filepath.exists() {
            let content = fs::read_to_string(&filepath).await?;
            serde_json::from_str::<Vec<EventChange>>(&content).unwrap_or_default()
        } else {
            Vec::new()
        };
        let existing_count = all_changes.len();
        // Append new changes and write the combined list back.
        all_changes.extend(month_changes.iter().cloned());
        let json = serde_json::to_string_pretty(&all_changes)?;
        fs::write(&filepath, json).await?;
        println!(
            "{}: {} existing + {} new = {} total changes",
            filename,
            existing_count,
            month_changes.len(),
            all_changes.len()
        );
    }
    Ok(())
}
/// Reorganize events into optimal chunks and save them
///
/// Sorts all events by date, partitions them into chunks spanning at most
/// ~100 days or 500 events, deletes every existing `chunk_*` file in
/// `economic_events/`, and writes fresh files named
/// `chunk_<min>_<max>.json`. Events with unparseable dates are dropped.
async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
    if events.is_empty() {
        return Ok(());
    }
    let events_dir = PathBuf::from("economic_events");
    fs::create_dir_all(&events_dir).await?;
    // Convert to a vector sorted by ISO date (lexicographic == chronological).
    let mut all_events: Vec<EconomicEvent> = events.into_values().collect();
    all_events.sort_by(|a, b| a.date.cmp(&b.date));
    // Group events by date ranges (chunks of ~100 days or similar).
    let mut chunks: Vec<Vec<EconomicEvent>> = Vec::new();
    let mut current_chunk = Vec::new();
    let mut current_start_date: Option<NaiveDate> = None;
    for event in all_events {
        // NOTE(review): events whose date fails to parse are silently
        // discarded here — confirm that is intended.
        if let Some(event_date) = parse_date(&event.date) {
            if let Some(start) = current_start_date {
                // Start new chunk if we've gone 100+ days or have 500+ events.
                if (event_date - start).num_days() > 100 || current_chunk.len() >= 500 {
                    chunks.push(current_chunk);
                    current_chunk = Vec::new();
                    current_start_date = Some(event_date);
                }
            } else {
                current_start_date = Some(event_date);
            }
            current_chunk.push(event);
        }
    }
    if !current_chunk.is_empty() {
        chunks.push(current_chunk);
    }
    // Delete old chunk files before writing the new layout.
    // NOTE(review): matches any file whose stem starts with "chunk_"
    // regardless of extension — broader than the .json files scanned
    // elsewhere; confirm that is intended.
    let mut entries = fs::read_dir(&events_dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
            if filename.starts_with("chunk_") {
                fs::remove_file(&path).await?;
            }
        }
    }
    // Save new optimized chunks.
    for chunk in chunks {
        if chunk.is_empty() {
            continue;
        }
        // The unwraps are safe: every event in a chunk passed parse_date
        // above and the chunk is non-empty.
        let start = chunk
            .iter()
            .filter_map(|e| parse_date(&e.date))
            .min()
            .unwrap()
            .format("%Y-%m-%d")
            .to_string();
        let end = chunk
            .iter()
            .filter_map(|e| parse_date(&e.date))
            .max()
            .unwrap()
            .format("%Y-%m-%d")
            .to_string();
        let filename = format!("chunk_{}_{}.json", start, end);
        let filepath = events_dir.join(&filename);
        let json = serde_json::to_string_pretty(&chunk)?;
        fs::write(&filepath, json).await?;
        println!(
            "💾 Saved optimized chunk: {} ({} events)",
            filename,
            chunk.len()
        );
    }
    Ok(())
}
/// Scrape and update data with change tracking
///
/// Walks the date window [`start`, `end`] in batches: sets the page's date
/// range, extracts the visible events, and reconciles each one against
/// `existing_events` (mutated in place). Three cases per scraped event:
///   A) exact key match (date|time|event)     → diff fields, refresh entry;
///   B) same country/event/date, other time   → record reschedule, replace entry;
///   C) otherwise                             → insert as new (tracked if future).
/// Returns all recorded changes plus the storage keys removed by reschedules.
async fn scrape_and_update(
    client: &fantoccini::Client,
    start: &str,
    end: &str,
    existing_events: &mut HashMap<String, EconomicEvent>,
) -> anyhow::Result<ScrapeResult> {
    println!("\n🎯 Scraping range: {} to {}", start, end);
    let mut current_start = start.to_string();
    let mut all_changes = Vec::new();
    let mut all_removed_keys = HashSet::new();
    // "Future" below means strictly after this local date string.
    let now = Local::now()
        .naive_local()
        .date()
        .format("%Y-%m-%d")
        .to_string();
    println!("📅 Current date for comparison: {}", now);
    println!("🔍 Starting change detection...\n");
    loop {
        set_date_range(client, &current_start, end).await?;
        sleep(Duration::from_secs(3)).await;
        let events = extract_all_data_via_js(client).await?;
        if events.is_empty() {
            println!(" ✅ No more events in this range");
            break;
        }
        println!(" 📦 Fetched {} events", events.len());
        // Build lookups for existing events.
        // NOTE(review): `identity_lookup` (and `identity_key` below) are
        // computed but never read — dead code left from an earlier design.
        // NOTE(review): these snapshots are built once per batch and go
        // stale as `existing_events` is mutated inside the loop below;
        // confirm that is acceptable.
        let identity_lookup = build_identity_lookup(existing_events);
        let date_event_lookup = build_date_event_lookup(existing_events);
        let mut events_to_remove: Vec<String> = Vec::new();
        // Process events: detect changes and update map.
        let mut new_events_count = 0;
        let mut updated_events_count = 0;
        let mut time_changed_events_count = 0;
        for new_event in events.clone() {
            let lookup_key = event_lookup_key(&new_event);
            let identity_key = event_identity_key(&new_event);
            // CASE A: Exact match (same date/time/event)
            if let Some(old_event) = existing_events.get(&lookup_key) {
                println!("\n 🔎 Comparing existing event:");
                println!(" Event: {}", new_event.event);
                println!(" Date: {} | Time: {}", new_event.date, new_event.time);
                let changes = detect_changes(old_event, &new_event, &now);
                if !changes.is_empty() {
                    println!("{} change(s) detected and recorded!", changes.len());
                    all_changes.extend(changes);
                    updated_events_count += 1;
                }
                // CRITICAL: Always update the event in the map with latest data
                existing_events.insert(lookup_key, new_event);
                continue;
            }
            // CASE B: Check if time changed for same date/event
            let date_event_key = format!("{}|{}|{}", new_event.country, new_event.event, new_event.date);
            if let Some(existing_occurrences) = date_event_lookup.get(&date_event_key) {
                // Find if there's an existing event with different time.
                if let Some((old_lookup_key, old_event)) = existing_occurrences.iter()
                    .find(|(key, _)| key != &lookup_key) {
                    println!("\n 🕐 TIME CHANGE DETECTED:");
                    println!(" Event: {}", new_event.event);
                    println!(" Date: {}", new_event.date);
                    println!(" Old time: {} | New time: {}", old_event.time, new_event.time);
                    // Track time change — only future events get a change
                    // record, but the stored entry is replaced either way.
                    if new_event.date.as_str() > now.as_str() {
                        let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
                        all_changes.push(EventChange {
                            date: new_event.date.clone(),
                            event: new_event.event.clone(),
                            country: new_event.country.clone(),
                            change_type: "time".to_string(),
                            field_changed: "time".to_string(),
                            old_value: old_event.time.clone(),
                            new_value: new_event.time.clone(),
                            detected_at: timestamp,
                        });
                        println!(" 📝 Time change recorded");
                    }
                    // Check for other field changes too.
                    let field_changes = detect_changes(old_event, &new_event, &now);
                    if !field_changes.is_empty() {
                        println!(
                            "{} additional field change(s) detected!",
                            field_changes.len()
                        );
                        all_changes.extend(field_changes);
                    }
                    // Remove old occurrence and add new one (removal is
                    // deferred so the snapshot lookups stay usable).
                    events_to_remove.push(old_lookup_key.clone());
                    all_removed_keys.insert(old_lookup_key.clone());
                    existing_events.insert(lookup_key, new_event);
                    time_changed_events_count += 1;
                    continue;
                }
            }
            // CASE C: New event
            new_events_count += 1;
            println!(
                " New event: {} on {} @ {}",
                new_event.event, new_event.date, new_event.time
            );
            // Track as newly added if it's a future event.
            if new_event.date.as_str() > now.as_str() {
                let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
                all_changes.push(EventChange {
                    date: new_event.date.clone(),
                    event: new_event.event.clone(),
                    country: new_event.country.clone(),
                    change_type: "newly_added".to_string(),
                    field_changed: "new_event".to_string(),
                    old_value: "".to_string(),
                    new_value: format!("{} @ {}", new_event.date, new_event.time),
                    detected_at: timestamp,
                });
            }
            existing_events.insert(lookup_key, new_event);
        }
        // Remove old occurrences of time-changed events.
        for key in events_to_remove {
            existing_events.remove(&key);
        }
        println!("\n 📊 Batch summary:");
        println!(" New events: {}", new_events_count);
        println!(" Updated events: {}", updated_events_count);
        println!(" Time changed events: {}", time_changed_events_count);
        println!(" Changes tracked: {}", all_changes.len());
        // Advance the window to the day after the newest fetched event.
        let next = match calculate_next_start_date(&events) {
            Ok(n) => n,
            Err(_) => {
                println!(" ⚠️ Cannot calculate next date, stopping");
                break;
            }
        };
        // Lexicographic compare is valid because both are ISO "YYYY-MM-DD".
        if next > end.to_string() {
            println!(" ✅ Reached end of range");
            break;
        }
        current_start = next;
        sleep(Duration::from_secs(2)).await;
    }
    println!("\n🎯 SCRAPE COMPLETE:");
    println!(" Total changes detected: {}", all_changes.len());
    println!(" Total events removed (time changes): {}", all_removed_keys.len());
    Ok(ScrapeResult {
        changes: all_changes,
        removed_keys: all_removed_keys,
    })
}
/// Main logic with intelligent update handling
///
/// Decides the scrape window based on what is already on disk:
/// - no stored data            → full historical scrape from 2007-02-13;
/// - data ends before today    → resume the day after the last stored date;
/// - data reaches today/beyond → re-scrape from today to refresh future events.
/// In every case the detected changes and the re-chunked event set are
/// persisted afterwards.
///
/// Fixes: the dead `else if max_date >= now` (always true once `max_date <
/// now` failed) is now a plain `else`, and the scrape/save sequence —
/// previously copy-pasted three times — is unified; the completion message
/// now also prints on the fresh-scrape path.
async fn run_intelligent_update(client: &fantoccini::Client) -> anyhow::Result<()> {
    let now = Local::now()
        .naive_local()
        .date()
        .format("%Y-%m-%d")
        .to_string();
    let target_end = calculate_target_end_date();
    println!("📅 Today: {}", now);
    println!("🎯 Target end date: {}", target_end);
    // Load existing chunks into memory.
    let chunks = scan_existing_chunks().await?;
    let mut existing_events = load_existing_events(&chunks).await?;
    // Choose where this run's scrape window begins.
    let scrape_start = if existing_events.is_empty() {
        println!("\n🔭 No existing data - starting fresh scrape from 2007-02-13");
        "2007-02-13".to_string()
    } else {
        // Find date range of existing data.
        let dates: Vec<NaiveDate> = existing_events
            .values()
            .filter_map(|e| parse_date(&e.date))
            .collect();
        let min_date = dates.iter().min().unwrap().format("%Y-%m-%d").to_string();
        let max_date = dates.iter().max().unwrap().format("%Y-%m-%d").to_string();
        println!("📊 Existing data range: {} to {}", min_date, max_date);
        if max_date < now {
            // Data is in the past: resume the day after the newest stored date.
            let next_start = parse_date(&max_date)
                .and_then(|d| d.succ_opt())
                .map(|d| d.format("%Y-%m-%d").to_string())
                .unwrap_or(max_date);
            println!(
                "\n📈 Updating from end of existing data: {} to {}",
                next_start, target_end
            );
            next_start
        } else {
            // Data extends to or beyond today: refresh future data in place.
            println!(
                "\n🔄 Data exists up to today - refreshing future data: {} to {}",
                now, target_end
            );
            now.clone()
        }
    };
    // scrape_and_update mutates existing_events in place; afterwards persist
    // both the change log and the re-optimized chunk files.
    let result =
        scrape_and_update(client, &scrape_start, &target_end, &mut existing_events).await?;
    save_changes(&result.changes).await?;
    save_optimized_chunks(existing_events).await?;
    println!("\n✅ Update complete!");
    Ok(())
}
use tokio::signal;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
let port = 9515;
let mut chromedriver = start_chromedriver(port);
sleep(Duration::from_secs(1)).await;
// === Ensure data directories exist ===
util::ensure_data_dirs().await?;
let caps_value = serde_json::json!({
"goog:chromeOptions": {
"args": [
"--disable-gpu",
"--disable-notifications",
"--disable-popup-blocking",
"--disable-blink-features=AutomationControlled",
],
"excludeSwitches": ["enable-automation"]
}
});
// === Load configuration ===
let config = config::Config::default();
let caps_map: Map<String, Value> = caps_value
.as_object()
.expect("Capabilities should be a JSON object")
.clone();
// === Start ChromeDriver ===
let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe")
.args(["--port=9515"])
.spawn()?;
let mut client = ClientBuilder::native()
.capabilities(caps_map)
.connect(&format!("http://localhost:{}", port))
let client = ClientBuilder::native()
.connect("http://localhost:9515")
.await?;
// Setup graceful shutdown
let shutdown_client = client.clone();
// Graceful shutdown
let client_clone = client.clone();
tokio::spawn(async move {
signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
println!("\nCtrl+C received, shutting down...");
shutdown_client.close().await.ok();
signal::ctrl_c().await.unwrap();
client_clone.close().await.ok();
std::process::exit(0);
});
// Navigate to page
let url = "https://www.finanzen.net/termine/wirtschaftsdaten/";
client.goto(url).await?;
// === Economic Calendar Update ===
println!("Updating Economic Calendar (High Impact Only)");
economic::goto_and_prepare(&client).await?;
economic::run_full_update(&client, &config).await?;
dismiss_overlays(&client).await?;
// Click high importance tab
if let Ok(tab) = client
.find(Locator::Css(
r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#,
))
.await
{
tab.click().await?;
println!("✓ High importance tab clicked");
sleep(Duration::from_secs(2)).await;
}
// Run intelligent update
run_intelligent_update(&client).await?;
// Display final summary
let chunks = scan_existing_chunks().await?;
let final_events = load_existing_events(&chunks).await?;
println!("\n📊 FINAL SUMMARY:");
println!(" • Total chunks: {}", chunks.len());
println!(" • Total events: {}", final_events.len());
// === Corporate Earnings Update ===
println!("\nUpdating Corporate Earnings");
let tickers = config::get_tickers();
corporate::run_full_update(tickers, &config).await?;
// === Cleanup ===
client.close().await?;
chromedriver.kill()?;
child.kill()?;
println!("\nAll data updated successfully!");
Ok(())
}