adding corporate data to webscraper
.gitignore (vendored): 5 changes
@@ -24,4 +24,7 @@ target/
 /chromedriver-win64/*
 /economic_events*
 /economic_event_changes*
+/corporate_events*
+/corporate_prices*
+/corporate_event_changes*
Cargo.lock (generated): 1301 changes
File diff suppressed because it is too large.
Cargo.toml: 44 changes
@@ -1,14 +1,40 @@
 [package]
-name = "WebScraper"
+name = "event_backtest_engine"
 version = "0.1.0"
-edition = "2024"
+edition = "2021"
+authors = ["Your Name <you@example.com>"]
+description = "High-impact economic & corporate earnings data collector for short-event backtesting (overnight/weekend gaps)"
+license = "MIT OR Apache-2.0"
+repository = "https://github.com/yourname/event_backtest_engine"
+keywords = ["finance", "earnings", "economic-calendar", "backtesting", "quant"]
+categories = ["finance", "data-structures", "asynchronous"]
+
+# ===================================================================
+# Dependencies
+# ===================================================================
 [dependencies]
-fantoccini = { version = "0.21.5", default-features = false, features = ["native-tls"] }
-tokio = { version = "1", features = ["full"] }
-serde = { version = "1", features = ["derive"] }
-anyhow = "1.0"
-futures = "0.3"
+# Async runtime
+tokio = { version = "1.38", features = ["full"] }
+
+# Web scraping & HTTP
+reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate"] }
+scraper = "0.19"   # HTML parsing for Yahoo earnings pages
+fantoccini = { version = "0.20", features = ["rustls-tls"] }   # Headless Chrome for finanzen.net
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
-chrono = "0.4.42"
-regex = "1.0"
+
+# Date & time
+chrono = { version = "0.4", features = ["serde"] }
+
+# Error handling
+anyhow = "1.0"
+
+# Logging (optional but recommended)
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
+
+# Parallel processing (for batch tickers)
+futures = "0.3"
+rayon = "1.10"   # optional: for parallel price downloads
src/config.rs (new file): 38 lines
@@ -0,0 +1,38 @@
// src/config.rs
#[derive(Debug, Clone)]
pub struct Config {
    // Economic calendar start (usually the earliest available on finanzen.net)
    pub economic_start_date: String, // e.g. "2007-02-13"

    // Corporate earnings & price history start
    pub corporate_start_date: String, // e.g. "2000-01-01" or "2010-01-01"

    // How far into the future we scrape economic events
    pub economic_lookahead_months: u32, // default: 3
}

impl Default for Config {
    fn default() -> Self {
        Self {
            economic_start_date: "2007-02-13".to_string(),
            corporate_start_date: "2010-01-01".to_string(),
            economic_lookahead_months: 3,
        }
    }
}

impl Config {
    pub fn target_end_date(&self) -> String {
        let now = chrono::Local::now().naive_local().date();
        let future = now + chrono::Duration::days(30 * self.economic_lookahead_months as i64);
        future.format("%Y-%m-%d").to_string()
    }
}

pub fn get_tickers() -> Vec<String> {
    vec![
        "AAPL", "MSFT", "NVDA", "GOOGL", "AMZN",
        "TSLA", "META", "JPM", "V", "WMT",
        // ... your 100–500 tickers here
    ].into_iter().map(String::from).collect()
}
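A minimal usage sketch for the config API above (illustrative, not part of the commit). It assumes it runs inside this crate, where main.rs declares `mod config;`; the printed end date depends on the day it runs.

// Illustrative driver for src/config.rs (not in the commit).
fn main() {
    let cfg = config::Config::default();
    // 3 lookahead months are approximated as 3 * 30 days:
    println!("economic scrape window ends {}", cfg.target_end_date());
    println!("first tickers: {:?}", &config::get_tickers()[..2]);
}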
src/corporate/mod.rs (new file): 8 lines
@@ -0,0 +1,8 @@
// src/corporate/mod.rs
pub mod types;
pub mod scraper;
pub mod storage;
pub mod update;

pub use types::*;
pub use update::run_full_update;
src/corporate/scraper.rs (new file): 116 lines
@@ -0,0 +1,116 @@
// src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice};
use reqwest::Client;
use scraper::{Html, Selector};
use chrono::{NaiveDate, Datelike};

const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
    let client = Client::new();
    let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings");

    let text = client
        .get(&url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await?
        .text()
        .await?;

    let document = Html::parse_document(&text);
    let row_sel = Selector::parse(r#"table tbody tr"#).unwrap();
    let mut events = Vec::new();

    for row in document.select(&row_sel) {
        let cols: Vec<String> = row.text().map(|s| s.trim().to_string()).collect();
        if cols.len() < 4 { continue; }

        let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]);
        let date = match parse_yahoo_date(raw_date) {
            Ok(d) => d,
            Err(_) => continue,
        };

        let eps_forecast = parse_float(&cols[1]);
        let eps_actual = parse_float(&cols[2]);

        let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
            if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
        } else { None };

        let time = if cols[0].contains("After") || cols[0].contains("AMC") {
            "AMC".to_string()
        } else if cols[0].contains("Before") || cols[0].contains("BMO") {
            "BMO".to_string()
        } else {
            "".to_string()
        };

        events.push(CompanyEvent {
            ticker: ticker.to_string(),
            date: date.format("%Y-%m-%d").to_string(),
            time,
            period: cols.get(3).cloned().unwrap_or_default(),
            eps_forecast,
            eps_actual,
            revenue_forecast: None,
            revenue_actual: None,
            surprise_pct,
            source: "Yahoo".to_string(),
        });
    }

    Ok(events)
}

pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> {
    let client = Client::new();
    let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")?
        .and_hms_opt(0, 0, 0).unwrap().and_utc()
        .timestamp();

    let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")?
        .succ_opt().unwrap()
        .and_hms_opt(0, 0, 0).unwrap().and_utc()
        .timestamp();

    let url = format!(
        "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true"
    );

    let csv = client
        .get(&url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await?
        .text()
        .await?;

    let mut prices = Vec::new();
    for line in csv.lines().skip(1) {
        let cols: Vec<&str> = line.split(',').collect();
        if cols.len() < 7 { continue; }
        prices.push(CompanyPrice {
            ticker: ticker.to_string(),
            date: cols[0].to_string(),
            open: cols[1].parse()?,
            high: cols[2].parse()?,
            low: cols[3].parse()?,
            close: cols[4].parse()?,
            adj_close: cols[5].parse()?,
            volume: cols[6].parse()?,
        });
    }
    Ok(prices)
}

fn parse_float(s: &str) -> Option<f64> {
    s.replace("--", "").replace(",", "").parse::<f64>().ok()
}

fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    NaiveDate::parse_from_str(s, "%b %d, %Y")
        .or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y"))
        .map_err(|_| anyhow::anyhow!("Bad date: {s}"))
}
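A hypothetical driver for the two fetchers above (not in this commit). It assumes it lives inside this crate with `fetch_earnings_history` and `fetch_price_history` in scope; the ticker and date range are placeholders, and the calls hit Yahoo's live endpoints, so output depends on what those endpoints still serve.

// Hypothetical driver (not in the commit); ticker and dates are placeholders.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let events = fetch_earnings_history("AAPL").await?;
    println!("{} earnings rows scraped", events.len());

    let prices = fetch_price_history("AAPL", "2024-01-02", "2024-03-28").await?;
    if let Some(last) = prices.last() {
        println!("last bar: {} close {}", last.date, last.close);
    }
    Ok(())
}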
src/corporate/storage.rs (new file): 64 lines
@@ -0,0 +1,64 @@
// src/corporate/storage.rs
use super::types::{CompanyEvent, CompanyPrice};
use std::collections::{HashMap, HashSet};
use tokio::fs;
use chrono::{Local, NaiveDate};

/// Load all events from disk into a HashMap<ticker|date, event>
async fn load_all_events_map() -> anyhow::Result<HashMap<String, CompanyEvent>> {
    let mut map = HashMap::new();
    let dir = std::path::Path::new("corporate_events");
    if !dir.exists() {
        return Ok(map);
    }

    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            let content = fs::read_to_string(&path).await?;
            if let Ok(events) = serde_json::from_str::<Vec<CompanyEvent>>(&content) {
                for event in events {
                    let key = format!("{}|{}", event.ticker, event.date);
                    map.insert(key, event);
                }
            }
        }
    }
    Ok(map)
}

/// Merge new events with existing ones and save back to disk
pub async fn merge_and_save_events(ticker: &str, new_events: Vec<CompanyEvent>) -> anyhow::Result<()> {
    let mut existing = load_all_events_map().await?;

    // Insert or update
    for event in new_events {
        let key = format!("{}|{}", event.ticker, event.date);
        existing.insert(key, event);
    }

    // Convert back to Vec and save (simple single file for now)
    let all_events: Vec<CompanyEvent> = existing.into_values().collect();
    let dir = std::path::Path::new("corporate_events");
    fs::create_dir_all(dir).await?;
    let path = dir.join("all_events.json");
    let json = serde_json::to_string_pretty(&all_events)?;
    fs::write(&path, json).await?;
    Ok(())
}

/// Save price history for a single ticker (overwrite old file)
pub async fn save_prices_for_ticker(ticker: &str, prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
    let dir = std::path::Path::new("corporate_prices");
    fs::create_dir_all(dir).await?;
    let path = dir.join(format!("{}.json", ticker));

    // Optional: sort by date
    let mut prices = prices;
    prices.sort_by_key(|p| p.date.clone());

    let json = serde_json::to_string_pretty(&prices)?;
    fs::write(&path, json).await?;
    Ok(())
}
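The `ticker|date` map key above is what makes `merge_and_save_events` idempotent: re-scraping a quarter overwrites the stored row instead of appending a duplicate. A tiny sketch of that property (illustrative, not part of the commit):

// Illustrative only: the "ticker|date" key deduplicates re-scraped events.
use std::collections::HashMap;

fn main() {
    let mut map: HashMap<String, &str> = HashMap::new();
    map.insert("AAPL|2024-05-02".to_string(), "eps_actual: 1.52");
    // A second scrape of the same quarter replaces the entry; count stays 1.
    map.insert("AAPL|2024-05-02".to_string(), "eps_actual: 1.53");
    assert_eq!(map.len(), 1);
}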
src/corporate/types.rs (new file): 38 lines
@@ -0,0 +1,38 @@
// src/corporate/types.rs
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CompanyEvent {
    pub ticker: String,
    pub date: String,              // YYYY-MM-DD
    pub time: String,              // "AMC", "BMO", "TAS", or ""
    pub period: String,            // "Q1 2025", "FY 2024"
    pub eps_forecast: Option<f64>,
    pub eps_actual: Option<f64>,
    pub revenue_forecast: Option<f64>,
    pub revenue_actual: Option<f64>,
    pub surprise_pct: Option<f64>, // (actual - forecast) / |forecast|
    pub source: String,            // "Yahoo"
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyPrice {
    pub ticker: String,
    pub date: String, // YYYY-MM-DD
    pub open: f64,
    pub high: f64,
    pub low: f64,
    pub close: f64,
    pub adj_close: f64,
    pub volume: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEventChange {
    pub ticker: String,
    pub date: String,
    pub field: String, // "time", "eps_forecast", "eps_actual", "new_event"
    pub old_value: String,
    pub new_value: String,
    pub detected_at: String,
}
src/corporate/update.rs (new file): 31 lines
@@ -0,0 +1,31 @@
// src/corporate/update.rs
use super::{scraper::*, storage::*, types::*};
use crate::config::Config;

use chrono::Local;
use std::collections::{HashMap, HashSet};

pub async fn run_full_update(tickers: Vec<String>, config: &Config) -> anyhow::Result<()> {
    println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date);

    let today = chrono::Local::now().format("%Y-%m-%d").to_string();

    for ticker in tickers {
        print!("  → {:6} ", ticker);

        // Earnings
        if let Ok(events) = fetch_earnings_history(&ticker).await {
            merge_and_save_events(&ticker, events.clone()).await?;
            println!("{} earnings", events.len());
        }

        // Prices – now using config.corporate_start_date
        if let Ok(prices) = fetch_price_history(&ticker, &config.corporate_start_date, &today).await {
            save_prices_for_ticker(&ticker, prices).await?;
        }

        tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
    }
    Ok(())
}
src/economic/extraction_script.js (new file): 60 lines
@@ -0,0 +1,60 @@
// src/economic/extraction_script.js
const events = [];
let currentDate = '';

const rows = document.querySelectorAll('#TeletraderForm table tbody tr');

for (let i = 0; i < rows.length; i++) {
    const row = rows[i];
    const cells = row.querySelectorAll('td');

    if (cells.length === 1 && cells[0].colSpan === 9) {
        const dateText = cells[0].textContent.trim();
        const monthMap = {
            'Januar': '01', 'Februar': '02', 'März': '03', 'April': '04',
            'Mai': '05', 'Juni': '06', 'Juli': '07', 'August': '08',
            'September': '09', 'Oktober': '10', 'November': '11', 'Dezember': '12'
        };
        const match = dateText.match(/(\d{1,2})\.\s+([a-zA-ZäöüßÄÖÜ]+)\s+(\d{4})/);
        if (match) {
            const day = match[1].padStart(2, '0');
            const month = monthMap[match[2]] || '01';
            const year = match[3];
            currentDate = `${year}-${month}-${day}`;
        } else {
            currentDate = '';
        }
        continue;
    }

    if (cells.length >= 8) {
        const time = cells[0]?.textContent?.trim() || '';
        const country = cells[2]?.textContent?.trim() || '';
        const eventName = cells[4]?.textContent?.trim() || '';
        if (!time || !country || !eventName) continue;

        const yellowStars = cells[3]?.querySelectorAll('.icon--star.font-color-yellow').length || 0;
        if (yellowStars !== 3) continue;

        let description = '';
        if (i + 1 < rows.length) {
            const next = rows[i + 1];
            const descP = next.querySelector('p');
            if (descP) description = descP.textContent?.trim() || '';
        }

        events.push({
            country,
            date: currentDate,
            time,
            event: eventName,
            actual: cells[7]?.textContent?.trim() || '',
            forecast: cells[6]?.textContent?.trim() || '',
            previous: cells[5]?.textContent?.trim() || '',
            importance: 'High',
            description
        });
    }
}

return events;
src/economic/helpers.rs (new file): 62 lines
@@ -0,0 +1,62 @@
// src/economic/helpers.rs
use super::types::*;
use chrono::{Local, NaiveDate};
use std::collections::{HashMap, HashSet};

pub fn event_key(e: &EconomicEvent) -> String {
    format!("{}|{}|{}", e.date, e.time, e.event)
}

pub fn identity_key(e: &EconomicEvent) -> String {
    format!("{}|{}|{}", e.country, e.event, e.date.split('-').take(2).collect::<Vec<_>>().join("-"))
}

pub fn build_identity_lookup(events: &HashMap<String, EconomicEvent>) -> HashMap<String, (String, EconomicEvent)> {
    let mut map = HashMap::new();
    for (k, e) in events {
        map.insert(identity_key(e), (k.clone(), e.clone()));
    }
    map
}

pub fn build_date_event_lookup(
    events: &HashMap<String, EconomicEvent>,
) -> HashMap<String, Vec<(String, EconomicEvent)>> {
    let mut map: HashMap<String, Vec<(String, EconomicEvent)>> = HashMap::new();

    for (k, e) in events {
        let key = format!("{}|{}|{}", e.country, e.event, e.date);
        map.entry(key).or_default().push((k.clone(), e.clone()));
    }
    map
}

pub fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, today: &str) -> Vec<EventChange> {
    let mut changes = Vec::new();
    let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();

    if new.date.as_str() <= today { return changes; }

    let fields = [
        ("actual", &old.actual, &new.actual),
        ("forecast", &old.forecast, &new.forecast),
        ("previous", &old.previous, &new.previous),
        ("description", &old.description, &new.description),
    ];

    for (field, old_val, new_val) in fields {
        if old_val != new_val {
            changes.push(EventChange {
                date: new.date.clone(),
                event: new.event.clone(),
                country: new.country.clone(),
                change_type: field.to_string(),
                field_changed: field.to_string(),
                old_value: old_val.clone(),
                new_value: new_val.clone(),
                detected_at: ts.clone(),
            });
        }
    }
    changes
}
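A small check of the two keys above (illustrative, not part of the commit; assumes `EconomicEvent` and `event_key` are in scope). Moving only the scheduled time changes `event_key` but leaves the country|event|date bucket intact, which is the property the update logic uses to classify a move as a reschedule rather than a brand-new event:

// Illustrative only: a time move changes event_key but not the
// country|event|date bucket used by build_date_event_lookup.
fn main() {
    let old = EconomicEvent {
        country: "USA".into(), date: "2025-03-07".into(), time: "14:30".into(),
        event: "Nonfarm Payrolls".into(), actual: "".into(),
        forecast: "190k".into(), previous: "143k".into(),
        importance: "High".into(), description: "".into(),
    };
    let mut moved = old.clone();
    moved.time = "15:30".into();

    assert_ne!(event_key(&old), event_key(&moved));
    assert_eq!(
        format!("{}|{}|{}", old.country, old.event, old.date),
        format!("{}|{}|{}", moved.country, moved.event, moved.date),
    );
}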
src/economic/mod.rs (new file): 11 lines
@@ -0,0 +1,11 @@
// src/economic/mod.rs
pub mod types;
pub mod scraper;
pub mod storage;
pub mod update;
pub mod helpers;

pub use types::*;
pub use scraper::*;
pub use update::run_full_update;
pub use helpers::*;
src/economic/scraper.rs (new file): 84 lines
@@ -0,0 +1,84 @@
// src/economic/scraper.rs
use super::types::{EconomicEvent, ScrapeResult};
use fantoccini::Client;
use tokio::time::{sleep, Duration};
use chrono::{Local, NaiveDate};

const EXTRACTION_JS: &str = include_str!("extraction_script.js");

pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
    client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
    dismiss_overlays(client).await?;

    if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
        tab.click().await?;
        println!("High importance tab selected");
        sleep(Duration::from_secs(2)).await;
    }
    Ok(())
}

pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
    for _ in 0..10 {
        let removed: bool = client
            .execute(
                r#"(() => {
                    const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
                    if (iframe && iframe.parentNode) {
                        iframe.parentNode.removeChild(iframe);
                        return true;
                    }
                    return false;
                })()"#,
                vec![],
            )
            .await?
            .as_bool()
            .unwrap_or(false);
        if removed { break; }
        sleep(Duration::from_millis(500)).await;
    }
    Ok(())
}

pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
    let script = format!(
        r#"
        (() => {{
            const from = document.querySelector('#dtTeletraderFromDate');
            const to = document.querySelector('#dtTeletraderEndDate');
            if (from) {{ from.value = '{}'; from.dispatchEvent(new Event('change', {{bubbles: true}})); }}
            if (to) {{ to.value = '{}'; to.dispatchEvent(new Event('change', {{bubbles: true}})); }}
            return true;
        }})()
        "#,
        start, end
    );
    client.execute(&script, vec![]).await?;
    sleep(Duration::from_millis(1200)).await;
    Ok(())
}

pub async fn extract_events(client: &Client) -> anyhow::Result<Vec<EconomicEvent>> {
    let result = client.execute(EXTRACTION_JS, vec![]).await?;
    let array = result.as_array().ok_or_else(|| anyhow::anyhow!("Expected array"))?;

    let mut events = Vec::with_capacity(array.len());
    for val in array {
        if let Some(obj) = val.as_object() {
            events.push(EconomicEvent {
                country: obj["country"].as_str().unwrap_or("").to_string(),
                date: obj["date"].as_str().unwrap_or("").to_string(),
                time: obj["time"].as_str().unwrap_or("").to_string(),
                event: obj["event"].as_str().unwrap_or("").to_string(),
                actual: obj["actual"].as_str().unwrap_or("").to_string(),
                forecast: obj["forecast"].as_str().unwrap_or("").to_string(),
                previous: obj["previous"].as_str().unwrap_or("").to_string(),
                importance: "High".to_string(),
                description: obj["description"].as_str().unwrap_or("").to_string(),
            });
        }
    }
    println!("Extracted {} high-impact events", events.len());
    Ok(events)
}
src/economic/storage.rs (new file): 113 lines
@@ -0,0 +1,113 @@
// src/economic/storage.rs
use super::types::*;
use super::helpers::*;
use std::collections::HashMap;
use tokio::fs;
use chrono::{Local, NaiveDate, Datelike};

pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
    let dir = std::path::Path::new("economic_events");
    let mut chunks = Vec::new();

    if dir.exists() {
        let mut entries = fs::read_dir(dir).await?;
        while let Some(entry) = entries.next_entry().await? {
            let path = entry.path();
            if path.extension().map(|e| e == "json").unwrap_or(false) {
                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
                    if name.starts_with("chunk_") {
                        if let Some(content) = fs::read_to_string(&path).await.ok() {
                            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content) {
                                let start = name[6..16].to_string();
                                let end = name[17..27].to_string();
                                chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: events.len() });
                            }
                        }
                    }
                }
            }
        }
    }
    chunks.sort_by_key(|c| c.start_date.clone());
    Ok(chunks)
}

pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMap<String, EconomicEvent>> {
    let mut map = HashMap::new();
    for chunk in chunks {
        let content = fs::read_to_string(&chunk.path).await?;
        let events: Vec<EconomicEvent> = serde_json::from_str(&content)?;
        for e in events {
            map.insert(event_key(&e), e);
        }
    }
    Ok(map)
}

pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
    let dir = std::path::Path::new("economic_events");
    fs::create_dir_all(dir).await?;

    let mut sorted: Vec<_> = events.into_values().collect();
    sorted.sort_by_key(|e| e.date.clone());

    let mut chunk = Vec::new();
    let mut start: Option<NaiveDate> = None;
    for e in sorted {
        let date = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d")?;
        if let Some(s) = start {
            if (date - s).num_days() > 100 || chunk.len() >= 500 {
                save_chunk(&chunk, dir).await?;
                chunk.clear();
                start = Some(date);
            }
        } else {
            start = Some(date);
        }
        chunk.push(e);
    }
    if !chunk.is_empty() {
        save_chunk(&chunk, dir).await?;
    }
    Ok(())
}

async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> {
    let start = events.iter().map(|e| &e.date).min().unwrap().clone();
    let end = events.iter().map(|e| &e.date).max().unwrap().clone();
    let path = dir.join(format!("chunk_{}_{}.json", start, end));
    fs::write(&path, serde_json::to_string_pretty(events)?).await?;
    Ok(())
}

pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
    if changes.is_empty() { return Ok(()); }
    let dir = std::path::Path::new("economic_event_changes");
    fs::create_dir_all(dir).await?;

    let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
            let key = format!("{:02}_{}", d.month(), d.year());
            by_month.entry(key).or_default().push(c.clone());
        }
    }

    for (month, list) in by_month {
        let path = dir.join(format!("event_changes_{}.json", month));
        let mut all = if path.exists() {
            let s = fs::read_to_string(&path).await?;
            serde_json::from_str(&s).unwrap_or_default()
        } else { vec![] };
        all.extend(list);
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
    }
    Ok(())
}

pub fn target_end_date() -> String {
    let now = Local::now().naive_local().date();
    let future = now + chrono::Duration::days(90);
    future.format("%Y-%m-%d").to_string()
}
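`save_chunk` and `scan_existing_chunks` above share an implicit contract: chunk file names embed two zero-padded YYYY-MM-DD dates at fixed offsets, which is the only reason the `[6..16]` and `[17..27]` slices are safe. A quick check (illustrative, not part of the commit):

// Illustrative only: the fixed-offset slices match "chunk_<start>_<end>.json".
fn main() {
    let name = "chunk_2019-01-01_2019-04-10.json";
    assert_eq!(&name[6..16], "2019-01-01");  // "chunk_" is 6 bytes
    assert_eq!(&name[17..27], "2019-04-10"); // skip the separating '_'
}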
src/economic/types.rs (new file): 41 lines
@@ -0,0 +1,41 @@
// src/economic/types.rs
use serde::{Deserialize, Serialize};

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
pub struct EconomicEvent {
    pub country: String,
    pub date: String, // YYYY-MM-DD
    pub time: String,
    pub event: String,
    pub actual: String,
    pub forecast: String,
    pub previous: String,
    pub importance: String,
    pub description: String,
}

#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct EventChange {
    pub date: String,
    pub event: String,
    pub country: String,
    pub change_type: String, // actual|forecast|time|newly_added|removed
    pub field_changed: String,
    pub old_value: String,
    pub new_value: String,
    pub detected_at: String,
}

#[derive(Debug)]
pub struct ChunkInfo {
    pub start_date: String,
    pub end_date: String,
    pub path: std::path::PathBuf,
    pub event_count: usize,
}

#[derive(Debug)]
pub struct ScrapeResult {
    pub changes: Vec<EventChange>,
    pub removed_keys: std::collections::HashSet<String>,
}
src/economic/update.rs (new file): 116 lines
@@ -0,0 +1,116 @@
// src/economic/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::config::Config;
use chrono::{Local, NaiveDate};

pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
    let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
    let end_date = config.target_end_date();

    let chunks = scan_existing_chunks().await?;
    let mut events = load_existing_events(&chunks).await?;
    println!("Loaded {} events from {} chunks", events.len(), chunks.len());

    let start_date = if events.is_empty() {
        config.economic_start_date.clone()
    } else if events.values().any(|e| e.date >= today_str) {
        today_str.clone()
    } else {
        events.values()
            .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
            .max()
            .and_then(|d| d.succ_opt())
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or(today_str.clone())
    };

    println!("Scraping economic events: {} → {}", start_date, end_date);

    let mut current = start_date;
    let mut total_changes = 0;

    while current <= end_date {
        set_date_range(client, &current, &end_date).await?;
        tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

        let new_events = extract_events(client).await?;
        if new_events.is_empty() { break; }

        let result = process_batch(&new_events, &mut events, &today_str);
        total_changes += result.changes.len();
        save_changes(&result.changes).await?;

        let next = new_events.iter()
            .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
            .max()
            .and_then(|d| d.succ_opt())
            .map(|d| d.format("%Y-%m-%d").to_string())
            .unwrap_or(end_date.clone());

        if next > end_date { break; }
        current = next;
    }

    save_optimized_chunks(events).await?;
    println!("Economic update complete — {} changes detected", total_changes);
    Ok(())
}

pub fn process_batch(
    new_events: &[EconomicEvent],
    existing: &mut std::collections::HashMap<String, EconomicEvent>,
    today: &str,
) -> ScrapeResult {
    let mut changes = Vec::new();
    let mut removed = std::collections::HashSet::new();

    let identity_map = build_identity_lookup(existing);
    let date_map = build_date_event_lookup(existing);

    for new in new_events {
        let key = event_key(new);

        if let Some(old) = existing.get(&key) {
            changes.extend(detect_changes(old, new, today));
            existing.insert(key, new.clone());
            continue;
        }

        let date_key = format!("{}|{}|{}", new.country, new.event, new.date);
        if let Some(occurrences) = date_map.get(&date_key) {
            if let Some((old_key, old_event)) = occurrences.iter().find(|(k, _)| *k != key) {
                if new.date.as_str() > today {
                    changes.push(EventChange {
                        date: new.date.clone(),
                        event: new.event.clone(),
                        country: new.country.clone(),
                        change_type: "time".to_string(),
                        field_changed: "time".to_string(),
                        old_value: old_event.time.clone(),
                        new_value: new.time.clone(),
                        detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
                    });
                }
                removed.insert(old_key.clone());
                existing.remove(old_key);
            }
        }

        if new.date.as_str() > today {
            changes.push(EventChange {
                date: new.date.clone(),
                event: new.event.clone(),
                country: new.country.clone(),
                change_type: "newly_added".to_string(),
                field_changed: "new_event".to_string(),
                old_value: "".to_string(),
                new_value: format!("{} @ {}", new.date, new.time),
                detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
            });
        }

        existing.insert(key, new.clone());
    }

    ScrapeResult { changes, removed_keys: removed }
}
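A toy run of `process_batch` above (illustrative, not part of the commit; assumes the economic module's types and helpers are in scope). A future-dated event that is not yet stored should yield exactly one `newly_added` change and end up in the map:

// Illustrative only: one unseen future event => one "newly_added" change.
fn main() {
    let mut existing = std::collections::HashMap::new();
    let incoming = vec![EconomicEvent {
        country: "USA".into(), date: "2099-01-15".into(), time: "14:30".into(),
        event: "CPI".into(), actual: "".into(), forecast: "3.0%".into(),
        previous: "3.1%".into(), importance: "High".into(), description: "".into(),
    }];

    let result = process_batch(&incoming, &mut existing, "2025-01-01");
    assert_eq!(result.changes.len(), 1);
    assert_eq!(result.changes[0].change_type, "newly_added");
    assert_eq!(existing.len(), 1);
}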
src/main.rs: 943 changes
@@ -1,930 +1,51 @@
-use chrono::{Datelike, Local, NaiveDate};
+// src/main.rs
+mod economic;
+mod corporate;
+mod config;
+mod util;
 
 use fantoccini::{ClientBuilder, Locator};
-use serde::{Deserialize, Serialize};
-use serde_json::{Map, Value};
-use std::{
-    collections::{HashMap, HashSet},
-    path::PathBuf,
-    process::Command,
-};
-use tokio::{
-    fs, signal,
-    time::{Duration, sleep},
-};
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
-struct EconomicEvent {
-    country: String,
-    date: String,
-    time: String,
-    event: String,
-    actual: String,
-    forecast: String,
-    previous: String,
-    importance: String,
-    description: String,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone)]
-struct EventChange {
-    date: String,
-    event: String,
-    country: String,
-    change_type: String, // date | time | forecast | previous | actual | description | newly_added
-    field_changed: String,
-    old_value: String,
-    new_value: String,
-    detected_at: String,
-}
-
-#[derive(Debug)]
-struct ChunkInfo {
-    start_date: String,
-    end_date: String,
-    path: PathBuf,
-    event_count: usize,
-}
-
-#[derive(Debug)]
-struct ScrapeResult {
-    changes: Vec<EventChange>,
-    removed_keys: HashSet<String>, // Keys of events that were removed (rescheduled)
-}
-
-fn start_chromedriver(port: u16) -> std::process::Child {
-    Command::new("chromedriver-win64/chromedriver.exe")
-        .args(&[format!("--port={}", port)])
-        .spawn()
-        .expect("Failed to start ChromeDriver")
-}
-
-async fn dismiss_overlays(client: &fantoccini::Client) -> anyhow::Result<()> {
-    for _ in 0..10 {
-        let removed: bool = client
-            .execute(
-                r#"(() => {
-                    const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
-                    if (iframe && iframe.parentNode) {
-                        iframe.parentNode.removeChild(iframe);
-                        return true;
-                    }
-                    return false;
-                })()"#,
-                vec![],
-            )
-            .await?
-            .as_bool()
-            .unwrap_or(false);
-
-        if removed {
-            break;
-        }
-        sleep(Duration::from_millis(500)).await;
-    }
-    Ok(())
-}
-
-async fn extract_all_data_via_js(
-    client: &fantoccini::Client,
-) -> anyhow::Result<Vec<EconomicEvent>> {
-    let extraction_script = r#"
-        const events = [];
-        let currentDate = '';
-
-        const rows = document.querySelectorAll('#TeletraderForm table tbody tr');
-
-        for (let i = 0; i < rows.length; i++) {
-            const row = rows[i];
-            const cells = row.querySelectorAll('td');
-
-            if (cells.length === 1 && cells[0].colSpan === 9) {
-                const dateText = cells[0].textContent.trim();
-
-                const monthMap = {
-                    'Januar': '01', 'Februar': '02', 'März': '03', 'April': '04',
-                    'Mai': '05', 'Juni': '06', 'Juli': '07', 'August': '08',
-                    'September': '09', 'Oktober': '10', 'November': '11', 'Dezember': '12'
-                };
-
-                const dateParts = dateText.match(/(\d{1,2})\.\s+([a-zA-ZäöüßÄÖÜ]+)\s+(\d{4})/);
-                if (dateParts) {
-                    const day = dateParts[1].padStart(2, '0');
-                    const germanMonth = dateParts[2];
-                    const year = dateParts[3];
-                    const month = monthMap[germanMonth] || '01';
-                    currentDate = `${year}-${month}-${day}`;
-                } else {
-                    currentDate = '';
-                }
-                continue;
-            }
-
-            if (cells.length >= 8) {
-                const time = cells[0]?.textContent?.trim() || '';
-                const country = cells[2]?.textContent?.trim() || '';
-                const eventName = cells[4]?.textContent?.trim() || '';
-
-                if (!time || !country || !eventName) continue;
-
-                const importanceCell = cells[3];
-                const yellowStarCount = importanceCell?.querySelectorAll('.icon--star.font-color-yellow').length || 0;
-
-                if (yellowStarCount === 3) {
-                    let description = '';
-                    if (i + 1 < rows.length) {
-                        const nextRow = rows[i + 1];
-                        const nextCells = nextRow.querySelectorAll('td');
-                        if (nextCells.length === 1 || nextCells[0].colSpan === 8) {
-                            const descPara = nextRow.querySelector('p');
-                            if (descPara) {
-                                description = descPara.textContent?.trim() || '';
-                            }
-                        }
-                    }
-
-                    events.push({
-                        country: country,
-                        date: currentDate,
-                        time: time,
-                        event: eventName,
-                        actual: cells[7]?.textContent?.trim() || '',
-                        forecast: cells[6]?.textContent?.trim() || '',
-                        previous: cells[5]?.textContent?.trim() || '',
-                        importance: 'High',
-                        description: description
-                    });
-                }
-            }
-        }
-
-        return events;
-    "#;
-
-    let result = client.execute(extraction_script, vec![]).await?;
-
-    if let Some(events_array) = result.as_array() {
-        let mut events = Vec::new();
-        for event_value in events_array {
-            if let Some(event_obj) = event_value.as_object() {
-                let event = EconomicEvent {
-                    country: event_obj
-                        .get("country")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    date: event_obj
-                        .get("date")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    time: event_obj
-                        .get("time")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    event: event_obj
-                        .get("event")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    actual: event_obj
-                        .get("actual")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    forecast: event_obj
-                        .get("forecast")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    previous: event_obj
-                        .get("previous")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    importance: event_obj
-                        .get("importance")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                    description: event_obj
-                        .get("description")
-                        .and_then(|v| v.as_str())
-                        .unwrap_or("")
-                        .to_string(),
-                };
-                events.push(event);
-            }
-        }
-        println!("Extracted {} events (3 YELLOW stars ONLY)", events.len());
-        return Ok(events);
-    }
-
-    Ok(vec![])
-}
-
-async fn set_date_range(client: &fantoccini::Client, start: &str, end: &str) -> anyhow::Result<()> {
-    let set_dates_script = format!(
-        r#"
-        (() => {{
-            const fromInput = document.querySelector('#dtTeletraderFromDate');
-            const toInput = document.querySelector('#dtTeletraderEndDate');
-
-            if (fromInput) {{
-                fromInput.value = '{}';
-                fromInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
-                fromInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
-            }}
-
-            if (toInput) {{
-                toInput.value = '{}';
-                toInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
-                toInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
-            }}
-
-            return !!fromInput && !!toInput;
-        }})()
-        "#,
-        start, end
-    );
-
-    client.execute(&set_dates_script, vec![]).await?;
-    sleep(Duration::from_millis(1000)).await;
-
-    Ok(())
-}
-
-fn parse_date(date: &str) -> Option<NaiveDate> {
-    NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok()
-}
-
-fn calculate_next_start_date(events: &[EconomicEvent]) -> anyhow::Result<String> {
-    let mut dates: Vec<_> = events.iter().filter_map(|e| parse_date(&e.date)).collect();
-
-    if dates.is_empty() {
-        return Err(anyhow::anyhow!("No parseable dates found"));
-    }
-
-    dates.sort();
-    let next = dates.last().unwrap().succ_opt().unwrap();
-
-    Ok(next.format("%Y-%m-%d").to_string())
-}
-
-/// Storage key: date|time|event (for exact occurrence deduplication)
-fn event_lookup_key(event: &EconomicEvent) -> String {
-    format!("{}|{}|{}", event.date, event.time, event.event)
-}
-
-/// Identity key: country|event|date (to distinguish recurring monthly/quarterly events)
-/// This prevents treating December and January releases of the same recurring event as reschedules
-fn event_identity_key(event: &EconomicEvent) -> String {
-    format!("{}|{}|{}", event.country, event.event, event.date)
-}
-
-/// Compare two events and detect changes in future data
-fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, now: &str) -> Vec<EventChange> {
-    let mut changes = Vec::new();
-    let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
-
-    println!(
-        "🔍 Checking event: {} on {} (now: {})",
-        new.event, new.date, now
-    );
-
-    // Only track changes for future events
-    if new.date.as_str() <= now {
-        println!("   ⏭️ Skipped: Event is in the past/today");
-        return changes;
-    }
-
-    println!("   📅 Event is in the future - checking for changes...");
-
-    let fields = [
-        ("actual", "actual", &old.actual, &new.actual),
-        ("forecast", "forecast", &old.forecast, &new.forecast),
-        ("previous", "previous", &old.previous, &new.previous),
-        ("description", "description", &old.description, &new.description),
-    ];
-
-    for (field_name, change_type, old_val, new_val) in fields {
-        if old_val != new_val {
-            println!(
-                "   📝 CHANGE DETECTED in '{}': '{}' -> '{}'",
-                field_name, old_val, new_val
-            );
-            changes.push(EventChange {
-                date: new.date.clone(),
-                event: new.event.clone(),
-                country: new.country.clone(),
-                change_type: change_type.to_string(),
-                field_changed: field_name.to_string(),
-                old_value: old_val.to_string(),
-                new_value: new_val.to_string(),
-                detected_at: timestamp.clone(),
-            });
-        }
-    }
-
-    if changes.is_empty() {
-        println!("   ✅ No changes detected");
-    } else {
-        println!("   🎯 Total changes: {}", changes.len());
-    }
-
-    changes
-}
-
-/// Build identity lookup map: finds most recent occurrence of each event by identity
-/// Identity now includes date to distinguish recurring events (e.g., monthly GDP releases)
-fn build_identity_lookup(
-    events: &HashMap<String, EconomicEvent>,
-) -> HashMap<String, (String, EconomicEvent)> {
-    let mut identity_map: HashMap<String, (String, EconomicEvent)> = HashMap::new();
-
-    for (lookup_key, event) in events {
-        let identity = event_identity_key(event);
-        identity_map.insert(identity, (lookup_key.clone(), event.clone()));
-    }
-
-    identity_map
-}
-
-/// Build a separate lookup for detecting time-only changes (same date, different time)
-fn build_date_event_lookup(
-    events: &HashMap<String, EconomicEvent>,
-) -> HashMap<String, Vec<(String, EconomicEvent)>> {
-    let mut date_event_map: HashMap<String, Vec<(String, EconomicEvent)>> = HashMap::new();
-
-    for (lookup_key, event) in events {
-        let key = format!("{}|{}|{}", event.country, event.event, event.date);
-        date_event_map.entry(key).or_default().push((lookup_key.clone(), event.clone()));
-    }
-
-    date_event_map
-}
-
-/// Scan the economic_events directory for existing chunks
-async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
-    let events_dir = PathBuf::from("economic_events");
-
-    if !events_dir.exists() {
-        fs::create_dir_all(&events_dir).await?;
-        println!("📁 Created economic_events directory");
-        return Ok(vec![]);
-    }
-
-    let mut chunks = Vec::new();
-    let mut entries = fs::read_dir(&events_dir).await?;
-
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        if path.extension().and_then(|s| s.to_str()) == Some("json") {
-            if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
-                if let Some(dates) = filename.strip_prefix("chunk_") {
-                    let parts: Vec<&str> = dates.split('_').collect();
-                    if parts.len() == 2 {
-                        if let Ok(content) = fs::read_to_string(&path).await {
-                            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content)
-                            {
-                                chunks.push(ChunkInfo {
-                                    start_date: parts[0].to_string(),
-                                    end_date: parts[1].to_string(),
-                                    path: path.clone(),
-                                    event_count: events.len(),
-                                });
-                            }
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    chunks.sort_by(|a, b| a.start_date.cmp(&b.start_date));
-
-    if !chunks.is_empty() {
-        println!("\n📊 Found {} existing chunks:", chunks.len());
-        for chunk in &chunks {
-            println!(
-                "   • {} to {} ({} events)",
-                chunk.start_date, chunk.end_date, chunk.event_count
-            );
-        }
-    } else {
-        println!("🔭 No existing chunks found");
-    }
-
-    Ok(chunks)
-}
-
-/// Calculate target end date: first day of month, 3 months from now
-fn calculate_target_end_date() -> String {
-    let now = Local::now().naive_local().date();
-    let three_months_ahead = if now.month() + 3 > 12 {
-        NaiveDate::from_ymd_opt(now.year() + 1, (now.month() + 3) % 12, 1)
-    } else {
-        NaiveDate::from_ymd_opt(now.year(), now.month() + 3, 1)
-    }
-    .unwrap();
-
-    three_months_ahead.format("%Y-%m-%d").to_string()
-}
-
-/// Load all events from existing chunks into a HashMap
-async fn load_existing_events(
-    chunks: &[ChunkInfo],
-) -> anyhow::Result<HashMap<String, EconomicEvent>> {
-    let mut event_map = HashMap::new();
-
-    for chunk in chunks {
-        if let Ok(content) = fs::read_to_string(&chunk.path).await {
-            if let Ok(events) = serde_json::from_str::<Vec<EconomicEvent>>(&content) {
-                for event in events {
-                    event_map.insert(event_lookup_key(&event), event);
-                }
-            }
-        }
-    }
-
-    println!("📥 Loaded {} events from existing chunks", event_map.len());
-    Ok(event_map)
-}
-
-/// Save or append changes to monthly change files
-async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() {
-        println!("ℹ️ No changes to save");
-        return Ok(());
-    }
-
-    println!("\n💾 Saving {} changes...", changes.len());
-
-    let changes_dir = PathBuf::from("economic_event_changes");
-    fs::create_dir_all(&changes_dir).await?;
-
-    // Group changes by month
-    let mut changes_by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
-
-    for change in changes {
-        if let Some(date) = parse_date(&change.date) {
-            let month_key = format!("{:02}_{}", date.month(), date.year());
-            changes_by_month
-                .entry(month_key)
-                .or_default()
-                .push(change.clone());
-        }
-    }
-
-    println!("📂 Grouped into {} month(s)", changes_by_month.len());
-
-    // Save each month's changes
-    for (month_key, month_changes) in changes_by_month {
-        let filename = format!("event_changes_{}.json", month_key);
-        let filepath = changes_dir.join(&filename);
-
-        // Load existing changes if file exists
-        let existing_count = if filepath.exists() {
-            let content = fs::read_to_string(&filepath).await?;
-            serde_json::from_str::<Vec<EventChange>>(&content)
-                .unwrap_or_default()
-                .len()
-        } else {
-            0
-        };
-
-        let mut all_changes = if filepath.exists() {
-            let content = fs::read_to_string(&filepath).await?;
-            serde_json::from_str::<Vec<EventChange>>(&content).unwrap_or_default()
-        } else {
-            Vec::new()
-        };
-
-        // Append new changes
-        all_changes.extend(month_changes.clone());
-
-        // Save combined changes
-        let json = serde_json::to_string_pretty(&all_changes)?;
-        fs::write(&filepath, json).await?;
-
-        println!(
-            "   ✅ {}: {} existing + {} new = {} total changes",
-            filename,
-            existing_count,
-            month_changes.len(),
-            all_changes.len()
-        );
-    }
-
-    Ok(())
-}
-
-/// Reorganize events into optimal chunks and save them
-async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
-    if events.is_empty() {
-        return Ok(());
-    }
-
-    let events_dir = PathBuf::from("economic_events");
-    fs::create_dir_all(&events_dir).await?;
-
-    // Convert to sorted vector
-    let mut all_events: Vec<EconomicEvent> = events.into_values().collect();
-    all_events.sort_by(|a, b| a.date.cmp(&b.date));
-
-    // Group events by date ranges (chunks of ~100 days or similar)
-    let mut chunks: Vec<Vec<EconomicEvent>> = Vec::new();
-    let mut current_chunk = Vec::new();
-    let mut current_start_date: Option<NaiveDate> = None;
-
-    for event in all_events {
-        if let Some(event_date) = parse_date(&event.date) {
-            if let Some(start) = current_start_date {
-                // Start new chunk if we've gone 100+ days or have 500+ events
-                if (event_date - start).num_days() > 100 || current_chunk.len() >= 500 {
-                    chunks.push(current_chunk);
-                    current_chunk = Vec::new();
-                    current_start_date = Some(event_date);
-                }
-            } else {
-                current_start_date = Some(event_date);
-            }
-            current_chunk.push(event);
-        }
-    }
-
-    if !current_chunk.is_empty() {
-        chunks.push(current_chunk);
-    }
-
-    // Delete old chunk files
-    let mut entries = fs::read_dir(&events_dir).await?;
-    while let Some(entry) = entries.next_entry().await? {
-        let path = entry.path();
-        if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) {
-            if filename.starts_with("chunk_") {
-                fs::remove_file(&path).await?;
-            }
-        }
-    }
-
-    // Save new optimized chunks
-    for chunk in chunks {
-        if chunk.is_empty() {
-            continue;
-        }
-
-        let start = chunk
-            .iter()
-            .filter_map(|e| parse_date(&e.date))
-            .min()
-            .unwrap()
-            .format("%Y-%m-%d")
-            .to_string();
-
-        let end = chunk
-            .iter()
-            .filter_map(|e| parse_date(&e.date))
-            .max()
-            .unwrap()
-            .format("%Y-%m-%d")
-            .to_string();
-
-        let filename = format!("chunk_{}_{}.json", start, end);
-        let filepath = events_dir.join(&filename);
-
-        let json = serde_json::to_string_pretty(&chunk)?;
-        fs::write(&filepath, json).await?;
-
-        println!(
-            "💾 Saved optimized chunk: {} ({} events)",
-            filename,
-            chunk.len()
-        );
-    }
-
-    Ok(())
-}
-
-/// Scrape and update data with change tracking
-async fn scrape_and_update(
-    client: &fantoccini::Client,
-    start: &str,
-    end: &str,
-    existing_events: &mut HashMap<String, EconomicEvent>,
-) -> anyhow::Result<ScrapeResult> {
-    println!("\n🎯 Scraping range: {} to {}", start, end);
-
-    let mut current_start = start.to_string();
-    let mut all_changes = Vec::new();
-    let mut all_removed_keys = HashSet::new();
-    let now = Local::now()
-        .naive_local()
-        .date()
-        .format("%Y-%m-%d")
-        .to_string();
-
-    println!("📅 Current date for comparison: {}", now);
-    println!("🔍 Starting change detection...\n");
-
-    loop {
-        set_date_range(client, &current_start, end).await?;
-        sleep(Duration::from_secs(3)).await;
-
-        let events = extract_all_data_via_js(client).await?;
-        if events.is_empty() {
-            println!("   ✅ No more events in this range");
-            break;
-        }
-
-        println!("   📦 Fetched {} events", events.len());
-
-        // Build lookups for existing events
-        let identity_lookup = build_identity_lookup(existing_events);
-        let date_event_lookup = build_date_event_lookup(existing_events);
-        let mut events_to_remove: Vec<String> = Vec::new();
-
-        // Process events: detect changes and update map
-        let mut new_events_count = 0;
-        let mut updated_events_count = 0;
-        let mut time_changed_events_count = 0;
-
-        for new_event in events.clone() {
-            let lookup_key = event_lookup_key(&new_event);
-            let identity_key = event_identity_key(&new_event);
-
-            // CASE A: Exact match (same date/time/event)
-            if let Some(old_event) = existing_events.get(&lookup_key) {
-                println!("\n   🔎 Comparing existing event:");
-                println!("      Event: {}", new_event.event);
-                println!("      Date: {} | Time: {}", new_event.date, new_event.time);
-
-                let changes = detect_changes(old_event, &new_event, &now);
-                if !changes.is_empty() {
-                    println!("      ✨ {} change(s) detected and recorded!", changes.len());
-                    all_changes.extend(changes);
-                    updated_events_count += 1;
-                }
-
-                // CRITICAL: Always update the event in the map with latest data
-                existing_events.insert(lookup_key, new_event);
-                continue;
-            }
-
-            // CASE B: Check if time changed for same date/event
-            let date_event_key = format!("{}|{}|{}", new_event.country, new_event.event, new_event.date);
-            if let Some(existing_occurrences) = date_event_lookup.get(&date_event_key) {
-                // Find if there's an existing event with different time
-                if let Some((old_lookup_key, old_event)) = existing_occurrences.iter()
-                    .find(|(key, _)| key != &lookup_key) {
-
-                    println!("\n   🕐 TIME CHANGE DETECTED:");
-                    println!("      Event: {}", new_event.event);
-                    println!("      Date: {}", new_event.date);
-                    println!("      Old time: {} | New time: {}", old_event.time, new_event.time);
-
-                    // Track time change
-                    if new_event.date.as_str() > now.as_str() {
-                        let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
-                        all_changes.push(EventChange {
-                            date: new_event.date.clone(),
-                            event: new_event.event.clone(),
-                            country: new_event.country.clone(),
-                            change_type: "time".to_string(),
-                            field_changed: "time".to_string(),
-                            old_value: old_event.time.clone(),
-                            new_value: new_event.time.clone(),
-                            detected_at: timestamp,
-                        });
-
-                        println!("      📝 Time change recorded");
-                    }
-
-                    // Check for other field changes too
-                    let field_changes = detect_changes(old_event, &new_event, &now);
-                    if !field_changes.is_empty() {
-                        println!(
-                            "      ✨ {} additional field change(s) detected!",
-                            field_changes.len()
-                        );
-                        all_changes.extend(field_changes);
-                    }
-
-                    // Remove old occurrence and add new one
-                    events_to_remove.push(old_lookup_key.clone());
-                    all_removed_keys.insert(old_lookup_key.clone());
-                    existing_events.insert(lookup_key, new_event);
-                    time_changed_events_count += 1;
-                    continue;
-                }
-            }
-
-            // CASE C: New event
-            new_events_count += 1;
-            println!(
-                "   ➕ New event: {} on {} @ {}",
-                new_event.event, new_event.date, new_event.time
-            );
-
-            // Track as newly added if it's a future event
-            if new_event.date.as_str() > now.as_str() {
-                let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
-                all_changes.push(EventChange {
-                    date: new_event.date.clone(),
-                    event: new_event.event.clone(),
-                    country: new_event.country.clone(),
-                    change_type: "newly_added".to_string(),
-                    field_changed: "new_event".to_string(),
-                    old_value: "".to_string(),
-                    new_value: format!("{} @ {}", new_event.date, new_event.time),
-                    detected_at: timestamp,
-                });
-            }
-
-            existing_events.insert(lookup_key, new_event);
-        }
-
-        // Remove old occurrences of time-changed events
-        for key in events_to_remove {
-            existing_events.remove(&key);
-        }
-
-        println!("\n   📊 Batch summary:");
-        println!("      New events: {}", new_events_count);
-        println!("      Updated events: {}", updated_events_count);
-        println!("      Time changed events: {}", time_changed_events_count);
-        println!("      Changes tracked: {}", all_changes.len());
-
-        let next = match calculate_next_start_date(&events) {
-            Ok(n) => n,
-            Err(_) => {
-                println!("   ⚠️ Cannot calculate next date, stopping");
-                break;
-            }
-        };
-
-        if next > end.to_string() {
-            println!("   ✅ Reached end of range");
-            break;
-        }
-
-        current_start = next;
-        sleep(Duration::from_secs(2)).await;
-    }
-
-    println!("\n🎯 SCRAPE COMPLETE:");
-    println!("   Total changes detected: {}", all_changes.len());
-    println!("   Total events removed (time changes): {}", all_removed_keys.len());
-
-    Ok(ScrapeResult {
-        changes: all_changes,
-        removed_keys: all_removed_keys,
-    })
-}
-
-/// Main logic with intelligent update handling
-async fn run_intelligent_update(client: &fantoccini::Client) -> anyhow::Result<()> {
-    let now = Local::now()
-        .naive_local()
-        .date()
-        .format("%Y-%m-%d")
-        .to_string();
-    let target_end = calculate_target_end_date();
-
-    println!("📅 Today: {}", now);
-    println!("🎯 Target end date: {}", target_end);
-
-    // Load existing chunks
-    let chunks = scan_existing_chunks().await?;
-    let mut existing_events = load_existing_events(&chunks).await?;
-
-    if existing_events.is_empty() {
-        // No existing data - full scrape from beginning
-        println!("\n🔭 No existing data - starting fresh scrape from 2007-02-13");
-        let result =
-            scrape_and_update(client, "2007-02-13", &target_end, &mut existing_events).await?;
-        save_changes(&result.changes).await?;
-        save_optimized_chunks(existing_events).await?;
-        return Ok(());
-    }
-
-    // Find date range of existing data
-    let dates: Vec<NaiveDate> = existing_events
-        .values()
-        .filter_map(|e| parse_date(&e.date))
-        .collect();
-
-    let min_date = dates.iter().min().unwrap().format("%Y-%m-%d").to_string();
-    let max_date = dates.iter().max().unwrap().format("%Y-%m-%d").to_string();
-
-    println!("📊 Existing data range: {} to {}", min_date, max_date);
-
-    // Determine update strategy
-    if max_date < now {
-        // Case 1: Data is in the past, need to update from max_date to target
-        let next_start = parse_date(&max_date)
-            .and_then(|d| d.succ_opt())
-            .map(|d| d.format("%Y-%m-%d").to_string())
-            .unwrap_or(max_date);
-
-        println!(
-            "\n📈 Updating from end of existing data: {} to {}",
-            next_start, target_end
-        );
-        let result =
-            scrape_and_update(client, &next_start, &target_end, &mut existing_events).await?;
-        save_changes(&result.changes).await?;
-        save_optimized_chunks(existing_events).await?;
-    } else if max_date >= now {
-        // Case 2: Data extends to or beyond today, refresh future data
-        println!(
-            "\n🔄 Data exists up to today - refreshing future data: {} to {}",
-            now, target_end
-        );
-
-        // CRITICAL FIX: Pass the actual existing_events HashMap directly
-        // This ensures all updates (including rescheduled events) are properly handled
-        let result = scrape_and_update(client, &now, &target_end, &mut existing_events).await?;
-
-        save_changes(&result.changes).await?;
-
-        // The existing_events HashMap is already updated in-place by scrape_and_update
-        // Just save the optimized chunks
-        save_optimized_chunks(existing_events).await?;
-    }
-
-    println!("\n✅ Update complete!");
-    Ok(())
-}
+use tokio::signal;
 
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
-    let port = 9515;
-    let mut chromedriver = start_chromedriver(port);
-    sleep(Duration::from_secs(1)).await;
+    // === Ensure data directories exist ===
+    util::ensure_data_dirs().await?;
 
-    let caps_value = serde_json::json!({
-        "goog:chromeOptions": {
-            "args": [
-                "--disable-gpu",
-                "--disable-notifications",
-                "--disable-popup-blocking",
-                "--disable-blink-features=AutomationControlled",
-            ],
-            "excludeSwitches": ["enable-automation"]
-        }
-    });
+    // === Load configuration ===
+    let config = config::Config::default();
 
-    let caps_map: Map<String, Value> = caps_value
-        .as_object()
-        .expect("Capabilities should be a JSON object")
-        .clone();
+    // === Start ChromeDriver ===
+    let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe")
+        .args(["--port=9515"])
+        .spawn()?;
 
-    let mut client = ClientBuilder::native()
-        .capabilities(caps_map)
-        .connect(&format!("http://localhost:{}", port))
+    let client = ClientBuilder::native()
+        .connect("http://localhost:9515")
         .await?;
 
-    // Setup graceful shutdown
-    let shutdown_client = client.clone();
+    // Graceful shutdown
+    let client_clone = client.clone();
     tokio::spawn(async move {
-        signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
-        println!("\nCtrl+C received, shutting down...");
-        shutdown_client.close().await.ok();
+        signal::ctrl_c().await.unwrap();
+        client_clone.close().await.ok();
         std::process::exit(0);
    });
 
-    // Navigate to page
-    let url = "https://www.finanzen.net/termine/wirtschaftsdaten/";
-    client.goto(url).await?;
+    // === Economic Calendar Update ===
+    println!("Updating Economic Calendar (High Impact Only)");
+    economic::goto_and_prepare(&client).await?;
+    economic::run_full_update(&client, &config).await?;
 
-    dismiss_overlays(&client).await?;
-
-    // Click high importance tab
-    if let Ok(tab) = client
-        .find(Locator::Css(
-            r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#,
-        ))
-        .await
-    {
-        tab.click().await?;
-        println!("✓ High importance tab clicked");
-        sleep(Duration::from_secs(2)).await;
-    }
-
-    // Run intelligent update
-    run_intelligent_update(&client).await?;
-
-    // Display final summary
-    let chunks = scan_existing_chunks().await?;
-    let final_events = load_existing_events(&chunks).await?;
-
-    println!("\n📊 FINAL SUMMARY:");
-    println!("   • Total chunks: {}", chunks.len());
-    println!("   • Total events: {}", final_events.len());
+    // === Corporate Earnings Update ===
+    println!("\nUpdating Corporate Earnings");
+    let tickers = config::get_tickers();
+    corporate::run_full_update(tickers, &config).await?;
 
+    // === Cleanup ===
     client.close().await?;
-    chromedriver.kill()?;
+    child.kill()?;
 
+    println!("\nAll data updated successfully!");
     Ok(())
 }
src/util.rs (new file): 23 lines
@@ -0,0 +1,23 @@
// src/util.rs (or put it directly in main.rs if you prefer)
use tokio::fs;
use std::path::Path;

/// Create the required data folders if they do not exist yet.
pub async fn ensure_data_dirs() -> anyhow::Result<()> {
    let dirs = [
        "economic_events",
        "economic_event_changes",
        "corporate_events",
        "corporate_prices",
    ];

    for dir in dirs {
        let path = Path::new(dir);
        if !path.exists() {
            fs::create_dir_all(path).await?;
            println!("Created directory: {dir}");
        }
        // else → silently continue
    }
    Ok(())
}