// src/corporate/scraper.rs use super::types::{CompanyEvent, CompanyPrice}; use fantoccini::{Client, Locator}; use scraper::{Html, Selector}; use chrono::{NaiveDate, Datelike}; use tokio::time::{sleep, Duration}; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> { let script = r#" (() => { const agree = document.querySelector('button[name="agree"]'); if (agree) { agree.click(); return true; } return false; })() "#; for _ in 0..10 { let done: bool = client.execute(script, vec![]).await?.as_bool().unwrap_or(false); if done { break; } sleep(Duration::from_millis(500)).await; } Ok(()) } pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result> { let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); client.goto(&url).await?; dismiss_yahoo_consent(client).await?; // Load all by clicking "Show More" if present loop { match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await { Ok(btn) => { btn.click().await?; sleep(Duration::from_secs(2)).await; } Err(_) => break, } } let html = client.source().await?; let document = Html::parse_document(&html); let row_sel = Selector::parse("table tbody tr").unwrap(); let mut events = Vec::new(); for row in document.select(&row_sel) { let cols: Vec = row.select(&Selector::parse("td").unwrap()) .map(|td| td.text().collect::>().join(" ").trim().to_string()) .collect(); if cols.len() < 6 { continue; } let full_date = &cols[2]; let parts: Vec<&str> = full_date.split(" at ").collect(); let raw_date = parts[0].trim(); let time_str = if parts.len() > 1 { parts[1].trim() } else { "" }; let date = match parse_yahoo_date(raw_date) { Ok(d) => d, Err(_) => continue, }; let eps_forecast = parse_float(&cols[3]); let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } } else { None }; let time = if time_str.contains("PM") { "AMC".to_string() } else if time_str.contains("AM") { "BMO".to_string() } else { "".to_string() }; events.push(CompanyEvent { ticker: ticker.to_string(), date: date.format("%Y-%m-%d").to_string(), time, period: "".to_string(), // No period info available, set to empty eps_forecast, eps_actual, revenue_forecast: None, revenue_actual: None, surprise_pct, source: "Yahoo".to_string(), }); } Ok(events) } pub async fn fetch_price_history(client: &Client, ticker: &str, start: &str, end: &str) -> anyhow::Result> { let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")? .and_hms_opt(0, 0, 0).unwrap().and_utc() .timestamp(); let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")? .succ_opt().unwrap() .and_hms_opt(0, 0, 0).unwrap().and_utc() .timestamp(); let url = format!( "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true" ); client.goto(&url).await?; let csv = client.source().await?; let mut prices = Vec::new(); for line in csv.lines().skip(1) { let cols: Vec<&str> = line.split(',').collect(); if cols.len() < 7 { continue; } prices.push(CompanyPrice { ticker: ticker.to_string(), date: cols[0].to_string(), open: cols[1].parse()?, high: cols[2].parse()?, low: cols[3].parse()?, close: cols[4].parse()?, adj_close: cols[5].parse()?, volume: cols[6].parse()?, }); } Ok(prices) } fn parse_float(s: &str) -> Option { s.replace("--", "").replace(",", "").parse::().ok() } fn parse_yahoo_date(s: &str) -> anyhow::Result { NaiveDate::parse_from_str(s, "%B %d, %Y") .or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y")) .map_err(|_| anyhow::anyhow!("Bad date: {s}")) }