// src/corporate/scraper.rs use super::types::{CompanyEvent, CompanyPrice}; use reqwest::Client; use scraper::{Html, Selector}; use chrono::{NaiveDate, Datelike}; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result> { let client = Client::new(); let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings"); let text = client .get(&url) .header("User-Agent", USER_AGENT) .send() .await? .text() .await?; let document = Html::parse_document(&text); let row_sel = Selector::parse(r#"table tbody tr"#).unwrap(); let mut events = Vec::new(); for row in document.select(&row_sel) { let cols: Vec = row.text().map(|s| s.trim().to_string()).collect(); if cols.len() < 4 { continue; } let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]); let date = match parse_yahoo_date(raw_date) { Ok(d) => d, Err(_) => continue, }; let eps_forecast = parse_float(&cols[1]); let eps_actual = parse_float(&cols[2]); let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } } else { None }; let time = if cols[0].contains("After") || cols[0].contains("AMC") { "AMC".to_string() } else if cols[0].contains("Before") || cols[0].contains("BMO") { "BMO".to_string() } else { "".to_string() }; events.push(CompanyEvent { ticker: ticker.to_string(), date: date.format("%Y-%m-%d").to_string(), time, period: cols.get(3).cloned().unwrap_or_default(), eps_forecast, eps_actual, revenue_forecast: None, revenue_actual: None, surprise_pct, source: "Yahoo".to_string(), }); } Ok(events) } pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result> { let client = Client::new(); let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")? .and_hms_opt(0, 0, 0).unwrap().and_utc() .timestamp(); let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")? .succ_opt().unwrap() .and_hms_opt(0, 0, 0).unwrap().and_utc() .timestamp(); let url = format!( "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true" ); let csv = client .get(&url) .header("User-Agent", USER_AGENT) .send() .await? .text() .await?; let mut prices = Vec::new(); for line in csv.lines().skip(1) { let cols: Vec<&str> = line.split(',').collect(); if cols.len() < 7 { continue; } prices.push(CompanyPrice { ticker: ticker.to_string(), date: cols[0].to_string(), open: cols[1].parse()?, high: cols[2].parse()?, low: cols[3].parse()?, close: cols[4].parse()?, adj_close: cols[5].parse()?, volume: cols[6].parse()?, }); } Ok(prices) } fn parse_float(s: &str) -> Option { s.replace("--", "").replace(",", "").parse::().ok() } fn parse_yahoo_date(s: &str) -> anyhow::Result { NaiveDate::parse_from_str(s, "%b %d, %Y") .or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y")) .map_err(|_| anyhow::anyhow!("Bad date: {s}")) }