diff --git a/src/corporate/scraper.rs b/src/corporate/scraper.rs index f7474c5..f7c23f7 100644 --- a/src/corporate/scraper.rs +++ b/src/corporate/scraper.rs @@ -1,47 +1,81 @@ // src/corporate/scraper.rs use super::types::{CompanyEvent, CompanyPrice}; -use reqwest::Client; +use fantoccini::{Client, Locator}; use scraper::{Html, Selector}; use chrono::{NaiveDate, Datelike}; +use tokio::time::{sleep, Duration}; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; -pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result> { - let client = Client::new(); - let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings"); +pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> { + let script = r#" + (() => { + const agree = document.querySelector('button[name="agree"]'); + if (agree) { + agree.click(); + return true; + } + return false; + })() + "#; - let text = client - .get(&url) - .header("User-Agent", USER_AGENT) - .send() - .await? - .text() - .await?; + for _ in 0..10 { + let done: bool = client.execute(script, vec![]).await?.as_bool().unwrap_or(false); + if done { + break; + } + sleep(Duration::from_millis(500)).await; + } + Ok(()) +} - let document = Html::parse_document(&text); - let row_sel = Selector::parse(r#"table tbody tr"#).unwrap(); +pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result> { + let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); + client.goto(&url).await?; + dismiss_yahoo_consent(client).await?; + + // Load all by clicking "Show More" if present + loop { + match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await { + Ok(btn) => { + btn.click().await?; + sleep(Duration::from_secs(2)).await; + } + Err(_) => break, + } + } + + let html = client.source().await?; + let document = Html::parse_document(&html); + let row_sel = Selector::parse("table tbody tr").unwrap(); let mut events = Vec::new(); for row in document.select(&row_sel) { - let cols: Vec = row.text().map(|s| s.trim().to_string()).collect(); - if cols.len() < 4 { continue; } + let cols: Vec = row.select(&Selector::parse("td").unwrap()) + .map(|td| td.text().collect::>().join(" ").trim().to_string()) + .collect(); + if cols.len() < 6 { continue; } + + let full_date = &cols[2]; + let parts: Vec<&str> = full_date.split(" at ").collect(); + let raw_date = parts[0].trim(); + let time_str = if parts.len() > 1 { parts[1].trim() } else { "" }; - let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]); let date = match parse_yahoo_date(raw_date) { Ok(d) => d, Err(_) => continue, }; - let eps_forecast = parse_float(&cols[1]); - let eps_actual = parse_float(&cols[2]); + let eps_forecast = parse_float(&cols[3]); + let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } } else { None }; - let time = if cols[0].contains("After") || cols[0].contains("AMC") { + let time = if time_str.contains("PM") { "AMC".to_string() - } else if cols[0].contains("Before") || cols[0].contains("BMO") { + } else if time_str.contains("AM") { "BMO".to_string() } else { "".to_string() @@ -51,7 +85,7 @@ pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result anyhow::Result anyhow::Result> { - let client = Client::new(); +pub async fn fetch_price_history(client: &Client, ticker: &str, start: &str, end: &str) -> anyhow::Result> { let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")? .and_hms_opt(0, 0, 0).unwrap().and_utc() .timestamp(); @@ -79,13 +112,8 @@ pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true" ); - let csv = client - .get(&url) - .header("User-Agent", USER_AGENT) - .send() - .await? - .text() - .await?; + client.goto(&url).await?; + let csv = client.source().await?; let mut prices = Vec::new(); for line in csv.lines().skip(1) { @@ -110,7 +138,7 @@ fn parse_float(s: &str) -> Option { } fn parse_yahoo_date(s: &str) -> anyhow::Result { - NaiveDate::parse_from_str(s, "%b %d, %Y") - .or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y")) + NaiveDate::parse_from_str(s, "%B %d, %Y") + .or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y")) .map_err(|_| anyhow::anyhow!("Bad date: {s}")) } \ No newline at end of file diff --git a/src/corporate/update.rs b/src/corporate/update.rs index 1f1655b..c10fabd 100644 --- a/src/corporate/update.rs +++ b/src/corporate/update.rs @@ -6,7 +6,7 @@ use chrono::Local; use std::collections::{HashMap, HashSet}; -pub async fn run_full_update(tickers: Vec, config: &Config) -> anyhow::Result<()> { +pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec, config: &Config) -> anyhow::Result<()> { println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date); let today = chrono::Local::now().format("%Y-%m-%d").to_string(); @@ -15,13 +15,13 @@ pub async fn run_full_update(tickers: Vec, config: &Config) -> anyhow::R print!(" → {:6} ", ticker); // Earnings - if let Ok(events) = fetch_earnings_history(&ticker).await { + if let Ok(events) = fetch_earnings_history(client, &ticker).await { merge_and_save_events(&ticker, events.clone()).await?; println!("{} earnings", events.len()); } // Prices – now using config.corporate_start_date - if let Ok(prices) = fetch_price_history(&ticker, &config.corporate_start_date, &today).await { + if let Ok(prices) = fetch_price_history(client, &ticker, &config.corporate_start_date, &today).await { save_prices_for_ticker(&ticker, prices).await?; } diff --git a/src/main.rs b/src/main.rs index 057ee06..3977876 100644 --- a/src/main.rs +++ b/src/main.rs @@ -40,7 +40,7 @@ async fn main() -> anyhow::Result<()> { // === Corporate Earnings Update === println!("\nUpdating Corporate Earnings"); let tickers = config::get_tickers(); - corporate::run_full_update(tickers, &config).await?; + corporate::run_full_update(&client, tickers, &config).await?; // === Cleanup === client.close().await?;