From 7b680f960f5724359cebc0d25c7cd11fd11a5c3e Mon Sep 17 00:00:00 2001 From: donpat1to Date: Sun, 23 Nov 2025 21:43:53 +0100 Subject: [PATCH] fetching 5min data only for the last 60 days --- src/config.rs | 23 +++-- src/corporate/scraper.rs | 185 +++++++++++++++++++++++++++++++-------- src/corporate/storage.rs | 6 +- src/corporate/update.rs | 24 ++++- 4 files changed, 192 insertions(+), 46 deletions(-) diff --git a/src/config.rs b/src/config.rs index cc0a2c1..dd6d8dd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -15,7 +15,7 @@ impl Default for Config { fn default() -> Self { Self { economic_start_date: "2007-02-13".to_string(), - corporate_start_date: "2007-01-01".to_string(), + corporate_start_date: "2010-01-01".to_string(), economic_lookahead_months: 3, } } @@ -31,8 +31,21 @@ impl Config { pub fn get_tickers() -> Vec { vec![ - "AAPL", "MSFT", "NVDA", "GOOGL", "AMZN", - "TSLA", "META", "JPM", "V", "WMT", - // ... your 100–500 tickers here - ].into_iter().map(String::from).collect() + "JPM".to_string(), // XNYS + "MSFT".to_string(), // XNAS + "601398.SS".to_string(),// XSHG + "7203.T".to_string(), // XJPX + "0700.HK".to_string(), // XHKG + "ASML.AS".to_string(), // XAMS + "RELIANCE.BO".to_string(), // XBSE + "RELIANCE.NS".to_string(), // XNSE + "000001.SZ".to_string(),// XSHE + "SHOP.TO".to_string(), // XTSE + "AZN.L".to_string(), // XLON + "2330.TW".to_string(), // XTAI + "2222.SR".to_string(), // XSAU (note: uses .SR suffix) + "SAP.DE".to_string(), // XFRA + "NESN.SW".to_string(), // XSWX + "CSL.AX".to_string(), // XASX + ] } \ No newline at end of file diff --git a/src/corporate/scraper.rs b/src/corporate/scraper.rs index 12cd53b..afc60e7 100644 --- a/src/corporate/scraper.rs +++ b/src/corporate/scraper.rs @@ -2,9 +2,11 @@ use super::types::{CompanyEvent, CompanyPrice}; use fantoccini::{Client, Locator}; use scraper::{Html, Selector}; -use chrono::{NaiveDate}; -use tokio::time::{sleep, Duration}; -use yfinance_rs::{YfClient, Ticker, Range, Interval}; +use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc}; +use tokio::time::{sleep, Duration as TokioDuration}; +use reqwest::Client as HttpClient; +use serde_json::Value; +use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder}; use yfinance_rs::core::conversions::money_to_f64; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; @@ -26,22 +28,25 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> { if done { break; } - sleep(Duration::from_millis(500)).await; + sleep(TokioDuration::from_millis(500)).await; } Ok(()) } pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result> { - let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); + // Navigate to Yahoo Earnings Calendar for the ticker + // offset=0&size=100 to get up to 100 entries + // offset up to 99 loading older entries if needed + let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker); client.goto(&url).await?; dismiss_yahoo_consent(client).await?; - // Load all by clicking "Show More" if present + // Load all by clicking "Show More" if present (unchanged) loop { match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await { Ok(btn) => { btn.click().await?; - sleep(Duration::from_secs(2)).await; + sleep(TokioDuration::from_secs(2)).await; } Err(_) => break, } @@ -56,9 +61,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re let cols: Vec = row.select(&Selector::parse("td").unwrap()) .map(|td| td.text().collect::>().join(" ").trim().to_string()) .collect(); - if cols.len() < 4 { continue; } + if cols.len() < 6 { continue; } // Updated to match current 6-column structure - let full_date = &cols[0]; + let full_date = &cols[2]; // Now Earnings Date let parts: Vec<&str> = full_date.split(" at ").collect(); let raw_date = parts[0].trim(); let time_str = if parts.len() > 1 { parts[1].trim() } else { "" }; @@ -68,8 +73,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re Err(_) => continue, }; - let eps_forecast = parse_float(&cols[1]); - let eps_actual = if cols[2] == "-" { None } else { parse_float(&cols[2]) }; + let eps_forecast = parse_float(&cols[3]); // EPS Estimate + let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } @@ -100,46 +105,154 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re Ok(events) } -pub async fn fetch_price_history( +// Helper: Yahoo returns prices as strings like "$123.45" or null +fn parse_price(v: Option<&Value>) -> f64 { + v.and_then(|x| x.as_str()) + .and_then(|s| s.replace('$', "").replace(',', "").parse::().ok()) + .or_else(|| v.and_then(|x| x.as_f64())) + .unwrap_or(0.0) +} + +fn parse_volume(v: Option<&Value>) -> u64 { + v.and_then(|x| x.as_str()) + .and_then(|s| s.replace(',', "").parse::().ok()) + .or_else(|| v.and_then(|x| x.as_u64())) + .unwrap_or(0) +} + +pub async fn fetch_daily_price_history( ticker: &str, - start: &str, - end: &str, + start_str: &str, + end_str: &str, ) -> anyhow::Result> { - let client = YfClient::default(); - let tk = Ticker::new(&client, ticker); + let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?; + let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive - // We request the maximum range – the library will automatically respect Yahoo's limits - let history = tk - .history(Some(Range::Max), Some(Interval::D1), true) - .await - .map_err(|e| anyhow::anyhow!("Yahoo Finance API error for {ticker}: {e:?}"))?; + let mut all_prices = Vec::new(); + let mut current = start; - let mut prices = Vec::with_capacity(history.len()); + while current < end { + let chunk_end = current + Duration::days(730); // 2-year chunks = safe + let actual_end = chunk_end.min(end); - for candle in history { - let date_str = candle.ts.format("%Y-%m-%d").to_string(); + let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp(); + let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp(); - // Filter by user-defined start / end - if date_str < (*start).to_string() || date_str > (*end).to_string() { - continue; + println!(" Fetching {ticker} {} → {}", current, actual_end - Duration::days(1)); + + let url = format!( + "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true" + ); + + let json: Value = HttpClient::new() + .get(&url) + .header("User-Agent", "Mozilla/5.0") + .send() + .await? + .json() + .await?; + + let result = &json["chart"]["result"][0]; + let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?; + let quote = &result["indicators"]["quote"][0]; + + let opens = quote["open"].as_array(); + let highs = quote["high"].as_array(); + let lows = quote["low"].as_array(); + let closes = quote["close"].as_array(); + let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback + let volumes = quote["volume"].as_array(); + + for (i, ts_val) in timestamps.iter().enumerate() { + let ts = ts_val.as_i64().unwrap_or(0); + let dt: DateTime = DateTime::from_timestamp(ts, 0).unwrap_or_default(); + let date_str = dt.format("%Y-%m-%d").to_string(); + + if date_str < start_str.to_string() || date_str > end_str.to_string() { + continue; + } + + let open = parse_price(opens.and_then(|a| a.get(i))); + let high = parse_price(highs.and_then(|a| a.get(i))); + let low = parse_price(lows.and_then(|a| a.get(i))); + let close = parse_price(closes.and_then(|a| a.get(i))); + let adj_close = parse_price(adj_closes.and_then(|a| a.get(i))); + let volume = parse_volume(volumes.and_then(|a| a.get(i))); + + all_prices.push(CompanyPrice { + ticker: ticker.to_string(), + date: date_str, + open, + high, + low, + close, + adj_close, + volume, + }); } + sleep(TokioDuration::from_millis(200)); + current = actual_end; + } + + all_prices.sort_by_key(|p| p.date.clone()); + all_prices.dedup_by_key(|p| p.date.clone()); + + println!(" Got {} daily bars for {ticker}", all_prices.len()); + Ok(all_prices) +} + +pub async fn fetch_price_history_5min( + ticker: &str, + _start: &str, + _end: &str, +) -> anyhow::Result> { +let now = Utc::now().timestamp(); + let period1 = now - 5184000; // 60 days ago + let period2 = now; + + let url = format!( + "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true" + ); + + let json: Value = HttpClient::new() + .get(&url) + .header("User-Agent", "Mozilla/5.0") + .send() + .await? + .json() + .await?; + + let result = &json["chart"]["result"][0]; + let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?; + let quote = &result["indicators"]["quote"][0]; + + let mut prices = Vec::new(); + + for (i, ts_val) in timestamps.iter().enumerate() { + let ts = ts_val.as_i64().unwrap_or(0); + let dt: DateTime = DateTime::from_timestamp(ts, 0).unwrap_or_default(); + let date_str = dt.format("%Y-%m-%d").to_string(); + + let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i))); + let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i))); + let low = parse_price(quote["low"].as_array().and_then(|a| a.get(i))); + let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i))); + let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i))); + prices.push(CompanyPrice { ticker: ticker.to_string(), date: date_str, - open: money_to_f64(&candle.open), - high: money_to_f64(&candle.high), - low: money_to_f64(&candle.low), - // close_unadj is the raw (non-adjusted) close; close is the adjusted one - close: money_to_f64(&candle.close_unadj.unwrap_or(candle.close.clone())), - adj_close: money_to_f64(&candle.close), - volume: candle.volume.unwrap_or(0), + open, + high, + low, + close, + adj_close: close, // intraday usually not adjusted + volume, }); } - // Sort just in case (normally already sorted) prices.sort_by_key(|p| p.date.clone()); - Ok(prices) } diff --git a/src/corporate/storage.rs b/src/corporate/storage.rs index 857be95..d4cce33 100644 --- a/src/corporate/storage.rs +++ b/src/corporate/storage.rs @@ -2,7 +2,7 @@ use super::types::{CompanyEvent, CompanyPrice, CompanyEventChange}; use super::helpers::*; use tokio::fs; -use chrono::{Local, NaiveDate, Datelike}; +use chrono::{Datelike, NaiveDate}; use std::collections::HashMap; pub async fn load_existing_events() -> anyhow::Result> { @@ -86,10 +86,10 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()> Ok(()) } -pub async fn save_prices_for_ticker(ticker: &str, mut prices: Vec) -> anyhow::Result<()> { +pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec) -> anyhow::Result<()> { let dir = std::path::Path::new("corporate_prices"); fs::create_dir_all(dir).await?; - let path = dir.join(format!("{}.json", ticker)); + let path = dir.join(format!("{}_{}.json", ticker.replace(".", "_"), timeframe)); prices.sort_by_key(|p| p.date.clone()); diff --git a/src/corporate/update.rs b/src/corporate/update.rs index ed13625..1505532 100644 --- a/src/corporate/update.rs +++ b/src/corporate/update.rs @@ -1,6 +1,7 @@ // src/corporate/update.rs use super::{scraper::*, storage::*, helpers::*, types::*}; use crate::config::Config; +use yfinance_rs::{Range, Interval}; use chrono::Local; use std::collections::HashMap; @@ -21,8 +22,27 @@ pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec, println!("{} earnings, {} changes", new_events.len(), result.changes.len()); } - if let Ok(prices) = fetch_price_history(ticker, &config.corporate_start_date, &today).await { - save_prices_for_ticker(ticker, prices).await?; + // DAILY – full history + if let Ok(prices) = fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await { + save_prices_for_ticker(ticker, "daily", prices).await?; + } + + tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; + + // 5-MINUTE – only last 60 days (Yahoo limit for intraday) + let sixty_days_ago = (chrono::Local::now() - chrono::Duration::days(60)) + .format("%Y-%m-%d") + .to_string(); + + if let Ok(prices) = fetch_price_history_5min(ticker, &sixty_days_ago, &today).await { + if !prices.is_empty() { + save_prices_for_ticker(ticker, "5min", prices.clone()).await?; + println!(" Saved {} 5min bars for {ticker}", prices.len()); + } else { + println!(" No 5min data available for {ticker} (market closed? retry later)"); + } + } else { + println!(" 5min fetch failed for {ticker} (rate limit? try again)"); } tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;