fetching 5min data only for the last 60 days

This commit is contained in:
2025-11-23 21:43:53 +01:00
parent 462f7ca672
commit 7b680f960f
4 changed files with 192 additions and 46 deletions

View File

@@ -15,7 +15,7 @@ impl Default for Config {
fn default() -> Self { fn default() -> Self {
Self { Self {
economic_start_date: "2007-02-13".to_string(), economic_start_date: "2007-02-13".to_string(),
corporate_start_date: "2007-01-01".to_string(), corporate_start_date: "2010-01-01".to_string(),
economic_lookahead_months: 3, economic_lookahead_months: 3,
} }
} }
@@ -31,8 +31,21 @@ impl Config {
pub fn get_tickers() -> Vec<String> { pub fn get_tickers() -> Vec<String> {
vec![ vec![
"AAPL", "MSFT", "NVDA", "GOOGL", "AMZN", "JPM".to_string(), // XNYS
"TSLA", "META", "JPM", "V", "WMT", "MSFT".to_string(), // XNAS
// ... your 100500 tickers here "601398.SS".to_string(),// XSHG
].into_iter().map(String::from).collect() "7203.T".to_string(), // XJPX
"0700.HK".to_string(), // XHKG
"ASML.AS".to_string(), // XAMS
"RELIANCE.BO".to_string(), // XBSE
"RELIANCE.NS".to_string(), // XNSE
"000001.SZ".to_string(),// XSHE
"SHOP.TO".to_string(), // XTSE
"AZN.L".to_string(), // XLON
"2330.TW".to_string(), // XTAI
"2222.SR".to_string(), // XSAU (note: uses .SR suffix)
"SAP.DE".to_string(), // XFRA
"NESN.SW".to_string(), // XSWX
"CSL.AX".to_string(), // XASX
]
} }

View File

@@ -2,9 +2,11 @@
use super::types::{CompanyEvent, CompanyPrice}; use super::types::{CompanyEvent, CompanyPrice};
use fantoccini::{Client, Locator}; use fantoccini::{Client, Locator};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use chrono::{NaiveDate}; use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration}; use tokio::time::{sleep, Duration as TokioDuration};
use yfinance_rs::{YfClient, Ticker, Range, Interval}; use reqwest::Client as HttpClient;
use serde_json::Value;
use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder};
use yfinance_rs::core::conversions::money_to_f64; use yfinance_rs::core::conversions::money_to_f64;
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
@@ -26,22 +28,25 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
if done { if done {
break; break;
} }
sleep(Duration::from_millis(500)).await; sleep(TokioDuration::from_millis(500)).await;
} }
Ok(()) Ok(())
} }
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> { pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); // Navigate to Yahoo Earnings Calendar for the ticker
// offset=0&size=100 to get up to 100 entries
// offset up to 99 loading older entries if needed
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
client.goto(&url).await?; client.goto(&url).await?;
dismiss_yahoo_consent(client).await?; dismiss_yahoo_consent(client).await?;
// Load all by clicking "Show More" if present // Load all by clicking "Show More" if present (unchanged)
loop { loop {
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await { match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
Ok(btn) => { Ok(btn) => {
btn.click().await?; btn.click().await?;
sleep(Duration::from_secs(2)).await; sleep(TokioDuration::from_secs(2)).await;
} }
Err(_) => break, Err(_) => break,
} }
@@ -56,9 +61,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap()) let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string()) .map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
.collect(); .collect();
if cols.len() < 4 { continue; } if cols.len() < 6 { continue; } // Updated to match current 6-column structure
let full_date = &cols[0]; let full_date = &cols[2]; // Now Earnings Date
let parts: Vec<&str> = full_date.split(" at ").collect(); let parts: Vec<&str> = full_date.split(" at ").collect();
let raw_date = parts[0].trim(); let raw_date = parts[0].trim();
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" }; let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
@@ -68,8 +73,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Err(_) => continue, Err(_) => continue,
}; };
let eps_forecast = parse_float(&cols[1]); let eps_forecast = parse_float(&cols[3]); // EPS Estimate
let eps_actual = if cols[2] == "-" { None } else { parse_float(&cols[2]) }; let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
@@ -100,46 +105,154 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Ok(events) Ok(events)
} }
pub async fn fetch_price_history( // Helper: Yahoo returns prices as strings like "$123.45" or null
fn parse_price(v: Option<&Value>) -> f64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
.or_else(|| v.and_then(|x| x.as_f64()))
.unwrap_or(0.0)
}
fn parse_volume(v: Option<&Value>) -> u64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace(',', "").parse::<u64>().ok())
.or_else(|| v.and_then(|x| x.as_u64()))
.unwrap_or(0)
}
pub async fn fetch_daily_price_history(
ticker: &str, ticker: &str,
start: &str, start_str: &str,
end: &str, end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> { ) -> anyhow::Result<Vec<CompanyPrice>> {
let client = YfClient::default(); let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
let tk = Ticker::new(&client, ticker); let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive
// We request the maximum range the library will automatically respect Yahoo's limits let mut all_prices = Vec::new();
let history = tk let mut current = start;
.history(Some(Range::Max), Some(Interval::D1), true)
.await
.map_err(|e| anyhow::anyhow!("Yahoo Finance API error for {ticker}: {e:?}"))?;
let mut prices = Vec::with_capacity(history.len()); while current < end {
let chunk_end = current + Duration::days(730); // 2-year chunks = safe
let actual_end = chunk_end.min(end);
for candle in history { let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let date_str = candle.ts.format("%Y-%m-%d").to_string(); let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
// Filter by user-defined start / end println!(" Fetching {ticker} {}{}", current, actual_end - Duration::days(1));
if date_str < (*start).to_string() || date_str > (*end).to_string() {
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let opens = quote["open"].as_array();
let highs = quote["high"].as_array();
let lows = quote["low"].as_array();
let closes = quote["close"].as_array();
let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback
let volumes = quote["volume"].as_array();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
if date_str < start_str.to_string() || date_str > end_str.to_string() {
continue; continue;
} }
let open = parse_price(opens.and_then(|a| a.get(i)));
let high = parse_price(highs.and_then(|a| a.get(i)));
let low = parse_price(lows.and_then(|a| a.get(i)));
let close = parse_price(closes.and_then(|a| a.get(i)));
let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
let volume = parse_volume(volumes.and_then(|a| a.get(i)));
all_prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
open,
high,
low,
close,
adj_close,
volume,
});
}
sleep(TokioDuration::from_millis(200));
current = actual_end;
}
all_prices.sort_by_key(|p| p.date.clone());
all_prices.dedup_by_key(|p| p.date.clone());
println!(" Got {} daily bars for {ticker}", all_prices.len());
Ok(all_prices)
}
pub async fn fetch_price_history_5min(
ticker: &str,
_start: &str,
_end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let now = Utc::now().timestamp();
let period1 = now - 5184000; // 60 days ago
let period2 = now;
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let mut prices = Vec::new();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i)));
let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i)));
let low = parse_price(quote["low"].as_array().and_then(|a| a.get(i)));
let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i)));
let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i)));
prices.push(CompanyPrice { prices.push(CompanyPrice {
ticker: ticker.to_string(), ticker: ticker.to_string(),
date: date_str, date: date_str,
open: money_to_f64(&candle.open), open,
high: money_to_f64(&candle.high), high,
low: money_to_f64(&candle.low), low,
// close_unadj is the raw (non-adjusted) close; close is the adjusted one close,
close: money_to_f64(&candle.close_unadj.unwrap_or(candle.close.clone())), adj_close: close, // intraday usually not adjusted
adj_close: money_to_f64(&candle.close), volume,
volume: candle.volume.unwrap_or(0),
}); });
} }
// Sort just in case (normally already sorted)
prices.sort_by_key(|p| p.date.clone()); prices.sort_by_key(|p| p.date.clone());
Ok(prices) Ok(prices)
} }

View File

@@ -2,7 +2,7 @@
use super::types::{CompanyEvent, CompanyPrice, CompanyEventChange}; use super::types::{CompanyEvent, CompanyPrice, CompanyEventChange};
use super::helpers::*; use super::helpers::*;
use tokio::fs; use tokio::fs;
use chrono::{Local, NaiveDate, Datelike}; use chrono::{Datelike, NaiveDate};
use std::collections::HashMap; use std::collections::HashMap;
pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> { pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
@@ -86,10 +86,10 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()>
Ok(()) Ok(())
} }
pub async fn save_prices_for_ticker(ticker: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> { pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
let dir = std::path::Path::new("corporate_prices"); let dir = std::path::Path::new("corporate_prices");
fs::create_dir_all(dir).await?; fs::create_dir_all(dir).await?;
let path = dir.join(format!("{}.json", ticker)); let path = dir.join(format!("{}_{}.json", ticker.replace(".", "_"), timeframe));
prices.sort_by_key(|p| p.date.clone()); prices.sort_by_key(|p| p.date.clone());

View File

@@ -1,6 +1,7 @@
// src/corporate/update.rs // src/corporate/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*}; use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::config::Config; use crate::config::Config;
use yfinance_rs::{Range, Interval};
use chrono::Local; use chrono::Local;
use std::collections::HashMap; use std::collections::HashMap;
@@ -21,8 +22,27 @@ pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec<String>,
println!("{} earnings, {} changes", new_events.len(), result.changes.len()); println!("{} earnings, {} changes", new_events.len(), result.changes.len());
} }
if let Ok(prices) = fetch_price_history(ticker, &config.corporate_start_date, &today).await { // DAILY full history
save_prices_for_ticker(ticker, prices).await?; if let Ok(prices) = fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await {
save_prices_for_ticker(ticker, "daily", prices).await?;
}
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
// 5-MINUTE only last 60 days (Yahoo limit for intraday)
let sixty_days_ago = (chrono::Local::now() - chrono::Duration::days(60))
.format("%Y-%m-%d")
.to_string();
if let Ok(prices) = fetch_price_history_5min(ticker, &sixty_days_ago, &today).await {
if !prices.is_empty() {
save_prices_for_ticker(ticker, "5min", prices.clone()).await?;
println!(" Saved {} 5min bars for {ticker}", prices.len());
} else {
println!(" No 5min data available for {ticker} (market closed? retry later)");
}
} else {
println!(" 5min fetch failed for {ticker} (rate limit? try again)");
} }
tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;