fetching 5min data only for the last 60 days

This commit is contained in:
2025-11-23 21:43:53 +01:00
parent 462f7ca672
commit 7b680f960f
4 changed files with 192 additions and 46 deletions

View File

@@ -15,7 +15,7 @@ impl Default for Config {
/// Builds the default configuration.
///
/// Economic data starts at 2007-02-13, corporate data at 2010-01-01, and the
/// economic look-ahead window is three months.
fn default() -> Self {
    Self {
        economic_start_date: "2007-02-13".to_string(),
        // Only one value may be given per field; the superseded
        // "2007-01-01" start date has been dropped.
        corporate_start_date: "2010-01-01".to_string(),
        economic_lookahead_months: 3,
    }
}
@@ -31,8 +31,21 @@ impl Config {
/// Returns the ticker symbols to process, one per exchange.
///
/// The trailing comment on each entry is the exchange code the symbol is
/// meant to represent.
pub fn get_tickers() -> Vec<String> {
    // A single homogeneous list of string literals, converted once at the
    // end, replaces the previous mix of `&str` and `String` entries.
    const TICKERS: [&str; 16] = [
        "JPM",         // XNYS
        "MSFT",        // XNAS
        "601398.SS",   // XSHG
        "7203.T",      // XJPX
        "0700.HK",     // XHKG
        "ASML.AS",     // XAMS
        "RELIANCE.BO", // XBSE
        "RELIANCE.NS", // XNSE
        "000001.SZ",   // XSHE
        "SHOP.TO",     // XTSE
        "AZN.L",       // XLON
        "2330.TW",     // XTAI
        "2222.SR",     // XSAU (note: uses .SR suffix)
        "SAP.DE",      // XFRA
        "NESN.SW",     // XSWX
        "CSL.AX",      // XASX
    ];

    TICKERS.iter().map(|&symbol| String::from(symbol)).collect()
}

View File

@@ -2,9 +2,11 @@
use super::types::{CompanyEvent, CompanyPrice};
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{NaiveDate};
use tokio::time::{sleep, Duration};
use yfinance_rs::{YfClient, Ticker, Range, Interval};
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration as TokioDuration};
use reqwest::Client as HttpClient;
use serde_json::Value;
use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder};
use yfinance_rs::core::conversions::money_to_f64;
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
@@ -26,22 +28,25 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
if done {
break;
}
sleep(Duration::from_millis(500)).await;
sleep(TokioDuration::from_millis(500)).await;
}
Ok(())
}
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
// Navigate to Yahoo Earnings Calendar for the ticker
// offset=0&size=100 to get up to 100 entries
// offset up to 99 loading older entries if needed
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
client.goto(&url).await?;
dismiss_yahoo_consent(client).await?;
// Load all by clicking "Show More" if present
// Load all by clicking "Show More" if present (unchanged)
loop {
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
Ok(btn) => {
btn.click().await?;
sleep(Duration::from_secs(2)).await;
sleep(TokioDuration::from_secs(2)).await;
}
Err(_) => break,
}
@@ -56,9 +61,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
.collect();
if cols.len() < 4 { continue; }
if cols.len() < 6 { continue; } // Updated to match current 6-column structure
let full_date = &cols[0];
let full_date = &cols[2]; // Now Earnings Date
let parts: Vec<&str> = full_date.split(" at ").collect();
let raw_date = parts[0].trim();
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
@@ -68,8 +73,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Err(_) => continue,
};
let eps_forecast = parse_float(&cols[1]);
let eps_actual = if cols[2] == "-" { None } else { parse_float(&cols[2]) };
let eps_forecast = parse_float(&cols[3]); // EPS Estimate
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
@@ -100,46 +105,154 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Ok(events)
}
pub async fn fetch_price_history(
/// Converts an optional JSON value into a price.
///
/// A string value is parsed after stripping every `$` and `,` (so
/// `"$1,234.50"` becomes `1234.5`). If the value is not a string, or the
/// string does not parse, the value is read as a JSON number instead.
/// Anything else — including `None` and JSON `null` — yields `0.0`.
fn parse_price(v: Option<&Value>) -> f64 {
    match v {
        None => 0.0,
        Some(raw) => {
            let from_text = raw.as_str().and_then(|text| {
                let cleaned = text.replace('$', "").replace(',', "");
                cleaned.parse::<f64>().ok()
            });
            match from_text {
                Some(price) => price,
                // Not a parsable string: fall back to a numeric JSON value.
                None => raw.as_f64().unwrap_or(0.0),
            }
        }
    }
}
/// Converts an optional JSON value into a traded volume.
///
/// A string value is parsed as `u64` after removing thousands separators
/// (`,`). If the value is not a string, or the string does not parse, the
/// value is read as an unsigned JSON integer instead. Anything else —
/// including `None` and JSON `null` — yields `0`.
fn parse_volume(v: Option<&Value>) -> u64 {
    match v {
        None => 0,
        Some(raw) => {
            let from_text = raw
                .as_str()
                .and_then(|text| text.replace(',', "").parse::<u64>().ok());
            match from_text {
                Some(volume) => volume,
                // Not a parsable string: fall back to a numeric JSON value.
                None => raw.as_u64().unwrap_or(0),
            }
        }
    }
}
pub async fn fetch_daily_price_history(
ticker: &str,
start: &str,
end: &str,
start_str: &str,
end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let client = YfClient::default();
let tk = Ticker::new(&client, ticker);
let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive
// We request the maximum range the library will automatically respect Yahoo's limits
let history = tk
.history(Some(Range::Max), Some(Interval::D1), true)
.await
.map_err(|e| anyhow::anyhow!("Yahoo Finance API error for {ticker}: {e:?}"))?;
let mut all_prices = Vec::new();
let mut current = start;
let mut prices = Vec::with_capacity(history.len());
while current < end {
let chunk_end = current + Duration::days(730); // 2-year chunks = safe
let actual_end = chunk_end.min(end);
for candle in history {
let date_str = candle.ts.format("%Y-%m-%d").to_string();
let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
// Filter by user-defined start / end
if date_str < (*start).to_string() || date_str > (*end).to_string() {
continue;
println!(" Fetching {ticker} {}{}", current, actual_end - Duration::days(1));
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let opens = quote["open"].as_array();
let highs = quote["high"].as_array();
let lows = quote["low"].as_array();
let closes = quote["close"].as_array();
let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback
let volumes = quote["volume"].as_array();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
if date_str < start_str.to_string() || date_str > end_str.to_string() {
continue;
}
let open = parse_price(opens.and_then(|a| a.get(i)));
let high = parse_price(highs.and_then(|a| a.get(i)));
let low = parse_price(lows.and_then(|a| a.get(i)));
let close = parse_price(closes.and_then(|a| a.get(i)));
let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
let volume = parse_volume(volumes.and_then(|a| a.get(i)));
all_prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
open,
high,
low,
close,
adj_close,
volume,
});
}
sleep(TokioDuration::from_millis(200));
current = actual_end;
}
all_prices.sort_by_key(|p| p.date.clone());
all_prices.dedup_by_key(|p| p.date.clone());
println!(" Got {} daily bars for {ticker}", all_prices.len());
Ok(all_prices)
}
pub async fn fetch_price_history_5min(
ticker: &str,
_start: &str,
_end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let now = Utc::now().timestamp();
let period1 = now - 5184000; // 60 days ago
let period2 = now;
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let mut prices = Vec::new();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i)));
let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i)));
let low = parse_price(quote["low"].as_array().and_then(|a| a.get(i)));
let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i)));
let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i)));
prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
open: money_to_f64(&candle.open),
high: money_to_f64(&candle.high),
low: money_to_f64(&candle.low),
// close_unadj is the raw (non-adjusted) close; close is the adjusted one
close: money_to_f64(&candle.close_unadj.unwrap_or(candle.close.clone())),
adj_close: money_to_f64(&candle.close),
volume: candle.volume.unwrap_or(0),
open,
high,
low,
close,
adj_close: close, // intraday usually not adjusted
volume,
});
}
// Sort just in case (normally already sorted)
prices.sort_by_key(|p| p.date.clone());
Ok(prices)
}

View File

@@ -2,7 +2,7 @@
use super::types::{CompanyEvent, CompanyPrice, CompanyEventChange};
use super::helpers::*;
use tokio::fs;
use chrono::{Local, NaiveDate, Datelike};
use chrono::{Datelike, NaiveDate};
use std::collections::HashMap;
pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
@@ -86,10 +86,10 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()>
Ok(())
}
pub async fn save_prices_for_ticker(ticker: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
let dir = std::path::Path::new("corporate_prices");
fs::create_dir_all(dir).await?;
let path = dir.join(format!("{}.json", ticker));
let path = dir.join(format!("{}_{}.json", ticker.replace(".", "_"), timeframe));
prices.sort_by_key(|p| p.date.clone());

View File

@@ -1,6 +1,7 @@
// src/corporate/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::config::Config;
use yfinance_rs::{Range, Interval};
use chrono::Local;
use std::collections::HashMap;
@@ -21,8 +22,27 @@ pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec<String>,
println!("{} earnings, {} changes", new_events.len(), result.changes.len());
}
if let Ok(prices) = fetch_price_history(ticker, &config.corporate_start_date, &today).await {
save_prices_for_ticker(ticker, prices).await?;
// DAILY full history
if let Ok(prices) = fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await {
save_prices_for_ticker(ticker, "daily", prices).await?;
}
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
// 5-MINUTE only last 60 days (Yahoo limit for intraday)
let sixty_days_ago = (chrono::Local::now() - chrono::Duration::days(60))
.format("%Y-%m-%d")
.to_string();
if let Ok(prices) = fetch_price_history_5min(ticker, &sixty_days_ago, &today).await {
if !prices.is_empty() {
save_prices_for_ticker(ticker, "5min", prices.clone()).await?;
println!(" Saved {} 5min bars for {ticker}", prices.len());
} else {
println!(" No 5min data available for {ticker} (market closed? retry later)");
}
} else {
println!(" 5min fetch failed for {ticker} (rate limit? try again)");
}
tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;