fetching 5min data only for the last 60 days

This commit is contained in:
2025-11-23 21:43:53 +01:00
parent 462f7ca672
commit 7b680f960f
4 changed files with 192 additions and 46 deletions

View File

@@ -2,9 +2,11 @@
use super::types::{CompanyEvent, CompanyPrice};
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{NaiveDate};
use tokio::time::{sleep, Duration};
use yfinance_rs::{YfClient, Ticker, Range, Interval};
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration as TokioDuration};
use reqwest::Client as HttpClient;
use serde_json::Value;
use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder};
use yfinance_rs::core::conversions::money_to_f64;
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
@@ -26,22 +28,25 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
if done {
break;
}
sleep(Duration::from_millis(500)).await;
sleep(TokioDuration::from_millis(500)).await;
}
Ok(())
}
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
// Navigate to Yahoo Earnings Calendar for the ticker
// offset=0&size=100 to get up to 100 entries
// offset up to 99 loading older entries if needed
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
client.goto(&url).await?;
dismiss_yahoo_consent(client).await?;
// Load all by clicking "Show More" if present
// Load all by clicking "Show More" if present (unchanged)
loop {
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
Ok(btn) => {
btn.click().await?;
sleep(Duration::from_secs(2)).await;
sleep(TokioDuration::from_secs(2)).await;
}
Err(_) => break,
}
@@ -56,9 +61,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
.collect();
if cols.len() < 4 { continue; }
if cols.len() < 6 { continue; } // Updated to match current 6-column structure
let full_date = &cols[0];
let full_date = &cols[2]; // Now Earnings Date
let parts: Vec<&str> = full_date.split(" at ").collect();
let raw_date = parts[0].trim();
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
@@ -68,8 +73,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Err(_) => continue,
};
let eps_forecast = parse_float(&cols[1]);
let eps_actual = if cols[2] == "-" { None } else { parse_float(&cols[2]) };
let eps_forecast = parse_float(&cols[3]); // EPS Estimate
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
@@ -100,46 +105,154 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Ok(events)
}
pub async fn fetch_price_history(
// Helper: Yahoo returns prices as strings like "$123.45" or null
fn parse_price(v: Option<&Value>) -> f64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
.or_else(|| v.and_then(|x| x.as_f64()))
.unwrap_or(0.0)
}
fn parse_volume(v: Option<&Value>) -> u64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace(',', "").parse::<u64>().ok())
.or_else(|| v.and_then(|x| x.as_u64()))
.unwrap_or(0)
}
pub async fn fetch_daily_price_history(
ticker: &str,
start: &str,
end: &str,
start_str: &str,
end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let client = YfClient::default();
let tk = Ticker::new(&client, ticker);
let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive
// We request the maximum range the library will automatically respect Yahoo's limits
let history = tk
.history(Some(Range::Max), Some(Interval::D1), true)
.await
.map_err(|e| anyhow::anyhow!("Yahoo Finance API error for {ticker}: {e:?}"))?;
let mut all_prices = Vec::new();
let mut current = start;
let mut prices = Vec::with_capacity(history.len());
while current < end {
let chunk_end = current + Duration::days(730); // 2-year chunks = safe
let actual_end = chunk_end.min(end);
for candle in history {
let date_str = candle.ts.format("%Y-%m-%d").to_string();
let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
// Filter by user-defined start / end
if date_str < (*start).to_string() || date_str > (*end).to_string() {
continue;
println!(" Fetching {ticker} {}{}", current, actual_end - Duration::days(1));
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
);
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let opens = quote["open"].as_array();
let highs = quote["high"].as_array();
let lows = quote["low"].as_array();
let closes = quote["close"].as_array();
let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback
let volumes = quote["volume"].as_array();
for (i, ts_val) in timestamps.iter().enumerate() {
let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
if date_str < start_str.to_string() || date_str > end_str.to_string() {
continue;
}
let open = parse_price(opens.and_then(|a| a.get(i)));
let high = parse_price(highs.and_then(|a| a.get(i)));
let low = parse_price(lows.and_then(|a| a.get(i)));
let close = parse_price(closes.and_then(|a| a.get(i)));
let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
let volume = parse_volume(volumes.and_then(|a| a.get(i)));
all_prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
open,
high,
low,
close,
adj_close,
volume,
});
}
sleep(TokioDuration::from_millis(200));
current = actual_end;
}
all_prices.sort_by_key(|p| p.date.clone());
all_prices.dedup_by_key(|p| p.date.clone());
println!(" Got {} daily bars for {ticker}", all_prices.len());
Ok(all_prices)
}
/// Downloads 5-minute OHLCV bars for `ticker` from Yahoo's v8 chart endpoint,
/// covering the 60 days up to the moment of the call.
///
/// `_start` and `_end` are accepted for signature compatibility but ignored:
/// the window is always "now minus 60 days" to "now".
///
/// Values that are missing or `null` in the response are stored as `0.0`
/// (prices) or `0` (volume), as produced by `parse_price` / `parse_volume`.
/// `adj_close` is set to the same value as `close`.
///
/// NOTE(review): `date` is formatted as `%Y-%m-%d`, so every bar of the same
/// day carries an identical `date` string and the time of day is not kept.
/// Confirm that callers do not need to tell intraday bars apart.
///
/// # Errors
/// Returns an error when the HTTP request fails or answers with a non-success
/// status, when the body is not valid JSON, or when the response carries no
/// `timestamp` array.
pub async fn fetch_price_history_5min(
    ticker: &str,
    _start: &str,
    _end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
    /// Length of the requested window: 60 days in seconds (5_184_000).
    const LOOKBACK_SECS: i64 = 60 * 24 * 60 * 60;

    let period2 = Utc::now().timestamp();
    let period1 = period2 - LOOKBACK_SECS;

    let url = format!(
        "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
    );

    let json: Value = HttpClient::new()
        .get(&url)
        .header("User-Agent", "Mozilla/5.0")
        .send()
        .await?
        // Surface 4xx/5xx as an HTTP error instead of a confusing JSON one.
        .error_for_status()?
        .json()
        .await?;

    let result = &json["chart"]["result"][0];
    let timestamps = result["timestamp"]
        .as_array()
        .ok_or_else(|| anyhow::anyhow!("No timestamps for {ticker} (5m interval)"))?;

    // Resolve the column arrays once instead of on every bar.
    let quote = &result["indicators"]["quote"][0];
    let opens = quote["open"].as_array();
    let highs = quote["high"].as_array();
    let lows = quote["low"].as_array();
    let closes = quote["close"].as_array();
    let volumes = quote["volume"].as_array();

    let mut prices = Vec::with_capacity(timestamps.len());

    for (i, ts_val) in timestamps.iter().enumerate() {
        // Skip entries whose timestamp is not a valid integer; defaulting
        // them to the Unix epoch would create bogus "1970-01-01" bars.
        let Some(dt) = ts_val
            .as_i64()
            .and_then(|ts| DateTime::<Utc>::from_timestamp(ts, 0))
        else {
            continue;
        };

        let close = parse_price(closes.and_then(|a| a.get(i)));

        prices.push(CompanyPrice {
            ticker: ticker.to_string(),
            date: dt.format("%Y-%m-%d").to_string(),
            open: parse_price(opens.and_then(|a| a.get(i))),
            high: parse_price(highs.and_then(|a| a.get(i))),
            low: parse_price(lows.and_then(|a| a.get(i))),
            close,
            adj_close: close, // intraday usually not adjusted
            volume: parse_volume(volumes.and_then(|a| a.get(i))),
        });
    }

    // Sort just in case (normally already sorted). The sort is stable, so
    // bars sharing a date keep the order in which Yahoo returned them.
    prices.sort_by(|a, b| a.date.cmp(&b.date));

    Ok(prices)
}