Store data for multiple exchanges for a single ISIN

This commit is contained in:
2025-11-24 18:33:13 +01:00
parent 9cfcae84ea
commit bbc19f2110
6 changed files with 473 additions and 106 deletions

View File

@@ -1,16 +1,173 @@
// src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice};
use super::types::{CompanyEvent, CompanyPrice, TickerInfo};
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration as TokioDuration};
use reqwest::Client as HttpClient;
use serde_json::Value;
//use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder};
//use yfinance_rs::core::conversions::money_to_f64;
/// `User-Agent` header value sent with the Yahoo Finance HTTP requests in this file.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
/// Discover the Yahoo Finance listings (ticker, exchange, currency) for one ISIN.
///
/// Candidates are gathered from three sources, in this order, skipping any
/// symbol that has already been collected:
///
/// 1. `known_ticker` itself,
/// 2. the symbols Yahoo's search endpoint returns when queried with `isin`,
/// 3. the base symbol of `known_ticker` (the text before the first `.`)
///    combined with a fixed table of common exchange suffixes.
///
/// Every candidate is validated with `check_ticker_exists`; candidates that
/// fail validation are dropped. A failing search request or an undecodable
/// search response is logged and does not abort discovery.
///
/// NOTE(review): suffix candidates are validated by symbol only — nothing here
/// checks that `BASE.XX` belongs to the same ISIN, so an unrelated company that
/// shares the base symbol on another exchange would be accepted. Confirm
/// whether callers filter these out.
///
/// # Errors
/// No failure path in this function currently produces `Err`; the `Result`
/// return type is kept so the signature stays stable for callers.
pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<TickerInfo>> {
    /// Pause after each validation request issued from a loop.
    const PROBE_DELAY_MS: u64 = 100;
    /// Yahoo ticker suffixes that are probed for the base symbol.
    const EXCHANGE_SUFFIXES: &[&str] = &[
        "",    // US
        ".L",  // London
        ".DE", // Frankfurt/XETRA
        ".PA", // Paris
        ".AS", // Amsterdam
        ".MI", // Milan
        ".SW", // Switzerland
        ".T",  // Tokyo
        ".HK", // Hong Kong
        ".SS", // Shanghai
        ".SZ", // Shenzhen
        ".TO", // Toronto
        ".AX", // Australia
        ".SA", // Brazil
        ".MC", // Madrid
        ".BO", // Bombay
        ".NS", // National Stock Exchange India
    ];

    println!(" Discovering exchanges for ISIN {}", isin);
    let mut discovered_tickers = Vec::new();

    // Try the primary ticker first.
    if let Ok(info) = check_ticker_exists(known_ticker).await {
        discovered_tickers.push(info);
    }

    // Search for the ISIN directly on Yahoo to find other listings.
    let search_url = format!(
        "https://query2.finance.yahoo.com/v1/finance/search?q={}&quotesCount=20&newsCount=0",
        isin
    );
    let search_response = HttpClient::new()
        .get(&search_url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await;
    match search_response {
        Ok(resp) => match resp.json::<Value>().await {
            Ok(json) => {
                // A missing or non-array `quotes` field simply yields no symbols.
                let symbols = json["quotes"]
                    .as_array()
                    .into_iter()
                    .flatten()
                    .filter_map(|quote| quote["symbol"].as_str());
                for symbol in symbols {
                    // Skip if already found.
                    if discovered_tickers.iter().any(|t| t.ticker == symbol) {
                        continue;
                    }
                    // Validate that this ticker actually works.
                    if let Ok(info) = check_ticker_exists(symbol).await {
                        discovered_tickers.push(info);
                    }
                    sleep(TokioDuration::from_millis(PROBE_DELAY_MS)).await;
                }
            }
            // Previously swallowed silently; surface it like the request error below.
            Err(e) => println!(" Search API returned an unreadable response: {}", e),
        },
        Err(e) => println!(" Search API error: {}", e),
    }

    // Also try common exchange suffixes for the base ticker.
    // `split` always yields at least one item, so the fallback is never taken.
    let base = known_ticker.split('.').next().unwrap_or(known_ticker);
    for suffix in EXCHANGE_SUFFIXES {
        let test_ticker = format!("{}{}", base, suffix);
        // Skip if already found.
        if discovered_tickers.iter().any(|t| t.ticker == test_ticker) {
            continue;
        }
        if let Ok(info) = check_ticker_exists(&test_ticker).await {
            discovered_tickers.push(info);
        }
        // Pause after every probe, successful or not: most suffix guesses fail,
        // and those failed requests must be spaced out too.
        sleep(TokioDuration::from_millis(PROBE_DELAY_MS)).await;
    }

    println!(" Found {} tradable exchanges", discovered_tickers.len());
    Ok(discovered_tickers)
}
/// Probe Yahoo's chart endpoint for `ticker` and describe the listing it finds.
///
/// Requests a one-day chart (5 second timeout) and reads the exchange name and
/// currency from the first result's `meta` object. The exchange name is passed
/// through `exchange_name_to_mic`; a missing exchange name becomes `"UNKNOWN"`
/// and a missing currency becomes `"USD"`. The returned `primary` flag is
/// always `false`.
///
/// # Errors
/// Fails when the request or JSON decoding fails, when `chart.result` is
/// absent or empty, or when the result carries neither a numeric
/// `regularMarketPrice` nor any timestamps.
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
    let chart_url = format!(
        "https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d",
        ticker
    );
    let response = HttpClient::new()
        .get(&chart_url)
        .header("User-Agent", USER_AGENT)
        .timeout(std::time::Duration::from_secs(5))
        .send()
        .await?;
    let payload: Value = response.json().await?;

    // `chart.result` must be a non-empty array; null or any other shape means no data.
    let entry = match payload["chart"]["result"]
        .as_array()
        .and_then(|items| items.first())
    {
        Some(entry) => entry,
        None => return Err(anyhow::anyhow!("No data for ticker {}", ticker)),
    };
    let meta = &entry["meta"];

    // Either a quoted market price or at least one bar counts as price data.
    let price_known = meta["regularMarketPrice"].is_number();
    let has_bars = entry["timestamp"]
        .as_array()
        .map_or(false, |stamps| !stamps.is_empty());
    if !(price_known || has_bars) {
        return Err(anyhow::anyhow!("Ticker {} exists but has no price data", ticker));
    }

    let exchange_mic = exchange_name_to_mic(meta["exchangeName"].as_str().unwrap_or("UNKNOWN"));
    let currency = meta["currency"].as_str().unwrap_or("USD").to_string();

    Ok(TickerInfo {
        ticker: ticker.to_string(),
        exchange_mic,
        currency,
        primary: false, // Will be set separately
    })
}
/// Convert Yahoo's exchange name or code to an ISO 10383 MIC (best effort).
///
/// Matching is exact and case-sensitive. Names that are not in the table are
/// returned unchanged, so the result is not guaranteed to be a valid MIC.
///
/// Xetra (`XETR`) and the Frankfurt floor (`XFRA`) are kept as separate venues,
/// and Bombay/BSE maps to `XBOM` (`XBSE` is the Bucharest Stock Exchange).
fn exchange_name_to_mic(name: &str) -> String {
    let mic = match name {
        "NMS" | "NasdaqGS" | "NASDAQ" => "XNAS",
        "NYQ" | "NYSE" => "XNYS",
        "LSE" | "London" => "XLON",
        "FRA" | "Frankfurt" => "XFRA",
        "GER" | "XETRA" => "XETR",
        "PAR" | "Paris" => "XPAR",
        "AMS" | "Amsterdam" => "XAMS",
        "MIL" | "Milan" => "XMIL",
        "JPX" | "Tokyo" => "XJPX",
        "HKG" | "Hong Kong" => "XHKG",
        "SHH" | "Shanghai" => "XSHG",
        "SHZ" | "Shenzhen" => "XSHE",
        "TOR" | "Toronto" => "XTSE",
        "ASX" | "Australia" => "XASX",
        "SAU" | "Saudi" => "XSAU",
        "SWX" | "Switzerland" => "XSWX",
        "BSE" | "Bombay" => "XBOM",
        "NSE" | "NSI" => "XNSE",
        "TAI" | "Taiwan" => "XTAI",
        "SAO" | "Sao Paulo" => "BVMF",
        "MCE" | "Madrid" => "XMAD",
        // Unknown exchange: fall back to the name itself.
        other => other,
    };
    mic.to_string()
}
pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
let script = r#"
(() => {
@@ -34,14 +191,10 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
}
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
// Navigate to Yahoo Earnings Calendar for the ticker
// offset=0&size=100 to get up to 100 entries
// offset up to 99 loading older entries if needed
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
client.goto(&url).await?;
dismiss_yahoo_consent(client).await?;
// Load all by clicking "Show More" if present (unchanged)
loop {
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
Ok(btn) => {
@@ -61,9 +214,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
.collect();
if cols.len() < 6 { continue; } // Updated to match current 6-column structure
if cols.len() < 6 { continue; }
let full_date = &cols[2]; // Now Earnings Date
let full_date = &cols[2];
let parts: Vec<&str> = full_date.split(" at ").collect();
let raw_date = parts[0].trim();
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
@@ -73,8 +226,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Err(_) => continue,
};
let eps_forecast = parse_float(&cols[3]); // EPS Estimate
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS
let eps_forecast = parse_float(&cols[3]);
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) };
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
@@ -105,7 +258,6 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Ok(events)
}
// Helper: Yahoo returns prices as strings like "$123.45" or null
fn parse_price(v: Option<&Value>) -> f64 {
v.and_then(|x| x.as_str())
.and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
@@ -126,13 +278,13 @@ pub async fn fetch_daily_price_history(
end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1);
let mut all_prices = Vec::new();
let mut current = start;
while current < end {
let chunk_end = current + Duration::days(730); // 2-year chunks = safe
let chunk_end = current + Duration::days(730);
let actual_end = chunk_end.min(end);
let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
@@ -146,7 +298,7 @@ pub async fn fetch_daily_price_history(
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.header("User-Agent", USER_AGENT)
.send()
.await?
.json()
@@ -155,12 +307,15 @@ pub async fn fetch_daily_price_history(
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let meta = &result["meta"];
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
let opens = quote["open"].as_array();
let highs = quote["high"].as_array();
let lows = quote["low"].as_array();
let closes = quote["close"].as_array();
let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback
let adj_closes = result["indicators"]["adjclose"][0]["adjclose"].as_array()
.or_else(|| closes);
let volumes = quote["volume"].as_array();
for (i, ts_val) in timestamps.iter().enumerate() {
@@ -182,14 +337,14 @@ pub async fn fetch_daily_price_history(
all_prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
time: "".to_string(), // Empty for daily
time: "".to_string(),
open,
high,
low,
close,
adj_close,
volume,
currency: "USD".to_string(), // Assuming USD for now
currency: currency.clone(),
});
}
@@ -209,8 +364,8 @@ pub async fn fetch_price_history_5min(
_start: &str,
_end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let now = Utc::now().timestamp();
let period1 = now - 5184000; // 60 days ago
let now = Utc::now().timestamp();
let period1 = now - 5184000;
let period2 = now;
let url = format!(
@@ -219,7 +374,7 @@ let now = Utc::now().timestamp();
let json: Value = HttpClient::new()
.get(&url)
.header("User-Agent", "Mozilla/5.0")
.header("User-Agent", USER_AGENT)
.send()
.await?
.json()
@@ -228,6 +383,8 @@ let now = Utc::now().timestamp();
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let meta = &result["meta"];
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
let mut prices = Vec::new();
@@ -246,14 +403,14 @@ let now = Utc::now().timestamp();
prices.push(CompanyPrice {
ticker: ticker.to_string(),
date: date_str,
time: time_str, // Full time for 5min intraday
time: time_str,
open,
high,
low,
close,
adj_close: close, // intraday usually not adjusted
adj_close: close,
volume,
currency: "USD".to_string(), // Assuming USD for now
currency: currency.clone(),
});
}