Storing data for multiple exchanges for a single ISIN
This commit is contained in:
@@ -1,16 +1,173 @@
|
||||
// src/corporate/scraper.rs
|
||||
use super::types::{CompanyEvent, CompanyPrice};
|
||||
use super::types::{CompanyEvent, CompanyPrice, TickerInfo};
|
||||
use fantoccini::{Client, Locator};
|
||||
use scraper::{Html, Selector};
|
||||
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
|
||||
use tokio::time::{sleep, Duration as TokioDuration};
|
||||
use reqwest::Client as HttpClient;
|
||||
use serde_json::Value;
|
||||
//use yfinance_rs::{YfClient, Ticker, Range, Interval, HistoryBuilder};
|
||||
//use yfinance_rs::core::conversions::money_to_f64;
|
||||
|
||||
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
||||
|
||||
/// Discover all exchanges where this ISIN trades by querying Yahoo Finance
|
||||
pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<TickerInfo>> {
|
||||
println!(" Discovering exchanges for ISIN {}", isin);
|
||||
|
||||
let mut discovered_tickers = Vec::new();
|
||||
|
||||
// Try the primary ticker first
|
||||
if let Ok(info) = check_ticker_exists(known_ticker).await {
|
||||
discovered_tickers.push(info);
|
||||
}
|
||||
|
||||
// Search for ISIN directly on Yahoo to find other listings
|
||||
let search_url = format!(
|
||||
"https://query2.finance.yahoo.com/v1/finance/search?q={}"esCount=20&newsCount=0",
|
||||
isin
|
||||
);
|
||||
|
||||
match HttpClient::new()
|
||||
.get(&search_url)
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(resp) => {
|
||||
if let Ok(json) = resp.json::<Value>().await {
|
||||
if let Some(quotes) = json["quotes"].as_array() {
|
||||
for quote in quotes {
|
||||
if let Some(symbol) = quote["symbol"].as_str() {
|
||||
// Skip if already found
|
||||
if discovered_tickers.iter().any(|t| t.ticker == symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Validate this ticker actually works
|
||||
if let Ok(info) = check_ticker_exists(symbol).await {
|
||||
discovered_tickers.push(info);
|
||||
}
|
||||
|
||||
sleep(TokioDuration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => println!(" Search API error: {}", e),
|
||||
}
|
||||
|
||||
// Also try common exchange suffixes for the base ticker
|
||||
if let Some(base) = known_ticker.split('.').next() {
|
||||
let suffixes = vec![
|
||||
"", // US
|
||||
".L", // London
|
||||
".DE", // Frankfurt/XETRA
|
||||
".PA", // Paris
|
||||
".AS", // Amsterdam
|
||||
".MI", // Milan
|
||||
".SW", // Switzerland
|
||||
".T", // Tokyo
|
||||
".HK", // Hong Kong
|
||||
".SS", // Shanghai
|
||||
".SZ", // Shenzhen
|
||||
".TO", // Toronto
|
||||
".AX", // Australia
|
||||
".SA", // Brazil
|
||||
".MC", // Madrid
|
||||
".BO", // Bombay
|
||||
".NS", // National Stock Exchange India
|
||||
];
|
||||
|
||||
for suffix in suffixes {
|
||||
let test_ticker = format!("{}{}", base, suffix);
|
||||
|
||||
// Skip if already found
|
||||
if discovered_tickers.iter().any(|t| t.ticker == test_ticker) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Ok(info) = check_ticker_exists(&test_ticker).await {
|
||||
discovered_tickers.push(info);
|
||||
sleep(TokioDuration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!(" Found {} tradable exchanges", discovered_tickers.len());
|
||||
Ok(discovered_tickers)
|
||||
}
|
||||
|
||||
/// Check if a ticker exists and get its exchange/currency info
|
||||
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
|
||||
let url = format!(
|
||||
"https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d",
|
||||
ticker
|
||||
);
|
||||
|
||||
let json: Value = HttpClient::new()
|
||||
.get(&url)
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.timeout(std::time::Duration::from_secs(5))
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
|
||||
// Check if we got valid data
|
||||
let result = &json["chart"]["result"];
|
||||
if result.is_null() || result.as_array().map(|a| a.is_empty()).unwrap_or(true) {
|
||||
return Err(anyhow::anyhow!("No data for ticker {}", ticker));
|
||||
}
|
||||
|
||||
let meta = &result[0]["meta"];
|
||||
|
||||
let exchange_name = meta["exchangeName"].as_str().unwrap_or("UNKNOWN");
|
||||
let exchange_mic = exchange_name_to_mic(exchange_name);
|
||||
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
|
||||
|
||||
// Check if this ticker has actual price data
|
||||
let has_data = meta["regularMarketPrice"].is_number()
|
||||
|| result[0]["timestamp"].as_array().map(|a| !a.is_empty()).unwrap_or(false);
|
||||
|
||||
if !has_data {
|
||||
return Err(anyhow::anyhow!("Ticker {} exists but has no price data", ticker));
|
||||
}
|
||||
|
||||
Ok(TickerInfo {
|
||||
ticker: ticker.to_string(),
|
||||
exchange_mic,
|
||||
currency,
|
||||
primary: false, // Will be set separately
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert Yahoo's exchange name to an ISO 10383 MIC code (best effort).
///
/// Both the short exchange codes and the longer display names listed in the
/// table below are recognised. Matching is exact and case-sensitive. Any name
/// not in the table is returned unchanged, so the result is not guaranteed to
/// be a valid MIC.
fn exchange_name_to_mic(name: &str) -> String {
    let mic = match name {
        "NMS" | "NasdaqGS" | "NASDAQ" => "XNAS",
        "NYQ" | "NYSE" => "XNYS",
        "LSE" | "London" => "XLON",
        // NOTE(review): Frankfurt and XETRA listings are deliberately folded
        // into one code here; if both must be stored side by side for the
        // same ISIN they may need distinct codes — confirm with callers.
        "FRA" | "Frankfurt" | "GER" | "XETRA" => "XFRA",
        "PAR" | "Paris" => "XPAR",
        "AMS" | "Amsterdam" => "XAMS",
        "MIL" | "Milan" => "XMIL",
        "JPX" | "Tokyo" => "XJPX",
        "HKG" | "Hong Kong" => "XHKG",
        "SHH" | "Shanghai" => "XSHG",
        "SHZ" | "Shenzhen" => "XSHE",
        "TOR" | "Toronto" => "XTSE",
        "ASX" | "Australia" => "XASX",
        "SAU" | "Saudi" => "XSAU",
        "SWX" | "Switzerland" => "XSWX",
        // BSE Ltd (Bombay) is XBOM; XBSE is the Bucharest Stock Exchange.
        "BSE" | "Bombay" => "XBOM",
        "NSE" | "NSI" => "XNSE",
        "TAI" | "Taiwan" => "XTAI",
        "SAO" | "Sao Paulo" => "BVMF",
        "MCE" | "Madrid" => "XMAD",
        // Fallback: pass the unrecognised name through as-is.
        _ => name,
    };
    mic.to_string()
}
|
||||
|
||||
pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
|
||||
let script = r#"
|
||||
(() => {
|
||||
@@ -34,14 +191,10 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
// Navigate to Yahoo Earnings Calendar for the ticker
|
||||
// offset=0&size=100 to get up to 100 entries
|
||||
// offset up to 99 loading older entries if needed
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
|
||||
client.goto(&url).await?;
|
||||
dismiss_yahoo_consent(client).await?;
|
||||
|
||||
// Load all by clicking "Show More" if present (unchanged)
|
||||
loop {
|
||||
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
|
||||
Ok(btn) => {
|
||||
@@ -61,9 +214,9 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
|
||||
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
|
||||
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
|
||||
.collect();
|
||||
if cols.len() < 6 { continue; } // Updated to match current 6-column structure
|
||||
if cols.len() < 6 { continue; }
|
||||
|
||||
let full_date = &cols[2]; // Now Earnings Date
|
||||
let full_date = &cols[2];
|
||||
let parts: Vec<&str> = full_date.split(" at ").collect();
|
||||
let raw_date = parts[0].trim();
|
||||
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
|
||||
@@ -73,8 +226,8 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let eps_forecast = parse_float(&cols[3]); // EPS Estimate
|
||||
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) }; // Reported EPS
|
||||
let eps_forecast = parse_float(&cols[3]);
|
||||
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) };
|
||||
|
||||
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
|
||||
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
|
||||
@@ -105,7 +258,6 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
// Helper: Yahoo returns prices as strings like "$123.45" or null
|
||||
fn parse_price(v: Option<&Value>) -> f64 {
|
||||
v.and_then(|x| x.as_str())
|
||||
.and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
|
||||
@@ -126,13 +278,13 @@ pub async fn fetch_daily_price_history(
|
||||
end_str: &str,
|
||||
) -> anyhow::Result<Vec<CompanyPrice>> {
|
||||
let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
|
||||
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1); // inclusive
|
||||
let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1);
|
||||
|
||||
let mut all_prices = Vec::new();
|
||||
let mut current = start;
|
||||
|
||||
while current < end {
|
||||
let chunk_end = current + Duration::days(730); // 2-year chunks = safe
|
||||
let chunk_end = current + Duration::days(730);
|
||||
let actual_end = chunk_end.min(end);
|
||||
|
||||
let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
|
||||
@@ -146,7 +298,7 @@ pub async fn fetch_daily_price_history(
|
||||
|
||||
let json: Value = HttpClient::new()
|
||||
.get(&url)
|
||||
.header("User-Agent", "Mozilla/5.0")
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
@@ -155,12 +307,15 @@ pub async fn fetch_daily_price_history(
|
||||
let result = &json["chart"]["result"][0];
|
||||
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
|
||||
let quote = &result["indicators"]["quote"][0];
|
||||
let meta = &result["meta"];
|
||||
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
|
||||
|
||||
let opens = quote["open"].as_array();
|
||||
let highs = quote["high"].as_array();
|
||||
let lows = quote["low"].as_array();
|
||||
let closes = quote["close"].as_array();
|
||||
let adj_closes = result["meta"]["adjClose"].as_array().or_else(|| quote["close"].as_array()); // fallback
|
||||
let adj_closes = result["indicators"]["adjclose"][0]["adjclose"].as_array()
|
||||
.or_else(|| closes);
|
||||
let volumes = quote["volume"].as_array();
|
||||
|
||||
for (i, ts_val) in timestamps.iter().enumerate() {
|
||||
@@ -182,14 +337,14 @@ pub async fn fetch_daily_price_history(
|
||||
all_prices.push(CompanyPrice {
|
||||
ticker: ticker.to_string(),
|
||||
date: date_str,
|
||||
time: "".to_string(), // Empty for daily
|
||||
time: "".to_string(),
|
||||
open,
|
||||
high,
|
||||
low,
|
||||
close,
|
||||
adj_close,
|
||||
volume,
|
||||
currency: "USD".to_string(), // Assuming USD for now
|
||||
currency: currency.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -209,8 +364,8 @@ pub async fn fetch_price_history_5min(
|
||||
_start: &str,
|
||||
_end: &str,
|
||||
) -> anyhow::Result<Vec<CompanyPrice>> {
|
||||
let now = Utc::now().timestamp();
|
||||
let period1 = now - 5184000; // 60 days ago
|
||||
let now = Utc::now().timestamp();
|
||||
let period1 = now - 5184000;
|
||||
let period2 = now;
|
||||
|
||||
let url = format!(
|
||||
@@ -219,7 +374,7 @@ let now = Utc::now().timestamp();
|
||||
|
||||
let json: Value = HttpClient::new()
|
||||
.get(&url)
|
||||
.header("User-Agent", "Mozilla/5.0")
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
@@ -228,6 +383,8 @@ let now = Utc::now().timestamp();
|
||||
let result = &json["chart"]["result"][0];
|
||||
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
|
||||
let quote = &result["indicators"]["quote"][0];
|
||||
let meta = &result["meta"];
|
||||
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
|
||||
|
||||
let mut prices = Vec::new();
|
||||
|
||||
@@ -246,14 +403,14 @@ let now = Utc::now().timestamp();
|
||||
prices.push(CompanyPrice {
|
||||
ticker: ticker.to_string(),
|
||||
date: date_str,
|
||||
time: time_str, // Full time for 5min intraday
|
||||
time: time_str,
|
||||
open,
|
||||
high,
|
||||
low,
|
||||
close,
|
||||
adj_close: close, // intraday usually not adjusted
|
||||
adj_close: close,
|
||||
volume,
|
||||
currency: "USD".to_string(), // Assuming USD for now
|
||||
currency: currency.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user