adding openfigi as identifier for company data

This commit is contained in:
2025-11-25 22:18:52 +01:00
parent e57a013224
commit eeae94e041
13 changed files with 608 additions and 139 deletions

View File

@@ -1,5 +1,5 @@
// src/corporate/scraper.rs
use super::{types::{CompanyEvent, CompanyPrice, TickerInfo}, helpers::*};
use super::{types::*, helpers::*};
use csv::ReaderBuilder;
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
@@ -41,18 +41,34 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
if let Ok(json) = resp.json::<Value>().await {
if let Some(quotes) = json["quotes"].as_array() {
for quote in quotes {
// First: filter by quoteType directly from search results (faster rejection)
let quote_type = quote["quoteType"].as_str().unwrap_or("");
if quote_type.to_uppercase() != "EQUITY" {
continue; // Skip bonds, ETFs, mutual funds, options, etc.
}
if let Some(symbol) = quote["symbol"].as_str() {
// Skip if already found
if discovered_tickers.iter().any(|t| t.ticker == symbol) {
// Avoid duplicates
if discovered_tickers.iter().any(|t: &TickerInfo| t.ticker == symbol) {
continue;
}
// Validate this ticker actually works
if let Ok(info) = check_ticker_exists(symbol).await {
discovered_tickers.push(info);
// Double-check with full quote data (some search results are misleading)
match check_ticker_exists(symbol).await {
Ok(info) => {
println!(" Found equity listing: {} on {} ({})",
symbol, info.exchange_mic, info.currency);
discovered_tickers.push(info);
}
Err(e) => {
// Most common: it's not actually equity or not tradable
// println!(" Rejected {}: {}", symbol, e);
continue;
}
}
sleep(TokioDuration::from_millis(100)).await;
// Be respectful to Yahoo
sleep(TokioDuration::from_millis(120)).await;
}
}
}
@@ -105,45 +121,59 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
/// Check if a ticker exists and get its exchange/currency info
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d",
"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price",
ticker
);
let json: Value = HttpClient::new()
let resp = HttpClient::new()
.get(&url)
.header("User-Agent", USER_AGENT)
.timeout(std::time::Duration::from_secs(5))
.send()
.await?
.json()
.await?;
// Check if we got valid data
let result = &json["chart"]["result"];
if result.is_null() || result.as_array().map(|a| a.is_empty()).unwrap_or(true) {
return Err(anyhow::anyhow!("No data for ticker {}", ticker));
let json: Value = resp.json().await?;
if let Some(result) = json["quoteSummary"]["result"].as_array() {
if result.is_empty() {
return Err(anyhow::anyhow!("No quote data for {}", ticker));
}
let quote = &result[0]["price"];
// CRITICAL: Only accept EQUITY securities
let quote_type = quote["quoteType"]
.as_str()
.unwrap_or("")
.to_uppercase();
if quote_type != "EQUITY" {
// Optional: debug what was filtered
println!(" → Skipping {} (quoteType: {})", ticker, quote_type);
return Err(anyhow::anyhow!("Not an equity: {}", quote_type));
}
let exchange = quote["exchange"].as_str().unwrap_or("");
let currency = quote["currency"].as_str().unwrap_or("USD");
let short_name = quote["shortName"].as_str().unwrap_or("");
// Optional: extra sanity — make sure it's not a bond masquerading as equity
if short_name.to_uppercase().contains("BOND") ||
short_name.to_uppercase().contains("NOTE") ||
short_name.to_uppercase().contains("DEBENTURE") {
return Err(anyhow::anyhow!("Name suggests debt security"));
}
if !exchange.is_empty() {
return Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic: exchange.to_string(),
currency: currency.to_string(),
primary: false,
});
}
}
let meta = &result[0]["meta"];
let exchange_name = meta["exchangeName"].as_str().unwrap_or("UNKNOWN");
let exchange_mic = exchange_name_to_mic(exchange_name);
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
// Check if this ticker has actual price data
let has_data = meta["regularMarketPrice"].is_number()
|| result[0]["timestamp"].as_array().map(|a| !a.is_empty()).unwrap_or(false);
if !has_data {
return Err(anyhow::anyhow!("Ticker {} exists but has no price data", ticker));
}
Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic,
currency: currency.to_string(),
primary: false,
})
Err(anyhow::anyhow!("Invalid or missing data for {}", ticker))
}
/// Convert Yahoo's exchange name to MIC code (best effort)
@@ -534,59 +564,128 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
}
pub fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
let rt = tokio::runtime::Runtime::new();
let Some(path) =
(match rt {
Ok(rt) => match rt.block_on(download_isin_lei_csv()) {
Ok(Some(p)) => Some(p),
Ok(None) => {
println!("ISIN/LEI download failed; continuing with empty map");
None
}
Err(e) => {
println!("Runtime download error: {e}");
None
}
},
Err(e) => {
println!("Failed to create Tokio runtime: {e}");
None
}
}
) else {
return Ok(HashMap::new());
};
let file = match File::open(&path) {
Ok(f) => f,
Err(e) => {
println!("Cannot open CSV '{}': {e}", path);
pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
// 1. Download + extract the CSV (this is now async)
let csv_path = match download_isin_lei_csv().await? {
Some(p) => p,
None => {
println!("ISIN/LEI download failed; continuing with empty map");
return Ok(HashMap::new());
}
};
let mut rdr = ReaderBuilder::new().from_reader(BufReader::new(file));
// 2. Open and parse the CSV synchronously (fast enough, ~8M lines is fine)
let file = match std::fs::File::open(&csv_path) {
Ok(f) => f,
Err(e) => {
println!("Cannot open CSV '{}': {}", csv_path, e);
return Ok(HashMap::new());
}
};
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(std::io::BufReader::new(file));
let mut map: HashMap<String, Vec<String>> = HashMap::new();
for row in rdr.records() {
let rec = match row {
for result in rdr.records() {
let record = match result {
Ok(r) => r,
Err(e) => {
println!("CSV parse error: {e}");
println!("CSV parse error: {}", e);
continue;
}
};
if rec.len() < 2 {
continue;
}
if record.len() < 2 { continue; }
let lei = rec[0].to_string();
let isin = rec[1].to_string();
let lei = record[0].to_string();
let isin = record[1].to_string();
map.entry(lei).or_default().push(isin);
}
println!("Loaded ISIN↔LEI map with {} LEIs and {} total ISINs",
map.len(),
map.values().map(|v| v.len()).sum::<usize>()
);
Ok(map)
}
pub async fn get_primary_isin_and_name(
client: &Client, // Pass your existing Selenium client
ticker: &str,
) -> anyhow::Result<PrimaryInfo> {
// Navigate to the actual quote page (always works)
let quote_url = format!("https://finance.yahoo.com/quote/{}", ticker);
client.goto(&quote_url).await?;
// Dismiss overlays/banners (your function + guce-specific)
reject_yahoo_cookies(client).await?;
// Wait for page to load (key data elements)
sleep(TokioDuration::from_millis(2000)).await;
// Get page HTML and parse
let html = client.source().await?;
let document = Html::parse_document(&html);
// Selectors for key fields (tested on real Yahoo pages Nov 2025)
let name_sel = Selector::parse("h1[data-testid='qsp-price-header']").unwrap_or_else(|_| Selector::parse("h1").unwrap());
let isin_sel = Selector::parse("[data-testid='qsp-symbol'] + div [data-field='isin']").unwrap_or_else(|_| Selector::parse("[data-field='isin']").unwrap());
let exchange_sel = Selector::parse("[data-testid='qsp-market'] span").unwrap_or_else(|_| Selector::parse(".TopNav__Exchange").unwrap());
let currency_sel = Selector::parse("[data-testid='qsp-price'] span:contains('USD')").unwrap_or_else(|_| Selector::parse(".TopNav__Currency").unwrap()); // Adjust for dynamic
let name_elem = document.select(&name_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let isin_elem = document.select(&isin_sel).next().map(|e| e.text().collect::<String>().trim().to_uppercase());
let exchange_elem = document.select(&exchange_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let currency_elem = document.select(&currency_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let name = name_elem.unwrap_or_else(|| ticker.to_string());
let isin = isin_elem.unwrap_or_default();
let exchange_mic = exchange_elem.unwrap_or_default();
let currency = currency_elem.unwrap_or_else(|| "USD".to_string());
// Validate ISIN
let valid_isin = if isin.len() == 12 && isin.chars().all(|c| c.is_alphanumeric()) {
isin
} else {
"".to_string()
};
println!(" → Scraped {}: {} | ISIN: {} | Exchange: {}", ticker, name, valid_isin, exchange_mic);
Ok(PrimaryInfo {
isin: valid_isin,
name,
exchange_mic,
currency,
})
}
/// Best-effort dismissal of Yahoo's cookie-consent page.
///
/// Polls the current page up to 10 times, 500 ms apart, for the
/// `#consent-page .reject-all` button and clicks it as soon as it is found.
/// Returns `Ok(())` whether or not the button ever appeared.
///
/// # Errors
/// Propagates any failure from executing the script in the browser session.
pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
    for _ in 0..10 {
        // WebDriver runs the script as a function body, so the result must be
        // handed back with an explicit `return`. Without it the call always
        // yields null, `clicked` is always false, and the loop never stops
        // early even after a successful click.
        let clicked: bool = client
            .execute(
                r#"return (() => {
const btn = document.querySelector('#consent-page .reject-all');
if (btn) {
btn.click();
return true;
}
return false;
})();"#,
                vec![],
            )
            .await?
            .as_bool()
            .unwrap_or(false);

        if clicked {
            break;
        }
        sleep(TokioDuration::from_millis(500)).await;
    }

    println!("Rejected Yahoo cookies if button existed");
    Ok(())
}