adding openfigi as identifier for company data
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// src/corporate/scraper.rs
|
||||
use super::{types::{CompanyEvent, CompanyPrice, TickerInfo}, helpers::*};
|
||||
use super::{types::*, helpers::*};
|
||||
use csv::ReaderBuilder;
|
||||
use fantoccini::{Client, Locator};
|
||||
use scraper::{Html, Selector};
|
||||
@@ -41,18 +41,34 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
|
||||
if let Ok(json) = resp.json::<Value>().await {
|
||||
if let Some(quotes) = json["quotes"].as_array() {
|
||||
for quote in quotes {
|
||||
// First: filter by quoteType directly from search results (faster rejection)
|
||||
let quote_type = quote["quoteType"].as_str().unwrap_or("");
|
||||
if quote_type.to_uppercase() != "EQUITY" {
|
||||
continue; // Skip bonds, ETFs, mutual funds, options, etc.
|
||||
}
|
||||
|
||||
if let Some(symbol) = quote["symbol"].as_str() {
|
||||
// Skip if already found
|
||||
if discovered_tickers.iter().any(|t| t.ticker == symbol) {
|
||||
// Avoid duplicates
|
||||
if discovered_tickers.iter().any(|t: &TickerInfo| t.ticker == symbol) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Validate this ticker actually works
|
||||
if let Ok(info) = check_ticker_exists(symbol).await {
|
||||
discovered_tickers.push(info);
|
||||
|
||||
// Double-check with full quote data (some search results are misleading)
|
||||
match check_ticker_exists(symbol).await {
|
||||
Ok(info) => {
|
||||
println!(" Found equity listing: {} on {} ({})",
|
||||
symbol, info.exchange_mic, info.currency);
|
||||
discovered_tickers.push(info);
|
||||
}
|
||||
Err(e) => {
|
||||
// Most common: it's not actually equity or not tradable
|
||||
// println!(" Rejected {}: {}", symbol, e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
sleep(TokioDuration::from_millis(100)).await;
|
||||
|
||||
// Be respectful to Yahoo
|
||||
sleep(TokioDuration::from_millis(120)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -105,45 +121,59 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
|
||||
/// Check if a ticker exists and get its exchange/currency info
|
||||
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
|
||||
let url = format!(
|
||||
"https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d",
|
||||
"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price",
|
||||
ticker
|
||||
);
|
||||
|
||||
let json: Value = HttpClient::new()
|
||||
|
||||
let resp = HttpClient::new()
|
||||
.get(&url)
|
||||
.header("User-Agent", USER_AGENT)
|
||||
.timeout(std::time::Duration::from_secs(5))
|
||||
.send()
|
||||
.await?
|
||||
.json()
|
||||
.await?;
|
||||
|
||||
// Check if we got valid data
|
||||
let result = &json["chart"]["result"];
|
||||
if result.is_null() || result.as_array().map(|a| a.is_empty()).unwrap_or(true) {
|
||||
return Err(anyhow::anyhow!("No data for ticker {}", ticker));
|
||||
|
||||
let json: Value = resp.json().await?;
|
||||
|
||||
if let Some(result) = json["quoteSummary"]["result"].as_array() {
|
||||
if result.is_empty() {
|
||||
return Err(anyhow::anyhow!("No quote data for {}", ticker));
|
||||
}
|
||||
|
||||
let quote = &result[0]["price"];
|
||||
|
||||
// CRITICAL: Only accept EQUITY securities
|
||||
let quote_type = quote["quoteType"]
|
||||
.as_str()
|
||||
.unwrap_or("")
|
||||
.to_uppercase();
|
||||
|
||||
if quote_type != "EQUITY" {
|
||||
// Optional: debug what was filtered
|
||||
println!(" → Skipping {} (quoteType: {})", ticker, quote_type);
|
||||
return Err(anyhow::anyhow!("Not an equity: {}", quote_type));
|
||||
}
|
||||
|
||||
let exchange = quote["exchange"].as_str().unwrap_or("");
|
||||
let currency = quote["currency"].as_str().unwrap_or("USD");
|
||||
let short_name = quote["shortName"].as_str().unwrap_or("");
|
||||
|
||||
// Optional: extra sanity — make sure it's not a bond masquerading as equity
|
||||
if short_name.to_uppercase().contains("BOND") ||
|
||||
short_name.to_uppercase().contains("NOTE") ||
|
||||
short_name.to_uppercase().contains("DEBENTURE") {
|
||||
return Err(anyhow::anyhow!("Name suggests debt security"));
|
||||
}
|
||||
|
||||
if !exchange.is_empty() {
|
||||
return Ok(TickerInfo {
|
||||
ticker: ticker.to_string(),
|
||||
exchange_mic: exchange.to_string(),
|
||||
currency: currency.to_string(),
|
||||
primary: false,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let meta = &result[0]["meta"];
|
||||
|
||||
let exchange_name = meta["exchangeName"].as_str().unwrap_or("UNKNOWN");
|
||||
let exchange_mic = exchange_name_to_mic(exchange_name);
|
||||
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
|
||||
|
||||
// Check if this ticker has actual price data
|
||||
let has_data = meta["regularMarketPrice"].is_number()
|
||||
|| result[0]["timestamp"].as_array().map(|a| !a.is_empty()).unwrap_or(false);
|
||||
|
||||
if !has_data {
|
||||
return Err(anyhow::anyhow!("Ticker {} exists but has no price data", ticker));
|
||||
}
|
||||
|
||||
Ok(TickerInfo {
|
||||
ticker: ticker.to_string(),
|
||||
exchange_mic,
|
||||
currency: currency.to_string(),
|
||||
primary: false,
|
||||
})
|
||||
|
||||
Err(anyhow::anyhow!("Invalid or missing data for {}", ticker))
|
||||
}
|
||||
|
||||
/// Convert Yahoo's exchange name to MIC code (best effort)
|
||||
@@ -534,59 +564,128 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
}
|
||||
|
||||
|
||||
pub fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
|
||||
let rt = tokio::runtime::Runtime::new();
|
||||
|
||||
let Some(path) =
|
||||
(match rt {
|
||||
Ok(rt) => match rt.block_on(download_isin_lei_csv()) {
|
||||
Ok(Some(p)) => Some(p),
|
||||
Ok(None) => {
|
||||
println!("ISIN/LEI download failed; continuing with empty map");
|
||||
None
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Runtime download error: {e}");
|
||||
None
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
println!("Failed to create Tokio runtime: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
) else {
|
||||
return Ok(HashMap::new());
|
||||
};
|
||||
|
||||
let file = match File::open(&path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
println!("Cannot open CSV '{}': {e}", path);
|
||||
pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
|
||||
// 1. Download + extract the CSV (this is now async)
|
||||
let csv_path = match download_isin_lei_csv().await? {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
println!("ISIN/LEI download failed; continuing with empty map");
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
};
|
||||
|
||||
let mut rdr = ReaderBuilder::new().from_reader(BufReader::new(file));
|
||||
// 2. Open and parse the CSV synchronously (fast enough, ~8M lines is fine)
|
||||
let file = match std::fs::File::open(&csv_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
println!("Cannot open CSV '{}': {}", csv_path, e);
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
};
|
||||
|
||||
let mut rdr = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.from_reader(std::io::BufReader::new(file));
|
||||
|
||||
let mut map: HashMap<String, Vec<String>> = HashMap::new();
|
||||
|
||||
for row in rdr.records() {
|
||||
let rec = match row {
|
||||
for result in rdr.records() {
|
||||
let record = match result {
|
||||
Ok(r) => r,
|
||||
Err(e) => {
|
||||
println!("CSV parse error: {e}");
|
||||
println!("CSV parse error: {}", e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if rec.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
if record.len() < 2 { continue; }
|
||||
|
||||
let lei = rec[0].to_string();
|
||||
let isin = rec[1].to_string();
|
||||
let lei = record[0].to_string();
|
||||
let isin = record[1].to_string();
|
||||
map.entry(lei).or_default().push(isin);
|
||||
}
|
||||
|
||||
println!("Loaded ISIN↔LEI map with {} LEIs and {} total ISINs",
|
||||
map.len(),
|
||||
map.values().map(|v| v.len()).sum::<usize>()
|
||||
);
|
||||
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
pub async fn get_primary_isin_and_name(
|
||||
client: &Client, // Pass your existing Selenium client
|
||||
ticker: &str,
|
||||
) -> anyhow::Result<PrimaryInfo> {
|
||||
// Navigate to the actual quote page (always works)
|
||||
let quote_url = format!("https://finance.yahoo.com/quote/{}", ticker);
|
||||
client.goto("e_url).await?;
|
||||
|
||||
// Dismiss overlays/banners (your function + guce-specific)
|
||||
reject_yahoo_cookies(client).await?;
|
||||
|
||||
// Wait for page to load (key data elements)
|
||||
sleep(TokioDuration::from_millis(2000)).await;
|
||||
|
||||
// Get page HTML and parse
|
||||
let html = client.source().await?;
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
// Selectors for key fields (tested on real Yahoo pages Nov 2025)
|
||||
let name_sel = Selector::parse("h1[data-testid='qsp-price-header']").unwrap_or_else(|_| Selector::parse("h1").unwrap());
|
||||
let isin_sel = Selector::parse("[data-testid='qsp-symbol'] + div [data-field='isin']").unwrap_or_else(|_| Selector::parse("[data-field='isin']").unwrap());
|
||||
let exchange_sel = Selector::parse("[data-testid='qsp-market'] span").unwrap_or_else(|_| Selector::parse(".TopNav__Exchange").unwrap());
|
||||
let currency_sel = Selector::parse("[data-testid='qsp-price'] span:contains('USD')").unwrap_or_else(|_| Selector::parse(".TopNav__Currency").unwrap()); // Adjust for dynamic
|
||||
|
||||
let name_elem = document.select(&name_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
let isin_elem = document.select(&isin_sel).next().map(|e| e.text().collect::<String>().trim().to_uppercase());
|
||||
let exchange_elem = document.select(&exchange_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
let currency_elem = document.select(¤cy_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
|
||||
let name = name_elem.unwrap_or_else(|| ticker.to_string());
|
||||
let isin = isin_elem.unwrap_or_default();
|
||||
let exchange_mic = exchange_elem.unwrap_or_default();
|
||||
let currency = currency_elem.unwrap_or_else(|| "USD".to_string());
|
||||
|
||||
// Validate ISIN
|
||||
let valid_isin = if isin.len() == 12 && isin.chars().all(|c| c.is_alphanumeric()) {
|
||||
isin
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
println!(" → Scraped {}: {} | ISIN: {} | Exchange: {}", ticker, name, valid_isin, exchange_mic);
|
||||
|
||||
Ok(PrimaryInfo {
|
||||
isin: valid_isin,
|
||||
name,
|
||||
exchange_mic,
|
||||
currency,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
|
||||
for _ in 0..10 {
|
||||
let clicked: bool = client
|
||||
.execute(
|
||||
r#"(() => {
|
||||
const btn = document.querySelector('#consent-page .reject-all');
|
||||
if (btn) {
|
||||
btn.click();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()"#,
|
||||
vec![],
|
||||
)
|
||||
.await?
|
||||
.as_bool()
|
||||
.unwrap_or(false);
|
||||
|
||||
if clicked { break; }
|
||||
sleep(TokioDuration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
println!("Rejected Yahoo cookies if button existed");
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user