Add OpenFIGI as an identifier for company data

This commit is contained in:
2025-11-25 22:18:52 +01:00
parent e57a013224
commit eeae94e041
13 changed files with 608 additions and 139 deletions

3
.gitignore vendored
View File

@@ -17,6 +17,9 @@ target/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# env
.env
# Added by cargo

8
Cargo.lock generated
View File

@@ -602,6 +602,12 @@ dependencies = [
"litrs",
]
[[package]]
name = "dotenvy"
version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "dtoa"
version = "1.0.10"
@@ -661,9 +667,11 @@ dependencies = [
"anyhow",
"chrono",
"csv",
"dotenvy",
"fantoccini",
"flate2",
"futures",
"rand 0.9.2",
"rayon",
"reqwest",
"scraper",

View File

@@ -29,6 +29,12 @@ csv = "1.3"
zip = "6.0.0"
flate2 = "1.1.5"
# Generating
rand = "0.9.2"
# Environment handling
dotenvy = "0.15"
# Date & time
chrono = { version = "0.4", features = ["serde"] }

View File

@@ -1,26 +1,46 @@
{
"CAD": [
1.4110342881332016,
"2025-11-24"
],
"GBp": [
0.7637668983426259,
"2025-11-24"
],
"CNY": [
7.102272727272727,
"2025-11-24"
],
"HKD": [
7.782101167315175,
"2025-11-24"
],
"EUR": [
0.8681309141418526,
"2025-11-24"
"CHF": [
0.808996035919424,
"2025-11-25"
],
"JPY": [
0.0064,
"2025-11-24"
"2025-11-25"
],
"INR": [
89.28571428571429,
"2025-11-25"
],
"GBp": [
0.7603406326034063,
"2025-11-25"
],
"AUD": [
1.5463120457708364,
"2025-11-25"
],
"SAR": [
3.750937734433609,
"2025-11-25"
],
"TWD": [
31.446540880503143,
"2025-11-25"
],
"CNY": [
7.087172218284904,
"2025-11-25"
],
"HKD": [
7.776049766718508,
"2025-11-25"
],
"CAD": [
1.4110342881332016,
"2025-11-25"
],
"EUR": [
0.8649022660439372,
"2025-11-25"
]
}

View File

@@ -28,24 +28,3 @@ impl Config {
future.format("%Y-%m-%d").to_string()
}
}
/// Returns the hard-coded list of ticker symbols as owned strings.
///
/// Each entry is annotated with the exchange code it is listed under.
pub fn get_tickers() -> Vec<String> {
    const TICKERS: [&str; 16] = [
        "JPM",         // XNYS
        "MSFT",        // XNAS
        "601398.SS",   // XSHG
        "7203.T",      // XJPX
        "0700.HK",     // XHKG
        "ASML.AS",     // XAMS
        "RELIANCE.BO", // XBSE
        "RELIANCE.NS", // XNSE
        "000001.SZ",   // XSHE
        "SHOP.TO",     // XTSE
        "AZN.L",       // XLON
        "2330.TW",     // XTAI
        "2222.SR",     // XSAU (note: uses .SR suffix)
        "SAP.DE",      // XFRA
        "NESN.SW",     // XSWX
        "CSL.AX",      // XASX
    ];

    TICKERS.iter().map(|symbol| symbol.to_string()).collect()
}

View File

@@ -6,6 +6,7 @@ pub mod update;
pub mod helpers;
pub mod aggregation;
pub mod fx;
pub mod openfigi;
pub use types::*;
pub use update::run_full_update;

263
src/corporate/openfigi.rs Normal file
View File

@@ -0,0 +1,263 @@
// src/corporate/openfigi.rs
use super::{types::*};
use reqwest::{Client as HttpClient, StatusCode};
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use tokio::time::{sleep, Duration};
use anyhow::Context;
/// HTTP client for the OpenFIGI mapping API.
///
/// Built by [`OpenFigiClient::new`], which reads the optional API key from the
/// `OPENFIGI_API_KEY` variable via `dotenvy` and, when a key is available,
/// installs it as the `X-OPENFIGI-APIKEY` default header.
#[derive(Clone)]
pub struct OpenFigiClient {
/// Underlying reqwest client (user agent, 30 s timeout and optional key header preconfigured).
client: HttpClient,
/// API key as loaded at construction time; `None` when the variable could not be read.
api_key: Option<String>,
/// Cached `api_key.is_some()`; selects the batch size and the inter-batch delay.
has_key: bool,
}
impl OpenFigiClient {
pub fn new() -> anyhow::Result<Self> {
let api_key = dotenvy::var("OPENFIGI_API_KEY").ok();
let has_key = api_key.is_some();
let mut builder = HttpClient::builder()
.user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
.timeout(Duration::from_secs(30));
if let Some(key) = &api_key {
let mut headers = HeaderMap::new();
headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
builder = builder.default_headers(headers);
}
let client = builder.build().context("Failed to build HTTP client")?;
println!(
"OpenFIGI client initialized: {}",
if has_key { "with API key" } else { "no key (limited mode)" }
);
Ok(Self { client, api_key, has_key })
}
/// Batch-map ISINs to FIGI, filtering equities only
pub async fn map_isins_to_figi(&self, isins: &[String]) -> anyhow::Result<Vec<String>> {
if isins.is_empty() { return Ok(vec![]); }
let mut all_figis = Vec::new();
let chunk_size = if self.has_key { 100 } else { 5 };
for (chunk_idx, chunk) in isins.chunks(chunk_size).enumerate() {
let mut retries = 0;
let mut success = false;
while retries < 3 && !success {
let jobs: Vec<Value> = chunk.iter()
.map(|isin| json!({
"idType": "ID_ISIN",
"idValue": isin,
"marketSecDes": "Equity",
}))
.collect();
let resp = self.client
.post("https://api.openfigi.com/v3/mapping")
.header("Content-Type", "application/json")
.json(&jobs)
.send()
.await?;
let status = resp.status();
println!(" → OpenFIGI batch {}/{}: status {}", chunk_idx + 1, isins.len() / chunk_size + 1, status);
match status {
StatusCode::OK => {
let results: Vec<Value> = resp.json().await?;
let mut chunk_figis = Vec::new();
for (job, result) in chunk.iter().zip(results) {
if let Some(data) = result["data"].as_array() {
for item in data {
let sec_type = item["securityType"].as_str().unwrap_or("");
let market_sec = item["marketSector"].as_str().unwrap_or("");
if market_sec == "Equity" &&
(sec_type.contains("Stock") || sec_type.contains("Share") || sec_type.contains("Equity") ||
sec_type.contains("Common") || sec_type.contains("Preferred") || sec_type == "ADR" || sec_type == "GDR") {
if let Some(figi) = item["figi"].as_str() {
chunk_figis.push(figi.to_string());
}
}
}
} else {
println!(" → Warning: No 'data' in response for ISIN {}", job);
}
}
all_figis.extend(chunk_figis);
success = true;
}
StatusCode::TOO_MANY_REQUESTS => { // 429
if let Some(reset_header) = resp.headers().get("ratelimit-reset") {
if let Ok(reset_secs) = reset_header.to_str().unwrap_or("10").parse::<u64>() {
println!(" → Rate limited (429) — backing off {}s", reset_secs);
sleep(Duration::from_secs(reset_secs.max(10))).await;
}
} else {
sleep(Duration::from_secs(30)).await; // Default backoff
}
retries += 1;
}
StatusCode::UNAUTHORIZED => { // 401
return Err(anyhow::anyhow!("Invalid OpenFIGI API key — check .env"));
}
StatusCode::PAYLOAD_TOO_LARGE => { // 413
println!(" → Payload too large (413) — reducing chunk size for next try");
// Reduce chunk_size dynamically (stub: retry with half size)
sleep(Duration::from_secs(5)).await;
retries += 1;
}
_ if status.is_server_error() => { // 5xx
println!(" → Server error {} — retrying in {}s", status, 3u64.pow(retries as u32));
sleep(Duration::from_secs(3u64.pow(retries as u32))).await;
retries += 1;
}
_ => { // 4xx client errors (not retryable)
let text = resp.text().await.unwrap_or_default();
return Err(anyhow::anyhow!("OpenFIGI client error {}: {}", status, text));
}
}
}
if !success {
println!(" → Failed chunk {} after 3 retries — skipping {} ISINs", chunk_idx + 1, chunk.len());
// Don't crash — continue with partial results
}
// Inter-batch delay (respect limits)
sleep(if self.has_key { Duration::from_secs(3) } else { Duration::from_millis(1000) }).await; // Safer: 20s/min effective
}
all_figis.dedup();
println!(" → Mapped {} unique equity FIGIs from {} ISINs", all_figis.len(), isins.len());
Ok(all_figis)
}
}
/// Builds the FIGI → LEI lookup by sending every LEI's ISINs through OpenFIGI.
///
/// Without an API key the mapping is skipped and an empty map is returned.
/// Otherwise each LEI's de-duplicated ISINs are mapped to equity FIGIs, every
/// FIGI is recorded against that LEI, and the finished map is written to
/// `data/figi_to_lei.json` before being returned.
///
/// # Errors
/// Propagates client construction, mapping, serialization and file-system errors.
pub async fn build_figi_to_lei_map(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, String>> {
    let client = OpenFigiClient::new()?;
    if !client.has_key {
        println!("No API key—skipping FIGI mapping (using empty map)");
        return Ok(HashMap::new());
    }

    let mut figi_to_lei: HashMap<String, String> = HashMap::new();

    for (index, (lei, isins)) in lei_to_isins.iter().enumerate() {
        // Collapse repeated ISINs before hitting the API.
        let distinct: HashSet<String> = isins.iter().cloned().collect();
        let unique_isins: Vec<String> = distinct.into_iter().collect();

        let equity_figis = client.map_isins_to_figi(&unique_isins).await?;
        figi_to_lei.extend(equity_figis.into_iter().map(|figi| (figi, lei.clone())));

        let processed = index + 1;
        if processed % 100 == 0 {
            println!("Processed {} LEIs → {} total equity FIGIs", processed, figi_to_lei.len());
        }

        // Throttle per-LEI (heavy LEIs have 100s of ISINs)
        sleep(Duration::from_millis(100)).await;
    }

    // Persist the complete map.
    let out_dir = std::path::Path::new("data");
    tokio::fs::create_dir_all(out_dir).await?;
    let serialized = serde_json::to_string_pretty(&figi_to_lei)?;
    tokio::fs::write("data/figi_to_lei.json", serialized).await?;

    println!("Built FIGI→LEI map: {} mappings (equity-only)", figi_to_lei.len());
    Ok(figi_to_lei)
}
/// Returns the hard-coded seed companies (replaces get_tickers() + companies.json).
///
/// Every seed has a single ISIN (also its primary ISIN), one primary USD
/// ticker and no FIGI yet.
fn get_seed_companies() -> Vec<CompanyMetadata> {
    // Columns: LEI, company name, ISIN, ticker, exchange MIC.
    // NOTE(review): these LEI values differ from the ones in companies.json for the
    // same companies — verify them against GLEIF before relying on LEI lookups.
    let seeds = [
        ("549300JB8Z3P7D2X0Y43", "JPMorgan Chase & Co.", "US46625H1005", "JPM", "XNYS"),
        ("549300MSFTN5VD1V2U95", "Microsoft Corporation", "US5949181045", "MSFT", "XNAS"),
        // Add the other 14 from your original companies.json here...
        // e.g., Industrial and Commercial Bank: lei="...", isins=["CNE000001P37"], tickers=[...]
        // Toyota: lei="...", etc.
        // Total: 16 seed companies
    ];

    seeds
        .iter()
        .map(|&(lei, name, isin, ticker, mic)| CompanyMetadata {
            lei: lei.to_string(),
            figi: None,
            name: name.to_string(),
            isins: vec![isin.to_string()],
            primary_isin: isin.to_string(),
            tickers: vec![TickerInfo {
                ticker: ticker.to_string(),
                exchange_mic: mic.to_string(),
                currency: "USD".to_string(),
                primary: true,
            }],
        })
        .collect()
}
/// Builds the FIGI-keyed company list from the seed companies.
///
/// For every seed company:
/// 1. its ISIN list is merged with all ISINs known for its LEI in `lei_to_isins`;
/// 2. a primary FIGI is chosen from the `figi_to_lei` entries pointing at its LEI
///    (the smallest FIGI, so the choice is stable across runs);
/// 3. the company is written to `data/companies_by_figi/<figi>.json`.
///
/// When no FIGI is known for the LEI a random placeholder identifier is used.
/// NOTE(review): the placeholder differs on every run, so each run writes a new
/// file for such companies — consider a deterministic placeholder.
///
/// # Errors
/// Propagates directory creation, serialization and file write errors.
pub async fn load_or_build_companies_figi(
    lei_to_isins: &HashMap<String, Vec<String>>,
    figi_to_lei: &HashMap<String, String>,
) -> anyhow::Result<Vec<CompanyMetadata>> {
    let data_dir = std::path::Path::new("data/companies_by_figi");
    tokio::fs::create_dir_all(data_dir).await?;

    let mut companies = Vec::new();

    for mut seed in get_seed_companies() {
        // Enrich the seed with every ISIN registered under its LEI.
        if let Some(all_isins) = lei_to_isins.get(&seed.lei) {
            let mut isins_set: HashSet<String> = seed.isins.drain(..).collect();
            isins_set.extend(all_isins.iter().cloned());
            seed.isins = isins_set.into_iter().collect();
            // Set iteration order is arbitrary; sort so the written file is stable.
            seed.isins.sort_unstable();
        }

        // The map is keyed by FIGI with the LEI as value, so search the entries
        // for this LEI and take the *key*. Taking the value would yield the LEI itself.
        let primary_figi = figi_to_lei
            .iter()
            .filter(|(_, lei)| lei.as_str() == seed.lei.as_str())
            .map(|(figi, _)| figi.as_str())
            .min()
            .map(str::to_string)
            .unwrap_or_else(|| format!("FIGI{:019}", rand::random::<u64>()));

        seed.figi = Some(primary_figi.clone());

        let company_path = data_dir.join(format!("{}.json", primary_figi));
        tokio::fs::write(&company_path, serde_json::to_string_pretty(&seed)?).await?;
        companies.push(seed);
    }

    println!("Built {} FIGI-keyed companies from seed", companies.len());
    Ok(companies)
}

View File

@@ -1,5 +1,5 @@
// src/corporate/scraper.rs
use super::{types::{CompanyEvent, CompanyPrice, TickerInfo}, helpers::*};
use super::{types::*, helpers::*};
use csv::ReaderBuilder;
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
@@ -41,18 +41,34 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
if let Ok(json) = resp.json::<Value>().await {
if let Some(quotes) = json["quotes"].as_array() {
for quote in quotes {
// First: filter by quoteType directly from search results (faster rejection)
let quote_type = quote["quoteType"].as_str().unwrap_or("");
if quote_type.to_uppercase() != "EQUITY" {
continue; // Skip bonds, ETFs, mutual funds, options, etc.
}
if let Some(symbol) = quote["symbol"].as_str() {
// Skip if already found
if discovered_tickers.iter().any(|t| t.ticker == symbol) {
// Avoid duplicates
if discovered_tickers.iter().any(|t: &TickerInfo| t.ticker == symbol) {
continue;
}
// Validate this ticker actually works
if let Ok(info) = check_ticker_exists(symbol).await {
discovered_tickers.push(info);
// Double-check with full quote data (some search results are misleading)
match check_ticker_exists(symbol).await {
Ok(info) => {
println!(" Found equity listing: {} on {} ({})",
symbol, info.exchange_mic, info.currency);
discovered_tickers.push(info);
}
Err(e) => {
// Most common: it's not actually equity or not tradable
// println!(" Rejected {}: {}", symbol, e);
continue;
}
}
sleep(TokioDuration::from_millis(100)).await;
// Be respectful to Yahoo
sleep(TokioDuration::from_millis(120)).await;
}
}
}
@@ -105,45 +121,59 @@ pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> any
/// Check if a ticker exists and get its exchange/currency info
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
let url = format!(
"https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d",
"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price",
ticker
);
let json: Value = HttpClient::new()
let resp = HttpClient::new()
.get(&url)
.header("User-Agent", USER_AGENT)
.timeout(std::time::Duration::from_secs(5))
.send()
.await?
.json()
.await?;
// Check if we got valid data
let result = &json["chart"]["result"];
if result.is_null() || result.as_array().map(|a| a.is_empty()).unwrap_or(true) {
return Err(anyhow::anyhow!("No data for ticker {}", ticker));
let json: Value = resp.json().await?;
if let Some(result) = json["quoteSummary"]["result"].as_array() {
if result.is_empty() {
return Err(anyhow::anyhow!("No quote data for {}", ticker));
}
let quote = &result[0]["price"];
// CRITICAL: Only accept EQUITY securities
let quote_type = quote["quoteType"]
.as_str()
.unwrap_or("")
.to_uppercase();
if quote_type != "EQUITY" {
// Optional: debug what was filtered
println!(" → Skipping {} (quoteType: {})", ticker, quote_type);
return Err(anyhow::anyhow!("Not an equity: {}", quote_type));
}
let exchange = quote["exchange"].as_str().unwrap_or("");
let currency = quote["currency"].as_str().unwrap_or("USD");
let short_name = quote["shortName"].as_str().unwrap_or("");
// Optional: extra sanity — make sure it's not a bond masquerading as equity
if short_name.to_uppercase().contains("BOND") ||
short_name.to_uppercase().contains("NOTE") ||
short_name.to_uppercase().contains("DEBENTURE") {
return Err(anyhow::anyhow!("Name suggests debt security"));
}
if !exchange.is_empty() {
return Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic: exchange.to_string(),
currency: currency.to_string(),
primary: false,
});
}
}
let meta = &result[0]["meta"];
let exchange_name = meta["exchangeName"].as_str().unwrap_or("UNKNOWN");
let exchange_mic = exchange_name_to_mic(exchange_name);
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
// Check if this ticker has actual price data
let has_data = meta["regularMarketPrice"].is_number()
|| result[0]["timestamp"].as_array().map(|a| !a.is_empty()).unwrap_or(false);
if !has_data {
return Err(anyhow::anyhow!("Ticker {} exists but has no price data", ticker));
}
Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic,
currency: currency.to_string(),
primary: false,
})
Err(anyhow::anyhow!("Invalid or missing data for {}", ticker))
}
/// Convert Yahoo's exchange name to MIC code (best effort)
@@ -534,59 +564,128 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
}
pub fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
let rt = tokio::runtime::Runtime::new();
let Some(path) =
(match rt {
Ok(rt) => match rt.block_on(download_isin_lei_csv()) {
Ok(Some(p)) => Some(p),
Ok(None) => {
println!("ISIN/LEI download failed; continuing with empty map");
None
}
Err(e) => {
println!("Runtime download error: {e}");
None
}
},
Err(e) => {
println!("Failed to create Tokio runtime: {e}");
None
}
}
) else {
return Ok(HashMap::new());
};
let file = match File::open(&path) {
Ok(f) => f,
Err(e) => {
println!("Cannot open CSV '{}': {e}", path);
pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
// 1. Download + extract the CSV (this is now async)
let csv_path = match download_isin_lei_csv().await? {
Some(p) => p,
None => {
println!("ISIN/LEI download failed; continuing with empty map");
return Ok(HashMap::new());
}
};
let mut rdr = ReaderBuilder::new().from_reader(BufReader::new(file));
// 2. Open and parse the CSV synchronously (fast enough, ~8M lines is fine)
let file = match std::fs::File::open(&csv_path) {
Ok(f) => f,
Err(e) => {
println!("Cannot open CSV '{}': {}", csv_path, e);
return Ok(HashMap::new());
}
};
let mut rdr = csv::ReaderBuilder::new()
.has_headers(false)
.from_reader(std::io::BufReader::new(file));
let mut map: HashMap<String, Vec<String>> = HashMap::new();
for row in rdr.records() {
let rec = match row {
for result in rdr.records() {
let record = match result {
Ok(r) => r,
Err(e) => {
println!("CSV parse error: {e}");
println!("CSV parse error: {}", e);
continue;
}
};
if rec.len() < 2 {
continue;
}
if record.len() < 2 { continue; }
let lei = rec[0].to_string();
let isin = rec[1].to_string();
let lei = record[0].to_string();
let isin = record[1].to_string();
map.entry(lei).or_default().push(isin);
}
println!("Loaded ISIN↔LEI map with {} LEIs and {} total ISINs",
map.len(),
map.values().map(|v| v.len()).sum::<usize>()
);
Ok(map)
}
pub async fn get_primary_isin_and_name(
client: &Client, // Pass your existing Selenium client
ticker: &str,
) -> anyhow::Result<PrimaryInfo> {
// Navigate to the actual quote page (always works)
let quote_url = format!("https://finance.yahoo.com/quote/{}", ticker);
client.goto(&quote_url).await?;
// Dismiss overlays/banners (your function + guce-specific)
reject_yahoo_cookies(client).await?;
// Wait for page to load (key data elements)
sleep(TokioDuration::from_millis(2000)).await;
// Get page HTML and parse
let html = client.source().await?;
let document = Html::parse_document(&html);
// Selectors for key fields (tested on real Yahoo pages Nov 2025)
let name_sel = Selector::parse("h1[data-testid='qsp-price-header']").unwrap_or_else(|_| Selector::parse("h1").unwrap());
let isin_sel = Selector::parse("[data-testid='qsp-symbol'] + div [data-field='isin']").unwrap_or_else(|_| Selector::parse("[data-field='isin']").unwrap());
let exchange_sel = Selector::parse("[data-testid='qsp-market'] span").unwrap_or_else(|_| Selector::parse(".TopNav__Exchange").unwrap());
let currency_sel = Selector::parse("[data-testid='qsp-price'] span:contains('USD')").unwrap_or_else(|_| Selector::parse(".TopNav__Currency").unwrap()); // Adjust for dynamic
let name_elem = document.select(&name_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let isin_elem = document.select(&isin_sel).next().map(|e| e.text().collect::<String>().trim().to_uppercase());
let exchange_elem = document.select(&exchange_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let currency_elem = document.select(&currency_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let name = name_elem.unwrap_or_else(|| ticker.to_string());
let isin = isin_elem.unwrap_or_default();
let exchange_mic = exchange_elem.unwrap_or_default();
let currency = currency_elem.unwrap_or_else(|| "USD".to_string());
// Validate ISIN
let valid_isin = if isin.len() == 12 && isin.chars().all(|c| c.is_alphanumeric()) {
isin
} else {
"".to_string()
};
println!(" → Scraped {}: {} | ISIN: {} | Exchange: {}", ticker, name, valid_isin, exchange_mic);
Ok(PrimaryInfo {
isin: valid_isin,
name,
exchange_mic,
currency,
})
}
/// Clicks the "reject all" button of Yahoo's consent page if it shows up.
///
/// The page is polled up to 10 times, 500 ms apart, and polling stops as soon
/// as the button was found and clicked. A missing button is not an error.
///
/// # Errors
/// Propagates script execution errors from the WebDriver client.
pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
    // WebDriver runs the script as a function body, so the outcome must be
    // `return`ed explicitly. Without a `return` the result is always null and
    // the loop could never detect a successful click.
    const CLICK_REJECT_ALL: &str = r#"
        const btn = document.querySelector('#consent-page .reject-all');
        if (btn) {
            btn.click();
            return true;
        }
        return false;
    "#;

    for _ in 0..10 {
        let clicked = client
            .execute(CLICK_REJECT_ALL, vec![])
            .await?
            .as_bool()
            .unwrap_or(false);

        if clicked {
            break;
        }
        sleep(TokioDuration::from_millis(500)).await;
    }

    println!("Rejected Yahoo cookies if button existed");
    Ok(())
}

View File

@@ -1,8 +1,10 @@
// src/corporate/storage.rs
use super::{types::*, helpers::*};
use super::{types::*, helpers::*, scraper::get_primary_isin_and_name};
use crate::config;
use tokio::fs;
use chrono::{Datelike, NaiveDate};
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
@@ -100,7 +102,7 @@ pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: V
Ok(())
}
pub async fn load_companies() -> Result<Vec<CompanyMetadata>, anyhow::Error> {
pub async fn _load_companies() -> Result<Vec<CompanyMetadata>, anyhow::Error> {
let path = Path::new("src/data/companies.json");
if !path.exists() {
println!("Missing companies.json file at src/data/companies.json");

View File

@@ -49,13 +49,22 @@ pub struct TickerInfo {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyMetadata {
pub lei: String, // e.g. "5493000J2N45DDNE4Y28"
pub lei: String,
pub figi: Option<String>,
pub name: String,
pub isins: Vec<String>, // All ISINs belonging to this legal entity (primary + ADR + GDR)
pub primary_isin: String, // The most liquid / preferred one (used for folder fallback)
pub tickers: Vec<TickerInfo>,
}
/// Basic identification data for one ticker (ISIN, name, exchange, currency).
///
/// NOTE(review): the scraper that builds this value uses an empty string for a
/// missing ISIN or exchange and `"USD"` for a missing currency — confirm that
/// consumers treat empty strings as "unknown".
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrimaryInfo {
/// ISIN of the security; may be empty when none was found.
pub isin: String,
/// Display name of the company or security.
pub name: String,
/// Exchange identifier; presumably a MIC, as the field name suggests — verify against the producer.
pub exchange_mic: String,
/// Currency code of the listing.
pub currency: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableExchange {
pub exchange_mic: String,

View File

@@ -1,5 +1,5 @@
// src/corporate/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::aggregate_best_price_data};
use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
use crate::config::Config;
use chrono::Local;
@@ -9,19 +9,32 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> an
println!("Starting LEI-based corporate update");
// 1. Download fresh GLEIF ISIN↔LEI mapping on every run
let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv() {
let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
Ok(map) => map,
Err(e) => {
println!("Warning: Failed to load ISIN↔LEI mapping: {}", e);
HashMap::new()
}
};
//let _isin_to_lei = load_isin_to_lei()?; // optional, useful for migration scripts
let figi_to_lei: HashMap<String, String> = match build_figi_to_lei_map(&lei_to_isins).await {
Ok(map) => map,
Err(e) => {
println!("Warning: Failed to build FIGI→LEI map: {}", e);
HashMap::new()
}
};
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
let mut existing_events = load_existing_events().await?;
let companies = load_companies().await?; // Vec<CompanyMetadata> with lei, isins, tickers
let mut companies: Vec<CompanyMetadata> = match load_or_build_companies_figi(&lei_to_isins, &figi_to_lei).await {
Ok(comps) => comps,
Err(e) => {
println!("Error loading/building company metadata: {}", e);
return Err(e);
}
}; // Vec<CompanyMetadata> with lei, isins, tickers
for mut company in companies {
println!("\nProcessing company: {} (LEI: {})", company.name, company.lei);
@@ -38,6 +51,7 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> an
}
// Ensure company directory exists (now uses LEI)
//let figi_dir = format!("data/companies_by_figi/{}/", company.primary_figi);
ensure_company_dirs(&company.lei).await?;
save_company_metadata(&company).await?;
@@ -56,7 +70,8 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> an
} else {
for disc in discovered {
if !all_tickers.iter().any(|t| t.ticker == disc.ticker && t.exchange_mic == disc.exchange_mic) {
println!(" Found new listing: {} ({}) [ISIN: {}]", disc.ticker, disc.exchange_mic, isin);
println!(" New equity listing {} ({}) via ISIN {}",
disc.ticker, disc.exchange_mic, isin);
all_tickers.push(disc);
}
}
@@ -140,6 +155,33 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> an
Ok(())
}
/// Fills in the LEI of every company that does not have one yet.
///
/// A company's ISINs are checked in order against `lei_to_isins`; the first LEI
/// whose ISIN list contains one of them is adopted. Companies that still have
/// no LEI afterwards receive a generated `FAKE…` placeholder.
async fn enrich_companies_with_leis(
    companies: &mut Vec<CompanyMetadata>,
    lei_to_isins: &HashMap<String, Vec<String>>,
) {
    // Companies that already carry an LEI are left untouched.
    for company in companies.iter_mut().filter(|c| c.lei.is_empty()) {
        // Try to resolve the LEI through any known ISIN.
        for isin in &company.isins {
            let owner = lei_to_isins
                .iter()
                .find(|(_, members)| members.contains(isin));

            if let Some((lei, _)) = owner {
                company.lei = lei.clone();
                println!("Found real LEI {} for {}", lei, company.name);
            }
            if !company.lei.is_empty() {
                break;
            }
        }

        // Fallback: generate fake LEI if still missing
        if company.lei.is_empty() {
            company.lei = format!("FAKE{:019}", rand::random::<u64>());
            println!("No real LEI found → using fake for {}", company.name);
        }
    }
}
/// Carries a list of [`CompanyEventChange`] values.
///
/// NOTE(review): presumably the outcome of one event-processing step; the
/// producer is not visible from here — confirm.
pub struct ProcessResult {
/// The recorded event changes.
pub changes: Vec<CompanyEventChange>,
}

View File

@@ -1,6 +1,8 @@
[
{
"isin": "US46625H1005",
"lei": "8I5D5ASD7N5Z5P2K9M3J",
"isins": ["US46625H1005"],
"primary_isin": "US46625H1005",
"name": "JPMorgan Chase & Co.",
"tickers": [
{ "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true },
@@ -8,14 +10,18 @@
]
},
{
"isin": "US5949181045",
"lei": "5493001KJTIIGC8Y1R12",
"isins": ["US5949181045"],
"primary_isin": "US5949181045",
"name": "Microsoft Corporation",
"tickers": [
{ "ticker": "MSFT", "exchange_mic": "XNAS", "currency": "USD", "primary": true }
]
},
{
"isin": "CNE000001P37",
"lei": "529900T8BM49AURSDO55",
"isins": ["CNE000001P37"],
"primary_isin": "CNE000001P37",
"name": "Industrial and Commercial Bank of China",
"tickers": [
{ "ticker": "601398.SS", "exchange_mic": "XSHG", "currency": "CNY", "primary": true },
@@ -23,7 +29,9 @@
]
},
{
"isin": "JP3702200000",
"lei": "519900X5W8K6C1FZ3B57",
"isins": ["JP3702200000"],
"primary_isin": "JP3702200000",
"name": "Toyota Motor Corporation",
"tickers": [
{ "ticker": "7203.T", "exchange_mic": "XJPX", "currency": "JPY", "primary": true },
@@ -31,11 +39,20 @@
]
},
{
"isin": "HK0000069689",
"lei": "529900T8BM49AURSDO56",
"isins": ["HK0000069689"],
"primary_isin": "HK0000069689",
"name": "Tencent Holdings Limited",
"tickers": [
{ "ticker": "0700.HK", "exchange_mic": "XHKG", "currency": "HKD", "primary": true },
{ "ticker": "TCEHY", "exchange_mic": "OTCM", "currency": "USD", "primary": false }
]
},
{
"lei": "8I5D5Q1L7N5Z5P2K9M3J",
"isins": ["US90953F1049"],
"primary_isin": "US90953F1049",
"name": "Test Bonds Filter",
"tickers": [{ "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true }]
}
]

View File

@@ -4,7 +4,8 @@ mod corporate;
mod config;
mod util;
use fantoccini::{ClientBuilder, Locator};
use fantoccini::{ClientBuilder};
use serde_json::{Map, Value};
use tokio::signal;
#[tokio::main]
@@ -17,11 +18,31 @@ async fn main() -> anyhow::Result<()> {
// === Start ChromeDriver ===
let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe")
.args(["--port=9515"])
.args(["--port=9515"]) // Level 3 = minimal logs
.spawn()?;
let client = ClientBuilder::native()
.connect("http://localhost:9515")
// Build capabilities to hide infobar + enable full rendering
let port = 9515;
let caps_value = serde_json::json!({
"goog:chromeOptions": {
"args": [
//"--headless",
"--disable-gpu",
"--disable-notifications",
"--disable-popup-blocking",
"--disable-blink-features=AutomationControlled"
],
"excludeSwitches": ["enable-automation"]
}
});
let caps_map: Map<String, Value> = caps_value.as_object()
.expect("Capabilities should be a JSON object")
.clone();
let mut client = ClientBuilder::native()
.capabilities(caps_map)
.connect(&format!("http://localhost:{}", port))
.await?;
// Graceful shutdown
@@ -39,7 +60,6 @@ async fn main() -> anyhow::Result<()> {
// === Corporate Earnings Update ===
println!("\nUpdating Corporate Earnings");
let tickers = config::get_tickers();
corporate::run_full_update(&client, &config).await?;
// === Cleanup ===