adding openfigi as identifier for company data

This commit is contained in:
2025-11-25 22:18:52 +01:00
parent e57a013224
commit eeae94e041
13 changed files with 608 additions and 139 deletions

263
src/corporate/openfigi.rs Normal file
View File

@@ -0,0 +1,263 @@
// src/corporate/openfigi.rs
use super::{types::*};
use reqwest::{Client as HttpClient, StatusCode};
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use tokio::time::{sleep, Duration};
use anyhow::Context;
/// Thin wrapper around a `reqwest` client for talking to the OpenFIGI API.
///
/// The value is cheap to clone: it only holds the HTTP client handle and the
/// key material picked up when the client was constructed.
#[derive(Clone)]
pub struct OpenFigiClient {
    /// HTTP client used for every request issued by this wrapper.
    client: HttpClient,
    /// API key taken from the `OPENFIGI_API_KEY` variable, if one was found.
    api_key: Option<String>,
    /// Whether an API key is available; selects chunk sizes and request pacing.
    has_key: bool,
}
impl OpenFigiClient {
pub fn new() -> anyhow::Result<Self> {
let api_key = dotenvy::var("OPENFIGI_API_KEY").ok();
let has_key = api_key.is_some();
let mut builder = HttpClient::builder()
.user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
.timeout(Duration::from_secs(30));
if let Some(key) = &api_key {
let mut headers = HeaderMap::new();
headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
builder = builder.default_headers(headers);
}
let client = builder.build().context("Failed to build HTTP client")?;
println!(
"OpenFIGI client initialized: {}",
if has_key { "with API key" } else { "no key (limited mode)" }
);
Ok(Self { client, api_key, has_key })
}
/// Batch-map ISINs to FIGI, filtering equities only
pub async fn map_isins_to_figi(&self, isins: &[String]) -> anyhow::Result<Vec<String>> {
if isins.is_empty() { return Ok(vec![]); }
let mut all_figis = Vec::new();
let chunk_size = if self.has_key { 100 } else { 5 };
for (chunk_idx, chunk) in isins.chunks(chunk_size).enumerate() {
let mut retries = 0;
let mut success = false;
while retries < 3 && !success {
let jobs: Vec<Value> = chunk.iter()
.map(|isin| json!({
"idType": "ID_ISIN",
"idValue": isin,
"marketSecDes": "Equity",
}))
.collect();
let resp = self.client
.post("https://api.openfigi.com/v3/mapping")
.header("Content-Type", "application/json")
.json(&jobs)
.send()
.await?;
let status = resp.status();
println!(" → OpenFIGI batch {}/{}: status {}", chunk_idx + 1, isins.len() / chunk_size + 1, status);
match status {
StatusCode::OK => {
let results: Vec<Value> = resp.json().await?;
let mut chunk_figis = Vec::new();
for (job, result) in chunk.iter().zip(results) {
if let Some(data) = result["data"].as_array() {
for item in data {
let sec_type = item["securityType"].as_str().unwrap_or("");
let market_sec = item["marketSector"].as_str().unwrap_or("");
if market_sec == "Equity" &&
(sec_type.contains("Stock") || sec_type.contains("Share") || sec_type.contains("Equity") ||
sec_type.contains("Common") || sec_type.contains("Preferred") || sec_type == "ADR" || sec_type == "GDR") {
if let Some(figi) = item["figi"].as_str() {
chunk_figis.push(figi.to_string());
}
}
}
} else {
println!(" → Warning: No 'data' in response for ISIN {}", job);
}
}
all_figis.extend(chunk_figis);
success = true;
}
StatusCode::TOO_MANY_REQUESTS => { // 429
if let Some(reset_header) = resp.headers().get("ratelimit-reset") {
if let Ok(reset_secs) = reset_header.to_str().unwrap_or("10").parse::<u64>() {
println!(" → Rate limited (429) — backing off {}s", reset_secs);
sleep(Duration::from_secs(reset_secs.max(10))).await;
}
} else {
sleep(Duration::from_secs(30)).await; // Default backoff
}
retries += 1;
}
StatusCode::UNAUTHORIZED => { // 401
return Err(anyhow::anyhow!("Invalid OpenFIGI API key — check .env"));
}
StatusCode::PAYLOAD_TOO_LARGE => { // 413
println!(" → Payload too large (413) — reducing chunk size for next try");
// Reduce chunk_size dynamically (stub: retry with half size)
sleep(Duration::from_secs(5)).await;
retries += 1;
}
_ if status.is_server_error() => { // 5xx
println!(" → Server error {} — retrying in {}s", status, 3u64.pow(retries as u32));
sleep(Duration::from_secs(3u64.pow(retries as u32))).await;
retries += 1;
}
_ => { // 4xx client errors (not retryable)
let text = resp.text().await.unwrap_or_default();
return Err(anyhow::anyhow!("OpenFIGI client error {}: {}", status, text));
}
}
}
if !success {
println!(" → Failed chunk {} after 3 retries — skipping {} ISINs", chunk_idx + 1, chunk.len());
// Don't crash — continue with partial results
}
// Inter-batch delay (respect limits)
sleep(if self.has_key { Duration::from_secs(3) } else { Duration::from_millis(1000) }).await; // Safer: 20s/min effective
}
all_figis.dedup();
println!(" → Mapped {} unique equity FIGIs from {} ISINs", all_figis.len(), isins.len());
Ok(all_figis)
}
}
/// Builds the FIGI → LEI lookup table from an LEI → ISINs table.
///
/// For every LEI the distinct ISINs are resolved to equity FIGIs through
/// OpenFIGI, and each FIGI is recorded as belonging to that LEI. The finished
/// table is written to `data/figi_to_lei.json` and returned. When no API key is
/// configured, nothing is queried or written and an empty map is returned.
pub async fn build_figi_to_lei_map(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, String>> {
    let figi_client = OpenFigiClient::new()?;
    if !figi_client.has_key {
        println!("No API key—skipping FIGI mapping (using empty map)");
        return Ok(HashMap::new());
    }

    let mut mapping: HashMap<String, String> = HashMap::new();
    for (idx, (lei, isins)) in lei_to_isins.iter().enumerate() {
        // Collapse repeated ISINs before they are sent to the API.
        let distinct: HashSet<&String> = isins.iter().collect();
        let distinct: Vec<String> = distinct.into_iter().cloned().collect();

        let figis = figi_client.map_isins_to_figi(&distinct).await?;
        mapping.extend(figis.into_iter().map(|figi| (figi, lei.clone())));

        let done = idx + 1;
        if done % 100 == 0 {
            println!("Processed {} LEIs → {} total equity FIGIs", done, mapping.len());
        }

        // Small pause between LEIs on top of the per-batch pacing.
        sleep(Duration::from_millis(100)).await;
    }

    // Persist the complete table.
    let out_dir = std::path::Path::new("data");
    tokio::fs::create_dir_all(out_dir).await?;
    let serialized = serde_json::to_string_pretty(&mapping)?;
    tokio::fs::write("data/figi_to_lei.json", serialized).await?;

    println!("Built FIGI→LEI map: {} mappings (equity-only)", mapping.len());
    Ok(mapping)
}
/// Seed companies from hardcoded list (replaces get_tickers() + companies.json).
///
/// Each entry carries the company's LEI, its primary ISIN and its primary
/// listing; `figi` is left unset here. The LEIs must be the registered ones
/// (they are used as lookup keys into LEI → ISIN data), so every value below
/// satisfies the ISO 17442 MOD 97-10 check.
fn get_seed_companies() -> Vec<CompanyMetadata> {
    vec![
        CompanyMetadata {
            lei: "8I5DZWZKVSZI1NUHU748".to_string(), // JPMorgan Chase & Co.
            figi: None,
            name: "JPMorgan Chase & Co.".to_string(),
            isins: vec!["US46625H1005".to_string()],
            primary_isin: "US46625H1005".to_string(),
            tickers: vec![TickerInfo {
                ticker: "JPM".to_string(),
                exchange_mic: "XNYS".to_string(),
                currency: "USD".to_string(),
                primary: true,
            }],
        },
        CompanyMetadata {
            lei: "INR2EJN1ERAN0W5ZP974".to_string(), // Microsoft Corporation
            figi: None,
            name: "Microsoft Corporation".to_string(),
            isins: vec!["US5949181045".to_string()],
            primary_isin: "US5949181045".to_string(),
            tickers: vec![TickerInfo {
                ticker: "MSFT".to_string(),
                exchange_mic: "XNAS".to_string(),
                currency: "USD".to_string(),
                primary: true,
            }],
        },
        // TODO: add the other 14 companies from the original companies.json
        // (e.g. Industrial and Commercial Bank of China with ISIN CNE000001P37,
        // Toyota, ...) for a total of 16 seed companies. Use registered LEIs only.
    ]
}
/// Build the FIGI-keyed company records from the seed list and write one JSON
/// file per company to `data/companies_by_figi/<figi>.json`.
///
/// NOTE(review): despite the name, nothing is loaded from disk; the records are
/// rebuilt from the seed list on every call.
///
/// * `lei_to_isins` – all ISINs known per LEI; merged into each seed's ISIN
///   list (sorted, without duplicates).
/// * `figi_to_lei` – FIGI → LEI table; searched in reverse to find the seed's
///   FIGI. If several FIGIs map to the same LEI, the lexicographically smallest
///   one is used so that the result is stable across runs.
///
/// A seed without any FIGI in `figi_to_lei` gets the deterministic placeholder
/// `FIGI<lei>`, which keeps the output file name stable between runs.
///
/// # Errors
/// Fails if the output directory cannot be created, or a record cannot be
/// serialized or written.
pub async fn load_or_build_companies_figi(
    lei_to_isins: &HashMap<String, Vec<String>>,
    figi_to_lei: &HashMap<String, String>,
) -> anyhow::Result<Vec<CompanyMetadata>> {
    let data_dir = std::path::Path::new("data/companies_by_figi");
    tokio::fs::create_dir_all(data_dir).await?;

    let mut companies = Vec::new();
    for mut seed in get_seed_companies() {
        // Enrich the seed with every ISIN registered for its LEI.
        if let Some(all_isins) = lei_to_isins.get(&seed.lei) {
            let mut merged: HashSet<String> = seed.isins.drain(..).collect();
            merged.extend(all_isins.iter().cloned());
            seed.isins = merged.into_iter().collect();
            // HashSet iteration order is arbitrary; sort for reproducible output.
            seed.isins.sort_unstable();
        }

        // Reverse lookup: the map's *keys* are FIGIs and its values are LEIs, so
        // match on the value and return the key.
        let primary_figi = figi_to_lei
            .iter()
            .filter(|(_, lei)| lei.as_str() == seed.lei.as_str())
            .map(|(figi, _)| figi.as_str())
            .min()
            .map(str::to_owned)
            .unwrap_or_else(|| format!("FIGI{}", seed.lei));

        let company_path = data_dir.join(format!("{}.json", primary_figi));
        // Move the seed's fields into the final record instead of cloning them.
        let company = CompanyMetadata {
            figi: Some(primary_figi),
            ..seed
        };

        tokio::fs::write(&company_path, serde_json::to_string_pretty(&company)?)
            .await
            .with_context(|| format!("Failed to write {}", company_path.display()))?;
        companies.push(company);
    }

    println!("Built {} FIGI-keyed companies from seed", companies.len());
    Ok(companies)
}