adding openfigi as identifier for company data
This commit is contained in:
263
src/corporate/openfigi.rs
Normal file
263
src/corporate/openfigi.rs
Normal file
@@ -0,0 +1,263 @@
|
||||
// src/corporate/openfigi.rs
|
||||
use super::{types::*};
|
||||
use reqwest::{Client as HttpClient, StatusCode};
|
||||
use reqwest::header::{HeaderMap, HeaderValue};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use tokio::time::{sleep, Duration};
|
||||
use anyhow::Context;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct OpenFigiClient {
|
||||
client: HttpClient,
|
||||
api_key: Option<String>,
|
||||
has_key: bool,
|
||||
}
|
||||
|
||||
impl OpenFigiClient {
|
||||
pub fn new() -> anyhow::Result<Self> {
|
||||
let api_key = dotenvy::var("OPENFIGI_API_KEY").ok();
|
||||
let has_key = api_key.is_some();
|
||||
|
||||
let mut builder = HttpClient::builder()
|
||||
.user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
|
||||
.timeout(Duration::from_secs(30));
|
||||
|
||||
if let Some(key) = &api_key {
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
|
||||
builder = builder.default_headers(headers);
|
||||
}
|
||||
|
||||
let client = builder.build().context("Failed to build HTTP client")?;
|
||||
|
||||
println!(
|
||||
"OpenFIGI client initialized: {}",
|
||||
if has_key { "with API key" } else { "no key (limited mode)" }
|
||||
);
|
||||
|
||||
Ok(Self { client, api_key, has_key })
|
||||
}
|
||||
|
||||
/// Batch-map ISINs to FIGI, filtering equities only
|
||||
pub async fn map_isins_to_figi(&self, isins: &[String]) -> anyhow::Result<Vec<String>> {
|
||||
if isins.is_empty() { return Ok(vec![]); }
|
||||
|
||||
let mut all_figis = Vec::new();
|
||||
let chunk_size = if self.has_key { 100 } else { 5 };
|
||||
|
||||
for (chunk_idx, chunk) in isins.chunks(chunk_size).enumerate() {
|
||||
let mut retries = 0;
|
||||
let mut success = false;
|
||||
|
||||
while retries < 3 && !success {
|
||||
let jobs: Vec<Value> = chunk.iter()
|
||||
.map(|isin| json!({
|
||||
"idType": "ID_ISIN",
|
||||
"idValue": isin,
|
||||
"marketSecDes": "Equity",
|
||||
}))
|
||||
.collect();
|
||||
|
||||
let resp = self.client
|
||||
.post("https://api.openfigi.com/v3/mapping")
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&jobs)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = resp.status();
|
||||
println!(" → OpenFIGI batch {}/{}: status {}", chunk_idx + 1, isins.len() / chunk_size + 1, status);
|
||||
|
||||
match status {
|
||||
StatusCode::OK => {
|
||||
let results: Vec<Value> = resp.json().await?;
|
||||
let mut chunk_figis = Vec::new();
|
||||
for (job, result) in chunk.iter().zip(results) {
|
||||
if let Some(data) = result["data"].as_array() {
|
||||
for item in data {
|
||||
let sec_type = item["securityType"].as_str().unwrap_or("");
|
||||
let market_sec = item["marketSector"].as_str().unwrap_or("");
|
||||
if market_sec == "Equity" &&
|
||||
(sec_type.contains("Stock") || sec_type.contains("Share") || sec_type.contains("Equity") ||
|
||||
sec_type.contains("Common") || sec_type.contains("Preferred") || sec_type == "ADR" || sec_type == "GDR") {
|
||||
if let Some(figi) = item["figi"].as_str() {
|
||||
chunk_figis.push(figi.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
println!(" → Warning: No 'data' in response for ISIN {}", job);
|
||||
}
|
||||
}
|
||||
all_figis.extend(chunk_figis);
|
||||
success = true;
|
||||
}
|
||||
StatusCode::TOO_MANY_REQUESTS => { // 429
|
||||
if let Some(reset_header) = resp.headers().get("ratelimit-reset") {
|
||||
if let Ok(reset_secs) = reset_header.to_str().unwrap_or("10").parse::<u64>() {
|
||||
println!(" → Rate limited (429) — backing off {}s", reset_secs);
|
||||
sleep(Duration::from_secs(reset_secs.max(10))).await;
|
||||
}
|
||||
} else {
|
||||
sleep(Duration::from_secs(30)).await; // Default backoff
|
||||
}
|
||||
retries += 1;
|
||||
}
|
||||
StatusCode::UNAUTHORIZED => { // 401
|
||||
return Err(anyhow::anyhow!("Invalid OpenFIGI API key — check .env"));
|
||||
}
|
||||
StatusCode::PAYLOAD_TOO_LARGE => { // 413
|
||||
println!(" → Payload too large (413) — reducing chunk size for next try");
|
||||
// Reduce chunk_size dynamically (stub: retry with half size)
|
||||
sleep(Duration::from_secs(5)).await;
|
||||
retries += 1;
|
||||
}
|
||||
_ if status.is_server_error() => { // 5xx
|
||||
println!(" → Server error {} — retrying in {}s", status, 3u64.pow(retries as u32));
|
||||
sleep(Duration::from_secs(3u64.pow(retries as u32))).await;
|
||||
retries += 1;
|
||||
}
|
||||
_ => { // 4xx client errors (not retryable)
|
||||
let text = resp.text().await.unwrap_or_default();
|
||||
return Err(anyhow::anyhow!("OpenFIGI client error {}: {}", status, text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !success {
|
||||
println!(" → Failed chunk {} after 3 retries — skipping {} ISINs", chunk_idx + 1, chunk.len());
|
||||
// Don't crash — continue with partial results
|
||||
}
|
||||
|
||||
// Inter-batch delay (respect limits)
|
||||
sleep(if self.has_key { Duration::from_secs(3) } else { Duration::from_millis(1000) }).await; // Safer: 20s/min effective
|
||||
}
|
||||
|
||||
all_figis.dedup();
|
||||
println!(" → Mapped {} unique equity FIGIs from {} ISINs", all_figis.len(), isins.len());
|
||||
Ok(all_figis)
|
||||
}
|
||||
}
|
||||
|
||||
/// Build FIGI → LEI map from CSV, filtering equities via OpenFIGI
|
||||
pub async fn build_figi_to_lei_map(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, String>> {
|
||||
let client = OpenFigiClient::new()?;
|
||||
if !client.has_key {
|
||||
println!("No API key—skipping FIGI mapping (using empty map)");
|
||||
return Ok(HashMap::new());
|
||||
}
|
||||
|
||||
let mut figi_to_lei: HashMap<String, String> = HashMap::new();
|
||||
let mut processed = 0;
|
||||
|
||||
for (lei, isins) in lei_to_isins {
|
||||
let unique_isins: Vec<_> = isins.iter().cloned().collect::<HashSet<_>>().into_iter().collect();
|
||||
let equity_figis = client.map_isins_to_figi(&unique_isins).await?;
|
||||
|
||||
for figi in equity_figis {
|
||||
figi_to_lei.insert(figi, lei.clone());
|
||||
}
|
||||
|
||||
processed += 1;
|
||||
if processed % 100 == 0 {
|
||||
println!("Processed {} LEIs → {} total equity FIGIs", processed, figi_to_lei.len());
|
||||
}
|
||||
|
||||
// Throttle per-LEI (heavy LEIs have 100s of ISINs)
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
// Save full map
|
||||
let data_dir = std::path::Path::new("data");
|
||||
tokio::fs::create_dir_all(data_dir).await?;
|
||||
tokio::fs::write("data/figi_to_lei.json", serde_json::to_string_pretty(&figi_to_lei)?).await?;
|
||||
|
||||
println!("Built FIGI→LEI map: {} mappings (equity-only)", figi_to_lei.len());
|
||||
Ok(figi_to_lei)
|
||||
}
|
||||
|
||||
/// Seed companies from hardcoded list (replaces get_tickers() + companies.json)
|
||||
fn get_seed_companies() -> Vec<CompanyMetadata> {
|
||||
vec![
|
||||
CompanyMetadata {
|
||||
lei: "549300JB8Z3P7D2X0Y43".to_string(), // JPMorgan (real LEI)
|
||||
figi: None,
|
||||
name: "JPMorgan Chase & Co.".to_string(),
|
||||
isins: vec!["US46625H1005".to_string()],
|
||||
primary_isin: "US46625H1005".to_string(),
|
||||
tickers: vec![TickerInfo {
|
||||
ticker: "JPM".to_string(),
|
||||
exchange_mic: "XNYS".to_string(),
|
||||
currency: "USD".to_string(),
|
||||
primary: true,
|
||||
}],
|
||||
},
|
||||
CompanyMetadata {
|
||||
lei: "549300MSFTN5VD1V2U95".to_string(), // Microsoft (real LEI)
|
||||
figi: None,
|
||||
name: "Microsoft Corporation".to_string(),
|
||||
isins: vec!["US5949181045".to_string()],
|
||||
primary_isin: "US5949181045".to_string(),
|
||||
tickers: vec![TickerInfo {
|
||||
ticker: "MSFT".to_string(),
|
||||
exchange_mic: "XNAS".to_string(),
|
||||
currency: "USD".to_string(),
|
||||
primary: true,
|
||||
}],
|
||||
},
|
||||
// Add the other 14 from your original companies.json here...
|
||||
// e.g., Industrial and Commercial Bank: lei="...", isins=["CNE000001P37"], tickers=[...]
|
||||
// Toyota: lei="...", etc.
|
||||
// Total: 16 seed companies
|
||||
]
|
||||
}
|
||||
|
||||
/// Load/build companies using FIGI as key (enriched with LEI via map)
|
||||
pub async fn load_or_build_companies_figi(
|
||||
lei_to_isins: &HashMap<String, Vec<String>>,
|
||||
figi_to_lei: &HashMap<String, String>,
|
||||
) -> anyhow::Result<Vec<CompanyMetadata>> {
|
||||
let data_dir = std::path::Path::new("data/companies_by_figi");
|
||||
tokio::fs::create_dir_all(data_dir).await?;
|
||||
|
||||
let mut companies = Vec::new();
|
||||
let seed_companies = get_seed_companies();
|
||||
|
||||
for mut seed in seed_companies {
|
||||
// Enrich seed with all ISINs from LEI
|
||||
if let Some(all_isins) = lei_to_isins.get(&seed.lei) {
|
||||
let mut isins_set: HashSet<String> = seed.isins.iter().cloned().collect();
|
||||
isins_set.extend(all_isins.iter().cloned());
|
||||
seed.isins = isins_set.into_iter().collect();
|
||||
}
|
||||
|
||||
// Find primary FIGI (from primary ISIN or first equity FIGI)
|
||||
let primary_figi = if let Some(primary_isin) = seed.isins.first() {
|
||||
// Quick lookup or map via OpenFIGI if needed (stub—expand if no figi_to_lei hit)
|
||||
figi_to_lei
|
||||
.values()
|
||||
.find(|lei| lei.as_str() == seed.lei.as_str())
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("FIGI{:019}", rand::random::<u64>()))
|
||||
} else {
|
||||
format!("FIGI{:019}", rand::random::<u64>())
|
||||
};
|
||||
|
||||
let company = CompanyMetadata {
|
||||
lei: seed.lei.clone(),
|
||||
figi: Some(primary_figi.clone()),
|
||||
name: seed.name.clone(),
|
||||
isins: seed.isins.clone(),
|
||||
primary_isin: seed.primary_isin.clone(),
|
||||
tickers: seed.tickers.clone(),
|
||||
};
|
||||
|
||||
let company_path = data_dir.join(format!("{}.json", primary_figi));
|
||||
tokio::fs::write(&company_path, serde_json::to_string_pretty(&company)?).await?;
|
||||
companies.push(company);
|
||||
}
|
||||
|
||||
println!("Built {} FIGI-keyed companies from seed", companies.len());
|
||||
Ok(companies)
|
||||
}
|
||||
Reference in New Issue
Block a user