added creating CompanyInfo mapping
@@ -287,84 +287,6 @@ pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>
    Ok(lei_to_figis)
}

/// Loads or builds the LEI-to-FigiInfo map, filtering for equities via OpenFIGI.
///
/// Attempts to load from "data/lei_to_figi.jsonl" (JSON Lines format, one LEI entry per line).
/// For any missing LEIs (compared to `lei_to_isins`), fetches their FigiInfos and appends
/// them to the .jsonl file incrementally. This allows resumption after interruptions: on restart,
/// already processed LEIs are skipped, and only the missing ones are fetched.
///
/// If no API key is present, skips building and returns the loaded map (possibly partial).
///
/// # Arguments
///
/// * `lei_to_isins` - HashMap of LEI to Vec<ISIN> (used for building missing entries).
///
/// # Returns
///
/// The complete (or partial if interrupted) HashMap<LEI, Vec<FigiInfo>>.
///
/// # Errors
///
/// Returns an error if file I/O fails, JSON serialization/deserialization fails,
/// or if OpenFIGI queries fail during building.
pub async fn load_or_build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
    let data_dir = Path::new("data");
    tokio_fs::create_dir_all(data_dir).await.context("Failed to create data directory")?;

    let path = data_dir.join("lei_to_figi.jsonl");
    let mut lei_to_figis: HashMap<String, Vec<FigiInfo>> = load_lei_to_figi_jsonl(&path)?;

    let client = OpenFigiClient::new()?;
    if !client.has_key {
        println!("No API key—using partial LEI→FIGI map with {} entries", lei_to_figis.len());
        return Ok(lei_to_figis);
    }

    // Sort LEIs for a deterministic processing order
    let mut leis: Vec<_> = lei_to_isins.keys().cloned().collect();
    leis.sort();

    let mut processed = lei_to_figis.len();
    let total = leis.len();

    for lei in leis {
        if lei_to_figis.contains_key(&lei) {
            continue; // Skip already processed LEIs
        }

        let isins = match lei_to_isins.get(&lei) {
            Some(i) => i,
            None => continue,
        };

        let unique_isins: Vec<_> = isins.iter().cloned().collect::<HashSet<_>>().into_iter().collect();
        let mut figis = client.map_isins_to_figi_infos(&unique_isins).await?;
        if !figis.is_empty() {
            figis.sort_by_key(|f| f.figi.clone());
            figis.dedup_by_key(|f| f.figi.clone());
        }

        // Append to the .jsonl file so progress survives an interruption
        append_lei_to_figi_jsonl(&path, &lei, &figis)?;

        // Also insert into the in-memory map that will be returned
        lei_to_figis.insert(lei.clone(), figis);

        processed += 1;
        if processed % 100 == 0 {
            println!("Processed {}/{} LEIs → {} total equity FIGIs", processed, total, lei_to_figis.values().map(|v| v.len()).sum::<usize>());
        }

        tokio::time::sleep(Duration::from_millis(100)).await;
    }

    println!("Completed LEI→FIGI map: {} mappings (equity-only)", lei_to_figis.len());
    Ok(lei_to_figis)
}
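
// The on-disk format written by append_lei_to_figi_jsonl (shown further below)
// is one JSON object per line. A minimal sketch of such an append, assuming
// FigiInfo implements serde::Serialize (synchronous std::fs used for brevity;
// this is an illustration, not the function used above):
fn append_entry_sketch(path: &Path, lei: &str, figis: &[FigiInfo]) -> anyhow::Result<()> {
    use std::io::Write;

    let mut file = std::fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(path)?;
    // One self-contained object per line keeps a partially written file usable,
    // which is what makes resumption after an interruption possible.
    let line = serde_json::to_string(&serde_json::json!({ "lei": lei, "figis": figis }))?;
    writeln!(file, "{}", line)?;
    Ok(())
}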

/// Loads LEI-to-FigiInfo map from a JSON Lines file.
///
/// Each line is expected to be a JSON object: {"lei": "ABC", "figis": [FigiInfo...]}
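// The loader's body falls outside this hunk. A minimal sketch of how such a
// loader could work, assuming FigiInfo implements serde::Deserialize; the
// JsonlEntry helper struct is hypothetical:
#[derive(serde::Deserialize)]
struct JsonlEntry {
    lei: String,
    figis: Vec<FigiInfo>,
}

fn load_lei_to_figi_jsonl_sketch(path: &Path) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
    use std::io::BufRead;

    let mut map = HashMap::new();
    if !path.exists() {
        return Ok(map); // No file yet: start from an empty map
    }
    let file = std::fs::File::open(path)?;
    for line in std::io::BufReader::new(file).lines() {
        let line = line?;
        if line.trim().is_empty() {
            continue; // Tolerate blank lines (e.g., a trailing newline)
        }
        let entry: JsonlEntry = serde_json::from_str(&line)?;
        map.insert(entry.lei, entry.figis);
    }
    Ok(map)
}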
@@ -436,60 +358,396 @@ fn append_lei_to_figi_jsonl(path: &Path, lei: &str, figis: &[FigiInfo]) -> anyho
    Ok(())
}

/// Loads or builds a HashMap of CompanyInfo objects indexed by company name.
///
/// This function:
/// 1. Attempts to load existing companies from cache
/// 2. If a cache exists, updates/extends it with new data from `figi_to_lei`
/// 3. If no cache exists, creates a new HashMap from scratch
/// 4. Saves the result back to cache
///
/// For existing entries (matched by name):
/// - Merges securities lists (deduplicating by FIGI)
/// - Updates `primary_isin` if the existing one is empty or not in the securities list
///
/// For new entries:
/// - Adds them to the HashMap
///
/// Companies with no FigiInfo data are skipped.
/// The resulting HashMap is saved to `data/companies_by_name/companies.json`.
///
/// # Arguments
///
/// * `figi_to_lei` - HashMap mapping LEI to Vec<FigiInfo>.
///
/// # Returns
///
/// A HashMap mapping company name to CompanyInfo.
///
/// # Errors
///
/// Returns an error if file I/O fails or JSON serialization fails.
pub async fn load_or_build_companies_by_name(
    figi_to_lei: &HashMap<String, Vec<FigiInfo>>
) -> anyhow::Result<HashMap<String, CompanyInfo>> {
    // Try to load the existing cache
    let mut companies_by_name = match load_companies_by_name_internal().await? {
        Some(existing) => {
            println!("Loaded {} existing companies from cache", existing.len());
            existing
        },
        None => {
            println!("No existing cache found, creating new companies HashMap");
            HashMap::new()
        }
    };

    let initial_count = companies_by_name.len();
    let mut added_count = 0;
    let mut updated_count = 0;

    println!("Processing {} LEI entries from FIGI data...", figi_to_lei.len());

    for figi_infos in figi_to_lei.values() {
        // Skip entries with no FigiInfo data
        if figi_infos.is_empty() {
            continue;
        }

        // Get the company name from the first FigiInfo entry
        let name = figi_infos[0].name.clone();
        if name.is_empty() {
            continue;
        }

        // Check whether the company already exists
        if let Some(existing_company) = companies_by_name.get_mut(&name) {
            // Update the existing entry
            let merged_securities = merge_securities(&existing_company.securities, figi_infos);
            let securities_added = merged_securities.len() - existing_company.securities.len();

            if securities_added > 0 {
                existing_company.securities = merged_securities;

                // Update primary_isin if needed
                if existing_company.primary_isin.is_empty() ||
                    !existing_company.securities.iter().any(|s| s.isin == existing_company.primary_isin) {
                    existing_company.primary_isin = existing_company.securities[0].isin.clone();
                }

                updated_count += 1;
            }
        } else {
            // Add a new entry
            let primary_isin = figi_infos[0].isin.clone();
            let securities = figi_infos.clone();

            let company_info = CompanyInfo {
                name: name.clone(),
                primary_isin,
                securities,
            };

            companies_by_name.insert(name, company_info);
            added_count += 1;
        }
    }

    println!(" Companies statistics:");
    println!("   - Initial: {}", initial_count);
    println!("   - Added: {}", added_count);
    println!("   - Updated: {}", updated_count);
    println!("   - Total: {}", companies_by_name.len());

    // Save to JSON
    save_companies_by_name(&companies_by_name).await?;

    Ok(companies_by_name)
}
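
// Hypothetical call site (e.g., in an async main), sketched for context;
// `lei_to_isins` is assumed to have been built earlier in the pipeline:
//
//     let lei_to_figi = load_or_build_lei_to_figi_infos(&lei_to_isins).await?;
//     let companies = load_or_build_companies_by_name(&lei_to_figi).await?;
//     println!("{} distinct company names", companies.len());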

/// Merges two lists of FigiInfo, deduplicating by FIGI.
///
/// # Arguments
/// * `existing` - Existing securities list
/// * `new_securities` - New securities to merge
///
/// # Returns
/// Merged and deduplicated list of FigiInfo
fn merge_securities(existing: &[FigiInfo], new_securities: &[FigiInfo]) -> Vec<FigiInfo> {
    let mut merged = existing.to_vec();
    let existing_figis: HashSet<String> = existing.iter()
        .map(|f| f.figi.clone())
        .collect();

    for new_sec in new_securities {
        if !existing_figis.contains(&new_sec.figi) {
            merged.push(new_sec.clone());
        }
    }

    // Sort by FIGI for a consistent order
    merged.sort_by(|a, b| a.figi.cmp(&b.figi));

    merged
}

/// Loads or builds a list of CompanyMetadata using LEI as the primary key.
///
/// Attempts to load pre-built company metadata from "data/companies_by_lei/companies_lei.json".
/// If the cache does not exist, builds the metadata by first obtaining the LEI-to-FigiInfo map
/// (loading or fetching via OpenFIGI if necessary), then constructs CompanyMetadata for each LEI.
///
/// Only includes LEIs that have associated ISINs from the input map. If no FigiInfos are available
/// for a LEI (e.g., no equity listings), the `figi` field will be `None`.
///
/// # Arguments
///
/// * `lei_to_isins` - Mapping of LEI to associated ISINs (used for building the FigiInfo map if needed).
///
/// # Returns
///
/// A vector of `CompanyMetadata` structs, sorted by LEI.
///
/// # Errors
///
/// Returns an error if file I/O fails, JSON serialization/deserialization fails,
/// or if building the LEI-to-FigiInfo map encounters issues (e.g., API errors).
pub async fn load_or_build_companies_lei(
    lei_to_isins: &HashMap<String, Vec<String>>,
) -> anyhow::Result<Vec<CompanyMetadata>> {
    let cache_path = Path::new("data/companies_by_lei/companies_lei.json");

    if cache_path.exists() {
        let content = tokio_fs::read_to_string(cache_path).await.context("Failed to read companies cache")?;
        let mut companies: Vec<CompanyMetadata> = serde_json::from_str(&content).context("Failed to parse companies JSON")?;
        companies.sort_by_key(|c| c.lei.clone());
        println!("Loaded {} LEI-keyed companies from cache.", companies.len());
        return Ok(companies);
    }

    // Build or load the LEI-to-FigiInfo map (with incremental persistence)
    let lei_to_figi = load_or_build_lei_to_figi_infos(lei_to_isins).await?;

    // Build companies from all LEIs in lei_to_isins (even if no FigiInfos)
    let mut companies = Vec::new();
    for lei in lei_to_isins.keys() {
        let figis = lei_to_figi.get(lei).cloned();
        companies.push(CompanyMetadata {
            lei: lei.clone(),
            figi: figis.and_then(|v| if v.is_empty() { None } else { Some(v) }),
        });
    }

    companies.sort_by_key(|c| c.lei.clone());

    // Cache the result (create the cache's parent directory, not just "data")
    let cache_dir = Path::new("data/companies_by_lei");
    tokio_fs::create_dir_all(cache_dir).await.context("Failed to create data/companies_by_lei directory")?;
    tokio_fs::write(cache_path, serde_json::to_string_pretty(&companies)?).await.context("Failed to write companies cache")?;

    println!("Built and cached {} LEI-keyed companies.", companies.len());
    Ok(companies)
}
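
// Consumers of CompanyMetadata can distinguish "no equity listings found"
// (None) from a non-empty list; sketched usage, assuming some
// `company: CompanyMetadata` in scope:
//
//     match &company.figi {
//         None => println!("{}: no equity FIGIs", company.lei),
//         Some(figis) => println!("{}: {} equity FIGIs", company.lei, figis.len()),
//     }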
/// Internal function to load the companies HashMap from cache.
///
/// # Returns
/// Some(HashMap) if the cache file exists and is valid, None otherwise.
///
/// # Errors
/// Returns an error if file I/O fails or JSON parsing fails.
async fn load_companies_by_name_internal() -> anyhow::Result<Option<HashMap<String, CompanyInfo>>> {
    let cache_file = Path::new("data/companies_by_name/companies.json");

    if !cache_file.exists() {
        return Ok(None);
    }

    let content = tokio_fs::read_to_string(cache_file).await
        .context("Failed to read companies.json")?;

    let companies: HashMap<String, CompanyInfo> = serde_json::from_str(&content)
        .context("Failed to parse companies.json")?;

    Ok(Some(companies))
}
/// Saves the companies HashMap to cache.
///
/// # Arguments
/// * `companies` - The companies HashMap to save
///
/// # Errors
/// Returns an error if file I/O fails or JSON serialization fails.
async fn save_companies_by_name(companies: &HashMap<String, CompanyInfo>) -> anyhow::Result<()> {
    let cache_dir = Path::new("data/companies_by_name");
    tokio_fs::create_dir_all(cache_dir).await
        .context("Failed to create data/companies_by_name directory")?;

    let cache_file = cache_dir.join("companies.json");
    let json_str = serde_json::to_string_pretty(&companies)
        .context("Failed to serialize companies to JSON")?;

    tokio_fs::write(&cache_file, json_str).await
        .context("Failed to write companies.json")?;

    println!("  ✓ Saved {} companies to {}", companies.len(), cache_file.display());

    Ok(())
}
/// Loads all OpenFIGI mapping value lists (marketSecDes, micCode, securityType).
///
/// This function fetches the available values for each mapping parameter from the OpenFIGI API
/// and caches them as JSON files in `data/openfigi/`. If the files already exist and are recent
/// (less than 30 days old), they are reused instead of re-fetching.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if API requests fail, JSON parsing fails, or file I/O fails.
pub async fn load_figi_type_lists() -> anyhow::Result<()> {
    println!("Loading OpenFIGI mapping value lists...");

    let client = OpenFigiClient::new()?;

    // Create cache directory
    let cache_dir = Path::new("data/openfigi");
    tokio_fs::create_dir_all(cache_dir).await
        .context("Failed to create data/openfigi directory")?;

    // Fetch each type list
    get_figi_market_sec_des(&client, cache_dir).await?;
    get_figi_mic_code(&client, cache_dir).await?;
    get_figi_security_type(&client, cache_dir).await?;

    println!("OpenFIGI mapping value lists loaded successfully");
    Ok(())
}
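
// After a run, each cache file holds the raw endpoint response. Per OpenFIGI's
// documented shape, the mapping-values endpoints return {"values": [...]}; a
// sketch of reading one cached list back out (hypothetical helper, e.g. for
// data/openfigi/micCode.json):
async fn read_cached_values_sketch(name: &str) -> anyhow::Result<Vec<String>> {
    let path = format!("data/openfigi/{}.json", name);
    let content = tokio_fs::read_to_string(&path).await
        .with_context(|| format!("Failed to read {}", path))?;
    let json: Value = serde_json::from_str(&content)?;
    // Missing or unexpected shapes yield an empty list rather than a panic
    let values = json["values"]
        .as_array()
        .map(|a| a.iter().filter_map(|v| v.as_str().map(String::from)).collect())
        .unwrap_or_default();
    Ok(values)
}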
/// Fetches and caches the list of valid marketSecDes values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_market_sec_des(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
    let cache_file = cache_dir.join("marketSecDes.json");

    // Check if cache exists and is recent (< 30 days old)
    if should_use_cache(&cache_file).await? {
        println!("  Using cached marketSecDes values");
        return Ok(());
    }

    println!("  Fetching marketSecDes values from OpenFIGI API...");

    let resp = client.client
        .get("https://api.openfigi.com/v3/mapping/values/marketSecDes")
        .send()
        .await
        .context("Failed to fetch marketSecDes values")?;

    handle_rate_limit(&resp).await?;

    let values: Value = resp.json().await
        .context("Failed to parse marketSecDes response")?;

    // Save to cache
    let json_str = serde_json::to_string_pretty(&values)?;
    tokio_fs::write(&cache_file, json_str).await
        .context("Failed to write marketSecDes cache")?;

    println!("  ✓ Cached marketSecDes values");

    // Respect rate limits (the spacing works out to ~25 requests/6 s with a key, ~25/minute without)
    sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;

    Ok(())
}

/// Fetches and caches the list of valid micCode values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_mic_code(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
    let cache_file = cache_dir.join("micCode.json");

    if should_use_cache(&cache_file).await? {
        println!("  Using cached micCode values");
        return Ok(());
    }

    println!("  Fetching micCode values from OpenFIGI API...");

    let resp = client.client
        .get("https://api.openfigi.com/v3/mapping/values/micCode")
        .send()
        .await
        .context("Failed to fetch micCode values")?;

    handle_rate_limit(&resp).await?;

    let values: Value = resp.json().await
        .context("Failed to parse micCode response")?;

    let json_str = serde_json::to_string_pretty(&values)?;
    tokio_fs::write(&cache_file, json_str).await
        .context("Failed to write micCode cache")?;

    println!("  ✓ Cached micCode values");

    sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;

    Ok(())
}

/// Fetches and caches the list of valid securityType values.
///
/// # Arguments
/// * `client` - The OpenFIGI client instance.
/// * `cache_dir` - Directory to save the cached JSON file.
///
/// # Returns
/// Ok(()) on success.
///
/// # Errors
/// Returns an error if the API request fails or file I/O fails.
async fn get_figi_security_type(client: &OpenFigiClient, cache_dir: &Path) -> anyhow::Result<()> {
    let cache_file = cache_dir.join("securityType.json");

    if should_use_cache(&cache_file).await? {
        println!("  Using cached securityType values");
        return Ok(());
    }

    println!("  Fetching securityType values from OpenFIGI API...");

    let resp = client.client
        .get("https://api.openfigi.com/v3/mapping/values/securityType")
        .send()
        .await
        .context("Failed to fetch securityType values")?;

    handle_rate_limit(&resp).await?;

    let values: Value = resp.json().await
        .context("Failed to parse securityType response")?;

    let json_str = serde_json::to_string_pretty(&values)?;
    tokio_fs::write(&cache_file, json_str).await
        .context("Failed to write securityType cache")?;

    println!("  ✓ Cached securityType values");

    sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;

    Ok(())
}
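
// The three fetchers above differ only in endpoint name and cache file name.
// A generic consolidation sketch (hypothetical, not part of this commit),
// built from the same calls the concrete fetchers use:
async fn fetch_figi_value_list_sketch(client: &OpenFigiClient, cache_dir: &Path, name: &str) -> anyhow::Result<()> {
    let cache_file = cache_dir.join(format!("{}.json", name));
    if should_use_cache(&cache_file).await? {
        println!("  Using cached {} values", name);
        return Ok(());
    }

    println!("  Fetching {} values from OpenFIGI API...", name);
    let resp = client.client
        .get(format!("https://api.openfigi.com/v3/mapping/values/{}", name))
        .send()
        .await
        .with_context(|| format!("Failed to fetch {} values", name))?;
    handle_rate_limit(&resp).await?;

    let values: Value = resp.json().await
        .with_context(|| format!("Failed to parse {} response", name))?;
    tokio_fs::write(&cache_file, serde_json::to_string_pretty(&values)?).await
        .with_context(|| format!("Failed to write {} cache", name))?;
    println!("  ✓ Cached {} values", name);

    // Same pacing as the concrete fetchers
    sleep(Duration::from_millis(if client.has_key { 240 } else { 2400 })).await;
    Ok(())
}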

/// Checks if a cache file exists and is less than 30 days old.
///
/// # Arguments
/// * `path` - Path to the cache file.
///
/// # Returns
/// True if the cache should be used, false if it needs refreshing.
async fn should_use_cache(path: &Path) -> anyhow::Result<bool> {
    if !path.exists() {
        return Ok(false);
    }

    let metadata = tokio_fs::metadata(path).await?;
    let modified = metadata.modified()?;
    // elapsed() fails if the mtime is in the future; treat that as a stale cache
    let age = modified.elapsed().unwrap_or(std::time::Duration::from_secs(u64::MAX));

    // Cache is valid for 30 days
    Ok(age < std::time::Duration::from_secs(30 * 24 * 60 * 60))
}

/// Handles rate limit responses from the OpenFIGI API.
///
/// If a 429 status is received, this function sleeps for the duration specified
/// in the `ratelimit-reset` header (at least 10 seconds), then returns an error
/// so the caller can retry.
///
/// # Arguments
/// * `resp` - The HTTP response to check.
///
/// # Returns
/// Ok(()) if the response is not an error status.
///
/// # Errors
/// Returns an error after waiting if rate limited, or immediately for any other
/// client or server error status.
async fn handle_rate_limit(resp: &reqwest::Response) -> anyhow::Result<()> {
    let status = resp.status();

    if status == 429 {
        let headers = resp.headers();
        let reset_sec = headers
            .get("ratelimit-reset")
            .and_then(|v| v.to_str().ok())
            .and_then(|s| s.parse::<u64>().ok())
            .unwrap_or(10);

        println!("  Rate limited—waiting {}s", reset_sec);
        sleep(std::time::Duration::from_secs(reset_sec.max(10))).await;

        return Err(anyhow!("Rate limited, please retry"));
    } else if status.is_client_error() || status.is_server_error() {
        return Err(anyhow!("OpenFIGI API error: {}", status));
    }

    Ok(())
}
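
// Because handle_rate_limit sleeps and then surfaces an error on 429, callers
// are expected to retry. A minimal retry wrapper (hypothetical, not part of
// this commit):
async fn with_retries_sketch<F, Fut>(mut attempt: F, max_tries: usize) -> anyhow::Result<()>
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = anyhow::Result<()>>,
{
    let mut last_err = None;
    for _ in 0..max_tries {
        match attempt().await {
            Ok(()) => return Ok(()),
            // The rate-limit path has already waited before returning the error
            Err(e) => last_err = Some(e),
        }
    }
    Err(last_err.unwrap_or_else(|| anyhow!("no attempts were made")))
}

// Sketched usage: with_retries_sketch(|| get_figi_mic_code(&client, cache_dir), 3).await?;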