added bond extraction from figi
This commit is contained in:
@@ -2,7 +2,6 @@
|
|||||||
// Bond-specific processing logic for corporate and government bonds
|
// Bond-specific processing logic for corporate and government bonds
|
||||||
|
|
||||||
use super::types::*;
|
use super::types::*;
|
||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
/// Parse bond details from ticker and security description
|
/// Parse bond details from ticker and security description
|
||||||
///
|
///
|
||||||
@@ -235,164 +234,40 @@ pub fn classify_government_issuer(name: &str) -> String {
|
|||||||
"other".to_string()
|
"other".to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process corporate bonds from FIGI data
|
/// Classify government bond type based on security_type
|
||||||
/// Mirrors the pattern used for warrants/options
|
///
|
||||||
pub fn process_corporate_bonds(
|
/// Maps OpenFIGI security types to simplified bond categories for government bonds
|
||||||
figi_infos: &[FigiInfo],
|
///
|
||||||
existing_bonds: &mut HashMap<String, CorporateBondInfo>,
|
/// # Examples
|
||||||
) -> usize {
|
/// - "DOMESTIC" -> "domestic"
|
||||||
let mut new_count = 0;
|
/// - "GLOBAL" -> "global"
|
||||||
|
/// - "EURO NON-DOLLAR" -> "euro"
|
||||||
|
/// - "DOMESTIC MTN" -> "mtn"
|
||||||
|
pub fn classify_government_bond_type(security_type: &str) -> String {
|
||||||
|
let security_type_upper = security_type.to_uppercase();
|
||||||
|
|
||||||
// Group by issuer name
|
if security_type_upper.contains("GLOBAL") {
|
||||||
let mut by_issuer: HashMap<String, Vec<FigiInfo>> = HashMap::new();
|
return "global".to_string();
|
||||||
for figi in figi_infos {
|
|
||||||
by_issuer.entry(figi.name.clone()).or_default().push(figi.clone());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (issuer_name, figis) in by_issuer {
|
if security_type_upper.contains("EURO") {
|
||||||
let bond_info = existing_bonds
|
if security_type_upper.contains("NON-DOLLAR") || !security_type_upper.contains("DOLLAR") {
|
||||||
.entry(issuer_name.clone())
|
return "euro".to_string();
|
||||||
.or_insert_with(|| CorporateBondInfo {
|
|
||||||
issuer_name: issuer_name.clone(),
|
|
||||||
bonds: HashMap::new(),
|
|
||||||
bond_details: HashMap::new(),
|
|
||||||
});
|
|
||||||
|
|
||||||
for figi in figis {
|
|
||||||
// Group by ISIN
|
|
||||||
let isin_bonds = bond_info.bonds.entry(figi.isin.clone()).or_default();
|
|
||||||
|
|
||||||
// Check if this specific FIGI already exists
|
|
||||||
if !isin_bonds.iter().any(|f| f.figi == figi.figi) {
|
|
||||||
// Parse bond details
|
|
||||||
let details = parse_bond_details(&figi.ticker, &figi.security_description);
|
|
||||||
bond_info.bond_details.insert(figi.isin.clone(), details);
|
|
||||||
|
|
||||||
isin_bonds.push(figi);
|
|
||||||
new_count += 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return "eurodollar".to_string();
|
||||||
}
|
}
|
||||||
|
|
||||||
new_count
|
if security_type_upper.contains("YANKEE") {
|
||||||
}
|
return "yankee".to_string();
|
||||||
|
}
|
||||||
/// Process government bonds from FIGI data
|
|
||||||
/// Mirrors the pattern used for warrants/options
|
if security_type_upper.contains("MTN") {
|
||||||
pub fn process_government_bonds(
|
return "mtn".to_string();
|
||||||
figi_infos: &[FigiInfo],
|
}
|
||||||
existing_bonds: &mut HashMap<String, GovernmentBondInfo>,
|
|
||||||
) -> usize {
|
if security_type_upper.contains("DOMESTIC") {
|
||||||
let mut new_count = 0;
|
return "domestic".to_string();
|
||||||
|
}
|
||||||
// Group by issuer name
|
|
||||||
let mut by_issuer: HashMap<String, Vec<FigiInfo>> = HashMap::new();
|
"other".to_string()
|
||||||
for figi in figi_infos {
|
|
||||||
by_issuer.entry(figi.name.clone()).or_default().push(figi.clone());
|
|
||||||
}
|
|
||||||
|
|
||||||
for (issuer_name, figis) in by_issuer {
|
|
||||||
let issuer_type = classify_government_issuer(&issuer_name);
|
|
||||||
|
|
||||||
let bond_info = existing_bonds
|
|
||||||
.entry(issuer_name.clone())
|
|
||||||
.or_insert_with(|| GovernmentBondInfo {
|
|
||||||
issuer_name: issuer_name.clone(),
|
|
||||||
issuer_type: issuer_type.clone(),
|
|
||||||
bonds: HashMap::new(),
|
|
||||||
bond_details: HashMap::new(),
|
|
||||||
});
|
|
||||||
|
|
||||||
for figi in figis {
|
|
||||||
// Group by ISIN
|
|
||||||
let isin_bonds = bond_info.bonds.entry(figi.isin.clone()).or_default();
|
|
||||||
|
|
||||||
// Check if this specific FIGI already exists
|
|
||||||
if !isin_bonds.iter().any(|f| f.figi == figi.figi) {
|
|
||||||
// Parse bond details
|
|
||||||
let details = parse_bond_details(&figi.ticker, &figi.security_description);
|
|
||||||
bond_info.bond_details.insert(figi.isin.clone(), details);
|
|
||||||
|
|
||||||
isin_bonds.push(figi);
|
|
||||||
new_count += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
new_count
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_corporate_bond() {
|
|
||||||
let details = parse_bond_details(
|
|
||||||
"WTFC 4.3 01/12/26 0003",
|
|
||||||
"WTFC 4.3 01/12/26"
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(details.coupon_rate, Some(4.3));
|
|
||||||
assert_eq!(details.maturity_date, Some("2026-01-12".to_string()));
|
|
||||||
assert!(!details.is_floating);
|
|
||||||
assert!(!details.is_zero_coupon);
|
|
||||||
assert_eq!(details.series_identifier, Some("0003".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_government_bond() {
|
|
||||||
let details = parse_bond_details(
|
|
||||||
"SLOVAK 1.5225 05/10/28 4Y",
|
|
||||||
"SLOVAK 1.5225 05/10/28"
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(details.coupon_rate, Some(1.5225));
|
|
||||||
assert_eq!(details.maturity_date, Some("2028-05-10".to_string()));
|
|
||||||
assert!(!details.is_floating);
|
|
||||||
assert_eq!(details.series_identifier, Some("4Y".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_floating_rate() {
|
|
||||||
let details = parse_bond_details(
|
|
||||||
"SEK Float 06/30/34",
|
|
||||||
"SEK Float 06/30/34"
|
|
||||||
);
|
|
||||||
|
|
||||||
assert!(details.is_floating);
|
|
||||||
assert_eq!(details.maturity_date, Some("2034-06-30".to_string()));
|
|
||||||
assert_eq!(details.coupon_rate, None);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_fractional_coupon() {
|
|
||||||
let details = parse_bond_details(
|
|
||||||
"DANGCE 12 1/2 05/30/26 B",
|
|
||||||
"DANGCE 12 1/2 05/30/26"
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(details.coupon_rate, Some(12.5));
|
|
||||||
assert_eq!(details.maturity_date, Some("2026-05-30".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_parse_zero_coupon() {
|
|
||||||
let details = parse_bond_details(
|
|
||||||
"GGB 0 10/15/42",
|
|
||||||
"GGB 0 10/15/42"
|
|
||||||
);
|
|
||||||
|
|
||||||
assert_eq!(details.coupon_rate, Some(0.0));
|
|
||||||
assert!(details.is_zero_coupon);
|
|
||||||
assert_eq!(details.maturity_date, Some("2042-10-15".to_string()));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_classify_issuer_types() {
|
|
||||||
assert_eq!(classify_government_issuer("SLOVAK REPUBLIC"), "sovereign");
|
|
||||||
assert_eq!(classify_government_issuer("ASNES KOMMUNE"), "municipal");
|
|
||||||
assert_eq!(classify_government_issuer("SWEDISH EXPORT CREDIT"), "agency");
|
|
||||||
assert_eq!(classify_government_issuer("REGION OCCITANIE"), "state");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
@@ -145,7 +145,7 @@ pub struct BondDetails {
|
|||||||
/// ticker: "WTFC 4.3 01/12/26 0003"
|
/// ticker: "WTFC 4.3 01/12/26 0003"
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct CorporateBondInfo {
|
pub struct CorporateBondInfo {
|
||||||
pub issuer_name: String, // key - company name issuing the bond
|
pub underlying_company_name: String, // key - company name issuing the bond
|
||||||
pub bonds: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
|
pub bonds: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo> (grouped by ISIN)
|
||||||
#[serde(skip_serializing_if = "HashMap::is_empty", default)]
|
#[serde(skip_serializing_if = "HashMap::is_empty", default)]
|
||||||
pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
|
pub bond_details: HashMap<String, BondDetails>, // ISIN -> parsed bond details
|
||||||
|
|||||||
@@ -1,15 +1,14 @@
|
|||||||
// src/corporate/update_openfigi.rs - STREAMING VERSION
|
// src/corporate/update_openfigi.rs - STREAMING VERSION
|
||||||
// Key changes: Never load entire GLEIF CSV or FIGI maps into memory
|
// Key changes: Never load entire GLEIF CSV or FIGI maps into memory
|
||||||
|
use super::types::*;
|
||||||
|
use super::bond_processing::*;
|
||||||
use crate::util::directories::DataPaths;
|
use crate::util::directories::DataPaths;
|
||||||
use crate::util::integrity::{DataStage, StateManager, directory_reference};
|
use crate::util::integrity::{DataStage, StateManager, directory_reference};
|
||||||
use crate::util::logger;
|
use crate::util::logger;
|
||||||
use crate::scraper::openfigi::{OpenFigiClient};
|
use crate::scraper::openfigi::{OpenFigiClient};
|
||||||
use super::types::*;
|
|
||||||
use serde_json::{json, Value};
|
use serde_json::{json, Value};
|
||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::io::{BufRead, BufReader};
|
|
||||||
use tokio::fs as tokio_fs;
|
use tokio::fs as tokio_fs;
|
||||||
use tokio::io::AsyncWriteExt;
|
use tokio::io::AsyncWriteExt;
|
||||||
use anyhow::{Context, anyhow};
|
use anyhow::{Context, anyhow};
|
||||||
@@ -106,9 +105,7 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
|
|||||||
let step_name = "securities_data_complete";
|
let step_name = "securities_data_complete";
|
||||||
|
|
||||||
let data_dir = dir.data_dir();
|
let data_dir = dir.data_dir();
|
||||||
let corporate_data_dir = data_dir.join("corporate");
|
let output_dir = data_dir.join("figi_securities");
|
||||||
let economic_data_dir = data_dir.join("economic");
|
|
||||||
let output_dir = data_dir.join("by_name");
|
|
||||||
tokio_fs::create_dir_all(&output_dir).await
|
tokio_fs::create_dir_all(&output_dir).await
|
||||||
.context("Failed to create corporate/by_name directory")?;
|
.context("Failed to create corporate/by_name directory")?;
|
||||||
|
|
||||||
@@ -209,9 +206,13 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
|
|||||||
&common_log,
|
&common_log,
|
||||||
&warrants_log,
|
&warrants_log,
|
||||||
&options_log,
|
&options_log,
|
||||||
|
&corporate_bonds_log,
|
||||||
|
&government_bonds_log,
|
||||||
&mut existing_companies,
|
&mut existing_companies,
|
||||||
&mut existing_warrants,
|
&mut existing_warrants,
|
||||||
&mut existing_options,
|
&mut existing_options,
|
||||||
|
&mut existing_corporate_bonds,
|
||||||
|
&mut existing_government_bonds,
|
||||||
&mut stats,
|
&mut stats,
|
||||||
).await?;
|
).await?;
|
||||||
|
|
||||||
@@ -228,6 +229,8 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
|
|||||||
create_checkpoint(&common_checkpoint, &common_log).await?;
|
create_checkpoint(&common_checkpoint, &common_log).await?;
|
||||||
create_checkpoint(&warrants_checkpoint, &warrants_log).await?;
|
create_checkpoint(&warrants_checkpoint, &warrants_log).await?;
|
||||||
create_checkpoint(&options_checkpoint, &options_log).await?;
|
create_checkpoint(&options_checkpoint, &options_log).await?;
|
||||||
|
create_checkpoint(&corporate_bonds_checkpoint, &corporate_bonds_log).await?;
|
||||||
|
create_checkpoint(&government_bonds_checkpoint, &government_bonds_log).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
stats.print_summary();
|
stats.print_summary();
|
||||||
@@ -251,6 +254,8 @@ async fn track_securities_completion(
|
|||||||
"common_stocks.jsonl".to_string(),
|
"common_stocks.jsonl".to_string(),
|
||||||
"warrants.jsonl".to_string(),
|
"warrants.jsonl".to_string(),
|
||||||
"options.jsonl".to_string(),
|
"options.jsonl".to_string(),
|
||||||
|
"corporate_bonds.jsonl".to_string(),
|
||||||
|
"government_bonds.jsonl".to_string(),
|
||||||
]),
|
]),
|
||||||
Some(vec![
|
Some(vec![
|
||||||
"*.log.jsonl".to_string(), // Exclude log files
|
"*.log.jsonl".to_string(), // Exclude log files
|
||||||
@@ -495,9 +500,13 @@ async fn process_lei_figi_file_batched(
|
|||||||
common_log_path: &Path,
|
common_log_path: &Path,
|
||||||
warrants_log_path: &Path,
|
warrants_log_path: &Path,
|
||||||
options_log_path: &Path,
|
options_log_path: &Path,
|
||||||
|
corporate_bonds_log_path: &Path,
|
||||||
|
government_bonds_log_path: &Path,
|
||||||
existing_companies: &mut HashSet<String>,
|
existing_companies: &mut HashSet<String>,
|
||||||
existing_warrants: &mut HashSet<String>,
|
existing_warrants: &mut HashSet<String>,
|
||||||
existing_options: &mut HashSet<String>,
|
existing_options: &mut HashSet<String>,
|
||||||
|
existing_corporate_bonds: &mut HashSet<String>,
|
||||||
|
existing_government_bonds: &mut HashSet<String>,
|
||||||
stats: &mut StreamingStats,
|
stats: &mut StreamingStats,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
let content = tokio_fs::read_to_string(input_path).await
|
let content = tokio_fs::read_to_string(input_path).await
|
||||||
@@ -506,9 +515,11 @@ async fn process_lei_figi_file_batched(
|
|||||||
let batch_size = 100;
|
let batch_size = 100;
|
||||||
let mut processed_count = 0;
|
let mut processed_count = 0;
|
||||||
|
|
||||||
let mut common_batch = Vec::new();
|
let mut common_batch: Vec<CompanyInfo> = Vec::new();
|
||||||
let mut warrants_batch = Vec::new();
|
let mut warrants_batch: Vec<WarrantInfo> = Vec::new();
|
||||||
let mut options_batch = Vec::new();
|
let mut options_batch: Vec<OptionInfo> = Vec::new();
|
||||||
|
let mut corporate_bonds_batch: Vec<CorporateBondInfo> = Vec::new();
|
||||||
|
let mut government_bonds_batch: Vec<GovernmentBondInfo> = Vec::new();
|
||||||
|
|
||||||
for (line_num, line) in content.lines().enumerate() {
|
for (line_num, line) in content.lines().enumerate() {
|
||||||
if line.trim().is_empty() {
|
if line.trim().is_empty() {
|
||||||
@@ -526,7 +537,7 @@ async fn process_lei_figi_file_batched(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Group by security type
|
// Group by security type
|
||||||
let (common_stocks, warrant_securities, option_securities) =
|
let (common_stocks, warrant_securities, option_securities, corporate_bonds_securities, government_bonds_securities) =
|
||||||
group_by_security_type(&figis);
|
group_by_security_type(&figis);
|
||||||
|
|
||||||
// Collect entries for batching and update existing keys
|
// Collect entries for batching and update existing keys
|
||||||
@@ -556,6 +567,24 @@ async fn process_lei_figi_file_batched(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !corporate_bonds_securities.is_empty() {
|
||||||
|
for entry in prepare_corporate_bond_entries(&corporate_bonds_securities, existing_corporate_bonds) {
|
||||||
|
// Use underlying_company_name as the key (not issuer_company_name)
|
||||||
|
let key = entry.underlying_company_name.clone();
|
||||||
|
existing_corporate_bonds.insert(key);
|
||||||
|
corporate_bonds_batch.push(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !government_bonds_securities.is_empty() {
|
||||||
|
for entry in prepare_government_bond_entries(&government_bonds_securities, existing_government_bonds) {
|
||||||
|
// Use issuer_name as the key (not issuer_country_name)
|
||||||
|
let key = entry.issuer_name.clone();
|
||||||
|
existing_government_bonds.insert(key);
|
||||||
|
government_bonds_batch.push(entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Write batches when they reach size limit
|
// Write batches when they reach size limit
|
||||||
if common_batch.len() >= batch_size {
|
if common_batch.len() >= batch_size {
|
||||||
write_batch_with_fsync(common_log_path, &common_batch).await?;
|
write_batch_with_fsync(common_log_path, &common_batch).await?;
|
||||||
@@ -575,6 +604,18 @@ async fn process_lei_figi_file_batched(
|
|||||||
options_batch.clear();
|
options_batch.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if corporate_bonds_batch.len() >= batch_size {
|
||||||
|
write_batch_with_fsync(corporate_bonds_log_path, &corporate_bonds_batch).await?;
|
||||||
|
stats.corporate_bonds_added += corporate_bonds_batch.len();
|
||||||
|
corporate_bonds_batch.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
if government_bonds_batch.len() >= batch_size {
|
||||||
|
write_batch_with_fsync(government_bonds_log_path, &government_bonds_batch).await?;
|
||||||
|
stats.government_bonds_added += government_bonds_batch.len();
|
||||||
|
government_bonds_batch.clear();
|
||||||
|
}
|
||||||
|
|
||||||
processed_count += 1;
|
processed_count += 1;
|
||||||
if processed_count % 1000 == 0 {
|
if processed_count % 1000 == 0 {
|
||||||
logger::log_info(&format!(" Processed {} LEI entries...", processed_count)).await;
|
logger::log_info(&format!(" Processed {} LEI entries...", processed_count)).await;
|
||||||
@@ -597,6 +638,16 @@ async fn process_lei_figi_file_batched(
|
|||||||
stats.options_added += options_batch.len();
|
stats.options_added += options_batch.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !corporate_bonds_batch.is_empty() {
|
||||||
|
write_batch_with_fsync(corporate_bonds_log_path, &corporate_bonds_batch).await?;
|
||||||
|
stats.corporate_bonds_added += corporate_bonds_batch.len();
|
||||||
|
}
|
||||||
|
|
||||||
|
if !government_bonds_batch.is_empty() {
|
||||||
|
write_batch_with_fsync(government_bonds_log_path, &government_bonds_batch).await?;
|
||||||
|
stats.government_bonds_added += government_bonds_batch.len();
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -719,11 +770,140 @@ fn prepare_option_entries(
|
|||||||
entries
|
entries
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Prepares corporate bond entries for batching
|
||||||
|
///
|
||||||
|
/// Groups corporate bonds by issuer (underlying_company_name), extracting key bond details
|
||||||
|
/// like coupon rate, maturity date, and tenor from the ticker/description for each ISIN.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `corporate_bond_securities` - List of FigiInfo objects for corporate bonds
|
||||||
|
/// * `existing_keys` - Set of already-processed keys (format: "company_name")
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// Vector of CorporateBondInfo entries, one per unique issuer
|
||||||
|
fn prepare_corporate_bond_entries(
|
||||||
|
corporate_bond_securities: &[FigiInfo],
|
||||||
|
existing_keys: &HashSet<String>,
|
||||||
|
) -> Vec<CorporateBondInfo> {
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
|
||||||
|
// Group bonds by issuer (company name)
|
||||||
|
let mut grouped: HashMap<String, Vec<FigiInfo>> = HashMap::new();
|
||||||
|
|
||||||
|
for figi in corporate_bond_securities {
|
||||||
|
let issuer = figi.name.clone();
|
||||||
|
if issuer.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
grouped.entry(issuer).or_default().push(figi.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create entries for each unique issuer
|
||||||
|
for (issuer, figis) in grouped {
|
||||||
|
// Check if this issuer already exists
|
||||||
|
if existing_keys.contains(&issuer) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Group by ISIN
|
||||||
|
let bonds_by_isin = group_figis_by_isin(&figis);
|
||||||
|
|
||||||
|
// Parse bond details for each ISIN
|
||||||
|
let mut bond_details_map: HashMap<String, BondDetails> = HashMap::new();
|
||||||
|
|
||||||
|
for (isin, isin_figis) in &bonds_by_isin {
|
||||||
|
if let Some(first_figi) = isin_figis.first() {
|
||||||
|
let details = parse_bond_details(&first_figi.ticker, &first_figi.security_description);
|
||||||
|
bond_details_map.insert(isin.clone(), details);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let bond_info = CorporateBondInfo {
|
||||||
|
underlying_company_name: issuer.clone(),
|
||||||
|
bonds: bonds_by_isin,
|
||||||
|
bond_details: bond_details_map,
|
||||||
|
};
|
||||||
|
|
||||||
|
entries.push(bond_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
entries
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prepares government bond entries for batching
|
||||||
|
///
|
||||||
|
/// Groups government bonds by issuer (country/entity), extracting key bond
|
||||||
|
/// details like coupon rate, maturity date, and tenor from the ticker/description for each ISIN.
|
||||||
|
/// Also classifies the government issuer type (sovereign, municipal, agency, etc.)
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `government_bond_securities` - List of FigiInfo objects for government bonds
|
||||||
|
/// * `existing_keys` - Set of already-processed keys (format: "issuer_name")
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// Vector of GovernmentBondInfo entries, one per unique issuer
|
||||||
|
fn prepare_government_bond_entries(
|
||||||
|
government_bond_securities: &[FigiInfo],
|
||||||
|
existing_keys: &HashSet<String>,
|
||||||
|
) -> Vec<GovernmentBondInfo> {
|
||||||
|
let mut entries = Vec::new();
|
||||||
|
|
||||||
|
// Group bonds by issuer (country/entity name)
|
||||||
|
let mut grouped: HashMap<String, Vec<FigiInfo>> = HashMap::new();
|
||||||
|
|
||||||
|
for figi in government_bond_securities {
|
||||||
|
let issuer = figi.name.clone();
|
||||||
|
if issuer.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
grouped.entry(issuer).or_default().push(figi.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create entries for each unique issuer
|
||||||
|
for (issuer, figis) in grouped {
|
||||||
|
// Check if this issuer already exists
|
||||||
|
if existing_keys.contains(&issuer) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Classify the government issuer type
|
||||||
|
let issuer_type = classify_government_issuer(&issuer);
|
||||||
|
|
||||||
|
// Group by ISIN
|
||||||
|
let bonds_by_isin = group_figis_by_isin(&figis);
|
||||||
|
|
||||||
|
// Parse bond details for each ISIN
|
||||||
|
let mut bond_details_map: HashMap<String, BondDetails> = HashMap::new();
|
||||||
|
|
||||||
|
for (isin, isin_figis) in &bonds_by_isin {
|
||||||
|
if let Some(first_figi) = isin_figis.first() {
|
||||||
|
let details = parse_bond_details(&first_figi.ticker, &first_figi.security_description);
|
||||||
|
bond_details_map.insert(isin.clone(), details);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let bond_info = GovernmentBondInfo {
|
||||||
|
issuer_name: issuer.clone(),
|
||||||
|
issuer_type,
|
||||||
|
bonds: bonds_by_isin,
|
||||||
|
bond_details: bond_details_map,
|
||||||
|
};
|
||||||
|
|
||||||
|
entries.push(bond_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
entries
|
||||||
|
}
|
||||||
|
|
||||||
/// Groups FigiInfo list by security type
|
/// Groups FigiInfo list by security type
|
||||||
fn group_by_security_type(figis: &[FigiInfo]) -> (Vec<FigiInfo>, Vec<FigiInfo>, Vec<FigiInfo>) {
|
fn group_by_security_type(figis: &[FigiInfo]) -> (Vec<FigiInfo>, Vec<FigiInfo>, Vec<FigiInfo>, Vec<FigiInfo>, Vec<FigiInfo>) {
|
||||||
let mut common_stocks = Vec::new();
|
let mut common_stocks:Vec<FigiInfo> = Vec::new();
|
||||||
let mut warrants = Vec::new();
|
let mut warrants:Vec<FigiInfo> = Vec::new();
|
||||||
let mut options = Vec::new();
|
let mut options:Vec<FigiInfo> = Vec::new();
|
||||||
|
let mut corporate_bonds:Vec<FigiInfo> = Vec::new();
|
||||||
|
let mut government_bonds:Vec<FigiInfo> = Vec::new();
|
||||||
|
|
||||||
for figi in figis {
|
for figi in figis {
|
||||||
match figi.security_type.as_str() {
|
match figi.security_type.as_str() {
|
||||||
@@ -732,9 +912,14 @@ fn group_by_security_type(figis: &[FigiInfo]) -> (Vec<FigiInfo>, Vec<FigiInfo>,
|
|||||||
"Equity Option" => options.push(figi.clone()),
|
"Equity Option" => options.push(figi.clone()),
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
match figi.security_type2.as_str() {
|
||||||
|
"Corp" => corporate_bonds.push(figi.clone()),
|
||||||
|
"Govt" => government_bonds.push(figi.clone()),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
(common_stocks, warrants, options)
|
(common_stocks, warrants, options, corporate_bonds, government_bonds)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Groups FigiInfo by ISIN
|
/// Groups FigiInfo by ISIN
|
||||||
@@ -1193,7 +1378,6 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(
|
|||||||
setup_sector_directories(&date_dir, &sector_dirs).await?;
|
setup_sector_directories(&date_dir, &sector_dirs).await?;
|
||||||
|
|
||||||
let mut lei_batch: HashMap<String, Vec<String>> = HashMap::new();
|
let mut lei_batch: HashMap<String, Vec<String>> = HashMap::new();
|
||||||
let mut line_count = 0;
|
|
||||||
let mut processed_leis = 0;
|
let mut processed_leis = 0;
|
||||||
let mut skipped_leis = 0;
|
let mut skipped_leis = 0;
|
||||||
|
|
||||||
@@ -1219,7 +1403,6 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(
|
|||||||
}
|
}
|
||||||
|
|
||||||
lei_batch.entry(lei).or_default().push(isin);
|
lei_batch.entry(lei).or_default().push(isin);
|
||||||
line_count += 1;
|
|
||||||
|
|
||||||
// Process batch when full
|
// Process batch when full
|
||||||
if lei_batch.len() >= LEI_BATCH_SIZE {
|
if lei_batch.len() >= LEI_BATCH_SIZE {
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ use chrono::{NaiveDate, Datelike};
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
|
|
||||||
const CHUNK_SIZE: usize = 500; // Process 500 events at a time
|
|
||||||
const MAX_EVENTS_PER_FILE: usize = 3000;
|
const MAX_EVENTS_PER_FILE: usize = 3000;
|
||||||
|
|
||||||
pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
|
pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
|
||||||
|
|||||||
Reference in New Issue
Block a user