Files
WebScraper/src/corporate/bond_processing.rs
2026-01-12 01:01:19 +01:00

398 lines
13 KiB
Rust

// src/corporate/bond_processing.rs
// Bond-specific processing logic for corporate and government bonds
use super::types::*;
use std::collections::HashMap;
/// Parse bond details from ticker and security description.
///
/// The result is assembled in four independent steps:
/// 1. the floating-rate flag is set when "Float" appears in either input, or
///    " F " / " V0 " appears in the ticker;
/// 2. for non-floating bonds the coupon is taken from `extract_coupon_rate`
///    (a coupon of exactly `0.0` also sets `is_zero_coupon`);
/// 3. the maturity date comes from `extract_maturity_date`, and the tenor is the
///    difference between the maturity year and the current UTC calendar year,
///    clamped at zero;
/// 4. the series identifier comes from `extract_series_identifier`.
///
/// Fields that cannot be determined are left as `None` / `false`.
///
/// Examples:
/// - "WTFC 4.3 01/12/26 0003" -> coupon: 4.3, maturity: 2026-01-12
/// - "SLOVAK 1.5225 05/10/28 4Y" -> coupon: 1.5225, maturity: 2028-05-10
/// - "SEK Float 06/30/34" -> floating rate, maturity: 2034-06-30
/// - "GGB 0 10/15/42" -> zero coupon, maturity: 2042-10-15
pub fn parse_bond_details(ticker: &str, security_description: &str) -> BondDetails {
    let is_floating = ticker.contains("Float")
        || ticker.contains(" F ")
        || ticker.contains(" V0 ")
        || security_description.contains("Float");

    let mut details = BondDetails {
        coupon_rate: None,
        maturity_date: None,
        is_floating,
        is_zero_coupon: false,
        tenor_years: None,
        series_identifier: None,
    };

    // A floating-rate instrument has no fixed coupon to extract.
    if !details.is_floating {
        if let Some(coupon) = extract_coupon_rate(ticker, security_description) {
            details.coupon_rate = Some(coupon);
            details.is_zero_coupon = coupon == 0.0;
        }
    }

    if let Some(maturity) = extract_maturity_date(ticker, security_description) {
        // The maturity string is "YYYY-MM-DD"; only the year is used for the
        // (deliberately coarse) whole-year tenor.
        let maturity_year = maturity
            .split('-')
            .next()
            .and_then(|year| year.parse::<i32>().ok());

        // The reference year comes from the system clock rather than a literal,
        // so the tenor stays correct as time passes. If the clock is unusable
        // the tenor is simply left unset.
        if let (Some(mat_year), Some(current_year)) = (maturity_year, current_utc_year()) {
            // Non-negative after the clamp and bounded by i32::MAX, so the
            // cast to u32 cannot truncate.
            let years_to_maturity = (i64::from(mat_year) - current_year).max(0) as u32;
            details.tenor_years = Some(years_to_maturity);
        }

        details.maturity_date = Some(maturity);
    }

    details.series_identifier = extract_series_identifier(ticker);
    details
}

/// Current calendar year in UTC, derived from the system clock.
///
/// Returns `None` when the clock reports a time before the Unix epoch.
fn current_utc_year() -> Option<i64> {
    let secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .ok()?
        .as_secs();

    // Lossless cast: u64::MAX / 86_400 is far below i64::MAX.
    let days = (secs / 86_400) as i64;

    // Days-to-civil-date conversion for the proleptic Gregorian calendar.
    // The day count is shifted so that every 400-year era begins on 1 March,
    // which puts the leap day at the end of the shifted year.
    let shifted = days + 719_468;
    let era = shifted / 146_097;
    let day_of_era = shifted % 146_097;
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February

    // January and February belong to the following civil year.
    Some(year_of_era + era * 400 + i64::from(shifted_month >= 10))
}
/// Extract the coupon rate from the combined ticker and description text.
///
/// A fractional coupon such as "12 1/2" takes precedence. Otherwise the first
/// whitespace-separated token is returned that
/// - parses as a number between 0 and 20 inclusive, and
/// - is immediately followed by a date-like token (one containing '/' or at
///   least 8 bytes long).
///
/// Handles: "4.3", "1.5225", "12 1/2" (fractional), "0". Returns `None` when no
/// token qualifies.
fn extract_coupon_rate(ticker: &str, description: &str) -> Option<f64> {
    let combined = format!("{} {}", ticker, description);

    parse_fractional_coupon(&combined).or_else(|| {
        let tokens: Vec<&str> = combined.split_whitespace().collect();

        // Examine every (candidate, follower) pair; the final token can never
        // qualify because nothing follows it.
        tokens.windows(2).find_map(|pair| {
            let rate = pair[0].parse::<f64>().ok()?;
            let plausible = (0.0..=20.0).contains(&rate);
            let date_follows = pair[1].contains('/') || pair[1].len() >= 8;

            if plausible && date_follows {
                Some(rate)
            } else {
                None
            }
        })
    })
}
/// Parse a fractional coupon such as "12 1/2" into its decimal value (12.5).
///
/// Scans adjacent whitespace-separated tokens for a number followed by a
/// `numerator/denominator` token and returns the first pair whose fraction
/// parses and has a non-zero denominator. The follower is split at its first
/// '/', so a date like "01/12/26" does not qualify ("12/26" is not a number).
fn parse_fractional_coupon(text: &str) -> Option<f64> {
    let tokens: Vec<&str> = text.split_whitespace().collect();

    tokens.windows(2).find_map(|pair| {
        let whole: f64 = pair[0].parse().ok()?;
        let (numerator, denominator) = pair[1].split_once('/')?;
        let numerator: f64 = numerator.parse().ok()?;
        let denominator: f64 = denominator.parse().ok()?;

        if denominator != 0.0 {
            Some(whole + numerator / denominator)
        } else {
            // A zero denominator is skipped; later pairs are still considered.
            None
        }
    })
}
/// Extract maturity date from ticker/description
/// Handles: "01/12/26", "05/10/28", "06/30/2034"
fn extract_maturity_date(ticker: &str, description: &str) -> Option<String> {
let text = format!("{} {}", ticker, description);
// Look for MM/DD/YY or MM/DD/YYYY patterns
let parts: Vec<&str> = text.split_whitespace().collect();
for part in parts {
if let Some(date) = parse_date_pattern(part) {
return Some(date);
}
}
None
}
/// Parse a `MM/DD/YY` or `MM/DD/YYYY` token into `YYYY-MM-DD`.
///
/// Returns `None` unless the token consists of exactly three '/'-separated
/// fields that are all plain ASCII digits, with a one- or two-digit month in
/// 1..=12, a one- or two-digit day in 1..=31, and a two- or four-digit year.
///
/// Two-digit years are expanded with a pivot: values up to 50 map to 20xx,
/// larger values to 19xx.
fn parse_date_pattern(s: &str) -> Option<String> {
    let mut fields = s.split('/');
    let (month, day, year_part) =
        match (fields.next(), fields.next(), fields.next(), fields.next()) {
            (Some(month), Some(day), Some(year), None) => (month, day, year),
            _ => return None,
        };

    // Integer parsing alone is too lenient (it accepts a leading '+'), and a
    // four-character year was previously never checked at all, so every field
    // is required to be digits only before any conversion.
    let digits_only =
        |field: &str| !field.is_empty() && field.bytes().all(|byte| byte.is_ascii_digit());
    if !(digits_only(month) && digits_only(day) && digits_only(year_part)) {
        return None;
    }
    if month.len() > 2 || day.len() > 2 {
        return None;
    }

    let year: u32 = match year_part.len() {
        2 => {
            let short_year: u32 = year_part.parse().ok()?;
            // Assume 20xx for values <= 50, 19xx for > 50.
            if short_year <= 50 {
                2000 + short_year
            } else {
                1900 + short_year
            }
        }
        4 => year_part.parse().ok()?,
        _ => return None,
    };

    let month: u32 = month.parse().ok()?;
    let day: u32 = day.parse().ok()?;
    if (1..=12).contains(&month) && (1..=31).contains(&day) {
        Some(format!("{:04}-{:02}-{:02}", year, month, day))
    } else {
        None
    }
}
/// Extract the series identifier: the token directly after the date.
///
/// The date is recognized purely by shape (a token containing exactly two
/// '/'). Returns `None` when the ticker has no such token or when nothing
/// follows it.
/// Examples: "0003", "4Y", "144A", "REGS", "MTN", "PSI", "CD"
fn extract_series_identifier(ticker: &str) -> Option<String> {
    let mut tokens = ticker.split_whitespace();

    // Consume tokens up to and including the first date-shaped one ...
    tokens.find(|token| token.matches('/').count() == 2)?;

    // ... so whatever comes next, if anything, is the identifier.
    tokens.next().map(str::to_string)
}
/// Classify a government issuer by keywords found in its name.
///
/// Matching is case-insensitive. Categories are tried in a fixed order and the
/// first hit wins: "sovereign", "municipal", "state", "agency"; names that
/// match nothing are "other". The order matters: "county council" must be
/// recognized as municipal before the broader "county" marks a name as state.
pub fn classify_government_issuer(name: &str) -> String {
    // "federal republic" is already covered by "republic"; it is kept so the
    // table mirrors the full keyword list.
    const SOVEREIGN_KEYWORDS: &[&str] = &[
        "republic",
        "kingdom",
        "federal republic",
        "hellenic", // Greece
        "slovak",
    ];
    const SOVEREIGN_SUFFIXES: &[&str] = &[" govt", " government"];
    // Municipalities (Norwegian communes, cities, etc.)
    const MUNICIPAL_KEYWORDS: &[&str] = &[
        "kommune",
        "municipality",
        "city of",
        "town of",
        "county council",
    ];
    // States/Provinces/Regions
    const STATE_KEYWORDS: &[&str] = &["state of", "province", "region", "county"];
    // Government agencies/entities
    const AGENCY_KEYWORDS: &[&str] = &[
        "export credit",
        "development bank",
        "housing",
        "akademiska",
        "byggdastofnun",
    ];

    /// True when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    let lowered = name.to_lowercase();
    let has_sovereign_suffix = SOVEREIGN_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(*suffix));

    let category = if contains_any(&lowered, SOVEREIGN_KEYWORDS) || has_sovereign_suffix {
        "sovereign"
    } else if contains_any(&lowered, MUNICIPAL_KEYWORDS) {
        "municipal"
    } else if contains_any(&lowered, STATE_KEYWORDS) {
        "state"
    } else if contains_any(&lowered, AGENCY_KEYWORDS) {
        "agency"
    } else {
        "other"
    };

    category.to_string()
}
/// Merge corporate bond FIGI records into `existing_bonds`.
///
/// Records are filed under their issuer name (`FigiInfo::name`) and, within an
/// issuer, under their ISIN. A record whose FIGI is already listed under that
/// issuer and ISIN is skipped. For every record that is added, the details
/// parsed from its ticker and security description replace the stored details
/// for that ISIN.
///
/// Returns the number of records added.
pub fn process_corporate_bonds(
    figi_infos: &[FigiInfo],
    existing_bonds: &mut HashMap<String, CorporateBondInfo>,
) -> usize {
    let mut added = 0;

    for figi in figi_infos {
        // Issuers are independent of one another, so records can be filed one
        // at a time in input order without grouping them first.
        let issuer = existing_bonds
            .entry(figi.name.clone())
            .or_insert_with(|| CorporateBondInfo {
                issuer_name: figi.name.clone(),
                bonds: HashMap::new(),
                bond_details: HashMap::new(),
            });

        // The ISIN bucket is created even if the record turns out to be a duplicate.
        let listed = issuer.bonds.entry(figi.isin.clone()).or_default();
        if listed.iter().any(|known| known.figi == figi.figi) {
            continue;
        }

        let details = parse_bond_details(&figi.ticker, &figi.security_description);
        issuer.bond_details.insert(figi.isin.clone(), details);
        listed.push(figi.clone());
        added += 1;
    }

    added
}
/// Merge government bond FIGI records into `existing_bonds`.
///
/// Records are filed under their issuer name (`FigiInfo::name`) and, within an
/// issuer, under their ISIN. When an issuer entry is created, its type is set
/// from `classify_government_issuer`; entries that already exist keep the type
/// they have. A record whose FIGI is already listed under that issuer and ISIN
/// is skipped. For every record that is added, the details parsed from its
/// ticker and security description replace the stored details for that ISIN.
///
/// Returns the number of records added.
pub fn process_government_bonds(
    figi_infos: &[FigiInfo],
    existing_bonds: &mut HashMap<String, GovernmentBondInfo>,
) -> usize {
    let mut added = 0;

    for figi in figi_infos {
        // Issuers are independent of one another, so records can be filed one
        // at a time in input order without grouping them first.
        let issuer = existing_bonds
            .entry(figi.name.clone())
            .or_insert_with(|| GovernmentBondInfo {
                issuer_name: figi.name.clone(),
                issuer_type: classify_government_issuer(&figi.name),
                bonds: HashMap::new(),
                bond_details: HashMap::new(),
            });

        // The ISIN bucket is created even if the record turns out to be a duplicate.
        let listed = issuer.bonds.entry(figi.isin.clone()).or_default();
        if listed.iter().any(|known| known.figi == figi.figi) {
            continue;
        }

        let details = parse_bond_details(&figi.ticker, &figi.security_description);
        issuer.bond_details.insert(figi.isin.clone(), details);
        listed.push(figi.clone());
        added += 1;
    }

    added
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed-coupon corporate ticker with a trailing series number.
    #[test]
    fn test_parse_corporate_bond() {
        let parsed = parse_bond_details("WTFC 4.3 01/12/26 0003", "WTFC 4.3 01/12/26");

        assert_eq!(parsed.coupon_rate, Some(4.3));
        assert_eq!(parsed.maturity_date.as_deref(), Some("2026-01-12"));
        assert!(!parsed.is_floating);
        assert!(!parsed.is_zero_coupon);
        assert_eq!(parsed.series_identifier.as_deref(), Some("0003"));
    }

    /// Government ticker with a four-decimal coupon and a tenor-style suffix.
    #[test]
    fn test_parse_government_bond() {
        let parsed = parse_bond_details("SLOVAK 1.5225 05/10/28 4Y", "SLOVAK 1.5225 05/10/28");

        assert_eq!(parsed.coupon_rate, Some(1.5225));
        assert_eq!(parsed.maturity_date.as_deref(), Some("2028-05-10"));
        assert!(!parsed.is_floating);
        assert_eq!(parsed.series_identifier.as_deref(), Some("4Y"));
    }

    /// "Float" marks the bond as floating and suppresses coupon extraction.
    #[test]
    fn test_parse_floating_rate() {
        let parsed = parse_bond_details("SEK Float 06/30/34", "SEK Float 06/30/34");

        assert!(parsed.is_floating);
        assert_eq!(parsed.maturity_date.as_deref(), Some("2034-06-30"));
        assert_eq!(parsed.coupon_rate, None);
    }

    /// A "whole numerator/denominator" coupon is converted to a decimal.
    #[test]
    fn test_parse_fractional_coupon() {
        let parsed = parse_bond_details("DANGCE 12 1/2 05/30/26 B", "DANGCE 12 1/2 05/30/26");

        assert_eq!(parsed.coupon_rate, Some(12.5));
        assert_eq!(parsed.maturity_date.as_deref(), Some("2026-05-30"));
    }

    /// A coupon of "0" is reported as a zero-coupon bond.
    #[test]
    fn test_parse_zero_coupon() {
        let parsed = parse_bond_details("GGB 0 10/15/42", "GGB 0 10/15/42");

        assert_eq!(parsed.coupon_rate, Some(0.0));
        assert!(parsed.is_zero_coupon);
        assert_eq!(parsed.maturity_date.as_deref(), Some("2042-10-15"));
    }

    /// One representative issuer name per non-default category.
    #[test]
    fn test_classify_issuer_types() {
        let cases = [
            ("SLOVAK REPUBLIC", "sovereign"),
            ("ASNES KOMMUNE", "municipal"),
            ("SWEDISH EXPORT CREDIT", "agency"),
            ("REGION OCCITANIE", "state"),
        ];

        for (issuer, expected) in cases.iter() {
            assert_eq!(classify_government_issuer(issuer), *expected);
        }
    }
}