From b0a471ea84778f5ccf2f9a64c887cc1846bb18f7 Mon Sep 17 00:00:00 2001 From: donpat1to Date: Thu, 4 Dec 2025 21:03:55 +0100 Subject: [PATCH] added mapping figi info onto common shares / warrants / options --- fx_rates.json | 46 ---- src/corporate/openfigi.rs | 491 +++++++++++++++++++++++++++++--------- src/corporate/scraper.rs | 4 +- src/corporate/types.rs | 35 ++- src/corporate/update.rs | 36 +-- src/data/companies.json | 58 ----- src/data/continents.json | 9 - src/data/countries.json | 54 ----- src/data/exchanges.json | 260 -------------------- src/data/index.txt | 6 - src/economic/scraper.rs | 4 +- src/economic/storage.rs | 4 +- src/util.rs | 2 +- 13 files changed, 417 insertions(+), 592 deletions(-) delete mode 100644 fx_rates.json delete mode 100644 src/data/companies.json delete mode 100644 src/data/continents.json delete mode 100644 src/data/countries.json delete mode 100644 src/data/exchanges.json delete mode 100644 src/data/index.txt diff --git a/fx_rates.json b/fx_rates.json deleted file mode 100644 index 41431ca..0000000 --- a/fx_rates.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "CHF": [ - 0.808996035919424, - "2025-11-25" - ], - "JPY": [ - 0.0064, - "2025-11-25" - ], - "INR": [ - 89.28571428571429, - "2025-11-25" - ], - "GBp": [ - 0.7603406326034063, - "2025-11-25" - ], - "AUD": [ - 1.5463120457708364, - "2025-11-25" - ], - "SAR": [ - 3.750937734433609, - "2025-11-25" - ], - "TWD": [ - 31.446540880503143, - "2025-11-25" - ], - "CNY": [ - 7.087172218284904, - "2025-11-25" - ], - "HKD": [ - 7.776049766718508, - "2025-11-25" - ], - "CAD": [ - 1.4110342881332016, - "2025-11-25" - ], - "EUR": [ - 0.8649022660439372, - "2025-11-25" - ] -} \ No newline at end of file diff --git a/src/corporate/openfigi.rs b/src/corporate/openfigi.rs index 20e98fe..22fd1c4 100644 --- a/src/corporate/openfigi.rs +++ b/src/corporate/openfigi.rs @@ -209,7 +209,7 @@ impl OpenFigiClient { /// Builds a LEI-to-FigiInfo map from the LEI-ISIN mapping, filtering for equities via OpenFIGI. /// -/// Attempts to load existing entries from "data/companies_by_lei/lei_to_figi.jsonl" (JSON Lines format, +/// Attempts to load existing entries from "data/corporate/by_lei/lei_to_figi.jsonl" (JSON Lines format, /// one LEI entry per line: {"lei": "ABC", "figis": [FigiInfo...]}). For any missing LEIs (compared to /// `lei_to_isins`), fetches their FigiInfos and appends to the .jsonl file incrementally. /// @@ -231,7 +231,7 @@ impl OpenFigiClient { /// Returns an error if file I/O fails, JSON serialization/deserialization fails, /// or if OpenFIGI queries fail during fetching. pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap>) -> anyhow::Result>> { - let data_dir = Path::new("data/companies_by_lei"); + let data_dir = Path::new("data/corporate/by_lei"); tokio_fs::create_dir_all(data_dir).await.context("Failed to create data directory")?; let path = data_dir.join("lei_to_figi.jsonl"); @@ -358,127 +358,305 @@ fn append_lei_to_figi_jsonl(path: &Path, lei: &str, figis: &[FigiInfo]) -> anyho Ok(()) } -/// Loads or builds a HashMap of CompanyInfo objects indexed by company name. +/// Loads or builds HashMaps for companies, warrants, and options. /// /// This function: -/// 1. Attempts to load existing companies from cache -/// 2. If cache exists, updates/extends it with new data from figi_to_lei -/// 3. If no cache exists, creates a new HashMap from scratch -/// 4. Saves the result back to cache -/// -/// For existing entries (matched by name): -/// - Merges securities lists (deduplicates by FIGI) -/// - Updates primary_isin if the existing one is empty or not in the securities list -/// -/// For new entries: -/// - Adds them to the HashMap -/// -/// Companies with no FigiInfo data are skipped. -/// The resulting HashMap is saved to `data/companies_by_name/companies.json`. +/// 1. Attempts to load existing data from cache +/// 2. Processes new FIGI data and classifies by securityType: +/// - "Common Stock" → companies HashMap (grouped by ISIN) +/// - "Equity WRT" → warrants HashMap (parsed from name) +/// - "Equity Option" → options HashMap (parsed from name) +/// 3. Updates/extends existing entries +/// 4. Saves results to separate JSON files /// /// # Arguments /// * `figi_to_lei` - HashMap mapping LEI to Vec. /// /// # Returns -/// A HashMap mapping company name to CompanyInfo. +/// A tuple of (companies, warrants, options) HashMaps. /// /// # Errors /// Returns an error if file I/O fails or JSON serialization fails. -pub async fn load_or_build_companies_by_name( +pub async fn load_or_build_all_securities( figi_to_lei: &HashMap> -) -> anyhow::Result> { - // Try to load existing cache - let mut companies_by_name = match load_companies_by_name_internal().await? { - Some(existing) => { - println!("Loaded {} existing companies from cache", existing.len()); - existing - }, - None => { - println!("No existing cache found, creating new companies HashMap"); - HashMap::new() - } - }; +) -> anyhow::Result<( + HashMap, + HashMap>, + HashMap> +)> { + // Load existing data + let mut companies = load_from_cache("data/corporate/by_name/common_stocks.json").await? + .unwrap_or_else(HashMap::new); + let mut warrants = load_from_cache("data/corporate/by_name/warrants.json").await? + .unwrap_or_else(HashMap::new); + let mut options = load_from_cache("data/corporate/by_name/options.json").await? + .unwrap_or_else(HashMap::new); - let initial_count = companies_by_name.len(); - let mut added_count = 0; - let mut updated_count = 0; + println!("Loaded existing data:"); + println!(" - Companies: {}", companies.len()); + println!(" - Warrants: {}", warrants.len()); + println!(" - Options: {}", options.len()); + + let mut stats = ProcessingStats::new(companies.len(), warrants.len(), options.len()); println!("Processing {} LEI entries from FIGI data...", figi_to_lei.len()); for (lei, figi_infos) in figi_to_lei.iter() { - // Skip entries with no FigiInfo data if figi_infos.is_empty() { continue; } - // Get company name from first FigiInfo entry - let name = figi_infos[0].name.clone(); - if name.is_empty() { - continue; + // Group FigiInfos by security type + let mut common_stocks = Vec::new(); + let mut warrant_securities = Vec::new(); + let mut option_securities = Vec::new(); + + for figi_info in figi_infos { + match figi_info.securityType.as_str() { + "Common Stock" => common_stocks.push(figi_info.clone()), + "Equity WRT" => warrant_securities.push(figi_info.clone()), + "Equity Option" => option_securities.push(figi_info.clone()), + _ => {} // Ignore other types + } } - // Check if company already exists - if let Some(existing_company) = companies_by_name.get_mut(&name) { - // Update existing entry - let merged_securities = merge_securities(&existing_company.securities, figi_infos); - let securities_added = merged_securities.len() - existing_company.securities.len(); - - if securities_added > 0 { - existing_company.securities = merged_securities; - - // Update primary_isin if needed - if existing_company.primary_isin.is_empty() || - !existing_company.securities.iter().any(|s| s.isin == existing_company.primary_isin) { - existing_company.primary_isin = existing_company.securities[0].isin.clone(); - } - - updated_count += 1; - } - } else { - // Add new entry - let primary_isin = figi_infos[0].isin.clone(); - let securities = figi_infos.clone(); - - let company_info = CompanyInfo { - name: name.clone(), - primary_isin, - securities, - }; - - companies_by_name.insert(name, company_info); - added_count += 1; + // Process common stocks -> companies + if !common_stocks.is_empty() { + process_common_stocks(&mut companies, &common_stocks, &mut stats); + } + + // Process warrants + if !warrant_securities.is_empty() { + process_warrants(&mut warrants, &warrant_securities, &mut stats); + } + + // Process options + if !option_securities.is_empty() { + process_options(&mut options, &option_securities, &mut stats); } } - println!(" Companies statistics:"); - println!(" - Initial: {}", initial_count); - println!(" - Added: {}", added_count); - println!(" - Updated: {}", updated_count); - println!(" - Total: {}", companies_by_name.len()); + stats.print_summary(companies.len(), warrants.len(), options.len()); - // Save to JSON - save_companies_by_name(&companies_by_name).await?; + // Save all three HashMaps + save_to_cache("data/corporate/by_name/common_stocks.json", &companies).await?; + save_to_cache("data/corporate/by_name/warrants.json", &warrants).await?; + save_to_cache("data/corporate/by_name/options.json", &options).await?; - Ok(companies_by_name) + Ok((companies, warrants, options)) } -/// Merges two lists of FigiInfo, deduplicating by FIGI. -/// -/// # Arguments -/// * `existing` - Existing securities list -/// * `new_securities` - New securities to merge -/// -/// # Returns -/// Merged and deduplicated list of FigiInfo -fn merge_securities(existing: &[FigiInfo], new_securities: &[FigiInfo]) -> Vec { +/// Statistics tracker for processing +#[derive(Debug)] +struct ProcessingStats { + initial_companies: usize, + initial_warrants: usize, + initial_options: usize, + companies_added: usize, + companies_updated: usize, + warrants_added: usize, + warrants_updated: usize, + options_added: usize, + options_updated: usize, +} + +impl ProcessingStats { + fn new(companies: usize, warrants: usize, options: usize) -> Self { + Self { + initial_companies: companies, + initial_warrants: warrants, + initial_options: options, + companies_added: 0, + companies_updated: 0, + warrants_added: 0, + warrants_updated: 0, + options_added: 0, + options_updated: 0, + } + } + + fn print_summary(&self, final_companies: usize, final_warrants: usize, final_options: usize) { + println!("\n=== Processing Statistics ==="); + println!("Companies:"); + println!(" - Initial: {}", self.initial_companies); + println!(" - Added: {}", self.companies_added); + println!(" - Updated: {}", self.companies_updated); + println!(" - Total: {}", final_companies); + println!("Warrants:"); + println!(" - Initial: {}", self.initial_warrants); + println!(" - Added: {}", self.warrants_added); + println!(" - Updated: {}", self.warrants_updated); + println!(" - Total: {}", final_warrants); + println!("Options:"); + println!(" - Initial: {}", self.initial_options); + println!(" - Added: {}", self.options_added); + println!(" - Updated: {}", self.options_updated); + println!(" - Total: {}", final_options); + } +} + +/// Process common stocks into companies HashMap +fn process_common_stocks( + companies: &mut HashMap, + figi_infos: &[FigiInfo], + stats: &mut ProcessingStats, +) { + let name = figi_infos[0].name.clone(); + if name.is_empty() { + return; + } + + // Group by ISIN + let grouped_by_isin = group_by_isin(figi_infos); + + if let Some(existing) = companies.get_mut(&name) { + // Update existing company + let mut updated = false; + for (isin, new_figis) in grouped_by_isin { + if let Some(existing_figis) = existing.securities.get_mut(&isin) { + let merged = merge_figi_list(existing_figis, &new_figis); + if merged.len() > existing_figis.len() { + *existing_figis = merged; + updated = true; + } + } else { + existing.securities.insert(isin.clone(), new_figis); + updated = true; + } + } + + // Update primary ISIN if needed + if existing.primary_isin.is_empty() || !existing.securities.contains_key(&existing.primary_isin) { + if let Some(first_isin) = existing.securities.keys().next() { + existing.primary_isin = first_isin.clone(); + } + } + + if updated { + stats.companies_updated += 1; + } + } else { + // Add new company + let primary_isin = grouped_by_isin.keys().next().cloned().unwrap_or_default(); + + companies.insert(name.clone(), CompanyInfo { + name, + primary_isin, + securities: grouped_by_isin, + }); + + stats.companies_added += 1; + } +} + +/// Process warrants into warrants HashMap +fn process_warrants( + warrants: &mut HashMap>, + warrant_securities: &[FigiInfo], + stats: &mut ProcessingStats, // Assuming Stats is a struct; adjust based on actual type if it's a HashMap or other +) { + for figi in warrant_securities.iter() { + // Parse the name to extract underlying, issuer, and warrant_type + // (Assuming a parse_warrant_name function exists; this is not changed) + let (underlying, issuer, warrant_type) = parse_warrant_name(&figi.name); + + if underlying.is_empty() { + continue; + } + + // Outer map: key by underlying + let underlying_map = warrants + .entry(underlying.clone()) + .or_insert_with(HashMap::new); + + // Inner map: key by warrant_type + let entry = underlying_map.entry(warrant_type.clone()).or_insert(WarrantInfo { + underlying_company_name: underlying.clone(), + issuer_company_name: issuer, + warrant_type: warrant_type.clone(), + warrants: HashMap::new(), + }); + + // Group by ISIN as before + entry + .warrants + .entry(figi.isin.clone()) + .or_insert_with(Vec::new) + .push(figi.clone()); + + // Update stats (assuming stats has a 'warrants' field; adjust if needed) + stats.warrants_added += 1; + } +} + +/// Process options into options HashMap +fn process_options( + options: &mut HashMap>, + option_securities: &[FigiInfo], + stats: &mut ProcessingStats, // Assuming Stats is a struct; adjust based on actual type if it's a HashMap or other +) { + for figi in option_securities.iter() { + // Parse the name to extract underlying, issuer, and option_type + // (Assuming a parse_option_name function exists; this is not changed) + let (underlying, issuer, option_type) = parse_option_name(&figi.name); + + if underlying.is_empty() { + continue; + } + + // Outer map: key by underlying + let underlying_map = options + .entry(underlying.clone()) + .or_insert_with(HashMap::new); + + // Inner map: key by option_type + let entry = underlying_map.entry(option_type.clone()).or_insert(OptionInfo { + underlying_company_name: underlying.clone(), + issuer_company_name: issuer, + option_type: option_type.clone(), + options: HashMap::new(), + }); + + // Group by ISIN as before + entry + .options + .entry(figi.isin.clone()) + .or_insert_with(Vec::new) + .push(figi.clone()); + + // Update stats (assuming stats has an 'options' field; adjust if needed) + stats.options_added += 1; + } +} + +/// Groups FigiInfo list by ISIN +fn group_by_isin(figi_infos: &[FigiInfo]) -> HashMap> { + let mut grouped: HashMap> = HashMap::new(); + + for figi_info in figi_infos { + grouped.entry(figi_info.isin.clone()) + .or_insert_with(Vec::new) + .push(figi_info.clone()); + } + + // Sort each group by FIGI for consistency + for figis in grouped.values_mut() { + figis.sort_by(|a, b| a.figi.cmp(&b.figi)); + } + + grouped +} + +/// Merges two FigiInfo lists, deduplicating by FIGI +fn merge_figi_list(existing: &[FigiInfo], new_figis: &[FigiInfo]) -> Vec { let mut merged = existing.to_vec(); let existing_figis: HashSet = existing.iter() .map(|f| f.figi.clone()) .collect(); - for new_sec in new_securities { - if !existing_figis.contains(&new_sec.figi) { - merged.push(new_sec.clone()); + for new_figi in new_figis { + if !existing_figis.contains(&new_figi.figi) { + merged.push(new_figi.clone()); } } @@ -488,49 +666,126 @@ fn merge_securities(existing: &[FigiInfo], new_securities: &[FigiInfo]) -> Vec anyhow::Result>> { - let cache_file = Path::new("data/companies_by_name/companies.json"); +/// Parse warrant name to extract underlying company, issuer, and warrant type +/// +/// Examples: +/// - "VONTOBE-PW26 LEONARDO SPA" -> ("LEONARDO SPA", Some("VONTOBEL"), "put") +/// - "BAYER H-CW25 L'OREAL" -> ("L'OREAL", Some("BAYER H"), "call") +/// - "APPLE INC WARRANT" -> ("APPLE INC", None, "unknown") +fn parse_warrant_name(name: &str) -> (String, Option, String) { + let name_upper = name.to_uppercase(); + + // Try to detect warrant type from code (PW=put, CW=call) + let warrant_type = if name_upper.contains("-PW") || name_upper.contains(" PW") { + "put".to_string() + } else if name_upper.contains("-CW") || name_upper.contains(" CW") { + "call".to_string() + } else { + "unknown".to_string() + }; + + // Try to split by warrant code pattern (e.g., "-PW26", "-CW25") + if let Some(pos) = name.find("-PW") { + let before = name[..pos].trim(); + let after_idx = name[pos..].find(' ').map(|i| pos + i + 1).unwrap_or(name.len()); + let after = if after_idx < name.len() { + name[after_idx..].trim() + } else { + "" + }; + + return ( + after.to_string(), + if !before.is_empty() { Some(before.to_string()) } else { None }, + warrant_type, + ); + } + + if let Some(pos) = name.find("-CW") { + let before = name[..pos].trim(); + let after_idx = name[pos..].find(' ').map(|i| pos + i + 1).unwrap_or(name.len()); + let after = if after_idx < name.len() { + name[after_idx..].trim() + } else { + "" + }; + + return ( + after.to_string(), + if !before.is_empty() { Some(before.to_string()) } else { None }, + warrant_type, + ); + } + + // Fallback: return entire name as underlying + (name.to_string(), None, warrant_type) +} + +/// Parse option name to extract underlying company, issuer, and option type +/// +/// Examples: +/// - "December 25 Calls on ALPHA GA" -> ("ALPHA GA", None, "call") +/// - "January 26 Puts on TESLA INC" -> ("TESLA INC", None, "put") +fn parse_option_name(name: &str) -> (String, Option, String) { + let name_upper = name.to_uppercase(); + + // Detect option type + let option_type = if name_upper.contains("CALL") { + "call".to_string() + } else if name_upper.contains("PUT") { + "put".to_string() + } else { + "unknown".to_string() + }; + + // Try to extract underlying after "on" + if let Some(pos) = name_upper.find(" ON ") { + let underlying = name[pos + 4..].trim().to_string(); + return (underlying, None, option_type); + } + + // Fallback: return entire name + (name.to_string(), None, option_type) +} + +/// Generic function to load from cache +async fn load_from_cache(path: &str) -> anyhow::Result> +where + T: serde::de::DeserializeOwned, +{ + let cache_file = Path::new(path); if !cache_file.exists() { return Ok(None); } let content = tokio_fs::read_to_string(cache_file).await - .context("Failed to read companies.json")?; + .context(format!("Failed to read {}", path))?; - let companies: HashMap = serde_json::from_str(&content) - .context("Failed to parse companies.json")?; + let data: T = serde_json::from_str(&content) + .context(format!("Failed to parse {}", path))?; - Ok(Some(companies)) + Ok(Some(data)) } -/// Saves the companies HashMap to cache. -/// -/// # Arguments -/// * `companies` - The companies HashMap to save -/// -/// # Errors -/// Returns an error if file I/O fails or JSON serialization fails. -async fn save_companies_by_name(companies: &HashMap) -> anyhow::Result<()> { - let cache_dir = Path::new("data/companies_by_name"); +/// Generic function to save to cache +async fn save_to_cache(path: &str, data: &T) -> anyhow::Result<()> +where + T: serde::Serialize, +{ + let cache_path = Path::new(path); + let cache_dir = cache_path.parent().context("Invalid cache path")?; + tokio_fs::create_dir_all(cache_dir).await - .context("Failed to create data/companies_by_name directory")?; + .context(format!("Failed to create directory for {}", path))?; - let cache_file = cache_dir.join("companies.json"); - let json_str = serde_json::to_string_pretty(&companies) - .context("Failed to serialize companies to JSON")?; + let json_str = serde_json::to_string_pretty(data) + .context("Failed to serialize data")?; - tokio_fs::write(&cache_file, json_str).await - .context("Failed to write companies.json")?; + tokio_fs::write(cache_path, json_str).await + .context(format!("Failed to write {}", path))?; - println!(" ✓ Saved {} companies to {}", companies.len(), cache_file.display()); + println!(" ✓ Saved to {}", path); Ok(()) } diff --git a/src/corporate/scraper.rs b/src/corporate/scraper.rs index a0d2b8c..d89946d 100644 --- a/src/corporate/scraper.rs +++ b/src/corporate/scraper.rs @@ -670,8 +670,8 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow pub async fn download_isin_lei_csv() -> anyhow::Result> { let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download"; - let zip_path = "data/isin_lei.zip"; - let csv_path = "data/isin_lei.csv"; + let zip_path = "data/gleif/isin_lei.zip"; + let csv_path = "data/gleif/isin_lei.csv"; if let Err(e) = std::fs::create_dir_all("data") { println!("Failed to create data directory: {e}"); diff --git a/src/corporate/types.rs b/src/corporate/types.rs index 5387575..3ced6ba 100644 --- a/src/corporate/types.rs +++ b/src/corporate/types.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; + // src/corporate/types.rs use serde::{Deserialize, Serialize}; @@ -75,11 +77,42 @@ pub struct CompanyMetadata { /// # Attributes /// * Name as primary key (for one instition) -> might have to changed when first FigiInfo is coming in /// * ISIN as the most liquid / preferred traded security (used for fallback) +/// * securities: Grouped by ISIN, filtered for Common Stock only #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CompanyInfo{ pub name: String, pub primary_isin: String, - pub securities: Vec, + pub securities: HashMap>, // ISIN -> Vec +} + + +/// Warrant Info +/// +/// Information for Warrant securities fetched out of Name in FigiInfo +/// example1: "name": "VONTOBE-PW26 LEONARDO SPA", +/// issued by VONTOBEL Put Warrant for underlying company LEONARDO SPA +/// example2: "BAYER H-CW25 L'OREAL", +/// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WarrantInfo { + pub underlying_company_name: String, // key in CompanyInfo, key for WarrantInfo + pub issuer_company_name: Option, // key in CompanyInfo + pub warrant_type: String, // "put" or "call" + pub warrants: HashMap>, // ISIN -> Vec (grouped by ISIN) +} + +/// Option Info +/// +/// Information for Option securities fetched out of Name in FigiInfo +/// example1: "name": "December 25 Calls on ALPHA GA", +/// issued by NULL Call Option for underlying company ALPHA GA +/// other formats like only on company instead of two, underlying and issuing company are the same, leave issuer_company_name NULL +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OptionInfo { + pub underlying_company_name: String, // key in CompanyInfo, key for OptionInfo + pub issuer_company_name: Option, // key in CompanyInfo + pub option_type: String, // "put" or "call" + pub options: HashMap>, // ISIN -> Vec (grouped by ISIN) } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/src/corporate/update.rs b/src/corporate/update.rs index e202cf7..e34f294 100644 --- a/src/corporate/update.rs +++ b/src/corporate/update.rs @@ -4,9 +4,8 @@ use crate::config::Config; use crate::scraper::webdriver::ChromeDriverPool; use chrono::Local; -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap}; use std::sync::Arc; -use futures::{stream::{self, StreamExt}}; /// Main function: Full update for all companies (LEI-based) with optimized parallel execution. /// @@ -54,8 +53,8 @@ pub async fn run_full_update(config: &Config, pool: &Arc) -> a }; // 4. Load or build companies - let mut companies = load_or_build_companies_by_name(&figi_to_lei).await?; - println!("Processing {} companies", companies.len()); + let mut companies = load_or_build_all_securities(&figi_to_lei).await?; + println!("Processing {} companies", companies.0.len()); // 5. Load existing earnings events (for change detection) let today = Local::now().format("%Y-%m-%d").to_string(); @@ -96,35 +95,6 @@ pub async fn run_full_update(config: &Config, pool: &Arc) -> a Ok(()) } -async fn assign_leis_from_figi( - companies: &mut [CompanyMetadata], - lei_to_isins: &HashMap> -) -> anyhow::Result<()> { - for company in companies { - let figi_infos = company.figi.as_ref().map_or(&[][..], |v| &v[..]); - let isins: Vec = figi_infos - .iter() - .map(|f| f.isin.clone()) - .collect::>() - .into_iter() - .collect(); - - // Try to find LEI by any known ISIN - for isin in &isins { - for (lei, isins) in lei_to_isins.iter() { - if isins.contains(isin) { - company.lei = lei.clone(); - let name = figi_infos.first().map(|f| f.name.as_str()).unwrap_or("Unknown"); - println!("Found real LEI {} for {}", lei, name); - break; - } - } - if !company.lei.is_empty() { break; } - } - } - Ok(()) -} - pub struct ProcessResult { pub changes: Vec, } diff --git a/src/data/companies.json b/src/data/companies.json deleted file mode 100644 index 4d848ff..0000000 --- a/src/data/companies.json +++ /dev/null @@ -1,58 +0,0 @@ -[ - { - "lei": "8I5D5ASD7N5Z5P2K9M3J", - "isins": ["US46625H1005"], - "primary_isin": "US46625H1005", - "name": "JPMorgan Chase & Co.", - "tickers": [ - { "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true }, - { "ticker": "JPM-PC", "exchange_mic": "XNYS", "currency": "USD", "primary": false } - ] - }, - { - "lei": "5493001KJTIIGC8Y1R12", - "isins": ["US5949181045"], - "primary_isin": "US5949181045", - "name": "Microsoft Corporation", - "tickers": [ - { "ticker": "MSFT", "exchange_mic": "XNAS", "currency": "USD", "primary": true } - ] - }, - { - "lei": "529900T8BM49AURSDO55", - "isins": ["CNE000001P37"], - "primary_isin": "CNE000001P37", - "name": "Industrial and Commercial Bank of China", - "tickers": [ - { "ticker": "601398.SS", "exchange_mic": "XSHG", "currency": "CNY", "primary": true }, - { "ticker": "1398.HK", "exchange_mic": "XHKG", "currency": "HKD", "primary": false } - ] - }, - { - "lei": "519900X5W8K6C1FZ3B57", - "isins": ["JP3702200000"], - "primary_isin": "JP3702200000", - "name": "Toyota Motor Corporation", - "tickers": [ - { "ticker": "7203.T", "exchange_mic": "XJPX", "currency": "JPY", "primary": true }, - { "ticker": "TM", "exchange_mic": "XNYS", "currency": "USD", "primary": false } - ] - }, - { - "lei": "529900T8BM49AURSDO56", - "isins": ["HK0000069689"], - "primary_isin": "HK0000069689", - "name": "Tencent Holdings Limited", - "tickers": [ - { "ticker": "0700.HK", "exchange_mic": "XHKG", "currency": "HKD", "primary": true }, - { "ticker": "TCEHY", "exchange_mic": "OTCM", "currency": "USD", "primary": false } - ] - }, - { - "lei": "8I5D5Q1L7N5Z5P2K9M3J", - "isins": ["US90953F1049"], - "primary_isin": "US90953F1049", - "name": "Test Bonds Filter", - "tickers": [{ "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true }] - } -] \ No newline at end of file diff --git a/src/data/continents.json b/src/data/continents.json deleted file mode 100644 index 75b809d..0000000 --- a/src/data/continents.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - "afrika", - "asien", - "europa", - "nordamerika", - "suedamerika", - "antarktis", - "ozeanien" -] \ No newline at end of file diff --git a/src/data/countries.json b/src/data/countries.json deleted file mode 100644 index 584719c..0000000 --- a/src/data/countries.json +++ /dev/null @@ -1,54 +0,0 @@ -[ - "aegypten", - "frankreich", - "litauen", - "schweiz", - "argentinien", - "griechenland", - "mexiko", - "singapur", - "australien", - "hongkong", - "neuseeland", - "slowakei", - "bahrain", - "indien", - "niederlande", - "spanien", - "belgien", - "indonesien", - "norwegen", - "suedafrika", - "brasilien", - "irland", - "oesterreich", - "suedkorea", - "chile", - "island", - "peru", - "taiwan", - "china", - "italien", - "philippinen", - "tschechien", - "daenemark", - "japan", - "polen", - "tuerkei", - "deutschland", - "kanada", - "portugal", - "ungarn", - "estland", - "katar", - "rumaenien", - "usa", - "eurozone", - "kolumbien", - "russland", - "vereinigte-arabische-emirate", - "finnland", - "lettland", - "schweden", - "vereinigtes-koenigreich" -] \ No newline at end of file diff --git a/src/data/exchanges.json b/src/data/exchanges.json deleted file mode 100644 index af0c4d0..0000000 --- a/src/data/exchanges.json +++ /dev/null @@ -1,260 +0,0 @@ -{ - "exchanges": [ - { - "mic": "XNYS", - "name": "New York Stock Exchange", - "country": "United States", - "city": "New York City", - "market_cap_trillion_usd": 30.92, - "timezone": "America/New_York", - "tz_offset": "-05:00", - "dst": "Mar–Nov", - "open_local": "09:30", - "close_local": "16:00", - "lunch_break": false, - "open_utc": "14:30", - "close_utc": "21:00", - "currency": "USD" - }, - { - "mic": "XNAS", - "name": "Nasdaq", - "country": "United States", - "city": "New York City", - "market_cap_trillion_usd": 31.96, - "timezone": "America/New_York", - "tz_offset": "-05:00", - "dst": "Mar–Nov", - "open_local": "09:30", - "close_local": "16:00", - "lunch_break": false, - "open_utc": "14:30", - "close_utc": "21:00", - "currency": "USD" - }, - { - "mic": "XSHG", - "name": "Shanghai Stock Exchange", - "country": "China", - "city": "Shanghai", - "market_cap_trillion_usd": 7.96, - "timezone": "Asia/Shanghai", - "tz_offset": "+08:00", - "dst": null, - "open_local": "09:30", - "close_local": "15:00", - "lunch_break": "11:30–13:00", - "open_utc": "01:30", - "close_utc": "07:00", - "currency": "CNY" - }, - { - "mic": "XJPX", - "name": "Japan Exchange Group (Tokyo Stock Exchange)", - "country": "Japan", - "city": "Tokyo", - "market_cap_trillion_usd": 7.06, - "timezone": "Asia/Tokyo", - "tz_offset": "+09:00", - "dst": null, - "open_local": "09:00", - "close_local": "15:00", - "lunch_break": "11:30–12:30", - "open_utc": "00:00", - "close_utc": "06:00", - "currency": "JPY" - }, - { - "mic": "XHKG", - "name": "Hong Kong Stock Exchange", - "country": "Hong Kong", - "city": "Hong Kong", - "market_cap_trillion_usd": 6.41, - "timezone": "Asia/Hong_Kong", - "tz_offset": "+08:00", - "dst": null, - "open_local": "09:30", - "close_local": "16:00", - "lunch_break": "12:00–13:00", - "open_utc": "01:30", - "close_utc": "08:00", - "currency": "HKD" - }, - { - "mic": "XAMS", - "name": "Euronext Amsterdam", - "country": "Netherlands", - "city": "Amsterdam", - "market_cap_trillion_usd": 5.61, - "timezone": "Europe/Amsterdam", - "tz_offset": "+01:00", - "dst": "Mar–Oct", - "open_local": "09:00", - "close_local": "17:30", - "lunch_break": false, - "open_utc": "08:00", - "close_utc": "16:30", - "currency": "EUR" - }, - { - "mic": "XBSE", - "name": "Bombay Stock Exchange", - "country": "India", - "city": "Mumbai", - "market_cap_trillion_usd": 5.25, - "timezone": "Asia/Kolkata", - "tz_offset": "+05:30", - "dst": null, - "open_local": "09:15", - "close_local": "15:30", - "lunch_break": false, - "open_utc": "03:45", - "close_utc": "10:00", - "currency": "INR" - }, - { - "mic": "XNSE", - "name": "National Stock Exchange of India", - "country": "India", - "city": "Mumbai", - "market_cap_trillion_usd": 5.32, - "timezone": "Asia/Kolkata", - "tz_offset": "+05:30", - "dst": null, - "open_local": "09:15", - "close_local": "15:d30", - "lunch_break": false, - "open_utc": "03:45", - "close_utc": "10:00", - "currency": "INR" - }, - { - "mic": "XSHE", - "name": "Shenzhen Stock Exchange", - "country": "China", - "city": "Shenzhen", - "market_cap_trillion_usd": 5.11, - "timezone": "Asia/Shanghai", - "tz_offset": "+08:00", - "dst": null, - "open_local": "09:30", - "close_local": "15:00", - "lunch_break": "11:30–13:00", - "open_utc": "01:30", - "close_utc": "07:00", - "currency": "CNY" - }, - { - "mic": "XTSE", - "name": "Toronto Stock Exchange", - "country": "Canada", - "city": "Toronto", - "market_cap_trillion_usd": 4.00, - "timezone": "America/Toronto", - "tz_offset": "-05:00", - "dst": "Mar–Nov", - "open_local": "09:30", - "close_local": "16:00", - "lunch_break": false, - "open_utc": "14:30", - "close_utc": "21:00", - "currency": "CAD" - }, - { - "mic": "XLON", - "name": "London Stock Exchange", - "country": "United Kingdom", - "city": "London", - "market_cap_trillion_usd": 3.14, - "timezone": "Europe/London", - "tz_offset": "+00:00", - "dst": "Mar–Oct", - "open_local": "08:00", - "close_local": "16:30", - "lunch_break": false, - "open_utc": "08:00", - "close_utc": "16:30", - "currency": "GBP" - }, - { - "mic": "XTAI", - "name": "Taiwan Stock Exchange", - "country": "Taiwan", - "city": "Taipei", - "market_cap_trillion_usd": 2.87, - "timezone": "Asia/Taipei", - "tz_offset": "+08:00", - "dst": null, - "open_local": "09:00", - "close_local": "13:30", - "lunch_break": false, - "open_utc": "01:00", - "close_utc": "05:30", - "currency": "TWD" - }, - { - "mic": "XSAU", - "name": "Saudi Exchange (Tadawul)", - "country": "Saudi Arabia", - "city": "Riyadh", - "market_cap_trillion_usd": 2.73, - "timezone": "Asia/Riyadh", - "tz_offset": "+03:00", - "dst": null, - "open_local": "10:00", - "close_local": "15:00", - "lunch_break": false, - "open_utc": "07:00", - "close_utc": "12:00", - "currency": "SAR" - }, - { - "mic": "XFRA", - "name": "Deutsche Börse (Xetra)", - "country": "Germany", - "city": "Frankfurt", - "market_cap_trillion_usd": 2.04, - "timezone": "Europe/Berlin", - "tz_offset": "+01:00", - "dst": "Mar–Oct", - "open_local": "09:00", - "close_local": "17:30", - "lunch_break": false, - "open_utc": "08:00", - "close_utc": "16:30", - "currency": "EUR" - }, - { - "mic": "XSWX", - "name": "SIX Swiss Exchange", - "country": "Switzerland", - "city": "Zürich", - "market_cap_trillion_usd": 1.97, - "timezone": "Europe/Zurich", - "tz_offset": "+01:00", - "dst": "Mar–Oct", - "open_local": "09:00", - "close_local": "17:30", - "lunch_break": false, - "open_utc": "08:00", - "close_utc": "16:30", - "currency": "CHF" - }, - { - "mic": "XASX", - "name": "Australian Securities Exchange", - "country": "Australia", - "city": "Sydney", - "market_cap_trillion_usd": 1.89, - "timezone": "Australia/Sydney", - "tz_offset": "+10:00", - "dst": "Oct–Apr", - "open_local": "10:00", - "close_local": "16:00", - "lunch_break": false, - "open_utc": "00:00", - "close_utc": "06:00", - "currency": "AUD" - } - ] -} \ No newline at end of file diff --git a/src/data/index.txt b/src/data/index.txt deleted file mode 100644 index 547e36e..0000000 --- a/src/data/index.txt +++ /dev/null @@ -1,6 +0,0 @@ -data/* - -companies.json -continents.json -countries.json -exchanges.json \ No newline at end of file diff --git a/src/economic/scraper.rs b/src/economic/scraper.rs index 0e68a9a..c442fb6 100644 --- a/src/economic/scraper.rs +++ b/src/economic/scraper.rs @@ -9,11 +9,11 @@ pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> { client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?; //dismiss_overlays(client).await?; - if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await { + /*if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await { tab.click().await?; println!("High importance tab selected"); sleep(Duration::from_secs(2)).await; - } + }*/ Ok(()) } diff --git a/src/economic/storage.rs b/src/economic/storage.rs index 69272cd..1b89037 100644 --- a/src/economic/storage.rs +++ b/src/economic/storage.rs @@ -6,7 +6,7 @@ use chrono::{NaiveDate, Datelike}; use std::collections::HashMap; pub async fn scan_existing_chunks() -> anyhow::Result> { - let dir = std::path::Path::new("economic_events"); + let dir = std::path::Path::new("data/economic/events"); let mut chunks = Vec::new(); if dir.exists() { @@ -45,7 +45,7 @@ pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result) -> anyhow::Result<()> { - let dir = std::path::Path::new("economic_events"); + let dir = std::path::Path::new("data/economic/events"); fs::create_dir_all(dir).await?; // Delete all old chunk files to prevent duplicates and overlaps diff --git a/src/util.rs b/src/util.rs index 6da8750..a4fd2ac 100644 --- a/src/util.rs +++ b/src/util.rs @@ -3,7 +3,7 @@ use tokio::fs; use std::path::Path; /// Create the required data folders if they do not exist yet. -pub async fn ensure_data_dirs() -> anyhow::Result<()> { +pub async fn _ensure_data_dirs() -> anyhow::Result<()> { let dirs = [ "economic_events", "economic_event_changes",