@@ -102,13 +102,13 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
     logger::log_info("Building securities data from FIGI mappings...").await;
 
     let dir = DataPaths::new(".")?;
     let state_path = dir.data_dir().join("state.jsonl");
-    let manager = StateManager::new(&dir.integrity_dir())?;
+    let manager = StateManager::new(&dir.integrity_dir()).await?;
     let step_name = "securities_data_complete";
 
     let data_dir = dir.data_dir();
-    let corporate_data_dir = data_dir.join("corporate");
-    let output_dir = corporate_data_dir.join("by_name");
+    let economic_data_dir = data_dir.join("economic");
+    let output_dir = data_dir.join("by_name");
     tokio_fs::create_dir_all(&output_dir).await
         .context("Failed to create corporate/by_name directory")?;
 
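Note on the first change above: the call site now awaits StateManager::new, which implies the constructor became async. A minimal sketch of what that constructor shape could look like (the struct body and the create_dir_all call are assumptions; only the awaited signature is taken from the diff):

    use std::path::{Path, PathBuf};

    pub struct StateManager {
        integrity_dir: PathBuf,
    }

    impl StateManager {
        // Assumed shape: an async constructor that prepares the integrity directory.
        pub async fn new(integrity_dir: &Path) -> anyhow::Result<Self> {
            tokio::fs::create_dir_all(integrity_dir).await?;
            Ok(Self { integrity_dir: integrity_dir.to_path_buf() })
        }
    }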
@@ -130,6 +130,10 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
     let warrants_log = output_dir.join("warrants.log.jsonl");
     let options_checkpoint = output_dir.join("options.jsonl");
     let options_log = output_dir.join("options.log.jsonl");
+    let corporate_bonds_checkpoint = output_dir.join("corporate_bonds.jsonl");
+    let corporate_bonds_log = output_dir.join("corporate_bonds.log.jsonl");
+    let government_bonds_checkpoint = output_dir.join("government_bonds.jsonl");
+    let government_bonds_log = output_dir.join("government_bonds.log.jsonl");
 
     // Track which sectors have been fully processed
     let processed_sectors_file = output_dir.join("state.jsonl");
@@ -176,15 +180,19 @@ pub async fn update_securities(date_dir: &Path) -> anyhow::Result<()> {
     let mut existing_companies = load_checkpoint_and_replay(&common_checkpoint, &common_log, "name").await?;
     let mut existing_warrants = load_checkpoint_and_replay_nested(&warrants_checkpoint, &warrants_log).await?;
     let mut existing_options = load_checkpoint_and_replay_nested(&options_checkpoint, &options_log).await?;
+    let mut existing_corporate_bonds = load_checkpoint_and_replay_nested(&corporate_bonds_checkpoint, &corporate_bonds_log).await?;
+    let mut existing_government_bonds = load_checkpoint_and_replay_nested(&government_bonds_checkpoint, &government_bonds_log).await?;
 
-    logger::log_info(&format!(" Existing entries - Companies: {}, Warrants: {}, Options: {}",
-        existing_companies.len(), existing_warrants.len(), existing_options.len())).await;
+    logger::log_info(&format!(" Existing entries - Companies: {}, Warrants: {}, Options: {}, Corporate Bonds: {}, Government Bonds: {}",
+        existing_companies.len(), existing_warrants.len(), existing_options.len(), existing_corporate_bonds.len(), existing_government_bonds.len())).await;
 
     // Process statistics
     let mut stats = StreamingStats::new(
         existing_companies.len(),
         existing_warrants.len(),
-        existing_options.len()
+        existing_options.len(),
+        existing_corporate_bonds.len(),
+        existing_government_bonds.len()
     );
 
     logger::log_info(&format!(" Found {} sectors to process", sectors_to_process.len())).await;
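The replay helpers above are not shown in this diff; the pattern they name is "load the last checkpoint, then re-apply the append-only log on top of it". A minimal sketch under that assumption (record shape, key handling, and return type are guesses, not the repository's code):

    use std::collections::HashMap;
    use std::path::Path;
    use anyhow::Context;

    // Hypothetical replay: both files are JSONL, keyed by `key_field`; entries read
    // later (from the log) override checkpoint entries with the same key.
    async fn load_checkpoint_and_replay(
        checkpoint: &Path,
        log: &Path,
        key_field: &str,
    ) -> anyhow::Result<HashMap<String, serde_json::Value>> {
        let mut map = HashMap::new();
        for path in [checkpoint, log] {
            if !path.exists() {
                continue;
            }
            let content = tokio::fs::read_to_string(path)
                .await
                .context(format!("Failed to read {}", path.display()))?;
            for line in content.lines().filter(|l| !l.trim().is_empty()) {
                let value: serde_json::Value = serde_json::from_str(line)?;
                let key = value
                    .get(key_field)
                    .and_then(|v| v.as_str())
                    .map(str::to_owned);
                if let Some(key) = key {
                    map.insert(key, value);
                }
            }
        }
        Ok(map)
    }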
@@ -834,20 +842,29 @@ struct StreamingStats {
     initial_companies: usize,
     initial_warrants: usize,
     initial_options: usize,
+    initial_corporate_bonds: usize,
+    initial_government_bonds: usize,
     companies_added: usize,
     warrants_added: usize,
     options_added: usize,
+    corporate_bonds_added: usize,
+    government_bonds_added: usize,
+
 }
 
 impl StreamingStats {
-    fn new(companies: usize, warrants: usize, options: usize) -> Self {
+    fn new(companies: usize, warrants: usize, options: usize, corporate_bonds: usize, government_bonds: usize) -> Self {
         Self {
             initial_companies: companies,
             initial_warrants: warrants,
             initial_options: options,
+            initial_corporate_bonds: corporate_bonds,
+            initial_government_bonds: government_bonds,
             companies_added: 0,
             warrants_added: 0,
             options_added: 0,
+            corporate_bonds_added: 0,
+            government_bonds_added: 0,
         }
     }
 
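The *_added counters are incremented in code outside this diff; with the two new asset classes the natural extension is a pair of matching bump helpers on the struct shown above (method names are assumed, not taken from the repository):

    impl StreamingStats {
        // Assumed helpers mirroring the existing counters for the new asset classes.
        fn record_corporate_bond_added(&mut self) {
            self.corporate_bonds_added += 1;
        }

        fn record_government_bond_added(&mut self) {
            self.government_bonds_added += 1;
        }
    }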
@@ -865,6 +882,14 @@ impl StreamingStats {
         println!(" - Initial: {}", self.initial_options);
         println!(" - Added: {}", self.options_added);
         println!(" - Total: {}", self.initial_options + self.options_added);
+        println!("Corporate Bonds:");
+        println!(" - Initial: {}", self.initial_corporate_bonds);
+        println!(" - Added: {}", self.corporate_bonds_added);
+        println!(" - Total: {}", self.initial_corporate_bonds + self.corporate_bonds_added);
+        println!("Government Bonds:");
+        println!(" - Initial: {}", self.initial_government_bonds);
+        println!(" - Added: {}", self.government_bonds_added);
+        println!(" - Total: {}", self.initial_government_bonds + self.government_bonds_added);
     }
 }
 
@@ -1078,17 +1103,17 @@ async fn load_existing_mapped_leis(date_dir: &Path) -> anyhow::Result<HashSet<St
 
 /// Read GLEIF CSV and return all LEIs (without loading entire file into memory)
 async fn get_all_leis_from_gleif(csv_path: &str) -> anyhow::Result<HashSet<String>> {
-    let file = std::fs::File::open(csv_path)?;
-    let reader = BufReader::new(file);
+    let content = tokio::fs::read_to_string(csv_path)
+        .await
+        .context(format!("Failed to read GLEIF CSV file: {}", csv_path))?;
 
     let mut all_leis = HashSet::new();
 
-    for (idx, line) in reader.lines().enumerate() {
+    for (idx, line) in content.lines().enumerate() {
         if idx == 0 {
             continue; // Skip header
         }
 
-        let line = line?;
         let parts: Vec<&str> = line.split(',').collect();
 
         if parts.len() < 2 {
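One thing worth noting about the hunk above: the doc comment still promises "without loading entire file into memory", but tokio::fs::read_to_string reads the whole CSV into a single String before iterating. If the goal is a non-blocking read that also keeps the streaming property for large GLEIF files, an async buffered reader does both; a sketch under that assumption (the function name is hypothetical, and the column layout — LEI in the first comma-separated field — is assumed):

    use std::collections::HashSet;
    use anyhow::Context;
    use tokio::io::AsyncBufReadExt;

    // Streaming variant: reads the CSV line by line instead of buffering the whole file.
    async fn collect_leis_streaming(csv_path: &str) -> anyhow::Result<HashSet<String>> {
        let file = tokio::fs::File::open(csv_path)
            .await
            .context(format!("Failed to open GLEIF CSV file: {}", csv_path))?;
        let mut lines = tokio::io::BufReader::new(file).lines();

        let mut all_leis = HashSet::new();
        let mut idx = 0usize;
        while let Some(line) = lines.next_line().await? {
            if idx > 0 {
                if let Some(lei) = line.split(',').next() {
                    if !lei.is_empty() {
                        all_leis.insert(lei.trim_matches('"').to_string());
                    }
                }
            }
            idx += 1;
        }
        Ok(all_leis)
    }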
@@ -1147,8 +1172,9 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(
 ) -> anyhow::Result<()> {
     logger::log_info(&format!("Streaming GLEIF CSV: {}", csv_path)).await;
 
-    let file = std::fs::File::open(csv_path)?;
-    let reader = BufReader::new(file);
+    let content = tokio::fs::read_to_string(csv_path)
+        .await
+        .context(format!("Failed to read GLEIF CSV file: {}", csv_path))?;
 
     let client = OpenFigiClient::new().await?;
     if !client.has_key {
@@ -1171,9 +1197,7 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(
     let mut processed_leis = 0;
     let mut skipped_leis = 0;
 
-    for (idx, line) in reader.lines().enumerate() {
-        let line = line?;
-
+    for (idx, line) in content.lines().enumerate() {
         if idx == 0 { continue; }
 
         let parts: Vec<&str> = line.split(',').collect();
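Dropping `let line = line?;` in this loop follows directly from the reader change: std::io::BufRead::lines() yields io::Result<String>, so each item has to be unwrapped, while str::lines() over the pre-read `content` yields plain &str, leaving nothing to unwrap. A small self-contained comparison (function names hypothetical):

    use std::io::{BufRead, BufReader};

    // Before-style: every line is an io::Result and can fail mid-file.
    fn count_data_rows_sync(path: &str) -> anyhow::Result<usize> {
        let reader = BufReader::new(std::fs::File::open(path)?);
        let mut rows = 0;
        for (idx, line) in reader.lines().enumerate() {
            let _line = line?;
            if idx > 0 {
                rows += 1;
            }
        }
        Ok(rows)
    }

    // After-style: the file is read up front, so per-line iteration is infallible.
    async fn count_data_rows_async(path: &str) -> anyhow::Result<usize> {
        let content = tokio::fs::read_to_string(path).await?;
        Ok(content.lines().skip(1).count())
    }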
@@ -1232,8 +1256,7 @@ pub async fn update_lei_mapping(
     gleif_date: Option<&str>,
 ) -> anyhow::Result<bool> {
     let dir = DataPaths::new(".")?;
-    let state_path = dir.cache_dir().join("state.jsonl");
-    let manager = StateManager::new(&dir.integrity_dir())?;
+    let manager = StateManager::new(&dir.integrity_dir()).await?;
     let step_name = "lei_figi_mapping_complete";
 
     let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
@@ -1251,7 +1274,7 @@ pub async fn update_lei_mapping(
 
     if unmapped.is_empty() {
         logger::log_info("✓ All LEIs have been queried (mapped or confirmed no results)").await;
-        track_lei_mapping_completion(&manager, &date_dir).await?;
+        track_lei_mapping_completion(&manager, &dir.integrity_dir()).await?;
         logger::log_info(" ✓ LEI-FIGI mapping marked as complete with integrity tracking").await;
 
         return Ok(true);