added cross compatibility between shutdown flag and state entries

This commit is contained in:
2026-01-15 00:22:55 +01:00
parent f4b20f824d
commit 75ab1969c7
14 changed files with 850 additions and 543 deletions

View File

@@ -28,32 +28,42 @@ const LEI_BATCH_SIZE: usize = 100; // Process 100 LEIs at a time
///
/// # Errors
/// Returns an error if file I/O fails or JSON parsing fails.
pub async fn update_securities() -> anyhow::Result<()> {
pub async fn update_securities(paths: &DataPaths) -> anyhow::Result<()> {
logger::log_info("Building securities data from FIGI mappings...").await;
let dir = DataPaths::new(".")?;
let manager = StateManager::new(&dir.integrity_dir()).await?;
let step_name = "securities_data_complete";
let date_dir = find_most_recent_figi_date_dir(&dir).await?
let date_dir = find_most_recent_figi_date_dir(&paths).await?
.ok_or_else(|| anyhow!("No FIGI date directory found"))?;
let data_dir = dir.data_dir();
let output_dir = data_dir.join("figi_securities");
tokio_fs::create_dir_all(&output_dir).await
.context("Failed to create corporate/by_name directory")?;
let output_dir = paths.figi_securities_dir();
let manager = StateManager::new(&paths.integrity_dir()).await?;
let step_name = "securities_data_complete";
let content_reference = directory_reference(
output_dir,
Some(vec![
"common_stocks.jsonl".to_string(),
"warrants.jsonl".to_string(),
"options.jsonl".to_string(),
"corporate_bonds.jsonl".to_string(),
"government_bonds.jsonl".to_string(),
]),
Some(vec![
"*.log.jsonl".to_string(), // Exclude log files
"*.tmp".to_string(), // Exclude temp files
"state.jsonl".to_string(), // Exclude internal state tracking
]),
);
let data_stage = DataStage::Data;
if manager.is_step_valid(step_name).await? {
logger::log_info(" Securities data already built and valid").await;
logger::log_info(" All sectors already processed, nothing to do").await;
return Ok(());
}
logger::log_info(" Securities data incomplete or missing, proceeding with update").await;
let entry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;
logger::log_info("Building securities data from FIGI mappings...").await;
tokio_fs::create_dir_all(&output_dir).await
.context("Failed to create corporate/by_name directory")?;
// Setup checkpoint and log paths for each security type
let common_checkpoint = output_dir.join("common_stocks.jsonl");
let common_log = output_dir.join("common_stocks.log.jsonl");
@@ -104,6 +114,7 @@ pub async fn update_securities() -> anyhow::Result<()> {
if sectors_to_process.is_empty() {
logger::log_info(" All sectors already processed, nothing to do").await;
manager.mark_valid(entry).await?;
return Ok(());
}
@@ -170,48 +181,12 @@ pub async fn update_securities() -> anyhow::Result<()> {
stats.print_summary();
logger::log_info(&format!("✓ Processed {} new sectors successfully", newly_processed_sectors.len())).await;
track_securities_completion(&manager, &output_dir).await?;
manager.mark_valid(entry).await?;
logger::log_info(" ✓ Securities data marked as complete with integrity tracking").await;
Ok(())
}
/// Track securities data completion with content hash verification.
///
/// Builds a content reference over the generated securities JSONL outputs
/// (skipping log files, temp files, and the internal state file) and records
/// it under the `securities_data_complete` step using the `Data` stage with
/// the default TTL.
async fn track_securities_completion(
    manager: &StateManager,
    output_dir: &Path,
) -> anyhow::Result<()> {
    // Output files whose contents participate in the completion hash.
    let tracked: Vec<String> = [
        "common_stocks.jsonl",
        "warrants.jsonl",
        "options.jsonl",
        "corporate_bonds.jsonl",
        "government_bonds.jsonl",
    ]
    .iter()
    .map(|s| s.to_string())
    .collect();
    // Patterns excluded from hashing: logs, temp files, and internal state.
    let excluded: Vec<String> = ["*.log.jsonl", "*.tmp", "state.jsonl"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let content_reference = directory_reference(output_dir, Some(tracked), Some(excluded));
    // Securities data is relatively stable, so the `Data` stage (7-day TTL per
    // the comment style used elsewhere in this file) with the default expiry
    // is used; no extra dependencies are attached here.
    manager
        .update_entry(
            "securities_data_complete".to_string(),
            content_reference,
            DataStage::Data,
            None, // use default TTL
        )
        .await?;
    Ok(())
}
/// Loads the list of sectors that have been fully processed
async fn load_processed_sectors(path: &Path) -> anyhow::Result<HashSet<String>> {
let mut sectors = HashSet::new();
@@ -1442,29 +1417,42 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(
/// Check mapping completion and process only unmapped LEIs
pub async fn update_lei_mapping(
paths: &DataPaths,
csv_path: &str,
gleif_date: Option<&str>,
) -> anyhow::Result<bool> {
let dir = DataPaths::new(".")?;
let manager = StateManager::new(&dir.integrity_dir()).await?;
let step_name = "lei_figi_mapping_complete";
let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
let date = determine_gleif_date(gleif_date, &dir).await?;
let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
let date = determine_gleif_date(gleif_date, &paths).await?;
let date_dir = map_cache_dir.join(&date);
let manager = StateManager::new(&paths.integrity_dir()).await?;
let step_name = "lei_figi_mapping_complete";
let content_reference = directory_reference(
&date_dir,
Some(vec![
"*/lei_to_figi.jsonl".to_string(), // All sector mapping files
"no_results.jsonl".to_string(), // LEIs with no results
]),
Some(vec![
"*.tmp".to_string(), // Exclude temp files
"*.log".to_string(), // Exclude log files
]),
);
let data_stage = DataStage::Cache; // 24-hour TTL for API data
if manager.is_step_valid(step_name).await? {
logger::log_info(" LEI-FIGI mapping already completed and valid").await;
logger::log_info("✓ All LEIs have been queried (mapped or confirmed no results)").await;
return Ok(true);
}
let entry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;
// Get unmapped LEIs (excludes both mapped and no-result LEIs)
let unmapped = get_unmapped_leis(csv_path, &date_dir).await?;
if unmapped.is_empty() {
logger::log_info("✓ All LEIs have been queried (mapped or confirmed no results)").await;
track_lei_mapping_completion(&manager, &dir.integrity_dir()).await?;
manager.mark_valid(entry).await?;
logger::log_info(" ✓ LEI-FIGI mapping marked as complete with integrity tracking").await;
return Ok(true);
@@ -1480,7 +1468,7 @@ pub async fn update_lei_mapping(
if still_unmapped.is_empty() {
logger::log_info("✓ All LEIs successfully queried").await;
track_lei_mapping_completion(&manager, &date_dir).await?;
manager.mark_valid(entry).await?;
logger::log_info(" ✓ LEI-FIGI mapping marked as complete with integrity tracking").await;
Ok(true)
} else {
@@ -1488,43 +1476,11 @@ pub async fn update_lei_mapping(
"{} LEIs still unqueried (API errors or rate limits)",
still_unmapped.len()
)).await;
manager.mark_invalid(entry, " Some LEIs remain unqueried".to_string()).await?;
Ok(false)
}
}
/// Track LEI-FIGI mapping completion with content hash verification.
///
/// Hashes every per-sector `lei_to_figi.jsonl` file plus `no_results.jsonl`
/// under `date_dir` (temp and log files excluded) and records the result
/// under the `lei_figi_mapping_complete` step using the `Cache` stage,
/// since FIGI data can change frequently.
async fn track_lei_mapping_completion(
    manager: &StateManager,
    date_dir: &Path,
) -> anyhow::Result<()> {
    // Files included in the hash: all sector mapping files and the
    // no-results list.
    let included: Vec<String> = ["*/lei_to_figi.jsonl", "no_results.jsonl"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    // Temp and log files must not affect the content hash.
    let excluded: Vec<String> = ["*.tmp", "*.log"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let content_reference = directory_reference(date_dir, Some(included), Some(excluded));
    // This is a collection step from an external API, so it carries no
    // dependencies and uses the default TTL for the `Cache` stage.
    manager
        .update_entry(
            "lei_figi_mapping_complete".to_string(),
            content_reference,
            DataStage::Cache,
            None, // use default TTL
        )
        .await?;
    Ok(())
}
/// Load LEIs that were queried but returned no results
async fn load_no_result_leis(date_dir: &Path) -> anyhow::Result<HashSet<String>> {
let mut no_result_leis = HashSet::new();