Added cross-compatibility between shutdown flag and state entries
@@ -28,32 +28,42 @@ const LEI_BATCH_SIZE: usize = 100; // Process 100 LEIs at a time
///
/// # Errors
/// Returns an error if file I/O fails or JSON parsing fails.
pub async fn update_securities() -> anyhow::Result<()> {
pub async fn update_securities(paths: &DataPaths) -> anyhow::Result<()> {
    logger::log_info("Building securities data from FIGI mappings...").await;

    let dir = DataPaths::new(".")?;
    let manager = StateManager::new(&dir.integrity_dir()).await?;
    let step_name = "securities_data_complete";

    let date_dir = find_most_recent_figi_date_dir(&dir).await?
    let date_dir = find_most_recent_figi_date_dir(&paths).await?
        .ok_or_else(|| anyhow!("No FIGI date directory found"))?;

    let data_dir = dir.data_dir();
    let output_dir = data_dir.join("figi_securities");
    tokio_fs::create_dir_all(&output_dir).await
        .context("Failed to create corporate/by_name directory")?;
    let output_dir = paths.figi_securities_dir();

    let manager = StateManager::new(&paths.integrity_dir()).await?;
    let step_name = "securities_data_complete";
    let content_reference = directory_reference(
        output_dir,
        Some(vec![
            "common_stocks.jsonl".to_string(),
            "warrants.jsonl".to_string(),
            "options.jsonl".to_string(),
            "corporate_bonds.jsonl".to_string(),
            "government_bonds.jsonl".to_string(),
        ]),
        Some(vec![
            "*.log.jsonl".to_string(), // Exclude log files
            "*.tmp".to_string(), // Exclude temp files
            "state.jsonl".to_string(), // Exclude internal state tracking
        ]),
    );
    let data_stage = DataStage::Data;

    if manager.is_step_valid(step_name).await? {
        logger::log_info(" Securities data already built and valid").await;
        logger::log_info(" All sectors already processed, nothing to do").await;
        return Ok(());
    }
    logger::log_info(" Securities data incomplete or missing, proceeding with update").await;
    let entry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;

    logger::log_info("Building securities data from FIGI mappings...").await;

    tokio_fs::create_dir_all(&output_dir).await
        .context("Failed to create corporate/by_name directory")?;

    // Setup checkpoint and log paths for each security type
    let common_checkpoint = output_dir.join("common_stocks.jsonl");
    let common_log = output_dir.join("common_stocks.log.jsonl");
@@ -104,6 +114,7 @@ pub async fn update_securities() -> anyhow::Result<()> {

    if sectors_to_process.is_empty() {
        logger::log_info(" All sectors already processed, nothing to do").await;
        manager.mark_valid(entry).await?;
        return Ok(());
    }

@@ -170,48 +181,12 @@ pub async fn update_securities() -> anyhow::Result<()> {
    stats.print_summary();
    logger::log_info(&format!("✓ Processed {} new sectors successfully", newly_processed_sectors.len())).await;

    track_securities_completion(&manager, &output_dir).await?;
    manager.mark_valid(entry).await?;
    logger::log_info(" ✓ Securities data marked as complete with integrity tracking").await;

    Ok(())
}

/// Track securities data completion with content hash verification
async fn track_securities_completion(
    manager: &StateManager,
    output_dir: &Path,
) -> anyhow::Result<()> {
    // Create content reference for all output files
    let content_reference = directory_reference(
        output_dir,
        Some(vec![
            "common_stocks.jsonl".to_string(),
            "warrants.jsonl".to_string(),
            "options.jsonl".to_string(),
            "corporate_bonds.jsonl".to_string(),
            "government_bonds.jsonl".to_string(),
        ]),
        Some(vec![
            "*.log.jsonl".to_string(), // Exclude log files
            "*.tmp".to_string(), // Exclude temp files
            "state.jsonl".to_string(), // Exclude internal state tracking
        ]),
    );

    // Track completion with:
    // - Content reference: All output JSONL files
    // - Data stage: Data (7-day TTL) - Securities data relatively stable
    // - Dependencies: LEI-FIGI mapping must be valid
    manager.update_entry(
        "securities_data_complete".to_string(),
        content_reference,
        DataStage::Data,
        None, // Use default TTL (7 days)
    ).await?;

    Ok(())
}

/// Loads the list of sectors that have been fully processed
async fn load_processed_sectors(path: &Path) -> anyhow::Result<HashSet<String>> {
    let mut sectors = HashSet::new();
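The pattern introduced here in `update_securities` (and mirrored in `update_lei_mapping` below) is an explicit entry lifecycle: check `is_step_valid`, otherwise `create_entry` before any work is done, then `mark_valid` on success or `mark_invalid` on failure, replacing the removed `track_*_completion` helpers that only wrote the entry after the fact. A minimal sketch of that lifecycle, assuming the `StateManager`, `directory_reference`, and `DataStage` signatures shown in this diff; `do_work` and its error message are placeholders, not code from the commit:

```rust
// Sketch only: StateManager, DataStage, and directory_reference are the
// crate's own items as used in the diff above; imports are omitted.
use std::path::Path;

async fn run_tracked_step(manager: &StateManager, output_dir: &Path) -> anyhow::Result<bool> {
    let step_name = "securities_data_complete";
    // Hashable reference to the step's outputs, with include/exclude globs
    // as in the diff.
    let content_reference = directory_reference(
        output_dir,
        Some(vec!["common_stocks.jsonl".to_string()]),
        Some(vec!["*.tmp".to_string()]),
    );

    // Skip the step entirely if a previous run left a valid entry behind.
    if manager.is_step_valid(step_name).await? {
        return Ok(true);
    }

    // Register the pending entry before doing the work, so an interrupted
    // run leaves an entry that can be marked invalid rather than nothing.
    let entry = manager.create_entry(step_name.to_string(), content_reference, DataStage::Data).await?;

    match do_work(output_dir).await { // `do_work` is a hypothetical placeholder
        Ok(()) => {
            manager.mark_valid(entry).await?; // entry now passes is_step_valid
            Ok(true)
        }
        Err(e) => {
            manager.mark_invalid(entry, format!("step failed: {e}")).await?;
            Ok(false)
        }
    }
}
```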
@@ -1442,29 +1417,42 @@ pub async fn stream_gleif_csv_and_build_figi_filtered(

/// Check mapping completion and process only unmapped LEIs
pub async fn update_lei_mapping(
    paths: &DataPaths,
    csv_path: &str,
    gleif_date: Option<&str>,
) -> anyhow::Result<bool> {
    let dir = DataPaths::new(".")?;
    let manager = StateManager::new(&dir.integrity_dir()).await?;
    let step_name = "lei_figi_mapping_complete";

    let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
    let date = determine_gleif_date(gleif_date, &dir).await?;
    let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
    let date = determine_gleif_date(gleif_date, &paths).await?;
    let date_dir = map_cache_dir.join(&date);

    let manager = StateManager::new(&paths.integrity_dir()).await?;
    let step_name = "lei_figi_mapping_complete";
    let content_reference = directory_reference(
        &date_dir,
        Some(vec![
            "*/lei_to_figi.jsonl".to_string(), // All sector mapping files
            "no_results.jsonl".to_string(), // LEIs with no results
        ]),
        Some(vec![
            "*.tmp".to_string(), // Exclude temp files
            "*.log".to_string(), // Exclude log files
        ]),
    );
    let data_stage = DataStage::Cache; // 24-hour TTL for API data

    if manager.is_step_valid(step_name).await? {
        logger::log_info(" LEI-FIGI mapping already completed and valid").await;
        logger::log_info("✓ All LEIs have been queried (mapped or confirmed no results)").await;
        return Ok(true);
    }
    let entry = manager.create_entry(step_name.to_string(), content_reference, data_stage).await?;

    // Get unmapped LEIs (excludes both mapped and no-result LEIs)
    let unmapped = get_unmapped_leis(csv_path, &date_dir).await?;

    if unmapped.is_empty() {
        logger::log_info("✓ All LEIs have been queried (mapped or confirmed no results)").await;
        track_lei_mapping_completion(&manager, &dir.integrity_dir()).await?;
        manager.mark_valid(entry).await?;
        logger::log_info(" ✓ LEI-FIGI mapping marked as complete with integrity tracking").await;

        return Ok(true);
@@ -1480,7 +1468,7 @@ pub async fn update_lei_mapping(

    if still_unmapped.is_empty() {
        logger::log_info("✓ All LEIs successfully queried").await;
        track_lei_mapping_completion(&manager, &date_dir).await?;
        manager.mark_valid(entry).await?;
        logger::log_info(" ✓ LEI-FIGI mapping marked as complete with integrity tracking").await;
        Ok(true)
    } else {
@@ -1488,43 +1476,11 @@ pub async fn update_lei_mapping(
            "⚠ {} LEIs still unqueried (API errors or rate limits)",
            still_unmapped.len()
        )).await;
        manager.mark_invalid(entry, " Some LEIs remain unqueried".to_string()).await?;
        Ok(false)
    }
}

/// Track LEI-FIGI mapping completion with content hash verification
async fn track_lei_mapping_completion(
    manager: &StateManager,
    date_dir: &Path,
) -> anyhow::Result<()> {
    // Create content reference for all FIGI mapping files
    // This will hash ALL lei_to_figi.jsonl files in sector directories
    let content_reference = directory_reference(
        date_dir,
        Some(vec![
            "*/lei_to_figi.jsonl".to_string(), // All sector mapping files
            "no_results.jsonl".to_string(), // LEIs with no results
        ]),
        Some(vec![
            "*.tmp".to_string(), // Exclude temp files
            "*.log".to_string(), // Exclude log files
        ]),
    );

    // Track completion with:
    // - Content reference: All FIGI mapping files in date directory
    // - Data stage: Cache (24-hour TTL) - FIGI data can change frequently
    // - Dependencies: None (this is a collection step from external API)
    manager.update_entry(
        "lei_figi_mapping_complete".to_string(),
        content_reference,
        DataStage::Cache, // 24-hour TTL for API data
        None, // Use default TTL
    ).await?;

    Ok(())
}

/// Load LEIs that were queried but returned no results
async fn load_no_result_leis(date_dir: &Path) -> anyhow::Result<HashSet<String>> {
    let mut no_result_leis = HashSet::new();
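Both functions now take `paths: &DataPaths` instead of constructing `DataPaths::new(".")` internally, so the caller is expected to build the paths once and thread them through each step. A hedged sketch of what that call site could look like; the `main`-style wiring and the CSV path literal are assumptions, only the function signatures come from this diff:

```rust
// Illustrative call site only; module paths and the CSV location are assumptions.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // One DataPaths instance shared by every step, instead of each step
    // calling DataPaths::new(".") itself (the pattern this diff removes).
    let paths = DataPaths::new(".")?;

    // Refresh the LEI-FIGI mapping first; returns false if some LEIs are
    // still unqueried (API errors or rate limits).
    let mapping_complete = update_lei_mapping(&paths, "data/gleif/latest.csv", None).await?;

    if mapping_complete {
        // Build the securities JSONL files from the FIGI mappings; the removed
        // helper's comments note this step depends on a valid LEI-FIGI mapping.
        update_securities(&paths).await?;
    }

    Ok(())
}
```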