moved data capturing into cache folder
This commit is contained in:
@@ -1,15 +1,18 @@
|
||||
use crate::util::directories::DataPaths;
|
||||
|
||||
// src/corporate/openfigi.rs
|
||||
use super::{types::*};
|
||||
use reqwest::Client as HttpClient;
|
||||
use reqwest::header::{HeaderMap, HeaderValue};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs::{File, OpenOptions};
|
||||
use std::io::{BufRead, BufReader, Write};
|
||||
use std::path::Path;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Instant;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use tokio::fs as tokio_fs;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use anyhow::{Context, anyhow};
|
||||
|
||||
#[derive(Clone)]
|
||||
@@ -103,7 +106,7 @@ impl OpenFigiClient {
|
||||
.map(|isin| json!({
|
||||
"idType": "ID_ISIN",
|
||||
"idValue": isin,
|
||||
"marketSecDes": "Equity",
|
||||
//"marketSecDes": "Equity",
|
||||
}))
|
||||
.collect();
|
||||
|
||||
@@ -195,9 +198,69 @@ impl OpenFigiClient {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the date from a GLEIF CSV filename in the format "isin-lei-DDMMYYYY.csv".
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `filename` - The GLEIF CSV filename (e.g., "isin-lei-24112025.csv")
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A string in the format "DDMMYYYY" (e.g., "24112025") if successfully parsed, otherwise the original filename.
|
||||
fn extract_gleif_date_from_filename(filename: &str) -> String {
|
||||
// Pattern: isin-lei-DDMMYYYY.csv
|
||||
if let Some(start_idx) = filename.find("isin-lei-") {
|
||||
let rest = &filename[start_idx + 9..]; // Skip "isin-lei-"
|
||||
if rest.len() >= 8 && rest[0..8].chars().all(|c| c.is_numeric()) {
|
||||
return rest[0..8].to_string();
|
||||
}
|
||||
}
|
||||
filename.to_string()
|
||||
}
|
||||
|
||||
/// Finds the most recent GLEIF CSV file in the cache/gleif directory.
|
||||
///
|
||||
/// Returns the extracted date in format "DDMMYYYY" from the filename.
|
||||
/// If no GLEIF file is found, returns None.
|
||||
async fn find_most_recent_gleif_date(gleif_cache_dir: &Path) -> anyhow::Result<Option<String>> {
|
||||
let mut entries = tokio_fs::read_dir(gleif_cache_dir)
|
||||
.await
|
||||
.context("Failed to read gleif cache directory")?;
|
||||
|
||||
let mut csv_files = Vec::new();
|
||||
|
||||
while let Some(entry) = entries.next_entry().await? {
|
||||
let path = entry.path();
|
||||
if let Some(filename) = path.file_name() {
|
||||
let filename_str = filename.to_string_lossy();
|
||||
if filename_str.ends_with(".csv") && filename_str.contains("isin-lei-") {
|
||||
csv_files.push(filename_str.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if csv_files.is_empty() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
// Sort files in reverse order (most recent first) based on date in filename
|
||||
csv_files.sort();
|
||||
csv_files.reverse();
|
||||
|
||||
let most_recent = &csv_files[0];
|
||||
let date = extract_gleif_date_from_filename(most_recent);
|
||||
|
||||
println!(" Found GLEIF data dated: {}", date);
|
||||
Ok(Some(date))
|
||||
}
|
||||
|
||||
/// Builds a LEI-to-FigiInfo map from the LEI-ISIN mapping, filtering for equities via OpenFIGI.
|
||||
///
|
||||
/// Attempts to load existing entries from "data/corporate/by_lei/lei_to_figi.jsonl" (JSON Lines format,
|
||||
/// The mapping is stored in cache/glei_openfigi/{GLEIF_DATE}/lei_to_figi.jsonl, where GLEIF_DATE
|
||||
/// is extracted from the GLEIF CSV filename (format: DDMMYYYY). If no specific date is provided,
|
||||
/// the most recent GLEIF file in cache/gleif is used.
|
||||
///
|
||||
/// Attempts to load existing entries from the date-based jsonl file (JSON Lines format,
|
||||
/// one LEI entry per line: {"lei": "ABC", "figis": [FigiInfo...]}). For any missing LEIs (compared to
|
||||
/// `lei_to_isins`), fetches their FigiInfos and appends to the .jsonl file incrementally.
|
||||
///
|
||||
@@ -209,6 +272,7 @@ impl OpenFigiClient {
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `lei_to_isins` - HashMap of LEI to Vec<ISIN> (used for fetching missing entries).
|
||||
/// * `gleif_date` - Optional date in format "DDMMYYYY". If None, uses the most recent GLEIF file.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
@@ -218,12 +282,29 @@ impl OpenFigiClient {
|
||||
///
|
||||
/// Returns an error if file I/O fails, JSON serialization/deserialization fails,
|
||||
/// or if OpenFIGI queries fail during fetching.
|
||||
pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
|
||||
let data_dir = Path::new("data/corporate/by_lei");
|
||||
tokio_fs::create_dir_all(data_dir).await.context("Failed to create data directory")?;
|
||||
|
||||
let path = data_dir.join("lei_to_figi.jsonl");
|
||||
let mut lei_to_figis: HashMap<String, Vec<FigiInfo>> = load_lei_to_figi_jsonl(&path)?;
|
||||
pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>, gleif_date: Option<&str>) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
|
||||
let dir = DataPaths::new(".")?;
|
||||
let gleif_cache_dir = dir.cache_gleif_dir();
|
||||
let map_cache_dir = dir.cache_gleif_openfigi_map_dir();
|
||||
|
||||
// Determine the GLEIF date to use
|
||||
let date = if let Some(d) = gleif_date {
|
||||
d.to_string()
|
||||
} else {
|
||||
// Find the most recent GLEIF file
|
||||
match find_most_recent_gleif_date(gleif_cache_dir).await? {
|
||||
Some(d) => d,
|
||||
None => return Err(anyhow!("No GLEIF CSV file found in cache/gleif directory")),
|
||||
}
|
||||
};
|
||||
|
||||
// Create date-based subdirectory in the mapping cache
|
||||
let date_dir = map_cache_dir.join(&date);
|
||||
tokio_fs::create_dir_all(&date_dir).await.context("Failed to create date directory")?;
|
||||
|
||||
let path = date_dir.join("lei_to_figi.jsonl");
|
||||
println!("Using LEI→FIGI mapping at: {}", path.display());
|
||||
let mut lei_to_figis: HashMap<String, Vec<FigiInfo>> = load_lei_to_figi_jsonl(&path).await?;
|
||||
|
||||
let client = OpenFigiClient::new()?;
|
||||
if !client.has_key {
|
||||
@@ -258,7 +339,7 @@ pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>
|
||||
}
|
||||
|
||||
// Append to .jsonl incrementally
|
||||
append_lei_to_figi_jsonl(&path, &lei, &figis).context("Failed to append to JSONL")?;
|
||||
append_lei_to_figi_jsonl(&path, &lei, &figis).await.context("Failed to append to JSONL")?;
|
||||
|
||||
// Insert into in-memory map
|
||||
lei_to_figis.insert(lei.clone(), figis);
|
||||
@@ -290,18 +371,16 @@ pub async fn build_lei_to_figi_infos(lei_to_isins: &HashMap<String, Vec<String>>
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if the file cannot be opened or if any line fails to parse as JSON.
|
||||
fn load_lei_to_figi_jsonl(path: &Path) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
|
||||
async fn load_lei_to_figi_jsonl(path: &Path) -> anyhow::Result<HashMap<String, Vec<FigiInfo>>> {
|
||||
let mut map = HashMap::new();
|
||||
|
||||
if !path.exists() {
|
||||
return Ok(map);
|
||||
}
|
||||
|
||||
let file = File::open(path).context("Failed to open JSONL file for reading")?;
|
||||
let reader = BufReader::new(file);
|
||||
let content = tokio_fs::read_to_string(path).await.context("Failed to read JSONL file")?;
|
||||
|
||||
for (line_num, line) in reader.lines().enumerate() {
|
||||
let line = line.context(format!("Failed to read line {}", line_num + 1))?;
|
||||
for (line_num, line) in content.lines().enumerate() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
@@ -328,20 +407,24 @@ fn load_lei_to_figi_jsonl(path: &Path) -> anyhow::Result<HashMap<String, Vec<Fig
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if the file cannot be opened for append or if serialization fails.
|
||||
fn append_lei_to_figi_jsonl(path: &Path, lei: &str, figis: &[FigiInfo]) -> anyhow::Result<()> {
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(path)
|
||||
.context("Failed to open JSONL file for append")?;
|
||||
|
||||
async fn append_lei_to_figi_jsonl(path: &Path, lei: &str, figis: &[FigiInfo]) -> anyhow::Result<()> {
|
||||
let entry = json!({
|
||||
"lei": lei,
|
||||
"figis": figis,
|
||||
});
|
||||
|
||||
let line = serde_json::to_string(&entry).context("Failed to serialize entry")? + "\n";
|
||||
file.write_all(line.as_bytes()).context("Failed to write to JSONL file")?;
|
||||
|
||||
let mut file = tokio_fs::OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(path)
|
||||
.await
|
||||
.context("Failed to open JSONL file for append")?;
|
||||
|
||||
file.write_all(line.as_bytes())
|
||||
.await
|
||||
.context("Failed to write to JSONL file")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -373,19 +456,22 @@ pub async fn load_or_build_all_securities(
|
||||
HashMap<String, HashMap<String, OptionInfo>>
|
||||
)> {
|
||||
// Load existing data
|
||||
let mut companies = load_from_cache("data/corporate/by_name/common_stocks.json").await?
|
||||
let mut commons = load_from_cache("data/corporate/by_name/common_stocks.json").await?
|
||||
.unwrap_or_else(HashMap::new);
|
||||
let mut warrants = load_from_cache("data/corporate/by_name/warrants.json").await?
|
||||
.unwrap_or_else(HashMap::new);
|
||||
let mut options = load_from_cache("data/corporate/by_name/options.json").await?
|
||||
.unwrap_or_else(HashMap::new);
|
||||
/*let mut preferred = load_from_cache("data/corporate/by_name/preferred.json").await?
|
||||
.unwrap_or_else(HashMap::new);*/
|
||||
|
||||
|
||||
println!("Loaded existing data:");
|
||||
println!(" - Companies: {}", companies.len());
|
||||
println!(" - Companies: {}", commons.len());
|
||||
println!(" - Warrants: {}", warrants.len());
|
||||
println!(" - Options: {}", options.len());
|
||||
|
||||
let mut stats = ProcessingStats::new(companies.len(), warrants.len(), options.len());
|
||||
let mut stats = ProcessingStats::new(commons.len(), warrants.len(), options.len());
|
||||
|
||||
println!("Processing {} LEI entries from FIGI data...", figi_to_lei.len());
|
||||
|
||||
@@ -410,7 +496,7 @@ pub async fn load_or_build_all_securities(
|
||||
|
||||
// Process common stocks -> companies
|
||||
if !common_stocks.is_empty() {
|
||||
process_common_stocks(&mut companies, &common_stocks, &mut stats);
|
||||
process_common_stocks(&mut commons, &common_stocks, &mut stats);
|
||||
}
|
||||
|
||||
// Process warrants
|
||||
@@ -424,14 +510,14 @@ pub async fn load_or_build_all_securities(
|
||||
}
|
||||
}
|
||||
|
||||
stats.print_summary(companies.len(), warrants.len(), options.len());
|
||||
stats.print_summary(commons.len(), warrants.len(), options.len());
|
||||
|
||||
// Save all three HashMaps
|
||||
save_to_cache("data/corporate/by_name/common_stocks.json", &companies).await?;
|
||||
save_to_cache("data/corporate/by_name/common_stocks.json", &commons).await?;
|
||||
save_to_cache("data/corporate/by_name/warrants.json", &warrants).await?;
|
||||
save_to_cache("data/corporate/by_name/options.json", &options).await?;
|
||||
|
||||
Ok((companies, warrants, options))
|
||||
Ok((commons, warrants, options))
|
||||
}
|
||||
|
||||
/// Statistics tracker for processing
|
||||
@@ -795,7 +881,8 @@ pub async fn load_figi_type_lists() -> anyhow::Result<()> {
|
||||
let client = OpenFigiClient::new()?;
|
||||
|
||||
// Create cache directory
|
||||
let cache_dir = Path::new("data/openfigi");
|
||||
let dir = DataPaths::new(".")?;
|
||||
let cache_dir = dir.cache_openfigi_dir();
|
||||
tokio_fs::create_dir_all(cache_dir).await
|
||||
.context("Failed to create data/openfigi directory")?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user