143 lines
4.2 KiB
Rust
143 lines
4.2 KiB
Rust
// src/corporate/storage.rs
|
|
use super::{types::*, helpers::*};
|
|
use crate::util::directories::DataPaths;
|
|
use crate::util::logger;
|
|
|
|
use tokio::fs;
|
|
use tokio::io::AsyncWriteExt;
|
|
use std::collections::HashMap;
|
|
use std::path::{PathBuf, Path};
|
|
|
|
|
|
/// Lightweight index entry - only metadata, no full event data
#[derive(Debug, Clone)]
pub struct EventIndex {
    /// Deduplication/lookup key produced by `event_key(&event)` (see helpers).
    pub key: String,
    /// Ticker symbol copied from the source `CompanyEvent`.
    pub ticker: String,
    /// Event date copied from the source `CompanyEvent` (string form, as stored).
    pub date: String,
    /// Path of the `events_*.json` file this entry was read from, so the full
    /// event can be re-loaded on demand without keeping it in memory.
    pub file_path: PathBuf,
}
|
|
|
|
/// Build index of all events without loading them into memory
|
|
pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventIndex>> {
|
|
let dir = paths.corporate_events_dir();
|
|
if !dir.exists() {
|
|
logger::log_info("Corporate Storage: No events directory found").await;
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let mut index = Vec::new();
|
|
let mut entries = fs::read_dir(dir).await?;
|
|
|
|
while let Some(entry) = entries.next_entry().await? {
|
|
let path = entry.path();
|
|
if path.extension().and_then(|s| s.to_str()) == Some("json") {
|
|
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
|
|
if name.starts_with("events_") && name.len() == 17 {
|
|
let content = fs::read_to_string(&path).await?;
|
|
let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
|
|
|
|
for event in events {
|
|
index.push(EventIndex {
|
|
key: event_key(&event),
|
|
ticker: event.ticker.clone(),
|
|
date: event.date.clone(),
|
|
file_path: path.clone(),
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
logger::log_info(&format!("Corporate Storage: Built index with {} entries", index.len())).await;
|
|
Ok(index)
|
|
}
|
|
|
|
pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
|
|
paths.corporate_prices_dir().join(lei)
|
|
}
|
|
|
|
pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
|
|
let base = get_company_dir(paths, isin);
|
|
let paths_to_create = [
|
|
base.clone(),
|
|
base.join("5min"),
|
|
base.join("daily"),
|
|
base.join("aggregated").join("5min"),
|
|
base.join("aggregated").join("daily"),
|
|
];
|
|
for p in paths_to_create {
|
|
fs::create_dir_all(&p).await?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Stream companies to JSONL incrementally
|
|
pub async fn save_companies_to_jsonl_streaming(
|
|
paths: &DataPaths,
|
|
companies_iter: impl Iterator<Item = (String, HashMap<String, String>)>,
|
|
) -> anyhow::Result<usize> {
|
|
let file_path = paths.data_dir().join("companies.jsonl");
|
|
|
|
if let Some(parent) = file_path.parent() {
|
|
tokio::fs::create_dir_all(parent).await?;
|
|
}
|
|
|
|
let mut file = tokio::fs::File::create(&file_path).await?;
|
|
let mut count = 0;
|
|
|
|
for (name, securities) in companies_iter {
|
|
let line = serde_json::json!({
|
|
"name": name,
|
|
"securities": securities
|
|
});
|
|
|
|
file.write_all(line.to_string().as_bytes()).await?;
|
|
file.write_all(b"\n").await?;
|
|
count += 1;
|
|
|
|
if count % 100 == 0 {
|
|
tokio::task::yield_now().await;
|
|
}
|
|
}
|
|
|
|
logger::log_info(&format!("Saved {} companies to JSONL", count)).await;
|
|
Ok(count)
|
|
}
|
|
|
|
/// Stream read companies from JSONL
|
|
pub async fn stream_companies_from_jsonl<F>(
|
|
path: &Path,
|
|
mut callback: F
|
|
) -> anyhow::Result<usize>
|
|
where
|
|
F: FnMut(String, HashMap<String, String>) -> anyhow::Result<()>,
|
|
{
|
|
if !path.exists() {
|
|
return Ok(0);
|
|
}
|
|
|
|
let content = tokio::fs::read_to_string(path).await?;
|
|
let mut count = 0;
|
|
|
|
for line in content.lines() {
|
|
if line.trim().is_empty() {
|
|
continue;
|
|
}
|
|
|
|
let entry: serde_json::Value = serde_json::from_str(line)?;
|
|
let name = entry["name"].as_str().unwrap_or("").to_string();
|
|
let securities: HashMap<String, String> = serde_json::from_value(
|
|
entry["securities"].clone()
|
|
)?;
|
|
|
|
callback(name, securities)?;
|
|
count += 1;
|
|
|
|
if count % 100 == 0 {
|
|
tokio::task::yield_now().await;
|
|
}
|
|
}
|
|
|
|
Ok(count)
|
|
} |