added company mapping with yahoo tickers
@@ -6,49 +6,12 @@ use crate::util::logger;
use tokio::fs;
use tokio::io::AsyncWriteExt;
use chrono::{Datelike, NaiveDate};
use std::collections::{HashMap};
use std::collections::HashMap;
use std::path::{PathBuf, Path};

const BATCH_SIZE: usize = 500; // Process 500 events at a time
const BATCH_SIZE: usize = 500;

/// Load events in streaming fashion to avoid memory buildup
pub async fn load_existing_events_streaming(
    paths: &DataPaths,
    callback: impl Fn(CompanyEvent) -> anyhow::Result<()>
) -> anyhow::Result<usize> {
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
        logger::log_info("Corporate Storage: No existing events directory found").await;
        return Ok(0);
    }

    let mut total = 0;
    let mut entries = fs::read_dir(dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if name.starts_with("events_") && name.len() == 17 {
                let content = fs::read_to_string(&path).await?;
                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;

                for event in events {
                    callback(event)?;
                    total += 1;
                }

                // Yield to prevent blocking
                tokio::task::yield_now().await;
            }
        }
    }

    logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
    Ok(total)
}

/// Build lightweight index of events instead of loading everything
/// Lightweight index entry - only metadata, no full event data
#[derive(Debug, Clone)]
pub struct EventIndex {
    pub key: String,
@@ -57,9 +20,11 @@ pub struct EventIndex {
    pub file_path: PathBuf,
}

/// Build index of all events without loading them into memory
pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventIndex>> {
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
        logger::log_info("Corporate Storage: No events directory found").await;
        return Ok(Vec::new());
    }

@@ -90,7 +55,7 @@ pub async fn build_event_index(paths: &DataPaths) -> anyhow::Result<Vec<EventInd
    Ok(index)
}
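
A minimal caller sketch, not part of the commit: it assumes an async context with a paths: &DataPaths value in scope and uses only the EventIndex fields visible in this diff (key, file_path).

    // Hypothetical usage: build the index once, then group event keys by backing file.
    let index = build_event_index(paths).await?;
    let mut keys_per_file: HashMap<PathBuf, Vec<String>> = HashMap::new();
    for entry in &index {
        keys_per_file
            .entry(entry.file_path.clone())
            .or_default()
            .push(entry.key.clone());
    }
    logger::log_info(&format!("Indexed {} events across {} files", index.len(), keys_per_file.len())).await;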

/// Lookup specific event by loading only its file
/// Load specific event by key (only loads its file)
pub async fn lookup_event_by_key(
    key: &str,
    index: &[EventIndex]
@@ -106,9 +71,48 @@ pub async fn lookup_event_by_key(
    }
}

/// Stream events file by file with callback
pub async fn stream_events_with_callback<F>(
    paths: &DataPaths,
    mut callback: F
) -> anyhow::Result<usize>
where
    F: FnMut(CompanyEvent) -> anyhow::Result<()>,
{
    let dir = paths.corporate_events_dir();
    if !dir.exists() {
        return Ok(0);
    }

    let mut total = 0;
    let mut entries = fs::read_dir(dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if name.starts_with("events_") {
                let content = fs::read_to_string(&path).await?;
                let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;

                for event in events {
                    callback(event)?;
                    total += 1;
                }

                tokio::task::yield_now().await;
            }
        }
    }

    logger::log_info(&format!("Corporate Storage: Streamed {} events", total)).await;
    Ok(total)
}
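
Because the callback is FnMut, a caller can accumulate state while the files are streamed. A usage sketch, not part of the commit, assuming an async context with paths: &DataPaths in scope and that CompanyEvent.ticker is a String (the sort in save_optimized_events below compares it):

    // Hypothetical caller: count events per ticker without holding all events in memory.
    let mut per_ticker: HashMap<String, usize> = HashMap::new();
    let total = stream_events_with_callback(paths, |event| {
        *per_ticker.entry(event.ticker.clone()).or_insert(0) += 1;
        Ok(())
    })
    .await?;
    logger::log_info(&format!("Streamed {} events across {} tickers", total, per_ticker.len())).await;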

/// Save events organized by month (accepts Vec, not HashMap)
pub async fn save_optimized_events(
    paths: &DataPaths,
    events: Vec<CompanyEvent> // Changed from HashMap to Vec
    events: Vec<CompanyEvent>
) -> anyhow::Result<()> {
    let dir = paths.corporate_events_dir();
    fs::create_dir_all(dir).await?;
@@ -124,16 +128,14 @@ pub async fn save_optimized_events(
            removed_count += 1;
        }
    }
    logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;
    logger::log_info(&format!("Corporate Storage: Removed {} old files", removed_count)).await;

    let total_events = events.len();
    let mut sorted = events;
    sorted.sort_by(|a, b| {
        a.ticker.cmp(&b.ticker)
            .then(a.date.cmp(&b.date))
        a.ticker.cmp(&b.ticker).then(a.date.cmp(&b.date))
    });

    // Process in batches to avoid memory buildup
    let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();

    for chunk in sorted.chunks(BATCH_SIZE) {
@@ -146,27 +148,28 @@ pub async fn save_optimized_events(
        tokio::task::yield_now().await;
    }

    let total_months = by_month.len();
    for (month, list) in by_month {
        let path = dir.join(format!("events_{}.json", month));
        fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
        logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
        logger::log_info(&format!("Saved {} events for month {}", list.len(), month)).await;
    }

    logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
    logger::log_info(&format!("Saved {} total events", total_events)).await;
    Ok(())
}
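
The month-key derivation itself is elided by the hunks above; a plausible helper is sketched here as an assumption, not the committed code. It matches the events_{month}.json naming and the Datelike/NaiveDate imports, and assumes event dates use the %Y-%m-%d format seen in save_changes below.

    // Hypothetical month-key helper (assumption; the real grouping code is not shown in this diff).
    fn month_key(date: &str) -> Option<String> {
        NaiveDate::parse_from_str(date, "%Y-%m-%d")
            .ok()
            .map(|d| format!("{:04}-{:02}", d.year(), d.month()))
    }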

pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
pub async fn save_changes(
    paths: &DataPaths,
    changes: &[CompanyEventChange]
) -> anyhow::Result<()> {
    if changes.is_empty() {
        logger::log_info("Corporate Storage: No changes to save").await;
        return Ok(());
    }

    let dir = paths.corporate_changes_dir();
    fs::create_dir_all(dir).await?;

    logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;

    let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
    for c in changes {
        if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -180,12 +183,13 @@ pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) ->
        let mut all = if path.exists() {
            let s = fs::read_to_string(&path).await?;
            serde_json::from_str(&s).unwrap_or_default()
        } else { vec![] };
        } else {
            vec![]
        };
        all.extend(list.clone());
        fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
        logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
    }
    logger::log_info("Corporate Storage: All changes saved successfully").await;

    Ok(())
}

@@ -203,9 +207,7 @@ pub async fn save_prices_for_ticker(
    let path = timeframe_dir.join("prices.json");

    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));

    let json = serde_json::to_string_pretty(&prices)?;
    fs::write(&path, json).await?;
    fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
    Ok(())
}

@@ -240,7 +242,10 @@ pub async fn save_available_exchanges(
    Ok(())
}

pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
pub async fn load_available_exchanges(
    paths: &DataPaths,
    lei: &str
) -> anyhow::Result<Vec<AvailableExchange>> {
    let path = get_company_dir(paths, lei).join("available_exchanges.json");
    if path.exists() {
        let content = fs::read_to_string(&path).await?;
@@ -267,15 +272,13 @@ pub async fn save_prices_by_source(
    Ok(())
}

/// Saves companies data to a JSONL file in streaming fashion
/// Stream companies to JSONL incrementally
pub async fn save_companies_to_jsonl_streaming(
    paths: &DataPaths,
    companies: &HashMap<String, HashMap<String, String>>,
) -> anyhow::Result<()> {
    companies_iter: impl Iterator<Item = (String, HashMap<String, String>)>,
) -> anyhow::Result<usize> {
    let file_path = paths.data_dir().join("companies.jsonl");

    logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;

    if let Some(parent) = file_path.parent() {
        tokio::fs::create_dir_all(parent).await?;
    }
@@ -283,32 +286,33 @@ pub async fn save_companies_to_jsonl_streaming(
    let mut file = tokio::fs::File::create(&file_path).await?;
    let mut count = 0;

    // Process in batches
    for (name, securities) in companies.iter() {
    for (name, securities) in companies_iter {
        let line = serde_json::json!({
            "name": name,
            "securities": securities
        });

        file.write_all(line.to_string().as_bytes()).await?;
        file.write_all(b"\n").await?;

        count += 1;

        if count % 100 == 0 {
            tokio::task::yield_now().await;
        }
    }

    let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
    println!("{}", msg);
    logger::log_info(&msg).await;
    Ok(())
    logger::log_info(&format!("Saved {} companies to JSONL", count)).await;
    Ok(count)
}
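
A calling sketch, not part of the commit, showing the effect of the signature change from &HashMap to an iterator: an owned map can now be drained straight into the writer. It assumes an async context with paths: &DataPaths and an owned companies: HashMap<String, HashMap<String, String>> in scope.

    // Hypothetical caller: the map is consumed item by item instead of being borrowed whole.
    let written = save_companies_to_jsonl_streaming(paths, companies.into_iter()).await?;
    logger::log_info(&format!("Wrote {} company records", written)).await;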

/// Load companies from JSONL in streaming fashion
pub async fn load_companies_from_jsonl_streaming(
/// Stream read companies from JSONL
pub async fn stream_companies_from_jsonl<F>(
    path: &Path,
    callback: impl Fn(String, HashMap<String, String>) -> anyhow::Result<()>
) -> anyhow::Result<usize> {
    mut callback: F
) -> anyhow::Result<usize>
where
    F: FnMut(String, HashMap<String, String>) -> anyhow::Result<()>,
{
    if !path.exists() {
        return Ok(0);
}
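
The rest of this function is cut off in the diff. A usage sketch of the new callback-based reader, not part of the commit, assuming an async context with paths: &DataPaths in scope:

    // Hypothetical caller: build a company-name -> security-count map while streaming the JSONL file.
    let jsonl_path = paths.data_dir().join("companies.jsonl");
    let mut security_counts: HashMap<String, usize> = HashMap::new();
    let read = stream_companies_from_jsonl(&jsonl_path, |name, securities| {
        security_counts.insert(name, securities.len());
        Ok(())
    })
    .await?;
    logger::log_info(&format!("Loaded {} companies from JSONL", read)).await;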