added logging
Cargo.lock (generated) | 1

diff --git a/Cargo.lock b/Cargo.lock
@@ -671,6 +671,7 @@ dependencies = [
  "fantoccini",
  "flate2",
  "futures",
+ "once_cell",
  "rand 0.9.2",
  "rayon",
  "reqwest",
diff --git a/Cargo.toml b/Cargo.toml
@@ -45,6 +45,7 @@ anyhow = "1.0"
 # Logging (optional but recommended)
 tracing = "0.1"
 tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
+once_cell = "1.21.3"

 # Parallel processing (for batch tickers)
 futures = "0.3"
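The tracing/tracing-subscriber pair added above is marked "optional but recommended"; the rest of this commit wires logging through a custom util::logger instead. For reference only, a minimal sketch of how these two crates would be initialized with the "fmt" and "env-filter" features pinned above; nothing below appears in the commit itself:

// Hypothetical tracing setup for the dependencies above; the commit uses
// util::logger rather than this, so treat it purely as a sketch.
use tracing_subscriber::EnvFilter;

fn init_tracing() {
    tracing_subscriber::fmt()
        // Reads the filter from RUST_LOG, e.g. RUST_LOG=info
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    tracing::info!("tracing initialized");
}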
diff --git a/src/corporate/aggregation.rs b/src/corporate/aggregation.rs
@@ -1,6 +1,7 @@
 // src/corporate/aggregation.rs
 use super::types::CompanyPrice;
 use super::storage::*;
+use crate::util::directories::DataPaths;
 use tokio::fs;
 use std::collections::HashMap;

@@ -16,8 +17,8 @@ struct DayData {
 }

 /// Aggregate price data from multiple exchanges, converting all to USD
-pub async fn aggregate_best_price_data(lei: &str) -> anyhow::Result<()> {
-    let company_dir = get_company_dir(lei);
+pub async fn aggregate_best_price_data(paths: &DataPaths, lei: &str) -> anyhow::Result<()> {
+    let company_dir = get_company_dir(paths, lei);

     for timeframe in ["daily", "5min"].iter() {
         let source_dir = company_dir.join(timeframe);
diff --git a/src/corporate/openfigi.rs b/src/corporate/openfigi.rs
@@ -15,7 +15,6 @@ use anyhow::{Context, anyhow};
 #[derive(Clone)]
 pub struct OpenFigiClient {
     client: HttpClient,
-    api_key: Option<String>,
     has_key: bool,
 }

@@ -48,7 +47,7 @@ impl OpenFigiClient {
         if has_key { "with API key" } else { "no key (limited mode)" }
     );

-    Ok(Self { client, api_key, has_key })
+    Ok(Self { client, has_key })
 }

 /// Maps a batch of ISINs to FigiInfo structs, filtering for equities only.
@@ -159,8 +158,7 @@ impl OpenFigiClient {
     figi,
     name: item["name"].as_str().unwrap_or("").to_string(),
     ticker: item["ticker"].as_str().unwrap_or("").to_string(),
-    mic_code: item["exchCode"].as_str().unwrap_or("").to_string(),
-    currency: item["currency"].as_str().unwrap_or("").to_string(),
+    exch_code: item["micCode"].as_str().unwrap_or("").to_string(),
     compositeFIGI: item["compositeFIGI"].as_str().unwrap_or("").to_string(),
     securityType: sec_type.to_string(),
     marketSector: market_sec.to_string(),
@@ -195,16 +193,6 @@ impl OpenFigiClient {

         Ok(all_figi_infos)
     }

-    /// Checks if the client has an API key configured.
-    pub fn has_key(&self) -> bool {
-        self.has_key
-    }
-
-    /// Returns a reference to the underlying HTTP client.
-    pub fn get_figi_client(&self) -> &HttpClient {
-        &self.client
-    }
 }

 /// Builds a LEI-to-FigiInfo map from the LEI-ISIN mapping, filtering for equities via OpenFIGI.
@@ -401,7 +389,7 @@ pub async fn load_or_build_all_securities(

     println!("Processing {} LEI entries from FIGI data...", figi_to_lei.len());

-    for (lei, figi_infos) in figi_to_lei.iter() {
+    for (_lei, figi_infos) in figi_to_lei.iter() {
         if figi_infos.is_empty() {
             continue;
         }
diff --git a/src/corporate/scraper.rs b/src/corporate/scraper.rs
@@ -1,7 +1,7 @@
 // src/corporate/scraper.rs
 use super::{types::*, helpers::*, openfigi::*};
 //use crate::corporate::openfigi::OpenFigiClient;
-use crate::{scraper::webdriver::*};
+use crate::{webdriver::webdriver::*};
 use fantoccini::{Client, Locator};
 use scraper::{Html, Selector};
 use chrono::{DateTime, Duration, NaiveDate, Utc};
@@ -15,160 +15,6 @@ use anyhow::{anyhow, Result};

 const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

-/// Discover all exchanges where this ISIN trades by querying Yahoo Finance and enriching with OpenFIGI API calls.
-///
-/// # Arguments
-/// * `isin` - The ISIN to search for.
-/// * `known_ticker` - A known ticker symbol for fallback or initial check.
-///
-/// # Returns
-/// A vector of FigiInfo structs containing enriched data from API calls.
-///
-/// # Errors
-/// Returns an error if HTTP requests fail, JSON parsing fails, or OpenFIGI API responds with an error.
-pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<FigiInfo>> {
-    println!("  Discovering exchanges for ISIN {}", isin);
-
-    let mut potential: Vec<(String, PrimaryInfo)> = Vec::new();
-
-    // Try the primary ticker first
-    if let Ok(info) = check_ticker_exists(known_ticker).await {
-        potential.push((known_ticker.to_string(), info));
-    }
-
-    // Search for ISIN directly on Yahoo to find other listings
-    let search_url = format!(
-        "https://query2.finance.yahoo.com/v1/finance/search?q={}&quotesCount=20&newsCount=0",
-        isin
-    );
-
-    let resp = HttpClient::new()
-        .get(&search_url)
-        .header("User-Agent", USER_AGENT)
-        .send()
-        .await?;
-
-    let json = resp.json::<Value>().await?;
-
-    if let Some(quotes) = json["quotes"].as_array() {
-        for quote in quotes {
-            // First: filter by quoteType directly from search results (faster rejection)
-            let quote_type = quote["quoteType"].as_str().unwrap_or("");
-            if quote_type.to_uppercase() != "EQUITY" {
-                continue; // Skip bonds, ETFs, mutual funds, options, etc.
-            }
-
-            if let Some(symbol) = quote["symbol"].as_str() {
-                // Avoid duplicates
-                if potential.iter().any(|(s, _)| s == symbol) {
-                    continue;
-                }
-
-                // Double-check with full quote data (some search results are misleading)
-                if let Ok(info) = check_ticker_exists(symbol).await {
-                    potential.push((symbol.to_string(), info));
-                }
-            }
-        }
-    }
-
-    if potential.is_empty() {
-        return Ok(vec![]);
-    }
-
-    // Enrich with OpenFIGI API
-    let client = OpenFigiClient::new()?;
-
-    let mut discovered_figis = Vec::new();
-
-    if !client.has_key() {
-        // Fallback without API key - create FigiInfo with default/empty fields
-        for (symbol, info) in potential {
-            println!("  Found equity listing: {} on {} ({}) - no FIGI (fallback mode)", symbol, info.exchange_mic, info.currency);
-            let figi_info = FigiInfo {
-                isin: info.isin,
-                figi: String::new(),
-                name: info.name,
-                ticker: symbol,
-                mic_code: info.exchange_mic,
-                currency: info.currency,
-                compositeFIGI: String::new(),
-                securityType: String::new(),
-                marketSector: String::new(),
-                shareClassFIGI: String::new(),
-                securityType2: String::new(),
-                securityDescription: String::new(),
-            };
-            discovered_figis.push(figi_info);
-        }
-        return Ok(discovered_figis);
-    }
-
-    // With API key, batch the mapping requests
-    let chunk_size = 100;
-    for chunk in potential.chunks(chunk_size) {
-        let mut jobs = vec![];
-        for (symbol, info) in chunk {
-            jobs.push(json!({
-                "idType": "TICKER",
-                "idValue": symbol,
-                "micCode": info.exchange_mic,
-                "marketSecDes": "Equity",
-            }));
-        }
-
-        let resp = client.get_figi_client()
-            .post("https://api.openfigi.com/v3/mapping")
-            .header("Content-Type", "application/json")
-            .json(&jobs)
-            .send()
-            .await?;
-
-        if !resp.status().is_success() {
-            return Err(anyhow::anyhow!("OpenFIGI mapping failed with status: {}", resp.status()));
-        }
-
-        let parsed: Vec<Value> = resp.json().await?;
-
-        for (i, item) in parsed.iter().enumerate() {
-            let (symbol, info) = &chunk[i];
-            if let Some(data) = item["data"].as_array() {
-                if let Some(entry) = data.first() {
-                    let market_sec = entry["marketSector"].as_str().unwrap_or("");
-                    if market_sec != "Equity" {
-                        continue;
-                    }
-                    println!("  Found equity listing: {} on {} ({}) - FIGI: {}", symbol, info.exchange_mic, info.currency, entry["figi"]);
-                    let figi_info = FigiInfo {
-                        isin: info.isin.clone(),
-                        figi: entry["figi"].as_str().unwrap_or("").to_string(),
-                        name: entry["name"].as_str().unwrap_or(&info.name).to_string(),
-                        ticker: symbol.clone(),
-                        mic_code: info.exchange_mic.clone(),
-                        currency: info.currency.clone(),
-                        compositeFIGI: entry["compositeFIGI"].as_str().unwrap_or("").to_string(),
-                        securityType: entry["securityType"].as_str().unwrap_or("").to_string(),
-                        marketSector: market_sec.to_string(),
-                        shareClassFIGI: entry["shareClassFIGI"].as_str().unwrap_or("").to_string(),
-                        securityType2: entry["securityType2"].as_str().unwrap_or("").to_string(),
-                        securityDescription: entry["securityDescription"].as_str().unwrap_or("").to_string(),
-                    };
-                    discovered_figis.push(figi_info);
-                } else {
-                    println!("  No data returned for ticker {} on MIC {}", symbol, info.exchange_mic);
-                }
-            } else if let Some(error) = item["error"].as_str() {
-                println!("  OpenFIGI error for ticker {}: {}", symbol, error);
-            }
-        }
-
-        // Respect rate limit (6 seconds between requests with key)
-        sleep(TokioDuration::from_secs(6)).await;
-    }
-
-    Ok(discovered_figis)
-}
-
 /// Check if a ticker exists on Yahoo Finance and return core metadata.
 ///
 /// This function calls the public Yahoo Finance quoteSummary endpoint and extracts:
@@ -305,33 +151,6 @@ pub async fn check_ticker_exists(ticker: &str) -> anyhow::Result<PrimaryInfo> {
     })
 }

-/// Convert Yahoo's exchange name to MIC code (best effort)
-fn exchange_name_to_mic(name: &str) -> String {
-    match name {
-        "NMS" | "NasdaqGS" | "NASDAQ" => "XNAS",
-        "NYQ" | "NYSE" => "XNYS",
-        "LSE" | "London" => "XLON",
-        "FRA" | "Frankfurt" | "GER" | "XETRA" => "XFRA",
-        "PAR" | "Paris" => "XPAR",
-        "AMS" | "Amsterdam" => "XAMS",
-        "MIL" | "Milan" => "XMIL",
-        "JPX" | "Tokyo" => "XJPX",
-        "HKG" | "Hong Kong" => "XHKG",
-        "SHH" | "Shanghai" => "XSHG",
-        "SHZ" | "Shenzhen" => "XSHE",
-        "TOR" | "Toronto" => "XTSE",
-        "ASX" | "Australia" => "XASX",
-        "SAU" | "Saudi" => "XSAU",
-        "SWX" | "Switzerland" => "XSWX",
-        "BSE" | "Bombay" => "XBSE",
-        "NSE" | "NSI" => "XNSE",
-        "TAI" | "Taiwan" => "XTAI",
-        "SAO" | "Sao Paulo" => "BVMF",
-        "MCE" | "Madrid" => "XMAD",
-        _ => name, // Fallback to name itself
-    }.to_string()
-}
-
 /// Fetches earnings events for a ticker using a dedicated ScrapeTask.
 ///
 /// This function creates and executes a ScrapeTask to navigate to the Yahoo Finance earnings calendar,
@@ -670,29 +489,27 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow

 pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
     let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
-    let zip_path = "data/gleif/isin_lei.zip";
-    let csv_path = "data/gleif/isin_lei.csv";

-    if let Err(e) = std::fs::create_dir_all("data") {
+    if let Err(e) = std::fs::create_dir_all("data/gleif") {
         println!("Failed to create data directory: {e}");
         return Ok(None);
     }

-    // Download ZIP
-    let bytes = match reqwest::Client::builder()
+    // Download ZIP and get the filename from Content-Disposition header
+    let client = match reqwest::Client::builder()
         .user_agent(USER_AGENT)
         .timeout(std::time::Duration::from_secs(30))
         .build()
-        .and_then(|c| Ok(c))
     {
-        Ok(client) => match client.get(url).send().await {
-            Ok(resp) if resp.status().is_success() => match resp.bytes().await {
-                Ok(b) => b,
-                Err(e) => {
-                    println!("Failed to read ZIP bytes: {e}");
-                    return Ok(None);
-                }
-            },
+        Ok(c) => c,
+        Err(e) => {
+            println!("Failed to create HTTP client: {e}");
+            return Ok(None);
+        }
+    };
+
+    let resp = match client.get(url).send().await {
+        Ok(r) if r.status().is_success() => r,
         Ok(resp) => {
             println!("Server returned HTTP {}", resp.status());
             return Ok(None);
@@ -701,20 +518,34 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
             println!("Failed to download ISIN/LEI ZIP: {e}");
             return Ok(None);
         }
-    },
+    };

+    // Extract filename from Content-Disposition header or use default
+    let filename = resp
+        .headers()
+        .get("content-disposition")
+        .and_then(|h| h.to_str().ok())
+        .and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
+        .unwrap_or_else(|| "isin_lei.zip".to_string());
+
+    let bytes = match resp.bytes().await {
+        Ok(b) => b,
         Err(e) => {
-            println!("Failed to create HTTP client: {e}");
+            println!("Failed to read ZIP bytes: {e}");
             return Ok(None);
         }
     };

-    if let Err(e) = tokio::fs::write(zip_path, &bytes).await {
+    let zip_path = format!("data/gleif/{}", filename);
+    let csv_path = format!("data/gleif/{}", filename.replace(".zip", ".csv"));
+
+    if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
         println!("Failed to write ZIP file: {e}");
         return Ok(None);
     }

     // Extract CSV
-    let archive = match std::fs::File::open(zip_path)
+    let archive = match std::fs::File::open(&zip_path)
         .map(ZipArchive::new)
     {
         Ok(Ok(a)) => a,
@@ -756,12 +587,12 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
         return Ok(None);
     }

-    if let Err(e) = tokio::fs::write(csv_path, &csv_bytes).await {
+    if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
         println!("Failed to save CSV file: {e}");
         return Ok(None);
     }

-    Ok(Some(csv_path.to_string()))
+    Ok(Some(csv_path))
 }
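The new filename handling in download_isin_lei_csv splits the raw Content-Disposition value on "filename=". A minimal standalone check of that exact parsing; the header value is invented for illustration, since the real GLEIF response headers are not shown in this commit:

// Mirrors the header parsing above; `header_value` is a made-up example.
fn parse_filename(header_value: &str) -> Option<String> {
    header_value
        .split("filename=")
        .nth(1)
        .map(|f| f.trim_matches('"').to_string())
}

fn main() {
    let header_value = r#"attachment; filename="isin-lei-20240101.zip""#;
    assert_eq!(
        parse_filename(header_value).as_deref(),
        Some("isin-lei-20240101.zip")
    );
}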
diff --git a/src/corporate/storage.rs b/src/corporate/storage.rs
@@ -1,20 +1,24 @@
 // src/corporate/storage.rs
 use super::{types::*, helpers::*};
-use crate::config;
+use crate::util::directories::DataPaths;
+use crate::util::logger;

 use tokio::fs;
+use tokio::io::AsyncWriteExt;
 use chrono::{Datelike, NaiveDate};
 use std::collections::{HashMap, HashSet};
-use std::path::{Path, PathBuf};
+use std::path::{PathBuf};

-pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
+pub async fn load_existing_events(paths: &DataPaths) -> anyhow::Result<HashMap<String, CompanyEvent>> {
     let mut map = HashMap::new();
-    let dir = std::path::Path::new("corporate_events");
+    let dir = paths.corporate_events_dir();
     if !dir.exists() {
+        logger::log_info("Corporate Storage: No existing events directory found").await;
         return Ok(map);
     }

     let mut entries = fs::read_dir(dir).await?;
+    let mut loaded_count = 0;
     while let Some(entry) = entries.next_entry().await? {
         let path = entry.path();
         if path.extension().and_then(|s| s.to_str()) == Some("json") {
@@ -25,25 +29,32 @@ pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEve
                 for event in events {
                     map.insert(event_key(&event), event);
                 }
+                loaded_count += 1;
             }
         }
     }
+    logger::log_info(&format!("Corporate Storage: Loaded {} events from {} files", map.len(), loaded_count)).await;
     Ok(map)
 }

-pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
-    let dir = std::path::Path::new("corporate_events");
+pub async fn save_optimized_events(paths: &DataPaths, events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
+    let dir = paths.corporate_events_dir();
     fs::create_dir_all(dir).await?;

+    logger::log_info("Corporate Storage: Removing old event files...").await;
+    let mut removed_count = 0;
     let mut entries = fs::read_dir(dir).await?;
     while let Some(entry) = entries.next_entry().await? {
         let path = entry.path();
         let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
         if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
             fs::remove_file(&path).await?;
+            removed_count += 1;
         }
     }
+    logger::log_info(&format!("Corporate Storage: Removed {} old event files", removed_count)).await;

+    let total_events = events.len();
     let mut sorted: Vec<_> = events.into_values().collect();
     sorted.sort_by_key(|e| (e.ticker.clone(), e.date.clone()));

@@ -55,18 +66,26 @@ pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> any
         }
     }

+    let total_months = by_month.len();
     for (month, list) in by_month {
         let path = dir.join(format!("events_{}.json", month));
         fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
+        logger::log_info(&format!("Corporate Storage: Saved {} events for month {}", list.len(), month)).await;
     }
+    logger::log_info(&format!("Corporate Storage: Saved {} total events in {} month files", total_events, total_months)).await;
     Ok(())
 }

-pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() { return Ok(()); }
-    let dir = std::path::Path::new("corporate_event_changes");
+pub async fn save_changes(paths: &DataPaths, changes: &[CompanyEventChange]) -> anyhow::Result<()> {
+    if changes.is_empty() {
+        logger::log_info("Corporate Storage: No changes to save").await;
+        return Ok(());
+    }
+    let dir = paths.corporate_changes_dir();
     fs::create_dir_all(dir).await?;

+    logger::log_info(&format!("Corporate Storage: Saving {} changes", changes.len())).await;
+
     let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
     for c in changes {
         if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -81,14 +100,16 @@ pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()>
             let s = fs::read_to_string(&path).await?;
             serde_json::from_str(&s).unwrap_or_default()
         } else { vec![] };
-        all.extend(list);
+        all.extend(list.clone());
         fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
+        logger::log_info(&format!("Corporate Storage: Saved {} changes for month {}", list.len(), month)).await;
     }
+    logger::log_info("Corporate Storage: All changes saved successfully").await;
     Ok(())
 }

-pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
-    let base_dir = Path::new("corporate_prices");
+pub async fn save_prices_for_ticker(paths: &DataPaths, ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
+    let base_dir = paths.corporate_prices_dir();
     let company_dir = base_dir.join(ticker.replace(".", "_"));
     let timeframe_dir = company_dir.join(timeframe);

@@ -102,35 +123,35 @@ pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: V
     Ok(())
 }

-pub fn get_company_dir(lei: &str) -> PathBuf {
-    PathBuf::from("corporate_prices").join(lei)
+pub fn get_company_dir(paths: &DataPaths, lei: &str) -> PathBuf {
+    paths.corporate_prices_dir().join(lei)
 }

-pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
-    let base = get_company_dir(isin);
-    let paths = [
+pub async fn ensure_company_dirs(paths: &DataPaths, isin: &str) -> anyhow::Result<()> {
+    let base = get_company_dir(paths, isin);
+    let paths_to_create = [
         base.clone(),
         base.join("5min"),
         base.join("daily"),
         base.join("aggregated").join("5min"),
         base.join("aggregated").join("daily"),
     ];
-    for p in paths {
+    for p in paths_to_create {
         fs::create_dir_all(&p).await?;
     }
     Ok(())
 }

-pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
-    let dir = get_company_dir(isin);
+pub async fn save_available_exchanges(paths: &DataPaths, isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
+    let dir = get_company_dir(paths, isin);
     fs::create_dir_all(&dir).await?;
     let path = dir.join("available_exchanges.json");
     fs::write(&path, serde_json::to_string_pretty(&exchanges)?).await?;
     Ok(())
 }

-pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
-    let path = get_company_dir(lei).join("available_exchanges.json");
+pub async fn load_available_exchanges(paths: &DataPaths, lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
+    let path = get_company_dir(paths, lei).join("available_exchanges.json");
     if path.exists() {
         let content = fs::read_to_string(&path).await?;
         Ok(serde_json::from_str(&content)?)
@@ -140,13 +161,14 @@ pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<Available
     }
 }

 pub async fn save_prices_by_source(
+    paths: &DataPaths,
     lei: &str,
     source_ticker: &str,
     timeframe: &str,
     prices: Vec<CompanyPrice>,
 ) -> anyhow::Result<()> {
     let source_safe = source_ticker.replace(".", "_").replace("/", "_");
-    let dir = get_company_dir(lei).join(timeframe).join(&source_safe);
+    let dir = get_company_dir(paths, lei).join(timeframe).join(&source_safe);
     fs::create_dir_all(&dir).await?;
     let path = dir.join("prices.json");
     let mut prices = prices;
@@ -157,13 +179,14 @@ pub async fn save_prices_by_source(

 /// Update available_exchanges.json with fetch results
 pub async fn update_available_exchange(
+    paths: &DataPaths,
     isin: &str,
     ticker: &str,
     exchange_mic: &str,
     has_daily: bool,
     has_5min: bool,
 ) -> anyhow::Result<()> {
-    let mut exchanges = load_available_exchanges(isin).await?;
+    let mut exchanges = load_available_exchanges(paths, isin).await?;

     if let Some(entry) = exchanges.iter_mut().find(|e| e.ticker == ticker) {
         // Update existing entry
@@ -181,38 +204,7 @@ pub async fn update_available_exchange(
         exchanges.push(new_entry);
     }

-    save_available_exchanges(isin, exchanges).await
-}
-
-/// Add a newly discovered exchange before fetching
-///
-/// # Arguments
-/// * `isin` - The ISIN associated with the exchange.
-/// * `figi_info` - The FigiInfo containing ticker, mic_code, and currency.
-///
-/// # Returns
-/// Ok(()) on success.
-///
-/// # Errors
-/// Returns an error if loading or saving available exchanges fails.
-pub async fn add_discovered_exchange(
-    isin: &str,
-    figi_info: &FigiInfo,
-) -> anyhow::Result<()> {
-    let mut exchanges = load_available_exchanges(isin).await?;
-
-    // Only add if not already present
-    if !exchanges.iter().any(|e| e.ticker == figi_info.ticker && e.exchange_mic == figi_info.mic_code) {
-        let new_entry = AvailableExchange::new(
-            figi_info.ticker.clone(),
-            figi_info.mic_code.clone(),
-            figi_info.currency.clone(),
-        );
-        exchanges.push(new_entry);
-        save_available_exchanges(isin, exchanges).await?;
-    }
-
-    Ok(())
+    save_available_exchanges(paths, isin, exchanges).await
 }

 /// Infer currency from ticker suffix
@@ -235,3 +227,41 @@ fn infer_currency_from_ticker(ticker: &str) -> String {

     "USD".to_string() // Default
 }
+
+/// Saves companies data to a JSONL file.
+///
+/// # Arguments
+/// * `paths` - Reference to DataPaths for directory management
+/// * `companies` - HashMap of company names to their securities (ISIN, Ticker pairs)
+///
+/// # Errors
+/// Returns an error if file operations or serialization fails.
+pub async fn save_companies_to_jsonl(
+    paths: &DataPaths,
+    companies: &HashMap<String, Vec<(String, String)>>,
+) -> anyhow::Result<()> {
+    let file_path = paths.data_dir().join("companies.jsonl");
+
+    logger::log_info(&format!("Corporate Storage: Saving {} companies to JSONL", companies.len())).await;
+
+    // Create parent directory if it doesn't exist
+    if let Some(parent) = file_path.parent() {
+        tokio::fs::create_dir_all(parent).await?;
+    }
+
+    let mut file = tokio::fs::File::create(&file_path).await?;
+
+    for (name, securities) in companies.iter() {
+        let line = serde_json::json!({
+            "name": name,
+            "securities": securities
+        });
+        file.write_all(line.to_string().as_bytes()).await?;
+        file.write_all(b"\n").await?;
+    }
+
+    let msg = format!("✓ Saved {} companies to {:?}", companies.len(), file_path);
+    println!("{}", msg);
+    logger::log_info(&msg).await;
+    Ok(())
+}
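The new save_companies_to_jsonl writes one JSON object per line. A small standalone sketch of the line format it produces, using an invented company name and securities (the (ISIN, Ticker) tuple serializes as a two-element array):

// Invented data; shows the line format emitted by save_companies_to_jsonl.
fn main() {
    let securities = vec![("DE0000000001".to_string(), "EXA.DE".to_string())];
    let line = serde_json::json!({
        "name": "Example AG",
        "securities": securities
    });
    // Prints: {"name":"Example AG","securities":[["DE0000000001","EXA.DE"]]}
    println!("{}", line);
}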
diff --git a/src/corporate/types.rs b/src/corporate/types.rs
@@ -1,6 +1,5 @@
-use std::collections::HashMap;
-
 // src/corporate/types.rs
+use std::collections::HashMap;
 use serde::{Deserialize, Serialize};

 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
@@ -53,8 +52,7 @@ pub struct FigiInfo {
     pub figi: String,
     pub name: String,
     pub ticker: String,
-    pub mic_code: String,
-    pub currency: String,
+    pub exch_code: String,
     pub compositeFIGI: String,
     pub securityType: String,
     pub marketSector: String,
@@ -63,16 +61,6 @@ pub struct FigiInfo {
     pub securityDescription: String,
 }

-/// Company Meta Data
-/// # Attributes
-/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
-/// * figi: metadata with ISIN as key
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CompanyMetadata {
-    pub lei: String,
-    pub figi: Option<Vec<FigiInfo>>,
-}
-
 /// Company Info
 /// # Attributes
 /// * Name as primary key (for one instition) -> might have to changed when first FigiInfo is coming in
@@ -85,6 +73,15 @@ pub struct CompanyInfo{
     pub securities: HashMap<String, Vec<FigiInfo>>, // ISIN -> Vec<FigiInfo>
 }

+/// Company Meta Data
+/// # Attributes
+/// * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
+/// * figi: metadata with ISIN as key
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct CompanyMetadata {
+    pub lei: String,
+    pub figi: Option<Vec<FigiInfo>>,
+}
+
 /// Warrant Info
 ///
diff --git a/src/corporate/update.rs b/src/corporate/update.rs
@@ -1,7 +1,9 @@
 // src/corporate/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
 use crate::config::Config;
-use crate::scraper::webdriver::ChromeDriverPool;
+use crate::util::directories::DataPaths;
+use crate::util::logger;
+use crate::webdriver::webdriver::ChromeDriverPool;

 use chrono::Local;
 use std::collections::{HashMap};
@@ -24,50 +26,103 @@ use std::sync::Arc;
 /// # Errors
 /// Returns an error if any step in the update process fails.
 pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
-    println!("=== Starting LEI-based corporate full update ===");
+    let msg = "=== Starting LEI-based corporate full update ===";
+    println!("{}", msg);
+    logger::log_info(msg).await;
+
+    // Initialize paths
+    let paths = DataPaths::new(".")?;

     // 1. Load fresh GLEIF ISIN ↔ LEI mapping
+    logger::log_info("Corporate Update: Loading GLEIF ISIN ↔ LEI mapping...").await;
     let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
-        Ok(map) => map,
+        Ok(map) => {
+            let msg = format!("Corporate Update: Loaded GLEIF mapping with {} LEI entries", map.len());
+            println!("{}", msg);
+            logger::log_info(&msg).await;
+            map
+        }
         Err(e) => {
-            eprintln!("Warning: Could not load GLEIF ISIN↔LEI mapping: {}", e);
+            let msg = format!("Corporate Update: Warning - Could not load GLEIF ISIN↔LEI mapping: {}", e);
+            eprintln!("{}", msg);
+            logger::log_warn(&msg).await;
             HashMap::new()
         }
     };

     // 2. Load OpenFIGI mapping value lists (cached)
+    logger::log_info("Corporate Update: Loading OpenFIGI type lists...").await;
     if let Err(e) = load_figi_type_lists().await {
-        eprintln!("Warning: Could not load OpenFIGI type lists: {}", e);
+        let msg = format!("Corporate Update: Warning - Could not load OpenFIGI type lists: {}", e);
+        eprintln!("{}", msg);
+        logger::log_warn(&msg).await;
     }
+    logger::log_info("Corporate Update: OpenFIGI type lists loaded").await;

     // 3. Build FIGI → LEI map
-    // # Attributes
-    // * lei: Structuring the companies by legal dependencies [LEI -> Vec<ISIN>]
-    // * figi: metadata with ISIN as key
+    logger::log_info("Corporate Update: Building FIGI → LEI map...").await;
     let figi_to_lei:HashMap<String, Vec<FigiInfo>> = match build_lei_to_figi_infos(&lei_to_isins).await {
-        Ok(map) => map,
+        Ok(map) => {
+            let msg = format!("Corporate Update: Built FIGI map with {} entries", map.len());
+            println!("{}", msg);
+            logger::log_info(&msg).await;
+            map
+        }
         Err(e) => {
-            eprintln!("Warning: Could not build FIGI→LEI map: {}", e);
+            let msg = format!("Corporate Update: Warning - Could not build FIGI→LEI map: {}", e);
+            eprintln!("{}", msg);
+            logger::log_warn(&msg).await;
             HashMap::new()
         }
     };

     // 4. Load or build companies
-    let mut companies = load_or_build_all_securities(&figi_to_lei).await?;
-    println!("Processing {} companies", companies.0.len());
+    logger::log_info("Corporate Update: Loading/building company securities...").await;
+    let securities = load_or_build_all_securities(&figi_to_lei).await?;
+    let msg = format!("Corporate Update: Processing {} companies", securities.0.len());
+    println!("{}", msg);
+    logger::log_info(&msg).await;
+
+    // HashMap<Name, Vec<(ISIN, Ticker)>>
+    let companies: HashMap<String, Vec<(String, String)>> = securities.0
+        .iter()
+        .fold(HashMap::new(), |mut acc, security| {
+            let isin: Vec<String> = security.1.securities.values()
+                .flat_map(|figi_info| figi_info.iter().map(|x| x.isin.clone()))
+                .collect();
+            let ticker: Vec<String> = security.1.securities.values()
+                .flat_map(|figi_info| figi_info.iter().map(|x| x.ticker.clone()))
+                .collect();
+            acc.entry(security.1.name.clone())
+                .or_insert_with(Vec::new)
+                .push((isin.join(", "), ticker.join(", ")));
+            acc
+        });
+
+    logger::log_info(&format!("Corporate Update: Saving {} companies to JSONL", companies.len())).await;
+    save_companies_to_jsonl(&paths, &companies).await.expect("Failed to save companies List.");
+    logger::log_info("Corporate Update: Companies saved successfully").await;

     // 5. Load existing earnings events (for change detection)
-    let today = Local::now().format("%Y-%m-%d").to_string();
-    let mut existing_events = match load_existing_events().await {
-        Ok(events) => events,
+    logger::log_info("Corporate Update: Loading existing events...").await;
+    let existing_events = match load_existing_events(&paths).await {
+        Ok(events) => {
+            let msg = format!("Corporate Update: Loaded {} existing events", events.len());
+            println!("{}", msg);
+            logger::log_info(&msg).await;
+            events
+        }
         Err(e) => {
-            eprintln!("Warning: Could not load existing events: {}", e);
+            let msg = format!("Corporate Update: Warning - Could not load existing events: {}", e);
+            eprintln!("{}", msg);
+            logger::log_warn(&msg).await;
             HashMap::new()
         }
     };

     // 5. Use the provided pool (no need to create a new one)
     let pool_size = pool.get_number_of_instances(); // Use the size from the shared pool
+    logger::log_info(&format!("Corporate Update: Using pool size: {}", pool_size)).await;

     // Process companies in parallel using the shared pool
     /*let results: Vec<_> = stream::iter(companies.into_iter())
@@ -88,10 +143,14 @@ pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> a
         }
     }*/

-    save_optimized_events(existing_events).await?;
+    logger::log_info(&format!("Corporate Update: Saving {} events to optimized storage", existing_events.len())).await;
+    save_optimized_events(&paths, existing_events).await?;
+    logger::log_info("Corporate Update: Events saved successfully").await;
     //save_changes(&all_changes).await?;

-    //println!("Corporate update complete — {} changes detected", all_changes.len());
+    let msg = "✓ Corporate update complete";
+    println!("{}", msg);
+    logger::log_info(msg).await;
     Ok(())
 }
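The fold added in step 4 flattens each company's securities map into one (ISIN list, ticker list) string pair per company name. In miniature, with plain stand-in tuples instead of the real CompanyInfo values (all data below is invented):

// Stand-in data; demonstrates the group-by-name fold used in step 4 above.
use std::collections::HashMap;

fn main() {
    let rows = vec![
        ("Example AG", vec!["DE0000000001", "US0000000002"], vec!["EXA.DE", "EXA"]),
    ];

    let companies: HashMap<String, Vec<(String, String)>> =
        rows.iter().fold(HashMap::new(), |mut acc, (name, isins, tickers)| {
            acc.entry(name.to_string())
                .or_insert_with(Vec::new)
                .push((isins.join(", "), tickers.join(", ")));
            acc
        });

    assert_eq!(companies["Example AG"][0].0, "DE0000000001, US0000000002");
}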
diff --git a/src/economic/storage.rs b/src/economic/storage.rs
@@ -1,12 +1,14 @@
 // src/economic/storage.rs
 use super::types::*;
 use super::helpers::*;
+use crate::util::directories::DataPaths;
+use crate::util::logger;
 use tokio::fs;
 use chrono::{NaiveDate, Datelike};
 use std::collections::HashMap;

-pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
-    let dir = std::path::Path::new("data/economic/events");
+pub async fn scan_existing_chunks(paths: &DataPaths) -> anyhow::Result<Vec<ChunkInfo>> {
+    let dir = paths.economic_events_dir();
     let mut chunks = Vec::new();

     if dir.exists() {
@@ -29,6 +31,7 @@ pub async fn scan_existing_chunks() -> anyhow::Result<Vec<ChunkInfo>> {
         }
     }
     chunks.sort_by_key(|c| c.start_date.clone());
+    logger::log_info(&format!("Economic Storage: Scanned {} event chunks", chunks.len())).await;
     Ok(chunks)
 }

@@ -41,25 +44,28 @@ pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result<HashMa
             map.insert(event_key(&e), e);
         }
     }
+    logger::log_info(&format!("Economic Storage: Loaded {} events from {} chunks", map.len(), chunks.len())).await;
     Ok(map)
 }

-pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
-    let dir = std::path::Path::new("data/economic/events");
+pub async fn save_optimized_chunks(paths: &DataPaths, events: HashMap<String, EconomicEvent>) -> anyhow::Result<()> {
+    let dir = paths.economic_events_dir();
     fs::create_dir_all(dir).await?;

-    // Delete all old chunk files to prevent duplicates and overlaps
-    println!("Removing old chunks...");
+    logger::log_info("Economic Storage: Removing old chunk files...").await;

     let mut entries = fs::read_dir(dir).await?;
+    let mut removed_count = 0;
     while let Some(entry) = entries.next_entry().await? {
         let path = entry.path();
         if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
             if name.starts_with("chunk_") && path.extension().map(|e| e == "json").unwrap_or(false) {
                 fs::remove_file(&path).await?;
+                removed_count += 1;
             }
         }
     }
+    logger::log_info(&format!("Economic Storage: Removed {} old chunk files", removed_count)).await;

     let mut sorted: Vec<_> = events.into_values().collect();
     sorted.sort_by_key(|e| e.date.clone());
@@ -77,6 +83,7 @@ pub async fn save_optimized_chunks(events: HashMap<String, EconomicEvent>) -> an
     if !chunk.is_empty() {
         save_chunk(&chunk, dir).await?;
     }
+    logger::log_info(&format!("Economic Storage: Saved all event chunks to {:?}", dir)).await;
     Ok(())
 }

@@ -85,14 +92,20 @@ async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::
     let end = events.iter().map(|e| &e.date).max().unwrap().clone();
     let path = dir.join(format!("chunk_{}_{}.json", start, end));
     fs::write(&path, serde_json::to_string_pretty(events)?).await?;
+    logger::log_info(&format!("Economic Storage: Saved chunk {} - {} ({} events)", start, end, events.len())).await;
     Ok(())
 }

-pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
-    if changes.is_empty() { return Ok(()); }
-    let dir = std::path::Path::new("data/economic/events/changes");
+pub async fn save_changes(paths: &DataPaths, changes: &[EventChange]) -> anyhow::Result<()> {
+    if changes.is_empty() {
+        logger::log_info("Economic Storage: No changes to save").await;
+        return Ok(());
+    }
+    let dir = paths.economic_changes_dir();
     fs::create_dir_all(dir).await?;

+    logger::log_info(&format!("Economic Storage: Saving {} changes to {:?}", changes.len(), dir)).await;
+
     let mut by_month: HashMap<String, Vec<EventChange>> = HashMap::new();
     for c in changes {
         if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
@@ -107,8 +120,10 @@ pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> {
             let s = fs::read_to_string(&path).await?;
             serde_json::from_str(&s).unwrap_or_default()
         } else { vec![] };
-        all.extend(list);
+        all.extend(list.clone());
         fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
+        logger::log_info(&format!("Economic Storage: Saved {} changes for month {}", list.len(), month)).await;
     }
+    logger::log_info("Economic Storage: All changes saved successfully").await;
     Ok(())
 }
diff --git a/src/economic/update.rs b/src/economic/update.rs
@@ -1,7 +1,6 @@
 // src/economic/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*};
-use crate::{config::Config, scraper::webdriver::ScrapeTask};
-use crate::scraper::webdriver::ChromeDriverPool;
+use crate::{config::Config, webdriver::webdriver::{ScrapeTask, ChromeDriverPool}, util::directories::DataPaths, util::logger};
 use chrono::{Local};
 use std::sync::Arc;

@@ -14,38 +13,69 @@ use std::sync::Arc;
 /// # Errors
 /// Returns an error if scraping, loading, or saving fails.
 pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
+    let paths = DataPaths::new(".")?;
+
+    logger::log_info("Economic Update: Initializing...").await;
+
     let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
     let end_date = config.target_end_date();

-    let chunks = scan_existing_chunks().await?;
+    logger::log_info(&format!("Economic Update: Scanning existing chunks from {:?}", paths.economic_events_dir())).await;
+    let chunks = scan_existing_chunks(&paths).await?;
     let mut events = load_existing_events(&chunks).await?;
-    println!("Loaded {} events from {} chunks", events.len(), chunks.len());
+    let msg = format!("Economic Update: Loaded {} events from {} chunks", events.len(), chunks.len());
+    println!("{}", msg);
+    logger::log_info(&msg).await;

     let start_date = if events.is_empty() {
+        logger::log_warn("Economic Update: No existing events found, starting from config date").await;
         config.economic_start_date.clone()
     } else if events.values().any(|e| e.date >= today_str) {
+        logger::log_info("Economic Update: Events exist for today, starting from today").await;
         today_str.clone()
     } else {
-        events.values()
+        let next = events.values()
             .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
             .max()
             .and_then(|d| d.succ_opt())
             .map(|d| d.format("%Y-%m-%d").to_string())
-            .unwrap_or(today_str.clone())
+            .unwrap_or(today_str.clone());
+        logger::log_info(&format!("Economic Update: Resuming from: {}", next)).await;
+        next
     };

-    println!("Scraping economic events: {} → {}", start_date, end_date);
+    let msg = format!("Economic Update: Scraping events from {} → {}", start_date, end_date);
+    println!("{}", msg);
+    logger::log_info(&msg).await;

     // Pass the pool to the scraping function
     let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;

+    let msg = format!("Economic Update: Scraped {} new events", new_events_all.len());
+    println!("{}", msg);
+    logger::log_info(&msg).await;
+
     // Process all at once or in batches
     let result = process_batch(&new_events_all, &mut events, &today_str);
     let total_changes = result.changes.len();
-    save_changes(&result.changes).await?;

-    save_optimized_chunks(events).await?;
-    println!("Economic update complete — {} changes detected", total_changes);
+    let msg = format!("Economic Update: Detected {} changes", total_changes);
+    println!("{}", msg);
+    logger::log_info(&msg).await;
+
+    if total_changes > 0 {
+        logger::log_info(&format!("Economic Update: Saving {} changes to log", total_changes)).await;
+        save_changes(&paths, &result.changes).await?;
+        logger::log_info("Economic Update: Changes saved successfully").await;
+    }
+
+    logger::log_info(&format!("Economic Update: Saving {} total events to chunks", events.len())).await;
+    save_optimized_chunks(&paths, events).await?;
+
+    let msg = format!("✓ Economic update complete — {} changes detected", total_changes);
+    println!("{}", msg);
+    logger::log_info(&msg).await;
     Ok(())
 }
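The resume logic above picks the day after the newest stored event via chrono's succ_opt, falling back to today. Reduced to its core, with invented dates:

// Invented dates; shows how the resume date in run_full_update is derived.
use chrono::NaiveDate;

fn main() {
    let stored = ["2024-03-29", "2024-03-31", "2024-03-30"];
    let today = "2024-04-15".to_string();

    let next = stored
        .iter()
        .filter_map(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
        .max()
        .and_then(|d| d.succ_opt()) // the day after the newest event
        .map(|d| d.format("%Y-%m-%d").to_string())
        .unwrap_or(today);

    assert_eq!(next, "2024-04-01");
}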
26
src/main.rs
26
src/main.rs
@@ -2,12 +2,14 @@
|
|||||||
mod economic;
|
mod economic;
|
||||||
mod corporate;
|
mod corporate;
|
||||||
mod config;
|
mod config;
|
||||||
|
mod webdriver;
|
||||||
mod util;
|
mod util;
|
||||||
mod scraper;
|
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use config::Config;
|
use config::Config;
|
||||||
use scraper::webdriver::ChromeDriverPool;
|
use webdriver::webdriver::ChromeDriverPool;
|
||||||
|
use util::directories::DataPaths;
|
||||||
|
use util::logger;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
/// The entry point of the application.
|
/// The entry point of the application.
|
||||||
@@ -29,15 +31,35 @@ async fn main() -> Result<()> {
         err
     })?;
 
+    // Initialize paths
+    let paths = DataPaths::new(".")?;
+
+    // Initialize logger
+    logger::init_debug_logger(paths.logs_dir()).await.map_err(|e| {
+        anyhow::anyhow!("Logger initialization failed: {}", e)
+    })?;
+
+    logger::log_info("=== Application started ===").await;
+    logger::log_info(&format!("Config: economic_start_date={}, corporate_start_date={}, lookahead_months={}, max_parallel_tasks={}",
+        config.economic_start_date, config.corporate_start_date, config.economic_lookahead_months, config.max_parallel_tasks)).await;
+
     // Initialize the shared ChromeDriver pool once
     let pool_size = config.max_parallel_tasks;
+    logger::log_info(&format!("Initializing ChromeDriver pool with size: {}", pool_size)).await;
+
     let pool = Arc::new(ChromeDriverPool::new(pool_size).await?);
+    logger::log_info("✓ ChromeDriver pool initialized successfully").await;
 
     // Run economic update first, passing the shared pool
+    logger::log_info("--- Starting economic data update ---").await;
     economic::run_full_update(&config, &pool).await?;
+    logger::log_info("✓ Economic data update completed").await;
 
     // Then run corporate update, passing the shared pool
+    logger::log_info("--- Starting corporate data update ---").await;
     corporate::run_full_update(&config, &pool).await?;
+    logger::log_info("✓ Corporate data update completed").await;
 
+    logger::log_info("=== Application completed successfully ===").await;
     Ok(())
 }
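Note on the hunk above: the initialization order is deliberate: paths first (init_debug_logger needs paths.logs_dir()), then the logger (so pool start-up is captured in the log file), then the ChromeDriver pool. A minimal sketch of the map_err bridge used for the logger call, with a hypothetical fallible_init standing in for init_debug_logger (which reports errors as plain Strings rather than anyhow errors):

    use anyhow::Result;

    // Hypothetical initializer that reports failure as a plain String,
    // as init_debug_logger does.
    async fn fallible_init() -> std::result::Result<(), String> {
        Ok(())
    }

    #[tokio::main]
    async fn main() -> Result<()> {
        // Convert the String error into anyhow so `?` works in main.
        fallible_init()
            .await
            .map_err(|e| anyhow::anyhow!("Logger initialization failed: {}", e))?;
        Ok(())
    }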
22 src/util.rs
@@ -1,22 +0,0 @@
-// src/util.rs (or put it directly in main.rs if you prefer)
-use tokio::fs;
-use std::path::Path;
-
-/// Create the required data folders if they do not exist yet.
-pub async fn _ensure_data_dirs() -> anyhow::Result<()> {
-    let dirs = [
-        "economic_events",
-        "economic_event_changes",
-        "corporate_events",
-        "corporate_prices",
-        "data",
-    ];
-    for dir in dirs {
-        let path = Path::new(dir);
-        if !path.exists() {
-            tokio::fs::create_dir_all(path).await?;
-            println!("Created directory: {dir}");
-        }
-    }
-    Ok(())
-}
134 src/util/directories.rs (new file)
@@ -0,0 +1,134 @@
+use std::path::{Path, PathBuf};
+use std::fs;
+
+/// Central configuration for all data paths
+pub struct DataPaths {
+    base_dir: PathBuf,
+    data_dir: PathBuf,
+    cache_dir: PathBuf,
+    logs_dir: PathBuf,
+    // Economic data subdirectories
+    economic_events_dir: PathBuf,
+    economic_changes_dir: PathBuf,
+    // Corporate data subdirectories
+    corporate_events_dir: PathBuf,
+    corporate_changes_dir: PathBuf,
+    corporate_prices_dir: PathBuf,
+}
+
+impl DataPaths {
+    /// Initialize paths from a base directory
+    pub fn new(base_dir: impl AsRef<Path>) -> std::io::Result<Self> {
+        let base_dir = base_dir.as_ref().to_path_buf();
+
+        let data_dir = base_dir.join("data");
+        let cache_dir = base_dir.join("cache");
+        let logs_dir = base_dir.join("logs");
+
+        // Economic subdirectories
+        let economic_events_dir = data_dir.join("economic").join("events");
+        let economic_changes_dir = economic_events_dir.join("changes");
+
+        // Corporate subdirectories
+        let corporate_dir = data_dir.join("corporate");
+        let corporate_events_dir = corporate_dir.join("events");
+        let corporate_changes_dir = corporate_events_dir.join("changes");
+        let corporate_prices_dir = corporate_dir.join("prices");
+
+        // Create all directories if they don't exist
+        fs::create_dir_all(&data_dir)?;
+        fs::create_dir_all(&cache_dir)?;
+        fs::create_dir_all(&logs_dir)?;
+        fs::create_dir_all(&economic_events_dir)?;
+        fs::create_dir_all(&economic_changes_dir)?;
+        fs::create_dir_all(&corporate_events_dir)?;
+        fs::create_dir_all(&corporate_changes_dir)?;
+        fs::create_dir_all(&corporate_prices_dir)?;
+
+        Ok(Self {
+            base_dir,
+            data_dir,
+            cache_dir,
+            logs_dir,
+            economic_events_dir,
+            economic_changes_dir,
+            corporate_events_dir,
+            corporate_changes_dir,
+            corporate_prices_dir,
+        })
+    }
+
+    pub fn base_dir(&self) -> &Path {
+        &self.base_dir
+    }
+
+    pub fn data_dir(&self) -> &Path {
+        &self.data_dir
+    }
+
+    pub fn cache_dir(&self) -> &Path {
+        &self.cache_dir
+    }
+
+    pub fn logs_dir(&self) -> &Path {
+        &self.logs_dir
+    }
+
+    /// Get the economic events directory
+    pub fn economic_events_dir(&self) -> &Path {
+        &self.economic_events_dir
+    }
+
+    /// Get the economic changes directory
+    pub fn economic_changes_dir(&self) -> &Path {
+        &self.economic_changes_dir
+    }
+
+    /// Get the corporate events directory
+    pub fn corporate_events_dir(&self) -> &Path {
+        &self.corporate_events_dir
+    }
+
+    /// Get the corporate changes directory
+    pub fn corporate_changes_dir(&self) -> &Path {
+        &self.corporate_changes_dir
+    }
+
+    /// Get the corporate prices directory
+    pub fn corporate_prices_dir(&self) -> &Path {
+        &self.corporate_prices_dir
+    }
+
+    /// Get a specific file path within data directory
+    pub fn data_file(&self, filename: &str) -> PathBuf {
+        self.data_dir.join(filename)
+    }
+
+    /// Get a specific file path within cache directory
+    pub fn cache_file(&self, filename: &str) -> PathBuf {
+        self.cache_dir.join(filename)
+    }
+
+    /// Get a specific file path within logs directory
+    pub fn log_file(&self, filename: &str) -> PathBuf {
+        self.logs_dir.join(filename)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_paths_creation() {
+        let paths = DataPaths::new("./test_base").unwrap();
+        assert!(paths.data_dir().exists());
+        assert!(paths.cache_dir().exists());
+        assert!(paths.logs_dir().exists());
+        assert!(paths.economic_events_dir().exists());
+        assert!(paths.economic_changes_dir().exists());
+        assert!(paths.corporate_events_dir().exists());
+        assert!(paths.corporate_changes_dir().exists());
+        assert!(paths.corporate_prices_dir().exists());
+    }
+}
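Note on the new file above: DataPaths::new creates every directory eagerly, so the getters can be used immediately after construction. A short usage sketch, assuming the crate layout from this commit (mod util; at the crate root); the file name "economic_chunks.json" is illustrative only:

    use util::directories::DataPaths;

    fn main() -> std::io::Result<()> {
        // Creates ./data, ./cache, ./logs and the nested event/price dirs if missing.
        let paths = DataPaths::new(".")?;
        println!("logs at {:?}", paths.logs_dir());
        // Convenience join for a file under ./data.
        let chunks = paths.data_file("economic_chunks.json");
        println!("chunk file at {:?}", chunks);
        Ok(())
    }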
78 src/util/logger.rs (new file)
@@ -0,0 +1,78 @@
+// src/util/logger.rs
+use chrono::Local;
+use once_cell::sync::Lazy;
+use tokio::sync::Mutex;
+use std::fs::{self, OpenOptions};
+use std::io::Write;
+use std::path::PathBuf;
+
+static LOGGER: Lazy<Mutex<Option<DebugLogger>>> = Lazy::new(|| Mutex::new(None));
+
+pub struct DebugLogger {
+    file: std::fs::File,
+    log_path: PathBuf,
+}
+
+impl DebugLogger {
+    fn new(log_dir: &std::path::Path) -> std::io::Result<Self> {
+
+        fs::create_dir_all(log_dir)?;
+        let filename = format!("backtest_{}.log", Local::now().format("%Y%m%d_%H%M%S"));
+        let log_path = log_dir.join(&filename);
+        let file = OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&log_path)?;
+        Ok(Self { file, log_path })
+    }
+
+    async fn log(&mut self, msg: &str) {
+        let line = format!("[{}] {}\n", Local::now().format("%H:%M:%S"), msg);
+        let _ = self.file.write_all(line.as_bytes());
+        let _ = self.file.flush();
+        println!("{}", line.trim_end());
+    }
+}
+
+pub async fn init_debug_logger(log_dir: &std::path::Path) -> Result<(), String> {
+    let mut logger = LOGGER.lock().await;
+    match DebugLogger::new(log_dir) {
+        Ok(l) => {
+            let log_path = l.log_path.clone();
+            *logger = Some(l);
+            println!("✓ Logger initialized at: {:?}", log_path);
+            Ok(())
+        }
+        Err(e) => {
+            let err_msg = format!("Failed to initialize logger: {}", e);
+            eprintln!("{}", err_msg);
+            Err(err_msg)
+        }
+    }
+}
+
+pub async fn log_message(msg: &str) {
+    let mut logger = LOGGER.lock().await;
+    if let Some(l) = logger.as_mut() {
+        l.log(msg).await;
+    } else {
+        println!("[LOG] {}", msg);
+    }
+}
+
+pub async fn log_detailed(level: &str, msg: &str) {
+    let formatted = format!("[{}] {}", level, msg);
+    log_message(&formatted).await;
+}
+
+pub async fn log_info(msg: &str) {
+    log_detailed("INFO", msg).await;
+}
+
+pub async fn log_warn(msg: &str) {
+    log_detailed("WARN", msg).await;
+}
+
+pub async fn log_error(msg: &str) {
+    log_detailed("ERROR", msg).await;
+}
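Note on the new file above: the single Lazy<Mutex<Option<DebugLogger>>> global means callers never thread a logger handle through the call graph, and log_message falls back to plain stdout (the [LOG] prefix) if init_debug_logger was never called or failed. A short usage sketch, assuming tokio and the crate layout from this commit:

    use util::logger; // with `mod util;` declared at the crate root, as in main.rs

    #[tokio::main]
    async fn main() {
        if let Err(e) = logger::init_debug_logger(std::path::Path::new("logs")).await {
            eprintln!("file logging disabled: {}", e); // messages still reach stdout
        }
        logger::log_info("pipeline started").await;
        logger::log_warn("optional config missing; using defaults").await;
        logger::log_error("example failure path").await;
    }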
3 src/util/mod.rs (new file)
@@ -0,0 +1,3 @@
+// src/util/mod.rs
+pub mod logger;
+pub mod directories;