Compare commits

..

8 Commits

21 changed files with 3246 additions and 134 deletions

6
.gitignore vendored
View File

@@ -17,6 +17,9 @@ target/
# option (not recommended) you can uncomment the following to ignore the entire idea folder. # option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/ #.idea/
# env
.env
# Added by cargo # Added by cargo
@@ -29,4 +32,5 @@ target/
/economic_event_changes* /economic_event_changes*
/corporate_events* /corporate_events*
/corporate_prices* /corporate_prices*
/corporate_event_changes* /corporate_event_changes*
/data*

1289
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -17,13 +17,23 @@ categories = ["finance", "data-structures", "asynchronous"]
tokio = { version = "1.38", features = ["full"] } tokio = { version = "1.38", features = ["full"] }
# Web scraping & HTTP # Web scraping & HTTP
reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate"] } reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking"] }
scraper = "0.19" # HTML parsing for Yahoo earnings pages scraper = "0.19" # HTML parsing for Yahoo earnings pages
fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net
yfinance-rs = "0.7.2"
# Serialization # Serialization
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0" serde_json = "1.0"
csv = "1.3"
zip = "6.0.0"
flate2 = "1.1.5"
# Generating
rand = "0.9.2"
# Environment handling
dotenvy = "0.15"
# Date & time # Date & time
chrono = { version = "0.4", features = ["serde"] } chrono = { version = "0.4", features = ["serde"] }

46
fx_rates.json Normal file
View File

@@ -0,0 +1,46 @@
{
"CHF": [
0.808996035919424,
"2025-11-25"
],
"JPY": [
0.0064,
"2025-11-25"
],
"INR": [
89.28571428571429,
"2025-11-25"
],
"GBp": [
0.7603406326034063,
"2025-11-25"
],
"AUD": [
1.5463120457708364,
"2025-11-25"
],
"SAR": [
3.750937734433609,
"2025-11-25"
],
"TWD": [
31.446540880503143,
"2025-11-25"
],
"CNY": [
7.087172218284904,
"2025-11-25"
],
"HKD": [
7.776049766718508,
"2025-11-25"
],
"CAD": [
1.4110342881332016,
"2025-11-25"
],
"EUR": [
0.8649022660439372,
"2025-11-25"
]
}

View File

@@ -27,12 +27,4 @@ impl Config {
let future = now + chrono::Duration::days(30 * self.economic_lookahead_months as i64); let future = now + chrono::Duration::days(30 * self.economic_lookahead_months as i64);
future.format("%Y-%m-%d").to_string() future.format("%Y-%m-%d").to_string()
} }
}
pub fn get_tickers() -> Vec<String> {
vec![
"AAPL", "MSFT", "NVDA", "GOOGL", "AMZN",
"TSLA", "META", "JPM", "V", "WMT",
// ... your 100500 tickers here
].into_iter().map(String::from).collect()
} }

View File

@@ -0,0 +1,194 @@
// src/corporate/aggregation.rs
use super::types::CompanyPrice;
use super::storage::*;
use tokio::fs;
use std::collections::HashMap;
/// Working accumulator for all price bars that share one aggregation key
/// (date, or date+time for 5-minute bars) while merging data from several
/// exchange listings of the same company.
#[derive(Debug)]
struct DayData {
    // Every contributing bar (already USD-converted) with its source ticker.
    sources: Vec<(CompanyPrice, String)>, // (price, source_ticker)
    // Sum of volumes across all sources (each clamped to >= 1 upstream).
    total_volume: u64,
    // Running sum of close * volume; divided by total_volume at finalization
    // to produce the volume-weighted average price.
    vwap: f64,
    // Open of the first source seen, high/low across all sources,
    // close of the last source seen (source order is read_dir order).
    open: f64,
    high: f64,
    low: f64,
    close: f64,
}
/// Aggregate price data from multiple exchanges, converting all to USD.
///
/// For each timeframe ("daily", "5min") this walks every per-exchange
/// sub-directory under the company's data dir, converts each bar to USD via
/// the FX cache, merges bars that share the same date (or date+time for
/// 5min), and writes the combined series plus metadata under
/// `<company_dir>/aggregated/<timeframe>/`.
///
/// # Errors
/// Propagates filesystem and JSON (de)serialization failures.
pub async fn aggregate_best_price_data(lei: &str) -> anyhow::Result<()> {
    let company_dir = get_company_dir(lei);
    for timeframe in ["daily", "5min"].iter() {
        let source_dir = company_dir.join(timeframe);
        if !source_dir.exists() {
            continue;
        }
        let mut all_prices: Vec<(CompanyPrice, String)> = Vec::new();
        let mut by_date_time: HashMap<String, DayData> = HashMap::new();
        // Load all sources with their ticker names. Each sub-directory of the
        // timeframe dir is one exchange listing (named by its ticker) holding
        // a prices.json file.
        let mut entries = tokio::fs::read_dir(&source_dir).await?;
        let mut source_count = 0;
        let mut sources_used = std::collections::HashSet::new();
        while let Some(entry) = entries.next_entry().await? {
            let source_dir_path = entry.path();
            if !source_dir_path.is_dir() { continue; }
            let source_ticker = source_dir_path
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("unknown")
                .to_string();
            let prices_path = source_dir_path.join("prices.json");
            if !prices_path.exists() { continue; }
            let content = tokio::fs::read_to_string(&prices_path).await?;
            let mut prices: Vec<CompanyPrice> = serde_json::from_str(&content)?;
            // Only sources that actually contributed bars are counted/reported.
            if !prices.is_empty() {
                sources_used.insert(source_ticker.clone());
                source_count += 1;
            }
            for price in prices {
                all_prices.push((price, source_ticker.clone()));
            }
        }
        if all_prices.is_empty() {
            continue;
        }
        println!("  Aggregating from {} exchanges: {}",
            sources_used.len(),
            sources_used.iter()
                .map(|s| s.as_str())
                .collect::<Vec<_>>()
                .join(", ")
        );
        // Group by date + time (for 5min) or just date.
        for (p, source) in all_prices {
            let key = if timeframe == &"5min" && !p.time.is_empty() {
                format!("{}_{}", p.date, p.time)
            } else {
                p.date.clone()
            };
            // Convert to USD immediately so all sources are comparable.
            // NOTE(review): on FX lookup failure this silently falls back to
            // a 1.0 rate, i.e. treats the price as already-USD.
            let usd_rate = super::fx::get_usd_rate(&p.currency).await.unwrap_or(1.0);
            let mut p_usd = p.clone();
            p_usd.open *= usd_rate;
            p_usd.high *= usd_rate;
            p_usd.low *= usd_rate;
            p_usd.close *= usd_rate;
            p_usd.adj_close *= usd_rate;
            p_usd.currency = "USD".to_string();
            let entry = by_date_time.entry(key.clone()).or_insert(DayData {
                sources: vec![],
                total_volume: 0,
                vwap: 0.0,
                open: p_usd.open,
                high: p_usd.high,
                low: p_usd.low,
                close: p_usd.close,
            });
            let volume = p.volume.max(1); // avoid div0
            let vwap_contrib = p_usd.close * volume as f64;
            entry.sources.push((p_usd.clone(), source));
            entry.total_volume += volume;
            entry.vwap += vwap_contrib;
            // Use first open, last close, max high, min low.
            if entry.sources.len() == 1 {
                entry.open = p_usd.open;
            }
            entry.close = p_usd.close;
            entry.high = entry.high.max(p_usd.high);
            entry.low = entry.low.min(p_usd.low);
        }
        // Finalize aggregated data: one bar per key, VWAP stored in adj_close.
        let mut aggregated: Vec<CompanyPrice> = Vec::new();
        for (key, data) in by_date_time {
            // total_volume >= 1 is guaranteed by the max(1) clamp above.
            let vwap = data.vwap / data.total_volume as f64;
            let (date, time) = if key.contains('_') {
                let parts: Vec<&str> = key.split('_').collect();
                (parts[0].to_string(), parts[1].to_string())
            } else {
                (key, "".to_string())
            };
            // Track which exchange contributed most volume.
            let best_source = data.sources.iter()
                .max_by_key(|(p, _)| p.volume)
                .map(|(_, src)| src.clone())
                .unwrap_or_else(|| "unknown".to_string());
            aggregated.push(CompanyPrice {
                ticker: format!("{lei}@agg"), // Mark as aggregated
                date,
                time,
                open: data.open,
                high: data.high,
                low: data.low,
                close: data.close,
                adj_close: vwap,
                volume: data.total_volume,
                currency: "USD".to_string(),
            });
        }
        aggregated.sort_by_key(|p| (p.date.clone(), p.time.clone()));
        // Save aggregated result.
        let agg_dir = company_dir.join("aggregated").join(timeframe);
        fs::create_dir_all(&agg_dir).await?;
        let path = agg_dir.join("prices.json");
        fs::write(&path, serde_json::to_string_pretty(&aggregated)?).await?;
        // Save aggregation metadata alongside the bars.
        let meta = AggregationMetadata {
            lei: lei.to_string(),
            timeframe: timeframe.to_string(),
            sources: sources_used.into_iter().collect(),
            total_bars: aggregated.len(),
            date_range: (
                aggregated.first().map(|p| p.date.clone()).unwrap_or_default(),
                aggregated.last().map(|p| p.date.clone()).unwrap_or_default(),
            ),
            aggregated_at: chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
        };
        let meta_path = agg_dir.join("metadata.json");
        fs::write(&meta_path, serde_json::to_string_pretty(&meta)?).await?;
        println!("{} {} bars from {} sources (USD)",
            aggregated.len(),
            timeframe,
            source_count
        );
    }
    Ok(())
}
/// Provenance record written next to each aggregated price series
/// (aggregated/<timeframe>/metadata.json).
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct AggregationMetadata {
    // Legal Entity Identifier of the aggregated company.
    lei: String,
    // "daily" or "5min".
    timeframe: String,
    // Tickers of the exchange listings that contributed bars.
    sources: Vec<String>,
    // Number of aggregated bars written.
    total_bars: usize,
    // (first bar date, last bar date) of the sorted output.
    date_range: (String, String),
    // Local wall-clock timestamp of the aggregation run.
    aggregated_at: String,
}

51
src/corporate/fx.rs Normal file
View File

@@ -0,0 +1,51 @@
// src/corporate/fx.rs
use std::collections::HashMap;
use reqwest;
use serde_json::Value;
use tokio::fs;
use std::path::Path;
static FX_CACHE_PATH: &str = "fx_rates.json";
/// Return the multiplier that converts one unit of `currency` into USD.
///
/// Rates are cached per calendar day in `fx_rates.json`; a fresh quote is
/// pulled from Yahoo Finance ("<CUR>USD=X") at most once per currency per day.
///
/// Yahoo's "<CUR>USD=X" pair is quoted as USD per one unit of <CUR>, so the
/// close price IS the conversion multiplier for every currency — the previous
/// `1.0 / close` for all-but-JPY/KRW produced inverted rates (e.g. EUR
/// prices were multiplied by ~0.86 instead of ~1.16).
///
/// "GBp" (pence sterling) has no direct pair: we quote GBPUSD and divide by
/// 100.
///
/// # Errors
/// Fails when the cache file cannot be read or the Yahoo request/parse yields
/// no usable quote (a missing quote is no longer silently cached as 1.0).
pub async fn get_usd_rate(currency: &str) -> anyhow::Result<f64> {
    if currency == "USD" {
        return Ok(1.0);
    }
    // Load (or lazily create) the on-disk daily cache.
    let mut cache: HashMap<String, (f64, String)> = if Path::new(FX_CACHE_PATH).exists() {
        let content = fs::read_to_string(FX_CACHE_PATH).await?;
        serde_json::from_str(&content).unwrap_or_default()
    } else {
        HashMap::new()
    };
    let today = chrono::Local::now().format("%Y-%m-%d").to_string();
    if let Some((rate, date)) = cache.get(currency) {
        if date == &today {
            return Ok(*rate);
        }
    }
    // "GBp" is pence sterling — quote the pound and scale by 1/100 below.
    let quote_currency = if currency == "GBp" { "GBP" } else { currency };
    let symbol = format!("{}USD=X", quote_currency);
    let url = format!("https://query1.finance.yahoo.com/v8/finance/chart/{}?range=1d&interval=1d", symbol);
    let json: Value = reqwest::Client::new()
        .get(&url)
        .header("User-Agent", "Mozilla/5.0")
        .send()
        .await?
        .json()
        .await?;
    let close = json["chart"]["result"][0]["meta"]["regularMarketPrice"]
        .as_f64()
        .or_else(|| json["chart"]["result"][0]["indicators"]["quote"][0]["close"][0].as_f64())
        .ok_or_else(|| anyhow::anyhow!("No FX quote for {symbol}"))?;
    // close = USD per unit of the quoted currency; no inversion needed.
    let rate = if currency == "GBp" { close / 100.0 } else { close };
    cache.insert(currency.to_string(), (rate, today));
    // Cache write failures are non-fatal: the rate is still returned.
    let _ = fs::write(FX_CACHE_PATH, serde_json::to_string_pretty(&cache)?).await;
    Ok(rate)
}

70
src/corporate/helpers.rs Normal file
View File

@@ -0,0 +1,70 @@
// src/corporate/helpers.rs
use super::types::*;
use chrono::{Local, NaiveDate};
use std::collections::{HashMap, HashSet};
/// Stable identity key for a company event: "<ticker>|<date>|<time>".
pub fn event_key(e: &CompanyEvent) -> String {
    [e.ticker.as_str(), e.date.as_str(), e.time.as_str()].join("|")
}
/// Compare old vs. new snapshots of the same upcoming event and emit one
/// `CompanyEventChange` per differing field (time, eps_forecast, eps_actual).
///
/// Events dated `today` or earlier produce no change records: past events are
/// treated as immutable. All records from one call share the same
/// `detected_at` timestamp.
pub fn detect_changes(old: &CompanyEvent, new: &CompanyEvent, today: &str) -> Vec<CompanyEventChange> {
    let mut changes = Vec::new();
    // Only future events can legitimately change; also skips timestamp work.
    if new.date.as_str() <= today { return changes; }
    let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
    // Single construction site for change records instead of three
    // copy-pasted struct literals.
    let mut record = |field: &str, old_value: String, new_value: String| {
        changes.push(CompanyEventChange {
            ticker: new.ticker.clone(),
            date: new.date.clone(),
            field_changed: field.to_string(),
            old_value,
            new_value,
            detected_at: ts.clone(),
        });
    };
    if old.time != new.time {
        record("time", old.time.clone(), new.time.clone());
    }
    if old.eps_forecast != new.eps_forecast {
        record(
            "eps_forecast",
            format!("{:?}", old.eps_forecast),
            format!("{:?}", new.eps_forecast),
        );
    }
    if old.eps_actual != new.eps_actual {
        record(
            "eps_actual",
            format!("{:?}", old.eps_actual),
            format!("{:?}", new.eps_actual),
        );
    }
    // Add similar for revenue if applicable
    changes
}
/// Identity key for a price bar. Daily bars key on "<ticker>|<date>";
/// intraday bars additionally carry the time component.
pub fn price_key(p: &CompanyPrice) -> String {
    let mut key = format!("{}|{}", p.ticker, p.date);
    if !p.time.is_empty() {
        key.push('|');
        key.push_str(&p.time);
    }
    key
}
/// Parse a Yahoo-style numeric cell into `f64`.
///
/// Handles thousands separators ("1,234.5") and Yahoo's "--" placeholder for
/// missing values; surrounding whitespace is now trimmed so padded cells
/// (" 2.5 ") parse instead of returning `None`. Returns `None` when no valid
/// number remains.
pub fn parse_float(s: &str) -> Option<f64> {
    s.trim().replace("--", "").replace(',', "").parse::<f64>().ok()
}
/// Parse a Yahoo-formatted date such as "January 2, 2024" or "Jan 2, 2024".
pub fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    // Try the long month name first, then the abbreviated form.
    for fmt in ["%B %d, %Y", "%b %d, %Y"] {
        if let Ok(d) = NaiveDate::parse_from_str(s, fmt) {
            return Ok(d);
        }
    }
    Err(anyhow::anyhow!("Bad date: {s}"))
}

View File

@@ -3,6 +3,10 @@ pub mod types;
pub mod scraper; pub mod scraper;
pub mod storage; pub mod storage;
pub mod update; pub mod update;
pub mod helpers;
pub mod aggregation;
pub mod fx;
pub mod openfigi;
pub use types::*; pub use types::*;
pub use update::run_full_update; pub use update::run_full_update;

172
src/corporate/openfigi.rs Normal file
View File

@@ -0,0 +1,172 @@
// src/corporate/openfigi.rs
use super::{types::*};
use reqwest::Client as HttpClient;
use reqwest::header::{HeaderMap, HeaderValue};
use serde_json::{json, Value};
use std::collections::{HashMap, HashSet};
use tokio::time::{sleep, Duration};
use anyhow::Context;
/// Thin wrapper around the OpenFIGI v3 mapping API.
#[derive(Clone)]
pub struct OpenFigiClient {
    // HTTP client; carries the X-OPENFIGI-APIKEY default header when a key
    // was provided at construction time.
    client: HttpClient,
    // API key read from OPENFIGI_API_KEY, if set.
    api_key: Option<String>,
    // Cached `api_key.is_some()` — keyed clients use bigger request batches.
    has_key: bool,
}
impl OpenFigiClient {
    /// Build a client, picking up `OPENFIGI_API_KEY` from the environment
    /// (via dotenvy, so a `.env` file works too).
    ///
    /// # Errors
    /// Fails if the key is not a valid header value or the HTTP client
    /// cannot be constructed.
    pub fn new() -> anyhow::Result<Self> {
        let api_key = dotenvy::var("OPENFIGI_API_KEY").ok();
        let has_key = api_key.is_some();
        let mut builder = HttpClient::builder()
            .user_agent("Mozilla/5.0 (compatible; OpenFIGI-Rust/1.0)")
            .timeout(Duration::from_secs(30));
        if let Some(key) = &api_key {
            let mut headers = HeaderMap::new();
            headers.insert("X-OPENFIGI-APIKEY", HeaderValue::from_str(key)?);
            builder = builder.default_headers(headers);
        }
        let client = builder.build().context("Failed to build HTTP client")?;
        println!(
            "OpenFIGI client initialized: {}",
            if has_key { "with API key" } else { "no key (limited mode)" }
        );
        Ok(Self { client, api_key, has_key })
    }

    /// Batch-map ISINs to FIGI, filtering equities only.
    ///
    /// ISINs are sent in chunks (100 jobs per request with an API key, 5
    /// without). A 429 response backs off and retries the SAME chunk — the
    /// previous implementation `continue`d to the next chunk and silently
    /// dropped the rate-limited one. The returned FIGIs are sorted and fully
    /// de-duplicated.
    ///
    /// # Errors
    /// Fails on network errors, invalid API key (401), oversized payload
    /// (413), any other HTTP error status, or malformed response JSON.
    pub async fn map_isins_to_figi(&self, isins: &[String]) -> anyhow::Result<Vec<String>> {
        if isins.is_empty() { return Ok(vec![]); }
        let mut all_figis = Vec::new();
        let chunk_size = if self.has_key { 100 } else { 5 };
        for chunk in isins.chunks(chunk_size) {
            // Retry loop: only a 429 loops; success and hard errors exit.
            loop {
                let jobs: Vec<Value> = chunk.iter()
                    .map(|isin| json!({
                        "idType": "ID_ISIN",
                        "idValue": isin,
                        "marketSecDes": "Equity", // Pre-filter to equities
                    }))
                    .collect();
                let resp = self.client
                    .post("https://api.openfigi.com/v3/mapping")
                    .header("Content-Type", "application/json")
                    .json(&jobs)
                    .send()
                    .await?;
                let status = resp.status();
                let headers = resp.headers().clone();
                // Keep the body as text so error paths can include it.
                let body = resp.text().await.unwrap_or_default();
                if status.is_client_error() || status.is_server_error() {
                    if status == 401 {
                        return Err(anyhow::anyhow!("Invalid OpenFIGI API key: {}", body));
                    } else if status == 413 {
                        return Err(anyhow::anyhow!("Payload too large—reduce chunk size: {}", body));
                    } else if status == 429 {
                        let reset = headers
                            .get("ratelimit-reset")
                            .and_then(|v| v.to_str().ok())
                            .unwrap_or("10")
                            .parse::<u64>()
                            .unwrap_or(10);
                        println!("Rate limited—backing off {}s", reset);
                        sleep(Duration::from_secs(reset.max(10))).await;
                        continue; // retry this chunk, don't drop it
                    }
                    return Err(anyhow::anyhow!("OpenFIGI error {}: {}", status, body));
                }
                // Parse JSON from the body string captured above.
                let results: Vec<Value> = serde_json::from_str(&body)?;
                for (_isin, result) in chunk.iter().zip(results) {
                    if let Some(data) = result["data"].as_array() {
                        for item in data {
                            let sec_type = item["securityType"].as_str().unwrap_or("");
                            let market_sec = item["marketSector"].as_str().unwrap_or("");
                            // Keep only genuine equity instruments.
                            if market_sec == "Equity" &&
                                (sec_type.contains("Stock") || sec_type.contains("Share") || sec_type.contains("Equity") ||
                                 sec_type.contains("Common") || sec_type.contains("Preferred") || sec_type == "ADR" || sec_type == "GDR") {
                                if let Some(figi) = item["figi"].as_str() {
                                    all_figis.push(figi.to_string());
                                }
                            }
                        }
                    }
                }
                break; // chunk done
            }
            // Rate limit respect between chunk requests.
            if self.has_key {
                sleep(Duration::from_secs(6)).await;
            } else {
                sleep(Duration::from_millis(500)).await; // Slower without key
            }
        }
        // `Vec::dedup` removes only ADJACENT duplicates — sort first so
        // repeated FIGIs across chunks are actually collapsed.
        all_figis.sort_unstable();
        all_figis.dedup();
        Ok(all_figis)
    }
}
/// Build a FIGI → LEI map from the ISIN↔LEI relationships, keeping only
/// equity FIGIs as filtered by OpenFIGI. Requires an API key; without one an
/// empty map is returned. The finished map is persisted to
/// `data/figi_to_lei.json`.
pub async fn build_figi_to_lei_map(lei_to_isins: &HashMap<String, Vec<String>>) -> anyhow::Result<HashMap<String, String>> {
    let client = OpenFigiClient::new()?;
    if !client.has_key {
        println!("No API key—skipping FIGI mapping (using empty map)");
        return Ok(HashMap::new());
    }
    let mut figi_to_lei: HashMap<String, String> = HashMap::new();
    let mut processed = 0;
    for (lei, isins) in lei_to_isins {
        // De-duplicate this LEI's ISINs before hitting the API.
        let unique_isins: Vec<String> = isins
            .iter()
            .cloned()
            .collect::<HashSet<String>>()
            .into_iter()
            .collect();
        for figi in client.map_isins_to_figi(&unique_isins).await? {
            figi_to_lei.insert(figi, lei.clone());
        }
        processed += 1;
        if processed % 100 == 0 {
            println!("Processed {} LEIs → {} total equity FIGIs", processed, figi_to_lei.len());
        }
        // Throttle per-LEI (heavy LEIs have 100s of ISINs)
        sleep(Duration::from_millis(100)).await;
    }
    // Persist the full map for later runs.
    let data_dir = std::path::Path::new("data");
    tokio::fs::create_dir_all(data_dir).await?;
    tokio::fs::write("data/figi_to_lei.json", serde_json::to_string_pretty(&figi_to_lei)?).await?;
    println!("Built FIGI→LEI map: {} mappings (equity-only)", figi_to_lei.len());
    Ok(figi_to_lei)
}
/// Load/build companies using FIGI as key (enriched with LEI via map).
///
/// NOTE(review): currently a stub — it only ensures the output directory
/// exists and returns an empty list. Population from `lei_to_isins` /
/// `figi_to_lei` is still TODO; the parameters are kept (underscored to
/// silence unused-variable warnings) for the planned implementation.
pub async fn load_or_build_companies_figi(
    _lei_to_isins: &HashMap<String, Vec<String>>,
    _figi_to_lei: &HashMap<String, String>,
) -> anyhow::Result<Vec<CompanyMetadata>> {
    let data_dir = std::path::Path::new("data/companies_by_figi");
    tokio::fs::create_dir_all(data_dir).await?;
    // No `mut` needed: nothing is pushed yet.
    let companies = Vec::new();
    println!("Built {} FIGI-keyed companies.", companies.len());
    Ok(companies)
}

View File

@@ -1,12 +1,208 @@
// src/corporate/scraper.rs // src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice}; use super::{types::*, helpers::*};
use csv::ReaderBuilder;
use fantoccini::{Client, Locator}; use fantoccini::{Client, Locator};
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use chrono::{NaiveDate, Datelike}; use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration}; use tokio::{time::{Duration as TokioDuration, sleep}};
use reqwest::Client as HttpClient;
use serde_json::Value;
use zip::ZipArchive;
use std::fs::File;
use std::{collections::HashMap};
use std::io::{Read, BufReader};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
/// Discover all exchanges where this ISIN trades by querying Yahoo Finance.
///
/// Three strategies, in order: (1) validate the caller-supplied primary
/// ticker, (2) search Yahoo for the ISIN and validate each EQUITY hit,
/// (3) probe the base ticker against a list of common exchange suffixes.
/// Every candidate is double-checked via `check_ticker_exists`, which
/// rejects non-equity instruments. Failures are logged and swallowed — the
/// function itself only errors if the fantoccini/network plumbing does.
pub async fn discover_available_exchanges(isin: &str, known_ticker: &str) -> anyhow::Result<Vec<TickerInfo>> {
    println!(" Discovering exchanges for ISIN {}", isin);
    let mut discovered_tickers = Vec::new();
    // Try the primary ticker first
    if let Ok(info) = check_ticker_exists(known_ticker).await {
        discovered_tickers.push(info);
    }
    // Search for ISIN directly on Yahoo to find other listings
    let search_url = format!(
        "https://query2.finance.yahoo.com/v1/finance/search?q={}&quotesCount=20&newsCount=0",
        isin
    );
    match HttpClient::new()
        .get(&search_url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await
    {
        Ok(resp) => {
            if let Ok(json) = resp.json::<Value>().await {
                if let Some(quotes) = json["quotes"].as_array() {
                    for quote in quotes {
                        // First: filter by quoteType directly from search results (faster rejection)
                        let quote_type = quote["quoteType"].as_str().unwrap_or("");
                        if quote_type.to_uppercase() != "EQUITY" {
                            continue; // Skip bonds, ETFs, mutual funds, options, etc.
                        }
                        if let Some(symbol) = quote["symbol"].as_str() {
                            // Avoid duplicates
                            if discovered_tickers.iter().any(|t: &TickerInfo| t.ticker == symbol) {
                                continue;
                            }
                            // Double-check with full quote data (some search results are misleading)
                            match check_ticker_exists(symbol).await {
                                Ok(info) => {
                                    println!(" Found equity listing: {} on {} ({})",
                                        symbol, info.exchange_mic, info.currency);
                                    discovered_tickers.push(info);
                                }
                                Err(e) => {
                                    // Most common: it's not actually equity or not tradable
                                    // println!(" Rejected {}: {}", symbol, e);
                                    continue;
                                }
                            }
                            // Be respectful to Yahoo (only reached on success;
                            // the Err arm above `continue`s past this sleep)
                            sleep(TokioDuration::from_millis(120)).await;
                        }
                    }
                }
            }
        }
        Err(e) => println!(" Search API error: {}", e),
    }
    // Also try common exchange suffixes for the base ticker
    // (split('.').next() always yields Some, even for an empty string)
    if let Some(base) = known_ticker.split('.').next() {
        let suffixes = vec![
            "",     // US
            ".L",   // London
            ".DE",  // Frankfurt/XETRA
            ".PA",  // Paris
            ".AS",  // Amsterdam
            ".MI",  // Milan
            ".SW",  // Switzerland
            ".T",   // Tokyo
            ".HK",  // Hong Kong
            ".SS",  // Shanghai
            ".SZ",  // Shenzhen
            ".TO",  // Toronto
            ".AX",  // Australia
            ".SA",  // Brazil
            ".MC",  // Madrid
            ".BO",  // Bombay
            ".NS",  // National Stock Exchange India
        ];
        for suffix in suffixes {
            let test_ticker = format!("{}{}", base, suffix);
            // Skip if already found
            if discovered_tickers.iter().any(|t| t.ticker == test_ticker) {
                continue;
            }
            if let Ok(info) = check_ticker_exists(&test_ticker).await {
                discovered_tickers.push(info);
                sleep(TokioDuration::from_millis(100)).await;
            }
        }
    }
    println!(" Found {} tradable exchanges", discovered_tickers.len());
    Ok(discovered_tickers)
}
/// Check if a ticker exists and get its exchange/currency info
async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
let url = format!(
"https://query1.finance.yahoo.com/v10/finance/quoteSummary/{}?modules=price",
ticker
);
let resp = HttpClient::new()
.get(&url)
.header("User-Agent", USER_AGENT)
.send()
.await?;
let json: Value = resp.json().await?;
if let Some(result) = json["quoteSummary"]["result"].as_array() {
if result.is_empty() {
return Err(anyhow::anyhow!("No quote data for {}", ticker));
}
let quote = &result[0]["price"];
// CRITICAL: Only accept EQUITY securities
let quote_type = quote["quoteType"]
.as_str()
.unwrap_or("")
.to_uppercase();
if quote_type != "EQUITY" {
// Optional: debug what was filtered
println!(" → Skipping {} (quoteType: {})", ticker, quote_type);
return Err(anyhow::anyhow!("Not an equity: {}", quote_type));
}
let exchange = quote["exchange"].as_str().unwrap_or("");
let currency = quote["currency"].as_str().unwrap_or("USD");
let short_name = quote["shortName"].as_str().unwrap_or("");
// Optional: extra sanity — make sure it's not a bond masquerading as equity
if short_name.to_uppercase().contains("BOND") ||
short_name.to_uppercase().contains("NOTE") ||
short_name.to_uppercase().contains("DEBENTURE") {
return Err(anyhow::anyhow!("Name suggests debt security"));
}
if !exchange.is_empty() {
return Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic: exchange.to_string(),
currency: currency.to_string(),
primary: false,
});
}
}
Err(anyhow::anyhow!("Invalid or missing data for {}", ticker))
}
/// Convert Yahoo's exchange name to a MIC code (best effort).
/// Unknown names fall through unchanged.
fn exchange_name_to_mic(name: &str) -> String {
    // (Yahoo aliases, ISO 10383 MIC) lookup table.
    const TABLE: &[(&[&str], &str)] = &[
        (&["NMS", "NasdaqGS", "NASDAQ"], "XNAS"),
        (&["NYQ", "NYSE"], "XNYS"),
        (&["LSE", "London"], "XLON"),
        (&["FRA", "Frankfurt", "GER", "XETRA"], "XFRA"),
        (&["PAR", "Paris"], "XPAR"),
        (&["AMS", "Amsterdam"], "XAMS"),
        (&["MIL", "Milan"], "XMIL"),
        (&["JPX", "Tokyo"], "XJPX"),
        (&["HKG", "Hong Kong"], "XHKG"),
        (&["SHH", "Shanghai"], "XSHG"),
        (&["SHZ", "Shenzhen"], "XSHE"),
        (&["TOR", "Toronto"], "XTSE"),
        (&["ASX", "Australia"], "XASX"),
        (&["SAU", "Saudi"], "XSAU"),
        (&["SWX", "Switzerland"], "XSWX"),
        (&["BSE", "Bombay"], "XBSE"),
        (&["NSE", "NSI"], "XNSE"),
        (&["TAI", "Taiwan"], "XTAI"),
        (&["SAO", "Sao Paulo"], "BVMF"),
        (&["MCE", "Madrid"], "XMAD"),
    ];
    TABLE
        .iter()
        .find(|(aliases, _)| aliases.contains(&name))
        .map(|(_, mic)| (*mic).to_string())
        .unwrap_or_else(|| name.to_string()) // Fallback to name itself
}
pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> { pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
let script = r#" let script = r#"
(() => { (() => {
@@ -24,22 +220,21 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
if done { if done {
break; break;
} }
sleep(Duration::from_millis(500)).await; sleep(TokioDuration::from_millis(500)).await;
} }
Ok(()) Ok(())
} }
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> { pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker); let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
client.goto(&url).await?; client.goto(&url).await?;
dismiss_yahoo_consent(client).await?; dismiss_yahoo_consent(client).await?;
// Load all by clicking "Show More" if present
loop { loop {
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await { match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
Ok(btn) => { Ok(btn) => {
btn.click().await?; btn.click().await?;
sleep(Duration::from_secs(2)).await; sleep(TokioDuration::from_secs(2)).await;
} }
Err(_) => break, Err(_) => break,
} }
@@ -85,7 +280,7 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
ticker: ticker.to_string(), ticker: ticker.to_string(),
date: date.format("%Y-%m-%d").to_string(), date: date.format("%Y-%m-%d").to_string(),
time, time,
period: "".to_string(), // No period info available, set to empty period: "".to_string(),
eps_forecast, eps_forecast,
eps_actual, eps_actual,
revenue_forecast: None, revenue_forecast: None,
@@ -98,47 +293,399 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
Ok(events) Ok(events)
} }
pub async fn fetch_price_history(client: &Client, ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> { fn parse_price(v: Option<&Value>) -> f64 {
let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")? v.and_then(|x| x.as_str())
.and_hms_opt(0, 0, 0).unwrap().and_utc() .and_then(|s| s.replace('$', "").replace(',', "").parse::<f64>().ok())
.timestamp(); .or_else(|| v.and_then(|x| x.as_f64()))
.unwrap_or(0.0)
}
let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")? fn parse_volume(v: Option<&Value>) -> u64 {
.succ_opt().unwrap() v.and_then(|x| x.as_str())
.and_hms_opt(0, 0, 0).unwrap().and_utc() .and_then(|s| s.replace(',', "").parse::<u64>().ok())
.timestamp(); .or_else(|| v.and_then(|x| x.as_u64()))
.unwrap_or(0)
}
/// Fetch daily OHLCV history for `ticker` between `start_str` and `end_str`
/// (inclusive, "%Y-%m-%d") from the Yahoo Finance v8 chart API.
///
/// The span is walked in ~2-year chunks. A chunk with no bars (pre-listing
/// window, or a transient Yahoo error) is now skipped instead of aborting
/// the whole fetch — previously a single empty chunk failed the entire
/// multi-year history. Bars are returned sorted by (date, time) with the
/// one-day chunk-boundary overlap de-duplicated.
///
/// # Errors
/// Date-parse failures and network/JSON failures on individual requests.
pub async fn fetch_daily_price_history(
    ticker: &str,
    start_str: &str,
    end_str: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
    let start = NaiveDate::parse_from_str(start_str, "%Y-%m-%d")?;
    // +1 day so the end date itself is covered by the last chunk.
    let end = NaiveDate::parse_from_str(end_str, "%Y-%m-%d")? + Duration::days(1);
    let mut all_prices = Vec::new();
    let mut current = start;
    while current < end {
        // ~2 years per request keeps Yahoo's range limits happy.
        let chunk_end = current + Duration::days(730);
        let actual_end = chunk_end.min(end);
        let period1 = current.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
        let period2 = actual_end.and_hms_opt(0, 0, 0).unwrap().and_utc().timestamp();
        println!("  Fetching {ticker} {} -> {}", current, actual_end - Duration::days(1));
        let url = format!(
            "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=1d&includeAdjustedClose=true"
        );
        let json: Value = HttpClient::new()
            .get(&url)
            .header("User-Agent", USER_AGENT)
            .send()
            .await?
            .json()
            .await?;
        let result = &json["chart"]["result"][0];
        // A window with no bars should not abort the entire history fetch.
        let timestamps = match result["timestamp"].as_array() {
            Some(t) => t,
            None => {
                println!("  No data for {ticker} in chunk starting {current}; skipping");
                current = actual_end;
                continue;
            }
        };
        let quote = &result["indicators"]["quote"][0];
        let meta = &result["meta"];
        let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
        let opens = quote["open"].as_array();
        let highs = quote["high"].as_array();
        let lows = quote["low"].as_array();
        let closes = quote["close"].as_array();
        // Fall back to raw closes when Yahoo omits the adjclose series.
        let adj_closes = result["indicators"]["adjclose"][0]["adjclose"].as_array()
            .or_else(|| closes);
        let volumes = quote["volume"].as_array();
        for (i, ts_val) in timestamps.iter().enumerate() {
            let ts = ts_val.as_i64().unwrap_or(0);
            let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
            let date_str = dt.format("%Y-%m-%d").to_string();
            // Chunk edges can spill outside the requested range. ISO dates are
            // zero-padded, so lexicographic order == chronological order; no
            // need to allocate Strings for the bounds.
            if date_str.as_str() < start_str || date_str.as_str() > end_str {
                continue;
            }
            let open = parse_price(opens.and_then(|a| a.get(i)));
            let high = parse_price(highs.and_then(|a| a.get(i)));
            let low = parse_price(lows.and_then(|a| a.get(i)));
            let close = parse_price(closes.and_then(|a| a.get(i)));
            let adj_close = parse_price(adj_closes.and_then(|a| a.get(i)));
            let volume = parse_volume(volumes.and_then(|a| a.get(i)));
            all_prices.push(CompanyPrice {
                ticker: ticker.to_string(),
                date: date_str,
                time: "".to_string(),
                open,
                high,
                low,
                close,
                adj_close,
                volume,
                currency: currency.clone(),
            });
        }
        sleep(TokioDuration::from_millis(200)).await;
        current = actual_end;
    }
    all_prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
    // Adjacent chunks share a boundary day — drop the duplicates.
    all_prices.dedup_by(|a, b| a.date == b.date && a.time == b.time);
    println!("  Got {} daily bars for {ticker}", all_prices.len());
    Ok(all_prices)
}
pub async fn fetch_price_history_5min(
ticker: &str,
_start: &str,
_end: &str,
) -> anyhow::Result<Vec<CompanyPrice>> {
let now = Utc::now().timestamp();
let period1 = now - 5184000;
let period2 = now;
let url = format!( let url = format!(
"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true" "https://query1.finance.yahoo.com/v8/finance/chart/{ticker}?period1={period1}&period2={period2}&interval=5m&includeAdjustedClose=true"
); );
client.goto(&url).await?; let json: Value = HttpClient::new()
let csv = client.source().await?; .get(&url)
.header("User-Agent", USER_AGENT)
.send()
.await?
.json()
.await?;
let result = &json["chart"]["result"][0];
let timestamps = result["timestamp"].as_array().ok_or_else(|| anyhow::anyhow!("No timestamps"))?;
let quote = &result["indicators"]["quote"][0];
let meta = &result["meta"];
let currency = meta["currency"].as_str().unwrap_or("USD").to_string();
let mut prices = Vec::new(); let mut prices = Vec::new();
for line in csv.lines().skip(1) {
let cols: Vec<&str> = line.split(',').collect(); for (i, ts_val) in timestamps.iter().enumerate() {
if cols.len() < 7 { continue; } let ts = ts_val.as_i64().unwrap_or(0);
let dt: DateTime<Utc> = DateTime::from_timestamp(ts, 0).unwrap_or_default();
let date_str = dt.format("%Y-%m-%d").to_string();
let time_str = dt.format("%H:%M:%S").to_string();
let open = parse_price(quote["open"].as_array().and_then(|a| a.get(i)));
let high = parse_price(quote["high"].as_array().and_then(|a| a.get(i)));
let low = parse_price(quote["low"].as_array().and_then(|a| a.get(i)));
let close = parse_price(quote["close"].as_array().and_then(|a| a.get(i)));
let volume = parse_volume(quote["volume"].as_array().and_then(|a| a.get(i)));
prices.push(CompanyPrice { prices.push(CompanyPrice {
ticker: ticker.to_string(), ticker: ticker.to_string(),
date: cols[0].to_string(), date: date_str,
open: cols[1].parse()?, time: time_str,
high: cols[2].parse()?, open,
low: cols[3].parse()?, high,
close: cols[4].parse()?, low,
adj_close: cols[5].parse()?, close,
volume: cols[6].parse()?, adj_close: close,
volume,
currency: currency.clone(),
}); });
} }
prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
Ok(prices) Ok(prices)
} }
fn parse_float(s: &str) -> Option<f64> { /// Fetch the URL of the latest ISIN↔LEI mapping CSV from GLEIF
s.replace("--", "").replace(",", "").parse::<f64>().ok() /// Overengineered; we could just use the static URL, but this shows how to scrape if needed
pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> {
let url = format!("https://www.gleif.org/de/lei-data/lei-mapping/download-isin-to-lei-relationship-files");
client.goto(&url).await?;
let html = client.source().await?;
let _document = Html::parse_document(&html);
let _row_sel = Selector::parse("table tbody tr").unwrap();
let isin_lei = "".to_string();
Ok(isin_lei)
} }
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> { pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
NaiveDate::parse_from_str(s, "%B %d, %Y") let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
.or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y")) let zip_path = "data/isin_lei.zip";
.map_err(|_| anyhow::anyhow!("Bad date: {s}")) let csv_path = "data/isin_lei.csv";
if let Err(e) = std::fs::create_dir_all("data") {
println!("Failed to create data directory: {e}");
return Ok(None);
}
// Download ZIP
let bytes = match reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(std::time::Duration::from_secs(30))
.build()
.and_then(|c| Ok(c))
{
Ok(client) => match client.get(url).send().await {
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
Ok(b) => b,
Err(e) => {
println!("Failed to read ZIP bytes: {e}");
return Ok(None);
}
},
Ok(resp) => {
println!("Server returned HTTP {}", resp.status());
return Ok(None);
}
Err(e) => {
println!("Failed to download ISIN/LEI ZIP: {e}");
return Ok(None);
}
},
Err(e) => {
println!("Failed to create HTTP client: {e}");
return Ok(None);
}
};
if let Err(e) = tokio::fs::write(zip_path, &bytes).await {
println!("Failed to write ZIP file: {e}");
return Ok(None);
}
// Extract CSV
let archive = match std::fs::File::open(zip_path)
.map(ZipArchive::new)
{
Ok(Ok(a)) => a,
Ok(Err(e)) => {
println!("Invalid ZIP: {e}");
return Ok(None);
}
Err(e) => {
println!("Cannot open ZIP file: {e}");
return Ok(None);
}
};
let mut archive = archive;
let idx = match (0..archive.len()).find(|&i| {
archive.by_index(i)
.map(|f| f.name().ends_with(".csv"))
.unwrap_or(false)
}) {
Some(i) => i,
None => {
println!("ZIP did not contain a CSV file");
return Ok(None);
}
};
let mut csv_file = match archive.by_index(idx) {
Ok(f) => f,
Err(e) => {
println!("Failed to read CSV entry: {e}");
return Ok(None);
}
};
let mut csv_bytes = Vec::new();
if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
println!("Failed to extract CSV: {e}");
return Ok(None);
}
if let Err(e) = tokio::fs::write(csv_path, &csv_bytes).await {
println!("Failed to save CSV file: {e}");
return Ok(None);
}
Ok(Some(csv_path.to_string()))
}
/// Refresh the on-disk ISIN↔LEI CSV and parse it into a LEI → [ISIN] map.
/// Any failure degrades to an empty map so the caller can keep going.
pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
    // Step 1: refresh the CSV on disk (async download + unzip).
    let csv_path = match download_isin_lei_csv().await? {
        Some(path) => path,
        None => {
            println!("ISIN/LEI download failed; continuing with empty map");
            return Ok(HashMap::new());
        }
    };
    // Step 2: parse synchronously — even a few million rows is fast enough.
    let file = match std::fs::File::open(&csv_path) {
        Ok(handle) => handle,
        Err(err) => {
            println!("Cannot open CSV '{}': {}", csv_path, err);
            return Ok(HashMap::new());
        }
    };
    let mut csv_reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .from_reader(std::io::BufReader::new(file));
    let mut lei_to_isins: HashMap<String, Vec<String>> = HashMap::new();
    for row in csv_reader.records() {
        match row {
            // Expected layout: column 0 = LEI, column 1 = ISIN.
            Ok(record) if record.len() >= 2 => {
                let lei = record[0].to_string();
                let isin = record[1].to_string();
                lei_to_isins.entry(lei).or_default().push(isin);
            }
            // Short/malformed rows are skipped silently, as before.
            Ok(_) => continue,
            Err(err) => {
                println!("CSV parse error: {}", err);
                continue;
            }
        }
    }
    println!("Loaded ISIN↔LEI map with {} LEIs and {} total ISINs",
        lei_to_isins.len(),
        lei_to_isins.values().map(|v| v.len()).sum::<usize>()
    );
    Ok(lei_to_isins)
}
pub async fn get_primary_isin_and_name(
client: &Client, // Pass your existing Selenium client
ticker: &str,
) -> anyhow::Result<PrimaryInfo> {
// Navigate to the actual quote page (always works)
let quote_url = format!("https://finance.yahoo.com/quote/{}", ticker);
client.goto(&quote_url).await?;
// Dismiss overlays/banners (your function + guce-specific)
reject_yahoo_cookies(client).await?;
// Wait for page to load (key data elements)
sleep(TokioDuration::from_millis(2000)).await;
// Get page HTML and parse
let html = client.source().await?;
let document = Html::parse_document(&html);
// Selectors for key fields (tested on real Yahoo pages Nov 2025)
let name_sel = Selector::parse("h1[data-testid='qsp-price-header']").unwrap_or_else(|_| Selector::parse("h1").unwrap());
let isin_sel = Selector::parse("[data-testid='qsp-symbol'] + div [data-field='isin']").unwrap_or_else(|_| Selector::parse("[data-field='isin']").unwrap());
let exchange_sel = Selector::parse("[data-testid='qsp-market'] span").unwrap_or_else(|_| Selector::parse(".TopNav__Exchange").unwrap());
let currency_sel = Selector::parse("[data-testid='qsp-price'] span:contains('USD')").unwrap_or_else(|_| Selector::parse(".TopNav__Currency").unwrap()); // Adjust for dynamic
let name_elem = document.select(&name_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let isin_elem = document.select(&isin_sel).next().map(|e| e.text().collect::<String>().trim().to_uppercase());
let exchange_elem = document.select(&exchange_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let currency_elem = document.select(&currency_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
let name = name_elem.unwrap_or_else(|| ticker.to_string());
let isin = isin_elem.unwrap_or_default();
let exchange_mic = exchange_elem.unwrap_or_default();
let currency = currency_elem.unwrap_or_else(|| "USD".to_string());
// Validate ISIN
let valid_isin = if isin.len() == 12 && isin.chars().all(|c| c.is_alphanumeric()) {
isin
} else {
"".to_string()
};
println!(" → Scraped {}: {} | ISIN: {} | Exchange: {}", ticker, name, valid_isin, exchange_mic);
Ok(PrimaryInfo {
isin: valid_isin,
name,
exchange_mic,
currency,
})
}
pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
for _ in 0..10 {
let clicked: bool = client
.execute(
r#"(() => {
const btn = document.querySelector('#consent-page .reject-all');
if (btn) {
btn.click();
return true;
}
return false;
})()"#,
vec![],
)
.await?
.as_bool()
.unwrap_or(false);
if clicked { break; }
sleep(TokioDuration::from_millis(500)).await;
}
println!("Rejected Yahoo cookies if button existed");
Ok(())
} }

View File

@@ -1,11 +1,13 @@
// src/corporate/storage.rs // src/corporate/storage.rs
use super::types::{CompanyEvent, CompanyPrice}; use super::{types::*, helpers::*, scraper::get_primary_isin_and_name};
use std::collections::{HashMap, HashSet}; use crate::config;
use tokio::fs;
use chrono::{Local, NaiveDate};
/// Load all events from disk into a HashMap<ticker|date, event> use tokio::fs;
async fn load_all_events_map() -> anyhow::Result<HashMap<String, CompanyEvent>> { use chrono::{Datelike, NaiveDate};
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
pub async fn load_existing_events() -> anyhow::Result<HashMap<String, CompanyEvent>> {
let mut map = HashMap::new(); let mut map = HashMap::new();
let dir = std::path::Path::new("corporate_events"); let dir = std::path::Path::new("corporate_events");
if !dir.exists() { if !dir.exists() {
@@ -16,11 +18,12 @@ async fn load_all_events_map() -> anyhow::Result<HashMap<String, CompanyEvent>>
while let Some(entry) = entries.next_entry().await? { while let Some(entry) = entries.next_entry().await? {
let path = entry.path(); let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") { if path.extension().and_then(|s| s.to_str()) == Some("json") {
let content = fs::read_to_string(&path).await?; let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if let Ok(events) = serde_json::from_str::<Vec<CompanyEvent>>(&content) { if name.starts_with("events_") && name.len() == 17 {
let content = fs::read_to_string(&path).await?;
let events: Vec<CompanyEvent> = serde_json::from_str(&content)?;
for event in events { for event in events {
let key = format!("{}|{}", event.ticker, event.date); map.insert(event_key(&event), event);
map.insert(key, event);
} }
} }
} }
@@ -28,37 +31,222 @@ async fn load_all_events_map() -> anyhow::Result<HashMap<String, CompanyEvent>>
Ok(map) Ok(map)
} }
/// Merge new events with existing ones and save back to disk pub async fn save_optimized_events(events: HashMap<String, CompanyEvent>) -> anyhow::Result<()> {
pub async fn merge_and_save_events(ticker: &str, new_events: Vec<CompanyEvent>) -> anyhow::Result<()> {
let mut existing = load_all_events_map().await?;
// Insert or update
for event in new_events {
let key = format!("{}|{}", event.ticker, event.date);
existing.insert(key, event);
}
// Convert back to Vec and save (simple single file for now)
let all_events: Vec<CompanyEvent> = existing.into_values().collect();
let dir = std::path::Path::new("corporate_events"); let dir = std::path::Path::new("corporate_events");
fs::create_dir_all(dir).await?; fs::create_dir_all(dir).await?;
let path = dir.join("all_events.json");
let json = serde_json::to_string_pretty(&all_events)?; let mut entries = fs::read_dir(dir).await?;
fs::write(&path, json).await?; while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
if name.starts_with("events_") && path.extension().map(|e| e == "json").unwrap_or(false) {
fs::remove_file(&path).await?;
}
}
let mut sorted: Vec<_> = events.into_values().collect();
sorted.sort_by_key(|e| (e.ticker.clone(), e.date.clone()));
let mut by_month: HashMap<String, Vec<CompanyEvent>> = HashMap::new();
for e in sorted {
if let Ok(d) = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d") {
let key = format!("{}-{:02}", d.year(), d.month());
by_month.entry(key).or_default().push(e);
}
}
for (month, list) in by_month {
let path = dir.join(format!("events_{}.json", month));
fs::write(&path, serde_json::to_string_pretty(&list)?).await?;
}
Ok(()) Ok(())
} }
/// Save price history for a single ticker (overwrite old file) pub async fn save_changes(changes: &[CompanyEventChange]) -> anyhow::Result<()> {
pub async fn save_prices_for_ticker(ticker: &str, prices: Vec<CompanyPrice>) -> anyhow::Result<()> { if changes.is_empty() { return Ok(()); }
let dir = std::path::Path::new("corporate_prices"); let dir = std::path::Path::new("corporate_event_changes");
fs::create_dir_all(dir).await?; fs::create_dir_all(dir).await?;
let path = dir.join(format!("{}.json", ticker));
// Optional: sort by date let mut by_month: HashMap<String, Vec<CompanyEventChange>> = HashMap::new();
let mut prices = prices; for c in changes {
prices.sort_by_key(|p| p.date.clone()); if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") {
let key = format!("{}-{:02}", d.year(), d.month());
by_month.entry(key).or_default().push(c.clone());
}
}
for (month, list) in by_month {
let path = dir.join(format!("changes_{}.json", month));
let mut all = if path.exists() {
let s = fs::read_to_string(&path).await?;
serde_json::from_str(&s).unwrap_or_default()
} else { vec![] };
all.extend(list);
fs::write(&path, serde_json::to_string_pretty(&all)?).await?;
}
Ok(())
}
/// Persist one ticker's candles under
/// `corporate_prices/<ticker>/<timeframe>/prices.json`, sorted by (date, time).
pub async fn save_prices_for_ticker(ticker: &str, timeframe: &str, mut prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
    // Dots in tickers (e.g. "0700.HK") would read like extensions — replace them.
    let target_dir = Path::new("corporate_prices")
        .join(ticker.replace(".", "_"))
        .join(timeframe);
    fs::create_dir_all(&target_dir).await?;
    // Deterministic ordering keeps the JSON diff-friendly.
    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
    let serialized = serde_json::to_string_pretty(&prices)?;
    fs::write(target_dir.join("prices.json"), serialized).await?;
    Ok(())
}
/// Read the static seed list from `src/data/companies.json`.
/// Returns an empty list (with a console note) when the file is absent.
pub async fn _load_companies() -> Result<Vec<CompanyMetadata>, anyhow::Error> {
    let path = Path::new("src/data/companies.json");
    if !path.exists() {
        println!("Missing companies.json file at src/data/companies.json");
        return Ok(vec![]);
    }
    let raw = fs::read_to_string(path).await?;
    Ok(serde_json::from_str::<Vec<CompanyMetadata>>(&raw)?)
}
/// Root folder holding all price data for one legal entity, keyed by LEI.
pub fn get_company_dir(lei: &str) -> PathBuf {
    Path::new("corporate_prices").join(lei)
}
/// Create the on-disk layout for one company:
/// `<base>/{5min, daily, aggregated/5min, aggregated/daily}`.
///
/// `create_dir_all` creates missing parents, so creating the leaf folders also
/// guarantees the base folder exists (the previous explicit base entry was
/// redundant); every call is a no-op for folders that already exist.
///
/// NOTE(review): the parameter is named `isin` but `get_company_dir` documents
/// its key as a LEI and callers pass the LEI — confirm and unify the naming.
pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
    let base = get_company_dir(isin);
    let leaves = [
        base.join("5min"),
        base.join("daily"),
        base.join("aggregated").join("5min"),
        base.join("aggregated").join("daily"),
    ];
    for dir in leaves {
        fs::create_dir_all(&dir).await?;
    }
    Ok(())
}
/// Write `metadata.json` into the company's folder, creating the folder first.
pub async fn save_company_metadata(company: &CompanyMetadata) -> anyhow::Result<()> {
    let company_dir = get_company_dir(&company.lei);
    fs::create_dir_all(&company_dir).await?;
    let body = serde_json::to_string_pretty(company)?;
    fs::write(company_dir.join("metadata.json"), body).await?;
    Ok(())
}
/// Read and deserialize a company's `metadata.json`; errors if missing or invalid.
pub async fn load_company_metadata(lei: &str) -> anyhow::Result<CompanyMetadata> {
    let raw = fs::read_to_string(get_company_dir(lei).join("metadata.json")).await?;
    Ok(serde_json::from_str(&raw)?)
}
/// Overwrite `available_exchanges.json` for this company with `exchanges`.
pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
    let company_dir = get_company_dir(isin);
    fs::create_dir_all(&company_dir).await?;
    let body = serde_json::to_string_pretty(&exchanges)?;
    fs::write(company_dir.join("available_exchanges.json"), body).await?;
    Ok(())
}
/// Load the exchanges already known for this LEI; empty list if none saved yet.
pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
    let path = get_company_dir(lei).join("available_exchanges.json");
    if !path.exists() {
        return Ok(vec![]);
    }
    let raw = fs::read_to_string(&path).await?;
    Ok(serde_json::from_str(&raw)?)
}
/// Persist candles for one (LEI, source ticker, timeframe) triple under
/// `corporate_prices/<lei>/<timeframe>/<source>/prices.json`, sorted by
/// (date, time) for deterministic output.
///
/// Takes `prices` as a `mut` binding directly — the previous
/// `let mut prices = prices;` re-binding inside the body was redundant.
pub async fn save_prices_by_source(
    lei: &str,
    source_ticker: &str,
    timeframe: &str,
    mut prices: Vec<CompanyPrice>,
) -> anyhow::Result<()> {
    // Sanitize the ticker so it is a safe directory name on all platforms.
    let source_safe = source_ticker.replace(".", "_").replace("/", "_");
    let dir = get_company_dir(lei).join(timeframe).join(&source_safe);
    fs::create_dir_all(&dir).await?;
    prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
    fs::write(dir.join("prices.json"), serde_json::to_string_pretty(&prices)?).await?;
    Ok(())
}
/// Update available_exchanges.json with fetch results.
///
/// Records a (possibly first) successful fetch for `ticker` on `exchange_mic`:
/// updates the matching entry in place, or appends a new one with an inferred
/// currency, then rewrites the file.
pub async fn update_available_exchange(
    isin: &str,
    ticker: &str,
    exchange_mic: &str,
    has_daily: bool,
    has_5min: bool,
) -> anyhow::Result<()> {
    let mut exchanges = load_available_exchanges(isin).await?;
    match exchanges.iter_mut().find(|e| e.ticker == ticker) {
        Some(existing) => existing.record_success(has_daily, has_5min),
        None => {
            // First sighting of this ticker — the currency is not known at this
            // call site, so infer it from the ticker's suffix (defaults to USD).
            let currency = infer_currency_from_ticker(ticker);
            let mut entry = AvailableExchange::new(
                ticker.to_string(),
                exchange_mic.to_string(),
                currency,
            );
            entry.record_success(has_daily, has_5min);
            exchanges.push(entry);
        }
    }
    save_available_exchanges(isin, exchanges).await
}
/// Add a newly discovered exchange before fetching.
///
/// Registers `ticker_info` in the company's exchange list if it is not yet
/// present; when the ticker is already known the file is left untouched.
pub async fn add_discovered_exchange(
    isin: &str,
    ticker_info: &TickerInfo,
) -> anyhow::Result<()> {
    let mut exchanges = load_available_exchanges(isin).await?;
    let already_known = exchanges.iter().any(|e| e.ticker == ticker_info.ticker);
    if !already_known {
        exchanges.push(AvailableExchange::new(
            ticker_info.ticker.clone(),
            ticker_info.exchange_mic.clone(),
            ticker_info.currency.clone(),
        ));
        save_available_exchanges(isin, exchanges).await?;
    }
    Ok(())
}
/// Infer a listing's trading currency from its Yahoo ticker suffix;
/// unknown or missing suffixes default to USD.
fn infer_currency_from_ticker(ticker: &str) -> String {
    // Suffix → currency table, checked in the same order as before.
    const SUFFIX_CURRENCY: &[(&str, &str)] = &[
        (".L", "GBP"),
        (".PA", "EUR"),
        (".DE", "EUR"),
        (".AS", "EUR"),
        (".MI", "EUR"),
        (".SW", "CHF"),
        (".T", "JPY"),
        (".HK", "HKD"),
        (".SS", "CNY"),
        (".SZ", "CNY"),
        (".TO", "CAD"),
        (".AX", "AUD"),
        (".SA", "BRL"),
        (".MC", "EUR"),
        (".BO", "INR"),
        (".NS", "INR"),
    ];
    SUFFIX_CURRENCY
        .iter()
        .find(|(suffix, _)| ticker.ends_with(suffix))
        .map(|(_, currency)| (*currency).to_string())
        .unwrap_or_else(|| "USD".to_string())
}

View File

@@ -19,20 +19,86 @@ pub struct CompanyEvent {
pub struct CompanyPrice { pub struct CompanyPrice {
pub ticker: String, pub ticker: String,
pub date: String, // YYYY-MM-DD pub date: String, // YYYY-MM-DD
pub time: String, // HH:MM:SS for intraday, "" for daily
pub open: f64, pub open: f64,
pub high: f64, pub high: f64,
pub low: f64, pub low: f64,
pub close: f64, pub close: f64,
pub adj_close: f64, pub adj_close: f64,
pub volume: u64, pub volume: u64,
pub currency: String,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEventChange { pub struct CompanyEventChange {
pub ticker: String, pub ticker: String,
pub date: String, pub date: String,
pub field: String, // "time", "eps_forecast", "eps_actual", "new_event" pub field_changed: String, // "time", "eps_forecast", "eps_actual", "new_event"
pub old_value: String, pub old_value: String,
pub new_value: String, pub new_value: String,
pub detected_at: String, pub detected_at: String,
}
/// One tradable listing (symbol + venue) of a legal entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TickerInfo {
    pub ticker: String,       // Venue-specific symbol, e.g. "7203.T"
    pub exchange_mic: String, // Market identifier code of the listing venue, e.g. "XNYS"
    pub currency: String,     // Trading currency of this listing
    pub isin: String, // ISIN belonging to this legal entity (primary + ADR + GDR)
}
/// Master record for one legal entity, keyed by LEI and persisted as
/// `metadata.json` in the company's folder.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyMetadata {
    pub lei: String,          // Legal Entity Identifier (a generated FAKE value when none is known)
    pub figi: Option<String>, // OpenFIGI identifier, when resolved
    pub name: String,         // Company display name
    pub primary_isin: String, // The most liquid / preferred one (used for folder fallback)
    pub tickers: Vec<TickerInfo>, // All known listings of this entity
}
/// Fields scraped from a Yahoo quote page (see `get_primary_isin_and_name`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PrimaryInfo {
    pub isin: String,         // Empty string when no plausible 12-char ISIN was found
    pub name: String,         // Falls back to the ticker symbol when not scraped
    pub exchange_mic: String, // Empty string when the exchange element was missing
    pub currency: String,     // Defaults to "USD" when not found on the page
}
/// Per-venue fetch status for one company, persisted in
/// `available_exchanges.json`. The `#[serde(default)]` fields keep older
/// files (written before those fields existed) deserializable.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AvailableExchange {
    pub exchange_mic: String,
    pub ticker: String,
    pub has_daily: bool, // Daily candles fetched successfully at least once
    pub has_5min: bool,  // 5-minute candles fetched successfully at least once
    pub last_successful_fetch: Option<String>, // YYYY-MM-DD
    #[serde(default)]
    pub currency: String,
    #[serde(default)]
    pub discovered_at: Option<String>, // When this exchange was first discovered
    #[serde(default)]
    pub fetch_count: u32, // How many times successfully fetched
}
impl AvailableExchange {
pub fn new(ticker: String, exchange_mic: String, currency: String) -> Self {
Self {
exchange_mic,
ticker,
has_daily: false,
has_5min: false,
last_successful_fetch: None,
currency,
discovered_at: Some(chrono::Local::now().format("%Y-%m-%d").to_string()),
fetch_count: 0,
}
}
pub fn record_success(&mut self, has_daily: bool, has_5min: bool) {
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
self.has_daily |= has_daily;
self.has_5min |= has_5min;
self.last_successful_fetch = Some(today);
self.fetch_count += 1;
}
} }

View File

@@ -1,31 +1,233 @@
// src/corporate/update.rs // src/corporate/update.rs
use super::{scraper::*, storage::*, types::*}; use super::{scraper::*, storage::*, helpers::*, types::*, aggregation::*, openfigi::*};
use crate::config::Config; use crate::config::Config;
use chrono::Local; use chrono::Local;
use std::collections::{HashMap, HashSet}; use std::collections::HashMap;
pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
println!("Starting LEI-based corporate update");
pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec<String>, config: &Config) -> anyhow::Result<()> { // 1. Download fresh GLEIF ISIN↔LEI mapping on every run
println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date); let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv().await {
Ok(map) => map,
Err(e) => {
println!("Warning: Failed to load ISIN↔LEI mapping: {}", e);
HashMap::new()
}
};
let figi_to_lei: HashMap<String, String> = match build_figi_to_lei_map(&lei_to_isins).await {
Ok(map) => map,
Err(e) => {
println!("Warning: Failed to build FIGI→LEI map: {}", e);
HashMap::new()
}
};
let today = chrono::Local::now().format("%Y-%m-%d").to_string(); let today = chrono::Local::now().format("%Y-%m-%d").to_string();
let mut existing_events = load_existing_events().await?;
for ticker in tickers { let mut companies: Vec<CompanyMetadata> = match load_or_build_companies_figi(&lei_to_isins, &figi_to_lei).await {
print!("{:6} ", ticker); Ok(comps) => comps,
Err(e) => {
println!("Error loading/building company metadata: {}", e);
return Err(e);
}
}; // Vec<CompanyMetadata> with lei, isins, tickers
// Earnings for mut company in companies {
if let Ok(events) = fetch_earnings_history(client, &ticker).await { println!("\nProcessing company: {} (LEI: {})", company.name, company.lei);
merge_and_save_events(&ticker, events.clone()).await?;
println!("{} earnings", events.len()); // === Enrich with ALL ISINs known to GLEIF (includes ADRs, GDRs, etc.) ===
if let Some(all_isins) = lei_to_isins.get(&company.lei) {
let mut seen = company.isins.iter().cloned().collect::<std::collections::HashSet<_>>();
for isin in all_isins {
if !seen.contains(isin) {
company.isins.push(isin.clone());
seen.insert(isin.clone());
}
}
} }
// Prices now using config.corporate_start_date // Ensure company directory exists (now uses LEI)
if let Ok(prices) = fetch_price_history(client, &ticker, &config.corporate_start_date, &today).await { //let figi_dir = format!("data/companies_by_figi/{}/", company.primary_figi);
save_prices_for_ticker(&ticker, prices).await?; ensure_company_dirs(&company.lei).await?;
save_company_metadata(&company).await?;
// === STEP 1: Discover additional exchanges using each known ISIN ===
let mut all_tickers = company.tickers.clone();
if let Some(primary_ticker) = company.tickers.iter().find(|t| t.primary) {
println!(" Discovering additional exchanges across {} ISIN(s)...", company.isins.len());
for isin in &company.isins {
println!(" → Checking ISIN: {}", isin);
match discover_available_exchanges(isin, &primary_ticker.ticker).await {
Ok(discovered) => {
if discovered.is_empty() {
println!(" No new exchanges found for {}", isin);
} else {
for disc in discovered {
if !all_tickers.iter().any(|t| t.ticker == disc.ticker && t.exchange_mic == disc.exchange_mic) {
println!(" New equity listing → {} ({}) via ISIN {}",
disc.ticker, disc.exchange_mic, isin);
all_tickers.push(disc);
}
}
}
}
Err(e) => println!(" Discovery failed for {}: {}", isin, e),
}
tokio::time::sleep(tokio::time::Duration::from_millis(300)).await;
}
} }
tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; // Save updated metadata if we found new listings
if all_tickers.len() > company.tickers.len() {
company.tickers = all_tickers.clone();
save_company_metadata(&company).await?;
println!(" Updated metadata: {} total tickers", all_tickers.len());
}
// === STEP 2: Fetch data from ALL available tickers ===
for ticker_info in &all_tickers {
let ticker = &ticker_info.ticker;
println!(" → Fetching: {} ({})", ticker, ticker_info.exchange_mic);
let mut daily_success = false;
let mut intraday_success = false;
// Earnings: only fetch from primary ticker to avoid duplicates
if ticker_info.primary {
if let Ok(new_events) = fetch_earnings_history(client, ticker).await {
let result = process_batch(&new_events, &mut existing_events, &today);
save_changes(&result.changes).await?;
println!(" Earnings events: {}", new_events.len());
}
}
// Daily prices
if let Ok(prices) = fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await {
if !prices.is_empty() {
save_prices_by_source(&company.lei, ticker, "daily", prices).await?;
daily_success = true;
}
}
// 5-minute intraday (last 60 days)
let sixty_days_ago = (chrono::Local::now() - chrono::Duration::days(60))
.format("%Y-%m-%d")
.to_string();
if let Ok(prices) = fetch_price_history_5min(ticker, &sixty_days_ago, &today).await {
if !prices.is_empty() {
save_prices_by_source(&company.lei, ticker, "5min", prices).await?;
intraday_success = true;
}
}
// Update available_exchanges.json (now under LEI folder)
update_available_exchange(
&company.lei,
ticker,
&ticker_info.exchange_mic,
daily_success,
intraday_success,
).await?;
tokio::time::sleep(tokio::time::Duration::from_millis(800)).await;
}
// === STEP 3: Aggregate all sources into unified USD prices ===
println!(" Aggregating multi-source price data (FX-adjusted)...");
if let Err(e) = aggregate_best_price_data(&company.lei).await {
println!(" Aggregation failed: {}", e);
} else {
println!(" Aggregation complete");
}
} }
// Final save of optimized earnings events
save_optimized_events(existing_events).await?;
println!("\nCorporate update complete (LEI-based)");
Ok(()) Ok(())
}
/// Fill in missing LEIs: first try to resolve one through the GLEIF ISIN↔LEI
/// map using any of the company's ISINs, otherwise stamp a recognizable fake
/// LEI so the downstream folder layout keeps working.
async fn enrich_companies_with_leis(
    companies: &mut Vec<CompanyMetadata>,
    lei_to_isins: &HashMap<String, Vec<String>>,
) {
    for company in companies.iter_mut() {
        if company.lei.is_empty() {
            // First ISIN that appears in any LEI's ISIN list wins.
            let resolved = company.isins.iter().find_map(|isin| {
                lei_to_isins
                    .iter()
                    .find(|(_, isins)| isins.contains(isin))
                    .map(|(lei, _)| lei.clone())
            });
            if let Some(lei) = resolved {
                println!("Found real LEI {} for {}", lei, company.name);
                company.lei = lei;
            }
        }
        // Last resort: placeholder clearly marked FAKE.
        if company.lei.is_empty() {
            company.lei = format!("FAKE{:019}", rand::random::<u64>());
            println!("No real LEI found → using fake for {}", company.name);
        }
    }
}
/// Outcome of merging a scraped event batch into the stored event map.
pub struct ProcessResult {
    pub changes: Vec<CompanyEventChange>, // Detected diffs, ready to persist
}
pub fn process_batch(
new_events: &[CompanyEvent],
existing: &mut HashMap<String, CompanyEvent>,
today: &str,
) -> ProcessResult {
let mut changes = Vec::new();
for new in new_events {
let key = event_key(new);
if let Some(old) = existing.get(&key) {
changes.extend(detect_changes(old, new, today));
existing.insert(key, new.clone());
continue;
}
// Check for time change on same date
let date_key = format!("{}|{}", new.ticker, new.date);
let mut found_old = None;
for (k, e) in existing.iter() {
if format!("{}|{}", e.ticker, e.date) == date_key && k != &key {
found_old = Some((k.clone(), e.clone()));
break;
}
}
if let Some((old_key, old_event)) = found_old {
if new.date.as_str() > today {
changes.push(CompanyEventChange {
ticker: new.ticker.clone(),
date: new.date.clone(),
field_changed: "time".to_string(),
old_value: old_event.time.clone(),
new_value: new.time.clone(),
detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
});
}
existing.remove(&old_key);
}
existing.insert(key, new.clone());
}
ProcessResult { changes }
} }

58
src/data/companies.json Normal file
View File

@@ -0,0 +1,58 @@
[
{
"lei": "8I5D5ASD7N5Z5P2K9M3J",
"isins": ["US46625H1005"],
"primary_isin": "US46625H1005",
"name": "JPMorgan Chase & Co.",
"tickers": [
{ "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true },
{ "ticker": "JPM-PC", "exchange_mic": "XNYS", "currency": "USD", "primary": false }
]
},
{
"lei": "5493001KJTIIGC8Y1R12",
"isins": ["US5949181045"],
"primary_isin": "US5949181045",
"name": "Microsoft Corporation",
"tickers": [
{ "ticker": "MSFT", "exchange_mic": "XNAS", "currency": "USD", "primary": true }
]
},
{
"lei": "529900T8BM49AURSDO55",
"isins": ["CNE000001P37"],
"primary_isin": "CNE000001P37",
"name": "Industrial and Commercial Bank of China",
"tickers": [
{ "ticker": "601398.SS", "exchange_mic": "XSHG", "currency": "CNY", "primary": true },
{ "ticker": "1398.HK", "exchange_mic": "XHKG", "currency": "HKD", "primary": false }
]
},
{
"lei": "519900X5W8K6C1FZ3B57",
"isins": ["JP3702200000"],
"primary_isin": "JP3702200000",
"name": "Toyota Motor Corporation",
"tickers": [
{ "ticker": "7203.T", "exchange_mic": "XJPX", "currency": "JPY", "primary": true },
{ "ticker": "TM", "exchange_mic": "XNYS", "currency": "USD", "primary": false }
]
},
{
"lei": "529900T8BM49AURSDO56",
"isins": ["HK0000069689"],
"primary_isin": "HK0000069689",
"name": "Tencent Holdings Limited",
"tickers": [
{ "ticker": "0700.HK", "exchange_mic": "XHKG", "currency": "HKD", "primary": true },
{ "ticker": "TCEHY", "exchange_mic": "OTCM", "currency": "USD", "primary": false }
]
},
{
"lei": "8I5D5Q1L7N5Z5P2K9M3J",
"isins": ["US90953F1049"],
"primary_isin": "US90953F1049",
"name": "Test Bonds Filter",
"tickers": [{ "ticker": "JPM", "exchange_mic": "XNYS", "currency": "USD", "primary": true }]
}
]

260
src/data/exchanges.json Normal file
View File

@@ -0,0 +1,260 @@
{
"exchanges": [
{
"mic": "XNYS",
"name": "New York Stock Exchange",
"country": "United States",
"city": "New York City",
"market_cap_trillion_usd": 30.92,
"timezone": "America/New_York",
"tz_offset": "-05:00",
"dst": "Mar–Nov",
"open_local": "09:30",
"close_local": "16:00",
"lunch_break": false,
"open_utc": "14:30",
"close_utc": "21:00",
"currency": "USD"
},
{
"mic": "XNAS",
"name": "Nasdaq",
"country": "United States",
"city": "New York City",
"market_cap_trillion_usd": 31.96,
"timezone": "America/New_York",
"tz_offset": "-05:00",
"dst": "Mar–Nov",
"open_local": "09:30",
"close_local": "16:00",
"lunch_break": false,
"open_utc": "14:30",
"close_utc": "21:00",
"currency": "USD"
},
{
"mic": "XSHG",
"name": "Shanghai Stock Exchange",
"country": "China",
"city": "Shanghai",
"market_cap_trillion_usd": 7.96,
"timezone": "Asia/Shanghai",
"tz_offset": "+08:00",
"dst": null,
"open_local": "09:30",
"close_local": "15:00",
"lunch_break": "11:30–13:00",
"open_utc": "01:30",
"close_utc": "07:00",
"currency": "CNY"
},
{
"mic": "XJPX",
"name": "Japan Exchange Group (Tokyo Stock Exchange)",
"country": "Japan",
"city": "Tokyo",
"market_cap_trillion_usd": 7.06,
"timezone": "Asia/Tokyo",
"tz_offset": "+09:00",
"dst": null,
"open_local": "09:00",
"close_local": "15:00",
"lunch_break": "11:30–12:30",
"open_utc": "00:00",
"close_utc": "06:00",
"currency": "JPY"
},
{
"mic": "XHKG",
"name": "Hong Kong Stock Exchange",
"country": "Hong Kong",
"city": "Hong Kong",
"market_cap_trillion_usd": 6.41,
"timezone": "Asia/Hong_Kong",
"tz_offset": "+08:00",
"dst": null,
"open_local": "09:30",
"close_local": "16:00",
"lunch_break": "12:00–13:00",
"open_utc": "01:30",
"close_utc": "08:00",
"currency": "HKD"
},
{
"mic": "XAMS",
"name": "Euronext Amsterdam",
"country": "Netherlands",
"city": "Amsterdam",
"market_cap_trillion_usd": 5.61,
"timezone": "Europe/Amsterdam",
"tz_offset": "+01:00",
"dst": "Mar–Oct",
"open_local": "09:00",
"close_local": "17:30",
"lunch_break": false,
"open_utc": "08:00",
"close_utc": "16:30",
"currency": "EUR"
},
{
"mic": "XBSE",
"name": "Bombay Stock Exchange",
"country": "India",
"city": "Mumbai",
"market_cap_trillion_usd": 5.25,
"timezone": "Asia/Kolkata",
"tz_offset": "+05:30",
"dst": null,
"open_local": "09:15",
"close_local": "15:30",
"lunch_break": false,
"open_utc": "03:45",
"close_utc": "10:00",
"currency": "INR"
},
{
"mic": "XNSE",
"name": "National Stock Exchange of India",
"country": "India",
"city": "Mumbai",
"market_cap_trillion_usd": 5.32,
"timezone": "Asia/Kolkata",
"tz_offset": "+05:30",
"dst": null,
"open_local": "09:15",
"close_local": "15:30",
"lunch_break": false,
"open_utc": "03:45",
"close_utc": "10:00",
"currency": "INR"
},
{
"mic": "XSHE",
"name": "Shenzhen Stock Exchange",
"country": "China",
"city": "Shenzhen",
"market_cap_trillion_usd": 5.11,
"timezone": "Asia/Shanghai",
"tz_offset": "+08:00",
"dst": null,
"open_local": "09:30",
"close_local": "15:00",
"lunch_break": "11:30–13:00",
"open_utc": "01:30",
"close_utc": "07:00",
"currency": "CNY"
},
{
"mic": "XTSE",
"name": "Toronto Stock Exchange",
"country": "Canada",
"city": "Toronto",
"market_cap_trillion_usd": 4.00,
"timezone": "America/Toronto",
"tz_offset": "-05:00",
"dst": "Mar–Nov",
"open_local": "09:30",
"close_local": "16:00",
"lunch_break": false,
"open_utc": "14:30",
"close_utc": "21:00",
"currency": "CAD"
},
{
"mic": "XLON",
"name": "London Stock Exchange",
"country": "United Kingdom",
"city": "London",
"market_cap_trillion_usd": 3.14,
"timezone": "Europe/London",
"tz_offset": "+00:00",
"dst": "Mar–Oct",
"open_local": "08:00",
"close_local": "16:30",
"lunch_break": false,
"open_utc": "08:00",
"close_utc": "16:30",
"currency": "GBP"
},
{
"mic": "XTAI",
"name": "Taiwan Stock Exchange",
"country": "Taiwan",
"city": "Taipei",
"market_cap_trillion_usd": 2.87,
"timezone": "Asia/Taipei",
"tz_offset": "+08:00",
"dst": null,
"open_local": "09:00",
"close_local": "13:30",
"lunch_break": false,
"open_utc": "01:00",
"close_utc": "05:30",
"currency": "TWD"
},
{
"mic": "XSAU",
"name": "Saudi Exchange (Tadawul)",
"country": "Saudi Arabia",
"city": "Riyadh",
"market_cap_trillion_usd": 2.73,
"timezone": "Asia/Riyadh",
"tz_offset": "+03:00",
"dst": null,
"open_local": "10:00",
"close_local": "15:00",
"lunch_break": false,
"open_utc": "07:00",
"close_utc": "12:00",
"currency": "SAR"
},
{
"mic": "XFRA",
"name": "Deutsche Börse (Xetra)",
"country": "Germany",
"city": "Frankfurt",
"market_cap_trillion_usd": 2.04,
"timezone": "Europe/Berlin",
"tz_offset": "+01:00",
"dst": "Mar–Oct",
"open_local": "09:00",
"close_local": "17:30",
"lunch_break": false,
"open_utc": "08:00",
"close_utc": "16:30",
"currency": "EUR"
},
{
"mic": "XSWX",
"name": "SIX Swiss Exchange",
"country": "Switzerland",
"city": "Zürich",
"market_cap_trillion_usd": 1.97,
"timezone": "Europe/Zurich",
"tz_offset": "+01:00",
"dst": "Mar–Oct",
"open_local": "09:00",
"close_local": "17:30",
"lunch_break": false,
"open_utc": "08:00",
"close_utc": "16:30",
"currency": "CHF"
},
{
"mic": "XASX",
"name": "Australian Securities Exchange",
"country": "Australia",
"city": "Sydney",
"market_cap_trillion_usd": 1.89,
"timezone": "Australia/Sydney",
"tz_offset": "+10:00",
"dst": "Oct–Apr",
"open_local": "10:00",
"close_local": "16:00",
"lunch_break": false,
"open_utc": "00:00",
"close_utc": "06:00",
"currency": "AUD"
}
]
}

6
src/data/index.txt Normal file
View File

@@ -0,0 +1,6 @@
data/*
companies.json
continents.json
countries.json
exchanges.json

View File

@@ -4,7 +4,8 @@ mod corporate;
mod config; mod config;
mod util; mod util;
use fantoccini::{ClientBuilder, Locator}; use fantoccini::{ClientBuilder};
use serde_json::{Map, Value};
use tokio::signal; use tokio::signal;
#[tokio::main] #[tokio::main]
@@ -17,11 +18,31 @@ async fn main() -> anyhow::Result<()> {
// === Start ChromeDriver === // === Start ChromeDriver ===
let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe") let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe")
.args(["--port=9515"]) .args(["--port=9515"]) // Level 3 = minimal logs
.spawn()?; .spawn()?;
let client = ClientBuilder::native() // Build capabilities to hide infobar + enable full rendering
.connect("http://localhost:9515") let port = 9515;
let caps_value = serde_json::json!({
"goog:chromeOptions": {
"args": [
//"--headless",
"--disable-gpu",
"--disable-notifications",
"--disable-popup-blocking",
"--disable-blink-features=AutomationControlled"
],
"excludeSwitches": ["enable-automation"]
}
});
let caps_map: Map<String, Value> = caps_value.as_object()
.expect("Capabilities should be a JSON object")
.clone();
let mut client = ClientBuilder::native()
.capabilities(caps_map)
.connect(&format!("http://localhost:{}", port))
.await?; .await?;
// Graceful shutdown // Graceful shutdown
@@ -39,8 +60,7 @@ async fn main() -> anyhow::Result<()> {
// === Corporate Earnings Update === // === Corporate Earnings Update ===
println!("\nUpdating Corporate Earnings"); println!("\nUpdating Corporate Earnings");
let tickers = config::get_tickers(); corporate::run_full_update(&client, &config).await?;
corporate::run_full_update(&client, tickers, &config).await?;
// === Cleanup === // === Cleanup ===
client.close().await?; client.close().await?;

View File

@@ -9,15 +9,14 @@ pub async fn ensure_data_dirs() -> anyhow::Result<()> {
"economic_event_changes", "economic_event_changes",
"corporate_events", "corporate_events",
"corporate_prices", "corporate_prices",
"data",
]; ];
for dir in dirs { for dir in dirs {
let path = Path::new(dir); let path = Path::new(dir);
if !path.exists() { if !path.exists() {
fs::create_dir_all(path).await?; tokio::fs::create_dir_all(path).await?;
println!("Created directory: {dir}"); println!("Created directory: {dir}");
} }
// else → silently continue
} }
Ok(()) Ok(())
} }