Added GLEIF LEI-to-ISIN mapping download and switched company storage to LEI keys

This commit is contained in:
2025-11-25 00:21:51 +01:00
parent bbc19f2110
commit e57a013224
10 changed files with 574 additions and 104 deletions

1
.gitignore vendored
View File

@@ -30,3 +30,4 @@ target/
/corporate_events*
/corporate_prices*
/corporate_event_changes*
/data*

273
Cargo.lock generated
View File

@@ -8,6 +8,17 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "aes"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "ahash"
version = "0.7.8"
@@ -71,6 +82,15 @@ version = "1.0.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "arrayvec"
version = "0.7.6"
@@ -228,6 +248,15 @@ version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
[[package]]
name = "bzip2"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c"
dependencies = [
"libbz2-rs-sys",
]
[[package]]
name = "cc"
version = "1.2.46"
@@ -235,6 +264,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97463e1064cb1b1c1384ad0a0b9c8abd0988e2a91f52606c80ef14aadb63e36"
dependencies = [
"find-msvc-tools",
"jobserver",
"libc",
"shlex",
]
@@ -275,6 +306,16 @@ dependencies = [
"serde",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "compression-codecs"
version = "0.4.32"
@@ -293,6 +334,12 @@ version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582"
[[package]]
name = "constant_time_eq"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6"
[[package]]
name = "cookie"
version = "0.16.2"
@@ -378,6 +425,21 @@ dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.5.0"
@@ -445,12 +507,39 @@ dependencies = [
"syn 2.0.110",
]
[[package]]
name = "csv"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde_core",
]
[[package]]
name = "csv-core"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
dependencies = [
"memchr",
]
[[package]]
name = "data-encoding"
version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476"
[[package]]
name = "deflate64"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26bf8fc351c5ed29b5c2f0cbbac1b209b74f60ecd62e675a998df72c49af5204"
[[package]]
name = "deranged"
version = "0.5.5"
@@ -460,6 +549,17 @@ dependencies = [
"powerfmt",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
]
[[package]]
name = "derive_more"
version = "0.99.20"
@@ -479,6 +579,7 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
@@ -559,7 +660,9 @@ version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"csv",
"fantoccini",
"flate2",
"futures",
"rayon",
"reqwest",
@@ -570,6 +673,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"yfinance-rs",
"zip",
]
[[package]]
@@ -621,6 +725,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [
"crc32fast",
"libz-rs-sys",
"miniz_oxide",
]
@@ -854,6 +959,15 @@ version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "html5ever"
version = "0.27.0"
@@ -1205,6 +1319,15 @@ dependencies = [
"hashbrown 0.16.1",
]
[[package]]
name = "inout"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01"
dependencies = [
"generic-array",
]
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -1262,6 +1385,16 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jobserver"
version = "0.1.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
dependencies = [
"getrandom 0.3.4",
"libc",
]
[[package]]
name = "js-sys"
version = "0.3.82"
@@ -1278,12 +1411,27 @@ version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libbz2-rs-sys"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7"
[[package]]
name = "libc"
version = "0.2.177"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
[[package]]
name = "libz-rs-sys"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "840db8cf39d9ec4dd794376f38acc40d0fc65eec2a8f484f7fd375b84602becd"
dependencies = [
"zlib-rs",
]
[[package]]
name = "linux-raw-sys"
version = "0.11.0"
@@ -1323,6 +1471,16 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "lzma-rust2"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c60a23ffb90d527e23192f1246b14746e2f7f071cb84476dd879071696c18a4a"
dependencies = [
"crc",
"sha2",
]
[[package]]
name = "mac"
version = "0.1.1"
@@ -1624,6 +1782,16 @@ dependencies = [
"windows-link",
]
[[package]]
name = "pbkdf2"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
dependencies = [
"digest",
"hmac",
]
[[package]]
name = "percent-encoding"
version = "2.3.2"
@@ -1781,6 +1949,12 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppmd-rust"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d558c559f0450f16f2a27a1f017ef38468c1090c9ce63c8e51366232d53717b4"
[[package]]
name = "ppv-lite86"
version = "0.2.21"
@@ -2126,6 +2300,7 @@ dependencies = [
"cookie 0.18.1",
"cookie_store",
"encoding_rs",
"futures-channel",
"futures-core",
"futures-util",
"h2",
@@ -2520,6 +2695,17 @@ dependencies = [
"digest",
]
[[package]]
name = "sha2"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
@@ -3645,6 +3831,20 @@ name = "zeroize"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
dependencies = [
"zeroize_derive",
]
[[package]]
name = "zeroize_derive"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.110",
]
[[package]]
name = "zerotrie"
@@ -3678,3 +3878,76 @@ dependencies = [
"quote",
"syn 2.0.110",
]
[[package]]
name = "zip"
version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b"
dependencies = [
"aes",
"arbitrary",
"bzip2",
"constant_time_eq",
"crc32fast",
"deflate64",
"flate2",
"getrandom 0.3.4",
"hmac",
"indexmap",
"lzma-rust2",
"memchr",
"pbkdf2",
"ppmd-rust",
"sha1",
"time",
"zeroize",
"zopfli",
"zstd",
]
[[package]]
name = "zlib-rs"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f06ae92f42f5e5c42443fd094f245eb656abf56dd7cce9b8b263236565e00f2"
[[package]]
name = "zopfli"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249"
dependencies = [
"bumpalo",
"crc32fast",
"log",
"simd-adler32",
]
[[package]]
name = "zstd"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",
]

View File

@@ -17,7 +17,7 @@ categories = ["finance", "data-structures", "asynchronous"]
tokio = { version = "1.38", features = ["full"] }
# Web scraping & HTTP
reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate"] }
reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate", "blocking"] }
scraper = "0.19" # HTML parsing for Yahoo earnings pages
fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net
yfinance-rs = "0.7.2"
@@ -25,6 +25,9 @@ yfinance-rs = "0.7.2"
# Serialization
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
csv = "1.3"
zip = "6.0.0"
flate2 = "1.1.5"
# Date & time
chrono = { version = "0.4", features = ["serde"] }

View File

@@ -16,8 +16,8 @@ struct DayData {
}
/// Aggregate price data from multiple exchanges, converting all to USD
pub async fn aggregate_best_price_data(isin: &str) -> anyhow::Result<()> {
let company_dir = get_company_dir(isin);
pub async fn aggregate_best_price_data(lei: &str) -> anyhow::Result<()> {
let company_dir = get_company_dir(lei);
for timeframe in ["daily", "5min"].iter() {
let source_dir = company_dir.join(timeframe);
@@ -136,7 +136,7 @@ pub async fn aggregate_best_price_data(isin: &str) -> anyhow::Result<()> {
.unwrap_or_else(|| "unknown".to_string());
aggregated.push(CompanyPrice {
ticker: format!("{}@agg", isin), // Mark as aggregated
ticker: format!("{lei}@agg"), // Mark as aggregated
date,
time,
open: data.open,
@@ -159,7 +159,7 @@ pub async fn aggregate_best_price_data(isin: &str) -> anyhow::Result<()> {
// Save aggregation metadata
let meta = AggregationMetadata {
isin: isin.to_string(),
lei: lei.to_string(), // ← CHANGE THIS
timeframe: timeframe.to_string(),
sources: sources_used.into_iter().collect(),
total_bars: aggregated.len(),
@@ -185,7 +185,7 @@ pub async fn aggregate_best_price_data(isin: &str) -> anyhow::Result<()> {
#[derive(Debug, serde::Serialize, serde::Deserialize)]
struct AggregationMetadata {
isin: String,
lei: String,
timeframe: String,
sources: Vec<String>,
total_bars: usize,

View File

@@ -58,3 +58,13 @@ pub fn price_key(p: &CompanyPrice) -> String {
format!("{}|{}|{}", p.ticker, p.date, p.time)
}
}
/// Parse a numeric string into an `f64`, treating the `"--"` placeholder as
/// missing and stripping thousands-separator commas (e.g. `"1,234.5"`).
/// Returns `None` when the cleaned string is not a valid float.
pub fn parse_float(s: &str) -> Option<f64> {
    let without_placeholder = s.replace("--", "");
    let cleaned = without_placeholder.replace(',', "");
    cleaned.parse::<f64>().ok()
}
/// Parse a Yahoo-style date such as `"January 2, 2024"` (full month name)
/// or `"Jan 2, 2024"` (abbreviated month name).
///
/// # Errors
/// Returns an error when neither format matches.
pub fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    for fmt in ["%B %d, %Y", "%b %d, %Y"] {
        if let Ok(date) = NaiveDate::parse_from_str(s, fmt) {
            return Ok(date);
        }
    }
    Err(anyhow::anyhow!("Bad date: {s}"))
}

View File

@@ -1,11 +1,16 @@
// src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice, TickerInfo};
use super::{types::{CompanyEvent, CompanyPrice, TickerInfo}, helpers::*};
use csv::ReaderBuilder;
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration as TokioDuration};
use tokio::{time::{Duration as TokioDuration, sleep}};
use reqwest::Client as HttpClient;
use serde_json::Value;
use zip::ZipArchive;
use std::fs::File;
use std::{collections::HashMap};
use std::io::{Read, BufReader};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
@@ -136,8 +141,8 @@ async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic,
currency,
primary: false, // Will be set separately
currency: currency.to_string(),
primary: false,
})
}
@@ -418,12 +423,170 @@ pub async fn fetch_price_history_5min(
Ok(prices)
}
fn parse_float(s: &str) -> Option<f64> {
s.replace("--", "").replace(",", "").parse::<f64>().ok()
/// Fetch the URL of the latest ISIN↔LEI mapping CSV from GLEIF.
/// Overengineered; we could just use the static URL, but this shows how to scrape if needed.
///
/// NOTE(review): scraping is not implemented yet — the table selector is
/// prepared but unused, and the function currently returns an empty string.
pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> {
    // Static landing page; `format!` removed — there is nothing to interpolate.
    let url = "https://www.gleif.org/de/lei-data/lei-mapping/download-isin-to-lei-relationship-files";
    client.goto(url).await?;
    let html = client.source().await?;
    let _document = Html::parse_document(&html);
    // Selector for the download table rows, kept for the future implementation.
    let _row_sel = Selector::parse("table tbody tr").unwrap();
    let isin_lei = "".to_string();
    Ok(isin_lei)
}
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
NaiveDate::parse_from_str(s, "%B %d, %Y")
.or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y"))
.map_err(|_| anyhow::anyhow!("Bad date: {s}"))
/// Download the GLEIF ISIN↔LEI mapping ZIP, extract the first `.csv` entry,
/// and save it as `data/isin_lei.csv`.
///
/// Returns `Ok(Some(path))` on success. Every failure mode is reported to
/// stdout and degrades to `Ok(None)` so callers can continue with an empty
/// mapping instead of aborting the whole update run.
pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
    let zip_path = "data/isin_lei.zip";
    let csv_path = "data/isin_lei.csv";
    if let Err(e) = std::fs::create_dir_all("data") {
        println!("Failed to create data directory: {e}");
        return Ok(None);
    }
    // Build the HTTP client first; the original `.and_then(|c| Ok(c))` was a
    // no-op and has been removed.
    let client = match reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .timeout(std::time::Duration::from_secs(30))
        .build()
    {
        Ok(c) => c,
        Err(e) => {
            println!("Failed to create HTTP client: {e}");
            return Ok(None);
        }
    };
    // Download ZIP
    let bytes = match client.get(url).send().await {
        Ok(resp) if resp.status().is_success() => match resp.bytes().await {
            Ok(b) => b,
            Err(e) => {
                println!("Failed to read ZIP bytes: {e}");
                return Ok(None);
            }
        },
        Ok(resp) => {
            println!("Server returned HTTP {}", resp.status());
            return Ok(None);
        }
        Err(e) => {
            println!("Failed to download ISIN/LEI ZIP: {e}");
            return Ok(None);
        }
    };
    if let Err(e) = tokio::fs::write(zip_path, &bytes).await {
        println!("Failed to write ZIP file: {e}");
        return Ok(None);
    }
    // Extract CSV — single `let mut` binding replaces the redundant
    // `let archive = …; let mut archive = archive;` rebinding.
    let mut archive = match std::fs::File::open(zip_path).map(ZipArchive::new) {
        Ok(Ok(a)) => a,
        Ok(Err(e)) => {
            println!("Invalid ZIP: {e}");
            return Ok(None);
        }
        Err(e) => {
            println!("Cannot open ZIP file: {e}");
            return Ok(None);
        }
    };
    // Locate the first entry whose name ends in ".csv".
    let idx = match (0..archive.len()).find(|&i| {
        archive
            .by_index(i)
            .map(|f| f.name().ends_with(".csv"))
            .unwrap_or(false)
    }) {
        Some(i) => i,
        None => {
            println!("ZIP did not contain a CSV file");
            return Ok(None);
        }
    };
    let mut csv_file = match archive.by_index(idx) {
        Ok(f) => f,
        Err(e) => {
            println!("Failed to read CSV entry: {e}");
            return Ok(None);
        }
    };
    let mut csv_bytes = Vec::new();
    if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
        println!("Failed to extract CSV: {e}");
        return Ok(None);
    }
    if let Err(e) = tokio::fs::write(csv_path, &csv_bytes).await {
        println!("Failed to save CSV file: {e}");
        return Ok(None);
    }
    Ok(Some(csv_path.to_string()))
}
/// Download the GLEIF ISIN↔LEI CSV and parse it into a map from LEI to all
/// ISINs recorded for that LEI.
///
/// All failures (runtime creation, download, file open, row parse) are
/// reported to stdout and degrade to an empty — or partial — map, so the
/// caller can always proceed.
///
/// NOTE(review): this constructs its own Tokio runtime and calls `block_on`;
/// if this function is ever invoked from code already running on a Tokio
/// runtime, `block_on` will panic. Confirm all callers are synchronous.
pub fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
// Dedicated runtime to drive the async download from synchronous code.
let rt = tokio::runtime::Runtime::new();
// `path` is Some only when the download fully succeeded; every failure
// branch logs and yields None, which short-circuits to an empty map below.
let Some(path) =
(match rt {
Ok(rt) => match rt.block_on(download_isin_lei_csv()) {
Ok(Some(p)) => Some(p),
Ok(None) => {
println!("ISIN/LEI download failed; continuing with empty map");
None
}
Err(e) => {
println!("Runtime download error: {e}");
None
}
},
Err(e) => {
println!("Failed to create Tokio runtime: {e}");
None
}
}
) else {
return Ok(HashMap::new());
};
let file = match File::open(&path) {
Ok(f) => f,
Err(e) => {
println!("Cannot open CSV '{}': {e}", path);
return Ok(HashMap::new());
}
};
let mut rdr = ReaderBuilder::new().from_reader(BufReader::new(file));
let mut map: HashMap<String, Vec<String>> = HashMap::new();
// Bad rows are skipped (logged), not fatal — the map may be partial.
for row in rdr.records() {
let rec = match row {
Ok(r) => r,
Err(e) => {
println!("CSV parse error: {e}");
continue;
}
};
// Rows with fewer than two columns cannot carry a LEI/ISIN pair.
if rec.len() < 2 {
continue;
}
// Column 0 is treated as the LEI key, column 1 as the ISIN.
let lei = rec[0].to_string();
let isin = rec[1].to_string();
map.entry(lei).or_default().push(isin);
}
Ok(map)
}

View File

@@ -111,8 +111,8 @@ pub async fn load_companies() -> Result<Vec<CompanyMetadata>, anyhow::Error> {
Ok(companies)
}
pub fn get_company_dir(isin: &str) -> PathBuf {
PathBuf::from("corporate_prices").join(isin)
/// Root directory holding a company's price data, keyed by its LEI.
pub fn get_company_dir(lei: &str) -> PathBuf {
    let mut dir = PathBuf::from("corporate_prices");
    dir.push(lei);
    dir
}
pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
@@ -131,13 +131,19 @@ pub async fn ensure_company_dirs(isin: &str) -> anyhow::Result<()> {
}
pub async fn save_company_metadata(company: &CompanyMetadata) -> anyhow::Result<()> {
let dir = get_company_dir(&company.isin);
let dir = get_company_dir(&company.lei);
fs::create_dir_all(&dir).await?;
let path = dir.join("metadata.json");
fs::write(path, serde_json::to_string_pretty(company)?).await?;
fs::write(&path, serde_json::to_string_pretty(company)?).await?;
Ok(())
}
/// Read and deserialize `metadata.json` from the company's LEI directory.
///
/// # Errors
/// Fails when the file is missing/unreadable or the JSON does not match
/// `CompanyMetadata`.
pub async fn load_company_metadata(lei: &str) -> anyhow::Result<CompanyMetadata> {
    let metadata_path = get_company_dir(lei).join("metadata.json");
    let raw = fs::read_to_string(metadata_path).await?;
    let company: CompanyMetadata = serde_json::from_str(&raw)?;
    Ok(company)
}
pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchange>) -> anyhow::Result<()> {
let dir = get_company_dir(isin);
fs::create_dir_all(&dir).await?;
@@ -146,8 +152,8 @@ pub async fn save_available_exchanges(isin: &str, exchanges: Vec<AvailableExchan
Ok(())
}
pub async fn load_available_exchanges(isin: &str) -> anyhow::Result<Vec<AvailableExchange>> {
let path = get_company_dir(isin).join("available_exchanges.json");
pub async fn load_available_exchanges(lei: &str) -> anyhow::Result<Vec<AvailableExchange>> {
let path = get_company_dir(lei).join("available_exchanges.json");
if path.exists() {
let content = fs::read_to_string(&path).await?;
Ok(serde_json::from_str(&content)?)
@@ -157,19 +163,17 @@ pub async fn load_available_exchanges(isin: &str) -> anyhow::Result<Vec<Availabl
}
pub async fn save_prices_by_source(
isin: &str,
lei: &str,
source_ticker: &str,
timeframe: &str,
prices: Vec<CompanyPrice>,
) -> anyhow::Result<()> {
let source_safe = source_ticker.replace(".", "_").replace("/", "_");
let dir = get_company_dir(isin).join(timeframe).join(&source_safe);
let dir = get_company_dir(lei).join(timeframe).join(&source_safe);
fs::create_dir_all(&dir).await?;
let path = dir.join("prices.json");
let mut prices = prices;
prices.sort_by_key(|p| (p.date.clone(), p.time.clone()));
fs::write(&path, serde_json::to_string_pretty(&prices)?).await?;
Ok(())
}

View File

@@ -49,8 +49,10 @@ pub struct TickerInfo {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyMetadata {
pub isin: String,
pub lei: String, // e.g. "5493000J2N45DDNE4Y28"
pub name: String,
pub isins: Vec<String>, // All ISINs belonging to this legal entity (primary + ADR + GDR)
pub primary_isin: String, // The most liquid / preferred one (used for folder fallback)
pub tickers: Vec<TickerInfo>,
}

View File

@@ -6,122 +6,137 @@ use chrono::Local;
use std::collections::HashMap;
pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
println!("Starting company-centric corporate update (ISIN-based)");
println!("Starting LEI-based corporate update");
// 1. Download fresh GLEIF ISIN↔LEI mapping on every run
let lei_to_isins: HashMap<String, Vec<String>> = match load_isin_lei_csv() {
Ok(map) => map,
Err(e) => {
println!("Warning: Failed to load ISIN↔LEI mapping: {}", e);
HashMap::new()
}
};
//let _isin_to_lei = load_isin_to_lei()?; // optional, useful for migration scripts
let companies = load_companies().await?;
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
let mut existing_events = load_existing_events().await?;
for company in companies {
println!("\nProcessing company: {} ({})", company.name, company.isin);
let companies = load_companies().await?; // Vec<CompanyMetadata> with lei, isins, tickers
ensure_company_dirs(&company.isin).await?;
for mut company in companies {
println!("\nProcessing company: {} (LEI: {})", company.name, company.lei);
// === Enrich with ALL ISINs known to GLEIF (includes ADRs, GDRs, etc.) ===
if let Some(all_isins) = lei_to_isins.get(&company.lei) {
let mut seen = company.isins.iter().cloned().collect::<std::collections::HashSet<_>>();
for isin in all_isins {
if !seen.contains(isin) {
company.isins.push(isin.clone());
seen.insert(isin.clone());
}
}
}
// Ensure company directory exists (now uses LEI)
ensure_company_dirs(&company.lei).await?;
save_company_metadata(&company).await?;
// === STEP 1: Discover all available exchanges ===
// === STEP 1: Discover additional exchanges using each known ISIN ===
let mut all_tickers = company.tickers.clone();
// Try to discover additional exchanges using the primary ticker
if let Some(primary_ticker) = company.tickers.iter().find(|t| t.primary) {
println!(" 🔍 Discovering additional exchanges...");
match discover_available_exchanges(&company.isin, &primary_ticker.ticker).await {
println!(" Discovering additional exchanges across {} ISIN(s)...", company.isins.len());
for isin in &company.isins {
println!(" → Checking ISIN: {}", isin);
match discover_available_exchanges(isin, &primary_ticker.ticker).await {
Ok(discovered) => {
// Merge discovered tickers with existing ones
if discovered.is_empty() {
println!(" No new exchanges found for {}", isin);
} else {
for disc in discovered {
if !all_tickers.iter().any(|t| t.ticker == disc.ticker) {
println!(" Found new exchange: {} ({})", disc.ticker, disc.exchange_mic);
if !all_tickers.iter().any(|t| t.ticker == disc.ticker && t.exchange_mic == disc.exchange_mic) {
println!(" Found new listing: {} ({}) [ISIN: {}]", disc.ticker, disc.exchange_mic, isin);
all_tickers.push(disc);
}
}
}
Err(e) => println!(" ⚠ Discovery failed: {}", e),
}
Err(e) => println!(" Discovery failed for {}: {}", isin, e),
}
tokio::time::sleep(tokio::time::Duration::from_millis(300)).await;
}
}
// Update metadata with newly discovered tickers
// Save updated metadata if we found new listings
if all_tickers.len() > company.tickers.len() {
let updated_company = CompanyMetadata {
isin: company.isin.clone(),
name: company.name.clone(),
tickers: all_tickers.clone(),
};
save_company_metadata(&updated_company).await?;
println!(" 📝 Updated metadata with {} total tickers", all_tickers.len());
company.tickers = all_tickers.clone();
save_company_metadata(&company).await?;
println!(" Updated metadata: {} total tickers", all_tickers.len());
}
// === STEP 2: Fetch data from all available exchanges ===
// === STEP 2: Fetch data from ALL available tickers ===
for ticker_info in &all_tickers {
let ticker = &ticker_info.ticker;
println!("Trying ticker: {} ({})", ticker, ticker_info.exchange_mic);
println!("Fetching: {} ({})", ticker, ticker_info.exchange_mic);
let mut daily_success = false;
let mut intraday_success = false;
// Earnings (only from primary ticker to avoid duplicates)
// Earnings: only fetch from primary ticker to avoid duplicates
if ticker_info.primary {
if let Ok(new_events) = fetch_earnings_history(client, ticker).await {
let result = process_batch(&new_events, &mut existing_events, &today);
save_changes(&result.changes).await?;
println!(" {} earnings events", new_events.len());
println!(" Earnings events: {}", new_events.len());
}
}
// Daily prices
match fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await {
Ok(prices) => {
if let Ok(prices) = fetch_daily_price_history(ticker, &config.corporate_start_date, &today).await {
if !prices.is_empty() {
save_prices_by_source(&company.isin, ticker, "daily", prices.clone()).await?;
save_prices_by_source(&company.lei, ticker, "daily", prices).await?;
daily_success = true;
println!(" ✓ Saved {} daily bars ({} currency)",
prices.len(),
prices.first().map(|p| p.currency.as_str()).unwrap_or("?")
);
}
}
Err(e) => println!(" ✗ Daily fetch failed: {}", e),
}
// 5-minute prices (last 60 days)
// 5-minute intraday (last 60 days)
let sixty_days_ago = (chrono::Local::now() - chrono::Duration::days(60))
.format("%Y-%m-%d")
.to_string();
match fetch_price_history_5min(ticker, &sixty_days_ago, &today).await {
Ok(prices) => {
if let Ok(prices) = fetch_price_history_5min(ticker, &sixty_days_ago, &today).await {
if !prices.is_empty() {
save_prices_by_source(&company.isin, ticker, "5min", prices.clone()).await?;
save_prices_by_source(&company.lei, ticker, "5min", prices).await?;
intraday_success = true;
println!(" ✓ Saved {} 5min bars", prices.len());
}
}
Err(e) => println!(" ✗ 5min fetch failed: {}", e),
}
// Record success in available_exchanges.json
if daily_success || intraday_success {
// Update available_exchanges.json (now under LEI folder)
update_available_exchange(
&company.isin,
&company.lei,
ticker,
&ticker_info.exchange_mic,
daily_success,
intraday_success,
).await?;
}
tokio::time::sleep(tokio::time::Duration::from_millis(800)).await;
}
// === STEP 3: Aggregate prices from all sources ===
println!(" 📊 Aggregating multi-exchange data with FX conversion...");
match aggregate_best_price_data(&company.isin).await {
Ok(_) => println!(" Aggregation complete"),
Err(e) => println!(" ⚠ Aggregation warning: {}", e),
// === STEP 3: Aggregate all sources into unified USD prices ===
println!(" Aggregating multi-source price data (FX-adjusted)...");
if let Err(e) = aggregate_best_price_data(&company.lei).await {
println!(" Aggregation failed: {}", e);
} else {
println!(" Aggregation complete");
}
}
// Final save of optimized earnings events
save_optimized_events(existing_events).await?;
println!("\nCorporate update complete (ISIN-based)");
println!("\nCorporate update complete (LEI-based)");
Ok(())
}

View File

@@ -9,15 +9,14 @@ pub async fn ensure_data_dirs() -> anyhow::Result<()> {
"economic_event_changes",
"corporate_events",
"corporate_prices",
"data",
];
for dir in dirs {
let path = Path::new(dir);
if !path.exists() {
fs::create_dir_all(path).await?;
tokio::fs::create_dir_all(path).await?;
println!("Created directory: {dir}");
}
// else → silently continue
}
Ok(())
}