added lei to isin mapping

This commit is contained in:
2025-11-25 00:21:51 +01:00
parent bbc19f2110
commit e57a013224
10 changed files with 574 additions and 104 deletions

View File

@@ -1,11 +1,16 @@
// src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice, TickerInfo};
use super::{types::{CompanyEvent, CompanyPrice, TickerInfo}, helpers::*};
use csv::ReaderBuilder;
use fantoccini::{Client, Locator};
use scraper::{Html, Selector};
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
use tokio::time::{sleep, Duration as TokioDuration};
use tokio::{time::{Duration as TokioDuration, sleep}};
use reqwest::Client as HttpClient;
use serde_json::Value;
use zip::ZipArchive;
use std::fs::File;
use std::{collections::HashMap};
use std::io::{Read, BufReader};
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
@@ -136,8 +141,8 @@ async fn check_ticker_exists(ticker: &str) -> anyhow::Result<TickerInfo> {
Ok(TickerInfo {
ticker: ticker.to_string(),
exchange_mic,
currency,
primary: false, // Will be set separately
currency: currency.to_string(),
primary: false,
})
}
@@ -418,12 +423,170 @@ pub async fn fetch_price_history_5min(
Ok(prices)
}
fn parse_float(s: &str) -> Option<f64> {
s.replace("--", "").replace(",", "").parse::<f64>().ok()
/// Placeholder for discovering the newest ISIN↔LEI mapping download link on the GLEIF site.
///
/// Navigates `client` to the GLEIF download page and parses the returned HTML, but no
/// link extraction is implemented yet: on success the returned string is always empty.
/// The function is kept as a scraping skeleton only.
///
/// # Errors
///
/// Returns an error if navigating to the page or retrieving its source fails.
pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow::Result<String> {
    const DOWNLOAD_PAGE: &str =
        "https://www.gleif.org/de/lei-data/lei-mapping/download-isin-to-lei-relationship-files";

    client.goto(DOWNLOAD_PAGE).await?;
    let page_source = client.source().await?;

    // Parsed document and row selector are prepared but not inspected yet;
    // presumably a future extraction step would walk the table rows.
    let _parsed_page = Html::parse_document(&page_source);
    let _table_rows = Selector::parse("table tbody tr").unwrap();

    Ok(String::new())
}
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
NaiveDate::parse_from_str(s, "%B %d, %Y")
.or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y"))
.map_err(|_| anyhow::anyhow!("Bad date: {s}"))
/// Downloads the GLEIF ISIN↔LEI mapping archive and unpacks its CSV into `data/`.
///
/// The archive is stored as `data/isin_lei.zip` and the first entry whose name ends
/// with `.csv` is written to `data/isin_lei.csv`.
///
/// This is a best-effort operation: every failure is reported on stdout and mapped to
/// `Ok(None)` so callers can carry on without the mapping. The current implementation
/// never returns `Err`; the `Result` wrapper is kept for interface compatibility.
///
/// # Returns
///
/// `Ok(Some(path))` with the path of the extracted CSV on success, `Ok(None)` otherwise.
pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
    match try_download_isin_lei_csv().await {
        Ok(csv_path) => Ok(Some(csv_path)),
        Err(message) => {
            println!("{message}");
            Ok(None)
        }
    }
}

/// Performs the download and extraction, returning the ready-to-print failure message on error.
async fn try_download_isin_lei_csv() -> Result<String, String> {
    const URL: &str =
        "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
    const ZIP_PATH: &str = "data/isin_lei.zip";
    const CSV_PATH: &str = "data/isin_lei.csv";

    // Async filesystem call: a blocking `std::fs` call would stall the executor thread.
    tokio::fs::create_dir_all("data")
        .await
        .map_err(|e| format!("Failed to create data directory: {e}"))?;

    // NOTE(review): the 30 s timeout presumably covers the whole transfer including the
    // response body — confirm it is generous enough for the archive size.
    let client = reqwest::Client::builder()
        .user_agent(USER_AGENT)
        .timeout(std::time::Duration::from_secs(30))
        .build()
        .map_err(|e| format!("Failed to create HTTP client: {e}"))?;

    let resp = client
        .get(URL)
        .send()
        .await
        .map_err(|e| format!("Failed to download ISIN/LEI ZIP: {e}"))?;
    if !resp.status().is_success() {
        return Err(format!("Server returned HTTP {}", resp.status()));
    }
    let zip_bytes = resp
        .bytes()
        .await
        .map_err(|e| format!("Failed to read ZIP bytes: {e}"))?;

    // The archive is still persisted on disk, as before, but it is unpacked straight
    // from memory instead of being re-opened with blocking file I/O.
    tokio::fs::write(ZIP_PATH, &zip_bytes)
        .await
        .map_err(|e| format!("Failed to write ZIP file: {e}"))?;

    let csv_bytes = extract_first_csv(&zip_bytes)?;

    tokio::fs::write(CSV_PATH, &csv_bytes)
        .await
        .map_err(|e| format!("Failed to save CSV file: {e}"))?;

    Ok(CSV_PATH.to_string())
}

/// Returns the contents of the first `.csv` entry of an in-memory ZIP archive.
fn extract_first_csv(zip_bytes: &[u8]) -> Result<Vec<u8>, String> {
    let mut archive = ZipArchive::new(std::io::Cursor::new(zip_bytes))
        .map_err(|e| format!("Invalid ZIP: {e}"))?;

    // Entries that cannot be opened are treated as "not a CSV" and skipped.
    let csv_index = (0..archive.len())
        .find(|&i| {
            archive
                .by_index(i)
                .map(|entry| entry.name().ends_with(".csv"))
                .unwrap_or(false)
        })
        .ok_or_else(|| "ZIP did not contain a CSV file".to_string())?;

    let mut csv_entry = archive
        .by_index(csv_index)
        .map_err(|e| format!("Failed to read CSV entry: {e}"))?;

    let mut csv_bytes = Vec::new();
    csv_entry
        .read_to_end(&mut csv_bytes)
        .map_err(|e| format!("Failed to extract CSV: {e}"))?;

    Ok(csv_bytes)
}
/// Downloads the GLEIF mapping CSV and loads it into a map keyed by the first column.
///
/// Column 0 of each record is used as the key (the LEI) and column 1 (the ISIN) is
/// appended to that key's list. The CSV reader is used with its default settings.
///
/// This is a best-effort loader: if the download fails or the file cannot be opened,
/// a message is printed and an empty map is returned. Malformed rows are reported and
/// skipped, and rows with fewer than two columns are ignored. The current
/// implementation never returns `Err`.
///
/// The function blocks the calling thread until the download has finished.
pub fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>> {
    let Some(path) = download_isin_lei_csv_blocking() else {
        return Ok(HashMap::new());
    };

    let file = match File::open(&path) {
        Ok(f) => f,
        Err(e) => {
            println!("Cannot open CSV '{}': {e}", path);
            return Ok(HashMap::new());
        }
    };

    let mut rdr = ReaderBuilder::new().from_reader(BufReader::new(file));
    let mut map: HashMap<String, Vec<String>> = HashMap::new();

    for row in rdr.records() {
        let rec = match row {
            Ok(r) => r,
            Err(e) => {
                println!("CSV parse error: {e}");
                continue;
            }
        };

        // Checked access instead of indexing: short rows are skipped, never a panic.
        let (Some(lei), Some(isin)) = (rec.get(0), rec.get(1)) else {
            continue;
        };
        map.entry(lei.to_string()).or_default().push(isin.to_string());
    }

    Ok(map)
}

/// Runs `download_isin_lei_csv` to completion from synchronous code.
///
/// The download is driven by a fresh Tokio runtime on a dedicated thread. Creating the
/// runtime and calling `block_on` directly on the caller's thread would panic whenever
/// the caller is itself running inside a Tokio runtime.
fn download_isin_lei_csv_blocking() -> Option<String> {
    let worker = std::thread::spawn(|| -> Option<String> {
        let rt = match tokio::runtime::Runtime::new() {
            Ok(rt) => rt,
            Err(e) => {
                println!("Failed to create Tokio runtime: {e}");
                return None;
            }
        };

        match rt.block_on(download_isin_lei_csv()) {
            Ok(Some(p)) => Some(p),
            Ok(None) => {
                println!("ISIN/LEI download failed; continuing with empty map");
                None
            }
            Err(e) => {
                println!("Runtime download error: {e}");
                None
            }
        }
    });

    match worker.join() {
        Ok(path) => path,
        Err(_) => {
            // Keep the best-effort contract even if the worker thread panicked.
            println!("ISIN/LEI download thread panicked; continuing with empty map");
            None
        }
    }
}