moved data capturing into cache folder
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
// src/corporate/scraper.rs
|
||||
use super::{types::*, helpers::*, openfigi::*};
|
||||
//use crate::corporate::openfigi::OpenFigiClient;
|
||||
use crate::{webdriver::webdriver::*};
|
||||
use crate::{webdriver::webdriver::*, util::directories::DataPaths, util::logger};
|
||||
use fantoccini::{Client, Locator};
|
||||
use scraper::{Html, Selector};
|
||||
use chrono::{DateTime, Duration, NaiveDate, Utc};
|
||||
@@ -490,11 +490,19 @@ pub async fn _fetch_latest_gleif_isin_lei_mapping_url(client: &Client) -> anyhow
|
||||
pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
let url = "https://mapping.gleif.org/api/v2/isin-lei/9315e3e3-305a-4e71-b062-46714740fa8d/download";
|
||||
|
||||
if let Err(e) = std::fs::create_dir_all("data/gleif") {
|
||||
println!("Failed to create data directory: {e}");
|
||||
// Initialize DataPaths and create cache/gleif directory
|
||||
let paths = DataPaths::new(".")?;
|
||||
let gleif_cache_dir = paths.cache_gleif_dir();
|
||||
|
||||
if let Err(e) = std::fs::create_dir_all(&gleif_cache_dir) {
|
||||
let msg = format!("Failed to create cache/gleif directory: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
logger::log_info("Corporate Scraper: Downloading ISIN/LEI mapping from GLEIF...").await;
|
||||
|
||||
// Download ZIP and get the filename from Content-Disposition header
|
||||
let client = match reqwest::Client::builder()
|
||||
.user_agent(USER_AGENT)
|
||||
@@ -503,7 +511,9 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
{
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
println!("Failed to create HTTP client: {e}");
|
||||
let msg = format!("Failed to create HTTP client: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
@@ -511,11 +521,15 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
let resp = match client.get(url).send().await {
|
||||
Ok(r) if r.status().is_success() => r,
|
||||
Ok(resp) => {
|
||||
println!("Server returned HTTP {}", resp.status());
|
||||
let msg = format!("Server returned HTTP {}", resp.status());
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Failed to download ISIN/LEI ZIP: {e}");
|
||||
let msg = format!("Failed to download ISIN/LEI ZIP: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
@@ -528,21 +542,30 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
.and_then(|s| s.split("filename=").nth(1).map(|f| f.trim_matches('"').to_string()))
|
||||
.unwrap_or_else(|| "isin_lei.zip".to_string());
|
||||
|
||||
// Parse timestamp from filename and convert to DDMMYYYY format
|
||||
let parsed_filename = parse_gleif_filename(&filename);
|
||||
logger::log_info(&format!("Corporate Scraper: Downloaded file: {} -> {}", filename, parsed_filename)).await;
|
||||
|
||||
let bytes = match resp.bytes().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => {
|
||||
println!("Failed to read ZIP bytes: {e}");
|
||||
let msg = format!("Failed to read ZIP bytes: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let zip_path = format!("data/gleif/{}", filename);
|
||||
let csv_path = format!("data/gleif/{}", filename.replace(".zip", ".csv"));
|
||||
let zip_path = gleif_cache_dir.join(&parsed_filename);
|
||||
let csv_path = gleif_cache_dir.join(parsed_filename.replace(".zip", ".csv"));
|
||||
|
||||
if let Err(e) = tokio::fs::write(&zip_path, &bytes).await {
|
||||
println!("Failed to write ZIP file: {e}");
|
||||
let msg = format!("Failed to write ZIP file: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
logger::log_info(&format!("Corporate Scraper: Saved ZIP to {:?}", zip_path)).await;
|
||||
|
||||
// Extract CSV
|
||||
let archive = match std::fs::File::open(&zip_path)
|
||||
@@ -550,11 +573,15 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
{
|
||||
Ok(Ok(a)) => a,
|
||||
Ok(Err(e)) => {
|
||||
println!("Invalid ZIP: {e}");
|
||||
let msg = format!("Invalid ZIP: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Cannot open ZIP file: {e}");
|
||||
let msg = format!("Cannot open ZIP file: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
@@ -568,7 +595,9 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
}) {
|
||||
Some(i) => i,
|
||||
None => {
|
||||
println!("ZIP did not contain a CSV file");
|
||||
let msg = "ZIP did not contain a CSV file";
|
||||
logger::log_error(msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
@@ -576,23 +605,58 @@ pub async fn download_isin_lei_csv() -> anyhow::Result<Option<String>> {
|
||||
let mut csv_file = match archive.by_index(idx) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
println!("Failed to read CSV entry: {e}");
|
||||
let msg = format!("Failed to read CSV entry: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
};
|
||||
|
||||
let mut csv_bytes = Vec::new();
|
||||
if let Err(e) = csv_file.read_to_end(&mut csv_bytes) {
|
||||
println!("Failed to extract CSV: {e}");
|
||||
let msg = format!("Failed to extract CSV: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
if let Err(e) = tokio::fs::write(&csv_path, &csv_bytes).await {
|
||||
println!("Failed to save CSV file: {e}");
|
||||
let msg = format!("Failed to save CSV file: {}", e);
|
||||
logger::log_error(&msg).await;
|
||||
println!("{}", msg);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Ok(Some(csv_path))
|
||||
let msg = format!("✓ ISIN/LEI CSV extracted: {:?}", csv_path);
|
||||
println!("{}", msg);
|
||||
logger::log_info(&msg).await;
|
||||
|
||||
Ok(Some(csv_path.to_string_lossy().to_string()))
|
||||
}
|
||||
|
||||
/// Parse a GLEIF filename and convert its timestamp to DDMMYYYY format.
///
/// The function looks for the first `isin-lei-` marker and expects eight ASCII
/// digits (`YYYYMMDD`) directly after it. Anything following those digits
/// (such as a `THHMMSS` time component) is dropped. The resulting name keeps a
/// `.zip` extension if the input ends with `.zip`; otherwise `.csv` is used.
///
/// If the marker is missing, or it is not followed by eight ASCII digits, the
/// input is returned unchanged. The function never panics, including on
/// non-ASCII input.
///
/// Example: "isin-lei-20251124T080254.csv" -> "isin-lei-24112025.csv"
fn parse_gleif_filename(filename: &str) -> String {
    const PREFIX: &str = "isin-lei-";

    let date_part = filename
        .split_once(PREFIX)
        // `get` returns `None` when fewer than 8 bytes remain or when byte 8
        // is not a char boundary, so multi-byte input cannot cause a panic.
        .and_then(|(_, rest)| rest.get(..8))
        // Only ASCII digits: `char::is_numeric` would also accept multi-byte
        // Unicode numerics, which break the fixed-width slicing below.
        .filter(|candidate| candidate.bytes().all(|b| b.is_ascii_digit()));

    match date_part {
        Some(date) => {
            // `date` is exactly eight ASCII bytes (YYYYMMDD), so these byte
            // ranges are always valid char boundaries.
            let (year, month, day) = (&date[..4], &date[4..6], &date[6..]);
            let extension = if filename.ends_with(".zip") { ".zip" } else { ".csv" };
            format!("isin-lei-{day}{month}{year}{extension}")
        }
        // Fallback: return the original filename if parsing fails.
        None => filename.to_string(),
    }
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user