added yahoo exchange extraction
@@ -2,7 +2,9 @@
 pub mod types;
 pub mod scraper;
 pub mod storage;
+pub mod update;
 pub mod helpers;
 
-pub mod update;
+pub mod update_forex;
+
 pub use update::run_full_update;
493 src/economic/update_forex.rs Normal file
@@ -0,0 +1,493 @@
// src/economic/update_forex.rs
use crate::config::Config;
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::scraper::yahoo::{YahooClientPool, ChartData};

use std::result::Result::Ok;
use chrono::{TimeZone, Utc};
use std::collections::HashSet;
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use tokio::fs::OpenOptions;
use tokio::io::AsyncWriteExt;
use futures::stream::{FuturesUnordered, StreamExt};
use serde_json::json;
use tokio::sync::mpsc;

/// Currency information
#[derive(Debug, Clone)]
struct CurrencyPair {
    code: String,         // e.g., "EUR", "JPY"
    name: String,         // e.g., "Euro", "Japanese Yen"
    yahoo_symbol: String, // e.g., "USDEUR=X", "USDJPY=X"
}

impl CurrencyPair {
    fn new(code: &str, name: &str) -> Self {
        Self {
            code: code.to_string(),
            name: name.to_string(),
            yahoo_symbol: format!("USD{}=X", code),
        }
    }
}

/// Get list of currency pairs to fetch (USD as base currency)
fn get_currency_pairs() -> Vec<CurrencyPair> {
    vec![
        CurrencyPair::new("EUR", "Euro"),
        CurrencyPair::new("TRY", "Turkish Lira"),
        CurrencyPair::new("CHF", "Swiss Franc"),
        CurrencyPair::new("SEK", "Swedish Krona"),
        CurrencyPair::new("TWD", "New Taiwan Dollar"),
        CurrencyPair::new("AUD", "Australian Dollar"),
        CurrencyPair::new("GBP", "British Pound"), // Fixed: GBp -> GBP
        CurrencyPair::new("NOK", "Norwegian Krone"),
        CurrencyPair::new("CAD", "Canadian Dollar"),
        CurrencyPair::new("CZK", "Czech Koruna"),
        CurrencyPair::new("SGD", "Singapore Dollar"),
        CurrencyPair::new("ISK", "Icelandic Króna"),
        CurrencyPair::new("ZAR", "South African Rand"), // Fixed: ZAc -> ZAR
        CurrencyPair::new("JPY", "Japanese Yen"),
        CurrencyPair::new("PLN", "Polish Złoty"),
        CurrencyPair::new("DKK", "Danish Krone"),
        CurrencyPair::new("HKD", "Hong Kong Dollar"),
        CurrencyPair::new("ILS", "Israeli Shekel"), // Fixed: ILA -> ILS
        CurrencyPair::new("RON", "Romanian Leu"),
        CurrencyPair::new("KWD", "Kuwaiti Dinar"), // Fixed: KWF -> KWD
    ]
}
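
// Note on the quoting convention assumed here: a Yahoo symbol of the form
// "USD{code}=X" is expected to quote how many units of {code} one US dollar
// buys (e.g. USDJPY=X ≈ 150 means 1 USD ≈ 150 JPY). If consumers need the
// inverse ({code} -> USD), the series has to be inverted when read back.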

/// Collect foreign-exchange charts from Yahoo with abort-safe incremental persistence
///
/// # Features
/// - Graceful shutdown (abort-safe)
/// - Task panic isolation (tasks fail independently)
/// - Crash-safe persistence (checkpoint + log with fsync)
/// - Smart skip logic (only process incomplete data)
/// - Uses a pending queue instead of a retry mechanism
///
/// # Persistence Strategy
/// - Checkpoint: fx_rates_collected.jsonl (atomic state)
/// - Log: fx_rates_updates.log (append-only updates)
/// - On restart: load checkpoint + replay log
/// - Periodic checkpoints (every 10 currencies)
/// - Batched fsync (every 5 writes or 10 seconds)
pub async fn collect_fx_rates(
    paths: &DataPaths,
    _config: &Config,
    yahoo_pool: Arc<YahooClientPool>,
    shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<usize> {
    // Configuration constants
    const CHECKPOINT_INTERVAL: usize = 10;
    const FSYNC_BATCH_SIZE: usize = 5;
    const FSYNC_INTERVAL_SECS: u64 = 10;
    const CONCURRENCY_LIMIT: usize = 10; // Limit parallel fetch tasks

    let data_path = paths.data_dir();

    // File paths
    let checkpoint_path = data_path.join("fx_rates_collected.jsonl");
    let log_path = data_path.join("fx_rates_updates.log");
    let state_path = data_path.join("state.jsonl");

    // Check if already completed (check state file)
    if state_path.exists() {
        let state_content = tokio::fs::read_to_string(&state_path).await?;

        for line in state_content.lines() {
            if line.trim().is_empty() {
                continue;
            }

            if let Ok(state) = serde_json::from_str::<serde_json::Value>(line) {
                if state.get("fx_rates_collection_complete").and_then(|v| v.as_bool()).unwrap_or(false) {
                    logger::log_info(" FX rates collection already completed").await;

                    // Count collected currencies
                    let count = count_collected_currencies(paths).await?;
                    logger::log_info(&format!(" ✓ Found {} currencies with chart data", count)).await;
                    return Ok(count);
                }
            }
        }
    }

    // === RECOVERY PHASE: Track collected currencies ===
    let mut collected_currencies: HashSet<String> = HashSet::new();
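
    // Illustrative example of a replayed log line (one JSON object per line;
    // field names match the entries written by spawn_collection_task below):
    //   {"currency_code":"EUR","currency_name":"Euro","yahoo_symbol":"USDEUR=X","status":"collected","timestamp":"2024-01-01T00:00:00+00:00"}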

    if log_path.exists() {
        logger::log_info("Loading FX rates collection progress from log...").await;
        let log_content = tokio::fs::read_to_string(&log_path).await?;

        for line in log_content.lines() {
            if line.trim().is_empty() || !line.ends_with('}') {
                continue; // Skip incomplete lines
            }

            match serde_json::from_str::<serde_json::Value>(line) {
                Ok(entry) => {
                    if let Some(code) = entry.get("currency_code").and_then(|v| v.as_str()) {
                        if entry.get("status").and_then(|v| v.as_str()) == Some("collected") {
                            collected_currencies.insert(code.to_string());
                        }
                    }
                }
                Err(e) => {
                    logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
                }
            }
        }
        logger::log_info(&format!("Loaded {} collected currencies from log", collected_currencies.len())).await;
    }

    // Get all currency pairs
    let currency_pairs = get_currency_pairs();
    let total_currencies = currency_pairs.len();
    logger::log_info(&format!("Found {} currency pairs to collect", total_currencies)).await;

    // Filter currencies that need collection
    let pending_pairs: Vec<CurrencyPair> = currency_pairs
        .into_iter()
        .filter(|pair| !collected_currencies.contains(&pair.code))
        .collect();

    let pending_count = pending_pairs.len();
    logger::log_info(&format!(
        " {} already collected, {} pending",
        collected_currencies.len(),
        pending_count
    )).await;

    if pending_count == 0 {
        logger::log_info(" ✓ All currencies already collected").await;
        mark_collection_complete(&state_path).await?;
        return Ok(collected_currencies.len());
    }

    // === PROCESSING PHASE: Collect FX rates ===

    // Shared counters
    let processed_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
    let success_count = Arc::new(AtomicUsize::new(collected_currencies.len()));
    let failed_count = Arc::new(AtomicUsize::new(0));

    // Log writer channel with batching and fsync
    let (log_tx, mut log_rx) = mpsc::channel::<LogCommand>(1000);

    // Spawn log writer task
    let log_writer_handle = {
        let log_path = log_path.clone();
        let processed_count = Arc::clone(&processed_count);
        let total_currencies = total_currencies;

        tokio::spawn(async move {
            let mut log_file = OpenOptions::new()
                .create(true)
                .append(true)
                .open(&log_path)
                .await
                .expect("Failed to open log file");

            let mut write_count = 0;
            let mut last_fsync = tokio::time::Instant::now();

            while let Some(cmd) = log_rx.recv().await {
                match cmd {
                    LogCommand::Write(entry) => {
                        let json_line = serde_json::to_string(&entry).expect("Serialization failed");
                        log_file.write_all(json_line.as_bytes()).await.expect("Write failed");
                        log_file.write_all(b"\n").await.expect("Write failed");

                        write_count += 1;

                        // Batched fsync
                        if write_count >= FSYNC_BATCH_SIZE
                            || last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS
                        {
                            log_file.flush().await.expect("Flush failed");
                            log_file.sync_all().await.expect("Fsync failed");
                            write_count = 0;
                            last_fsync = tokio::time::Instant::now();
                        }
                    }
                    LogCommand::Checkpoint => {
                        // Force fsync on checkpoint
                        log_file.flush().await.expect("Flush failed");
                        log_file.sync_all().await.expect("Fsync failed");
                        write_count = 0;
                        last_fsync = tokio::time::Instant::now();

                        let current = processed_count.load(Ordering::SeqCst);
                        logger::log_info(&format!(
                            " Checkpoint: {}/{} currencies processed",
                            current, total_currencies
                        )).await;
                    }
                    LogCommand::Shutdown => {
                        // Final fsync before shutdown
                        log_file.flush().await.expect("Flush failed");
                        log_file.sync_all().await.expect("Fsync failed");
                        break;
                    }
                }
            }
        })
    };
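
    // Durability note: with FSYNC_BATCH_SIZE = 5 and FSYNC_INTERVAL_SECS = 10,
    // roughly the last few logged entries since the previous fsync can be lost
    // on a hard crash; the log replay above simply re-collects those currencies
    // on the next run.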

    // Process currencies concurrently with task panic isolation
    let mut tasks = FuturesUnordered::new();
    let mut pending_iter = pending_pairs.into_iter();
    let semaphore = Arc::new(tokio::sync::Semaphore::new(CONCURRENCY_LIMIT));

    // Initial batch of tasks
    for _ in 0..CONCURRENCY_LIMIT.min(pending_count) {
        if let Some(pair) = pending_iter.next() {
            let task = spawn_collection_task(
                pair,
                Arc::clone(&yahoo_pool),
                paths.clone(),
                Arc::clone(&processed_count),
                Arc::clone(&success_count),
                Arc::clone(&failed_count),
                log_tx.clone(),
                Arc::clone(&semaphore),
                Arc::clone(shutdown_flag),
            );
            tasks.push(task);
        }
    }

    // Process tasks as they complete and spawn new ones
    let mut checkpoint_counter = 0;
    while let Some(_result) = tasks.next().await {
        // Check for shutdown
        if shutdown_flag.load(Ordering::SeqCst) {
            logger::log_warn("Shutdown signal received, stopping FX collection").await;
            break;
        }

        // Spawn new task if more pending
        if let Some(pair) = pending_iter.next() {
            let task = spawn_collection_task(
                pair,
                Arc::clone(&yahoo_pool),
                paths.clone(),
                Arc::clone(&processed_count),
                Arc::clone(&success_count),
                Arc::clone(&failed_count),
                log_tx.clone(),
                Arc::clone(&semaphore),
                Arc::clone(shutdown_flag),
            );
            tasks.push(task);
        }

        // Periodic checkpoint
        checkpoint_counter += 1;
        if checkpoint_counter % CHECKPOINT_INTERVAL == 0 {
            let _ = log_tx.send(LogCommand::Checkpoint).await;
        }
    }

    // Signal shutdown to log writer
    let _ = log_tx.send(LogCommand::Shutdown).await;

    // Wait for log writer to finish
    let _ = log_writer_handle.await;

    // Final statistics
    let final_success = success_count.load(Ordering::SeqCst);
    let final_failed = failed_count.load(Ordering::SeqCst);

    logger::log_info(&format!(
        " FX collection complete: {} succeeded, {} failed",
        final_success, final_failed
    )).await;

    // Mark as complete if not shutdown
    if !shutdown_flag.load(Ordering::SeqCst) {
        mark_collection_complete(&state_path).await?;
    }

    Ok(final_success)
}
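
// A minimal wiring sketch showing how `collect_fx_rates` might be driven with a
// Ctrl-C handler. Assumptions: the caller already has a `Config`, `DataPaths`
// and `YahooClientPool` from elsewhere in the crate, and tokio's `signal`
// feature is enabled. The function name is illustrative, not an existing API.
#[allow(dead_code)]
async fn collect_fx_rates_with_ctrl_c(
    paths: &DataPaths,
    config: &Config,
    yahoo_pool: Arc<YahooClientPool>,
) -> anyhow::Result<usize> {
    let shutdown_flag = Arc::new(AtomicBool::new(false));

    // Flip the flag on Ctrl-C; collect_fx_rates checks it between tasks.
    let flag_for_signal = Arc::clone(&shutdown_flag);
    tokio::spawn(async move {
        if tokio::signal::ctrl_c().await.is_ok() {
            flag_for_signal.store(true, Ordering::SeqCst);
        }
    });

    collect_fx_rates(paths, config, yahoo_pool, &shutdown_flag).await
}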

/// Spawn a collection task with panic isolation
fn spawn_collection_task(
    pair: CurrencyPair,
    yahoo_pool: Arc<YahooClientPool>,
    paths: DataPaths,
    processed_count: Arc<AtomicUsize>,
    success_count: Arc<AtomicUsize>,
    failed_count: Arc<AtomicUsize>,
    log_tx: mpsc::Sender<LogCommand>,
    semaphore: Arc<tokio::sync::Semaphore>,
    shutdown_flag: Arc<AtomicBool>,
) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        // Acquire semaphore permit
        let _permit = semaphore.acquire().await.expect("Semaphore closed");

        // Check shutdown before processing
        if shutdown_flag.load(Ordering::SeqCst) {
            return;
        }

        // Perform collection (panic-isolated)
        let result = collect_currency_chart(&pair, &yahoo_pool, &paths).await;

        // Update counters
        processed_count.fetch_add(1, Ordering::SeqCst);

        let status = match result {
            Ok(_) => {
                success_count.fetch_add(1, Ordering::SeqCst);
                logger::log_info(&format!(
                    " ✓ Collected {} ({})",
                    pair.code, pair.name
                )).await;
                "collected"
            }
            Err(e) => {
                failed_count.fetch_add(1, Ordering::SeqCst);
                logger::log_warn(&format!(
                    " ✗ Failed to collect {} ({}): {}",
                    pair.code, pair.name, e
                )).await;
                "failed"
            }
        };

        // Log result
        let log_entry = json!({
            "currency_code": pair.code,
            "currency_name": pair.name,
            "yahoo_symbol": pair.yahoo_symbol,
            "status": status,
            "timestamp": Utc::now().to_rfc3339(),
        });

        let _ = log_tx.send(LogCommand::Write(log_entry)).await;
    })
}

/// Collect chart data for a single currency pair
async fn collect_currency_chart(
    pair: &CurrencyPair,
    yahoo_pool: &Arc<YahooClientPool>,
    paths: &DataPaths,
) -> anyhow::Result<()> {
    // Get historical data from year 2000 to now
    let now = Utc::now().timestamp();
    let start_2000 = Utc
        .with_ymd_and_hms(2000, 1, 1, 0, 0, 0)
        .unwrap()
        .timestamp();

    // Fetch chart data from Yahoo
    let chart_data = yahoo_pool.get_chart_data(
        &pair.yahoo_symbol,
        "1d", // Daily interval
        start_2000,
        now,
    ).await?;

    // Validate we got data
    if chart_data.quotes.is_empty() {
        return Err(anyhow::anyhow!(
            "No chart data available for {} ({})",
            pair.code,
            pair.yahoo_symbol
        ));
    }

    // Save chart data to currency directory
    save_currency_chart(paths, &pair.code, &chart_data).await?;

    Ok(())
}

/// Save currency chart data to filesystem
async fn save_currency_chart(
    paths: &DataPaths,
    currency_code: &str,
    chart_data: &ChartData,
) -> anyhow::Result<()> {
    use tokio::fs;

    // Create directory structure: data/economic/currency/{code}/chart/
    let economic_dir = paths.data_dir().join("economic");
    let currency_dir = economic_dir.join("currency").join(currency_code);
    let chart_dir = currency_dir.join("chart");

    fs::create_dir_all(&chart_dir).await?;

    // Write chart data to data.jsonl
    let data_path = chart_dir.join("data.jsonl");
    let json_line = serde_json::to_string(chart_data)?;

    let mut file = fs::File::create(&data_path).await?;
    file.write_all(json_line.as_bytes()).await?;
    file.write_all(b"\n").await?;
    file.flush().await?;
    file.sync_all().await?; // Ensure data is persisted

    Ok(())
}
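
// Resulting on-disk layout for a single currency, e.g. EUR:
//   <data_dir>/economic/currency/EUR/chart/data.jsonl
// with the whole ChartData document serialized onto one JSON line.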

/// Count collected currencies (currencies with chart data)
async fn count_collected_currencies(paths: &DataPaths) -> anyhow::Result<usize> {
    let currency_dir = paths.data_dir().join("economic").join("currency");

    if !currency_dir.exists() {
        return Ok(0);
    }

    let mut count = 0;
    let mut entries = tokio::fs::read_dir(&currency_dir).await?;

    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.is_dir() {
            let chart_file = path.join("chart").join("data.jsonl");

            if chart_file.exists() {
                count += 1;
            }
        }
    }

    Ok(count)
}

/// Mark collection as complete in state file
async fn mark_collection_complete(state_path: &std::path::Path) -> anyhow::Result<()> {
    let collection_complete = json!({
        "fx_rates_collection_complete": true,
        "completed_at": Utc::now().to_rfc3339(),
    });

    let mut state_file = OpenOptions::new()
        .create(true)
        .append(true)
        .open(state_path)
        .await?;

    let state_line = serde_json::to_string(&collection_complete)?;
    state_file.write_all(state_line.as_bytes()).await?;
    state_file.write_all(b"\n").await?;
    state_file.flush().await?;
    state_file.sync_all().await?;

    Ok(())
}
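
// Appends a line of the form (timestamp shown is illustrative):
//   {"fx_rates_collection_complete":true,"completed_at":"2024-01-01T00:00:00+00:00"}
// which the early-exit check at the top of collect_fx_rates looks for.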

/// Commands accepted by the log writer task
enum LogCommand {
    Write(serde_json::Value),
    Checkpoint,
    Shutdown,
}
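
// A minimal sanity-check sketch for the "USD{code}=X" symbol convention used by
// CurrencyPair::new. The test module and test name are illustrative additions.
#[cfg(test)]
mod forex_symbol_tests {
    use super::*;

    #[test]
    fn currency_pair_builds_usd_base_yahoo_symbol() {
        let eur = CurrencyPair::new("EUR", "Euro");
        assert_eq!(eur.code, "EUR");
        assert_eq!(eur.name, "Euro");
        assert_eq!(eur.yahoo_symbol, "USDEUR=X");
    }
}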