readded yahoo
This commit is contained in:
@@ -1,15 +1,55 @@
|
||||
// src/corporate/update.rs - UPDATED WITH DATA INTEGRITY FIXES
|
||||
use super::{scraper::*, storage::*, helpers::*, types::*, openfigi::*};
|
||||
// src/corporate/update.rs - WITH ABORT-SAFE INCREMENTAL PERSISTENCE
|
||||
use super::{scraper::*, storage::*, helpers::*, types::*, openfigi::*, atomic_writer::*};
|
||||
use crate::config::Config;
|
||||
use crate::corporate::update_parallel::build_companies_jsonl_streaming_parallel;
|
||||
use crate::util::directories::DataPaths;
|
||||
use crate::util::logger;
|
||||
use crate::scraper::webdriver::ChromeDriverPool;
|
||||
use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule};
|
||||
|
||||
use chrono::Local;
|
||||
use std::result::Result::Ok;
|
||||
use chrono::{Local, Utc};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use tokio::fs::{File, OpenOptions};
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use serde_json::json;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::sync::mpsc;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Result of processing a single company
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CompanyProcessResult {
|
||||
Valid(CompanyCrossPlatformInfo),
|
||||
FilteredLowCap { name: String, market_cap: f64 },
|
||||
FilteredNoPrice { name: String },
|
||||
Failed { company: CompanyCrossPlatformInfo, error: String, is_transient: bool },
|
||||
}
|
||||
|
||||
/// Represents a write command to be serialized through the log writer
|
||||
enum LogCommand {
|
||||
Write(CompanyCrossPlatformInfo),
|
||||
Checkpoint,
|
||||
Shutdown,
|
||||
}
|
||||
|
||||
/// Result from processing a single company with priority
|
||||
struct CompanyTaskResult {
|
||||
company: CompanyCrossPlatformInfo,
|
||||
result: CompanyProcessResult,
|
||||
}
|
||||
|
||||
/// Check if a company needs processing (validation check)
|
||||
fn company_needs_processing(
|
||||
company: &CompanyCrossPlatformInfo,
|
||||
existing_companies: &HashMap<String, CompanyCrossPlatformInfo>,
|
||||
) -> bool {
|
||||
// If company exists in cleaned output, skip it
|
||||
!existing_companies.contains_key(&company.name)
|
||||
}
|
||||
|
||||
/// Main corporate update entry point with shutdown awareness
|
||||
pub async fn run_full_update(
|
||||
@@ -87,47 +127,46 @@ pub async fn run_full_update(
|
||||
}
|
||||
|
||||
logger::log_info("Step 6: Cleansing up companies with missing essential data...").await;
|
||||
let cleansed_count = companies_yahoo_jsonl(&paths).await?;
|
||||
let cleansed_count = companies_yahoo_cleansed_no_data(&paths).await?;
|
||||
logger::log_info(&format!(" ✓ {} companies found on Yahoo ready for further use in companies_yahoo.jsonl", cleansed_count)).await;
|
||||
|
||||
if shutdown_flag.load(Ordering::SeqCst) {
|
||||
logger::log_warn("Shutdown detected after companies.jsonl build").await;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
logger::log_info("Step 7: Cleansing up companies with too low profile (with abort-safe persistence)...").await;
|
||||
let proxy_pool = pool.get_proxy_pool()
|
||||
.ok_or_else(|| anyhow::anyhow!("ChromeDriverPool must be created with VPN proxy rotation enabled"))?;
|
||||
|
||||
let cleansed_count = companies_yahoo_cleansed_low_profile(&paths, _config, proxy_pool, shutdown_flag).await?;
|
||||
logger::log_info(&format!(" ✓ {} companies with sufficient profile ready for analytics", cleansed_count)).await;
|
||||
|
||||
if !shutdown_flag.load(Ordering::SeqCst) {
|
||||
logger::log_info("Step 7: Processing events (using index)...").await;
|
||||
logger::log_info("Step 8: Processing events (using index)...").await;
|
||||
let _event_index = build_event_index(&paths).await?;
|
||||
logger::log_info(" ✓ Event index built").await;
|
||||
} else {
|
||||
logger::log_warn("Shutdown detected, skipping event index build").await;
|
||||
}
|
||||
|
||||
logger::log_info("✓ Corporate update complete").await;
|
||||
logger::log_info("✅ Corporate update complete").await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
||||
/// Cleansing function to remove companies with missing essential yahoo data for integrity
|
||||
/// Has to contain a ticker with 'YAHOO:'; Entries with 'YAHOO:NO_RESULTS' and 'YAHOO:ERROR' are removed
|
||||
/// The rest stays unchanged
|
||||
///
|
||||
/// Uses state.jsonl to track completion and avoid re-running the cleansing operation
|
||||
/// The filtered output is written next to the input file as 'companies_yahoo.jsonl'
|
||||
/// Only execute when 'companies.jsonl' is present
|
||||
pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use serde_json::json;
|
||||
|
||||
pub async fn companies_yahoo_cleansed_no_data(paths: &DataPaths) -> Result<usize, anyhow::Error> {
|
||||
let data_path = paths.data_dir();
|
||||
|
||||
let input_path = data_path.join("companies.jsonl");
|
||||
let output_path = data_path.join("companies_yahoo.jsonl");
|
||||
let state_path = data_path.join("state.jsonl");
|
||||
|
||||
// Check if input file exists
|
||||
if !input_path.exists() {
|
||||
logger::log_warn("companies.jsonl not found, skipping cleansing").await;
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// Check if state file exists and cleansing was already completed
|
||||
if state_path.exists() {
|
||||
let state_content = tokio::fs::read_to_string(&state_path).await?;
|
||||
|
||||
@@ -137,10 +176,9 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
}
|
||||
|
||||
if let Ok(state) = serde_json::from_str::<serde_json::Value>(line) {
|
||||
if state.get("yahoo_companies").and_then(|v| v.as_bool()).unwrap_or(false) {
|
||||
if state.get("yahoo_companies_cleansed_no_data").and_then(|v| v.as_bool()).unwrap_or(false) {
|
||||
logger::log_info(" Yahoo companies cleansing already completed, reading existing file...").await;
|
||||
|
||||
// Count lines in existing output file
|
||||
if output_path.exists() {
|
||||
let output_content = tokio::fs::read_to_string(&output_path).await?;
|
||||
let count = output_content.lines()
|
||||
@@ -185,8 +223,6 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
}
|
||||
};
|
||||
|
||||
// Check if company has at least one valid YAHOO ticker
|
||||
// Valid means: starts with "YAHOO:" but is NOT "YAHOO:NO_RESULTS" or "YAHOO:ERROR"
|
||||
let has_valid_yahoo = company.isin_tickers_map
|
||||
.values()
|
||||
.flatten()
|
||||
@@ -197,7 +233,6 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
});
|
||||
|
||||
if has_valid_yahoo {
|
||||
// Write the company to the filtered output
|
||||
let json_line = serde_json::to_string(&company)?;
|
||||
output_file.write_all(json_line.as_bytes()).await?;
|
||||
output_file.write_all(b"\n").await?;
|
||||
@@ -205,12 +240,10 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
} else {
|
||||
removed_count += 1;
|
||||
if removed_count <= 5 {
|
||||
// Log first few removals for debugging
|
||||
logger::log_info(&format!(" Removed company '{}' (no valid Yahoo ticker)", company.name)).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Progress indicator for large files
|
||||
if total_count % 1000 == 0 {
|
||||
logger::log_info(&format!(" Processed {} companies...", total_count)).await;
|
||||
}
|
||||
@@ -223,9 +256,8 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
total_count, valid_count, removed_count
|
||||
)).await;
|
||||
|
||||
// Write state file to mark completion
|
||||
let yahoo_companies = json!({
|
||||
"yahoo_companies": true,
|
||||
"yahoo_companies_cleansed_no_data": true,
|
||||
"completed_at": chrono::Utc::now().to_rfc3339(),
|
||||
});
|
||||
|
||||
@@ -240,6 +272,768 @@ pub async fn companies_yahoo_jsonl(paths: &DataPaths) -> anyhow::Result<usize> {
|
||||
Ok(valid_count)
|
||||
}
|
||||
|
||||
/// Yahoo Low Profile Cleansing WITH ABORT-SAFE INCREMENTAL PERSISTENCE
|
||||
///
|
||||
/// # Features
|
||||
/// - ✅ Graceful shutdown (abort-safe)
|
||||
/// - ✅ Task panic isolation (tasks fail independently)
|
||||
/// - ✅ Crash-safe persistence (checkpoint + log with fsync)
|
||||
/// - ✅ Smart skip logic (only process incomplete data)
|
||||
/// - Uses pending queue instead of retry mechanism
|
||||
/// - Reuses companies_update.log for persistence
|
||||
///
|
||||
/// # Persistence Strategy
|
||||
/// - Checkpoint: companies_yahoo_cleaned.jsonl (atomic state)
|
||||
/// - Log: companies_update.log (append-only updates)
|
||||
/// - On restart: Load checkpoint + replay log
|
||||
/// - Periodic checkpoints (every 50 companies)
|
||||
/// - Batched fsync (every 10 writes or 10 seconds)
|
||||
pub async fn companies_yahoo_cleansed_low_profile(
|
||||
paths: &DataPaths,
|
||||
config: &Config,
|
||||
proxy_pool: Arc<crate::scraper::docker_vpn_proxy::DockerVpnProxyPool>,
|
||||
shutdown_flag: &Arc<AtomicBool>,
|
||||
) -> anyhow::Result<usize> {
|
||||
// Configuration constants
|
||||
const CHECKPOINT_INTERVAL: usize = 50;
|
||||
const FSYNC_BATCH_SIZE: usize = 10;
|
||||
const FSYNC_INTERVAL_SECS: u64 = 10;
|
||||
const CONCURRENCY_LIMIT: usize = 50; // Limit parallel validation tasks
|
||||
|
||||
let data_path = paths.data_dir();
|
||||
|
||||
// File paths (reusing companies_update.log)
|
||||
let input_path = data_path.join("companies_yahoo.jsonl");
|
||||
let checkpoint_path = data_path.join("companies_yahoo_cleaned.jsonl");
|
||||
let log_path = data_path.join("companies_update.log");
|
||||
|
||||
// Check input exists
|
||||
if !input_path.exists() {
|
||||
logger::log_warn(" companies_yahoo.jsonl not found, skipping low profile cleansing").await;
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
// === RECOVERY PHASE: Load checkpoint + replay log ===
|
||||
let mut existing_companies: HashMap<String, CompanyCrossPlatformInfo> = HashMap::new();
|
||||
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
|
||||
if checkpoint_path.exists() {
|
||||
logger::log_info("Loading checkpoint from companies_yahoo_cleaned.jsonl...").await;
|
||||
let checkpoint_content = tokio::fs::read_to_string(&checkpoint_path).await?;
|
||||
|
||||
for line in checkpoint_content.lines() {
|
||||
if line.trim().is_empty() || !line.ends_with('}') {
|
||||
continue; // Skip incomplete lines
|
||||
}
|
||||
|
||||
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
|
||||
Ok(company) => {
|
||||
processed_names.insert(company.name.clone());
|
||||
existing_companies.insert(company.name.clone(), company);
|
||||
}
|
||||
Err(e) => {
|
||||
logger::log_warn(&format!("Skipping invalid checkpoint line: {}", e)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
logger::log_info(&format!("Loaded checkpoint with {} companies", existing_companies.len())).await;
|
||||
}
|
||||
|
||||
if log_path.exists() {
|
||||
logger::log_info("Replaying update log...").await;
|
||||
let log_content = tokio::fs::read_to_string(&log_path).await?;
|
||||
let mut replayed = 0;
|
||||
|
||||
for line in log_content.lines() {
|
||||
if line.trim().is_empty() || !line.ends_with('}') {
|
||||
continue; // Skip incomplete lines
|
||||
}
|
||||
|
||||
match serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
|
||||
Ok(company) => {
|
||||
processed_names.insert(company.name.clone());
|
||||
existing_companies.insert(company.name.clone(), company);
|
||||
replayed += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
logger::log_warn(&format!("Skipping invalid log line: {}", e)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
if replayed > 0 {
|
||||
logger::log_info(&format!("Replayed {} updates from log", replayed)).await;
|
||||
}
|
||||
}
|
||||
|
||||
// === LOAD INPUT COMPANIES ===
|
||||
logger::log_info(&format!("Loading companies from: {:?}", input_path)).await;
|
||||
let input_companies = load_companies_from_jsonl(&input_path).await?;
|
||||
logger::log_info(&format!("Loaded {} companies from input", input_companies.len())).await;
|
||||
|
||||
// === BUILD PENDING LIST (smart skip logic) ===
|
||||
let mut pending: Vec<CompanyCrossPlatformInfo> = input_companies
|
||||
.into_iter()
|
||||
.filter(|company| company_needs_processing(company, &existing_companies))
|
||||
.collect();
|
||||
|
||||
logger::log_info(&format!(
|
||||
"Initial scan: {} companies need processing ({} already complete)",
|
||||
pending.len(),
|
||||
existing_companies.len()
|
||||
)).await;
|
||||
|
||||
if pending.is_empty() {
|
||||
logger::log_info(" ✓ All companies already processed").await;
|
||||
return Ok(existing_companies.len());
|
||||
}
|
||||
|
||||
// === SETUP LOG WRITER TASK ===
|
||||
let (write_tx, mut write_rx) = mpsc::channel::<LogCommand>(1000);
|
||||
|
||||
let log_file_init = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&log_path)
|
||||
.await?;
|
||||
|
||||
let checkpoint_path_clone = checkpoint_path.clone();
|
||||
let log_path_clone = log_path.clone();
|
||||
let existing_companies_writer = Arc::new(tokio::sync::Mutex::new(existing_companies.clone()));
|
||||
let existing_companies_writer_for_task = Arc::clone(&existing_companies_writer);
|
||||
|
||||
let write_tx_for_writer = write_tx.clone();
|
||||
let writer_task = tokio::spawn(async move {
|
||||
let mut log_file = log_file_init;
|
||||
let mut writes_since_fsync = 0;
|
||||
let mut last_fsync = std::time::Instant::now();
|
||||
let mut updates_since_checkpoint = 0;
|
||||
let mut count = 0;
|
||||
let mut new_count = 0;
|
||||
let mut updated_count = 0;
|
||||
|
||||
while let Some(cmd) = write_rx.recv().await {
|
||||
match cmd {
|
||||
LogCommand::Write(company) => {
|
||||
// Write to log
|
||||
let line = serde_json::to_string(&company).unwrap();
|
||||
if let Err(e) = log_file.write_all(line.as_bytes()).await {
|
||||
logger::log_error(&format!("Failed to write to log: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
if let Err(e) = log_file.write_all(b"\n").await {
|
||||
logger::log_error(&format!("Failed to write newline: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
|
||||
writes_since_fsync += 1;
|
||||
updates_since_checkpoint += 1;
|
||||
count += 1;
|
||||
|
||||
// Update in-memory state
|
||||
let mut existing_companies = existing_companies_writer_for_task.lock().await;
|
||||
let is_update = existing_companies.contains_key(&company.name);
|
||||
existing_companies.insert(company.name.clone(), company);
|
||||
drop(existing_companies);
|
||||
|
||||
if is_update {
|
||||
updated_count += 1;
|
||||
} else {
|
||||
new_count += 1;
|
||||
}
|
||||
|
||||
// Batched + time-based fsync
|
||||
let should_fsync = writes_since_fsync >= FSYNC_BATCH_SIZE
|
||||
|| last_fsync.elapsed().as_secs() >= FSYNC_INTERVAL_SECS;
|
||||
|
||||
if should_fsync {
|
||||
if let Err(e) = log_file.flush().await {
|
||||
logger::log_error(&format!("Failed to flush: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
if let Err(e) = log_file.sync_data().await {
|
||||
logger::log_error(&format!("Failed to fsync: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
writes_since_fsync = 0;
|
||||
last_fsync = std::time::Instant::now();
|
||||
}
|
||||
}
|
||||
LogCommand::Checkpoint => {
|
||||
if let Err(e) = log_file.flush().await {
|
||||
logger::log_error(&format!("Failed to flush before checkpoint: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
if let Err(e) = log_file.sync_data().await {
|
||||
logger::log_error(&format!("Failed to fsync before checkpoint: {}", e)).await;
|
||||
break;
|
||||
}
|
||||
|
||||
let existing_companies = existing_companies_writer_for_task.lock().await;
|
||||
let companies_vec: Vec<_> = existing_companies.values().cloned().collect();
|
||||
drop(existing_companies);
|
||||
|
||||
let temp_path = checkpoint_path_clone.with_extension("tmp");
|
||||
match tokio::fs::File::create(&temp_path).await {
|
||||
Ok(mut temp_file) => {
|
||||
let mut checkpoint_ok = true;
|
||||
for company in &companies_vec {
|
||||
if let Ok(line) = serde_json::to_string(company) {
|
||||
if temp_file.write_all(line.as_bytes()).await.is_err() ||
|
||||
temp_file.write_all(b"\n").await.is_err() {
|
||||
checkpoint_ok = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if checkpoint_ok {
|
||||
if temp_file.flush().await.is_ok() &&
|
||||
temp_file.sync_data().await.is_ok() {
|
||||
drop(temp_file);
|
||||
|
||||
if tokio::fs::rename(&temp_path, &checkpoint_path_clone).await.is_ok() {
|
||||
if tokio::fs::remove_file(&log_path_clone).await.is_ok() {
|
||||
logger::log_info(&format!(
|
||||
"✓ Checkpoint created ({} companies), log cleared",
|
||||
companies_vec.len()
|
||||
)).await;
|
||||
|
||||
if let Ok(new_log) = OpenOptions::new()
|
||||
.create(true)
|
||||
.append(true)
|
||||
.open(&log_path_clone)
|
||||
.await {
|
||||
log_file = new_log;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
logger::log_error(&format!("Failed to create checkpoint temp file: {}", e)).await;
|
||||
}
|
||||
}
|
||||
updates_since_checkpoint = 0;
|
||||
}
|
||||
LogCommand::Shutdown => {
|
||||
logger::log_info("Writer shutting down...").await;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Periodic checkpoint trigger
|
||||
if updates_since_checkpoint >= CHECKPOINT_INTERVAL {
|
||||
let _ = write_tx_for_writer.send(LogCommand::Checkpoint).await;
|
||||
}
|
||||
}
|
||||
|
||||
// Final fsync
|
||||
let _ = log_file.flush().await;
|
||||
let _ = log_file.sync_data().await;
|
||||
|
||||
logger::log_info(&format!(
|
||||
"Writer finished: {} total ({} new, {} updated)",
|
||||
count, new_count, updated_count
|
||||
)).await;
|
||||
|
||||
(count, new_count, updated_count)
|
||||
});
|
||||
|
||||
// === CREATE YAHOO CLIENT POOL ===
|
||||
logger::log_info("Creating YahooClientPool with proxy rotation...").await;
|
||||
let yahoo_pool = Arc::new(YahooClientPool::new(proxy_pool, config, None).await?);
|
||||
logger::log_info(&format!("✓ YahooClientPool ready with {} clients", yahoo_pool.num_clients().await)).await;
|
||||
|
||||
// Wrap paths in Arc for safe sharing across tasks
|
||||
let paths = Arc::new((*paths).clone());
|
||||
|
||||
// === MAIN PROCESSING LOOP WITH TASK PANIC ISOLATION ===
|
||||
let total = pending.len();
|
||||
let mut tasks = FuturesUnordered::new();
|
||||
|
||||
// Counters
|
||||
let processed = Arc::new(AtomicUsize::new(0));
|
||||
let valid_count = Arc::new(AtomicUsize::new(0));
|
||||
let filtered_low_cap = Arc::new(AtomicUsize::new(0));
|
||||
let filtered_no_price = Arc::new(AtomicUsize::new(0));
|
||||
let failed_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
// Spawn initial batch
|
||||
for _ in 0..CONCURRENCY_LIMIT.min(pending.len()) {
|
||||
if let Some(company) = pending.pop() {
|
||||
spawn_validation_task(
|
||||
company,
|
||||
&yahoo_pool,
|
||||
&paths,
|
||||
&write_tx,
|
||||
shutdown_flag,
|
||||
&processed,
|
||||
&valid_count,
|
||||
&filtered_low_cap,
|
||||
&filtered_no_price,
|
||||
&failed_count,
|
||||
total,
|
||||
&mut tasks,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Process results and spawn new tasks (with task panic isolation)
|
||||
while let Some(task_result) = tasks.next().await {
|
||||
// Check for shutdown
|
||||
if shutdown_flag.load(Ordering::SeqCst) {
|
||||
logger::log_warn("Shutdown signal received, stopping processing").await;
|
||||
break;
|
||||
}
|
||||
|
||||
match task_result {
|
||||
Ok(Ok(Some(_result))) => {
|
||||
// Success - spawn next task
|
||||
if let Some(company) = pending.pop() {
|
||||
spawn_validation_task(
|
||||
company,
|
||||
&yahoo_pool,
|
||||
&paths,
|
||||
&write_tx,
|
||||
shutdown_flag,
|
||||
&processed,
|
||||
&valid_count,
|
||||
&filtered_low_cap,
|
||||
&filtered_no_price,
|
||||
&failed_count,
|
||||
total,
|
||||
&mut tasks,
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(Ok(None)) => {
|
||||
// Filtered or failed - spawn next task
|
||||
if let Some(company) = pending.pop() {
|
||||
spawn_validation_task(
|
||||
company,
|
||||
&yahoo_pool,
|
||||
&paths,
|
||||
&write_tx,
|
||||
shutdown_flag,
|
||||
&processed,
|
||||
&valid_count,
|
||||
&filtered_low_cap,
|
||||
&filtered_no_price,
|
||||
&failed_count,
|
||||
total,
|
||||
&mut tasks,
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
// Processing error
|
||||
logger::log_error(&format!("Company processing error: {}", e)).await;
|
||||
|
||||
if let Some(company) = pending.pop() {
|
||||
spawn_validation_task(
|
||||
company,
|
||||
&yahoo_pool,
|
||||
&paths,
|
||||
&write_tx,
|
||||
shutdown_flag,
|
||||
&processed,
|
||||
&valid_count,
|
||||
&filtered_low_cap,
|
||||
&filtered_no_price,
|
||||
&failed_count,
|
||||
total,
|
||||
&mut tasks,
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// Task panic (isolated - doesn't crash entire process)
|
||||
logger::log_error(&format!("Task panic: {}", e)).await;
|
||||
|
||||
if let Some(company) = pending.pop() {
|
||||
spawn_validation_task(
|
||||
company,
|
||||
&yahoo_pool,
|
||||
&paths,
|
||||
&write_tx,
|
||||
shutdown_flag,
|
||||
&processed,
|
||||
&valid_count,
|
||||
&filtered_low_cap,
|
||||
&filtered_no_price,
|
||||
&failed_count,
|
||||
total,
|
||||
&mut tasks,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger::log_info("Main processing loop completed").await;
|
||||
|
||||
// Signal writer to finish
|
||||
let _ = write_tx.send(LogCommand::Checkpoint).await;
|
||||
let _ = write_tx.send(LogCommand::Shutdown).await;
|
||||
drop(write_tx);
|
||||
|
||||
// Wait for writer to finish
|
||||
let (final_count, final_new, final_updated) = writer_task.await
|
||||
.unwrap_or((0, 0, 0));
|
||||
|
||||
let final_valid = valid_count.load(Ordering::SeqCst);
|
||||
let final_filtered_low_cap = filtered_low_cap.load(Ordering::SeqCst);
|
||||
let final_filtered_no_price = filtered_no_price.load(Ordering::SeqCst);
|
||||
let final_failed = failed_count.load(Ordering::SeqCst);
|
||||
|
||||
logger::log_info(&format!(
|
||||
"✅ Completed: {} total companies ({} new, {} updated)",
|
||||
final_count, final_new, final_updated
|
||||
)).await;
|
||||
logger::log_info(&format!(
|
||||
" Valid: {}, Filtered (low cap): {}, Filtered (no price): {}, Failed: {}",
|
||||
final_valid, final_filtered_low_cap, final_filtered_no_price, final_failed
|
||||
)).await;
|
||||
|
||||
// Shutdown Yahoo pool
|
||||
yahoo_pool.shutdown().await?;
|
||||
|
||||
Ok(final_valid)
|
||||
}
|
||||
|
||||
/// Helper function to spawn a validation task (reduces code duplication)
|
||||
fn spawn_validation_task(
|
||||
company: CompanyCrossPlatformInfo,
|
||||
yahoo_pool: &Arc<YahooClientPool>,
|
||||
paths: &Arc<DataPaths>,
|
||||
write_tx: &mpsc::Sender<LogCommand>,
|
||||
shutdown_flag: &Arc<AtomicBool>,
|
||||
processed: &Arc<AtomicUsize>,
|
||||
valid_count: &Arc<AtomicUsize>,
|
||||
filtered_low_cap: &Arc<AtomicUsize>,
|
||||
filtered_no_price: &Arc<AtomicUsize>,
|
||||
failed_count: &Arc<AtomicUsize>,
|
||||
total: usize,
|
||||
tasks: &mut FuturesUnordered<tokio::task::JoinHandle<anyhow::Result<Option<CompanyTaskResult>>>>,
|
||||
) {
|
||||
let yahoo_pool_clone = Arc::clone(yahoo_pool);
|
||||
let paths_clone = Arc::clone(paths);
|
||||
let shutdown_flag_clone = Arc::clone(shutdown_flag);
|
||||
let write_tx_clone = write_tx.clone();
|
||||
let processed_clone = Arc::clone(processed);
|
||||
let valid_count_clone = Arc::clone(valid_count);
|
||||
let filtered_low_cap_clone = Arc::clone(filtered_low_cap);
|
||||
let filtered_no_price_clone = Arc::clone(filtered_no_price);
|
||||
let failed_count_clone = Arc::clone(failed_count);
|
||||
|
||||
let task = tokio::spawn(async move {
|
||||
// Check shutdown at start
|
||||
if shutdown_flag_clone.load(Ordering::SeqCst) {
|
||||
return Ok::<_, anyhow::Error>(None);
|
||||
}
|
||||
|
||||
let result = process_company_with_validation(
|
||||
&company,
|
||||
&yahoo_pool_clone,
|
||||
&*paths_clone,
|
||||
).await;
|
||||
|
||||
let task_result = match result {
|
||||
CompanyProcessResult::Valid(validated_company) => {
|
||||
// Send to writer
|
||||
let _ = write_tx_clone.send(LogCommand::Write(validated_company.clone())).await;
|
||||
valid_count_clone.fetch_add(1, Ordering::SeqCst);
|
||||
Some(CompanyTaskResult {
|
||||
company: validated_company.clone(),
|
||||
result: CompanyProcessResult::Valid(validated_company),
|
||||
})
|
||||
}
|
||||
CompanyProcessResult::FilteredLowCap { name, market_cap } => {
|
||||
filtered_low_cap_clone.fetch_add(1, Ordering::SeqCst);
|
||||
if filtered_low_cap_clone.load(Ordering::SeqCst) <= 10 {
|
||||
logger::log_info(&format!(" Filtered {} - low market cap: {:.0} EUR", name, market_cap)).await;
|
||||
}
|
||||
None
|
||||
}
|
||||
CompanyProcessResult::FilteredNoPrice { name } => {
|
||||
filtered_no_price_clone.fetch_add(1, Ordering::SeqCst);
|
||||
if filtered_no_price_clone.load(Ordering::SeqCst) <= 10 {
|
||||
logger::log_info(&format!(" Filtered {} - no recent price data", name)).await;
|
||||
}
|
||||
None
|
||||
}
|
||||
CompanyProcessResult::Failed { company: failed_company, error, is_transient: _ } => {
|
||||
failed_count_clone.fetch_add(1, Ordering::SeqCst);
|
||||
logger::log_warn(&format!(" Failed to process '{}': {}", failed_company.name, error)).await;
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Progress reporting
|
||||
let current = processed_clone.fetch_add(1, Ordering::SeqCst) + 1;
|
||||
if current % 100 == 0 {
|
||||
logger::log_info(&format!(
|
||||
"Progress: {}/{} ({} valid, {} low cap, {} no price, {} failed)",
|
||||
current, total,
|
||||
valid_count_clone.load(Ordering::SeqCst),
|
||||
filtered_low_cap_clone.load(Ordering::SeqCst),
|
||||
filtered_no_price_clone.load(Ordering::SeqCst),
|
||||
failed_count_clone.load(Ordering::SeqCst)
|
||||
)).await;
|
||||
}
|
||||
|
||||
Ok(task_result)
|
||||
});
|
||||
|
||||
tasks.push(task);
|
||||
}
|
||||
|
||||
/// Process a single company with full error categorization
|
||||
async fn process_company_with_validation(
|
||||
company: &CompanyCrossPlatformInfo,
|
||||
yahoo_pool: &Arc<YahooClientPool>,
|
||||
paths: &DataPaths,
|
||||
) -> CompanyProcessResult {
|
||||
// Extract Yahoo ticker
|
||||
let ticker = match extract_first_yahoo_ticker(company) {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
return CompanyProcessResult::Failed {
|
||||
company: company.clone(),
|
||||
error: "No valid Yahoo ticker found".to_string(),
|
||||
is_transient: false, // Permanent - no ticker means no data
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Fetch core modules from Yahoo
|
||||
let summary = match yahoo_pool.get_quote_summary(
|
||||
&ticker,
|
||||
&QuoteSummaryModule::core_modules(),
|
||||
).await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
let error_msg = e.to_string();
|
||||
let is_transient = is_transient_error(&error_msg);
|
||||
|
||||
return CompanyProcessResult::Failed {
|
||||
company: company.clone(),
|
||||
error: format!("API error fetching summary: {}", error_msg),
|
||||
is_transient,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Validate market cap
|
||||
let market_cap = extract_market_cap(&summary);
|
||||
if market_cap < 1_000_000.0 {
|
||||
return CompanyProcessResult::FilteredLowCap {
|
||||
name: company.name.clone(),
|
||||
market_cap,
|
||||
};
|
||||
}
|
||||
|
||||
// Validate recent price activity
|
||||
let has_recent_price = match check_recent_price_activity(yahoo_pool, &ticker).await {
|
||||
Ok(has) => has,
|
||||
Err(e) => {
|
||||
let error_msg = e.to_string();
|
||||
let is_transient = is_transient_error(&error_msg);
|
||||
|
||||
return CompanyProcessResult::Failed {
|
||||
company: company.clone(),
|
||||
error: format!("API error fetching price history: {}", error_msg),
|
||||
is_transient,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
if !has_recent_price {
|
||||
return CompanyProcessResult::FilteredNoPrice {
|
||||
name: company.name.clone(),
|
||||
};
|
||||
}
|
||||
|
||||
// Save core data
|
||||
if let Err(e) = save_company_core_data(paths, &company.name, &summary).await {
|
||||
logger::log_warn(&format!(
|
||||
" Failed to save core data for {}: {}",
|
||||
company.name, e
|
||||
)).await;
|
||||
}
|
||||
|
||||
CompanyProcessResult::Valid(company.clone())
|
||||
}
|
||||
|
||||
/// Classify an error message as transient (worth retrying) or permanent.
///
/// Matching is a case-insensitive substring search. Transient markers are
/// checked first and win over permanent ones; a message that matches neither
/// list is treated as transient.
fn is_transient_error(error: &str) -> bool {
    // Network trouble, rate limiting, timeouts and server-side errors.
    const TRANSIENT_MARKERS: [&str; 12] = [
        "timeout",
        "timed out",
        "connection",
        "network",
        "rate limit",
        "too many requests",
        "429",
        "503",
        "502",
        "500",
        "temporarily",
        "unavailable",
    ];
    // Invalid ticker, missing data, parsing and client-side errors.
    const PERMANENT_MARKERS: [&str; 8] = [
        "404",
        "not found",
        "invalid",
        "no data",
        "parse error",
        "400",
        "401",
        "403",
    ];

    let normalized = error.to_lowercase();
    let mentions_any =
        |markers: &[&str]| markers.iter().any(|marker| normalized.contains(*marker));

    if mentions_any(&TRANSIENT_MARKERS) {
        return true;
    }

    // Unknown errors fall through to `true` (safer to retry).
    !mentions_any(&PERMANENT_MARKERS)
}
|
||||
|
||||
/// Load companies from JSONL file
|
||||
async fn load_companies_from_jsonl(path: &std::path::Path) -> anyhow::Result<Vec<CompanyCrossPlatformInfo>> {
|
||||
let content = tokio::fs::read_to_string(path).await?;
|
||||
let mut companies = Vec::new();
|
||||
|
||||
for line in content.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Ok(company) = serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
|
||||
companies.push(company);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(companies)
|
||||
}
|
||||
|
||||
fn extract_first_yahoo_ticker(company: &CompanyCrossPlatformInfo) -> Option<String> {
|
||||
for tickers in company.isin_tickers_map.values() {
|
||||
for ticker in tickers {
|
||||
if ticker.starts_with("YAHOO:")
|
||||
&& ticker != "YAHOO:NO_RESULTS"
|
||||
&& ticker != "YAHOO:ERROR"
|
||||
{
|
||||
return Some(ticker.trim_start_matches("YAHOO:").to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn extract_market_cap(summary: &crate::scraper::yahoo::QuoteSummary) -> f64 {
|
||||
let price_module = match summary.modules.get("price") {
|
||||
Some(m) => m,
|
||||
None => return 0.0,
|
||||
};
|
||||
|
||||
let market_cap_raw = price_module
|
||||
.get("marketCap")
|
||||
.and_then(|v| v.get("raw"))
|
||||
.and_then(|v| v.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
|
||||
let currency = price_module
|
||||
.get("currency")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("USD");
|
||||
|
||||
let market_cap_eur = match currency {
|
||||
"EUR" => market_cap_raw,
|
||||
"USD" => market_cap_raw * 0.92,
|
||||
"GBP" => market_cap_raw * 1.17,
|
||||
"JPY" => market_cap_raw * 0.0061,
|
||||
"CHF" => market_cap_raw * 1.05,
|
||||
_ => market_cap_raw * 0.92,
|
||||
};
|
||||
|
||||
market_cap_eur
|
||||
}
|
||||
|
||||
async fn check_recent_price_activity(
|
||||
yahoo_pool: &Arc<YahooClientPool>,
|
||||
ticker: &str,
|
||||
) -> anyhow::Result<bool> {
|
||||
let now = Utc::now().timestamp();
|
||||
let one_year_ago = now - (365 * 24 * 60 * 60);
|
||||
let sixty_days_ago = now - (60 * 24 * 60 * 60);
|
||||
|
||||
let chart_data = yahoo_pool.get_chart_data(
|
||||
ticker,
|
||||
"1d",
|
||||
sixty_days_ago,
|
||||
now,
|
||||
).await?;
|
||||
|
||||
if chart_data.quotes.is_empty() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
let most_recent_timestamp = chart_data.quotes
|
||||
.iter()
|
||||
.map(|q| q.timestamp)
|
||||
.max()
|
||||
.unwrap_or(0);
|
||||
|
||||
Ok(most_recent_timestamp >= one_year_ago)
|
||||
}
|
||||
|
||||
async fn save_company_core_data(
|
||||
paths: &DataPaths,
|
||||
company_name: &str,
|
||||
summary: &crate::scraper::yahoo::QuoteSummary,
|
||||
) -> anyhow::Result<()> {
|
||||
use tokio::fs;
|
||||
|
||||
let safe_name = company_name
|
||||
.replace("/", "_")
|
||||
.replace("\\", "_")
|
||||
.replace(":", "_")
|
||||
.replace("*", "_")
|
||||
.replace("?", "_")
|
||||
.replace("\"", "_")
|
||||
.replace("<", "_")
|
||||
.replace(">", "_")
|
||||
.replace("|", "_");
|
||||
|
||||
let company_dir = paths.corporate_dir().join(&safe_name).join("core");
|
||||
fs::create_dir_all(&company_dir).await?;
|
||||
|
||||
let data_path = company_dir.join("data.jsonl");
|
||||
let json_line = serde_json::to_string(summary)?;
|
||||
|
||||
let mut file = fs::File::create(&data_path).await?;
|
||||
file.write_all(json_line.as_bytes()).await?;
|
||||
file.write_all(b"\n").await?;
|
||||
file.flush().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_most_recent_figi_date_dir(paths: &DataPaths) -> anyhow::Result<Option<std::path::PathBuf>> {
|
||||
let map_cache_dir = paths.cache_gleif_openfigi_map_dir();
|
||||
|
||||
|
||||
@@ -316,7 +316,7 @@ pub async fn fetch_earnings_with_pool(
|
||||
ticker: &str,
|
||||
) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
let ticker = ticker.to_string();
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
|
||||
|
||||
let ticker_cloned = ticker.clone();
|
||||
|
||||
|
||||
@@ -15,3 +15,7 @@ pub use monitoring::{init_monitoring, ConfigSnapshot, MonitoringEvent};
|
||||
pub use config::Config;
|
||||
pub use scraper::webdriver::{ChromeDriverPool, ChromeInstance, ScrapeTask};
|
||||
pub use util::logger;
|
||||
pub use scraper::yahoo::{
|
||||
YahooClient, YahooClientPool, QuoteSummaryModule, QuoteSummary, ChartData,
|
||||
OptionsData, SearchResult
|
||||
};
|
||||
|
||||
@@ -250,6 +250,35 @@
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
/* Yahoo Stats */
|
||||
.yahoo-stats-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
||||
gap: 12px;
|
||||
margin-top: 10px;
|
||||
}
|
||||
|
||||
.yahoo-stat-box {
|
||||
background: #2a3a4a;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
text-align: center;
|
||||
border-left: 4px solid #FF9800;
|
||||
}
|
||||
|
||||
.yahoo-stat-value {
|
||||
font-size: 28px;
|
||||
font-weight: bold;
|
||||
color: #FF9800;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.yahoo-stat-label {
|
||||
font-size: 11px;
|
||||
color: #aaa;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
/* Logs */
|
||||
.log-container {
|
||||
max-height: 300px;
|
||||
@@ -339,6 +368,31 @@
|
||||
.pulse {
|
||||
animation: pulse 2s infinite;
|
||||
}
|
||||
|
||||
/* Yahoo Client Box */
|
||||
.yahoo-client-box {
|
||||
background: #2a3a4a;
|
||||
border: 2px solid #FF9800;
|
||||
border-radius: 5px;
|
||||
padding: 12px;
|
||||
display: flex;
|
||||
gap: 0;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.yahoo-client-side {
|
||||
flex: 1;
|
||||
padding: 12px;
|
||||
}
|
||||
|
||||
.yahoo-client-side.left {
|
||||
background: #3a4a5a;
|
||||
border-right: 1px solid #555;
|
||||
}
|
||||
|
||||
.yahoo-client-side.right {
|
||||
background: #2a3a4a;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@@ -363,6 +417,13 @@
|
||||
<div class="instance-grid" id="instances"></div>
|
||||
</div>
|
||||
|
||||
<!-- Yahoo API Section -->
|
||||
<div class="section">
|
||||
<div class="section-title">📈 YAHOO API METRICS</div>
|
||||
<div class="yahoo-stats-grid" id="yahoo-stats"></div>
|
||||
<div class="instance-grid" id="yahoo-clients"></div>
|
||||
</div>
|
||||
|
||||
<!-- Global Metrics Section -->
|
||||
<div class="section">
|
||||
<div class="section-title">📊 GLOBAL METRICS</div>
|
||||
@@ -432,6 +493,8 @@
|
||||
updateConfig(state.config);
|
||||
updateInstances(state.instances);
|
||||
updateGlobalStats(state.global);
|
||||
updateYahooStats(state.global);
|
||||
updateYahooClients(state.yahoo_clients);
|
||||
updateLogs(state.logs);
|
||||
}
|
||||
|
||||
@@ -480,6 +543,10 @@
|
||||
? ((inst.success_count / inst.total_requests) * 100).toFixed(1)
|
||||
: '0.0';
|
||||
|
||||
const yahooSuccessRate = inst.yahoo_requests > 0
|
||||
? ((inst.yahoo_success / inst.yahoo_requests) * 100).toFixed(1)
|
||||
: '0.0';
|
||||
|
||||
return `
|
||||
<div class="instance-box ${statusClass}">
|
||||
<div class="instance-side">
|
||||
@@ -511,6 +578,16 @@
|
||||
${successRate}%
|
||||
</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Yahoo Requests</span>
|
||||
<span class="metric-value">${inst.yahoo_requests}</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Yahoo Rate</span>
|
||||
<span class="metric-value ${yahooSuccessRate < 50 ? 'danger' : yahooSuccessRate < 80 ? 'warning' : ''}">
|
||||
${yahooSuccessRate}%
|
||||
</span>
|
||||
</div>
|
||||
<div class="metric-row">
|
||||
<span class="metric-label">Last Activity</span>
|
||||
<span class="metric-value">${inst.last_activity}</span>
|
||||
@@ -556,6 +633,115 @@
|
||||
}).join('');
|
||||
}
|
||||
|
||||
// Renders the aggregate Yahoo API counters from `global` into the #yahoo-stats grid.
// Missing counters are displayed as 0; the success rate is "0.0" until a request exists.
function updateYahooStats(global) {
    const target = document.getElementById('yahoo-stats');

    let ratePercent = '0.0';
    if (global.total_yahoo_requests > 0) {
        const ratio = global.successful_yahoo_requests / global.total_yahoo_requests;
        ratePercent = (ratio * 100).toFixed(1);
    }

    // One [value, label] pair per stat box, in display order.
    const boxes = [
        [global.total_yahoo_requests || 0, 'Total Requests'],
        [`${ratePercent}%`, 'Success Rate'],
        [global.successful_yahoo_requests || 0, 'Successful'],
        [global.failed_yahoo_requests || 0, 'Failed'],
        [global.yahoo_client_count || 0, 'Active Clients'],
        [global.yahoo_batch_requests || 0, 'Batch Requests'],
        [global.yahoo_session_renewals || 0, 'Session Renewals'],
    ];

    target.innerHTML = boxes.map(([value, label]) => `
        <div class="yahoo-stat-box">
            <div class="yahoo-stat-value">${value}</div>
            <div class="yahoo-stat-label">${label}</div>
        </div>`).join('\n');
}
|
||||
|
||||
// Renders one two-sided card per Yahoo client into #yahoo-clients: request counters on the
// left, proxy details (or a connection-status placeholder) on the right.
function updateYahooClients(yahooClients) {
    const target = document.getElementById('yahoo-clients');

    const hasClients = Boolean(yahooClients) && yahooClients.length !== 0;
    if (!hasClients) {
        target.innerHTML = '<div style="text-align: center; padding: 40px; color: #666;">No Yahoo clients available</div>';
        return;
    }

    // Builds the markup for a single client card.
    const renderClient = (client) => {
        let ratePercent = '0.0';
        if (client.requests_total > 0) {
            ratePercent = ((client.requests_successful / client.requests_total) * 100).toFixed(1);
        }

        // Below 50% is flagged as danger, below 80% as warning.
        let rateClass = '';
        if (ratePercent < 50) {
            rateClass = 'danger';
        } else if (ratePercent < 80) {
            rateClass = 'warning';
        }

        // Flag the client once it has used up its request budget.
        const loadClass = client.current_requests >= client.max_requests ? 'danger' : '';

        let proxyPanel;
        if (client.proxy_info) {
            proxyPanel = `
                    <div class="side-header">🔗 ${client.proxy_info.container_name}</div>
                    <div class="metric-row">
                        <span class="metric-label">IP Address</span>
                        <span class="metric-value">${client.proxy_info.ip_address}</span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Port</span>
                        <span class="metric-value">${client.proxy_info.port}</span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Status</span>
                        <span class="metric-value">${client.proxy_info.status}</span>
                    </div>
                `;
        } else {
            proxyPanel = `
                    <div class="no-proxy">
                        ${client.has_proxy ? '⚠️' : '🌐'}<br>
                        ${client.has_proxy ? 'Proxy Not Connected' : 'Direct Connection'}
                    </div>
                `;
        }

        return `
            <div class="yahoo-client-box">
                <div class="yahoo-client-side left">
                    <div class="side-header">
                        📊 Yahoo Client #${client.instance_id}
                        ${client.has_proxy ? '🔗' : '🌐'}
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Total Requests</span>
                        <span class="metric-value">${client.requests_total}</span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Success / Fail</span>
                        <span class="metric-value">${client.requests_successful} / ${client.requests_failed}</span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Success Rate</span>
                        <span class="metric-value ${rateClass}">
                            ${ratePercent}%
                        </span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Current / Max</span>
                        <span class="metric-value ${loadClass}">
                            ${client.current_requests} / ${client.max_requests}
                        </span>
                    </div>
                    <div class="metric-row">
                        <span class="metric-label">Last Activity</span>
                        <span class="metric-value">${client.last_activity}</span>
                    </div>
                </div>
                <div class="yahoo-client-side right">
                    ${proxyPanel}
                </div>
            </div>
        `;
    };

    target.innerHTML = yahooClients.map(renderClient).join('');
}
|
||||
|
||||
function updateGlobalStats(global) {
|
||||
const container = document.getElementById('global-stats');
|
||||
|
||||
|
||||
@@ -92,6 +92,45 @@ pub enum MonitoringEvent {
|
||||
reason: String,
|
||||
},
|
||||
|
||||
// Yahoo API events
|
||||
YahooRequestStarted {
|
||||
instance_id: usize,
|
||||
endpoint: String,
|
||||
symbol: Option<String>,
|
||||
},
|
||||
|
||||
YahooRequestCompleted {
|
||||
instance_id: usize,
|
||||
success: bool,
|
||||
duration_ms: u64,
|
||||
error: Option<String>,
|
||||
},
|
||||
|
||||
YahooBatchRequestStarted {
|
||||
count: usize,
|
||||
symbols: Vec<String>,
|
||||
endpoint: String,
|
||||
},
|
||||
|
||||
YahooBatchRequestCompleted {
|
||||
successful: usize,
|
||||
failed: usize,
|
||||
total: usize,
|
||||
duration_ms: u64,
|
||||
},
|
||||
|
||||
YahooClientCreated {
|
||||
instance_id: usize,
|
||||
has_proxy: bool,
|
||||
max_requests: u32,
|
||||
},
|
||||
|
||||
YahooClientReset {
|
||||
instance_id: usize,
|
||||
previous_requests: u32,
|
||||
reason: String,
|
||||
},
|
||||
|
||||
// Logging
|
||||
LogMessage {
|
||||
level: LogLevel,
|
||||
|
||||
@@ -9,6 +9,7 @@ pub struct DashboardState {
|
||||
pub config: ConfigSnapshot,
|
||||
pub instances: Vec<InstanceMetrics>,
|
||||
pub proxies: Vec<ProxyMetrics>,
|
||||
pub yahoo_clients: Vec<YahooClientMetrics>,
|
||||
pub global: GlobalMetrics,
|
||||
pub logs: Vec<LogEntry>,
|
||||
}
|
||||
@@ -38,6 +39,14 @@ pub struct InstanceMetrics {
|
||||
pub failure_count: usize,
|
||||
pub connected_proxy: Option<ProxyInfo>,
|
||||
pub last_activity: String, // Timestamp
|
||||
pub yahoo_requests: usize,
|
||||
pub yahoo_success: usize,
|
||||
pub yahoo_failures: usize,
|
||||
pub yahoo_success_rate: f64,
|
||||
pub yahoo_current_requests: u32,
|
||||
pub yahoo_max_requests: u32,
|
||||
pub yahoo_last_endpoint: Option<String>,
|
||||
pub yahoo_last_symbol: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
@@ -75,6 +84,20 @@ pub struct ProxyMetrics {
|
||||
pub instances_using: Vec<usize>,
|
||||
}
|
||||
|
||||
/// Metrics for a Yahoo client
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct YahooClientMetrics {
|
||||
pub instance_id: usize,
|
||||
pub requests_total: usize,
|
||||
pub requests_successful: usize,
|
||||
pub requests_failed: usize,
|
||||
pub current_requests: u32,
|
||||
pub max_requests: u32,
|
||||
pub has_proxy: bool,
|
||||
pub last_activity: String,
|
||||
pub proxy_info: Option<ProxyInfo>,
|
||||
}
|
||||
|
||||
/// Global pool metrics
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct GlobalMetrics {
|
||||
@@ -88,6 +111,13 @@ pub struct GlobalMetrics {
|
||||
pub bot_detection_hits: usize,
|
||||
pub proxy_failures: usize,
|
||||
pub uptime_seconds: u64,
|
||||
pub total_yahoo_requests: usize,
|
||||
pub successful_yahoo_requests: usize,
|
||||
pub failed_yahoo_requests: usize,
|
||||
pub yahoo_success_rate: f64,
|
||||
pub yahoo_batch_requests: usize,
|
||||
pub yahoo_session_renewals: usize,
|
||||
pub yahoo_client_count: usize,
|
||||
}
|
||||
|
||||
/// Log entry for display in dashboard
|
||||
@@ -111,6 +141,7 @@ pub enum LogLevel {
|
||||
pub struct MonitoringState {
|
||||
pub instances: HashMap<usize, InstanceState>,
|
||||
pub proxies: HashMap<String, ProxyState>,
|
||||
pub yahoo_clients: HashMap<usize, YahooClientState>,
|
||||
pub global: GlobalState,
|
||||
pub start_time: Instant,
|
||||
}
|
||||
@@ -128,6 +159,13 @@ pub struct InstanceState {
|
||||
pub failure_count: usize,
|
||||
pub connected_proxy: Option<ProxyInfo>,
|
||||
pub last_activity: Instant,
|
||||
pub yahoo_requests: usize,
|
||||
pub yahoo_success: usize,
|
||||
pub yahoo_failures: usize,
|
||||
pub yahoo_current_requests: u32,
|
||||
pub yahoo_max_requests: u32,
|
||||
pub yahoo_last_endpoint: Option<String>,
|
||||
pub yahoo_last_symbol: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -139,6 +177,19 @@ pub struct ProxyState {
|
||||
pub instances_using: Vec<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct YahooClientState {
|
||||
pub instance_id: usize,
|
||||
pub requests_total: usize,
|
||||
pub requests_successful: usize,
|
||||
pub requests_failed: usize,
|
||||
pub current_requests: u32,
|
||||
pub max_requests: u32,
|
||||
pub has_proxy: bool,
|
||||
pub last_activity: Instant,
|
||||
pub proxy_info: Option<ProxyInfo>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct GlobalState {
|
||||
pub total_requests: usize,
|
||||
@@ -149,6 +200,12 @@ pub struct GlobalState {
|
||||
pub navigation_timeouts: usize,
|
||||
pub bot_detection_hits: usize,
|
||||
pub proxy_failures: usize,
|
||||
pub total_yahoo_requests: usize,
|
||||
pub successful_yahoo_requests: usize,
|
||||
pub failed_yahoo_requests: usize,
|
||||
pub yahoo_batch_requests: usize,
|
||||
pub yahoo_session_renewals: usize,
|
||||
pub yahoo_client_count: usize,
|
||||
}
|
||||
|
||||
impl MonitoringState {
|
||||
@@ -156,6 +213,7 @@ impl MonitoringState {
|
||||
Self {
|
||||
instances: HashMap::new(),
|
||||
proxies: HashMap::new(),
|
||||
yahoo_clients: HashMap::new(),
|
||||
global: GlobalState {
|
||||
total_requests: 0,
|
||||
successful_requests: 0,
|
||||
@@ -165,6 +223,12 @@ impl MonitoringState {
|
||||
navigation_timeouts: 0,
|
||||
bot_detection_hits: 0,
|
||||
proxy_failures: 0,
|
||||
total_yahoo_requests: 0,
|
||||
successful_yahoo_requests: 0,
|
||||
failed_yahoo_requests: 0,
|
||||
yahoo_batch_requests: 0,
|
||||
yahoo_session_renewals: 0,
|
||||
yahoo_client_count: 0,
|
||||
},
|
||||
start_time: Instant::now(),
|
||||
}
|
||||
@@ -175,18 +239,34 @@ impl MonitoringState {
|
||||
let instances: Vec<InstanceMetrics> = self
|
||||
.instances
|
||||
.values()
|
||||
.map(|inst| InstanceMetrics {
|
||||
id: inst.id,
|
||||
status: inst.status.clone(),
|
||||
current_task: inst.current_task.clone(),
|
||||
tasks_current_session: inst.tasks_current_session,
|
||||
tasks_max: inst.tasks_max,
|
||||
session_requests: inst.session_requests,
|
||||
total_requests: inst.total_requests,
|
||||
success_count: inst.success_count,
|
||||
failure_count: inst.failure_count,
|
||||
connected_proxy: inst.connected_proxy.clone(),
|
||||
last_activity: format_timestamp(inst.last_activity),
|
||||
.map(|inst| {
|
||||
let yahoo_success_rate = if inst.yahoo_success + inst.yahoo_failures > 0 {
|
||||
(inst.yahoo_success as f64 / (inst.yahoo_success + inst.yahoo_failures) as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
InstanceMetrics {
|
||||
id: inst.id,
|
||||
status: inst.status.clone(),
|
||||
current_task: inst.current_task.clone(),
|
||||
tasks_current_session: inst.tasks_current_session,
|
||||
tasks_max: inst.tasks_max,
|
||||
session_requests: inst.session_requests,
|
||||
total_requests: inst.total_requests,
|
||||
success_count: inst.success_count,
|
||||
failure_count: inst.failure_count,
|
||||
connected_proxy: inst.connected_proxy.clone(),
|
||||
last_activity: format_timestamp(inst.last_activity),
|
||||
yahoo_requests: inst.yahoo_requests,
|
||||
yahoo_success: inst.yahoo_success,
|
||||
yahoo_failures: inst.yahoo_failures,
|
||||
yahoo_success_rate,
|
||||
yahoo_current_requests: inst.yahoo_current_requests,
|
||||
yahoo_max_requests: inst.yahoo_max_requests,
|
||||
yahoo_last_endpoint: inst.yahoo_last_endpoint.clone(),
|
||||
yahoo_last_symbol: inst.yahoo_last_symbol.clone(),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
@@ -202,12 +282,34 @@ impl MonitoringState {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let yahoo_clients: Vec<YahooClientMetrics> = self
|
||||
.yahoo_clients
|
||||
.values()
|
||||
.map(|client| YahooClientMetrics {
|
||||
instance_id: client.instance_id,
|
||||
requests_total: client.requests_total,
|
||||
requests_successful: client.requests_successful,
|
||||
requests_failed: client.requests_failed,
|
||||
current_requests: client.current_requests,
|
||||
max_requests: client.max_requests,
|
||||
has_proxy: client.has_proxy,
|
||||
last_activity: format_timestamp(client.last_activity),
|
||||
proxy_info: client.proxy_info.clone(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let success_rate = if self.global.total_requests > 0 {
|
||||
(self.global.successful_requests as f64 / self.global.total_requests as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let yahoo_success_rate = if self.global.total_yahoo_requests > 0 {
|
||||
(self.global.successful_yahoo_requests as f64 / self.global.total_yahoo_requests as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
let global = GlobalMetrics {
|
||||
total_requests: self.global.total_requests,
|
||||
successful_requests: self.global.successful_requests,
|
||||
@@ -219,12 +321,20 @@ impl MonitoringState {
|
||||
bot_detection_hits: self.global.bot_detection_hits,
|
||||
proxy_failures: self.global.proxy_failures,
|
||||
uptime_seconds: self.start_time.elapsed().as_secs(),
|
||||
total_yahoo_requests: self.global.total_yahoo_requests,
|
||||
successful_yahoo_requests: self.global.successful_yahoo_requests,
|
||||
failed_yahoo_requests: self.global.failed_yahoo_requests,
|
||||
yahoo_success_rate,
|
||||
yahoo_batch_requests: self.global.yahoo_batch_requests,
|
||||
yahoo_session_renewals: self.global.yahoo_session_renewals,
|
||||
yahoo_client_count: self.global.yahoo_client_count,
|
||||
};
|
||||
|
||||
DashboardState {
|
||||
config,
|
||||
instances,
|
||||
proxies,
|
||||
yahoo_clients,
|
||||
global,
|
||||
logs,
|
||||
}
|
||||
@@ -233,7 +343,6 @@ impl MonitoringState {
|
||||
|
||||
fn format_timestamp(instant: Instant) -> String {
|
||||
use chrono::Local;
|
||||
// This is a placeholder - in real impl we'd track actual wall-clock time
|
||||
Local::now().format("%H:%M:%S").to_string()
|
||||
}
|
||||
|
||||
|
||||
@@ -76,6 +76,13 @@ impl MonitoringService {
|
||||
failure_count: 0,
|
||||
connected_proxy: proxy.clone(),
|
||||
last_activity: Instant::now(),
|
||||
yahoo_requests: 0,
|
||||
yahoo_success: 0,
|
||||
yahoo_failures: 0,
|
||||
yahoo_current_requests: 0,
|
||||
yahoo_max_requests: 0,
|
||||
yahoo_last_endpoint: None,
|
||||
yahoo_last_symbol: None,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -193,9 +200,9 @@ impl MonitoringService {
|
||||
if let Some(inst) = state.instances.get(&instance_id) {
|
||||
Some(SessionSummary {
|
||||
instance_id,
|
||||
session_start: "N/A".to_string(), // We'd need to track this
|
||||
session_start: "N/A".to_string(),
|
||||
session_end: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(),
|
||||
duration_seconds: 0, // We'd need to track session start time
|
||||
duration_seconds: 0,
|
||||
total_requests: old_request_count,
|
||||
successful_requests: inst.success_count,
|
||||
failed_requests: inst.failure_count,
|
||||
@@ -283,6 +290,154 @@ impl MonitoringService {
|
||||
self.log_info(format!("Pool rotation triggered: {}", reason)).await;
|
||||
}
|
||||
|
||||
// Yahoo API Events
|
||||
MonitoringEvent::YahooRequestStarted { instance_id, endpoint, symbol } => {
|
||||
let mut state = self.state.write().await;
|
||||
|
||||
// Update global Yahoo stats
|
||||
state.global.total_yahoo_requests += 1;
|
||||
|
||||
// Update instance stats
|
||||
if let Some(inst) = state.instances.get_mut(&instance_id) {
|
||||
inst.yahoo_requests += 1;
|
||||
inst.yahoo_current_requests += 1;
|
||||
inst.yahoo_last_endpoint = Some(endpoint.clone());
|
||||
inst.yahoo_last_symbol = symbol.clone();
|
||||
inst.last_activity = Instant::now();
|
||||
}
|
||||
|
||||
// Update Yahoo client stats
|
||||
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
|
||||
client.requests_total += 1;
|
||||
client.current_requests += 1;
|
||||
client.last_activity = Instant::now();
|
||||
}
|
||||
|
||||
self.log_info(format!(
|
||||
"YahooClient[{}] started request: {} {}",
|
||||
instance_id,
|
||||
endpoint,
|
||||
symbol.unwrap_or_else(|| "search".to_string())
|
||||
)).await;
|
||||
}
|
||||
|
||||
MonitoringEvent::YahooRequestCompleted { instance_id, success, duration_ms, error } => {
|
||||
let mut state = self.state.write().await;
|
||||
|
||||
// Update global Yahoo stats
|
||||
if success {
|
||||
state.global.successful_yahoo_requests += 1;
|
||||
} else {
|
||||
state.global.failed_yahoo_requests += 1;
|
||||
}
|
||||
|
||||
// Update instance stats
|
||||
if let Some(inst) = state.instances.get_mut(&instance_id) {
|
||||
inst.yahoo_current_requests = inst.yahoo_current_requests.saturating_sub(1);
|
||||
if success {
|
||||
inst.yahoo_success += 1;
|
||||
} else {
|
||||
inst.yahoo_failures += 1;
|
||||
}
|
||||
inst.last_activity = Instant::now();
|
||||
}
|
||||
|
||||
// Update Yahoo client stats
|
||||
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
|
||||
client.current_requests = client.current_requests.saturating_sub(1);
|
||||
if success {
|
||||
client.requests_successful += 1;
|
||||
} else {
|
||||
client.requests_failed += 1;
|
||||
}
|
||||
client.last_activity = Instant::now();
|
||||
}
|
||||
|
||||
if success {
|
||||
self.log_info(format!(
|
||||
"YahooClient[{}] completed request in {}ms",
|
||||
instance_id, duration_ms
|
||||
)).await;
|
||||
} else {
|
||||
self.log_error(format!(
|
||||
"YahooClient[{}] failed request in {}ms: {}",
|
||||
instance_id,
|
||||
duration_ms,
|
||||
error.unwrap_or_else(|| "unknown error".to_string())
|
||||
)).await;
|
||||
}
|
||||
}
|
||||
|
||||
MonitoringEvent::YahooBatchRequestStarted { count, symbols, endpoint } => {
|
||||
let mut state = self.state.write().await;
|
||||
state.global.yahoo_batch_requests += 1;
|
||||
|
||||
self.log_info(format!(
|
||||
"Yahoo batch request started: {} symbols, endpoint: {}",
|
||||
count, endpoint
|
||||
)).await;
|
||||
|
||||
if !symbols.is_empty() {
|
||||
self.log_debug(format!(
|
||||
"Batch symbols: {}",
|
||||
symbols.join(", ")
|
||||
)).await;
|
||||
}
|
||||
}
|
||||
|
||||
MonitoringEvent::YahooBatchRequestCompleted { successful, failed, total, duration_ms } => {
|
||||
let success_rate = if total > 0 {
|
||||
(successful as f64 / total as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
self.log_info(format!(
|
||||
"Yahoo batch completed: {}/{} successful ({:.1}%) in {}ms",
|
||||
successful, total, success_rate, duration_ms
|
||||
)).await;
|
||||
}
|
||||
|
||||
MonitoringEvent::YahooClientCreated { instance_id, has_proxy, max_requests } => {
|
||||
let mut state = self.state.write().await;
|
||||
state.global.yahoo_client_count += 1;
|
||||
|
||||
state.yahoo_clients.insert(
|
||||
instance_id,
|
||||
YahooClientState {
|
||||
instance_id,
|
||||
requests_total: 0,
|
||||
requests_successful: 0,
|
||||
requests_failed: 0,
|
||||
current_requests: 0,
|
||||
max_requests,
|
||||
has_proxy,
|
||||
last_activity: Instant::now(),
|
||||
proxy_info: None,
|
||||
},
|
||||
);
|
||||
|
||||
self.log_info(format!(
|
||||
"YahooClient[{}] created (proxy: {}, max requests: {})",
|
||||
instance_id, has_proxy, max_requests
|
||||
)).await;
|
||||
}
|
||||
|
||||
MonitoringEvent::YahooClientReset { instance_id, previous_requests, reason } => {
|
||||
let mut state = self.state.write().await;
|
||||
state.global.yahoo_session_renewals += 1;
|
||||
|
||||
if let Some(client) = state.yahoo_clients.get_mut(&instance_id) {
|
||||
client.current_requests = 0;
|
||||
client.last_activity = Instant::now();
|
||||
}
|
||||
|
||||
self.log_info(format!(
|
||||
"YahooClient[{}] reset (had {} requests, reason: {})",
|
||||
instance_id, previous_requests, reason
|
||||
)).await;
|
||||
}
|
||||
|
||||
MonitoringEvent::LogMessage { level, message } => {
|
||||
match level {
|
||||
crate::monitoring::events::LogLevel::Info => self.log_info(message).await,
|
||||
@@ -317,6 +472,17 @@ impl MonitoringService {
|
||||
}).await;
|
||||
}
|
||||
|
||||
async fn log_debug(&self, message: String) {
|
||||
// Only log debug if DEBUG_LOGGING is enabled
|
||||
if std::env::var("DEBUG_LOGGING").is_ok() {
|
||||
self.add_log(LogEntry {
|
||||
timestamp: Local::now().format("%H:%M:%S").to_string(),
|
||||
level: super::metrics::LogLevel::Info,
|
||||
message: format!("[DEBUG] {}", message),
|
||||
}).await;
|
||||
}
|
||||
}
|
||||
|
||||
async fn add_log(&self, entry: LogEntry) {
|
||||
let mut logs = self.logs.write().await;
|
||||
if logs.len() >= MAX_LOGS {
|
||||
|
||||
@@ -355,7 +355,7 @@ impl DockerVpnProxyPool {
|
||||
|
||||
pub fn get_proxy_url(&self, index: usize) -> String {
|
||||
let port = self.proxy_ports[index % self.proxy_ports.len()];
|
||||
format!("socks5://localhost:{}", port)
|
||||
format!("socks5h://localhost:{}", port)
|
||||
}
|
||||
|
||||
pub fn num_proxies(&self) -> usize {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod webdriver;
|
||||
pub mod docker_vpn_proxy;
|
||||
pub mod helpers;
|
||||
pub mod hard_reset;
|
||||
pub mod hard_reset;
|
||||
pub mod yahoo;
|
||||
|
||||
@@ -582,6 +582,9 @@ impl ChromeDriverPool {
|
||||
self.instances.len()
|
||||
}
|
||||
}
|
||||
pub fn get_proxy_pool(&self) -> Option<Arc<DockerVpnProxyPool>> {
|
||||
self.proxy_pool.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a single instance of chromedriver process, optionally bound to a VPN.
|
||||
|
||||
1349
src/scraper/yahoo.rs
Normal file
1349
src/scraper/yahoo.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -2,6 +2,7 @@ use std::path::{Path, PathBuf};
|
||||
use std::fs;
|
||||
|
||||
/// Central configuration for all data paths
|
||||
#[derive(Clone)]
|
||||
pub struct DataPaths {
|
||||
base_dir: PathBuf,
|
||||
data_dir: PathBuf,
|
||||
@@ -16,6 +17,7 @@ pub struct DataPaths {
|
||||
economic_events_dir: PathBuf,
|
||||
economic_changes_dir: PathBuf,
|
||||
// Corporate data subdirectories
|
||||
corporate_dir: PathBuf,
|
||||
corporate_events_dir: PathBuf,
|
||||
corporate_changes_dir: PathBuf,
|
||||
corporate_prices_dir: PathBuf,
|
||||
@@ -56,6 +58,7 @@ impl DataPaths {
|
||||
fs::create_dir_all(&cache_openvpn_dir)?;
|
||||
fs::create_dir_all(&economic_events_dir)?;
|
||||
fs::create_dir_all(&economic_changes_dir)?;
|
||||
fs::create_dir_all(&corporate_dir)?;
|
||||
fs::create_dir_all(&corporate_events_dir)?;
|
||||
fs::create_dir_all(&corporate_changes_dir)?;
|
||||
fs::create_dir_all(&corporate_prices_dir)?;
|
||||
@@ -71,6 +74,7 @@ impl DataPaths {
|
||||
cache_openvpn_dir,
|
||||
economic_events_dir,
|
||||
economic_changes_dir,
|
||||
corporate_dir,
|
||||
corporate_events_dir,
|
||||
corporate_changes_dir,
|
||||
corporate_prices_dir,
|
||||
@@ -119,6 +123,11 @@ impl DataPaths {
|
||||
&self.economic_changes_dir
|
||||
}
|
||||
|
||||
/// Get the base corporate data directory
|
||||
pub fn corporate_dir(&self) -> &Path {
|
||||
&self.corporate_dir
|
||||
}
|
||||
|
||||
/// Get the corporate events directory
|
||||
pub fn corporate_events_dir(&self) -> &Path {
|
||||
&self.corporate_events_dir
|
||||
|
||||
Reference in New Issue
Block a user