added pool rotation to chromedriver pool
This commit is contained in:
@@ -12,13 +12,18 @@ use crate::util::directories::DataPaths;
|
||||
use crate::util::logger;
|
||||
use crate::scraper::webdriver::ChromeDriverPool;
|
||||
|
||||
use rand::Rng;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::fs::OpenOptions;
|
||||
use tokio::time::sleep;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::time::Duration;
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
|
||||
|
||||
/// Represents a write command to be serialized through the log writer
|
||||
enum LogCommand {
|
||||
@@ -413,6 +418,37 @@ pub async fn build_companies_jsonl_streaming_parallel(
|
||||
Ok(final_count)
|
||||
}
|
||||
|
||||
async fn scrape_with_retry(
|
||||
pool: &Arc<ChromeDriverPool>,
|
||||
isin: &str,
|
||||
max_retries: u32,
|
||||
) -> Result<Option<YahooCompanyDetails>> {
|
||||
let mut retries = 0;
|
||||
|
||||
loop {
|
||||
match scrape_company_details_by_isin(pool, isin).await {
|
||||
Ok(result) => return Ok(result),
|
||||
Err(e) => {
|
||||
if retries >= max_retries {
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
let backoff_ms = 1000 * 2u64.pow(retries); // 1s, 2s, 4s, 8s
|
||||
let jitter_ms = rand::rng().random_range(0..500); // +0-500ms Jitter
|
||||
let total_delay = backoff_ms + jitter_ms;
|
||||
|
||||
logger::log_warn(&format!(
|
||||
"Retry {}/{} for ISIN {} after {}ms: {}",
|
||||
retries + 1, max_retries, isin, total_delay, e
|
||||
)).await;
|
||||
|
||||
sleep(Duration::from_millis(total_delay)).await;
|
||||
retries += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process a single company: fetch Yahoo data for its ISINs
|
||||
async fn process_single_company(
|
||||
name: String,
|
||||
@@ -469,8 +505,7 @@ async fn process_single_company(
|
||||
|
||||
if !has_yahoo_ticker && !shutdown_flag.load(Ordering::SeqCst) {
|
||||
logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
|
||||
|
||||
match scrape_company_details_by_isin(pool, &isin).await {
|
||||
match scrape_with_retry(pool, &isin, 3).await {
|
||||
Ok(Some(details)) => {
|
||||
logger::log_info(&format!("✓ Found Yahoo ticker {} for ISIN {}", details.ticker, isin)).await;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user