added pool rotation to chromedriver pool
This commit is contained in:
133
src/config.rs
133
src/config.rs
@@ -1,32 +1,49 @@
|
||||
// src/config.rs - FIXED VERSION
|
||||
|
||||
use std::sync::{Arc, atomic::{AtomicUsize, Ordering}};
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::{self};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Config {
|
||||
// Economic calendar start (usually the earliest available on finanzen.net)
|
||||
pub economic_start_date: String, // e.g. "2007-02-13"
|
||||
// Corporate earnings & price history start
|
||||
pub corporate_start_date: String, // e.g. "2000-01-01" or "2010-01-01"
|
||||
// How far into the future we scrape economic events
|
||||
pub economic_lookahead_months: u32, // default: 3
|
||||
/// Maximum number of parallel scraping tasks (default: 10).
|
||||
/// This limits concurrency to protect system load and prevent website spamming.
|
||||
pub economic_start_date: String,
|
||||
pub corporate_start_date: String,
|
||||
pub economic_lookahead_months: u32,
|
||||
|
||||
#[serde(default = "default_max_parallel_instances")]
|
||||
pub max_parallel_instances: usize,
|
||||
|
||||
pub max_tasks_per_instance: usize,
|
||||
|
||||
/// VPN rotation configuration
|
||||
/// If set to "true", enables automatic VPN rotation between sessions
|
||||
#[serde(default)]
|
||||
pub enable_vpn_rotation: bool,
|
||||
|
||||
// IMPROVEMENT: Reduzierte Defaults für weniger aggressive Scraping
|
||||
#[serde(default = "default_max_requests_per_session")]
|
||||
pub max_requests_per_session: usize,
|
||||
|
||||
#[serde(default = "default_min_request_interval_ms")]
|
||||
pub min_request_interval_ms: u64,
|
||||
|
||||
#[serde(default = "default_max_retry_attempts")]
|
||||
pub max_retry_attempts: u32,
|
||||
}
|
||||
|
||||
/// Fallback value for `Config::max_parallel_instances`.
///
/// Used as the serde default for that field and by `Config::default()`.
fn default_max_parallel_instances() -> usize {
    4 // Reduced from 10 to 4
}
|
||||
|
||||
/// Fallback value for `Config::max_requests_per_session`.
///
/// Used as the serde default for that field and by `Config::default()`.
fn default_max_requests_per_session() -> usize {
    10 // Reduced from 25 to 10
}
|
||||
|
||||
/// Fallback value, in milliseconds, for `Config::min_request_interval_ms`.
///
/// Used as the serde default for that field and by `Config::default()`.
fn default_min_request_interval_ms() -> u64 {
    1200 // Increased from 300 to 1200
}
|
||||
|
||||
/// Fallback value for `Config::max_retry_attempts`.
///
/// Used as the serde default for that field and by `Config::default()`.
fn default_max_retry_attempts() -> u32 {
    const FALLBACK_RETRY_ATTEMPTS: u32 = 3;
    FALLBACK_RETRY_ATTEMPTS
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -35,26 +52,71 @@ impl Default for Config {
|
||||
economic_lookahead_months: 3,
|
||||
max_parallel_instances: default_max_parallel_instances(),
|
||||
max_tasks_per_instance: 0,
|
||||
max_requests_per_session: default_max_requests_per_session(),
|
||||
min_request_interval_ms: default_min_request_interval_ms(),
|
||||
max_retry_attempts: default_max_retry_attempts(),
|
||||
enable_vpn_rotation: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Counters describing the activity of the pool.
///
/// Every counter is an `Arc<AtomicUsize>`, so an individual counter handle can
/// be cloned and updated without mutable access to the struct. Within this
/// file the counters are only read (see `log_stats`); where they are
/// incremented is not visible here.
#[derive(Debug)]
pub struct PoolMetrics {
    /// Total request counter.
    pub total_requests: Arc<AtomicUsize>,
    /// Successful request counter.
    pub successful_requests: Arc<AtomicUsize>,
    /// Failed request counter.
    pub failed_requests: Arc<AtomicUsize>,
    /// Session renewal counter.
    pub session_renewals: Arc<AtomicUsize>,
    /// Rotation event counter.
    pub rotation_events: Arc<AtomicUsize>,
    /// Retry counter.
    pub retries: Arc<AtomicUsize>,

    // IMPROVEMENT: new metrics for better monitoring.
    /// Navigation timeout counter.
    pub navigation_timeouts: Arc<AtomicUsize>,
    /// Bot detection hit counter.
    pub bot_detection_hits: Arc<AtomicUsize>,
    /// Proxy failure counter.
    pub proxy_failures: Arc<AtomicUsize>,
}
|
||||
|
||||
impl PoolMetrics {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
total_requests: Arc::new(AtomicUsize::new(0)),
|
||||
successful_requests: Arc::new(AtomicUsize::new(0)),
|
||||
failed_requests: Arc::new(AtomicUsize::new(0)),
|
||||
session_renewals: Arc::new(AtomicUsize::new(0)),
|
||||
rotation_events: Arc::new(AtomicUsize::new(0)),
|
||||
retries: Arc::new(AtomicUsize::new(0)),
|
||||
navigation_timeouts: Arc::new(AtomicUsize::new(0)),
|
||||
bot_detection_hits: Arc::new(AtomicUsize::new(0)),
|
||||
proxy_failures: Arc::new(AtomicUsize::new(0)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn log_stats(&self) {
|
||||
let total = self.total_requests.load(Ordering::Relaxed);
|
||||
let success = self.successful_requests.load(Ordering::Relaxed);
|
||||
// FIX: Prefix unused variable with underscore
|
||||
let _failed = self.failed_requests.load(Ordering::Relaxed);
|
||||
let renewals = self.session_renewals.load(Ordering::Relaxed);
|
||||
let rotations = self.rotation_events.load(Ordering::Relaxed);
|
||||
let retries = self.retries.load(Ordering::Relaxed);
|
||||
let timeouts = self.navigation_timeouts.load(Ordering::Relaxed);
|
||||
let bot_hits = self.bot_detection_hits.load(Ordering::Relaxed);
|
||||
let proxy_fails = self.proxy_failures.load(Ordering::Relaxed);
|
||||
|
||||
let success_rate = if total > 0 {
|
||||
(success as f64 / total as f64) * 100.0
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Pool Metrics: {} total requests, {:.1}% success rate, {} renewals, {} rotations, {} retries, {} timeouts, {} bot detections, {} proxy failures",
|
||||
total, success_rate, renewals, rotations, retries, timeouts, bot_hits, proxy_fails
|
||||
)).await;
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Loads the configuration from environment variables using dotenvy.
|
||||
///
|
||||
/// This function loads a `.env` file if present (via `dotenvy::dotenv()`),
|
||||
/// then retrieves each configuration value from environment variables.
|
||||
/// If a variable is missing, it falls back to the default value.
|
||||
/// Variable names are uppercase with underscores (e.g., ECONOMIC_START_DATE).
|
||||
///
|
||||
/// # Returns
|
||||
/// The loaded Config on success.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if parsing fails (e.g., invalid integer for lookahead months).
|
||||
/// Loads configuration from environment variables using dotenvy.
|
||||
pub fn load() -> Result<Self> {
|
||||
// Load .env file if it exists; ignore if not found (dotenvy::dotenv returns Ok if no file)
|
||||
let _ = dotenvy::dotenv().context("Failed to load .env file (optional)")?;
|
||||
|
||||
let economic_start_date = dotenvy::var("ECONOMIC_START_DATE")
|
||||
@@ -68,13 +130,14 @@ impl Config {
|
||||
.parse()
|
||||
.context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;
|
||||
|
||||
// IMPROVEMENT: Reduzierte Defaults
|
||||
let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
|
||||
.unwrap_or_else(|_| "10".to_string())
|
||||
.unwrap_or_else(|_| "4".to_string()) // Geändert von 10
|
||||
.parse()
|
||||
.context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;
|
||||
|
||||
let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
|
||||
.unwrap_or_else(|_| "0".to_string())
|
||||
.unwrap_or_else(|_| "5".to_string()) // Geändert von 0
|
||||
.parse()
|
||||
.context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;
|
||||
|
||||
@@ -83,6 +146,21 @@ impl Config {
|
||||
.parse::<bool>()
|
||||
.context("Failed to parse ENABLE_VPN_ROTATION as bool")?;
|
||||
|
||||
let max_requests_per_session: usize = dotenvy::var("MAX_REQUESTS_PER_SESSION")
|
||||
.unwrap_or_else(|_| "10".to_string()) // Geändert von 25
|
||||
.parse()
|
||||
.context("Failed to parse MAX_REQUESTS_PER_SESSION as usize")?;
|
||||
|
||||
let min_request_interval_ms: u64 = dotenvy::var("MIN_REQUEST_INTERVAL_MS")
|
||||
.unwrap_or_else(|_| "1200".to_string()) // Geändert von 300
|
||||
.parse()
|
||||
.context("Failed to parse MIN_REQUEST_INTERVAL_MS as u64")?;
|
||||
|
||||
let max_retry_attempts: u32 = dotenvy::var("MAX_RETRY_ATTEMPTS")
|
||||
.unwrap_or_else(|_| "3".to_string())
|
||||
.parse()
|
||||
.context("Failed to parse MAX_RETRY_ATTEMPTS as u32")?;
|
||||
|
||||
Ok(Self {
|
||||
economic_start_date,
|
||||
corporate_start_date,
|
||||
@@ -90,6 +168,9 @@ impl Config {
|
||||
max_parallel_instances,
|
||||
max_tasks_per_instance,
|
||||
enable_vpn_rotation,
|
||||
max_requests_per_session,
|
||||
min_request_interval_ms,
|
||||
max_retry_attempts,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user