Files
WebScraper/src/config.rs

147 lines
5.1 KiB
Rust

// src/config.rs - FIXED VERSION
use anyhow::{Context, Result};
use chrono::{self};
use serde::{Deserialize, Serialize};
/// Settings for the scraper, serializable and deserializable through serde.
///
/// Fields annotated with `#[serde(default = "...")]` are filled by the named
/// function when they are missing from deserialized input; fields without
/// that attribute must be present in the input.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Config {
    /// Start date for economic data, kept as a string
    /// (the built-in default is `2007-02-13`, i.e. presumably `YYYY-MM-DD`).
    pub economic_start_date: String,

    /// Start date for corporate data, kept as a string
    /// (the built-in default is `2010-01-01`, i.e. presumably `YYYY-MM-DD`).
    pub corporate_start_date: String,

    /// Number of months to look ahead for economic data.
    pub economic_lookahead_months: u32,

    /// Maximum number of parallel instances.
    #[serde(default = "default_max_parallel_instances")]
    pub max_parallel_instances: usize,

    /// Maximum number of tasks per instance.
    pub max_tasks_per_instance: usize,

    /// Whether VPN rotation is enabled.
    #[serde(default = "default_enable_vpn_rotation")]
    pub enable_vpn_rotation: bool,

    // IMPROVEMENT: reduced defaults for less aggressive scraping.
    /// Maximum number of requests per session.
    #[serde(default = "default_max_requests_per_session")]
    pub max_requests_per_session: usize,

    /// Minimum interval between requests, in milliseconds.
    #[serde(default = "default_min_request_interval_ms")]
    pub min_request_interval_ms: u64,

    /// Maximum number of retry attempts.
    #[serde(default = "default_max_retry_attempts")]
    pub max_retry_attempts: u32,

    /// Number of proxy instances per certificate, if set.
    #[serde(default = "default_proxy_instances_per_certificate")]
    pub proxy_instances_per_certificate: Option<usize>,
}
/// Serde fallback for `Config::enable_vpn_rotation`: rotation is off unless
/// the input says otherwise.
fn default_enable_vpn_rotation() -> bool {
    const VPN_ROTATION_ENABLED: bool = false;
    VPN_ROTATION_ENABLED
}
/// Serde fallback for `Config::max_parallel_instances`.
fn default_max_parallel_instances() -> usize {
    const MAX_PARALLEL_INSTANCES: usize = 4;
    MAX_PARALLEL_INSTANCES
}
/// Serde fallback for `Config::max_requests_per_session`.
fn default_max_requests_per_session() -> usize {
    const MAX_REQUESTS_PER_SESSION: usize = 10;
    MAX_REQUESTS_PER_SESSION
}
/// Serde fallback for `Config::min_request_interval_ms`, in milliseconds.
fn default_min_request_interval_ms() -> u64 {
    const MIN_REQUEST_INTERVAL_MS: u64 = 1200;
    MIN_REQUEST_INTERVAL_MS
}
/// Serde fallback for `Config::max_retry_attempts`.
fn default_max_retry_attempts() -> u32 {
    const MAX_RETRY_ATTEMPTS: u32 = 3;
    MAX_RETRY_ATTEMPTS
}
/// Serde fallback for `Config::proxy_instances_per_certificate`: one proxy
/// instance per certificate.
fn default_proxy_instances_per_certificate() -> Option<usize> {
    const PROXY_INSTANCES_PER_CERTIFICATE: usize = 1;
    Some(PROXY_INSTANCES_PER_CERTIFICATE)
}
impl Default for Config {
    /// Builds a `Config` from hard-coded values, without consulting the
    /// environment or any file.
    fn default() -> Self {
        Self {
            economic_start_date: String::from("2007-02-13"),
            corporate_start_date: String::from("2010-01-01"),
            economic_lookahead_months: 3,
            max_parallel_instances: default_max_parallel_instances(),
            // NOTE(review): `Config::load` falls back to 5 for this field when
            // MAX_TASKS_PER_INSTANCE is unset, while this impl uses 0 —
            // confirm which value is intended.
            max_tasks_per_instance: 0,
            enable_vpn_rotation: false,
            max_requests_per_session: default_max_requests_per_session(),
            min_request_interval_ms: default_min_request_interval_ms(),
            max_retry_attempts: default_max_retry_attempts(),
            proxy_instances_per_certificate: default_proxy_instances_per_certificate(),
        }
    }
}
impl Config {
/// Loads configuration from environment variables using dotenvy.
pub fn load() -> Result<Self> {
let _ = dotenvy::dotenv().context("Failed to load .env file (optional)")?;
let economic_start_date = dotenvy::var("ECONOMIC_START_DATE")
.unwrap_or_else(|_| "2007-02-13".to_string());
let corporate_start_date = dotenvy::var("CORPORATE_START_DATE")
.unwrap_or_else(|_| "2010-01-01".to_string());
let economic_lookahead_months: u32 = dotenvy::var("ECONOMIC_LOOKAHEAD_MONTHS")
.unwrap_or_else(|_| "3".to_string())
.parse()
.context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;
// IMPROVEMENT: Reduzierte Defaults
let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
.unwrap_or_else(|_| "4".to_string()) // Geändert von 10
.parse()
.context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;
let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
.unwrap_or_else(|_| "5".to_string()) // Geändert von 0
.parse()
.context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;
let enable_vpn_rotation = dotenvy::var("ENABLE_VPN_ROTATION")
.unwrap_or_else(|_| "false".to_string())
.parse::<bool>()
.context("Failed to parse ENABLE_VPN_ROTATION as bool")?;
let max_requests_per_session: usize = dotenvy::var("MAX_REQUESTS_PER_SESSION")
.unwrap_or_else(|_| "10".to_string()) // Geändert von 25
.parse()
.context("Failed to parse MAX_REQUESTS_PER_SESSION as usize")?;
let min_request_interval_ms: u64 = dotenvy::var("MIN_REQUEST_INTERVAL_MS")
.unwrap_or_else(|_| "1200".to_string()) // Geändert von 300
.parse()
.context("Failed to parse MIN_REQUEST_INTERVAL_MS as u64")?;
let max_retry_attempts: u32 = dotenvy::var("MAX_RETRY_ATTEMPTS")
.unwrap_or_else(|_| "3".to_string())
.parse()
.context("Failed to parse MAX_RETRY_ATTEMPTS as u32")?;
let proxy_instances_per_certificate: Option<usize> = match dotenvy::var("PROXY_INSTANCES_PER_CERTIFICATE") {
Ok(val) => Some(val.parse().context("Failed to parse PROXY_INSTANCES_PER_CERTIFICATE as usize")?),
Err(_) => Some(1),
};
Ok(Self {
economic_start_date,
corporate_start_date,
economic_lookahead_months,
max_parallel_instances,
max_tasks_per_instance,
enable_vpn_rotation,
max_requests_per_session,
min_request_interval_ms,
max_retry_attempts,
proxy_instances_per_certificate,
})
}
pub fn target_end_date(&self) -> String {
let now = chrono::Local::now().naive_local().date();
let future = now + chrono::Duration::days(30 * self.economic_lookahead_months as i64);
future.format("%Y-%m-%d").to_string()
}
}