From cd91de253b3ede6cc2b3abe8735d373d12c4b810 Mon Sep 17 00:00:00 2001
From: donpat1to
Date: Thu, 18 Dec 2025 15:59:56 +0100
Subject: [PATCH] Add pool rotation to the ChromeDriver pool

---
 .env.example                     |  38 ++---
 src/config.rs                    | 133 +++++++++++++---
 src/corporate/update_parallel.rs |  39 ++++-
 src/corporate/yahoo.rs           |  12 +-
 src/scraper/webdriver.rs         | 262 +++++++++++++++++--------------
 5 files changed, 314 insertions(+), 170 deletions(-)

diff --git a/.env.example b/.env.example
index ffa621b..b1e705e 100644
--- a/.env.example
+++ b/.env.example
@@ -3,46 +3,38 @@
 # This file configures the behavior of the WebScraper application
 # Copy to .env and adjust values as needed
 
-# ===== ECONOMIC DATA =====
-# Start date for economic event scraping
+OPENFIGI_API_KEY=
+
+# Economic calendar start (usually the earliest available on finanzen.net)
 ECONOMIC_START_DATE=2007-02-13
 
-# How far into the future to look ahead for economic events (in months)
-ECONOMIC_LOOKAHEAD_MONTHS=3
-
-# ===== CORPORATE DATA =====
-# Start date for corporate earnings/data scraping
+# Corporate earnings & price history start
 CORPORATE_START_DATE=2010-01-01
 
-# ===== PERFORMANCE & CONCURRENCY =====
-# Maximum number of parallel ChromeDriver instances
-# Higher = more concurrent tasks, but higher resource usage
-MAX_PARALLEL_INSTANCES=3
+# How far into the future we scrape economic events (in months)
+ECONOMIC_LOOKAHEAD_MONTHS=3
 
-# Maximum tasks per ChromeDriver instance before recycling
-# 0 = unlimited (instance lives for entire application runtime)
-MAX_TASKS_PER_INSTANCE=0
+# Maximum number of parallel ChromeDriver instances (default: 4)
+MAX_PARALLEL_INSTANCES=4
+
+# Maximum tasks per ChromeDriver instance before recycling
+# 0 = unlimited (default: 5)
+MAX_TASKS_PER_INSTANCE=5
 
 # ===== VPN ROTATION (ProtonVPN Integration) =====
 # Enable automatic VPN rotation between sessions?
 # If false, all traffic goes through the system connection without VPN tunneling
-ENABLE_VPN_ROTATION=false
-
-# Comma-separated list of ProtonVPN servers to rotate through
-# Examples:
-#   "US-Free#1,US-Free#2,UK-Free#1"
-#   "US,UK,JP,DE,NL"
-# NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
-VPN_SERVERS=
+ENABLE_VPN_ROTATION=true
 
 # Number of tasks per VPN session before rotating to new server/IP
 # 0 = rotate between economic and corporate phases (one phase = one IP)
 # 5 = rotate every 5 tasks
 # NOTE: Must have ENABLE_VPN_ROTATION=true for this to take effect
-TASKS_PER_VPN_SESSION=0
+TASKS_PER_VPN_SESSION=50
 
 # ===== LOGGING =====
 # Set via RUST_LOG environment variable:
 #   RUST_LOG=info cargo run
 #   RUST_LOG=debug cargo run
 # Leave empty or unset for default logging level
+
+# ===== SESSION & RATE LIMITING =====
+# Requests per browser session before the session is renewed (default: 10)
+MAX_REQUESTS_PER_SESSION=10
+# Minimum delay between any two pool requests, in milliseconds (default: 1200)
+MIN_REQUEST_INTERVAL_MS=1200
+# Retry attempts with exponential backoff before a task fails (default: 3)
+MAX_RETRY_ATTEMPTS=3
\ No newline at end of file
diff --git a/src/config.rs b/src/config.rs
index e1b8864..d9acc83 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,32 +1,49 @@
+// src/config.rs
+
+use std::sync::{Arc, atomic::{AtomicUsize, Ordering}};
 use anyhow::{Context, Result};
 use chrono::{self};
 use serde::{Deserialize, Serialize};
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Config {
-    // Economic calendar start (usually the earliest available on finanzen.net)
-    pub economic_start_date: String, // e.g. "2007-02-13"
-    // Corporate earnings & price history start
-    pub corporate_start_date: String, // e.g. "2000-01-01" or "2010-01-01"
-    // How far into the future we scrape economic events
-    pub economic_lookahead_months: u32, // default: 3
-    /// Maximum number of parallel scraping tasks (default: 10).
-    /// This limits concurrency to protect system load and prevent website spamming.
+    pub economic_start_date: String,
+    pub corporate_start_date: String,
+    pub economic_lookahead_months: u32,
+
     #[serde(default = "default_max_parallel_instances")]
     pub max_parallel_instances: usize,
     pub max_tasks_per_instance: usize,
-    /// VPN rotation configuration
-    /// If set to "true", enables automatic VPN rotation between sessions
     #[serde(default)]
     pub enable_vpn_rotation: bool,
+
+    // IMPROVEMENT: reduced defaults for less aggressive scraping
+    #[serde(default = "default_max_requests_per_session")]
+    pub max_requests_per_session: usize,
+
+    #[serde(default = "default_min_request_interval_ms")]
+    pub min_request_interval_ms: u64,
+
+    #[serde(default = "default_max_retry_attempts")]
+    pub max_retry_attempts: u32,
 }
 
 fn default_max_parallel_instances() -> usize {
-    10
+    4 // reduced from 10
 }
 
+fn default_max_requests_per_session() -> usize {
+    10 // reduced from 25
+}
+
+fn default_min_request_interval_ms() -> u64 {
+    1200 // increased from 300
+}
+
+fn default_max_retry_attempts() -> u32 { 3 }
+
 impl Default for Config {
     fn default() -> Self {
         Self {
@@ -35,26 +52,71 @@
             economic_lookahead_months: 3,
             max_parallel_instances: default_max_parallel_instances(),
             max_tasks_per_instance: 0,
+            max_requests_per_session: default_max_requests_per_session(),
+            min_request_interval_ms: default_min_request_interval_ms(),
+            max_retry_attempts: default_max_retry_attempts(),
             enable_vpn_rotation: false,
         }
     }
 }
 
+pub struct PoolMetrics {
+    pub total_requests: Arc<AtomicUsize>,
+    pub successful_requests: Arc<AtomicUsize>,
+    pub failed_requests: Arc<AtomicUsize>,
+    pub session_renewals: Arc<AtomicUsize>,
+    pub rotation_events: Arc<AtomicUsize>,
+    pub retries: Arc<AtomicUsize>,
+
+    // IMPROVEMENT: new metrics for better monitoring
+    pub navigation_timeouts: Arc<AtomicUsize>,
+    pub bot_detection_hits: Arc<AtomicUsize>,
+    pub proxy_failures: Arc<AtomicUsize>,
+}
+
+impl PoolMetrics {
+    pub fn new() -> Self {
+        Self {
+            total_requests: Arc::new(AtomicUsize::new(0)),
+            successful_requests: Arc::new(AtomicUsize::new(0)),
+            failed_requests: Arc::new(AtomicUsize::new(0)),
+            session_renewals: Arc::new(AtomicUsize::new(0)),
+            rotation_events: Arc::new(AtomicUsize::new(0)),
+            retries: Arc::new(AtomicUsize::new(0)),
+            navigation_timeouts: Arc::new(AtomicUsize::new(0)),
+            bot_detection_hits: Arc::new(AtomicUsize::new(0)),
+            proxy_failures: Arc::new(AtomicUsize::new(0)),
+        }
+    }
+
+    pub async fn log_stats(&self) {
+        let total = self.total_requests.load(Ordering::Relaxed);
+        let success = self.successful_requests.load(Ordering::Relaxed);
+        // FIX: prefix the unused variable with an underscore
+        let _failed = self.failed_requests.load(Ordering::Relaxed);
+        let renewals = self.session_renewals.load(Ordering::Relaxed);
+        let rotations = self.rotation_events.load(Ordering::Relaxed);
+        let retries = self.retries.load(Ordering::Relaxed);
+        let timeouts = self.navigation_timeouts.load(Ordering::Relaxed);
+        let bot_hits = self.bot_detection_hits.load(Ordering::Relaxed);
+        let proxy_fails = self.proxy_failures.load(Ordering::Relaxed);
+
+        let success_rate = if total > 0 {
+            (success as f64 / total as f64) * 100.0
+        } else {
+            0.0
+        };
+
+        crate::util::logger::log_info(&format!(
+            "Pool Metrics: {} total requests, {:.1}% success rate, {} renewals, {} rotations, {} retries, {} timeouts, {} bot detections, {} proxy failures",
+            total, success_rate, renewals, rotations, retries, timeouts, bot_hits, proxy_fails
+        )).await;
+    }
+}
+
 impl Config {
-    /// Loads the configuration from environment variables using dotenvy.
-    ///
-    /// This function loads a `.env` file if present (via `dotenvy::dotenv()`),
-    /// then retrieves each configuration value from environment variables.
-    /// If a variable is missing, it falls back to the default value.
-    /// Variable names are uppercase with underscores (e.g., ECONOMIC_START_DATE).
-    ///
-    /// # Returns
-    /// The loaded Config on success.
-    ///
-    /// # Errors
-    /// Returns an error if parsing fails (e.g., invalid integer for lookahead months).
+    /// Loads configuration from environment variables using dotenvy.
     pub fn load() -> Result<Self> {
-        // Load .env file if it exists; ignore if not found (dotenvy::dotenv returns Ok if no file)
-        let _ = dotenvy::dotenv().context("Failed to load .env file (optional)")?;
+        // Load .env if present; a missing file is not an error, so don't propagate it
+        let _ = dotenvy::dotenv();
 
         let economic_start_date = dotenvy::var("ECONOMIC_START_DATE")
@@ -68,13 +130,14 @@ impl Config {
             .parse()
             .context("Failed to parse ECONOMIC_LOOKAHEAD_MONTHS as u32")?;
 
+        // IMPROVEMENT: reduced defaults
         let max_parallel_instances: usize = dotenvy::var("MAX_PARALLEL_INSTANCES")
-            .unwrap_or_else(|_| "10".to_string())
+            .unwrap_or_else(|_| "4".to_string()) // changed from 10
             .parse()
             .context("Failed to parse MAX_PARALLEL_INSTANCES as usize")?;
 
         let max_tasks_per_instance: usize = dotenvy::var("MAX_TASKS_PER_INSTANCE")
-            .unwrap_or_else(|_| "0".to_string())
+            .unwrap_or_else(|_| "5".to_string()) // changed from 0 (unlimited)
             .parse()
             .context("Failed to parse MAX_TASKS_PER_INSTANCE as usize")?;
 
@@ -83,6 +146,21 @@ impl Config {
             .parse::<bool>()
             .context("Failed to parse ENABLE_VPN_ROTATION as bool")?;
 
+        let max_requests_per_session: usize = dotenvy::var("MAX_REQUESTS_PER_SESSION")
+            .unwrap_or_else(|_| "10".to_string()) // changed from 25
+            .parse()
+            .context("Failed to parse MAX_REQUESTS_PER_SESSION as usize")?;
+
+        let min_request_interval_ms: u64 = dotenvy::var("MIN_REQUEST_INTERVAL_MS")
+            .unwrap_or_else(|_| "1200".to_string()) // changed from 300
+            .parse()
+            .context("Failed to parse MIN_REQUEST_INTERVAL_MS as u64")?;
+
+        let max_retry_attempts: u32 = dotenvy::var("MAX_RETRY_ATTEMPTS")
+            .unwrap_or_else(|_| "3".to_string())
+            .parse()
+            .context("Failed to parse MAX_RETRY_ATTEMPTS as u32")?;
+
         Ok(Self {
             economic_start_date,
             corporate_start_date,
@@ -90,6 +168,9 @@ impl Config {
             max_parallel_instances,
             max_tasks_per_instance,
             enable_vpn_rotation,
+            max_requests_per_session,
+            min_request_interval_ms,
+            max_retry_attempts,
         })
     }
 
diff --git a/src/corporate/update_parallel.rs b/src/corporate/update_parallel.rs
index f6c340a..78eaf30 100644
--- a/src/corporate/update_parallel.rs
+++ b/src/corporate/update_parallel.rs
@@ -12,13 +12,18 @@ use crate::util::directories::DataPaths;
 use crate::util::logger;
 use crate::scraper::webdriver::ChromeDriverPool;
 
+use rand::Rng;
 use tokio::sync::mpsc;
 use tokio::io::AsyncWriteExt;
 use tokio::fs::OpenOptions;
+use tokio::time::sleep;
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicBool, Ordering};
+use std::time::Duration;
 use futures::stream::{FuturesUnordered, StreamExt};
+use anyhow::{anyhow, Context, Result};
+
 
 /// Represents a write command to be serialized through the log writer
 enum LogCommand {
@@ -413,6 +418,37 @@ pub async fn build_companies_jsonl_streaming_parallel(
     Ok(final_count)
 }
 
+async fn scrape_with_retry(
+    pool: &Arc<ChromeDriverPool>,
+    isin: &str,
+    max_retries: u32,
+) -> Result<Option<CompanyDetails>> {
+    let mut retries = 0;
+
+    loop {
+        match scrape_company_details_by_isin(pool, isin).await {
+            Ok(result) => return Ok(result),
+            Err(e) => {
+                if retries >= max_retries {
+                    return Err(e);
+                }
+
+                let backoff_ms = 1000 * 2u64.pow(retries); // 1s, 2s, 4s, 8s
+                let jitter_ms = rand::rng().random_range(0..500); // plus 0-500 ms of jitter
+                let total_delay = backoff_ms + jitter_ms;
+
+                logger::log_warn(&format!(
+                    "Retry {}/{} for ISIN {} after {}ms: {}",
+                    retries + 1, max_retries, isin, total_delay, e
+                )).await;
+
+                sleep(Duration::from_millis(total_delay)).await;
+                retries += 1;
+            }
+        }
+    }
+}
+
 /// Process a single company: fetch Yahoo data for its ISINs
 async fn process_single_company(
     name: String,
@@ -469,8 +505,7 @@ async fn process_single_company(
     if !has_yahoo_ticker && !shutdown_flag.load(Ordering::SeqCst) {
         logger::log_info(&format!("Fetching Yahoo details for {} (ISIN: {})", name, isin)).await;
-
-        match scrape_company_details_by_isin(pool, &isin).await {
+        match scrape_with_retry(pool, &isin, 3).await {
             Ok(Some(details)) => {
                 logger::log_info(&format!("✓ Found Yahoo ticker {} for ISIN {}", details.ticker, isin)).await;
 
diff --git a/src/corporate/yahoo.rs b/src/corporate/yahoo.rs
index f34c0b2..28c71c0 100644
--- a/src/corporate/yahoo.rs
+++ b/src/corporate/yahoo.rs
@@ -3,6 +3,7 @@ use super::{types::*, helpers::*};
 use crate::{scraper::webdriver::*, util::{directories::DataPaths}};
 use event_backtest_engine::logger;
 use fantoccini::{Client, Locator};
+use rand::Rng;
 use serde::{Deserialize, Serialize};
 use tokio::time::{Duration as TokioDuration, sleep, timeout};
 use std::{sync::Arc};
@@ -73,9 +74,16 @@ pub async fn scrape_company_details_by_isin(
     pool.execute(format!("https://finance.yahoo.com/lookup/?s={}", isin), move |client| {
         let isin = isin.clone();
         Box::pin(async move {
-            sleep(TokioDuration::from_millis(1000)).await;
+            // Random delay between 800 and 1500 ms
+            let delay = rand::rng().random_range(800..1500);
+            sleep(TokioDuration::from_millis(delay)).await;
+
             reject_yahoo_cookies(&client).await?;
-            sleep(TokioDuration::from_millis(1000)).await;
+
+            // Another random delay before extraction
+            let delay = rand::rng().random_range(800..1500);
+            sleep(TokioDuration::from_millis(delay)).await;
+
             extract_company_details(&client, &isin).await
         })
     }).await
 
diff --git a/src/scraper/webdriver.rs b/src/scraper/webdriver.rs
index 5a744b4..97e201f 100644
--- a/src/scraper/webdriver.rs
+++ b/src/scraper/webdriver.rs
@@ -2,13 +2,14 @@
 
 use anyhow::{anyhow, Context, Result};
 use fantoccini::{Client, ClientBuilder};
-use rand::seq::{IndexedRandom, SliceRandom};
+use rand::seq::IndexedRandom;
 use rand::rngs::ThreadRng;
 use rand::Rng; // for the RNG trait
 use serde_json::{Map, Value};
 use std::pin::Pin;
 use std::process::Stdio;
 use std::sync::Arc;
+use std::time::Instant;
 use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::{Child, Command};
 use tokio::task::JoinHandle;
@@ -26,6 +27,9 @@ pub struct ChromeDriverPool {
     rotation_enabled: bool,
     /// Index for round-robin instance selection (when rotation is enabled)
     next_instance: Arc<Mutex<usize>>,
+
+    last_request_time: Arc<Mutex<Instant>>,
+    min_request_interval_ms: u64,
 }
 
 impl ChromeDriverPool {
@@ -94,11 +98,12 @@ impl ChromeDriverPool {
         }
 
         for i in 0..actual_pool_size {
-            let proxy_url = proxy_pool
-                .as_ref()
-                .map(|pp| pp.get_proxy_url(i));
-
-            let instance = ChromeInstance::new(proxy_url, max_tasks_per_instance).await?;
+            // Pass the whole proxy pool plus this instance's starting index
+            let instance = ChromeInstance::new(
+                proxy_pool.clone(), // clone the Arc
+                i,                  // this instance's proxy index
+                max_tasks_per_instance,
+            ).await?;
 
             crate::util::logger::log_info(&format!("  Instance {} ready", i + 1)).await;
             instances.push(Arc::new(Mutex::new(instance)));
@@ -110,18 +115,11 @@ impl ChromeDriverPool {
             proxy_pool,
             rotation_enabled,
             next_instance: Arc::new(Mutex::new(0)),
+            last_request_time: Arc::new(Mutex::new(Instant::now())),
+            min_request_interval_ms: 300, // TODO: take this from Config::min_request_interval_ms
         })
     }
 
-    /// Execute a scraping task using an available instance from the pool.
-    ///
-    /// When rotation is enabled:
-    /// - Uses only half of the instances at a time
-    /// - Rotates to the other half when an instance reaches its task limit
-    /// - Cycles through instances in round-robin fashion within the active half
-    ///
-    /// When rotation is disabled:
-    /// - Uses all instances with random selection
     pub async fn execute<F, T>(&self, url: String, parse: F) -> Result<T>
     where
         T: Send + 'static,
        F: FnOnce(Client) -> Pin<Box<dyn std::future::Future<Output = Result<T>> + Send>> + Send + 'static,
    {
         let _permit = self.semaphore.acquire().await.map_err(|_| anyhow!("Pool closed"))?;
 
+        // Enforce a minimum interval between any two requests across the pool
+        {
+            let mut last_time = self.last_request_time.lock().await;
+            let elapsed = last_time.elapsed().as_millis() as u64;
+
+            if elapsed < self.min_request_interval_ms {
+                let wait_ms = self.min_request_interval_ms - elapsed;
+                drop(last_time); // release the lock before sleeping!
+
+                sleep(Duration::from_millis(wait_ms)).await;
+
+                let mut last_time = self.last_request_time.lock().await;
+                *last_time = Instant::now();
+            } else {
+                *last_time = Instant::now();
+            }
+        }
+
-        let index = if self.rotation_enabled {
-            // Rotation mode: use only half of instances at a time
-            let total_instances = self.instances.len();
-            let half_size = (total_instances + 1) / 2; // Round up for odd numbers
-
-            let mut next_idx = self.next_instance.lock().await;
-            let base_idx = *next_idx;
-            let mut selected_idx = base_idx;
-            let mut found_in_current_half = false;
-
-            // Try to find an available instance in the current half
-            for offset in 0..half_size {
-                let candidate_idx = (base_idx + offset) % half_size;
-
-                // Check if this instance has reached its task limit
-                let instance = &self.instances[candidate_idx];
-                let guard = instance.lock().await;
-
-                if guard.max_tasks_per_instance == 0 ||
-                   guard.task_count < guard.max_tasks_per_instance {
-                    // This instance is available
-                    *next_idx = (candidate_idx + 1) % half_size;
-                    selected_idx = candidate_idx;
-                    found_in_current_half = true;
-                    drop(guard);
-                    break;
-                } else {
-                    drop(guard);
-                }
-            }
-
-            if !found_in_current_half {
-                // All instances in current half are at limit, switch to other half
-                crate::util::logger::log_info(
-                    "Current half saturated, rotating to other half of instances"
-                ).await;
-
-                let other_half_start = half_size;
-                let other_half_size = total_instances - half_size;
-
-                // Find available instance in other half
-                let mut found_in_other_half = false;
-                for offset in 0..other_half_size {
-                    let candidate_idx = other_half_start + offset;
-
-                    let instance = &self.instances[candidate_idx];
-                    let guard = instance.lock().await;
-
-                    if guard.max_tasks_per_instance == 0 ||
-                       guard.task_count < guard.max_tasks_per_instance {
-                        // Switch to this half for future requests
-                        *next_idx = offset;
-                        selected_idx = candidate_idx;
-                        found_in_other_half = true;
-                        drop(guard);
-                        break;
-                    } else {
-                        drop(guard);
-                    }
-                }
-
-                if !found_in_other_half {
-                    // All instances saturated - use round-robin anyway
-                    selected_idx = *next_idx % total_instances;
-                    *next_idx = (*next_idx + 1) % total_instances;
-                }
-            }
-
-            drop(next_idx);
-            selected_idx
-        } else {
-            // Non-rotation mode: random selection as before
-            rand::random_range(..self.instances.len())
-        };
-
-        let instance = self.instances[index].clone();
-        let mut guard = instance.lock().await;
-
-        guard.increment_task_count();
-
-        if guard.max_tasks_per_instance > 0 {
-            crate::util::logger::log_info(&format!(
-                "Instance {} task count: {}/{}",
-                index,
-                guard.get_task_count(),
-                guard.max_tasks_per_instance
-            ))
-            .await;
-        }
-
-        let client = guard.new_session().await?;
-
-        drop(guard); // release lock early
+        // Instance selection (simplified; get_rotated_index below does the full rotation)
+        let index = if self.rotation_enabled {
+            self.get_rotated_index().await?
+        } else {
+            rand::rng().random_range(0..self.instances.len())
+        };
+
+        let instance = &self.instances[index];
+        let mut guard = instance.lock().await;
+
+        // NEW: obtain the session, renewing it automatically when it is stale
+        let client = guard.get_or_renew_session().await?;
+
+        guard.increment_task_count();
+        let (task_count, session_requests) = guard.get_session_stats().await;
+
+        crate::util::logger::log_info(&format!(
+            "Instance {} executing task (tasks: {}/{}, session requests: {})",
+            index, task_count, guard.max_tasks_per_instance, session_requests
+        )).await;
+
+        drop(guard); // release the lock before navigating
 
-        crate::util::logger::log_info(&format!("Scraping {} ...", url)).await;
-        client.goto(&url).await.context("Navigation failed")?;
+        // Navigate with a timeout
+        let navigation_result = timeout(
+            Duration::from_secs(60),
+            client.goto(&url)
+        ).await;
+
+        match navigation_result {
+            Ok(Ok(_)) => {
+                crate::util::logger::log_info(&format!("✓ Navigated to {}", url)).await;
+
+                // Run the caller's parse function
+                parse(client).await
+            }
+            Ok(Err(e)) => {
+                crate::util::logger::log_error(&format!("Navigation failed: {}", e)).await;
+                Err(anyhow!("Navigation failed: {}", e))
+            }
+            Err(_) => {
+                crate::util::logger::log_error("Navigation timeout (60s)").await;
+                Err(anyhow!("Navigation timeout"))
+            }
+        }
+    }
 
-        let result = timeout(Duration::from_secs(90), parse(client))
-            .await
-            .context("Parse timeout")??;
-
-        Ok(result)
+    async fn get_rotated_index(&self) -> Result<usize> {
+        let total = self.instances.len();
+        let half_size = total / 2;
+
+        if half_size == 0 {
+            return Ok(0); // pool too small to rotate
+        }
+
+        let mut next_idx = self.next_instance.lock().await;
+        let current_half_start = if *next_idx < half_size { 0 } else { half_size };
+        let current_half_end = if *next_idx < half_size { half_size } else { total };
+
+        // Look for an available instance in the current half
+        for offset in 0..(current_half_end - current_half_start) {
+            let candidate_idx = current_half_start + ((*next_idx + offset) % half_size);
+
+            let instance = &self.instances[candidate_idx];
+            let guard = instance.lock().await;
+
+            if guard.max_tasks_per_instance == 0 ||
+               guard.task_count < guard.max_tasks_per_instance {
+                *next_idx = (candidate_idx + 1) % total;
+                drop(guard);
+                return Ok(candidate_idx);
+            }
+        }
+
+        // Current half saturated -> switch to the other half
+        crate::util::logger::log_info("Current half saturated, rotating to other half").await;
+
+        let new_half_start = if current_half_start == 0 { half_size } else { 0 };
+
+        // Reset the old half's task counts so it is fresh for the next rotation
+        for i in current_half_start..current_half_end {
+            let mut instance = self.instances[i].lock().await;
+            instance.reset_task_count();
+        }
+
+        *next_idx = new_half_start;
+        drop(next_idx);
+
+        Ok(new_half_start)
     }
 
     /// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
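[Editor's note] The rewritten `execute()` above calls two `ChromeInstance` helpers, `get_or_renew_session()` and `get_session_stats()`, whose bodies are not part of this patch excerpt, and `get_rotated_index()` additionally relies on `reset_task_count()`. A minimal sketch of what they could look like, using only the fields shown in the next hunk (`current_session`, `session_request_count`, `max_requests_per_session`, `task_count`) plus the existing `create_fresh_session()`; the renewal policy and exact signatures here are assumptions, not the repository's actual implementation:

```rust
impl ChromeInstance {
    /// Sketch (assumed policy): hand out the cached session, renewing it
    /// after max_requests_per_session uses.
    pub async fn get_or_renew_session(&self) -> Result<Client> {
        let mut session = self.current_session.lock().await;
        let mut count = self.session_request_count.lock().await;

        // Renew when no session exists yet or the current one is exhausted.
        if session.is_none() || *count >= self.max_requests_per_session {
            if let Some(old) = session.take() {
                let _ = old.close().await; // best-effort close of the stale session
            }
            *session = Some(self.create_fresh_session().await?);
            *count = 0;
        }

        *count += 1;
        // fantoccini's Client is a cloneable handle to the underlying session.
        Ok(session.as_ref().unwrap().clone())
    }

    /// Sketch: snapshot (task_count, session_request_count) for logging.
    pub async fn get_session_stats(&self) -> (usize, usize) {
        (self.task_count, *self.session_request_count.lock().await)
    }

    /// Sketch: used by get_rotated_index() when a half is recycled.
    pub fn reset_task_count(&mut self) {
        self.task_count = 0;
    }
}
```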
@@ -277,16 +280,24 @@ pub struct ChromeInstance {
     max_tasks_per_instance: usize,
     proxy_url: Option<String>,
 
-    // NEU: Session-Management
-    current_session: Arc<Mutex<Option<Client>>>,
+    current_session: Arc<Mutex<Option<Client>>>, // current active session
     session_request_count: Arc<Mutex<usize>>,
     max_requests_per_session: usize, // e.g. 25
+
+    proxy_pool: Option<Arc<ProxyPool>>,     // reference to the shared proxy pool
+    current_proxy_index: Arc<Mutex<usize>>, // proxy index currently in use
 }
 
 impl ChromeInstance {
-    pub async fn new(proxy_url: Option<String>, max_tasks_per_instance: usize) -> Result<Self> {
+    pub async fn new(
+        proxy_pool: Option<Arc<ProxyPool>>,
+        initial_proxy_index: usize,
+        max_tasks_per_instance: usize,
+    ) -> Result<Self> {
         let (base_url, process, stderr_handle) = Self::spawn_chromedriver().await?;
 
+        // Resolve the proxy URL if a proxy pool was provided
+        let proxy_url = proxy_pool.as_ref().map(|pp| pp.get_proxy_url(initial_proxy_index));
+
         Ok(Self {
             base_url,
             process,
@@ -294,10 +305,13 @@ impl ChromeInstance {
             task_count: 0,
             max_tasks_per_instance,
             proxy_url,
-            // NEU
+
             current_session: Arc::new(Mutex::new(None)),
             session_request_count: Arc::new(Mutex::new(0)),
             max_requests_per_session: 25, // make this configurable!
+
+            proxy_pool,
+            current_proxy_index: Arc::new(Mutex::new(initial_proxy_index)),
         })
     }
 
@@ -338,9 +352,24 @@ impl ChromeInstance {
     }
 
     async fn create_fresh_session(&self) -> Result<Client> {
-        // IMPORTANT: pick the user agent here, not in chrome_args()!
+        // Pick the current proxy URL without mutating self
+        let proxy_url = if let Some(ref pool) = self.proxy_pool {
+            let mut proxy_idx = self.current_proxy_index.lock().await;
+            *proxy_idx = (*proxy_idx + 1) % pool.num_proxies();
+            let url = pool.get_proxy_url(*proxy_idx);
+
+            crate::util::logger::log_info(&format!(
+                "Using proxy {} for new session",
+                *proxy_idx
+            )).await;
+
+            Some(url)
+        } else {
+            self.proxy_url.clone()
+        };
+
         let user_agent = Self::chrome_user_agent();
-        let capabilities = self.chrome_args_with_ua(user_agent);
+        let capabilities = self.chrome_args_with_ua(user_agent, &proxy_url);
 
         ClientBuilder::native()
             .capabilities(capabilities)
             .connect(&self.base_url)
             .await
             .context("Failed to connect to ChromeDriver")
     }
 
@@ -349,7 +378,7 @@ impl ChromeInstance {
-    fn chrome_args_with_ua(&self, user_agent: &str) -> Map<String, Value> {
+    fn chrome_args_with_ua(&self, user_agent: &str, proxy_url: &Option<String>) -> Map<String, Value> {
         let mut args = vec![
             "--headless=new".to_string(),
             "--disable-gpu".to_string(),
             "--disable-default-apps".to_string(),
             "--disable-translate".to_string(),
             "--disable-blink-features=AutomationControlled".to_string(),
-            // user agent passed in as a parameter!
             format!("--user-agent={}", user_agent),
         ];
 
-        if let Some(ref proxy) = self.proxy_url {
+        if let Some(proxy) = proxy_url {
             args.push(format!("--proxy-server={}", proxy));
         }
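[Editor's note] The webdriver changes also assume a `ProxyPool` type providing `get_proxy_url(index)` and `num_proxies()`; its definition lies outside this diff. For orientation, a minimal sketch of the smallest interface that satisfies those call sites; the struct layout and field names are invented for illustration:

```rust
/// Hypothetical stand-in for the ProxyPool referenced above (not from the patch).
pub struct ProxyPool {
    /// One URL per proxy endpoint, e.g. "http://127.0.0.1:3128" for a
    /// local Docker proxy container (example values).
    proxy_urls: Vec<String>,
}

impl ProxyPool {
    pub fn num_proxies(&self) -> usize {
        self.proxy_urls.len()
    }

    /// Wraps around so any instance index maps onto some proxy.
    pub fn get_proxy_url(&self, index: usize) -> String {
        self.proxy_urls[index % self.proxy_urls.len()].clone()
    }
}
```

Because `create_fresh_session()` advances `current_proxy_index` before building each new session, every session renewal also rotates the egress proxy, which pairs with the `MAX_REQUESTS_PER_SESSION` cap introduced in `config.rs`.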