added hard reset for navigation timeout after 3 hours
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
// src/scraper/webdriver.rs
|
||||
use super::helpers::*;
|
||||
use super::hard_reset::HardResetController;
|
||||
use super::docker_vpn_proxy::DockerVpnProxyPool;
|
||||
use crate::Config;
|
||||
use crate::logger;
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use fantoccini::{Client, ClientBuilder};
|
||||
@@ -13,8 +17,6 @@ use tokio::process::{Child, Command};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::sync::{Mutex, Semaphore};
|
||||
use tokio::time::{sleep, timeout, Duration};
|
||||
use crate::scraper::docker_vpn_proxy::{DockerVpnProxyPool};
|
||||
use crate::Config;
|
||||
|
||||
/// Manages a pool of ChromeDriver instances for parallel scraping with optional VPN binding.
|
||||
pub struct ChromeDriverPool {
|
||||
@@ -31,10 +33,16 @@ pub struct ChromeDriverPool {
|
||||
min_request_interval_ms: u64,
|
||||
|
||||
monitoring: Option<crate::monitoring::MonitoringHandle>,
|
||||
hard_reset_controller: Arc<HardResetController>,
|
||||
config: Arc<Config>,
|
||||
}
|
||||
|
||||
impl ChromeDriverPool {
|
||||
/// Creates a new pool without any proxy (direct connection).
|
||||
/// Error count at which a hard reset is requested.
///
/// Once the number of consecutive errors recorded by the hard-reset
/// controller reaches this value, `execute()` returns an error whose message
/// starts with `HARD_RESET_REQUIRED`, signalling the caller to trigger a
/// hard reset.
const HARD_RESET_ERROR_THRESHOLD: usize = 12;
|
||||
|
||||
/// Creates a new pool without any proxy (direct connection).
|
||||
pub async fn _new(config: &Config, monitoring: Option<crate::monitoring::MonitoringHandle>,) -> Result<Self> {
|
||||
Self::new_with_proxy_and_task_limit(None, config, monitoring).await
|
||||
}
|
||||
@@ -85,6 +93,11 @@ impl ChromeDriverPool {
|
||||
|
||||
// Rotation is enabled when task limiting is active
|
||||
let rotation_enabled = task_per_instance_limit > 0;
|
||||
let half_size = if rotation_enabled {
|
||||
(actual_pool_size + 1) / 2 // Round up for odd pool sizes
|
||||
} else {
|
||||
actual_pool_size
|
||||
};
|
||||
|
||||
let mut instances = Vec::with_capacity(actual_pool_size);
|
||||
|
||||
@@ -105,8 +118,8 @@ impl ChromeDriverPool {
|
||||
for i in 0..actual_pool_size {
|
||||
// Pass the entire proxy_pool and the index
|
||||
let instance = ChromeInstance::new(
|
||||
proxy_pool.clone(), // Clone the Arc
|
||||
i, // This instance's proxy index
|
||||
proxy_pool.clone(),
|
||||
i,
|
||||
config,
|
||||
monitoring.clone(),
|
||||
).await?;
|
||||
@@ -162,15 +175,21 @@ impl ChromeDriverPool {
|
||||
|
||||
let min_request_interval_ms = config.min_request_interval_ms;
|
||||
|
||||
let hard_reset_controller = Arc::new(HardResetController::new());
|
||||
|
||||
let config_clone = Arc::new(config.clone());
|
||||
|
||||
Ok(Self {
|
||||
instances,
|
||||
semaphore: Arc::new(Semaphore::new(actual_pool_size)),
|
||||
semaphore: Arc::new(Semaphore::new(half_size)),
|
||||
proxy_pool,
|
||||
rotation_enabled,
|
||||
next_instance: Arc::new(Mutex::new(0)),
|
||||
last_request_time: Arc::new(Mutex::new(Instant::now())),
|
||||
min_request_interval_ms,
|
||||
monitoring,
|
||||
hard_reset_controller,
|
||||
config: config_clone,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -188,10 +207,8 @@ impl ChromeDriverPool {
|
||||
|
||||
if elapsed < self.min_request_interval_ms {
|
||||
let wait_ms = self.min_request_interval_ms - elapsed;
|
||||
drop(last_time); // Lock vor Sleep freigeben!
|
||||
|
||||
drop(last_time);
|
||||
sleep(Duration::from_millis(wait_ms)).await;
|
||||
|
||||
let mut last_time = self.last_request_time.lock().await;
|
||||
*last_time = Instant::now();
|
||||
} else {
|
||||
@@ -199,12 +216,20 @@ impl ChromeDriverPool {
|
||||
}
|
||||
}
|
||||
|
||||
let random_index = random_range(0, self.instances.len() as u64) as usize;
|
||||
// Index-Auswahl (vereinfacht, siehe unten für vollständige Rotation)
|
||||
let index = if self.rotation_enabled {
|
||||
self.get_rotated_index().await?
|
||||
let instance = if self.rotation_enabled {
|
||||
self.select_instance_with_rotation().await?
|
||||
} else {
|
||||
random_index
|
||||
self.select_instance_round_robin().await
|
||||
};
|
||||
|
||||
{
|
||||
let mut inst = instance.lock().await;
|
||||
inst.increment_task_count();
|
||||
}
|
||||
|
||||
let index: usize = {
|
||||
let instances = &self.instances;
|
||||
instances.iter().position(|inst| Arc::ptr_eq(inst, &instance)).unwrap_or(0)
|
||||
};
|
||||
|
||||
if let Some(ref mon) = self.monitoring {
|
||||
@@ -216,15 +241,10 @@ impl ChromeDriverPool {
|
||||
instance_id: index,
|
||||
status: crate::monitoring::InstanceStatusChange::Active,
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let instance = &self.instances[index];
|
||||
let mut guard = instance.lock().await;
|
||||
|
||||
// NEU: Session mit automatischer Erneuerung holen!
|
||||
let client = guard.get_or_renew_session().await?;
|
||||
|
||||
guard.increment_task_count();
|
||||
let (task_count, session_requests) = guard.get_session_stats().await;
|
||||
|
||||
crate::util::logger::log_info(&format!(
|
||||
@@ -232,17 +252,17 @@ impl ChromeDriverPool {
|
||||
index, task_count, guard.max_tasks_per_instance, session_requests
|
||||
)).await;
|
||||
|
||||
drop(guard); // Lock freigeben vor Navigation
|
||||
drop(guard);
|
||||
|
||||
let start_time = Instant::now();
|
||||
|
||||
// Navigation mit Timeout
|
||||
// Navigation with timeout
|
||||
let navigation_result = timeout(
|
||||
Duration::from_secs(60),
|
||||
client.goto(&url)
|
||||
).await;
|
||||
|
||||
match navigation_result {
|
||||
let result = match navigation_result {
|
||||
Ok(Ok(_)) => {
|
||||
if let Some(ref mon) = self.monitoring {
|
||||
mon.emit(crate::monitoring::MonitoringEvent::TaskCompleted {
|
||||
@@ -258,14 +278,111 @@ impl ChromeDriverPool {
|
||||
}
|
||||
crate::util::logger::log_info(&format!("✓ Navigated to {}", url)).await;
|
||||
|
||||
// Parse-Funktion ausführen
|
||||
parse(client).await
|
||||
// Execute parse function
|
||||
match parse(client).await {
|
||||
Ok(data) => {
|
||||
// ✅ SUCCESS: Record and log
|
||||
let prev_count = self.hard_reset_controller.get_count();
|
||||
self.hard_reset_controller.record_success();
|
||||
|
||||
if prev_count > 0 {
|
||||
logger::log_info(&format!(
|
||||
"✓ Success - reset counter cleared (was: {}/{})",
|
||||
prev_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
)).await;
|
||||
}
|
||||
|
||||
Ok(data)
|
||||
}
|
||||
Err(e) => {
|
||||
// ❌ PARSE ERROR: Record, check threshold, invalidate session
|
||||
let error_count = self.hard_reset_controller.record_error();
|
||||
|
||||
{
|
||||
let mut inst = instance.lock().await;
|
||||
inst.invalidate_current_session().await;
|
||||
}
|
||||
|
||||
// Enhanced logging with threshold status
|
||||
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
|
||||
logger::log_warn(&format!(
|
||||
"Parse error. Reset counter: {}/{} ({:.0}%)",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD,
|
||||
threshold_pct
|
||||
)).await;
|
||||
|
||||
// Check if threshold reached
|
||||
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
|
||||
logger::log_error(&format!(
|
||||
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
)).await;
|
||||
|
||||
return Err(anyhow!(
|
||||
"HARD_RESET_REQUIRED: Parse failed: {}. Threshold reached ({}/{})",
|
||||
e,
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
));
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"Parse failed: {}. Hard reset at {}/{}",
|
||||
e,
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
// ❌ NAVIGATION ERROR: Record, check threshold, invalidate session
|
||||
crate::util::logger::log_error(&format!("Navigation failed: {}", e)).await;
|
||||
Err(anyhow!("Navigation failed: {}", e))
|
||||
|
||||
{
|
||||
let mut inst = instance.lock().await;
|
||||
inst.invalidate_current_session().await;
|
||||
}
|
||||
|
||||
let error_count = self.hard_reset_controller.record_error();
|
||||
|
||||
// Enhanced logging
|
||||
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
|
||||
logger::log_warn(&format!(
|
||||
"Navigation error. Reset counter: {}/{} ({:.0}%)",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD,
|
||||
threshold_pct
|
||||
)).await;
|
||||
|
||||
// Check if threshold reached
|
||||
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
|
||||
logger::log_error(&format!(
|
||||
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
)).await;
|
||||
|
||||
return Err(anyhow!(
|
||||
"HARD_RESET_REQUIRED: Navigation failed: {}. Threshold reached ({}/{})",
|
||||
e,
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
));
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"Navigation failed: {}. Hard reset at {}/{}",
|
||||
e,
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
))
|
||||
}
|
||||
Err(_) => {
|
||||
// ❌ TIMEOUT ERROR: Record, check threshold, invalidate session
|
||||
if let Some(ref mon) = self.monitoring {
|
||||
mon.emit(crate::monitoring::MonitoringEvent::NavigationTimeout {
|
||||
instance_id: index,
|
||||
@@ -273,55 +390,138 @@ impl ChromeDriverPool {
|
||||
});
|
||||
}
|
||||
|
||||
let error_count = self.hard_reset_controller.record_error();
|
||||
|
||||
crate::util::logger::log_error("Navigation timeout (60s)").await;
|
||||
Err(anyhow!("Navigation timeout"))
|
||||
|
||||
{
|
||||
let mut inst = instance.lock().await;
|
||||
inst.invalidate_current_session().await;
|
||||
}
|
||||
|
||||
// Enhanced logging
|
||||
let threshold_pct = (error_count as f64 / Self::HARD_RESET_ERROR_THRESHOLD as f64) * 100.0;
|
||||
logger::log_warn(&format!(
|
||||
"Timeout error. Reset counter: {}/{} ({:.0}%)",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD,
|
||||
threshold_pct
|
||||
)).await;
|
||||
|
||||
// Check if threshold reached
|
||||
if error_count >= Self::HARD_RESET_ERROR_THRESHOLD {
|
||||
logger::log_error(&format!(
|
||||
"🔴 HARD RESET THRESHOLD REACHED ({}/{})",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
)).await;
|
||||
|
||||
return Err(anyhow!(
|
||||
"HARD_RESET_REQUIRED: Navigation timeout. Threshold reached ({}/{})",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
));
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"Navigation timeout. Hard reset at {}/{}",
|
||||
error_count,
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
{
|
||||
let mut inst = instance.lock().await;
|
||||
inst.task_count = inst.task_count.saturating_sub(1);
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
async fn get_rotated_index(&self) -> Result<usize> {
|
||||
let total = self.instances.len();
|
||||
let half_size = total / 2;
|
||||
/// Simple round-robin instance selection (no rotation)
|
||||
async fn select_instance_round_robin(&self) -> Arc<Mutex<ChromeInstance>> {
|
||||
let mut next = self.next_instance.lock().await;
|
||||
let index = *next;
|
||||
*next = (*next + 1) % self.instances.len();
|
||||
drop(next);
|
||||
|
||||
Arc::clone(&self.instances[index])
|
||||
}
|
||||
|
||||
/// Round-robin with half-pool rotation
|
||||
async fn select_instance_with_rotation(&self) -> Result<Arc<Mutex<ChromeInstance>>> {
|
||||
let pool_size = self.instances.len();
|
||||
let half_size = pool_size / 2;
|
||||
|
||||
if half_size == 0 {
|
||||
return Ok(0); // Pool zu klein für Rotation
|
||||
// Pool too small for rotation, fall back to simple round-robin
|
||||
return Ok(self.select_instance_round_robin().await);
|
||||
}
|
||||
|
||||
let mut next_idx = self.next_instance.lock().await;
|
||||
let current_half_start = if *next_idx < half_size { 0 } else { half_size };
|
||||
let current_half_end = if *next_idx < half_size { half_size } else { total };
|
||||
let mut next = self.next_instance.lock().await;
|
||||
let current_half_start = (*next / half_size) * half_size;
|
||||
let current_half_end = (current_half_start + half_size).min(pool_size);
|
||||
|
||||
// Suche verfügbare Instanz in aktueller Hälfte
|
||||
for offset in 0..(current_half_end - current_half_start) {
|
||||
let candidate_idx = current_half_start + ((*next_idx + offset) % half_size);
|
||||
// Try to find available instance in current half
|
||||
let mut attempts = 0;
|
||||
let max_attempts = half_size * 2; // Try both halves
|
||||
|
||||
while attempts < max_attempts {
|
||||
let index = current_half_start + (*next % half_size);
|
||||
let instance = &self.instances[index];
|
||||
|
||||
let instance = &self.instances[candidate_idx];
|
||||
let guard = instance.lock().await;
|
||||
// Check if instance can accept more tasks
|
||||
let mut inst = instance.lock().await;
|
||||
let can_accept = inst.get_task_count() < inst.max_tasks_per_instance;
|
||||
drop(inst);
|
||||
|
||||
if guard.max_tasks_per_instance == 0 ||
|
||||
guard.task_count < guard.max_tasks_per_instance {
|
||||
*next_idx = (candidate_idx + 1) % total;
|
||||
drop(guard);
|
||||
return Ok(candidate_idx);
|
||||
if can_accept {
|
||||
*next = (*next + 1) % pool_size;
|
||||
drop(next);
|
||||
|
||||
if let Some(ref mon) = self.monitoring {
|
||||
mon.emit(crate::monitoring::MonitoringEvent::InstanceSelected {
|
||||
instance_id: index,
|
||||
half: if index < half_size { 1 } else { 2 },
|
||||
});
|
||||
}
|
||||
|
||||
return Ok(Arc::clone(instance));
|
||||
}
|
||||
|
||||
// Current half saturated, try other half
|
||||
if attempts == half_size - 1 {
|
||||
logger::log_info("Current half saturated, rotating to other half").await;
|
||||
*next = if current_half_start == 0 { half_size } else { 0 };
|
||||
} else {
|
||||
*next = (*next + 1) % pool_size;
|
||||
}
|
||||
|
||||
attempts += 1;
|
||||
}
|
||||
|
||||
// Aktuelle Hälfte voll → Zur anderen wechseln
|
||||
crate::util::logger::log_info("Current half saturated, rotating to other half").await;
|
||||
drop(next);
|
||||
|
||||
let new_half_start = if current_half_start == 0 { half_size } else { 0 };
|
||||
let new_half_end = if current_half_start == 0 { total } else { half_size };
|
||||
|
||||
// Alte Hälfte zurücksetzen (für nächste Rotation)
|
||||
for i in current_half_start..current_half_end {
|
||||
let mut instance = self.instances[i].lock().await;
|
||||
instance.reset_task_count();
|
||||
}
|
||||
|
||||
*next_idx = new_half_start;
|
||||
drop(next_idx);
|
||||
|
||||
Ok(new_half_start)
|
||||
// All instances saturated
|
||||
Err(anyhow!("All instances at task capacity"))
|
||||
}
|
||||
|
||||
/// Returns another `Arc` handle to this pool's `HardResetController`.
pub fn get_reset_controller(&self) -> Arc<HardResetController> {
    self.hard_reset_controller.clone()
}
|
||||
|
||||
/// Check if hard reset threshold has been reached
|
||||
pub fn should_perform_hard_reset(&self) -> bool {
|
||||
self.hard_reset_controller.get_count() >= Self::HARD_RESET_ERROR_THRESHOLD
|
||||
}
|
||||
|
||||
/// Get current error count and threshold for monitoring
|
||||
pub fn get_reset_status(&self) -> (usize, usize) {
|
||||
(
|
||||
self.hard_reset_controller.get_count(),
|
||||
Self::HARD_RESET_ERROR_THRESHOLD
|
||||
)
|
||||
}
|
||||
|
||||
/// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
|
||||
@@ -369,7 +569,7 @@ pub struct ChromeInstance {
|
||||
|
||||
current_session: Arc<Mutex<Option<Client>>>, // Current active session
|
||||
session_request_count: Arc<Mutex<usize>>,
|
||||
max_requests_per_session: usize, // z.B. 25
|
||||
max_requests_per_session: usize,
|
||||
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Reference to the proxy pool
|
||||
current_proxy_index: Arc<Mutex<usize>>, // Current proxy index in use
|
||||
@@ -411,8 +611,6 @@ impl ChromeInstance {
|
||||
pub async fn get_or_renew_session(&self) -> Result<Client> {
|
||||
let mut session_opt = self.current_session.lock().await;
|
||||
let mut request_count = self.session_request_count.lock().await;
|
||||
|
||||
let old_request_count = *request_count;
|
||||
|
||||
// Renew the session when:
|
||||
// 1. No session exists
|
||||
@@ -476,7 +674,7 @@ impl ChromeInstance {
|
||||
mon.emit(crate::monitoring::MonitoringEvent::SessionRenewed {
|
||||
instance_id: self.instance_id,
|
||||
old_request_count: *request_count,
|
||||
reason: crate::monitoring::RenewalReason::RequestLimit,
|
||||
reason: reason,
|
||||
new_proxy: new_proxy_info,
|
||||
});
|
||||
}
|
||||
@@ -490,15 +688,21 @@ impl ChromeInstance {
|
||||
}
|
||||
|
||||
async fn create_fresh_session(&self) -> Result<Client> {
|
||||
// Get the current proxy URL without mutating self
|
||||
let proxy_url = if let Some(ref pool) = self.proxy_pool {
|
||||
let mut proxy_idx = self.current_proxy_index.lock().await;
|
||||
*proxy_idx = (*proxy_idx + 1) % pool.num_proxies();
|
||||
let url = pool.get_proxy_url(*proxy_idx);
|
||||
let num_proxies = pool.num_proxies();
|
||||
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Using proxy {} for new session",
|
||||
*proxy_idx
|
||||
// Round-robin through all proxies
|
||||
let selected_proxy = *proxy_idx % num_proxies;
|
||||
*proxy_idx = (*proxy_idx + 1) % num_proxies;
|
||||
|
||||
let url = pool.get_proxy_url(selected_proxy);
|
||||
|
||||
logger::log_info(&format!(
|
||||
"Instance {} creating session with proxy {}/{} (rotation)",
|
||||
self.instance_id,
|
||||
selected_proxy,
|
||||
num_proxies
|
||||
)).await;
|
||||
|
||||
Some(url)
|
||||
@@ -516,38 +720,19 @@ impl ChromeInstance {
|
||||
.context("Failed to connect to ChromeDriver")
|
||||
}
|
||||
|
||||
fn chrome_args_with_ua(&self, user_agent: &str, proxy_url: &Option<String>) -> Map<String, Value> {
|
||||
let mut args = vec![
|
||||
"--headless=new".to_string(),
|
||||
"--disable-gpu".to_string(),
|
||||
"--no-sandbox".to_string(),
|
||||
"--disable-dev-shm-usage".to_string(),
|
||||
"--disable-infobars".to_string(),
|
||||
"--disable-extensions".to_string(),
|
||||
"--disable-popup-blocking".to_string(),
|
||||
"--disable-notifications".to_string(),
|
||||
"--disable-autofill".to_string(),
|
||||
"--disable-sync".to_string(),
|
||||
"--disable-default-apps".to_string(),
|
||||
"--disable-translate".to_string(),
|
||||
"--disable-blink-features=AutomationControlled".to_string(),
|
||||
format!("--user-agent={}", user_agent),
|
||||
];
|
||||
pub async fn invalidate_current_session(&self) {
|
||||
let mut session_opt = self.current_session.lock().await;
|
||||
|
||||
if let Some(proxy) = proxy_url {
|
||||
args.push(format!("--proxy-server={}", proxy));
|
||||
if let Some(old_session) = session_opt.take() {
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Invalidating broken session for instance {}",
|
||||
self.instance_id
|
||||
)).await;
|
||||
let _ = old_session.close().await;
|
||||
}
|
||||
|
||||
let caps = serde_json::json!({
|
||||
"goog:chromeOptions": {
|
||||
"args": args,
|
||||
"excludeSwitches": ["enable-logging", "enable-automation"],
|
||||
"prefs": {
|
||||
"profile.default_content_setting_values.notifications": 2
|
||||
}
|
||||
}
|
||||
});
|
||||
caps.as_object().cloned().unwrap()
|
||||
let mut request_count = self.session_request_count.lock().await;
|
||||
*request_count = 0;
|
||||
}
|
||||
|
||||
pub fn reset_task_count(&mut self) {
|
||||
@@ -578,6 +763,20 @@ impl ChromeInstance {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn is_available(&self) -> bool {
|
||||
if self.max_tasks_per_instance == 0 {
|
||||
return true; // No limit
|
||||
}
|
||||
self.task_count < self.max_tasks_per_instance
|
||||
}
|
||||
|
||||
pub fn tasks_remaining(&self) -> usize {
|
||||
if self.max_tasks_per_instance == 0 {
|
||||
return usize::MAX;
|
||||
}
|
||||
self.max_tasks_per_instance.saturating_sub(self.task_count)
|
||||
}
|
||||
|
||||
/// Spawns the actual `chromedriver` binary and waits for it to become ready.
|
||||
async fn spawn_chromedriver() -> Result<(String, Child, JoinHandle<()>)> {
|
||||
let mut process = Command::new("chromedriver-win64/chromedriver.exe")
|
||||
@@ -624,6 +823,40 @@ impl ChromeInstance {
|
||||
Err(anyhow!("ChromeDriver failed to start within 30s"))
|
||||
}
|
||||
|
||||
fn chrome_args_with_ua(&self, user_agent: &str, proxy_url: &Option<String>) -> Map<String, Value> {
|
||||
let mut args = vec![
|
||||
"--headless=new".to_string(),
|
||||
"--disable-gpu".to_string(),
|
||||
"--no-sandbox".to_string(),
|
||||
"--disable-dev-shm-usage".to_string(),
|
||||
"--disable-infobars".to_string(),
|
||||
"--disable-extensions".to_string(),
|
||||
"--disable-popup-blocking".to_string(),
|
||||
"--disable-notifications".to_string(),
|
||||
"--disable-autofill".to_string(),
|
||||
"--disable-sync".to_string(),
|
||||
"--disable-default-apps".to_string(),
|
||||
"--disable-translate".to_string(),
|
||||
"--disable-blink-features=AutomationControlled".to_string(),
|
||||
format!("--user-agent={}", user_agent),
|
||||
];
|
||||
|
||||
if let Some(proxy) = proxy_url {
|
||||
args.push(format!("--proxy-server={}", proxy));
|
||||
}
|
||||
|
||||
let caps = serde_json::json!({
|
||||
"goog:chromeOptions": {
|
||||
"args": args,
|
||||
"excludeSwitches": ["enable-logging", "enable-automation"],
|
||||
"prefs": {
|
||||
"profile.default_content_setting_values.notifications": 2
|
||||
}
|
||||
}
|
||||
});
|
||||
caps.as_object().cloned().unwrap()
|
||||
}
|
||||
|
||||
pub fn chrome_user_agent() -> &'static str {
|
||||
static UAS: &[&str] = &[
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36",
|
||||
|
||||
Reference in New Issue
Block a user