// src/scraper/hard_reset.rs - FIXED: Proper cleanup without Arc leaks use std::sync::{Arc, atomic::{AtomicBool, AtomicUsize, Ordering}}; use crate::{ChromeDriverPool, Config, logger, scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}, util::directories::DataPaths}; /// Simple error counter for triggering hard resets pub struct HardResetController { consecutive_errors: AtomicUsize, } impl HardResetController { pub fn new() -> Self { Self { consecutive_errors: AtomicUsize::new(0), } } /// Record success - resets counter pub fn record_success(&self) { self.consecutive_errors.store(0, Ordering::SeqCst); } /// Record error - returns new count pub fn record_error(&self) -> usize { self.consecutive_errors.fetch_add(1, Ordering::SeqCst) + 1 } /// Reset counter pub fn reset(&self) { self.consecutive_errors.store(0, Ordering::SeqCst); } /// Get current count pub fn get_count(&self) -> usize { self.consecutive_errors.load(Ordering::SeqCst) } } /// ✅ FIXED: Perform hard reset without Arc reference leaks /// /// Key improvements: /// 1. Don't clone old_pool - just shutdown through mutex guard /// 2. Verify all processes killed before creating new pool /// 3. Explicitly shutdown temp pools with error handling /// 4. Add process counting/verification pub async fn perform_hard_reset( pool_mutex: &Arc>>, config: &Config, paths: &DataPaths, monitoring: &Option, shutdown_flag: &Arc, ) -> anyhow::Result<()> { let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1); logger::log_error("🔴 STARTING HARD RESET SEQUENCE").await; // Check if shutdown was requested if shutdown_flag.load(Ordering::SeqCst) { logger::log_warn("Shutdown requested during hard reset, aborting").await; return Ok(()); } // ===== STEP 1: ACQUIRE POOL LOCK (NO CLONING!) ===== logger::log_info(" [1/12] Acquiring pool lock...").await; let mut pool_guard = pool_mutex.lock().await; // Get instance count before shutdown for verification let old_instance_count = pool_guard.get_number_of_instances(); logger::log_info(&format!(" [1/12] Pool has {} instances", old_instance_count)).await; // ===== STEP 2: SHUTDOWN OLD POOL (NO ARC CLONE!) ===== logger::log_info(" [2/12] Shutting down old pool (NO Arc clone)...").await; // Shutdown through the Arc without cloning it // This is safe because we hold the mutex lock match pool_guard.shutdown().await { Ok(()) => { logger::log_info(" [2/12] ✓ Pool shutdown complete").await; } Err(e) => { logger::log_error(&format!(" [2/12] ✗ Pool shutdown error: {}", e)).await; // Continue anyway - we'll force-kill processes } } // ===== STEP 3: FORCE-KILL ANY REMAINING CHROME PROCESSES ===== logger::log_info(" [3/12] Force-killing any remaining Chrome/ChromeDriver processes...").await; #[cfg(target_os = "windows")] { // Kill all chrome.exe processes let chrome_result = tokio::process::Command::new("taskkill") .args(["/F", "/IM", "chrome.exe"]) .output() .await; match chrome_result { Ok(output) if output.status.success() => { logger::log_info(" [3/12] ✓ Chrome processes killed").await; } _ => { logger::log_info(" [3/12] ⊘ No Chrome processes found").await; } } // Kill all chromedriver.exe processes let chromedriver_result = tokio::process::Command::new("taskkill") .args(["/F", "/IM", "chromedriver.exe"]) .output() .await; match chromedriver_result { Ok(output) if output.status.success() => { logger::log_info(" [3/12] ✓ ChromeDriver processes killed").await; } _ => { logger::log_info(" [3/12] ⊘ No ChromeDriver processes found").await; } } } #[cfg(not(target_os = "windows"))] { // Kill all chrome processes let _ = tokio::process::Command::new("pkill") .arg("chrome") .output() .await; let _ = tokio::process::Command::new("pkill") .arg("chromedriver") .output() .await; logger::log_info(" [3/12] ✓ Force-killed Chrome/ChromeDriver").await; } // ===== STEP 4: SHUTDOWN PROXIES ===== logger::log_info(" [4/12] Shutting down proxy containers...").await; cleanup_all_proxy_containers().await.ok(); // ===== STEP 5: WAIT FOR CLEANUP ===== logger::log_info(" [5/12] Waiting 30 seconds for cleanup...").await; tokio::time::sleep(tokio::time::Duration::from_secs(30)).await; // ===== STEP 6: VERIFY CLEANUP ===== logger::log_info(" [6/12] Verifying process cleanup...").await; #[cfg(target_os = "windows")] { let check_chrome = tokio::process::Command::new("tasklist") .args(["/FI", "IMAGENAME eq chrome.exe"]) .output() .await; if let Ok(output) = check_chrome { let stdout = String::from_utf8_lossy(&output.stdout); let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count(); if chrome_count > 0 { logger::log_warn(&format!(" [6/12] ⚠️ {} Chrome processes still running!", chrome_count)).await; } else { logger::log_info(" [6/12] ✓ No Chrome processes running").await; } } } // Check shutdown again if shutdown_flag.load(Ordering::SeqCst) { logger::log_warn("Shutdown requested during cleanup, aborting reset").await; return Ok(()); } // ===== STEP 7: RECREATE PROXY POOL ===== logger::log_info(" [7/12] Recreating proxy pool...").await; let new_proxy_pool = if config.enable_vpn_rotation { match recreate_proxy_pool_with_fresh_credentials(config, paths, monitoring, shutdown_flag).await { Ok(pool) => { logger::log_info(&format!( " [7/12] ✓ Proxy pool created with {} proxies", pool.num_proxies() )).await; Some(pool) } Err(e) => { logger::log_warn(&format!( " [7/12] ⚠️ Proxy creation failed: {}. Continuing without proxies.", e )).await; None } } } else { logger::log_info(" [7/12] ⊘ VPN rotation disabled, skipping proxy pool").await; None }; // ===== STEP 8: RECREATE CHROMEDRIVER POOL ===== logger::log_info(" [8/12] Recreating ChromeDriver pool...").await; let new_pool = Arc::new( ChromeDriverPool::new_with_proxy_and_task_limit( new_proxy_pool, config, monitoring.clone(), ).await? ); logger::log_info(&format!( " [8/12] ✓ ChromeDriver pool created with {} instances", new_pool.get_number_of_instances() )).await; // ===== STEP 9: RESET ERROR COUNTER ===== logger::log_info(" [9/12] Resetting error counter...").await; new_pool.get_reset_controller().reset(); logger::log_info(" [9/12] ✓ Error counter cleared").await; // ===== STEP 10: REPLACE POOL ATOMICALLY ===== logger::log_info(" [10/12] Activating new pool...").await; *pool_guard = new_pool; drop(pool_guard); logger::log_info(" [10/12] ✓ New pool activated").await; // ===== STEP 11: EMIT MONITORING EVENT ===== logger::log_info(" [11/12] Updating monitoring...").await; if let Some(mon) = monitoring { mon.emit(crate::monitoring::MonitoringEvent::PoolInitialized { pool_size: config.max_parallel_instances, with_proxy: config.enable_vpn_rotation, with_rotation: config.max_tasks_per_instance > 0, }); } // ===== STEP 12: FINAL VERIFICATION ===== logger::log_info(" [12/12] Final verification...").await; #[cfg(target_os = "windows")] { let check_chrome = tokio::process::Command::new("tasklist") .args(["/FI", "IMAGENAME eq chrome.exe"]) .output() .await; if let Ok(output) = check_chrome { let stdout = String::from_utf8_lossy(&output.stdout); let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count(); logger::log_info(&format!(" [12/12] Chrome processes: {}", chrome_count)).await; } let check_chromedriver = tokio::process::Command::new("tasklist") .args(["/FI", "IMAGENAME eq chromedriver.exe"]) .output() .await; if let Ok(output) = check_chromedriver { let stdout = String::from_utf8_lossy(&output.stdout); let chromedriver_count = stdout.lines().filter(|line| line.contains("chromedriver.exe")).count(); logger::log_info(&format!(" [12/12] ChromeDriver processes: {}", chromedriver_count)).await; } } logger::log_info("✅ HARD RESET COMPLETE").await; Ok(()) } /// ✅ FIXED: Recreate proxy pool with temp pool that's properly shut down async fn recreate_proxy_pool_with_fresh_credentials( config: &Config, paths: &DataPaths, monitoring: &Option, shutdown_flag: &Arc, ) -> anyhow::Result> { let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1); // Check shutdown if shutdown_flag.load(Ordering::SeqCst) { return Err(anyhow::anyhow!("Shutdown requested during proxy recreation")); } logger::log_info(" [7.1] Creating temporary ChromeDriver pool for credential fetch...").await; // Create temporary pool WITHOUT proxy let temp_pool = Arc::new( ChromeDriverPool::new_with_proxy_and_task_limit( None, // No proxy for temp pool config, monitoring.clone(), ).await? ); logger::log_info(" [7.2] Fetching fresh VPNBook credentials...").await; // Fetch fresh VPNBook credentials let (username, password, _files) = crate::util::opnv::fetch_vpnbook_configs( &temp_pool, paths.cache_dir() ).await?; logger::log_info(&format!(" [7.3] Got credentials → User: {}", username)).await; // ✅ FIXED: Properly shutdown temp pool with error handling logger::log_info(" [7.4] Shutting down temporary pool...").await; match temp_pool.shutdown().await { Ok(()) => { logger::log_info(" [7.4] ✓ Temp pool shut down successfully").await; } Err(e) => { logger::log_error(&format!(" [7.4] ✗ Temp pool shutdown error: {}", e)).await; // Force-kill processes as backup #[cfg(target_os = "windows")] { let _ = tokio::process::Command::new("taskkill") .args(["/F", "/IM", "chrome.exe"]) .output() .await; let _ = tokio::process::Command::new("taskkill") .args(["/F", "/IM", "chromedriver.exe"]) .output() .await; } } } // Wait a moment for temp pool cleanup tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; // Check shutdown again if shutdown_flag.load(Ordering::SeqCst) { return Err(anyhow::anyhow!("Shutdown requested during proxy recreation")); } // Check if we have VPN server configs let server_count = std::fs::read_dir(paths.cache_openvpn_dir())? .filter(|e| e.as_ref().unwrap().path().is_dir()) .count(); if server_count == 0 { return Err(anyhow::anyhow!("No VPN servers found after credential fetch")); } logger::log_info(&format!( " [7.5] Found {} VPN servers → Creating proxy pool with {} instances per server...", server_count, number_proxy_instances )).await; // Create new proxy pool let proxy_pool = Arc::new( DockerVpnProxyPool::new( paths.cache_openvpn_dir(), username, password, number_proxy_instances, ).await? ); logger::log_info(&format!( " [7.6] ✓ Proxy pool ready with {} total proxies", proxy_pool.num_proxies() )).await; // Emit proxy connected events for monitoring if let Some(mon) = monitoring { for i in 0..proxy_pool.num_proxies() { if let Some(proxy_info) = proxy_pool.get_proxy_info(i) { mon.emit(crate::monitoring::MonitoringEvent::ProxyConnected { container_name: proxy_info.container_name.clone(), ip_address: proxy_info.ip_address.clone(), port: proxy_info.port, }); } } } Ok(proxy_pool) }