377 lines
13 KiB
Rust
377 lines
13 KiB
Rust
// src/scraper/hard_reset.rs - FIXED: Proper cleanup without Arc leaks
|
|
use std::sync::{Arc, atomic::{AtomicBool, AtomicUsize, Ordering}};
|
|
|
|
use crate::{ChromeDriverPool, Config, logger, scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}, util::directories::DataPaths};
|
|
|
|
/// Simple error counter for triggering hard resets.
///
/// Thread-safe via an atomic counter: workers record successes and errors
/// concurrently, and the caller triggers a hard reset once the number of
/// *consecutive* errors crosses its threshold.
pub struct HardResetController {
    // Number of consecutive errors since the last recorded success/reset.
    consecutive_errors: AtomicUsize,
}

impl HardResetController {
    /// Create a controller with the error counter at zero.
    pub fn new() -> Self {
        Self {
            consecutive_errors: AtomicUsize::new(0),
        }
    }

    /// Record success - resets counter
    pub fn record_success(&self) {
        self.consecutive_errors.store(0, Ordering::SeqCst);
    }

    /// Record error - returns new count
    pub fn record_error(&self) -> usize {
        // fetch_add returns the *previous* value, so add 1 for the new count.
        self.consecutive_errors.fetch_add(1, Ordering::SeqCst) + 1
    }

    /// Reset counter (e.g. after a hard reset completes)
    pub fn reset(&self) {
        self.consecutive_errors.store(0, Ordering::SeqCst);
    }

    /// Get current count
    pub fn get_count(&self) -> usize {
        self.consecutive_errors.load(Ordering::SeqCst)
    }
}

impl Default for HardResetController {
    fn default() -> Self {
        Self::new()
    }
}
|
|
|
|
/// ✅ FIXED: Perform hard reset without Arc reference leaks
|
|
///
|
|
/// Key improvements:
|
|
/// 1. Don't clone old_pool - just shutdown through mutex guard
|
|
/// 2. Verify all processes killed before creating new pool
|
|
/// 3. Explicitly shutdown temp pools with error handling
|
|
/// 4. Add process counting/verification
|
|
pub async fn perform_hard_reset(
|
|
pool_mutex: &Arc<tokio::sync::Mutex<Arc<ChromeDriverPool>>>,
|
|
config: &Config,
|
|
paths: &DataPaths,
|
|
monitoring: &Option<crate::monitoring::MonitoringHandle>,
|
|
shutdown_flag: &Arc<AtomicBool>,
|
|
) -> anyhow::Result<()> {
|
|
let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
|
|
logger::log_error("🔴 STARTING HARD RESET SEQUENCE").await;
|
|
|
|
// Check if shutdown was requested
|
|
if shutdown_flag.load(Ordering::SeqCst) {
|
|
logger::log_warn("Shutdown requested during hard reset, aborting").await;
|
|
return Ok(());
|
|
}
|
|
|
|
// ===== STEP 1: ACQUIRE POOL LOCK (NO CLONING!) =====
|
|
logger::log_info(" [1/12] Acquiring pool lock...").await;
|
|
let mut pool_guard = pool_mutex.lock().await;
|
|
|
|
// Get instance count before shutdown for verification
|
|
let old_instance_count = pool_guard.get_number_of_instances();
|
|
logger::log_info(&format!(" [1/12] Pool has {} instances", old_instance_count)).await;
|
|
|
|
// ===== STEP 2: SHUTDOWN OLD POOL (NO ARC CLONE!) =====
|
|
logger::log_info(" [2/12] Shutting down old pool (NO Arc clone)...").await;
|
|
|
|
// Shutdown through the Arc without cloning it
|
|
// This is safe because we hold the mutex lock
|
|
match pool_guard.shutdown().await {
|
|
Ok(()) => {
|
|
logger::log_info(" [2/12] ✓ Pool shutdown complete").await;
|
|
}
|
|
Err(e) => {
|
|
logger::log_error(&format!(" [2/12] ✗ Pool shutdown error: {}", e)).await;
|
|
// Continue anyway - we'll force-kill processes
|
|
}
|
|
}
|
|
|
|
// ===== STEP 3: FORCE-KILL ANY REMAINING CHROME PROCESSES =====
|
|
logger::log_info(" [3/12] Force-killing any remaining Chrome/ChromeDriver processes...").await;
|
|
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
// Kill all chrome.exe processes
|
|
let chrome_result = tokio::process::Command::new("taskkill")
|
|
.args(["/F", "/IM", "chrome.exe"])
|
|
.output()
|
|
.await;
|
|
|
|
match chrome_result {
|
|
Ok(output) if output.status.success() => {
|
|
logger::log_info(" [3/12] ✓ Chrome processes killed").await;
|
|
}
|
|
_ => {
|
|
logger::log_info(" [3/12] ⊘ No Chrome processes found").await;
|
|
}
|
|
}
|
|
|
|
// Kill all chromedriver.exe processes
|
|
let chromedriver_result = tokio::process::Command::new("taskkill")
|
|
.args(["/F", "/IM", "chromedriver.exe"])
|
|
.output()
|
|
.await;
|
|
|
|
match chromedriver_result {
|
|
Ok(output) if output.status.success() => {
|
|
logger::log_info(" [3/12] ✓ ChromeDriver processes killed").await;
|
|
}
|
|
_ => {
|
|
logger::log_info(" [3/12] ⊘ No ChromeDriver processes found").await;
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(not(target_os = "windows"))]
|
|
{
|
|
// Kill all chrome processes
|
|
let _ = tokio::process::Command::new("pkill")
|
|
.arg("chrome")
|
|
.output()
|
|
.await;
|
|
|
|
let _ = tokio::process::Command::new("pkill")
|
|
.arg("chromedriver")
|
|
.output()
|
|
.await;
|
|
|
|
logger::log_info(" [3/12] ✓ Force-killed Chrome/ChromeDriver").await;
|
|
}
|
|
|
|
// ===== STEP 4: SHUTDOWN PROXIES =====
|
|
logger::log_info(" [4/12] Shutting down proxy containers...").await;
|
|
cleanup_all_proxy_containers().await.ok();
|
|
|
|
// ===== STEP 5: WAIT FOR CLEANUP =====
|
|
logger::log_info(" [5/12] Waiting 30 seconds for cleanup...").await;
|
|
tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;
|
|
|
|
// ===== STEP 6: VERIFY CLEANUP =====
|
|
logger::log_info(" [6/12] Verifying process cleanup...").await;
|
|
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
let check_chrome = tokio::process::Command::new("tasklist")
|
|
.args(["/FI", "IMAGENAME eq chrome.exe"])
|
|
.output()
|
|
.await;
|
|
|
|
if let Ok(output) = check_chrome {
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
|
|
|
|
if chrome_count > 0 {
|
|
logger::log_warn(&format!(" [6/12] ⚠️ {} Chrome processes still running!", chrome_count)).await;
|
|
} else {
|
|
logger::log_info(" [6/12] ✓ No Chrome processes running").await;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check shutdown again
|
|
if shutdown_flag.load(Ordering::SeqCst) {
|
|
logger::log_warn("Shutdown requested during cleanup, aborting reset").await;
|
|
return Ok(());
|
|
}
|
|
|
|
// ===== STEP 7: RECREATE PROXY POOL =====
|
|
logger::log_info(" [7/12] Recreating proxy pool...").await;
|
|
let new_proxy_pool = if config.enable_vpn_rotation {
|
|
match recreate_proxy_pool_with_fresh_credentials(config, paths, monitoring, shutdown_flag).await {
|
|
Ok(pool) => {
|
|
logger::log_info(&format!(
|
|
" [7/12] ✓ Proxy pool created with {} proxies",
|
|
pool.num_proxies()
|
|
)).await;
|
|
Some(pool)
|
|
}
|
|
Err(e) => {
|
|
logger::log_warn(&format!(
|
|
" [7/12] ⚠️ Proxy creation failed: {}. Continuing without proxies.",
|
|
e
|
|
)).await;
|
|
None
|
|
}
|
|
}
|
|
} else {
|
|
logger::log_info(" [7/12] ⊘ VPN rotation disabled, skipping proxy pool").await;
|
|
None
|
|
};
|
|
|
|
// ===== STEP 8: RECREATE CHROMEDRIVER POOL =====
|
|
logger::log_info(" [8/12] Recreating ChromeDriver pool...").await;
|
|
let new_pool = Arc::new(
|
|
ChromeDriverPool::new_with_proxy_and_task_limit(
|
|
new_proxy_pool,
|
|
config,
|
|
monitoring.clone(),
|
|
).await?
|
|
);
|
|
|
|
logger::log_info(&format!(
|
|
" [8/12] ✓ ChromeDriver pool created with {} instances",
|
|
new_pool.get_number_of_instances()
|
|
)).await;
|
|
|
|
// ===== STEP 9: RESET ERROR COUNTER =====
|
|
logger::log_info(" [9/12] Resetting error counter...").await;
|
|
new_pool.get_reset_controller().reset();
|
|
logger::log_info(" [9/12] ✓ Error counter cleared").await;
|
|
|
|
// ===== STEP 10: REPLACE POOL ATOMICALLY =====
|
|
logger::log_info(" [10/12] Activating new pool...").await;
|
|
*pool_guard = new_pool;
|
|
drop(pool_guard);
|
|
logger::log_info(" [10/12] ✓ New pool activated").await;
|
|
|
|
// ===== STEP 11: EMIT MONITORING EVENT =====
|
|
logger::log_info(" [11/12] Updating monitoring...").await;
|
|
if let Some(mon) = monitoring {
|
|
mon.emit(crate::monitoring::MonitoringEvent::PoolInitialized {
|
|
pool_size: config.max_parallel_instances,
|
|
with_proxy: config.enable_vpn_rotation,
|
|
with_rotation: config.max_tasks_per_instance > 0,
|
|
});
|
|
}
|
|
|
|
// ===== STEP 12: FINAL VERIFICATION =====
|
|
logger::log_info(" [12/12] Final verification...").await;
|
|
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
let check_chrome = tokio::process::Command::new("tasklist")
|
|
.args(["/FI", "IMAGENAME eq chrome.exe"])
|
|
.output()
|
|
.await;
|
|
|
|
if let Ok(output) = check_chrome {
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let chrome_count = stdout.lines().filter(|line| line.contains("chrome.exe")).count();
|
|
logger::log_info(&format!(" [12/12] Chrome processes: {}", chrome_count)).await;
|
|
}
|
|
|
|
let check_chromedriver = tokio::process::Command::new("tasklist")
|
|
.args(["/FI", "IMAGENAME eq chromedriver.exe"])
|
|
.output()
|
|
.await;
|
|
|
|
if let Ok(output) = check_chromedriver {
|
|
let stdout = String::from_utf8_lossy(&output.stdout);
|
|
let chromedriver_count = stdout.lines().filter(|line| line.contains("chromedriver.exe")).count();
|
|
logger::log_info(&format!(" [12/12] ChromeDriver processes: {}", chromedriver_count)).await;
|
|
}
|
|
}
|
|
|
|
logger::log_info("✅ HARD RESET COMPLETE").await;
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// ✅ FIXED: Recreate proxy pool with temp pool that's properly shut down
|
|
async fn recreate_proxy_pool_with_fresh_credentials(
|
|
config: &Config,
|
|
paths: &DataPaths,
|
|
monitoring: &Option<crate::monitoring::MonitoringHandle>,
|
|
shutdown_flag: &Arc<AtomicBool>,
|
|
) -> anyhow::Result<Arc<DockerVpnProxyPool>> {
|
|
|
|
let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);
|
|
|
|
// Check shutdown
|
|
if shutdown_flag.load(Ordering::SeqCst) {
|
|
return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
|
|
}
|
|
|
|
logger::log_info(" [7.1] Creating temporary ChromeDriver pool for credential fetch...").await;
|
|
|
|
// Create temporary pool WITHOUT proxy
|
|
let temp_pool = Arc::new(
|
|
ChromeDriverPool::new_with_proxy_and_task_limit(
|
|
None, // No proxy for temp pool
|
|
config,
|
|
monitoring.clone(),
|
|
).await?
|
|
);
|
|
|
|
logger::log_info(" [7.2] Fetching fresh VPNBook credentials...").await;
|
|
|
|
// Fetch fresh VPNBook credentials
|
|
let (username, password, _files) = crate::util::opnv::fetch_vpnbook_configs(
|
|
&temp_pool,
|
|
paths.cache_dir()
|
|
).await?;
|
|
|
|
logger::log_info(&format!(" [7.3] Got credentials → User: {}", username)).await;
|
|
|
|
// ✅ FIXED: Properly shutdown temp pool with error handling
|
|
logger::log_info(" [7.4] Shutting down temporary pool...").await;
|
|
match temp_pool.shutdown().await {
|
|
Ok(()) => {
|
|
logger::log_info(" [7.4] ✓ Temp pool shut down successfully").await;
|
|
}
|
|
Err(e) => {
|
|
logger::log_error(&format!(" [7.4] ✗ Temp pool shutdown error: {}", e)).await;
|
|
// Force-kill processes as backup
|
|
#[cfg(target_os = "windows")]
|
|
{
|
|
let _ = tokio::process::Command::new("taskkill")
|
|
.args(["/F", "/IM", "chrome.exe"])
|
|
.output()
|
|
.await;
|
|
let _ = tokio::process::Command::new("taskkill")
|
|
.args(["/F", "/IM", "chromedriver.exe"])
|
|
.output()
|
|
.await;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Wait a moment for temp pool cleanup
|
|
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
|
|
|
|
// Check shutdown again
|
|
if shutdown_flag.load(Ordering::SeqCst) {
|
|
return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
|
|
}
|
|
|
|
// Check if we have VPN server configs
|
|
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
|
|
.filter(|e| e.as_ref().unwrap().path().is_dir())
|
|
.count();
|
|
|
|
if server_count == 0 {
|
|
return Err(anyhow::anyhow!("No VPN servers found after credential fetch"));
|
|
}
|
|
|
|
logger::log_info(&format!(
|
|
" [7.5] Found {} VPN servers → Creating proxy pool with {} instances per server...",
|
|
server_count,
|
|
number_proxy_instances
|
|
)).await;
|
|
|
|
// Create new proxy pool
|
|
let proxy_pool = Arc::new(
|
|
DockerVpnProxyPool::new(
|
|
paths.cache_openvpn_dir(),
|
|
username,
|
|
password,
|
|
number_proxy_instances,
|
|
).await?
|
|
);
|
|
|
|
logger::log_info(&format!(
|
|
" [7.6] ✓ Proxy pool ready with {} total proxies",
|
|
proxy_pool.num_proxies()
|
|
)).await;
|
|
|
|
// Emit proxy connected events for monitoring
|
|
if let Some(mon) = monitoring {
|
|
for i in 0..proxy_pool.num_proxies() {
|
|
if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
|
|
mon.emit(crate::monitoring::MonitoringEvent::ProxyConnected {
|
|
container_name: proxy_info.container_name.clone(),
|
|
ip_address: proxy_info.ip_address.clone(),
|
|
port: proxy_info.port,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(proxy_pool)
|
|
} |