added hard reset for navigation timeout after 3 hours
239
src/scraper/hard_reset.rs
Normal file
@@ -0,0 +1,239 @@
// src/scraper/hard_reset.rs - PROPERLY FIXED: Matches main.rs initialization pattern

use std::sync::{Arc, atomic::{AtomicBool, AtomicUsize, Ordering}};

use crate::{ChromeDriverPool, Config, logger, scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}, util::directories::DataPaths};

/// Simple error counter for triggering hard resets
pub struct HardResetController {
    consecutive_errors: AtomicUsize,
}

impl HardResetController {
    pub fn new() -> Self {
        Self {
            consecutive_errors: AtomicUsize::new(0),
        }
    }

    /// Record success - resets counter
    pub fn record_success(&self) {
        self.consecutive_errors.store(0, Ordering::SeqCst);
    }

    /// Record error - returns new count
    pub fn record_error(&self) -> usize {
        self.consecutive_errors.fetch_add(1, Ordering::SeqCst) + 1
    }

    /// Reset counter
    pub fn reset(&self) {
        self.consecutive_errors.store(0, Ordering::SeqCst);
    }

    /// Get current count
    pub fn get_count(&self) -> usize {
        self.consecutive_errors.load(Ordering::SeqCst)
    }
}
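
// Usage sketch (illustrative, not from this commit): `record_error` returns
// the count *after* the increment (`fetch_add` yields the previous value,
// hence the `+ 1`), so callers can compare it against a threshold directly.
// The threshold here is a hypothetical value.
//
//     let controller = HardResetController::new();
//     if controller.record_error() >= 10 {
//         // time to call perform_hard_reset(...)
//     }
//     controller.record_success(); // any success resets the streak to zero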

/// Perform a hard reset: shut down everything and recreate it
pub async fn perform_hard_reset(
    pool_mutex: &Arc<tokio::sync::Mutex<Arc<ChromeDriverPool>>>,
    config: &Config,
    paths: &DataPaths,
    monitoring: &Option<crate::monitoring::MonitoringHandle>,
    shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<()> {
    logger::log_error("🔴 STARTING HARD RESET SEQUENCE").await;

    // Check if shutdown was requested
    if shutdown_flag.load(Ordering::SeqCst) {
        logger::log_warn("Shutdown requested during hard reset, aborting").await;
        return Ok(());
    }

    // Step 1: Acquire pool lock (prevents new tasks from using it)
    logger::log_info(" [1/10] Acquiring pool lock...").await;
    let pool_guard = pool_mutex.lock().await;
    let old_pool = Arc::clone(&*pool_guard);

    // Step 2: Wait a moment for active tasks to complete
    logger::log_info(" [2/10] Waiting 10 seconds for active tasks...").await;
    drop(pool_guard); // Release lock so tasks can finish
    tokio::time::sleep(tokio::time::Duration::from_secs(10)).await;
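
    // Note: dropping the guard opens a window in which in-flight tasks may
    // still clone the old pool's Arc; that is safe, since the old pool stays
    // alive until the last such clone is dropped.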

    // Re-acquire lock
    let mut pool_guard = pool_mutex.lock().await;

    // Step 3: Shutdown ChromeDriver pool
    logger::log_info(" [3/10] Shutting down ChromeDriver pool...").await;
    if let Err(e) = old_pool.shutdown().await {
        logger::log_warn(&format!(" Warning: Pool shutdown error: {}", e)).await;
    }

    // Step 4: Shutdown proxies
    logger::log_info(" [4/10] Shutting down proxy containers...").await;
    cleanup_all_proxy_containers().await.ok();

    // Step 5: Wait for cleanup
    logger::log_info(" [5/10] Waiting 30 seconds for cleanup...").await;
    tokio::time::sleep(tokio::time::Duration::from_secs(30)).await;

    // Check shutdown again
    if shutdown_flag.load(Ordering::SeqCst) {
        logger::log_warn("Shutdown requested during cleanup, aborting reset").await;
        return Ok(());
    }
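
    // Note: on this abort path the mutex still holds the already-shut-down
    // old pool, which is tolerable here since a set shutdown_flag means the
    // whole process is winding down anyway.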

    // Step 6: Recreate proxy pool (if VPN rotation is enabled)
    logger::log_info(" [6/10] Recreating proxy pool...").await;
    let new_proxy_pool = if config.enable_vpn_rotation {
        match recreate_proxy_pool_with_fresh_credentials(config, paths, monitoring, shutdown_flag).await {
            Ok(pool) => {
                logger::log_info(&format!(
                    " ✓ Proxy pool created with {} proxies",
                    pool.num_proxies()
                )).await;
                Some(pool)
            }
            Err(e) => {
                logger::log_warn(&format!(
                    " ⚠️ Proxy creation failed: {}. Continuing without proxies.",
                    e
                )).await;
                None
            }
        }
    } else {
        logger::log_info(" ⊘ VPN rotation disabled, skipping proxy pool").await;
        None
    };
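
    // Note: proxy-pool failure is deliberately non-fatal; the reset continues
    // and the new ChromeDriver pool below runs without proxies.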

    // Step 7: Recreate ChromeDriver pool
    logger::log_info(" [7/10] Recreating ChromeDriver pool...").await;
    let new_pool = Arc::new(
        ChromeDriverPool::new_with_proxy_and_task_limit(
            new_proxy_pool,
            config,
            monitoring.clone(),
        ).await?
    );

    logger::log_info(" ✓ ChromeDriver pool created").await;

    // Step 8: Reset the error counter on the NEW pool
    logger::log_info(" [8/10] Resetting error counter...").await;
    new_pool.get_reset_controller().reset();
    logger::log_info(" ✓ Error counter cleared").await;

    // Step 9: Replace pool atomically
    logger::log_info(" [9/10] Activating new pool...").await;
    *pool_guard = new_pool;
    drop(pool_guard);
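
    // Note: dropping the guard here ends the critical section; tasks that
    // queued on the mutex during the reset now resume against the fresh pool.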

    // Step 10: Emit monitoring event
    logger::log_info(" [10/10] Updating monitoring...").await;
    if let Some(mon) = monitoring {
        mon.emit(crate::monitoring::MonitoringEvent::PoolInitialized {
            pool_size: config.max_parallel_instances,
            with_proxy: config.enable_vpn_rotation,
            with_rotation: config.max_tasks_per_instance > 0,
        });
    }

    logger::log_info("✅ HARD RESET COMPLETE").await;

    Ok(())
}
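
// Illustrative wiring (an assumption, not in this commit): per the commit
// message, navigation timeouts after 3 hours should end in a hard reset. A
// caller loop might look roughly like this; `HARD_RESET_THRESHOLD` and
// `navigate_with_timeout` are hypothetical names.
//
//     const HARD_RESET_THRESHOLD: usize = 5;
//
//     match navigate_with_timeout(&task).await {
//         Ok(_) => pool.get_reset_controller().record_success(),
//         Err(_) => {
//             let errors = pool.get_reset_controller().record_error();
//             if errors >= HARD_RESET_THRESHOLD {
//                 perform_hard_reset(&pool_mutex, &config, &paths, &monitoring, &shutdown_flag).await?;
//             }
//         }
//     }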

/// Recreate proxy pool with fresh VPNBook credentials (matches main.rs pattern)
async fn recreate_proxy_pool_with_fresh_credentials(
    config: &Config,
    paths: &DataPaths,
    monitoring: &Option<crate::monitoring::MonitoringHandle>,
    shutdown_flag: &Arc<AtomicBool>,
) -> anyhow::Result<Arc<DockerVpnProxyPool>> {
    let number_proxy_instances = config.proxy_instances_per_certificate.unwrap_or(1);

    // Check shutdown
    if shutdown_flag.load(Ordering::SeqCst) {
        return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
    }

    logger::log_info(" [6.1] Creating temporary ChromeDriver pool for credential fetch...").await;

    // Create temporary pool WITHOUT proxy (just like main.rs does)
    let temp_pool = Arc::new(
        ChromeDriverPool::new_with_proxy_and_task_limit(
            None, // No proxy for temp pool
            config,
            monitoring.clone(),
        ).await?
    );
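
    // Note: the temporary, proxy-less pool breaks a bootstrap cycle: fetching
    // fresh credentials needs a browser, but the proxies that would carry the
    // traffic cannot be started until those credentials exist.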

    logger::log_info(" [6.2] Fetching fresh VPNBook credentials...").await;

    // Fetch fresh VPNBook credentials (just like main.rs does)
    let (username, password, _files) = crate::util::opnv::fetch_vpnbook_configs(
        &temp_pool,
        paths.cache_dir()
    ).await?;

    logger::log_info(&format!(" [6.3] Got credentials → User: {}", username)).await;

    // Shutdown temp pool
    logger::log_info(" [6.4] Shutting down temporary pool...").await;
    temp_pool.shutdown().await.ok();

    // Check shutdown again
    if shutdown_flag.load(Ordering::SeqCst) {
        return Err(anyhow::anyhow!("Shutdown requested during proxy recreation"));
    }

    // Check if we have VPN server configs (skip unreadable entries instead of panicking)
    let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
        .filter(|e| e.as_ref().map(|e| e.path().is_dir()).unwrap_or(false))
        .count();

    if server_count == 0 {
        return Err(anyhow::anyhow!("No VPN servers found after credential fetch"));
    }
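
    // Note (assumption): each subdirectory under cache_openvpn_dir() is taken
    // to be one VPN server's config bundle, so this directory count is the
    // server count that the proxy pool fans out over below.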

    logger::log_info(&format!(
        " [6.5] Found {} VPN servers → Creating proxy pool with {} instances per server...",
        server_count,
        number_proxy_instances
    )).await;

    // Create new proxy pool (just like main.rs does)
    let proxy_pool = Arc::new(
        DockerVpnProxyPool::new(
            paths.cache_openvpn_dir(),
            username,
            password,
            number_proxy_instances,
        ).await?
    );

    logger::log_info(&format!(
        " [6.6] ✓ Proxy pool ready with {} total proxies",
        proxy_pool.num_proxies()
    )).await;

    // Emit proxy connected events for monitoring
    if let Some(mon) = monitoring {
        for i in 0..proxy_pool.num_proxies() {
            if let Some(proxy_info) = proxy_pool.get_proxy_info(i) {
                mon.emit(crate::monitoring::MonitoringEvent::ProxyConnected {
                    container_name: proxy_info.container_name.clone(),
                    ip_address: proxy_info.ip_address.clone(),
                    port: proxy_info.port,
                });
            }
        }
    }

    Ok(proxy_pool)
}