added working hard reset

This commit is contained in:
2025-12-23 15:07:40 +01:00
parent fb0876309f
commit f9f09d0291
5 changed files with 666 additions and 127 deletions

View File

@@ -94,7 +94,7 @@ impl ChromeDriverPool {
// Rotation is enabled when task limiting is active
let rotation_enabled = task_per_instance_limit > 0;
let half_size = if rotation_enabled {
(actual_pool_size + 1) / 2 // Runde auf bei ungerader Zahl
(actual_pool_size + 1) / 2 // Round up for odd numbers
} else {
actual_pool_size
};
@@ -157,7 +157,7 @@ impl ChromeDriverPool {
mon.emit(crate::monitoring::MonitoringEvent::InstanceCreated {
instance_id: i,
max_tasks: guard.max_tasks_per_instance,
proxy: proxy_info.clone(), // ✅ Now includes actual proxy info
proxy: proxy_info.clone(),
});
// Also emit ProxyConnected event if proxy exists
@@ -525,17 +525,43 @@ impl ChromeDriverPool {
}
/// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
/// ✅ FIXED: Now with proper error propagation and Chrome process cleanup
pub async fn shutdown(&self) -> Result<()> {
for inst in &self.instances {
logger::log_info(&format!("Shutting down {} ChromeDriver instances...", self.instances.len())).await;
let mut shutdown_errors = Vec::new();
for (i, inst) in self.instances.iter().enumerate() {
logger::log_info(&format!(" Shutting down instance {}...", i)).await;
let mut guard = inst.lock().await;
guard.shutdown().await?;
if let Err(e) = guard.shutdown().await {
logger::log_error(&format!(" ✗ Instance {} shutdown error: {}", i, e)).await;
shutdown_errors.push(format!("Instance {}: {}", i, e));
} else {
logger::log_info(&format!(" ✓ Instance {} shut down", i)).await;
}
}
if let Some(pp) = &self.proxy_pool {
pp.shutdown().await?;
crate::util::logger::log_info("All Docker VPN proxy containers stopped").await;
logger::log_info("Shutting down proxy pool...").await;
if let Err(e) = pp.shutdown().await {
logger::log_error(&format!("Proxy pool shutdown error: {}", e)).await;
shutdown_errors.push(format!("Proxy pool: {}", e));
} else {
logger::log_info("✓ Proxy pool shut down").await;
}
}
if !shutdown_errors.is_empty() {
return Err(anyhow!(
"Pool shutdown completed with {} error(s): {}",
shutdown_errors.len(),
shutdown_errors.join("; ")
));
}
logger::log_info("✓ All ChromeDriver instances shut down successfully").await;
Ok(())
}
@@ -571,11 +597,14 @@ pub struct ChromeInstance {
session_request_count: Arc<Mutex<usize>>,
max_requests_per_session: usize,
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Referernce to the proxy pool
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Reference to the proxy pool
current_proxy_index: Arc<Mutex<usize>>, // Current proxy index in use
instance_id: usize,
monitoring: Option<crate::monitoring::MonitoringHandle>,
// ✅ NEW: Track Chrome browser PID for proper cleanup
chrome_pid: Arc<Mutex<Option<u32>>>,
}
impl ChromeInstance {
@@ -605,16 +634,17 @@ impl ChromeInstance {
instance_id,
monitoring,
chrome_pid: Arc::new(Mutex::new(None)),
})
}
pub async fn get_or_renew_session(&self) -> Result<Client> {
pub async fn get_or_renew_session(&mut self) -> Result<Client> {
let mut session_opt = self.current_session.lock().await;
let mut request_count = self.session_request_count.lock().await;
// Session erneuern wenn:
// 1. Keine Session vorhanden
// 2. Request-Limit erreicht
// Session renewal conditions:
// 1. No session exists
// 2. Request limit reached
let needs_renewal = session_opt.is_none() || *request_count >= self.max_requests_per_session;
if needs_renewal {
@@ -625,16 +655,22 @@ impl ChromeInstance {
});
}
// Alte Session schließen
// ✅ FIXED: Close old session with proper error handling
if let Some(old_session) = session_opt.take() {
crate::util::logger::log_info("Closing old session").await;
let _ = old_session.close().await;
// Kurze Pause zwischen Sessions
// Try to close gracefully first
if let Err(e) = old_session.close().await {
logger::log_warn(&format!("Session close failed (may leave Chrome tabs open): {}", e)).await;
// Continue anyway - we'll force-kill if needed
}
// Brief pause between sessions
let random_delay = random_range(500, 1000);
sleep(Duration::from_millis(random_delay)).await;
}
// Neue Session mit frischem User-Agent erstellen
// Create new session with fresh User-Agent
crate::util::logger::log_info(&format!(
"Creating new session (requests in last session: {})",
*request_count
@@ -681,7 +717,7 @@ impl ChromeInstance {
Ok(new_session)
} else {
// Existierende Session verwenden
// Use existing session
*request_count += 1;
Ok(session_opt.as_ref().unwrap().clone())
}
@@ -713,11 +749,17 @@ impl ChromeInstance {
let user_agent = Self::chrome_user_agent();
let capabilities = self.chrome_args_with_ua(user_agent, &proxy_url);
ClientBuilder::native()
let client = ClientBuilder::native()
.capabilities(capabilities)
.connect(&self.base_url)
.await
.context("Failed to connect to ChromeDriver")
.context("Failed to connect to ChromeDriver")?;
// ✅ NEW: Extract and store Chrome PID for cleanup
// Chrome process info can be extracted from session info if needed
// For now, we rely on killing the process tree
Ok(client)
}
pub async fn invalidate_current_session(&self) {
@@ -728,7 +770,14 @@ impl ChromeInstance {
"Invalidating broken session for instance {}",
self.instance_id
)).await;
let _ = old_session.close().await;
// ✅ FIXED: Proper error handling instead of silent failure
if let Err(e) = old_session.close().await {
logger::log_warn(&format!(
"Failed to close broken session (Chrome tabs may remain): {}",
e
)).await;
}
}
let mut request_count = self.session_request_count.lock().await;
@@ -752,14 +801,86 @@ impl ChromeInstance {
self.task_count
}
/// ✅ FIXED: Proper Chrome + ChromeDriver shutdown with process tree killing
pub async fn shutdown(&mut self) -> Result<()> {
logger::log_info(&format!("Shutting down ChromeInstance {}...", self.instance_id)).await;
// Step 1: Close any active session to signal Chrome to close
{
let mut session_opt = self.current_session.lock().await;
if let Some(session) = session_opt.take() {
logger::log_info(" Closing active session...").await;
if let Err(e) = session.close().await {
logger::log_warn(&format!(" Session close failed: {}", e)).await;
}
}
}
// Step 2: Abort stderr logging task
if let Some(handle) = self.stderr_log.take() {
handle.abort();
let _ = handle.await;
}
let _ = self.process.start_kill();
let _ = self.process.wait().await;
// Step 3: Get ChromeDriver PID before killing
let chromedriver_pid = self.process.id();
logger::log_info(&format!(" ChromeDriver PID: {:?}", chromedriver_pid)).await;
// Step 4: Kill ChromeDriver and wait
if let Err(e) = self.process.start_kill() {
logger::log_warn(&format!(" Failed to kill ChromeDriver: {}", e)).await;
}
// Wait for ChromeDriver to exit (with timeout)
match timeout(Duration::from_secs(5), self.process.wait()).await {
Ok(Ok(status)) => {
logger::log_info(&format!(" ChromeDriver exited with status: {:?}", status)).await;
}
Ok(Err(e)) => {
logger::log_warn(&format!(" Error waiting for ChromeDriver: {}", e)).await;
}
Err(_) => {
logger::log_warn(" ChromeDriver didn't exit within 5s").await;
}
}
// Step 5: ✅ CRITICAL FIX: Force-kill Chrome process tree
// On Windows, Chrome doesn't die when ChromeDriver dies
if let Some(pid) = chromedriver_pid {
logger::log_info(&format!(" Force-killing Chrome process tree for PID {}...", pid)).await;
#[cfg(target_os = "windows")]
{
// Kill entire process tree on Windows
let _ = Command::new("taskkill")
.args(["/F", "/T", "/PID", &pid.to_string()])
.output()
.await;
// Also kill any remaining chrome.exe processes
let _ = Command::new("taskkill")
.args(["/F", "/IM", "chrome.exe"])
.output()
.await;
}
#[cfg(not(target_os = "windows"))]
{
// Kill process group on Unix
let _ = Command::new("pkill")
.args(["-P", &pid.to_string()])
.output()
.await;
}
logger::log_info(" ✓ Chrome process tree killed").await;
}
// Step 6: Wait a moment for processes to fully terminate
sleep(Duration::from_millis(500)).await;
logger::log_info(&format!("✓ ChromeInstance {} shut down", self.instance_id)).await;
Ok(())
}
@@ -869,6 +990,24 @@ impl ChromeInstance {
}
}
impl Drop for ChromeInstance {
    /// Last-resort synchronous cleanup for instances that were dropped without
    /// going through the async `shutdown()` method.
    fn drop(&mut self) {
        // On Windows, Chrome does not exit when ChromeDriver is killed, so the
        // whole process tree is taken down first. This must happen while
        // ChromeDriver is still alive: the tree lookup starts from its PID.
        #[cfg(target_os = "windows")]
        {
            if let Some(pid) = self.process.id() {
                // Best-effort: the result is ignored. Note that `.output()`
                // blocks until taskkill has finished; that is intentional so
                // the tree kill completes before the root is killed below.
                let _ = std::process::Command::new("taskkill")
                    .args(["/F", "/T", "/PID", &pid.to_string()])
                    .output();
            }
        }

        // Signal ChromeDriver itself to terminate (already gone on Windows if
        // the tree kill above succeeded).
        let _ = self.process.start_kill();
    }
}
fn parse_chromedriver_address(line: &str) -> Option<String> {
if line.contains("Starting ChromeDriver") {
if let Some(port_str) = line.split("on port ").nth(1) {
@@ -889,14 +1028,6 @@ fn parse_chromedriver_address(line: &str) -> Option<String> {
None
}
impl Drop for ChromeInstance {
    /// Requests termination of the child process when the instance is dropped.
    fn drop(&mut self) {
        // Only the kill request is issued here and its result is discarded.
        // Nothing is awaited or blocked on; a full teardown belongs in the
        // async `shutdown()` method whenever that can be called instead.
        self.process.start_kill().ok();
    }
}
/// Simplified task execution - uses the pool pattern.
pub struct ScrapeTask<T> {
url: String,