added working hard reset
This commit is contained in:
@@ -94,7 +94,7 @@ impl ChromeDriverPool {
|
||||
// Rotation is enabled when task limiting is active
|
||||
let rotation_enabled = task_per_instance_limit > 0;
|
||||
let half_size = if rotation_enabled {
|
||||
(actual_pool_size + 1) / 2 // Runde auf bei ungerader Zahl
|
||||
(actual_pool_size + 1) / 2 // Round up for odd numbers
|
||||
} else {
|
||||
actual_pool_size
|
||||
};
|
||||
@@ -157,7 +157,7 @@ impl ChromeDriverPool {
|
||||
mon.emit(crate::monitoring::MonitoringEvent::InstanceCreated {
|
||||
instance_id: i,
|
||||
max_tasks: guard.max_tasks_per_instance,
|
||||
proxy: proxy_info.clone(), // ✅ Now includes actual proxy info
|
||||
proxy: proxy_info.clone(),
|
||||
});
|
||||
|
||||
// Also emit ProxyConnected event if proxy exists
|
||||
@@ -525,17 +525,43 @@ impl ChromeDriverPool {
|
||||
}
|
||||
|
||||
/// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
|
||||
/// ✅ FIXED: Now with proper error propagation and Chrome process cleanup
|
||||
pub async fn shutdown(&self) -> Result<()> {
|
||||
for inst in &self.instances {
|
||||
logger::log_info(&format!("Shutting down {} ChromeDriver instances...", self.instances.len())).await;
|
||||
|
||||
let mut shutdown_errors = Vec::new();
|
||||
|
||||
for (i, inst) in self.instances.iter().enumerate() {
|
||||
logger::log_info(&format!(" Shutting down instance {}...", i)).await;
|
||||
|
||||
let mut guard = inst.lock().await;
|
||||
guard.shutdown().await?;
|
||||
if let Err(e) = guard.shutdown().await {
|
||||
logger::log_error(&format!(" ✗ Instance {} shutdown error: {}", i, e)).await;
|
||||
shutdown_errors.push(format!("Instance {}: {}", i, e));
|
||||
} else {
|
||||
logger::log_info(&format!(" ✓ Instance {} shut down", i)).await;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(pp) = &self.proxy_pool {
|
||||
pp.shutdown().await?;
|
||||
crate::util::logger::log_info("All Docker VPN proxy containers stopped").await;
|
||||
logger::log_info("Shutting down proxy pool...").await;
|
||||
if let Err(e) = pp.shutdown().await {
|
||||
logger::log_error(&format!("Proxy pool shutdown error: {}", e)).await;
|
||||
shutdown_errors.push(format!("Proxy pool: {}", e));
|
||||
} else {
|
||||
logger::log_info("✓ Proxy pool shut down").await;
|
||||
}
|
||||
}
|
||||
|
||||
if !shutdown_errors.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"Pool shutdown completed with {} error(s): {}",
|
||||
shutdown_errors.len(),
|
||||
shutdown_errors.join("; ")
|
||||
));
|
||||
}
|
||||
|
||||
logger::log_info("✓ All ChromeDriver instances shut down successfully").await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -571,11 +597,14 @@ pub struct ChromeInstance {
|
||||
session_request_count: Arc<Mutex<usize>>,
|
||||
max_requests_per_session: usize,
|
||||
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Referernce to the proxy pool
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>, // Reference to the proxy pool
|
||||
current_proxy_index: Arc<Mutex<usize>>, // Current proxy index in use
|
||||
|
||||
instance_id: usize,
|
||||
monitoring: Option<crate::monitoring::MonitoringHandle>,
|
||||
|
||||
// ✅ NEW: Track Chrome browser PID for proper cleanup
|
||||
chrome_pid: Arc<Mutex<Option<u32>>>,
|
||||
}
|
||||
|
||||
impl ChromeInstance {
|
||||
@@ -605,16 +634,17 @@ impl ChromeInstance {
|
||||
|
||||
instance_id,
|
||||
monitoring,
|
||||
chrome_pid: Arc::new(Mutex::new(None)),
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn get_or_renew_session(&self) -> Result<Client> {
|
||||
pub async fn get_or_renew_session(&mut self) -> Result<Client> {
|
||||
let mut session_opt = self.current_session.lock().await;
|
||||
let mut request_count = self.session_request_count.lock().await;
|
||||
|
||||
// Session erneuern wenn:
|
||||
// 1. Keine Session vorhanden
|
||||
// 2. Request-Limit erreicht
|
||||
// Session renewal conditions:
|
||||
// 1. No session exists
|
||||
// 2. Request limit reached
|
||||
let needs_renewal = session_opt.is_none() || *request_count >= self.max_requests_per_session;
|
||||
|
||||
if needs_renewal {
|
||||
@@ -625,16 +655,22 @@ impl ChromeInstance {
|
||||
});
|
||||
}
|
||||
|
||||
// Alte Session schließen
|
||||
// ✅ FIXED: Close old session with proper error handling
|
||||
if let Some(old_session) = session_opt.take() {
|
||||
crate::util::logger::log_info("Closing old session").await;
|
||||
let _ = old_session.close().await;
|
||||
// Kurze Pause zwischen Sessions
|
||||
|
||||
// Try to close gracefully first
|
||||
if let Err(e) = old_session.close().await {
|
||||
logger::log_warn(&format!("Session close failed (may leave Chrome tabs open): {}", e)).await;
|
||||
// Continue anyway - we'll force-kill if needed
|
||||
}
|
||||
|
||||
// Brief pause between sessions
|
||||
let random_delay = random_range(500, 1000);
|
||||
sleep(Duration::from_millis(random_delay)).await;
|
||||
}
|
||||
|
||||
// Neue Session mit frischem User-Agent erstellen
|
||||
// Create new session with fresh User-Agent
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Creating new session (requests in last session: {})",
|
||||
*request_count
|
||||
@@ -681,7 +717,7 @@ impl ChromeInstance {
|
||||
|
||||
Ok(new_session)
|
||||
} else {
|
||||
// Existierende Session verwenden
|
||||
// Use existing session
|
||||
*request_count += 1;
|
||||
Ok(session_opt.as_ref().unwrap().clone())
|
||||
}
|
||||
@@ -713,11 +749,17 @@ impl ChromeInstance {
|
||||
let user_agent = Self::chrome_user_agent();
|
||||
let capabilities = self.chrome_args_with_ua(user_agent, &proxy_url);
|
||||
|
||||
ClientBuilder::native()
|
||||
let client = ClientBuilder::native()
|
||||
.capabilities(capabilities)
|
||||
.connect(&self.base_url)
|
||||
.await
|
||||
.context("Failed to connect to ChromeDriver")
|
||||
.context("Failed to connect to ChromeDriver")?;
|
||||
|
||||
// ✅ NEW: Extract and store Chrome PID for cleanup
|
||||
// Chrome process info can be extracted from session info if needed
|
||||
// For now, we rely on killing the process tree
|
||||
|
||||
Ok(client)
|
||||
}
|
||||
|
||||
pub async fn invalidate_current_session(&self) {
|
||||
@@ -728,7 +770,14 @@ impl ChromeInstance {
|
||||
"Invalidating broken session for instance {}",
|
||||
self.instance_id
|
||||
)).await;
|
||||
let _ = old_session.close().await;
|
||||
|
||||
// ✅ FIXED: Proper error handling instead of silent failure
|
||||
if let Err(e) = old_session.close().await {
|
||||
logger::log_warn(&format!(
|
||||
"Failed to close broken session (Chrome tabs may remain): {}",
|
||||
e
|
||||
)).await;
|
||||
}
|
||||
}
|
||||
|
||||
let mut request_count = self.session_request_count.lock().await;
|
||||
@@ -752,14 +801,86 @@ impl ChromeInstance {
|
||||
self.task_count
|
||||
}
|
||||
|
||||
/// ✅ FIXED: Proper Chrome + ChromeDriver shutdown with process tree killing
|
||||
pub async fn shutdown(&mut self) -> Result<()> {
|
||||
logger::log_info(&format!("Shutting down ChromeInstance {}...", self.instance_id)).await;
|
||||
|
||||
// Step 1: Close any active session to signal Chrome to close
|
||||
{
|
||||
let mut session_opt = self.current_session.lock().await;
|
||||
if let Some(session) = session_opt.take() {
|
||||
logger::log_info(" Closing active session...").await;
|
||||
if let Err(e) = session.close().await {
|
||||
logger::log_warn(&format!(" Session close failed: {}", e)).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Abort stderr logging task
|
||||
if let Some(handle) = self.stderr_log.take() {
|
||||
handle.abort();
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
let _ = self.process.start_kill();
|
||||
let _ = self.process.wait().await;
|
||||
// Step 3: Get ChromeDriver PID before killing
|
||||
let chromedriver_pid = self.process.id();
|
||||
|
||||
logger::log_info(&format!(" ChromeDriver PID: {:?}", chromedriver_pid)).await;
|
||||
|
||||
// Step 4: Kill ChromeDriver and wait
|
||||
if let Err(e) = self.process.start_kill() {
|
||||
logger::log_warn(&format!(" Failed to kill ChromeDriver: {}", e)).await;
|
||||
}
|
||||
|
||||
// Wait for ChromeDriver to exit (with timeout)
|
||||
match timeout(Duration::from_secs(5), self.process.wait()).await {
|
||||
Ok(Ok(status)) => {
|
||||
logger::log_info(&format!(" ChromeDriver exited with status: {:?}", status)).await;
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
logger::log_warn(&format!(" Error waiting for ChromeDriver: {}", e)).await;
|
||||
}
|
||||
Err(_) => {
|
||||
logger::log_warn(" ChromeDriver didn't exit within 5s").await;
|
||||
}
|
||||
}
|
||||
|
||||
// Step 5: ✅ CRITICAL FIX: Force-kill Chrome process tree
|
||||
// On Windows, Chrome doesn't die when ChromeDriver dies
|
||||
if let Some(pid) = chromedriver_pid {
|
||||
logger::log_info(&format!(" Force-killing Chrome process tree for PID {}...", pid)).await;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
// Kill entire process tree on Windows
|
||||
let _ = Command::new("taskkill")
|
||||
.args(["/F", "/T", "/PID", &pid.to_string()])
|
||||
.output()
|
||||
.await;
|
||||
|
||||
// Also kill any remaining chrome.exe processes
|
||||
let _ = Command::new("taskkill")
|
||||
.args(["/F", "/IM", "chrome.exe"])
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
// Kill process group on Unix
|
||||
let _ = Command::new("pkill")
|
||||
.args(["-P", &pid.to_string()])
|
||||
.output()
|
||||
.await;
|
||||
}
|
||||
|
||||
logger::log_info(" ✓ Chrome process tree killed").await;
|
||||
}
|
||||
|
||||
// Step 6: Wait a moment for processes to fully terminate
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
|
||||
logger::log_info(&format!("✓ ChromeInstance {} shut down", self.instance_id)).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -869,6 +990,24 @@ impl ChromeInstance {
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ChromeInstance {
|
||||
fn drop(&mut self) {
|
||||
// Signal both ChromeDriver and Chrome to terminate
|
||||
let _ = self.process.start_kill();
|
||||
|
||||
// Also try to kill Chrome if we know the PID
|
||||
if let Some(pid) = self.process.id() {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
// Fire and forget - this is best-effort cleanup
|
||||
let _ = std::process::Command::new("taskkill")
|
||||
.args(["/F", "/T", "/PID", &pid.to_string()])
|
||||
.output();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_chromedriver_address(line: &str) -> Option<String> {
|
||||
if line.contains("Starting ChromeDriver") {
|
||||
if let Some(port_str) = line.split("on port ").nth(1) {
|
||||
@@ -889,14 +1028,6 @@ fn parse_chromedriver_address(line: &str) -> Option<String> {
|
||||
None
|
||||
}
|
||||
|
||||
impl Drop for ChromeInstance {
|
||||
fn drop(&mut self) {
|
||||
// Signal child to terminate. Do NOT block here; shutdown should be
|
||||
// performed with the async `shutdown()` method when possible.
|
||||
let _ = self.process.start_kill();
|
||||
}
|
||||
}
|
||||
|
||||
/// Simplified task execution - uses the pool pattern.
|
||||
pub struct ScrapeTask<T> {
|
||||
url: String,
|
||||
|
||||
Reference in New Issue
Block a user