added session detection with requests per task

This commit is contained in:
2025-12-18 14:01:51 +01:00
parent 9c66f0d361
commit c51b36c125

View File

@@ -276,6 +276,11 @@ pub struct ChromeInstance {
task_count: usize, task_count: usize,
max_tasks_per_instance: usize, max_tasks_per_instance: usize,
proxy_url: Option<String>, proxy_url: Option<String>,
// NEU: Session-Management
current_session: Arc<Mutex<Option<Client>>>,
session_request_count: Arc<Mutex<usize>>,
max_requests_per_session: usize, // z.B. 25
} }
impl ChromeInstance { impl ChromeInstance {
@@ -289,17 +294,110 @@ impl ChromeInstance {
task_count: 0, task_count: 0,
max_tasks_per_instance, max_tasks_per_instance,
proxy_url, proxy_url,
// NEU
current_session: Arc::new(Mutex::new(None)),
session_request_count: Arc::new(Mutex::new(0)),
max_requests_per_session: 25, // Konfigurierbar machen!
}) })
} }
pub async fn new_session(&self) -> Result<Client> { pub async fn get_or_renew_session(&self) -> Result<Client> {
let mut session_opt = self.current_session.lock().await;
let mut request_count = self.session_request_count.lock().await;
// Session erneuern wenn:
// 1. Keine Session vorhanden
// 2. Request-Limit erreicht
let needs_renewal = session_opt.is_none() || *request_count >= self.max_requests_per_session;
if needs_renewal {
// Alte Session schließen
if let Some(old_session) = session_opt.take() {
crate::util::logger::log_info("Closing old session").await;
let _ = old_session.close().await;
// Kurze Pause zwischen Sessions
sleep(Duration::from_millis(rand::rng().random_range(500..1000))).await;
}
// Neue Session mit frischem User-Agent erstellen
crate::util::logger::log_info(&format!(
"Creating new session (requests in last session: {})",
*request_count
)).await;
let new_session = self.create_fresh_session().await?;
*session_opt = Some(new_session.clone());
*request_count = 0;
Ok(new_session)
} else {
// Existierende Session verwenden
*request_count += 1;
Ok(session_opt.as_ref().unwrap().clone())
}
}
async fn create_fresh_session(&self) -> Result<Client> {
// WICHTIG: User-Agent hier wählen, nicht in chrome_args()!
let user_agent = Self::chrome_user_agent();
let capabilities = self.chrome_args_with_ua(user_agent);
ClientBuilder::native() ClientBuilder::native()
.capabilities(self.chrome_args()) .capabilities(capabilities)
.connect(&self.base_url) .connect(&self.base_url)
.await .await
.context("Failed to connect to ChromeDriver") .context("Failed to connect to ChromeDriver")
} }
fn chrome_args_with_ua(&self, user_agent: &str) -> Map<String, Value> {
let mut args = vec![
"--headless=new".to_string(),
"--disable-gpu".to_string(),
"--no-sandbox".to_string(),
"--disable-dev-shm-usage".to_string(),
"--disable-infobars".to_string(),
"--disable-extensions".to_string(),
"--disable-popup-blocking".to_string(),
"--disable-notifications".to_string(),
"--disable-autofill".to_string(),
"--disable-sync".to_string(),
"--disable-default-apps".to_string(),
"--disable-translate".to_string(),
"--disable-blink-features=AutomationControlled".to_string(),
// User-Agent als Parameter!
format!("--user-agent={}", user_agent),
];
if let Some(ref proxy) = self.proxy_url {
args.push(format!("--proxy-server={}", proxy));
}
let caps = serde_json::json!({
"goog:chromeOptions": {
"args": args,
"excludeSwitches": ["enable-logging", "enable-automation"],
"prefs": {
"profile.default_content_setting_values.notifications": 2
}
}
});
caps.as_object().cloned().unwrap()
}
/// Opens a brand-new session by delegating to `create_fresh_session`.
///
/// Returns whatever `create_fresh_session` returns, including its error.
pub async fn new_session(&self) -> Result<Client> {
// Kept for backward compatibility; callers should use get_or_renew_session() instead!
self.create_fresh_session().await
}
/// Resets this instance's task counter to zero.
pub fn reset_task_count(&mut self) {
self.task_count = 0;
}
/// Returns `(task_count, session_request_count)` for this instance.
///
/// Waits for the request-counter lock before reading the counter.
pub async fn get_session_stats(&self) -> (usize, usize) {
    let requests = self.session_request_count.lock().await;
    (self.task_count, *requests)
}
pub fn increment_task_count(&mut self) { pub fn increment_task_count(&mut self) {
self.task_count += 1; self.task_count += 1;
} }
@@ -402,7 +500,6 @@ impl ChromeInstance {
caps.as_object().cloned().unwrap() caps.as_object().cloned().unwrap()
} }
pub fn chrome_user_agent() -> &'static str { pub fn chrome_user_agent() -> &'static str {
static UAS: &[&str] = &[ static UAS: &[&str] = &[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36",