// src/main.rs use web_scraper::{*, scraper, economic, corporate}; use anyhow::Result; use web_scraper::config::Config; use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers}; use scraper::webdriver::ChromeDriverPool; use util::directories::DataPaths; use util::{logger, opnv}; use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; use std::process::Command; #[tokio::main] async fn main() -> Result<()> { let output = if cfg!(target_os = "windows") { Command::new("cmd") .args(["/C", "docker desktop start"]) .output() .expect("failed to execute process") } else { Command::new("sh") .arg("-c") .arg("echo hello") .output() .expect("failed to execute process") }; let _start_docker_desktop = output.stdout; cleanup_all_proxy_containers().await.ok(); let config = match Config::load() { Ok(cfg) => cfg, Err(_) => { eprintln!("Using default configuration"); Config::default() } }; let paths = DataPaths::new(".")?; // Initialize monitoring system let config_snapshot = ConfigSnapshot { max_parallel_instances: config.max_parallel_instances, max_tasks_per_instance: config.max_tasks_per_instance, enable_vpn_rotation: config.enable_vpn_rotation, max_requests_per_session: config.max_requests_per_session, min_request_interval_ms: config.min_request_interval_ms, max_retry_attempts: config.max_retry_attempts, }; let (monitoring_handle, _monitoring_task) = init_monitoring( config_snapshot, paths.logs_dir().to_path_buf(), 3030, // Dashboard port ).await?; // Emit pool initialization event monitoring_handle.emit(monitoring::MonitoringEvent::PoolInitialized { pool_size: config.max_parallel_instances, with_proxy: config.enable_vpn_rotation, with_rotation: config.max_tasks_per_instance > 0, }); logger::log_info("Monitoring dashboard available at http://localhost:3030").await; logger::init_debug_logger(paths.logs_dir()).await.ok(); logger::log_info("=== Economic Webscraper Started ===").await; logger::log_info(&format!( "Config → parallel_instances: {}, task_limit: {} vpn_rotation: {} proxy_instances_per_certificate: {:?}", config.max_parallel_instances, config.max_tasks_per_instance, config.enable_vpn_rotation, config.proxy_instances_per_certificate )).await; let number_proxy_instances_per_certificate = config.proxy_instances_per_certificate.unwrap_or(1); // Simple shutdown flag let shutdown_flag = Arc::new(AtomicBool::new(false)); // === Step 1: Fetch VPNBook configs === let proxy_pool: Option> = if config.enable_vpn_rotation { logger::log_info("VPN Rotation Enabled – Fetching latest VPNBook configs").await; let temp_pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(None, &config, Some(monitoring_handle.clone())).await?); let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?; logger::log_info(&format!("VPNBook credentials → User: {}", username)).await; let server_count = std::fs::read_dir(paths.cache_openvpn_dir())? .filter(|e| e.as_ref().unwrap().path().is_dir()) .count(); if server_count == 0 { logger::log_warn("No VPN servers found – continuing without VPN").await; None } else { logger::log_info(&format!("Found {} VPN servers – starting Docker proxy containers", server_count)).await; let pp = Arc::new(DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password, number_proxy_instances_per_certificate).await?); logger::log_info(&format!("All {} Docker proxy containers started and ready", pp.num_proxies())).await; for i in 0..pp.num_proxies() { if let Some(proxy_info) = pp.get_proxy_info(i) { monitoring_handle.emit(monitoring::MonitoringEvent::ProxyConnected { container_name: proxy_info.container_name.clone(), ip_address: proxy_info.ip_address.clone(), port: proxy_info.port, }); } } Some(pp) } } else { logger::log_info("VPN rotation disabled – using direct connection").await; None }; // === Step 2: Initialize ChromeDriver pool === let pool_size_limit = config.max_parallel_instances; let task_limit = config.max_tasks_per_instance; logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", pool_size_limit)).await; let pool = Arc::new( if task_limit > 0 { ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await? } else { ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await? } ); logger::log_info(&format!("ChromeDriver pool ready with {} instances", pool_size_limit)).await; // === Step 3: Ctrl+C handler === { let shutdown_flag_clone = Arc::clone(&shutdown_flag); let pool_clone = Arc::clone(&pool); let proxy_clone = proxy_pool.clone(); tokio::spawn(async move { tokio::signal::ctrl_c().await.ok(); logger::log_info("Ctrl+C received – shutting down gracefully...").await; // Set flag first shutdown_flag_clone.store(true, Ordering::SeqCst); // Wait a bit for tasks to notice tokio::time::sleep(tokio::time::Duration::from_secs(2)).await; // Cleanup if let Err(e) = (&*pool_clone).shutdown().await { logger::log_error(&format!("Error during pool shutdown: {}", e)).await; } if let Some(pp) = proxy_clone { if let Err(e) = pp.shutdown().await { logger::log_warn(&format!("Failed to stop Docker containers: {}", e)).await; } else { logger::log_info("All Docker VPN containers stopped").await; } } let _ = cleanup_all_proxy_containers().await; std::process::exit(0); }); } // === Step 4: Run scraping jobs === logger::log_info("--- Starting ECONOMIC data update ---").await; //economic::run_full_update(&config, &pool).await?; logger::log_info("Economic update completed").await; if !shutdown_flag.load(Ordering::SeqCst) { logger::log_info("--- Starting CORPORATE data update ---").await; corporate::run_full_update(&config, &pool, &shutdown_flag).await?; logger::log_info("Corporate update completed").await; } // === Step 5: Final cleanup === if !shutdown_flag.load(Ordering::SeqCst) { logger::log_info("Shutting down ChromeDriver pool...").await; pool.shutdown().await?; if let Some(pp) = proxy_pool { logger::log_info("Stopping Docker VPN proxy containers...").await; pp.shutdown().await?; cleanup_all_proxy_containers().await.ok(); } logger::log_info("=== Application finished successfully ===").await; } Ok(()) }