197 lines
7.4 KiB
Rust
197 lines
7.4 KiB
Rust
// src/main.rs
|
||
|
||
use web_scraper::{*, scraper, economic, corporate};
|
||
|
||
use anyhow::Result;
|
||
use web_scraper::config::Config;
|
||
use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers};
|
||
use scraper::webdriver::ChromeDriverPool;
|
||
use util::directories::DataPaths;
|
||
use util::{logger, opnv};
|
||
use std::sync::Arc;
|
||
use std::sync::atomic::{AtomicBool, Ordering};
|
||
use std::process::Command;
|
||
|
||
#[tokio::main]
|
||
async fn main() -> Result<()> {
|
||
let output = if cfg!(target_os = "windows") {
|
||
Command::new("cmd")
|
||
.args(["/C", "docker desktop start"])
|
||
.output()
|
||
.expect("failed to execute process")
|
||
} else {
|
||
Command::new("sh")
|
||
.arg("-c")
|
||
.arg("echo hello")
|
||
.output()
|
||
.expect("failed to execute process")
|
||
};
|
||
let _start_docker_desktop = output.stdout;
|
||
|
||
cleanup_all_proxy_containers().await.ok();
|
||
|
||
let config = match Config::load() {
|
||
Ok(cfg) => cfg,
|
||
Err(_) => {
|
||
eprintln!("Using default configuration");
|
||
Config::default()
|
||
}
|
||
};
|
||
|
||
let paths = DataPaths::new(".")?;
|
||
|
||
// Initialize monitoring system
|
||
let config_snapshot = ConfigSnapshot {
|
||
max_parallel_instances: config.max_parallel_instances,
|
||
max_tasks_per_instance: config.max_tasks_per_instance,
|
||
enable_vpn_rotation: config.enable_vpn_rotation,
|
||
max_requests_per_session: config.max_requests_per_session,
|
||
min_request_interval_ms: config.min_request_interval_ms,
|
||
max_retry_attempts: config.max_retry_attempts,
|
||
};
|
||
|
||
let (monitoring_handle, _monitoring_task) = init_monitoring(
|
||
config_snapshot,
|
||
paths.logs_dir().to_path_buf(),
|
||
3030, // Dashboard port
|
||
).await?;
|
||
|
||
// Emit pool initialization event
|
||
monitoring_handle.emit(monitoring::MonitoringEvent::PoolInitialized {
|
||
pool_size: config.max_parallel_instances,
|
||
with_proxy: config.enable_vpn_rotation,
|
||
with_rotation: config.max_tasks_per_instance > 0,
|
||
});
|
||
|
||
logger::log_info("Monitoring dashboard available at http://localhost:3030").await;
|
||
|
||
logger::init_debug_logger(paths.logs_dir()).await.ok();
|
||
logger::log_info("=== Economic Webscraper Started ===").await;
|
||
logger::log_info(&format!(
|
||
"Config → parallel_instances: {}, task_limit: {} vpn_rotation: {} proxy_instances_per_certificate: {:?}",
|
||
config.max_parallel_instances,
|
||
config.max_tasks_per_instance,
|
||
config.enable_vpn_rotation,
|
||
config.proxy_instances_per_certificate
|
||
)).await;
|
||
|
||
let number_proxy_instances_per_certificate = config.proxy_instances_per_certificate.unwrap_or(1);
|
||
|
||
|
||
// Simple shutdown flag
|
||
let shutdown_flag = Arc::new(AtomicBool::new(false));
|
||
|
||
// === Step 1: Fetch VPNBook configs ===
|
||
let proxy_pool: Option<Arc<DockerVpnProxyPool>> = if config.enable_vpn_rotation {
|
||
logger::log_info("VPN Rotation Enabled – Fetching latest VPNBook configs").await;
|
||
let temp_pool = Arc::new(ChromeDriverPool::new_with_proxy_and_task_limit(None, &config, Some(monitoring_handle.clone())).await?);
|
||
|
||
let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?;
|
||
logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
|
||
|
||
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
|
||
.filter(|e| e.as_ref().unwrap().path().is_dir())
|
||
.count();
|
||
|
||
if server_count == 0 {
|
||
logger::log_warn("No VPN servers found – continuing without VPN").await;
|
||
None
|
||
} else {
|
||
logger::log_info(&format!("Found {} VPN servers – starting Docker proxy containers", server_count)).await;
|
||
let pp = Arc::new(DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password, number_proxy_instances_per_certificate).await?);
|
||
|
||
logger::log_info(&format!("All {} Docker proxy containers started and ready", pp.num_proxies())).await;
|
||
for i in 0..pp.num_proxies() {
|
||
if let Some(proxy_info) = pp.get_proxy_info(i) {
|
||
monitoring_handle.emit(monitoring::MonitoringEvent::ProxyConnected {
|
||
container_name: proxy_info.container_name.clone(),
|
||
ip_address: proxy_info.ip_address.clone(),
|
||
port: proxy_info.port,
|
||
});
|
||
}
|
||
}
|
||
|
||
Some(pp)
|
||
}
|
||
} else {
|
||
logger::log_info("VPN rotation disabled – using direct connection").await;
|
||
None
|
||
};
|
||
|
||
// === Step 2: Initialize ChromeDriver pool ===
|
||
let pool_size_limit = config.max_parallel_instances;
|
||
let task_limit = config.max_tasks_per_instance;
|
||
|
||
logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", pool_size_limit)).await;
|
||
|
||
let pool = Arc::new(
|
||
if task_limit > 0 {
|
||
ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await?
|
||
} else {
|
||
ChromeDriverPool::new_with_proxy_and_task_limit(proxy_pool.clone(), &config, Some(monitoring_handle.clone())).await?
|
||
}
|
||
);
|
||
|
||
logger::log_info(&format!("ChromeDriver pool ready with {} instances", pool_size_limit)).await;
|
||
|
||
// === Step 3: Ctrl+C handler ===
|
||
{
|
||
let shutdown_flag_clone = Arc::clone(&shutdown_flag);
|
||
let pool_clone = Arc::clone(&pool);
|
||
let proxy_clone = proxy_pool.clone();
|
||
|
||
tokio::spawn(async move {
|
||
tokio::signal::ctrl_c().await.ok();
|
||
logger::log_info("Ctrl+C received – shutting down gracefully...").await;
|
||
|
||
// Set flag first
|
||
shutdown_flag_clone.store(true, Ordering::SeqCst);
|
||
|
||
// Wait a bit for tasks to notice
|
||
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
|
||
|
||
// Cleanup
|
||
if let Err(e) = (&*pool_clone).shutdown().await {
|
||
logger::log_error(&format!("Error during pool shutdown: {}", e)).await;
|
||
}
|
||
|
||
if let Some(pp) = proxy_clone {
|
||
if let Err(e) = pp.shutdown().await {
|
||
logger::log_warn(&format!("Failed to stop Docker containers: {}", e)).await;
|
||
} else {
|
||
logger::log_info("All Docker VPN containers stopped").await;
|
||
}
|
||
}
|
||
|
||
let _ = cleanup_all_proxy_containers().await;
|
||
std::process::exit(0);
|
||
});
|
||
}
|
||
|
||
// === Step 4: Run scraping jobs ===
|
||
logger::log_info("--- Starting ECONOMIC data update ---").await;
|
||
//economic::run_full_update(&config, &pool).await?;
|
||
logger::log_info("Economic update completed").await;
|
||
|
||
if !shutdown_flag.load(Ordering::SeqCst) {
|
||
logger::log_info("--- Starting CORPORATE data update ---").await;
|
||
corporate::run_full_update(&config, &pool, &shutdown_flag).await?;
|
||
logger::log_info("Corporate update completed").await;
|
||
}
|
||
|
||
// === Step 5: Final cleanup ===
|
||
if !shutdown_flag.load(Ordering::SeqCst) {
|
||
logger::log_info("Shutting down ChromeDriver pool...").await;
|
||
pool.shutdown().await?;
|
||
|
||
if let Some(pp) = proxy_pool {
|
||
logger::log_info("Stopping Docker VPN proxy containers...").await;
|
||
pp.shutdown().await?;
|
||
cleanup_all_proxy_containers().await.ok();
|
||
}
|
||
|
||
logger::log_info("=== Application finished successfully ===").await;
|
||
}
|
||
|
||
Ok(())
|
||
} |