implement vpn pool
This commit is contained in:
@@ -10,10 +10,6 @@ pub mod util;
|
||||
// Re-export commonly used types for convenience
|
||||
pub use config::Config;
|
||||
pub use scraper::webdriver::{ChromeDriverPool, ChromeInstance, ScrapeTask};
|
||||
pub use scraper::vpn_manager::{VpnInstance, VpnPool};
|
||||
pub use util::directories::DataPaths;
|
||||
pub use util::logger;
|
||||
pub use util::opnv;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
pub use scraper::forcebindip::ForceBindIpManager;
|
||||
pub use util::opnv;
|
||||
254
src/main.rs
254
src/main.rs
@@ -1,4 +1,5 @@
|
||||
// src/main.rs
|
||||
|
||||
mod config;
|
||||
mod corporate;
|
||||
mod economic;
|
||||
@@ -7,163 +8,162 @@ mod scraper;
|
||||
|
||||
use anyhow::Result;
|
||||
use config::Config;
|
||||
use scraper::docker_vpn_proxy::{DockerVpnProxyPool, cleanup_all_proxy_containers};
|
||||
use scraper::webdriver::ChromeDriverPool;
|
||||
use scraper::vpn_manager::VpnPool;
|
||||
use util::directories::DataPaths;
|
||||
use util::{logger, opnv};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// The entry point of the application.
|
||||
///
|
||||
/// This function loads the configuration, optionally initializes a VPN pool,
|
||||
/// initializes a shared ChromeDriver pool bound to the VPN pool (if enabled),
|
||||
/// and sequentially runs the full updates for corporate and economic data.
|
||||
///
|
||||
/// If VPN rotation is enabled:
|
||||
/// 1. Fetches latest VPNBook OpenVPN configurations
|
||||
/// 2. Creates a VPN pool and connects all VPN instances
|
||||
/// 3. Binds each ChromeDriver instance to a different VPN for IP rotation
|
||||
/// 4. Performs periodic health checks to reconnect unhealthy VPN instances
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if configuration loading fails, pool initialization fails,
|
||||
/// VPN fetching fails (if enabled), or if either update function encounters an issue
|
||||
/// (e.g., network errors, scraping failures, or chromedriver spawn failures).
|
||||
/// Application entry point
|
||||
// src/main.rs
|
||||
|
||||
// ... existing imports ...
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
cleanup_all_proxy_containers().await.ok();
|
||||
|
||||
// Load configuration from .env
|
||||
let config = Config::load().map_err(|err| {
|
||||
println!("Failed to load Config .env: {}", err);
|
||||
eprintln!("Failed to load config: {}", err);
|
||||
err
|
||||
})?;
|
||||
|
||||
// Initialize paths
|
||||
// Initialize paths and logger
|
||||
let paths = DataPaths::new(".")?;
|
||||
|
||||
// Initialize logger
|
||||
logger::init_debug_logger(paths.logs_dir()).await.map_err(|e| {
|
||||
anyhow::anyhow!("Logger initialization failed: {}", e)
|
||||
})?;
|
||||
|
||||
logger::log_info("=== Application started ===").await;
|
||||
logger::log_info(&format!("Config: economic_start_date={}, corporate_start_date={}, lookahead_months={}, max_parallel_instances={}, enable_vpn_rotation={}, max_tasks_per_instance={}",
|
||||
config.economic_start_date, config.corporate_start_date, config.economic_lookahead_months, config.max_parallel_instances, config.enable_vpn_rotation, config.max_tasks_per_instance)).await;
|
||||
|
||||
// Initialize VPN pool if enabled
|
||||
let vpn_pool = if config.enable_vpn_rotation {
|
||||
logger::log_info("=== VPN Rotation Enabled ===").await;
|
||||
logger::log_info("--- Fetching latest VPNBook OpenVPN configurations ---").await;
|
||||
|
||||
let (username, password, _files) =
|
||||
util::opnv::fetch_vpnbook_configs(&Arc::new(ChromeDriverPool::new(1).await?), paths.cache_dir()).await?;
|
||||
|
||||
let amount_of_openvpn_servers = _files.len();
|
||||
|
||||
logger::log_info(&format!("✓ Fetched VPN credentials - Username: {}", username)).await;
|
||||
|
||||
// Create VPN pool
|
||||
let openvpn_dir = paths.cache_dir().join("openvpn");
|
||||
logger::log_info("--- Initializing VPN Pool ---").await;
|
||||
let vp = Arc::new(VpnPool::new(
|
||||
&openvpn_dir,
|
||||
username,
|
||||
password,
|
||||
true, // enable rotation
|
||||
config.tasks_per_vpn_session,
|
||||
amount_of_openvpn_servers,
|
||||
).await?);
|
||||
|
||||
// Connect all VPN instances (gracefully handles failures)
|
||||
logger::log_info("--- Connecting to VPN servers ---").await;
|
||||
match vp.connect_all().await {
|
||||
Ok(()) => {
|
||||
logger::log_info("✓ VPN initialization complete").await;
|
||||
Some(vp)
|
||||
}
|
||||
Err(e) => {
|
||||
logger::log_warn(&format!(
|
||||
"⚠ VPN initialization failed: {}. Continuing without VPN.",
|
||||
e
|
||||
)).await;
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Initialize the shared ChromeDriver pool with VPN pool
|
||||
let pool_size = config.max_parallel_instances;
|
||||
let max_tasks_per_instance = config.max_tasks_per_instance;
|
||||
|
||||
logger::init_debug_logger(paths.logs_dir()).await.ok();
|
||||
logger::log_info("=== Event Backtest Engine Started ===").await;
|
||||
logger::log_info(&format!(
|
||||
"Initializing ChromeDriver pool with size: {}{}",
|
||||
pool_size,
|
||||
if max_tasks_per_instance > 0 { &format!(" (max {} tasks/instance)", max_tasks_per_instance) } else { "" }
|
||||
"Config → parallel_instances: {}, task_limit: {} vpn_rotation: {}",
|
||||
config.max_parallel_instances,
|
||||
config.max_tasks_per_instance,
|
||||
config.enable_vpn_rotation
|
||||
)).await;
|
||||
|
||||
let pool = Arc::new(
|
||||
if max_tasks_per_instance > 0 {
|
||||
ChromeDriverPool::new_with_vpn_and_task_limit(pool_size, vpn_pool.clone(), max_tasks_per_instance).await?
|
||||
} else if vpn_pool.is_some() {
|
||||
ChromeDriverPool::new_with_vpn(pool_size, vpn_pool.clone()).await?
|
||||
|
||||
// === Step 1: Fetch fresh VPNBook credentials and .ovpn files (if rotation enabled) ===
|
||||
let proxy_pool: Option<Arc<DockerVpnProxyPool>> = if config.enable_vpn_rotation {
|
||||
logger::log_info("VPN Rotation Enabled — Fetching latest VPNBook configs").await;
|
||||
|
||||
// We only need 1 Chrome instance to scrape vpnbook.com (no proxy yet)
|
||||
let temp_pool = Arc::new(ChromeDriverPool::new(1).await?);
|
||||
let (username, password, _files) = opnv::fetch_vpnbook_configs(&temp_pool, paths.cache_dir()).await?;
|
||||
|
||||
logger::log_info(&format!("VPNBook credentials → User: {}", username)).await;
|
||||
|
||||
// Count how many distinct servers (subfolders) we have in cache/openvpn/
|
||||
let server_count = std::fs::read_dir(paths.cache_openvpn_dir())?
|
||||
.filter(|e| e.as_ref().unwrap().path().is_dir())
|
||||
.count();
|
||||
|
||||
if server_count == 0 {
|
||||
logger::log_warn("No VPN servers found — continuing without VPN").await;
|
||||
None
|
||||
} else {
|
||||
ChromeDriverPool::new(pool_size).await?
|
||||
}
|
||||
);
|
||||
|
||||
logger::log_info("✓ ChromeDriver pool initialized successfully").await;
|
||||
logger::log_info(&format!("Found {} VPN servers — starting Docker proxy containers", server_count)).await;
|
||||
|
||||
// Spawn background Ctrl-C handler to gracefully shutdown pool and VPNs
|
||||
{
|
||||
let pool_for_signal = Arc::clone(&pool);
|
||||
let vpn_for_signal = vpn_pool.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = tokio::signal::ctrl_c().await {
|
||||
let _ = util::logger::log_error(&format!("Ctrl-C handler failed to install: {}", e)).await;
|
||||
return;
|
||||
}
|
||||
let pp = Arc::new(
|
||||
DockerVpnProxyPool::new(paths.cache_openvpn_dir(), username, password).await?
|
||||
);
|
||||
|
||||
let _ = util::logger::log_info("Ctrl-C received — initiating graceful shutdown").await;
|
||||
|
||||
if let Err(e) = pool_for_signal.shutdown().await {
|
||||
let _ = util::logger::log_warn(&format!("Error shutting down ChromeDriver pool: {}", e)).await;
|
||||
}
|
||||
|
||||
if let Some(vp) = vpn_for_signal {
|
||||
if let Err(e) = vp.disconnect_all().await {
|
||||
let _ = util::logger::log_warn(&format!("Error disconnecting VPNs: {}", e)).await;
|
||||
// Verify all proxies are working before proceeding
|
||||
logger::log_info("Verifying all proxy connections...").await;
|
||||
let mut all_working = true;
|
||||
for i in 0..pp.num_proxies() {
|
||||
match pp.test_proxy_connection(i).await {
|
||||
Ok(ip) => {
|
||||
logger::log_info(&format!(" Proxy {}: working with IP: {}", i + 1, ip)).await;
|
||||
}
|
||||
Err(e) => {
|
||||
logger::log_error(&format!(" Proxy {}: FAILED - {}", i + 1, e)).await;
|
||||
all_working = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let _ = util::logger::log_info("Graceful shutdown complete (from Ctrl-C)").await;
|
||||
// Exit the process now that cleanup is done
|
||||
if !all_working {
|
||||
logger::log_warn("Some proxies failed, but continuing with working ones...").await;
|
||||
} else {
|
||||
logger::log_info("All proxies verified and ready!").await;
|
||||
}
|
||||
|
||||
logger::log_info(&format!("All {} Docker proxy containers started and ready", pp.num_proxies())).await;
|
||||
Some(pp)
|
||||
}
|
||||
} else {
|
||||
logger::log_info("VPN rotation disabled — using direct connection").await;
|
||||
None
|
||||
};
|
||||
|
||||
// === Step 2: Initialize the main ChromeDriver pool (with proxy if enabled) ===
|
||||
let pool_size = config.max_parallel_instances;
|
||||
let task_limit = config.max_tasks_per_instance;
|
||||
|
||||
logger::log_info(&format!("Creating ChromeDriver pool with {} instances...", pool_size)).await;
|
||||
|
||||
let pool = Arc::new(
|
||||
if task_limit > 0 {
|
||||
ChromeDriverPool::new_with_proxy_and_task_limit(pool_size, proxy_pool.clone(), task_limit).await?
|
||||
} else {
|
||||
ChromeDriverPool::new_with_proxy(pool_size, proxy_pool.clone()).await?
|
||||
}
|
||||
);
|
||||
|
||||
logger::log_info(&format!("ChromeDriver pool ready with {} instances", pool_size)).await;
|
||||
|
||||
// === Step 3: Graceful Ctrl+C handler ===
|
||||
{
|
||||
let pool_clone = Arc::clone(&pool);
|
||||
let proxy_clone = proxy_pool.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
tokio::signal::ctrl_c().await.ok();
|
||||
|
||||
logger::log_info("Ctrl+C received — shutting down gracefully...").await;
|
||||
|
||||
// Now works: &*pool_clone derefs Arc → &ChromeDriverPool
|
||||
if let Err(e) = (&*pool_clone).shutdown().await {
|
||||
logger::log_error(&format!("Error during pool shutdown: {}", e)).await;
|
||||
}
|
||||
|
||||
if let Some(pp) = proxy_clone {
|
||||
if let Err(e) = pp.shutdown().await {
|
||||
logger::log_warn(&format!("Failed to stop Docker containers: {}", e)).await;
|
||||
} else {
|
||||
logger::log_info("All Docker VPN containers stopped").await;
|
||||
}
|
||||
}
|
||||
|
||||
let _ = cleanup_all_proxy_containers().await;
|
||||
|
||||
std::process::exit(0);
|
||||
});
|
||||
}
|
||||
|
||||
// Run economic update first, passing the shared pool
|
||||
logger::log_info("--- Starting economic data update ---").await;
|
||||
// === Step 4: Run the actual scraping jobs ===
|
||||
logger::log_info("--- Starting ECONOMIC data update ---").await;
|
||||
economic::run_full_update(&config, &pool).await?;
|
||||
logger::log_info("✓ Economic data update completed").await;
|
||||
logger::log_info("Economic update completed").await;
|
||||
|
||||
// Then run corporate update, passing the shared pool
|
||||
logger::log_info("--- Starting corporate data update ---").await;
|
||||
logger::log_info("--- Starting CORPORATE data update ---").await;
|
||||
corporate::run_full_update(&config, &pool).await?;
|
||||
logger::log_info("✓ Corporate data update completed").await;
|
||||
logger::log_info("Corporate update completed").await;
|
||||
|
||||
// Shutdown ChromeDriver pool before disconnecting VPNs so instances can
|
||||
// cleanly terminate any network-bound processes.
|
||||
logger::log_info("--- Shutting down ChromeDriver pool ---").await;
|
||||
// === Step 5: Final cleanup ===
|
||||
logger::log_info("Shutting down ChromeDriver pool...").await;
|
||||
pool.shutdown().await?;
|
||||
logger::log_info("✓ ChromeDriver pool shutdown complete").await;
|
||||
|
||||
// Disconnect all VPN instances if enabled
|
||||
if let Some(vp) = vpn_pool {
|
||||
logger::log_info("--- Disconnecting VPN instances ---").await;
|
||||
vp.disconnect_all().await?;
|
||||
if let Some(pp) = proxy_pool {
|
||||
logger::log_info("Stopping Docker VPN proxy containers...").await;
|
||||
pp.shutdown().await?;
|
||||
// CLEANUP ANY LEFTOVER CONTAINERS FROM PREVIOUS RUNS
|
||||
cleanup_all_proxy_containers().await.ok();
|
||||
}
|
||||
|
||||
logger::log_info("=== Application completed successfully ===").await;
|
||||
logger::log_info("=== Application finished successfully ===").await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
memory allocation of 4294967296 bytes failed
|
||||
error: process didn't exit successfully: `target\debug\event_backtest_engine.exe` (exit code: 0xc0000409, STATUS_STACK_BUFFER_OVERRUN)
|
||||
*/
|
||||
@@ -1,7 +0,0 @@
|
||||
# Run as Administrator.
Set-Location "C:\Program Files\OpenVPN\bin"

# Create the additional TAP adapters "OpenVPN-TAP-2" through "OpenVPN-TAP-10"
# (nine adapters; no adapter with suffix 1 is created here).
foreach ($i in 2..10) {
    .\tapctl.exe create --name "OpenVPN-TAP-$i"
}
|
||||
407
src/scraper/docker_vpn_proxy.rs
Normal file
407
src/scraper/docker_vpn_proxy.rs
Normal file
@@ -0,0 +1,407 @@
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use futures::future::join_all;
|
||||
use std::{path::{Path, PathBuf}, time::Duration};
|
||||
use tokio::{process::Command, time::{sleep}};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// A set of Docker containers, each publishing a SOCKS5 proxy on its own host port.
///
/// `container_names` and `proxy_ports` are parallel vectors: the entry at a
/// given index in one describes the same container as the entry at that index
/// in the other.
#[derive(Debug)]
pub struct DockerVpnProxyPool {
    /// Names of the Docker containers owned by this pool.
    container_names: Vec<String>,
    /// Host ports on which the proxies listen, e.g., [10801, 10802, ...]
    proxy_ports: Vec<u16>,
}
|
||||
|
||||
impl DockerVpnProxyPool {
|
||||
pub async fn new(ovpn_dir: &Path, username: String, password: String) -> Result<Self> {
|
||||
// Count hostnames (subdirs in ovpn_dir)
|
||||
let hostnames: Vec<_> = std::fs::read_dir(ovpn_dir)?
|
||||
.filter_map(Result::ok)
|
||||
.filter(|e| e.path().is_dir())
|
||||
.map(|e| e.file_name().into_string().unwrap())
|
||||
.collect();
|
||||
|
||||
let num_servers = hostnames.len();
|
||||
if num_servers == 0 {
|
||||
return Err(anyhow!("No VPN hostnames found in {:?}", ovpn_dir));
|
||||
}
|
||||
|
||||
crate::util::logger::log_info(&format!("Found {} VPN hostnames", num_servers)).await;
|
||||
|
||||
let mut container_names = Vec::with_capacity(num_servers);
|
||||
let mut proxy_ports = Vec::with_capacity(num_servers);
|
||||
let base_port: u16 = 10800;
|
||||
|
||||
// === STEP 1: Start ALL containers first ===
|
||||
for (i, hostname) in hostnames.iter().enumerate() {
|
||||
// Pick tcp443.ovpn if exists, else first .ovpn
|
||||
let hostname_dir = ovpn_dir.join(hostname);
|
||||
let mut ovpn_path: Option<PathBuf> = None;
|
||||
for entry in WalkDir::new(&hostname_dir).max_depth(1) {
|
||||
let entry = entry?;
|
||||
if entry.path().extension().map_or(false, |ext| ext == "ovpn") {
|
||||
if entry.file_name().to_str().unwrap_or("").contains("tcp443") {
|
||||
ovpn_path = Some(entry.path().to_path_buf());
|
||||
break;
|
||||
} else if ovpn_path.is_none() {
|
||||
ovpn_path = Some(entry.path().to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let ovpn_path = ovpn_path.ok_or_else(|| anyhow!("No .ovpn found for {}", hostname))?;
|
||||
|
||||
let name = format!("vpn-proxy-{}", i);
|
||||
let port = base_port + i as u16 + 1;
|
||||
|
||||
// Clean up any existing container with the same name
|
||||
let _ = Command::new("docker")
|
||||
.args(["rm", "-f", &name])
|
||||
.status()
|
||||
.await;
|
||||
|
||||
// Run Docker container
|
||||
let status = Command::new("docker")
|
||||
.args([
|
||||
"run", "-d",
|
||||
"--name", &name,
|
||||
"--cap-add=NET_ADMIN",
|
||||
"--device", "/dev/net/tun",
|
||||
"--sysctl", "net.ipv4.ip_forward=1",
|
||||
"-v", &format!("{}:/vpn/config.ovpn", ovpn_path.display()),
|
||||
"-e", &format!("VPN_USERNAME={}", username),
|
||||
"-e", &format!("VPN_PASSWORD={}", password),
|
||||
"-p", &format!("{}:1080", port),
|
||||
"rust-vpn-proxy",
|
||||
])
|
||||
.status()
|
||||
.await
|
||||
.context("Failed to run Docker")?;
|
||||
|
||||
if !status.success() {
|
||||
return Err(anyhow!("Docker run failed for {}", name));
|
||||
}
|
||||
|
||||
crate::util::logger::log_info(&format!("Started container {} on port {} (waiting for VPN...)", name, port)).await;
|
||||
|
||||
container_names.push(name);
|
||||
proxy_ports.push(port);
|
||||
}
|
||||
|
||||
// Brief pause to let containers start
|
||||
sleep(Duration::from_secs(8)).await;
|
||||
crate::util::logger::log_info(&format!("All {} containers started, beginning health checks...", container_names.len())).await;
|
||||
|
||||
// === STEP 2: Test ALL proxies in parallel with 10-second intervals ===
|
||||
let results = Self::test_all_proxies_parallel(&container_names, &proxy_ports).await;
|
||||
|
||||
// Filter out failed containers
|
||||
let mut working_containers = Vec::new();
|
||||
let mut working_ports = Vec::new();
|
||||
let mut failed_count = 0;
|
||||
|
||||
for (i, (container_name, port)) in container_names.into_iter().zip(proxy_ports.into_iter()).enumerate() {
|
||||
match &results[i] {
|
||||
Ok(Some(ip)) => {
|
||||
crate::util::logger::log_info(&format!("✓ Container {} on port {} ready with IP: {}",
|
||||
container_name, port, ip)).await;
|
||||
working_containers.push(container_name);
|
||||
working_ports.push(port);
|
||||
}
|
||||
Ok(None) => {
|
||||
crate::util::logger::log_warn(&format!("✓ Container {} on port {} ready but IP detection failed",
|
||||
container_name, port)).await;
|
||||
working_containers.push(container_name);
|
||||
working_ports.push(port);
|
||||
}
|
||||
Err(e) => {
|
||||
// Get container logs to debug
|
||||
let logs = Command::new("docker")
|
||||
.args(["logs", "--tail", "20", &container_name])
|
||||
.output()
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|output| String::from_utf8_lossy(&output.stdout).to_string().into());
|
||||
|
||||
crate::util::logger::log_error(&format!("✗ Container {} on port {} failed: {}. Logs: {:?}",
|
||||
container_name, port, e, logs)).await;
|
||||
failed_count += 1;
|
||||
// Clean up failed container
|
||||
let _ = Self::cleanup_container(&container_name).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if working_containers.is_empty() {
|
||||
return Err(anyhow!("All {} VPN proxy containers failed to start", num_servers));
|
||||
}
|
||||
|
||||
crate::util::logger::log_info(&format!("Started {}/{} VPN proxy containers successfully",
|
||||
working_containers.len(), num_servers)).await;
|
||||
|
||||
if failed_count > 0 {
|
||||
crate::util::logger::log_warn(&format!("{} containers failed and were cleaned up", failed_count)).await;
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
container_names: working_containers,
|
||||
proxy_ports: working_ports,
|
||||
})
|
||||
}
|
||||
|
||||
/// Test all proxies in parallel with 10-second intervals between tests
|
||||
async fn test_all_proxies_parallel(container_names: &[String], proxy_ports: &[u16]) -> Vec<Result<Option<String>>> {
|
||||
let mut tasks = Vec::new();
|
||||
|
||||
for (i, (container_name, port)) in container_names.iter().zip(proxy_ports.iter()).enumerate() {
|
||||
let name = container_name.clone();
|
||||
let port = *port;
|
||||
|
||||
tasks.push(tokio::spawn(async move {
|
||||
// Try up to 6 times with 10-second intervals (total 60 seconds)
|
||||
for attempt in 1..=6 {
|
||||
crate::util::logger::log_info(&format!("Testing proxy {} (port {}) - Attempt {}/6",
|
||||
name, port, attempt)).await;
|
||||
|
||||
match Self::test_single_proxy(port).await {
|
||||
Ok(Some(ip)) => {
|
||||
return Ok(Some(ip));
|
||||
}
|
||||
Ok(None) => {
|
||||
// Connection works but IP detection failed
|
||||
return Ok(None);
|
||||
}
|
||||
Err(e) if attempt < 6 => {
|
||||
crate::util::logger::log_info(&format!("Attempt {}/6 for {}: {} - retrying in 10s",
|
||||
attempt, name, e)).await;
|
||||
sleep(Duration::from_secs(10)).await;
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow!("Failed after 6 attempts: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(anyhow!("Unexpected exit from retry loop"))
|
||||
}));
|
||||
}
|
||||
|
||||
// Wait for all tasks to complete
|
||||
join_all(tasks)
|
||||
.await
|
||||
.into_iter()
|
||||
.map(|result| match result {
|
||||
Ok(inner) => inner,
|
||||
Err(e) => Err(anyhow!("Task panicked: {}", e)),
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Test a single proxy connection
|
||||
async fn test_single_proxy(port: u16) -> Result<Option<String>> {
|
||||
use std::io::{Read, Write};
|
||||
use std::net::TcpStream;
|
||||
use std::time::Duration as StdDuration;
|
||||
|
||||
// First, test SOCKS5 handshake directly
|
||||
crate::util::logger::log_info(&format!("Testing SOCKS5 handshake on port {}...", port)).await;
|
||||
|
||||
// Use spawn_blocking for synchronous I/O
|
||||
let test_result = tokio::task::spawn_blocking(move || {
|
||||
// Connect to SOCKS5 proxy
|
||||
let mut stream = match TcpStream::connect_timeout(
|
||||
&format!("127.0.0.1:{}", port).parse().unwrap(),
|
||||
StdDuration::from_secs(5)
|
||||
) {
|
||||
Ok(stream) => stream,
|
||||
Err(e) => return Err(anyhow!("Failed to connect: {}", e)),
|
||||
};
|
||||
|
||||
// Send SOCKS5 greeting: version 5, 1 method (no auth)
|
||||
let greeting: [u8; 3] = [0x05, 0x01, 0x00]; // SOCKS5, 1 method, no auth
|
||||
if let Err(e) = stream.write_all(&greeting) {
|
||||
return Err(anyhow!("Failed to send greeting: {}", e));
|
||||
}
|
||||
|
||||
// Read response
|
||||
let mut response = [0u8; 2];
|
||||
if let Err(e) = stream.read_exact(&mut response) {
|
||||
return Err(anyhow!("Failed to read response: {}", e));
|
||||
}
|
||||
|
||||
// Check response: should be [0x05, 0x00] for no auth required
|
||||
if response[0] != 0x05 || response[1] != 0x00 {
|
||||
return Err(anyhow!("Unexpected SOCKS5 response: {:?}", response));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}).await;
|
||||
|
||||
match test_result {
|
||||
Ok(Ok(())) => {
|
||||
crate::util::logger::log_info(&format!("✓ SOCKS5 proxy on port {} accepts connections", port)).await;
|
||||
|
||||
// Try to get IP through proxy using curl (fallback method)
|
||||
let curl_result = tokio::process::Command::new("curl")
|
||||
.args([
|
||||
"-s",
|
||||
"--socks5", &format!("localhost:{}", port),
|
||||
"--max-time", "10",
|
||||
"https://checkip.amazonaws.com"
|
||||
])
|
||||
.output()
|
||||
.await;
|
||||
|
||||
match curl_result {
|
||||
Ok(output) if output.status.success() => {
|
||||
let ip = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
if Self::is_valid_ip(&ip) {
|
||||
crate::util::logger::log_info(&format!("✓ Got IP via proxy: {}", ip)).await;
|
||||
return Ok(Some(ip));
|
||||
} else {
|
||||
crate::util::logger::log_info(&format!("✓ Proxy works, invalid IP format: {}", ip)).await;
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Proxy accepts connections but curl failed - still acceptable
|
||||
crate::util::logger::log_info(&format!("✓ Proxy accepts connections (curl test failed)")).await;
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
return Err(anyhow!("SOCKS5 test failed: {}", e));
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(anyhow!("Task failed: {}", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Clean up a failed container
|
||||
async fn cleanup_container(container_name: &str) -> Result<()> {
|
||||
let _ = Command::new("docker")
|
||||
.args(["stop", container_name])
|
||||
.status()
|
||||
.await;
|
||||
|
||||
let _ = Command::new("docker")
|
||||
.args(["rm", container_name])
|
||||
.status()
|
||||
.await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `true` when `ip` is a dotted quad of exactly four decimal octets.
///
/// Each octet must parse as a `u8` and be written in canonical form, i.e. it
/// must equal the number printed back as text (so `"01"` and `"+1"` are
/// rejected, as is any surrounding whitespace).
fn is_valid_ip(ip: &str) -> bool {
    let octets: Vec<&str> = ip.split('.').collect();

    octets.len() == 4
        && octets.iter().all(|octet| {
            octet
                .parse::<u8>()
                .map_or(false, |value| value.to_string() == *octet)
        })
}
|
||||
|
||||
/// Test if a specific proxy is working
|
||||
pub async fn test_proxy_connection(&self, index: usize) -> Result<String> {
|
||||
let port = self.proxy_ports[index];
|
||||
let proxy_url = format!("socks5://localhost:{}", port);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.proxy(reqwest::Proxy::all(&proxy_url)?)
|
||||
.timeout(Duration::from_secs(10))
|
||||
.build()?;
|
||||
|
||||
let response = client.get("http://checkip.amazonaws.com")
|
||||
.send()
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
|
||||
Ok(response.trim().to_string())
|
||||
}
|
||||
|
||||
pub fn get_proxy_url(&self, index: usize) -> String {
|
||||
let port = self.proxy_ports[index % self.proxy_ports.len()];
|
||||
format!("socks5://localhost:{}", port)
|
||||
}
|
||||
|
||||
pub fn num_proxies(&self) -> usize {
|
||||
self.proxy_ports.len()
|
||||
}
|
||||
|
||||
pub async fn shutdown(&self) -> Result<()> {
|
||||
crate::util::logger::log_info(&format!("Shutting down {} Docker proxy containers...",
|
||||
self.container_names.len())).await;
|
||||
|
||||
for name in &self.container_names {
|
||||
let _ = Command::new("docker")
|
||||
.args(["stop", name])
|
||||
.status()
|
||||
.await;
|
||||
let _ = Command::new("docker")
|
||||
.args(["rm", name])
|
||||
.status()
|
||||
.await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn cleanup_all_proxy_containers() -> Result<()> {
|
||||
// Step 1: List all container IDs that match our pattern
|
||||
let output = Command::new("docker")
|
||||
.args(["ps", "-a", "--format", "{{.ID}} {{.Names}} {{.Image}}"])
|
||||
.output()
|
||||
.await?;
|
||||
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
|
||||
let mut containers_to_kill = Vec::new();
|
||||
|
||||
for line in stdout.lines() {
|
||||
let parts: Vec<&str> = line.split_whitespace().collect();
|
||||
if parts.len() >= 2 {
|
||||
let name_or_id = parts[0];
|
||||
let name = parts[1];
|
||||
let image = if parts.len() >= 3 { parts[2] } else { "" };
|
||||
|
||||
// Match by name prefix OR by image name
|
||||
if name.starts_with("vpn-proxy-") || image.contains("rust-vpn-proxy") {
|
||||
containers_to_kill.push(name_or_id.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if containers_to_kill.is_empty() {
|
||||
crate::util::logger::log_info("No old rust-vpn-proxy containers found").await;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Step 2: Kill and remove them all at once
|
||||
let status = Command::new("docker")
|
||||
.arg("rm")
|
||||
.arg("-f")
|
||||
.args(&containers_to_kill)
|
||||
.status()
|
||||
.await?;
|
||||
|
||||
if status.success() {
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Successfully removed {} old rust-vpn-proxy container(s)",
|
||||
containers_to_kill.len()
|
||||
))
|
||||
.await;
|
||||
} else {
|
||||
crate::util::logger::log_warn("Some containers may still remain (non-critical)").await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,163 +0,0 @@
|
||||
// src/scraper/forcebindip.rs
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
/// Manages ForceBindIP integration for binding processes to specific IP addresses
#[derive(Debug, Clone)]
pub struct ForceBindIpManager {
    /// Location of the ForceBindIP executable that commands are launched through.
    forcebindip_path: PathBuf,
}
|
||||
|
||||
impl ForceBindIpManager {
|
||||
/// Creates a new ForceBindIP manager
|
||||
///
|
||||
/// On Windows, looks for ForceBindIP.exe in common locations or PATH
|
||||
/// On other platforms, returns an error as ForceBindIP is Windows-only
|
||||
pub fn new() -> Result<Self> {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
let possible_paths = vec![
|
||||
PathBuf::from("ForceBindIP.exe"),
|
||||
PathBuf::from("tools/ForceBindIP.exe"),
|
||||
PathBuf::from("C:/Program Files/ForceBindIP/ForceBindIP.exe"),
|
||||
PathBuf::from("C:/Program Files (x86)/ForceBindIP/ForceBindIP.exe"),
|
||||
];
|
||||
|
||||
for path in possible_paths {
|
||||
if path.exists() {
|
||||
return Ok(Self {
|
||||
forcebindip_path: path,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Try to find in PATH
|
||||
if let Ok(output) = Command::new("where").arg("ForceBindIP.exe").output() {
|
||||
if output.status.success() {
|
||||
let path_str = String::from_utf8_lossy(&output.stdout);
|
||||
let path = PathBuf::from(path_str.trim());
|
||||
if path.exists() {
|
||||
return Ok(Self {
|
||||
forcebindip_path: path,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!(
|
||||
"ForceBindIP.exe not found. Please download from http://r1ch.net/projects/forcebindip \
|
||||
and place it in the project directory or add to PATH"
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
Err(anyhow!(
|
||||
"ForceBindIP is only available on Windows. For Linux/macOS, consider using \
|
||||
network namespaces or other routing mechanisms"
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a command that will run the given program bound to the specified IP
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `bind_ip` - The IP address to bind to
|
||||
/// * `program` - Path to the program to execute
|
||||
/// * `args` - Arguments to pass to the program
|
||||
///
|
||||
/// # Returns
|
||||
/// A configured Command ready to be spawned
|
||||
pub fn create_bound_command(
|
||||
&self,
|
||||
bind_ip: &str,
|
||||
program: &Path,
|
||||
args: &[&str],
|
||||
) -> Command {
|
||||
let mut cmd = Command::new(&self.forcebindip_path);
|
||||
|
||||
// ForceBindIP syntax: ForceBindIP.exe [IP] [program] [args...]
|
||||
cmd.arg(bind_ip)
|
||||
.arg(program);
|
||||
|
||||
for arg in args {
|
||||
cmd.arg(arg);
|
||||
}
|
||||
|
||||
cmd
|
||||
}
|
||||
|
||||
/// Verifies that ForceBindIP is working by testing with a simple command
|
||||
pub async fn verify_installation(&self) -> Result<()> {
|
||||
#[cfg(target_os = "windows")]
|
||||
{
|
||||
// Test by running a simple command
|
||||
let output = Command::new(&self.forcebindip_path)
|
||||
.arg("0.0.0.0")
|
||||
.arg("cmd.exe")
|
||||
.arg("/c")
|
||||
.arg("echo test")
|
||||
.output()
|
||||
.context("Failed to execute ForceBindIP verification")?;
|
||||
|
||||
if !output.status.success() {
|
||||
return Err(anyhow!(
|
||||
"ForceBindIP verification failed. stderr: {}",
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
{
|
||||
Err(anyhow!("ForceBindIP verification not available on non-Windows platforms"))
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the path to the ForceBindIP executable
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.forcebindip_path
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: looking up ForceBindIP must not panic, whether or not the
    /// tool is installed. Either outcome is only printed, never asserted.
    #[test]
    #[cfg(target_os = "windows")]
    fn test_forcebindip_manager_creation() {
        // This test will only find the tool if ForceBindIP is actually installed.
        // In CI/CD, you might want to skip this or mock it.
        match ForceBindIpManager::new() {
            Err(e) => {
                println!("ForceBindIP not found (expected in dev environments): {}", e);
            }
            Ok(manager) => {
                println!("ForceBindIP found at: {:?}", manager.path());
            }
        }
    }

    /// When ForceBindIP is available, the built command must mention both the
    /// bind IP and the target program. Does nothing elsewhere.
    #[test]
    fn test_command_creation() {
        #[cfg(target_os = "windows")]
        {
            let manager = match ForceBindIpManager::new() {
                Ok(found) => found,
                Err(_) => return,
            };

            let bound = manager.create_bound_command(
                "192.168.1.1",
                Path::new("test.exe"),
                &["--arg1", "--arg2"],
            );

            // Verify the command is constructed correctly
            let rendered = format!("{:?}", bound);
            for expected in ["192.168.1.1", "test.exe"] {
                assert!(rendered.contains(expected));
            }
        }
    }
}
|
||||
@@ -1,135 +0,0 @@
|
||||
# install_tap_adapters.ps1
# Installs additional TAP-Windows adapters for parallel OpenVPN connections
# MUST BE RUN AS ADMINISTRATOR
#
# Flow:
#   1. Abort unless running elevated.
#   2. Abort unless tapctl.exe exists at the default OpenVPN location.
#   3. Count adapters whose InterfaceDescription contains "TAP".
#   4. Create adapters named OpenVPN-TAP-<n> until $targetCount is reached.
#   5. Re-count and print a summary plus the adapter list.
#
# Exit codes: 1 = missing privileges or missing OpenVPN, 0 = already enough adapters.

$ErrorActionPreference = "Stop"

# Total number of TAP adapters the script tops the machine up to.
# Defined once here so every check and message below uses the same value.
$targetCount = 10

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "TAP Adapter Installation Script" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""

# Check if running as Administrator
$currentPrincipal = New-Object Security.Principal.WindowsPrincipal([Security.Principal.WindowsIdentity]::GetCurrent())
$isAdmin = $currentPrincipal.IsInRole([Security.Principal.WindowsBuiltInRole]::Administrator)

if (-not $isAdmin) {
    Write-Host "ERROR: This script must be run as Administrator!" -ForegroundColor Red
    Write-Host ""
    Write-Host "To run as Administrator:" -ForegroundColor Yellow
    Write-Host " 1. Right-click PowerShell" -ForegroundColor Yellow
    Write-Host " 2. Select 'Run as Administrator'" -ForegroundColor Yellow
    Write-Host " 3. Run this script again" -ForegroundColor Yellow
    Write-Host ""
    Read-Host "Press Enter to exit"
    exit 1
}

Write-Host "✓ Running with Administrator privileges" -ForegroundColor Green
Write-Host ""

# Check for OpenVPN installation (only the default install path is probed)
$tapctlPath = "C:\Program Files\OpenVPN\bin\tapctl.exe"

if (-not (Test-Path $tapctlPath)) {
    Write-Host "ERROR: OpenVPN not found!" -ForegroundColor Red
    Write-Host ""
    Write-Host "Expected location: $tapctlPath" -ForegroundColor Yellow
    Write-Host ""
    Write-Host "Please install OpenVPN from:" -ForegroundColor Yellow
    Write-Host "https://openvpn.net/community-downloads/" -ForegroundColor Cyan
    Write-Host ""
    Read-Host "Press Enter to exit"
    exit 1
}

Write-Host "✓ OpenVPN found at: $tapctlPath" -ForegroundColor Green
Write-Host ""

# Count existing TAP adapters.
# @(...) forces an array so .Count is well-defined for zero or one match as well.
Write-Host "Checking existing TAP adapters..." -ForegroundColor Cyan
$existingAdapters = @(Get-NetAdapter | Where-Object { $_.InterfaceDescription -like "*TAP*" })
$existingCount = $existingAdapters.Count

Write-Host " Found $existingCount existing TAP adapter(s)" -ForegroundColor Yellow

if ($existingCount -ge $targetCount) {
    Write-Host ""
    Write-Host "✓ You already have $existingCount TAP adapters (sufficient)" -ForegroundColor Green
    Write-Host ""
    Read-Host "Press Enter to exit"
    exit 0
}

Write-Host ""
Write-Host "Installing additional TAP adapters..." -ForegroundColor Cyan
Write-Host " Target: $targetCount total adapters" -ForegroundColor Yellow
Write-Host " To install: $($targetCount - $existingCount) adapters" -ForegroundColor Yellow
Write-Host ""

$successCount = 0
$failCount = 0

for ($i = ($existingCount + 1); $i -le $targetCount; $i++) {
    $adapterName = "OpenVPN-TAP-$i"
    Write-Host "[$i/$targetCount] Creating $adapterName..." -ForegroundColor Cyan

    try {
        # Capture stdout and stderr together so a failure can be shown to the user.
        $output = & $tapctlPath create --name $adapterName 2>&1

        if ($LASTEXITCODE -eq 0) {
            Write-Host " ✓ Successfully created $adapterName" -ForegroundColor Green
            $successCount++
        } else {
            Write-Host " ⚠ Failed to create $adapterName (exit code: $LASTEXITCODE)" -ForegroundColor Red
            Write-Host " Output: $output" -ForegroundColor Gray
            $failCount++
        }
    } catch {
        Write-Host " ✗ Error creating $adapterName : $_" -ForegroundColor Red
        $failCount++
    }

    # Small delay to prevent resource conflicts
    Start-Sleep -Milliseconds 500
}

Write-Host ""
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Installation Summary" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host " Successfully created: $successCount adapter(s)" -ForegroundColor Green
Write-Host " Failed: $failCount adapter(s)" -ForegroundColor $(if ($failCount -gt 0) { "Red" } else { "Gray" })
Write-Host ""

# Verify final count (short pause before re-querying the adapter list)
Write-Host "Verifying installation..." -ForegroundColor Cyan
Start-Sleep -Seconds 2

$finalAdapters = @(Get-NetAdapter | Where-Object { $_.InterfaceDescription -like "*TAP*" })
$finalCount = $finalAdapters.Count

# The script's rule of thumb is 3/4 of the adapters; round down so the message
# never advertises a fractional number of connections (e.g. 7.5).
$maxParallel = [int][math]::Floor($finalCount * 3 / 4)

Write-Host ""
Write-Host "Total TAP adapters now: $finalCount" -ForegroundColor $(if ($finalCount -ge $targetCount) { "Green" } else { "Yellow" })
Write-Host ""

if ($finalCount -ge $targetCount) {
    Write-Host "✓ Installation complete! You now have sufficient TAP adapters." -ForegroundColor Green
    Write-Host " You can now run up to $maxParallel VPN connections in parallel." -ForegroundColor Cyan
} elseif ($finalCount -gt $existingCount) {
    Write-Host "⚠ Partial success. Added $($finalCount - $existingCount) adapter(s)." -ForegroundColor Yellow
    Write-Host " You can run up to $maxParallel VPN connections in parallel." -ForegroundColor Cyan
    Write-Host " Consider running this script again if you need more." -ForegroundColor Yellow
} else {
    Write-Host "✗ No adapters were added. Check error messages above." -ForegroundColor Red
}

Write-Host ""
Write-Host "Adapter List:" -ForegroundColor Cyan
$finalAdapters | ForEach-Object {
    Write-Host " • $($_.Name) ($($_.InterfaceDescription))" -ForegroundColor Gray
}

Write-Host ""
Read-Host "Press Enter to exit"
|
||||
@@ -1,5 +1,2 @@
|
||||
pub mod webdriver;
|
||||
pub mod vpn_manager;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
pub mod forcebindip;
|
||||
pub mod docker_vpn_proxy;
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,397 +0,0 @@
|
||||
# VPN Rotation System - Setup Checklist
|
||||
|
||||
## 🚀 Quick Setup (5 Minutes)
|
||||
|
||||
Follow these steps to get your VPN rotation system up and running:
|
||||
|
||||
### ✅ Step 1: Install OpenVPN
|
||||
|
||||
**Windows:**
|
||||
```powershell
|
||||
# Download installer
|
||||
# https://openvpn.net/community-downloads/
|
||||
|
||||
# Install to default location
|
||||
# Add to PATH: C:\Program Files\OpenVPN\bin
|
||||
|
||||
# Verify installation
|
||||
openvpn --version
|
||||
```
|
||||
|
||||
**Linux (Ubuntu/Debian):**
|
||||
```bash
|
||||
sudo apt-get update
|
||||
sudo apt-get install openvpn
|
||||
openvpn --version
|
||||
```
|
||||
|
||||
**macOS:**
|
||||
```bash
|
||||
brew install openvpn
|
||||
openvpn --version
|
||||
```
|
||||
|
||||
### ✅ Step 2: Install ForceBindIP (Windows Only)
|
||||
|
||||
```powershell
|
||||
# Download from: http://r1ch.net/projects/forcebindip
|
||||
|
||||
# Extract ForceBindIP.exe and place in one of:
|
||||
# Option 1: Project root
|
||||
.\ForceBindIP.exe
|
||||
|
||||
# Option 2: Tools directory
|
||||
.\tools\ForceBindIP.exe
|
||||
|
||||
# Option 3: Add to PATH
|
||||
C:\Program Files\ForceBindIP\ForceBindIP.exe
|
||||
|
||||
# Verify installation
|
||||
ForceBindIP.exe
|
||||
```
|
||||
|
||||
**Linux/macOS Users:**
|
||||
- ForceBindIP is Windows-only
|
||||
- Use network namespaces (Linux) or alternative routing
|
||||
- See documentation for workarounds
|
||||
|
||||
### ✅ Step 3: Update Cargo.toml
|
||||
|
||||
Add these dependencies if not already present:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
tokio = { version = "1.0", features = ["full"] }
|
||||
fantoccini = "0.19"
|
||||
reqwest = { version = "0.11", features = ["blocking"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
chrono = "0.4"
|
||||
once_cell = "1.19"
|
||||
dotenvy = "0.15"
|
||||
url = "2.5"
|
||||
zip = "0.6"
|
||||
```
|
||||
|
||||
### ✅ Step 4: Configure Environment
|
||||
|
||||
Create or update `.env` file in project root:
|
||||
|
||||
```bash
|
||||
# Required: Date ranges
|
||||
ECONOMIC_START_DATE=2007-02-13
|
||||
CORPORATE_START_DATE=2010-01-01
|
||||
ECONOMIC_LOOKAHEAD_MONTHS=3
|
||||
|
||||
# Required: Parallelism
|
||||
MAX_PARALLEL_INSTANCES=5
|
||||
MAX_TASKS_PER_INSTANCE=0
|
||||
|
||||
# VPN Configuration
|
||||
ENABLE_VPN_ROTATION=true
|
||||
TASKS_PER_VPN_SESSION=50
|
||||
```
|
||||
|
||||
**Configuration Presets:**
|
||||
|
||||
**Conservative (Recommended for first run):**
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=3
|
||||
TASKS_PER_VPN_SESSION=100
|
||||
```
|
||||
|
||||
**Balanced:**
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=5
|
||||
TASKS_PER_VPN_SESSION=50
|
||||
```
|
||||
|
||||
**Aggressive (Use with caution):**
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=10
|
||||
TASKS_PER_VPN_SESSION=25
|
||||
```
|
||||
|
||||
### ✅ Step 5: Add VPN Module Files
|
||||
|
||||
Copy these files to your project:
|
||||
|
||||
```
|
||||
src/
|
||||
├── scraper/
|
||||
│ ├── mod.rs (update with: pub mod vpn_manager; pub mod forcebindip;)
|
||||
│ ├── vpn_manager.rs (new file - from artifact)
|
||||
│ ├── forcebindip.rs (new file - from artifact)
|
||||
│ └── webdriver.rs (replace with VPN-enabled version)
|
||||
├── util/
|
||||
│ ├── mod.rs (already includes opnv)
|
||||
│ ├── opnv.rs (already present)
|
||||
│ └── ...
|
||||
├── main.rs (replace with VPN-enabled version)
|
||||
└── lib.rs (update to expose VPN modules)
|
||||
```
|
||||
|
||||
### ✅ Step 6: Verify Directory Structure
|
||||
|
||||
Confirm the following directory layout (missing directories will be auto-created):
|
||||
|
||||
```
|
||||
project/
|
||||
├── cache/
|
||||
│ ├── openvpn/ (VPN configs stored here)
|
||||
│ └── temp_vpn_zips/ (temporary, auto-cleaned)
|
||||
├── logs/ (application logs)
|
||||
├── data/
|
||||
│ ├── economic/
|
||||
│ └── corporate/
|
||||
└── chromedriver-win64/
|
||||
└── chromedriver.exe
|
||||
```
|
||||
|
||||
### ✅ Step 7: Test Installation
|
||||
|
||||
**Test 1: OpenVPN**
|
||||
```bash
|
||||
openvpn --version
|
||||
# Should output version info
|
||||
```
|
||||
|
||||
**Test 2: ForceBindIP (Windows)**
|
||||
```powershell
|
||||
ForceBindIP.exe 127.0.0.1 cmd.exe /c echo test
|
||||
# Should output: test
|
||||
```
|
||||
|
||||
**Test 3: Build Project**
|
||||
```bash
|
||||
cargo build --release
|
||||
# Should compile without errors
|
||||
```
|
||||
|
||||
**Test 4: Dry Run (No VPN)**
|
||||
```bash
|
||||
# Temporarily disable VPN
|
||||
# Set in .env: ENABLE_VPN_ROTATION=false
|
||||
|
||||
cargo run --release
|
||||
# Should initialize ChromeDriver pool and run
|
||||
```
|
||||
|
||||
### ✅ Step 8: First VPN-Enabled Run
|
||||
|
||||
```bash
|
||||
# Enable VPN in .env
|
||||
ENABLE_VPN_ROTATION=true
|
||||
TASKS_PER_VPN_SESSION=0 # Start with phase rotation only
|
||||
|
||||
# Run application
|
||||
cargo run --release
|
||||
|
||||
# Watch logs
|
||||
tail -f logs/backtest_*.log
|
||||
```
|
||||
|
||||
**Expected Output:**
|
||||
```
|
||||
[HH:MM:SS] [INFO] === Application started ===
|
||||
[HH:MM:SS] [INFO] === VPN Rotation Enabled ===
|
||||
[HH:MM:SS] [INFO] --- Fetching latest VPNBook OpenVPN configurations ---
|
||||
[HH:MM:SS] [INFO] ✓ Fetched VPN credentials - Username: vpnbook
|
||||
[HH:MM:SS] [INFO] ✓ Downloaded 6 .ovpn configuration files
|
||||
[HH:MM:SS] [INFO] --- Initializing VPN Pool ---
|
||||
[HH:MM:SS] [INFO] Found 6 OpenVPN configurations
|
||||
[HH:MM:SS] [INFO] --- Connecting to VPN servers ---
|
||||
[HH:MM:SS] [INFO] Starting VPN connection for ca149.vpnbook.com
|
||||
[HH:MM:SS] [INFO] ✓ VPN ca149.vpnbook.com connected with IP: 142.4.217.133
|
||||
...
|
||||
[HH:MM:SS] [INFO] ✓ ChromeDriver pool initialized successfully
|
||||
```
|
||||
|
||||
## 🎯 Common Issues and Solutions
|
||||
|
||||
### Issue: "openvpn: command not found"
|
||||
```bash
|
||||
# Windows: Add to PATH
|
||||
setx PATH "%PATH%;C:\Program Files\OpenVPN\bin"
|
||||
|
||||
# Linux: Install package
|
||||
sudo apt-get install openvpn
|
||||
|
||||
# Verify
|
||||
which openvpn
|
||||
```
|
||||
|
||||
### Issue: "ForceBindIP.exe not found"
|
||||
```powershell
|
||||
# Place in project root
|
||||
curl -o ForceBindIP.exe http://r1ch.net/projects/forcebindip/ForceBindIP.exe
|
||||
|
||||
# Or add to PATH
|
||||
setx PATH "%PATH%;C:\path\to\ForceBindIP"
|
||||
```
|
||||
|
||||
### Issue: VPN Connection Timeout
|
||||
```bash
|
||||
# Try different config file
|
||||
# VPNBook offers multiple servers/protocols
|
||||
# Look in cache/openvpn/ after first fetch
|
||||
|
||||
# Files named like:
|
||||
# - vpnbook-ca149-tcp80.ovpn (TCP port 80 - most compatible)
|
||||
# - vpnbook-ca149-tcp443.ovpn (TCP port 443 - works through most firewalls)
|
||||
# - vpnbook-ca149-udp53.ovpn (UDP port 53 - faster but may be blocked)
|
||||
|
||||
# Check firewall settings
|
||||
# - Allow OpenVPN.exe through Windows Firewall
|
||||
# - Allow outbound connections on ports 80, 443, 53, 1194
|
||||
```
|
||||
|
||||
### Issue: "Failed to spawn chromedriver"
|
||||
```bash
|
||||
# Verify chromedriver path
|
||||
ls chromedriver-win64/chromedriver.exe
|
||||
|
||||
# Check Chrome/ChromeDriver version match
|
||||
chromedriver.exe --version
|
||||
# Chrome version should be compatible
|
||||
|
||||
# Update ChromeDriver if needed
|
||||
# Download from: https://chromedriver.chromium.org/
|
||||
```
|
||||
|
||||
### Issue: "Semaphore closed"
|
||||
```bash
|
||||
# Reduce parallelism in .env
|
||||
MAX_PARALLEL_INSTANCES=3
|
||||
|
||||
# Or increase system resources
|
||||
# Check Task Manager / Activity Monitor
|
||||
```
|
||||
|
||||
## 📊 Performance Tuning
|
||||
|
||||
### Optimize for Speed
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=10
|
||||
TASKS_PER_VPN_SESSION=100
|
||||
# More instances, less frequent rotation
|
||||
# Risk: More aggressive, may hit rate limits
|
||||
```
|
||||
|
||||
### Optimize for Stealth
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=2
|
||||
TASKS_PER_VPN_SESSION=10
|
||||
# Fewer instances, frequent rotation
|
||||
# Trade-off: slower, but more IP diversity
|
||||
```
|
||||
|
||||
### Optimize for Stability
|
||||
```bash
|
||||
MAX_PARALLEL_INSTANCES=5
|
||||
TASKS_PER_VPN_SESSION=50
|
||||
# Balanced approach (recommended)
|
||||
```
|
||||
|
||||
## 🔍 Monitoring and Logs
|
||||
|
||||
### Key Log Files
|
||||
```
|
||||
logs/
|
||||
└── backtest_YYYYMMDD_HHMMSS.log
|
||||
```
|
||||
|
||||
### Important Log Patterns
|
||||
|
||||
**Successful VPN Connection:**
|
||||
```
|
||||
[INFO] ✓ VPN ca149.vpnbook.com connected with IP: 142.4.217.133
|
||||
```
|
||||
|
||||
**VPN Rotation:**
|
||||
```
|
||||
[INFO] ✓ VPN ca149.vpnbook.com rotated: 142.4.217.133 -> 142.4.217.201
|
||||
```
|
||||
|
||||
**Health Issues:**
|
||||
```
|
||||
[WARN] ⚠ Health check failed for VPN us1.vpnbook.com
|
||||
[INFO] Attempting to reconnect unhealthy VPN: us1.vpnbook.com
|
||||
```
|
||||
|
||||
**Binding ChromeDriver:**
|
||||
```
|
||||
[INFO] Binding ChromeDriver to VPN IP: 142.4.217.133
|
||||
```
|
||||
|
||||
### Monitor Real-Time
|
||||
```bash
|
||||
# Linux/macOS
|
||||
tail -f logs/backtest_*.log
|
||||
|
||||
# Windows PowerShell
|
||||
Get-Content logs\backtest_*.log -Wait -Tail 50
|
||||
```
|
||||
|
||||
### Search Logs
|
||||
```bash
|
||||
# Count successful connections
|
||||
grep "connected with IP" logs/*.log | wc -l
|
||||
|
||||
# Find errors
|
||||
grep ERROR logs/*.log
|
||||
|
||||
# Track rotations
|
||||
grep "rotated:" logs/*.log
|
||||
|
||||
# Find failed tasks
|
||||
grep "failed" logs/*.log
|
||||
```
|
||||
|
||||
## 🚦 Next Steps
|
||||
|
||||
1. **✅ Complete Setup**: Verify all checkboxes above
|
||||
2. **🧪 Test Run**: Run with `TASKS_PER_VPN_SESSION=0` first
|
||||
3. **📊 Monitor**: Watch logs during first run
|
||||
4. **⚙️ Tune**: Adjust configuration based on results
|
||||
5. **🔄 Iterate**: Increase parallelism gradually
|
||||
6. **📈 Scale**: Once stable, increase to production levels
|
||||
|
||||
## 📚 Additional Resources
|
||||
|
||||
- **VPNBook Website**: https://www.vpnbook.com/freevpn
|
||||
- **OpenVPN Docs**: https://openvpn.net/community-resources/
|
||||
- **ForceBindIP**: http://r1ch.net/projects/forcebindip
|
||||
- **ChromeDriver**: https://chromedriver.chromium.org/
|
||||
|
||||
## 🆘 Getting Help
|
||||
|
||||
If you encounter issues:
|
||||
|
||||
1. **Check Prerequisites**: Verify all software is installed
|
||||
2. **Review Logs**: Look in `logs/` directory
|
||||
3. **Test Components**: Test OpenVPN and ForceBindIP independently
|
||||
4. **Simplify**: Start with `ENABLE_VPN_ROTATION=false`
|
||||
5. **Document Error**: Note exact error message and context
|
||||
|
||||
## 🎉 Success Criteria
|
||||
|
||||
You're ready to proceed when you see:
|
||||
|
||||
```
|
||||
✓ OpenVPN installed and in PATH
|
||||
✓ ForceBindIP.exe accessible (Windows)
|
||||
✓ Project compiles successfully
|
||||
✓ VPN configurations fetched
|
||||
✓ All VPNs connected
|
||||
✓ ChromeDriver pool initialized
|
||||
✓ First scraping task completed
|
||||
```
|
||||
|
||||
**Congratulations! Your VPN rotation system is now operational.**
|
||||
|
||||
---
|
||||
|
||||
*Last Updated: December 2024*
|
||||
*Version: 1.0*
|
||||
@@ -11,426 +11,258 @@ use tokio::process::{Child, Command};
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::sync::{Mutex, Semaphore};
|
||||
use tokio::time::{sleep, timeout, Duration};
|
||||
|
||||
use super::vpn_manager::{VpnInstance, VpnPool};
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
use super::forcebindip::ForceBindIpManager;
|
||||
use crate::scraper::docker_vpn_proxy::{DockerVpnProxyPool};
|
||||
|
||||
/// Manages a pool of ChromeDriver instances for parallel scraping with optional VPN binding.
|
||||
pub struct ChromeDriverPool {
|
||||
instances: Vec<Arc<Mutex<ChromeInstance>>>,
|
||||
semaphore: Arc<Semaphore>,
|
||||
vpn_pool: Option<Arc<VpnPool>>,
|
||||
#[cfg(target_os = "windows")]
|
||||
forcebindip: Option<Arc<ForceBindIpManager>>,
|
||||
/// Optional Docker-based proxy pool (one proxy per Chrome instance)
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>,
|
||||
}
|
||||
|
||||
impl ChromeDriverPool {
|
||||
/// Creates a new pool with the specified number of ChromeDriver instances (no VPN).
|
||||
/// Creates a new pool without any proxy (direct connection).
|
||||
pub async fn new(pool_size: usize) -> Result<Self> {
|
||||
Self::new_with_vpn_and_task_limit(pool_size, None, 0).await
|
||||
Self::new_with_proxy_and_task_limit(pool_size, None, 0).await
|
||||
}
|
||||
|
||||
/// Creates a new ChromeDriver pool with task-per-instance tracking.
|
||||
pub async fn new_with_task_limit(
|
||||
/// Creates a new pool with task-per-instance limit but no proxy.
|
||||
pub async fn new_with_task_limit(pool_size: usize, max_tasks_per_instance: usize) -> Result<Self> {
|
||||
Self::new_with_proxy_and_task_limit(pool_size, None, max_tasks_per_instance).await
|
||||
}
|
||||
|
||||
/// Creates a new pool where each Chrome instance uses a different SOCKS5 proxy from the Docker pool.
|
||||
pub async fn new_with_proxy(
|
||||
pool_size: usize,
|
||||
max_tasks_per_instance: usize,
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>,
|
||||
) -> Result<Self> {
|
||||
Self::new_with_vpn_and_task_limit(pool_size, None, max_tasks_per_instance).await
|
||||
Self::new_with_proxy_and_task_limit(pool_size, proxy_pool, 0).await
|
||||
}
|
||||
|
||||
/// Creates a new pool with VPN support.
|
||||
pub async fn new_with_vpn(
|
||||
/// Full constructor: supports proxy + task limiting.
|
||||
pub async fn new_with_proxy_and_task_limit(
|
||||
pool_size: usize,
|
||||
vpn_pool: Option<Arc<VpnPool>>,
|
||||
) -> Result<Self> {
|
||||
Self::new_with_vpn_and_task_limit(pool_size, vpn_pool, 0).await
|
||||
}
|
||||
|
||||
/// Creates a new pool with VPN support and task-per-instance limits.
|
||||
pub async fn new_with_vpn_and_task_limit(
|
||||
pool_size: usize,
|
||||
vpn_pool: Option<Arc<VpnPool>>,
|
||||
proxy_pool: Option<Arc<DockerVpnProxyPool>>,
|
||||
max_tasks_per_instance: usize,
|
||||
) -> Result<Self> {
|
||||
let mut instances = Vec::with_capacity(pool_size);
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
let forcebindip = if vpn_pool.is_some() {
|
||||
match ForceBindIpManager::new() {
|
||||
Ok(manager) => {
|
||||
crate::util::logger::log_info("✓ ForceBindIP manager initialized").await;
|
||||
Some(Arc::new(manager))
|
||||
}
|
||||
Err(e) => {
|
||||
crate::util::logger::log_warn(&format!(
|
||||
"⚠ ForceBindIP not available: {}. Proceeding without IP binding.",
|
||||
e
|
||||
)).await;
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Initializing ChromeDriver pool with {} instances{}{}...",
|
||||
"Initializing ChromeDriver pool with {} instances{}...",
|
||||
pool_size,
|
||||
if vpn_pool.is_some() { " (VPN-enabled)" } else { "" },
|
||||
if max_tasks_per_instance > 0 { &format!(" (max {} tasks/instance)", max_tasks_per_instance) } else { "" }
|
||||
)).await;
|
||||
if proxy_pool.is_some() { " (each using a unique Docker SOCKS5 proxy)" } else { "" }
|
||||
))
|
||||
.await;
|
||||
|
||||
for i in 0..pool_size {
|
||||
// If VPN pool exists, acquire a VPN instance for this ChromeDriver
|
||||
let vpn_instance = if let Some(ref vp) = vpn_pool {
|
||||
Some(vp.acquire().await?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let proxy_url = proxy_pool
|
||||
.as_ref()
|
||||
.map(|pp| pp.get_proxy_url(i));
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
let instance = ChromeInstance::new_with_task_limit(vpn_instance, forcebindip.clone(), max_tasks_per_instance).await?;
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
let instance = ChromeInstance::new_with_task_limit(vpn_instance, max_tasks_per_instance).await?;
|
||||
let instance = ChromeInstance::new(proxy_url, max_tasks_per_instance).await?;
|
||||
|
||||
crate::util::logger::log_info(&format!(" ✓ Instance {} ready", i + 1)).await;
|
||||
crate::util::logger::log_info(&format!(" Instance {} ready", i + 1)).await;
|
||||
instances.push(Arc::new(Mutex::new(instance)));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
instances,
|
||||
semaphore: Arc::new(Semaphore::new(pool_size)),
|
||||
vpn_pool,
|
||||
#[cfg(target_os = "windows")]
|
||||
forcebindip,
|
||||
proxy_pool,
|
||||
})
|
||||
}
|
||||
|
||||
/// Executes a scrape task using an available instance from the pool.
|
||||
/// Execute a scraping task using an available instance from the pool.
|
||||
pub async fn execute<T, F, Fut>(&self, url: String, parse: F) -> Result<T>
|
||||
where
|
||||
T: Send + 'static,
|
||||
F: FnOnce(Client) -> Fut + Send + 'static,
|
||||
Fut: std::future::Future<Output = Result<T>> + Send + 'static,
|
||||
Fut: std::future::Future<Output = Result<T>> + Send,
|
||||
{
|
||||
// Acquire semaphore permit
|
||||
let _permit = self
|
||||
.semaphore
|
||||
.acquire()
|
||||
.await
|
||||
.map_err(|_| anyhow!("Semaphore closed"))?;
|
||||
let _permit = self.semaphore.acquire().await.map_err(|_| anyhow!("Pool closed"))?;
|
||||
|
||||
// Find an available instance (round-robin or first available)
|
||||
let instance = self.instances[0].clone();
|
||||
// Random selection: pick a random instance index (this is not round-robin)
|
||||
let index = rand::random_range(..self.instances.len());
|
||||
let instance = self.instances[index].clone();
|
||||
let mut guard = instance.lock().await;
|
||||
|
||||
// Track task count
|
||||
guard.increment_task_count();
|
||||
|
||||
// Get VPN info before creating session
|
||||
let vpn_info = if let Some(ref vpn) = guard.vpn_instance {
|
||||
let vpn_guard = vpn.lock().await;
|
||||
Some(format!("{} ({})",
|
||||
vpn_guard.hostname(),
|
||||
vpn_guard.external_ip().unwrap_or("unknown")))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Log task count if limit is set
|
||||
if guard.max_tasks_per_instance > 0 {
|
||||
crate::util::logger::log_info(&format!(
|
||||
"Instance task count: {}/{}",
|
||||
guard.get_task_count(),
|
||||
guard.max_tasks_per_instance
|
||||
)).await;
|
||||
))
|
||||
.await;
|
||||
}
|
||||
|
||||
// Create a new session for this task
|
||||
let client = guard.new_session().await?;
|
||||
|
||||
// Release lock while we do the actual scraping
|
||||
drop(guard);
|
||||
drop(guard); // release lock early
|
||||
|
||||
// Navigate and parse
|
||||
if let Some(ref info) = vpn_info {
|
||||
crate::util::logger::log_info(&format!("Scraping {} via VPN: {}", url, info)).await;
|
||||
}
|
||||
crate::util::logger::log_info(&format!("Scraping {} ...", url)).await;
|
||||
client.goto(&url).await.context("Navigation failed")?;
|
||||
|
||||
client.goto(&url).await.context("Failed to navigate")?;
|
||||
let result = timeout(Duration::from_secs(60), parse(client))
|
||||
let result = timeout(Duration::from_secs(90), parse(client))
|
||||
.await
|
||||
.context("Parse function timed out after 60s")??;
|
||||
|
||||
// Handle VPN rotation if needed
|
||||
if let Some(ref vpn_pool) = self.vpn_pool {
|
||||
let mut guard = instance.lock().await;
|
||||
if let Some(ref vpn) = guard.vpn_instance {
|
||||
vpn_pool.rotate_if_needed(vpn.clone()).await?;
|
||||
guard.reset_task_count(); // Reset task count on VPN rotation
|
||||
}
|
||||
}
|
||||
.context("Parse timeout")??;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Gracefully shut down all ChromeDriver processes and Docker proxy containers.
|
||||
pub async fn shutdown(&self) -> Result<()> {
|
||||
for inst in &self.instances {
|
||||
let mut guard = inst.lock().await;
|
||||
guard.shutdown().await?;
|
||||
}
|
||||
|
||||
if let Some(pp) = &self.proxy_pool {
|
||||
pp.shutdown().await?;
|
||||
crate::util::logger::log_info("All Docker VPN proxy containers stopped").await;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_number_of_instances(&self) -> usize {
|
||||
self.instances.len()
|
||||
}
|
||||
|
||||
/// Returns whether VPN is enabled for this pool
|
||||
pub fn is_vpn_enabled(&self) -> bool {
|
||||
self.vpn_pool.is_some()
|
||||
}
|
||||
|
||||
/// Gracefully shutdown all ChromeDriver instances in the pool.
|
||||
pub async fn shutdown(&self) -> Result<()> {
|
||||
crate::util::logger::log_info("Shutting down ChromeDriverPool instances...").await;
|
||||
for inst in &self.instances {
|
||||
crate::util::logger::log_info("Shutting down a ChromeDriver instance...").await;
|
||||
let mut guard = inst.lock().await;
|
||||
if let Err(e) = guard.shutdown().await {
|
||||
crate::util::logger::log_warn(&format!("Error shutting down instance: {}", e)).await;
|
||||
}
|
||||
}
|
||||
crate::util::logger::log_info("All ChromeDriver instances shut down").await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a single instance of chromedriver process, optionally bound to a VPN.
|
||||
pub struct ChromeInstance {
|
||||
process: Child,
|
||||
base_url: String,
|
||||
vpn_instance: Option<Arc<Mutex<VpnInstance>>>,
|
||||
process: Child,
|
||||
stderr_log: Option<JoinHandle<()>>,
|
||||
task_count: usize,
|
||||
max_tasks_per_instance: usize,
|
||||
// Optional join handle for background stderr logging task
|
||||
stderr_log: Option<JoinHandle<()>>,
|
||||
proxy_url: Option<String>,
|
||||
}
|
||||
|
||||
impl ChromeInstance {
|
||||
/// Creates a new ChromeInstance, optionally bound to a VPN IP.
|
||||
#[cfg(target_os = "windows")]
|
||||
pub async fn new(
|
||||
vpn_instance: Option<Arc<Mutex<VpnInstance>>>,
|
||||
forcebindip: Option<Arc<ForceBindIpManager>>,
|
||||
) -> Result<Self> {
|
||||
Self::new_with_task_limit(vpn_instance, forcebindip, 0).await
|
||||
}
|
||||
|
||||
/// Creates a new ChromeInstance with task-per-instance limit, bound to a VPN IP if provided.
|
||||
#[cfg(target_os = "windows")]
|
||||
pub async fn new_with_task_limit(
|
||||
vpn_instance: Option<Arc<Mutex<VpnInstance>>>,
|
||||
forcebindip: Option<Arc<ForceBindIpManager>>,
|
||||
max_tasks_per_instance: usize,
|
||||
) -> Result<Self> {
|
||||
let bind_ip = if let Some(ref vpn) = vpn_instance {
|
||||
let vpn_guard = vpn.lock().await;
|
||||
vpn_guard.external_ip().map(|s| s.to_string())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut command = if let (Some(ip), Some(fb)) = (&bind_ip, &forcebindip) {
|
||||
// Use ForceBindIP to bind ChromeDriver to specific VPN IP
|
||||
crate::util::logger::log_info(&format!("Binding ChromeDriver to VPN IP: {}", ip)).await;
|
||||
let mut std_cmd = fb.create_bound_command(
|
||||
ip,
|
||||
std::path::Path::new("chromedriver-win64/chromedriver.exe"),
|
||||
&["--port=0"],
|
||||
);
|
||||
Command::from(std_cmd)
|
||||
} else {
|
||||
let mut cmd = Command::new("chromedriver-win64/chromedriver.exe");
|
||||
cmd.arg("--port=0");
|
||||
cmd
|
||||
};
|
||||
|
||||
command.stdout(Stdio::piped()).stderr(Stdio::piped());
|
||||
|
||||
let mut process = command
|
||||
.spawn()
|
||||
.context("Failed to spawn chromedriver. Ensure it's installed and in PATH.")?;
|
||||
|
||||
let (base_url, stderr_handle) = Self::wait_for_chromedriver_start(&mut process).await?;
|
||||
pub async fn new(proxy_url: Option<String>, max_tasks_per_instance: usize) -> Result<Self> {
|
||||
let (base_url, process, stderr_handle) = Self::spawn_chromedriver().await?;
|
||||
|
||||
Ok(Self {
|
||||
process,
|
||||
base_url,
|
||||
vpn_instance,
|
||||
process,
|
||||
stderr_log: Some(stderr_handle),
|
||||
task_count: 0,
|
||||
max_tasks_per_instance,
|
||||
stderr_log: stderr_handle,
|
||||
proxy_url,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new ChromeInstance on non-Windows platforms (no ForceBindIP support).
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
pub async fn new(vpn_instance: Option<Arc<Mutex<VpnInstance>>>) -> Result<Self> {
|
||||
Self::new_with_task_limit(vpn_instance, 0).await
|
||||
}
|
||||
|
||||
/// Creates a new ChromeInstance on non-Windows platforms with task-per-instance limit.
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
pub async fn new_with_task_limit(vpn_instance: Option<Arc<Mutex<VpnInstance>>>, max_tasks_per_instance: usize) -> Result<Self> {
|
||||
if vpn_instance.is_some() {
|
||||
crate::util::logger::log_warn(
|
||||
"⚠ VPN binding requested but ForceBindIP is not available on this platform"
|
||||
).await;
|
||||
}
|
||||
|
||||
let mut command = Command::new("chromedriver");
|
||||
command
|
||||
.arg("--port=0")
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
let mut process = command
|
||||
.spawn()
|
||||
.context("Failed to spawn chromedriver. Ensure it's installed and in PATH.")?;
|
||||
|
||||
let (base_url, stderr_handle) = Self::wait_for_chromedriver_start(&mut process).await?;
|
||||
|
||||
Ok(Self {
|
||||
process,
|
||||
base_url,
|
||||
vpn_instance,
|
||||
task_count: 0,
|
||||
max_tasks_per_instance,
|
||||
stderr_log: stderr_handle,
|
||||
})
|
||||
}
|
||||
|
||||
/// Waits for ChromeDriver to start and extracts the listening address.
|
||||
async fn wait_for_chromedriver_start(process: &mut Child) -> Result<(String, Option<JoinHandle<()>>)> {
|
||||
let mut stdout =
|
||||
BufReader::new(process.stdout.take().context("Failed to capture stdout")?).lines();
|
||||
|
||||
let stderr_reader = process.stderr.take().context("Failed to capture stderr")?;
|
||||
|
||||
let start_time = std::time::Instant::now();
|
||||
let mut address: Option<String> = None;
|
||||
let mut success = false;
|
||||
|
||||
// Log stderr in background for debugging and return the JoinHandle so we can
|
||||
// abort/await it during shutdown.
|
||||
let stderr_handle: JoinHandle<()> = tokio::spawn(async move {
|
||||
let mut stderr_lines = BufReader::new(stderr_reader).lines();
|
||||
while let Ok(Some(line)) = stderr_lines.next_line().await {
|
||||
let trimmed = line.trim();
|
||||
if !trimmed.is_empty() {
|
||||
crate::util::logger::log_info(&format!("ChromeDriver stderr: {}", trimmed)).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Wait for address and success (up to 30s)
|
||||
while start_time.elapsed() < Duration::from_secs(30) {
|
||||
if let Ok(Ok(Some(line))) = timeout(Duration::from_secs(1), stdout.next_line()).await {
|
||||
if let Some(addr) = parse_chromedriver_address(&line) {
|
||||
address = Some(addr.to_string());
|
||||
}
|
||||
|
||||
if line.contains("ChromeDriver was started successfully") {
|
||||
success = true;
|
||||
}
|
||||
|
||||
if let (Some(addr), true) = (&address, success) {
|
||||
return Ok((addr.clone(), Some(stderr_handle)));
|
||||
}
|
||||
}
|
||||
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
// Cleanup on failure
|
||||
let _ = process.kill().await;
|
||||
// If we timed out, abort stderr logging task
|
||||
stderr_handle.abort();
|
||||
let _ = stderr_handle.await;
|
||||
Err(anyhow!("Timeout: ChromeDriver did not start within 30 seconds"))
|
||||
}
|
||||
|
||||
/// Creates a new browser session (client) from this ChromeDriver instance.
|
||||
pub async fn new_session(&self) -> Result<Client> {
|
||||
ClientBuilder::native()
|
||||
.capabilities(Self::chrome_args())
|
||||
.capabilities(self.chrome_args())
|
||||
.connect(&self.base_url)
|
||||
.await
|
||||
.context("Failed to create new session")
|
||||
.context("Failed to connect to ChromeDriver")
|
||||
}
|
||||
|
||||
/// Increments task counter and returns whether limit has been reached
|
||||
pub fn increment_task_count(&mut self) -> bool {
|
||||
if self.max_tasks_per_instance > 0 {
|
||||
self.task_count += 1;
|
||||
self.task_count >= self.max_tasks_per_instance
|
||||
} else {
|
||||
false
|
||||
}
|
||||
pub fn increment_task_count(&mut self) {
|
||||
self.task_count += 1;
|
||||
}
|
||||
|
||||
/// Resets task counter (called when VPN is rotated)
|
||||
pub fn reset_task_count(&mut self) {
|
||||
self.task_count = 0;
|
||||
}
|
||||
|
||||
/// Returns current task count for this instance
|
||||
pub fn get_task_count(&self) -> usize {
|
||||
self.task_count
|
||||
}
|
||||
|
||||
/// Gracefully shutdown the chromedriver process and background log tasks.
|
||||
pub async fn shutdown(&mut self) -> Result<()> {
|
||||
// Abort and await stderr logging task if present
|
||||
if let Some(handle) = self.stderr_log.take() {
|
||||
handle.abort();
|
||||
let _ = handle.await;
|
||||
}
|
||||
|
||||
// Try to terminate the child process
|
||||
let _ = self.process.start_kill();
|
||||
// Await the process to ensure resources are released
|
||||
let _ = self.process.wait().await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn chrome_args() -> Map<String, Value> {
|
||||
let args = serde_json::json!({
|
||||
/// Spawns the actual `chromedriver` binary and waits for it to become ready.
|
||||
async fn spawn_chromedriver() -> Result<(String, Child, JoinHandle<()>)> {
|
||||
let mut process = Command::new("chromedriver-win64/chromedriver.exe")
|
||||
.arg("--port=0") // let OS choose free port
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()
|
||||
.context("Failed to start chromedriver. Is it in PATH?")?;
|
||||
|
||||
let stdout = process.stdout.take().unwrap();
|
||||
let stderr = process.stderr.take().unwrap();
|
||||
|
||||
let stdout_reader = BufReader::new(stdout);
|
||||
let mut stdout_lines = stdout_reader.lines();
|
||||
|
||||
let stderr_reader = BufReader::new(stderr);
|
||||
let stderr_handle = tokio::spawn(async move {
|
||||
let mut lines = stderr_reader.lines();
|
||||
while let Ok(Some(line)) = lines.next_line().await {
|
||||
let t = line.trim();
|
||||
if !t.is_empty() {
|
||||
let _ = crate::util::logger::log_info(&format!("ChromeDriver: {}", t)).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
let start = tokio::time::Instant::now();
|
||||
let mut address: Option<String> = None;
|
||||
|
||||
while start.elapsed() < Duration::from_secs(30) {
|
||||
if let Ok(Ok(Some(line))) = timeout(Duration::from_secs(1), stdout_lines.next_line()).await {
|
||||
if let Some(addr) = parse_chromedriver_address(&line) {
|
||||
address = Some(addr);
|
||||
}
|
||||
if line.contains("ChromeDriver was started successfully") && address.is_some() {
|
||||
return Ok((address.unwrap(), process, stderr_handle));
|
||||
}
|
||||
}
|
||||
sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
let _ = process.kill().await;
|
||||
stderr_handle.abort();
|
||||
Err(anyhow!("ChromeDriver failed to start within 30s"))
|
||||
}
|
||||
|
||||
fn chrome_args(&self) -> Map<String, Value> {
|
||||
let mut args = vec![
|
||||
"--headless=new".to_string(),
|
||||
"--disable-gpu".to_string(),
|
||||
"--no-sandbox".to_string(),
|
||||
"--disable-dev-shm-usage".to_string(),
|
||||
"--disable-infobars".to_string(),
|
||||
"--disable-extensions".to_string(),
|
||||
"--disable-popup-blocking".to_string(),
|
||||
"--disable-notifications".to_string(),
|
||||
"--disable-logging".to_string(),
|
||||
"--disable-autofill".to_string(),
|
||||
"--disable-sync".to_string(),
|
||||
"--disable-default-apps".to_string(),
|
||||
"--disable-translate".to_string(),
|
||||
"--window-size=1920,1080".to_string(),
|
||||
"--disable-blink-features=AutomationControlled".to_string(),
|
||||
"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36".to_string()
|
||||
];
|
||||
if let Some(ref proxy) = self.proxy_url {
|
||||
let proxy = proxy.clone();
|
||||
let proxy_formatted = format!("--proxy-server={}", proxy);
|
||||
args.push(proxy_formatted);
|
||||
}
|
||||
let caps = serde_json::json!({
|
||||
"goog:chromeOptions": {
|
||||
"args": [
|
||||
"--headless",
|
||||
"--disable-gpu",
|
||||
"--no-sandbox",
|
||||
"--disable-dev-shm-usage",
|
||||
"--disable-infobars",
|
||||
"--disable-extensions",
|
||||
"--disable-popup-blocking",
|
||||
"--disable-notifications",
|
||||
"--disable-logging",
|
||||
"--disable-autofill",
|
||||
"--disable-sync",
|
||||
"--disable-default-apps",
|
||||
"--disable-translate",
|
||||
"--window-size=1920,1080",
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
],
|
||||
"args": args,
|
||||
"excludeSwitches": ["enable-logging", "enable-automation"],
|
||||
"prefs": {
|
||||
"profile.default_content_setting_values.notifications": 2
|
||||
}
|
||||
}
|
||||
});
|
||||
args.as_object()
|
||||
.expect("Capabilities should be a JSON object")
|
||||
.clone()
|
||||
caps.as_object().cloned().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user