// src/scraper/opnv.rs //! Module for fetching, downloading, and extracting OpenVPN configurations from VPNBook. //! //! This module provides functionality to scrape the VPNBook free VPN page using //! a headless browser, handle potential consent popups, extract current credentials, //! collect download URLs for OpenVPN ZIP files, download them, and then extract //! the .ovpn files into a structured directory: cache/openvpn//. //! It is designed to fetch the most recent data on every run, as credentials and //! server configurations change periodically. use anyhow::{anyhow, Context, Result}; use fantoccini::{Client, Locator}; use regex::Regex; use reqwest; use std::io::{Read}; use std::path::{Path, PathBuf}; use tokio::fs::File; use tokio::io::AsyncWriteExt; use url::Url; use zip::ZipArchive; use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask}; /// Fetches, downloads, and extracts the latest OpenVPN configurations from VPNBook. /// /// This asynchronous function uses the provided `ChromeDriverPool` to scrape the /// VPNBook free VPN page. It dismisses any consent popup if present, extracts the /// current username and password, collects all OpenVPN ZIP download URLs, downloads /// the ZIP files temporarily, extracts the .ovpn files into the specified directory /// structure under `cache_dir`/openvpn//, and cleans up the ZIP files. /// /// The directory structure is: cache/openvpn//, where /// is derived from the ZIP filename (e.g., "ca149.vpnbook.com"). /// /// The function ensures fresh data is fetched each time it runs, making it suitable /// for periodic updates where credentials may change. /// /// # Arguments /// /// * `pool` - A reference to the `ChromeDriverPool` for managing browser instances. /// * `cache_dir` - The path to the base cache directory. The OpenVPN files will be saved /// under `cache_dir`/openvpn//. /// /// # Returns /// /// A `Result` containing a tuple with: /// - `String`: The scraped username. /// - `String`: The scraped password. /// - `Vec`: Paths to the extracted .ovpn files. /// /// # Errors /// /// Returns an `anyhow::Error` if: /// - Navigation to the page fails. /// - The consent popup cannot be dismissed (if present). /// - Credentials cannot be parsed from the page. /// - Download URLs cannot be found or are invalid. /// - HTTP downloads fail or file writing errors occur. /// - ZIP extraction fails (e.g., invalid ZIP or I/O errors). /// /// # Dependencies /// /// This function requires the following crates (add to Cargo.toml if not present): /// - `anyhow` for error handling. /// - `fantoccini` for browser automation. /// - `regex` for parsing credentials from HTML. /// - `reqwest` (with `tokio` features) for HTTP downloads. /// - `tokio` for asynchronous file operations. /// - `url` for URL manipulation. /// - `zip` for ZIP extraction. /// /// # Examples /// /// ```no_run /// use anyhow::Result; /// use event_backtest_engine::scraper::opnv::fetch_vpnbook_configs; /// use event_backtest_engine::scraper::webdriver::ChromeDriverPool; /// use std::path::Path; /// /// #[tokio::main] /// async fn main() -> Result<()> { /// let pool = ChromeDriverPool::new(1).await?; /// let (username, password, files) = /// fetch_vpnbook_configs(&pool, Path::new("./cache")).await?; /// println!("Username: {}, Password: {}", username, password); /// for file in files { /// println!("Extracted: {:?}", file); /// } /// Ok(()) /// } /// ``` pub async fn fetch_vpnbook_configs( pool: &ChromeDriverPool, cache_dir: &Path, ) -> Result<(String, String, Vec)> { // Prepare the openvpn directory let vpn_dir = cache_dir.join("openvpn"); tokio::fs::create_dir_all(&vpn_dir) .await .context("Failed to create openvpn directory")?; // Temporary directory for ZIP downloads (under cache for consistency) let temp_dir = cache_dir.join("temp_vpn_zips"); tokio::fs::create_dir_all(&temp_dir) .await .context("Failed to create temp directory")?; let url = "https://www.vpnbook.com/freevpn".to_string(); // Define the scraping task let task = ScrapeTask::new(url, |client: Client| async move { // Attempt to dismiss consent popup if present let consent_selector = r#"body > div.fc-consent-root > div.fc-dialog-container > div.fc-dialog.fc-choice-dialog > div.fc-footer-buttons-container > div.fc-footer-buttons > button.fc-button.fc-cta-do-not-consent.fc-secondary-button > p"#; if let Ok(consent_elem) = client.find(Locator::Css(consent_selector)).await { consent_elem .click() .await .context("Failed to click consent dismissal button")?; // Brief delay to allow popup to close tokio::time::sleep(std::time::Duration::from_secs(1)).await; } // Get the full page source for parsing let page_source = client .source() .await .context("Failed to retrieve page source")?; // Parse username and password using regex (assuming HTML structure like Username: value) let user_re = Regex::new(r"Username:\s*\s*(\w+)").context("Invalid regex for username")?; let pass_re = Regex::new(r"Password:\s*\s*(\w+)").context("Invalid regex for password")?; let username = user_re .captures(&page_source) .and_then(|c| c.get(1)) .map(|m| m.as_str().to_string()) .ok_or_else(|| anyhow!("Username not found in page source"))?; let password = pass_re .captures(&page_source) .and_then(|c| c.get(1)) .map(|m| m.as_str().to_string()) .ok_or_else(|| anyhow!("Password not found in page source"))?; // Locate all download links for OpenVPN ZIP files let links = client .find_all(Locator::Css(r#"a[href^="/free-openvpn-account/"][download=""]"#)) .await .context("Failed to find download links")?; // Collect relative hrefs let mut rel_urls = Vec::new(); for link in links { if let Some(href) = link.attr("href").await.context("Failed to get href attribute")? { rel_urls.push(href); } } Ok::<(String, String, Vec), anyhow::Error>((username, password, rel_urls)) }); // Execute the scraping task using the pool let (username, password, rel_urls) = task.execute_with_pool(pool).await?; // Base URL for resolving relative paths let base_url = Url::parse("https://www.vpnbook.com/")?; // Download each ZIP file to temp_dir let mut zip_paths = Vec::new(); for rel in &rel_urls { let full_url = base_url.join(rel).context("Failed to join URL")?; let filename = rel .split('/') .last() .ok_or_else(|| anyhow!("Invalid filename in URL"))? .to_string(); let out_path = temp_dir.join(&filename); // Perform HTTP GET request let resp = reqwest::get(full_url.clone()) .await .with_context(|| format!("Failed to send download request for {}", full_url))?; if resp.status().is_success() { let bytes = resp .bytes() .await .context("Failed to read response bytes")?; // Write to file asynchronously let mut file = File::create(&out_path) .await .context("Failed to create output file")?; file.write_all(&bytes) .await .context("Failed to write to file")?; zip_paths.push(out_path); } else { return Err(anyhow!( "Download failed with status: {} for URL: {}", resp.status(), full_url )); } } // Now extract .ovpn files from each ZIP let mut extracted_paths = Vec::new(); for zip_path in zip_paths { let hostname = get_hostname_from_zip_filename( zip_path.file_name().unwrap().to_str().unwrap(), ); let hostname_dir = vpn_dir.join(&hostname); tokio::fs::create_dir_all(&hostname_dir) .await .context("Failed to create hostname directory")?; // Use spawn_blocking for sync ZIP operations let zip_path_clone = zip_path.clone(); let hostname_dir_clone = hostname_dir.clone(); let extract_result = tokio::task::spawn_blocking(move || { let file = std::fs::File::open(&zip_path_clone) .with_context(|| format!("Failed to open ZIP file: {:?}", zip_path_clone))?; let mut archive = ZipArchive::new(file) .with_context(|| format!("Failed to read ZIP archive: {:?}", zip_path_clone))?; let mut paths = Vec::new(); for i in 0..archive.len() { let mut zip_file = archive.by_index(i)?; if zip_file.name().ends_with(".ovpn") { let target_path = hostname_dir_clone.join(zip_file.name()); let mut content = Vec::new(); zip_file.read_to_end(&mut content)?; std::fs::write(&target_path, &content) .with_context(|| format!("Failed to write .ovpn file: {:?}", target_path))?; paths.push(target_path); } } Ok::, anyhow::Error>(paths) }) .await .context("Spawn blocking failed")??; extracted_paths.extend(extract_result); // Clean up the ZIP file after extraction tokio::fs::remove_file(&zip_path) .await .context("Failed to remove temp ZIP file")?; } // Optional: Clean up temp_dir if empty let _ = tokio::fs::remove_dir(&temp_dir).await; Ok((username, password, extracted_paths)) } /// Derives the hostname from the ZIP filename. /// /// For example, "vpnbook-openvpn-ca149.zip" -> "ca149.vpnbook.com" /// /// If the format doesn't match, returns "unknown.vpnbook.com". fn get_hostname_from_zip_filename(filename: &str) -> String { if filename.starts_with("vpnbook-openvpn-") && filename.ends_with(".zip") { let code = filename .strip_prefix("vpnbook-openvpn-") .unwrap() .strip_suffix(".zip") .unwrap(); format!("{}.vpnbook.com", code) } else { "unknown.vpnbook.com".to_string() } }