added getting opnv setup files
This commit is contained in:
278
src/util/opnv.rs
Normal file
278
src/util/opnv.rs
Normal file
@@ -0,0 +1,278 @@
|
||||
// src/util/opnv.rs
|
||||
|
||||
//! Module for fetching, downloading, and extracting OpenVPN configurations from VPNBook.
|
||||
//!
|
||||
//! This module provides functionality to scrape the VPNBook free VPN page using
|
||||
//! a headless browser, handle potential consent popups, extract current credentials,
|
||||
//! collect download URLs for OpenVPN ZIP files, download them, and then extract
|
||||
//! the .ovpn files into a structured directory: cache/openvpn/<hostname>/<ovpn_filename>.
|
||||
//! It is designed to fetch the most recent data on every run, as credentials and
|
||||
//! server configurations change periodically.
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use fantoccini::{Client, Locator};
|
||||
use regex::Regex;
|
||||
use reqwest;
|
||||
use std::io::{Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use tokio::fs::File;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use url::Url;
|
||||
use zip::ZipArchive;
|
||||
use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask};
|
||||
|
||||
/// Fetches, downloads, and extracts the latest OpenVPN configurations from VPNBook.
|
||||
///
|
||||
/// This asynchronous function uses the provided `ChromeDriverPool` to scrape the
|
||||
/// VPNBook free VPN page. It dismisses any consent popup if present, extracts the
|
||||
/// current username and password, collects all OpenVPN ZIP download URLs, downloads
|
||||
/// the ZIP files temporarily, extracts the .ovpn files into the specified directory
|
||||
/// structure under `cache_dir`/openvpn/<hostname>/, and cleans up the ZIP files.
|
||||
///
|
||||
/// The directory structure is: cache/openvpn/<hostname>/<ovpn_filename>, where
|
||||
/// <hostname> is derived from the ZIP filename (e.g., "ca149.vpnbook.com").
|
||||
///
|
||||
/// The function ensures fresh data is fetched each time it runs, making it suitable
|
||||
/// for periodic updates where credentials may change.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `pool` - A reference to the `ChromeDriverPool` for managing browser instances.
|
||||
/// * `cache_dir` - The path to the base cache directory. The OpenVPN files will be saved
|
||||
/// under `cache_dir`/openvpn/<hostname>/.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `Result` containing a tuple with:
|
||||
/// - `String`: The scraped username.
|
||||
/// - `String`: The scraped password.
|
||||
/// - `Vec<PathBuf>`: Paths to the extracted .ovpn files.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an `anyhow::Error` if:
|
||||
/// - Navigation to the page fails.
|
||||
/// - The consent popup cannot be dismissed (if present).
|
||||
/// - Credentials cannot be parsed from the page.
|
||||
/// - Download URLs cannot be found or are invalid.
|
||||
/// - HTTP downloads fail or file writing errors occur.
|
||||
/// - ZIP extraction fails (e.g., invalid ZIP or I/O errors).
|
||||
///
|
||||
/// # Dependencies
|
||||
///
|
||||
/// This function requires the following crates (add to Cargo.toml if not present):
|
||||
/// - `anyhow` for error handling.
|
||||
/// - `fantoccini` for browser automation.
|
||||
/// - `regex` for parsing credentials from HTML.
|
||||
/// - `reqwest` (with `tokio` features) for HTTP downloads.
|
||||
/// - `tokio` for asynchronous file operations.
|
||||
/// - `url` for URL manipulation.
|
||||
/// - `zip` for ZIP extraction.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// use anyhow::Result;
|
||||
/// use event_backtest_engine::scraper::opnv::fetch_vpnbook_configs;
|
||||
/// use event_backtest_engine::scraper::webdriver::ChromeDriverPool;
|
||||
/// use std::path::Path;
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main() -> Result<()> {
|
||||
/// let pool = ChromeDriverPool::new(1).await?;
|
||||
/// let (username, password, files) =
|
||||
/// fetch_vpnbook_configs(&pool, Path::new("./cache")).await?;
|
||||
/// println!("Username: {}, Password: {}", username, password);
|
||||
/// for file in files {
|
||||
/// println!("Extracted: {:?}", file);
|
||||
/// }
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub async fn fetch_vpnbook_configs(
|
||||
pool: &ChromeDriverPool,
|
||||
cache_dir: &Path,
|
||||
) -> Result<(String, String, Vec<PathBuf>)> {
|
||||
// Prepare the openvpn directory
|
||||
let vpn_dir = cache_dir.join("openvpn");
|
||||
tokio::fs::create_dir_all(&vpn_dir)
|
||||
.await
|
||||
.context("Failed to create openvpn directory")?;
|
||||
|
||||
// Temporary directory for ZIP downloads (under cache for consistency)
|
||||
let temp_dir = cache_dir.join("temp_vpn_zips");
|
||||
tokio::fs::create_dir_all(&temp_dir)
|
||||
.await
|
||||
.context("Failed to create temp directory")?;
|
||||
|
||||
let url = "https://www.vpnbook.com/freevpn".to_string();
|
||||
|
||||
// Define the scraping task
|
||||
let task = ScrapeTask::new(url, |client: Client| async move {
|
||||
// Attempt to dismiss consent popup if present
|
||||
let consent_selector = r#"body > div.fc-consent-root > div.fc-dialog-container > div.fc-dialog.fc-choice-dialog > div.fc-footer-buttons-container > div.fc-footer-buttons > button.fc-button.fc-cta-do-not-consent.fc-secondary-button > p"#;
|
||||
if let Ok(consent_elem) = client.find(Locator::Css(consent_selector)).await {
|
||||
consent_elem
|
||||
.click()
|
||||
.await
|
||||
.context("Failed to click consent dismissal button")?;
|
||||
// Brief delay to allow popup to close
|
||||
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
|
||||
}
|
||||
|
||||
// Get the full page source for parsing
|
||||
let page_source = client
|
||||
.source()
|
||||
.await
|
||||
.context("Failed to retrieve page source")?;
|
||||
|
||||
// Parse username and password using regex (assuming HTML structure like <strong>Username:</strong> value)
|
||||
let user_re =
|
||||
Regex::new(r"Username:\s*</strong>\s*(\w+)").context("Invalid regex for username")?;
|
||||
let pass_re =
|
||||
Regex::new(r"Password:\s*</strong>\s*(\w+)").context("Invalid regex for password")?;
|
||||
|
||||
let username = user_re
|
||||
.captures(&page_source)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str().to_string())
|
||||
.ok_or_else(|| anyhow!("Username not found in page source"))?;
|
||||
|
||||
let password = pass_re
|
||||
.captures(&page_source)
|
||||
.and_then(|c| c.get(1))
|
||||
.map(|m| m.as_str().to_string())
|
||||
.ok_or_else(|| anyhow!("Password not found in page source"))?;
|
||||
|
||||
// Locate all download links for OpenVPN ZIP files
|
||||
let links = client
|
||||
.find_all(Locator::Css(r#"a[href^="/free-openvpn-account/"][download=""]"#))
|
||||
.await
|
||||
.context("Failed to find download links")?;
|
||||
|
||||
// Collect relative hrefs
|
||||
let mut rel_urls = Vec::new();
|
||||
for link in links {
|
||||
if let Some(href) = link.attr("href").await.context("Failed to get href attribute")? {
|
||||
rel_urls.push(href);
|
||||
}
|
||||
}
|
||||
|
||||
Ok::<(String, String, Vec<String>), anyhow::Error>((username, password, rel_urls))
|
||||
});
|
||||
|
||||
// Execute the scraping task using the pool
|
||||
let (username, password, rel_urls) = task.execute_with_pool(pool).await?;
|
||||
|
||||
// Base URL for resolving relative paths
|
||||
let base_url = Url::parse("https://www.vpnbook.com/")?;
|
||||
|
||||
// Download each ZIP file to temp_dir
|
||||
let mut zip_paths = Vec::new();
|
||||
for rel in &rel_urls {
|
||||
let full_url = base_url.join(rel).context("Failed to join URL")?;
|
||||
let filename = rel
|
||||
.split('/')
|
||||
.last()
|
||||
.ok_or_else(|| anyhow!("Invalid filename in URL"))?
|
||||
.to_string();
|
||||
let out_path = temp_dir.join(&filename);
|
||||
|
||||
// Perform HTTP GET request
|
||||
let resp = reqwest::get(full_url.clone())
|
||||
.await
|
||||
.with_context(|| format!("Failed to send download request for {}", full_url))?;
|
||||
|
||||
if resp.status().is_success() {
|
||||
let bytes = resp
|
||||
.bytes()
|
||||
.await
|
||||
.context("Failed to read response bytes")?;
|
||||
|
||||
// Write to file asynchronously
|
||||
let mut file = File::create(&out_path)
|
||||
.await
|
||||
.context("Failed to create output file")?;
|
||||
file.write_all(&bytes)
|
||||
.await
|
||||
.context("Failed to write to file")?;
|
||||
|
||||
zip_paths.push(out_path);
|
||||
} else {
|
||||
return Err(anyhow!(
|
||||
"Download failed with status: {} for URL: {}",
|
||||
resp.status(),
|
||||
full_url
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Now extract .ovpn files from each ZIP
|
||||
let mut extracted_paths = Vec::new();
|
||||
for zip_path in zip_paths {
|
||||
let hostname = get_hostname_from_zip_filename(
|
||||
zip_path.file_name().unwrap().to_str().unwrap(),
|
||||
);
|
||||
let hostname_dir = vpn_dir.join(&hostname);
|
||||
tokio::fs::create_dir_all(&hostname_dir)
|
||||
.await
|
||||
.context("Failed to create hostname directory")?;
|
||||
|
||||
// Use spawn_blocking for sync ZIP operations
|
||||
let zip_path_clone = zip_path.clone();
|
||||
let hostname_dir_clone = hostname_dir.clone();
|
||||
let extract_result = tokio::task::spawn_blocking(move || {
|
||||
let file = std::fs::File::open(&zip_path_clone)
|
||||
.with_context(|| format!("Failed to open ZIP file: {:?}", zip_path_clone))?;
|
||||
let mut archive = ZipArchive::new(file)
|
||||
.with_context(|| format!("Failed to read ZIP archive: {:?}", zip_path_clone))?;
|
||||
|
||||
let mut paths = Vec::new();
|
||||
for i in 0..archive.len() {
|
||||
let mut zip_file = archive.by_index(i)?;
|
||||
if zip_file.name().ends_with(".ovpn") {
|
||||
let target_path = hostname_dir_clone.join(zip_file.name());
|
||||
let mut content = Vec::new();
|
||||
zip_file.read_to_end(&mut content)?;
|
||||
|
||||
std::fs::write(&target_path, &content)
|
||||
.with_context(|| format!("Failed to write .ovpn file: {:?}", target_path))?;
|
||||
paths.push(target_path);
|
||||
}
|
||||
}
|
||||
Ok::<Vec<PathBuf>, anyhow::Error>(paths)
|
||||
})
|
||||
.await
|
||||
.context("Spawn blocking failed")??;
|
||||
|
||||
extracted_paths.extend(extract_result);
|
||||
|
||||
// Clean up the ZIP file after extraction
|
||||
tokio::fs::remove_file(&zip_path)
|
||||
.await
|
||||
.context("Failed to remove temp ZIP file")?;
|
||||
}
|
||||
|
||||
// Optional: Clean up temp_dir if empty
|
||||
let _ = tokio::fs::remove_dir(&temp_dir).await;
|
||||
|
||||
Ok((username, password, extracted_paths))
|
||||
}
|
||||
|
||||
/// Derives the hostname from the ZIP filename.
///
/// For example, "vpnbook-openvpn-ca149.zip" -> "ca149.vpnbook.com"
///
/// Returns "unknown.vpnbook.com" if the name does not have the form
/// `vpnbook-openvpn-<code>.zip`, or if `<code>` is empty (which would
/// otherwise yield the hidden-looking directory name ".vpnbook.com").
fn get_hostname_from_zip_filename(filename: &str) -> String {
    filename
        .strip_prefix("vpnbook-openvpn-")
        .and_then(|rest| rest.strip_suffix(".zip"))
        .filter(|code| !code.is_empty())
        .map(|code| format!("{}.vpnbook.com", code))
        .unwrap_or_else(|| "unknown.vpnbook.com".to_string())
}
|
||||
Reference in New Issue
Block a user