Added fetching of OpenVPN packages

2025-12-09 19:51:11 +01:00
parent c2408d9a56
commit 3ab5d0dcc3
50 changed files with 4406 additions and 25 deletions

View File

@@ -1,6 +1,8 @@
use std::path::{Path, PathBuf};
use std::fs;
use crate::util::opnv;
/// Central configuration for all data paths
pub struct DataPaths {
base_dir: PathBuf,
@@ -11,6 +13,7 @@ pub struct DataPaths {
cache_gleif_dir: PathBuf,
cache_openfigi_dir: PathBuf,
cache_gleif_openfigi_map_dir: PathBuf,
cache_openvpn_dir: PathBuf,
// Economic data subdirectories
economic_events_dir: PathBuf,
economic_changes_dir: PathBuf,
@@ -33,6 +36,7 @@ impl DataPaths {
let cache_gleif_dir = cache_dir.join("gleif");
let cache_openfigi_dir = cache_dir.join("openfigi");
let cache_gleif_openfigi_map_dir = cache_dir.join("glei_openfigi");
let cache_openvpn_dir = cache_dir.join("openvpn");
// Economic subdirectories
let economic_events_dir = data_dir.join("economic").join("events");
@@ -51,6 +55,7 @@ impl DataPaths {
fs::create_dir_all(&cache_gleif_dir)?;
fs::create_dir_all(&cache_openfigi_dir)?;
fs::create_dir_all(&cache_gleif_openfigi_map_dir)?;
fs::create_dir_all(&cache_openvpn_dir)?;
fs::create_dir_all(&economic_events_dir)?;
fs::create_dir_all(&economic_changes_dir)?;
fs::create_dir_all(&corporate_events_dir)?;
@@ -65,6 +70,7 @@ impl DataPaths {
cache_gleif_dir,
cache_openfigi_dir,
cache_gleif_openfigi_map_dir,
cache_openvpn_dir,
economic_events_dir,
economic_changes_dir,
corporate_events_dir,
@@ -101,6 +107,10 @@ impl DataPaths {
&self.cache_gleif_openfigi_map_dir
}
pub fn cache_openvpn_dir(&self) -> &Path {
&self.cache_openvpn_dir
}
/// Get the economic events directory
pub fn economic_events_dir(&self) -> &Path {
&self.economic_events_dir

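A minimal sketch of how calling code can pick up the new cache directory through the getter added above; the helper name resolve_openvpn_cache is hypothetical, and the exact on-disk layout depends on how DataPaths builds cache_dir:

use std::path::PathBuf;
use anyhow::Result;
use crate::util::directories::DataPaths;

// Hypothetical helper: resolve the OpenVPN cache directory via DataPaths
// instead of hard-coding cache_dir.join("openvpn") at each call site.
fn resolve_openvpn_cache() -> Result<PathBuf> {
    // DataPaths::new also runs the fs::create_dir_all calls shown above,
    // so the returned path is expected to exist on disk.
    let paths = DataPaths::new(".")?;
    Ok(paths.cache_openvpn_dir().to_path_buf())
}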
View File

@@ -11,15 +11,15 @@
use anyhow::{anyhow, Context, Result};
use fantoccini::{Client, Locator};
use regex::Regex;
use reqwest;
use std::io::{Read};
use std::io::{self, Read};
use std::path::{Path, PathBuf};
use tokio::fs::File;
use tokio::io::AsyncWriteExt;
use url::Url;
use zip::ZipArchive;
use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask};
use crate::util::{logger, directories::DataPaths};
/// Fetches, downloads, and extracts the latest OpenVPN configurations from VPNBook.
///
@@ -63,7 +63,6 @@ use crate::scraper::webdriver::{ChromeDriverPool, ScrapeTask};
/// This function requires the following crates (add to Cargo.toml if not present):
/// - `anyhow` for error handling.
/// - `fantoccini` for browser automation.
/// - `regex` for parsing credentials from HTML.
/// - `reqwest` (with `tokio` features) for HTTP downloads.
/// - `tokio` for asynchronous file operations.
/// - `url` for URL manipulation.
@@ -94,7 +93,8 @@ pub async fn fetch_vpnbook_configs(
cache_dir: &Path,
) -> Result<(String, String, Vec<PathBuf>)> {
// Prepare the openvpn directory
let vpn_dir = cache_dir.join("openvpn");
let dir = DataPaths::new(".")?;
let vpn_dir = dir.cache_openvpn_dir();
tokio::fs::create_dir_all(&vpn_dir)
.await
.context("Failed to create openvpn directory")?;
@@ -120,29 +120,26 @@ pub async fn fetch_vpnbook_configs(
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
// Get the full page source for parsing
let page_source = client
.source()
// Find all <code> elements
let codes = client
.find_all(Locator::Css("code"))
.await
.context("Failed to retrieve page source")?;
.context("Failed to find code elements")?;
// Parse username and password using regex (assuming HTML structure like <strong>Username:</strong> value)
let user_re =
Regex::new(r"Username:\s*</strong>\s*(\w+)").context("Invalid regex for username")?;
let pass_re =
Regex::new(r"Password:\s*</strong>\s*(\w+)").context("Invalid regex for password")?;
if codes.len() < 2 {
return Err(anyhow!("Insufficient code elements found for credentials"));
}
let username = user_re
.captures(&page_source)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.ok_or_else(|| anyhow!("Username not found in page source"))?;
// The first <code> is username, second is password
let username = codes[0]
.text()
.await
.context("Failed to get username text")?;
let password = pass_re
.captures(&page_source)
.and_then(|c| c.get(1))
.map(|m| m.as_str().to_string())
.ok_or_else(|| anyhow!("Password not found in page source"))?;
let password = codes[1]
.text()
.await
.context("Failed to get password text")?;
// Locate all download links for OpenVPN ZIP files
let links = client
@@ -158,7 +155,7 @@ pub async fn fetch_vpnbook_configs(
}
}
Ok::<(String, String, Vec<String>), anyhow::Error>((username, password, rel_urls))
Ok((username, password, rel_urls))
});
// Execute the scraping task using the pool
@@ -231,7 +228,13 @@ pub async fn fetch_vpnbook_configs(
for i in 0..archive.len() {
let mut zip_file = archive.by_index(i)?;
if zip_file.name().ends_with(".ovpn") {
let target_path = hostname_dir_clone.join(zip_file.name());
// Get just the filename, stripping any path
let file_name = Path::new(zip_file.name()).file_name()
.ok_or_else(|| anyhow!("Invalid file name in ZIP: {}", zip_file.name()))?
.to_str()
.ok_or_else(|| anyhow!("Invalid UTF-8 in file name: {}", zip_file.name()))?
.to_string();
let target_path = hostname_dir_clone.join(file_name);
let mut content = Vec::new();
zip_file.read_to_end(&mut content)?;
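The file_name() handling above exists because ZIP entry names can carry directory components; a self-contained sketch of the same flattening step, with the entry name and target directory invented for illustration:

use std::path::{Path, PathBuf};

// Hypothetical entry name: an archive may store "configs/vpnbook-us1-tcp443.ovpn",
// and joining the raw name onto hostname_dir would recreate that subdirectory.
fn flatten_entry(target_dir: &Path, entry_name: &str) -> Option<PathBuf> {
    let file_name = Path::new(entry_name).file_name()?; // drops any leading directories
    Some(target_dir.join(file_name))
}

fn main() {
    let dir = Path::new("cache/openvpn/us1.vpnbook.com"); // illustrative layout
    assert_eq!(
        flatten_entry(dir, "configs/vpnbook-us1-tcp443.ovpn").unwrap(),
        dir.join("vpnbook-us1-tcp443.ovpn")
    );
}

The commit's version additionally rejects non-UTF-8 entry names via to_str(), which this sketch skips.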