Files
WebScraper/PRACTICAL_EXAMPLES.md
2025-12-09 14:57:18 +01:00

12 KiB

// PRACTICAL EXAMPLES: Integration in Economic & Corporate Module
// ================================================================
// This file shows concrete implementations for integrating the VPN
// into the existing economic:: and corporate:: modules.

use anyhow::Result; use std::sync::Arc; use tokio::time::{sleep, Duration};

// ============================================================================ // EXAMPLE 1: Vereinfachte Integration in economic::run_full_update() // ============================================================================

/// Beispiel: Economic Update mit VPN-Session-Management /// Kopieren Sie diese Struktur in src/economic/mod.rs /// /// VORHER (ohne VPN): /// ignore /// pub async fn run_full_update( /// config: &Config, /// pool: &Arc<ChromeDriverPool>, /// ) -> Result<()> { /// let tickers = fetch_tickers().await?; /// for ticker in tickers { /// pool.execute(ticker, |client| async { /* scrape */ }).await?; /// } /// Ok(()) /// } /// /// /// NACHHER (mit VPN): pub async fn example_economic_with_vpn( config: &crate::config::Config, pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, ) -> Result<()> { use crate::scraper::vpn_integration::VpnIntegration;

println!("📊 Running economic update with VPN support");

// Schritt 1: VPN initialisieren (falls aktiviert)
if vpn.enabled {
    vpn.initialize_session().await?;
    sleep(Duration::from_secs(2)).await;
}

// Schritt 2: Tickers/Events laden
// let tickers = fetch_economic_events().await?;
let tickers = vec!["example1", "example2", "example3"]; // Mock

// Schritt 3: Für jeden Task
for (idx, ticker) in tickers.iter().enumerate() {
    // A. Prüfe ob VPN-Rotation erforderlich
    if vpn.check_and_rotate_if_needed().await? {
        println!("🔄 Rotating VPN session...");
        sleep(Duration::from_secs(3)).await; // Warte auf neue IP
    }

    // B. Führe Task aus
    match execute_economic_task(pool, ticker).await {
        Ok(_) => {
            // C. Inkrementiere Task-Counter
            vpn.increment_task().await;

            // D. Logging
            if let Some(session_id) = vpn.get_current_session_id().await {
                println!(
                    "✓ Task {}/{} completed in session {}",
                    idx + 1,
                    tickers.len(),
                    session_id
                );
            } else {
                println!("✓ Task {}/{} completed", idx + 1, tickers.len());
            }
        }
        Err(e) => {
            eprintln!("❌ Task failed: {}", e);
            // Optional: Bei kritischen Fehlern brechen, sonst fortfahren
        }
    }

    // E. Rate-Limiting (wichtig für Zielwebsite)
    sleep(Duration::from_millis(500)).await;
}

println!("✓ Economic update completed");
Ok(())

}

async fn execute_economic_task( _pool: &Arccrate::scraper::webdriver::ChromeDriverPool, _ticker: &str, ) -> Result<()> { // TODO: Implementierung mit pool.execute() Ok(()) }

// ============================================================================ // EXAMPLE 2: Corporate Update mit VPN // ============================================================================

pub async fn example_corporate_with_vpn( config: &crate::config::Config, pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, ) -> Result<()> { println!("📊 Running corporate update with VPN support");

if vpn.enabled {
    vpn.initialize_session().await?;
    sleep(Duration::from_secs(2)).await;
}

// Corporate tasks verarbeiten
let companies = vec!["AAPL", "MSFT", "GOOGL"]; // Mock

for (idx, company) in companies.iter().enumerate() {
    // Rotation check
    if vpn.check_and_rotate_if_needed().await? {
        println!("🔄 Rotating VPN for corporate update");
        sleep(Duration::from_secs(3)).await;
    }

    // Task execution
    match execute_corporate_task(pool, company).await {
        Ok(_) => {
            vpn.increment_task().await;
            println!("✓ Corporate task {}/{} completed", idx + 1, companies.len());
        }
        Err(e) => {
            eprintln!("❌ Corporate task failed: {}", e);
        }
    }

    sleep(Duration::from_millis(500)).await;
}

println!("✓ Corporate update completed");
Ok(())

}

async fn execute_corporate_task( _pool: &Arccrate::scraper::webdriver::ChromeDriverPool, _company: &str, ) -> Result<()> { // TODO: Implementierung Ok(()) }

// ============================================================================ // EXAMPLE 3: Advanced - Custom VPN-Rotation pro Task // ============================================================================

/// Wenn Sie eine IP pro Task haben möchten (nicht empfohlen, aber möglich): pub async fn example_rotation_per_task( pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, ) -> Result<()> { let tasks = vec!["task1", "task2", "task3"];

for task in tasks {
    // Vor jedem Task: Neue Session erstellen
    if vpn.enabled {
        vpn.initialize_session().await?;
        sleep(Duration::from_secs(5)).await; // Warte auf Verbindung
        
        if let Some(ip) = vpn.get_current_ip().await {
            println!("📍 Task '{}' uses IP: {}", task, ip);
        }
    }

    // Task ausführen
    println!("Executing task: {}", task);

    // Nach Task: Task-Counter (hier nur 1)
    vpn.increment_task().await;
}

Ok(())

}

// ============================================================================ // EXAMPLE 4: Error Handling & Retry Logic // ============================================================================

pub async fn example_with_retry( pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, max_retries: u32, ) -> Result<()> { let tasks = vec!["task1", "task2"];

for task in tasks {
    let mut attempt = 0;

    loop {
        attempt += 1;

        // Rotation check
        if vpn.check_and_rotate_if_needed().await? {
            sleep(Duration::from_secs(3)).await;
        }

        // Versuche Task
        match execute_economic_task(pool, task).await {
            Ok(_) => {
                vpn.increment_task().await;
                println!("✓ Task succeeded on attempt {}", attempt);
                break;
            }
            Err(e) if attempt < max_retries => {
                eprintln!("⚠️  Task failed (attempt {}): {}, retrying...", attempt, e);

                // Exponential backoff
                let backoff = Duration::from_secs(2 ^ (attempt - 1));
                sleep(backoff).await;

                // Optional: Neue VPN-Session vor Retry
                if attempt % 2 == 0 && vpn.enabled {
                    println!("🔄 Rotating VPN before retry");
                    vpn.initialize_session().await?;
                    sleep(Duration::from_secs(3)).await;
                }
            }
            Err(e) => {
                eprintln!("❌ Task failed after {} attempts: {}", max_retries, e);
                break;
            }
        }
    }
}

Ok(())

}

// ============================================================================ // EXAMPLE 5: Batch Processing (mehrere Tasks pro Session) // ============================================================================

pub async fn example_batch_processing( pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, batch_size: usize, ) -> Result<()> { let all_tasks = vec!["t1", "t2", "t3", "t4", "t5"];

// Gruppiere Tasks in Batches
for batch in all_tasks.chunks(batch_size) {
    // Neue Session pro Batch
    if vpn.enabled {
        vpn.initialize_session().await?;
        sleep(Duration::from_secs(2)).await;
        
        if let Some(ip) = vpn.get_current_ip().await {
            println!("🔗 New batch session with IP: {}", ip);
        }
    }

    // Tasks in Batch verarbeiten
    for task in batch {
        if let Ok(_) = execute_economic_task(pool, task).await {
            vpn.increment_task().await;
            println!("✓ Task {} completed", task);
        }
    }

    sleep(Duration::from_millis(500)).await;
}

Ok(())

}

// ============================================================================ // EXAMPLE 6: Parallel Scraping mit VPN-Awareness // ============================================================================

/// Nutze ChromeDriver-Pool-Parallelism mit VPN pub async fn example_parallel_with_vpn( pool: &Arccrate::scraper::webdriver::ChromeDriverPool, vpn: &crate::scraper::vpn_integration::VpnIntegration, ) -> Result<()> { let tasks = vec!["url1", "url2", "url3"];

// Stellt sicher, dass nur pool_size Tasks parallel laufen
// (Semaphore im ChromeDriverPool kontrolliert das)
let mut handles = vec![];

for task in tasks {
    let vpn_clone = std::sync::Arc::new(
        crate::scraper::vpn_integration::VpnIntegration::from_config(&crate::config::Config::default())?
    );
    
    let handle = tokio::spawn(async move {
        // Jeder Task rotiert unabhängig
        vpn_clone.increment_task().await;
        println!("Task {} executed", task);
    });

    handles.push(handle);
}

// Warte auf alle Tasks
for handle in handles {
    handle.await?;
}

Ok(())

}

// ============================================================================ // EXAMPLE 7: Monitoring & Stats // ============================================================================

/// Aggregated statistics about VPN sessions and the tasks run in them.
#[derive(Debug, Clone, Default)]
pub struct VpnSessionStats {
    /// Number of VPN sessions.
    pub total_sessions: usize,
    /// Number of tasks across all sessions.
    pub total_tasks: usize,
    /// Task count per session.
    // NOTE(review): the element type was lost in the source (bare `Vec`);
    // `usize` is assumed to match the other counters. Confirm.
    pub tasks_per_session: Vec<usize>,
    /// IP addresses that were used.
    // NOTE(review): the element type was lost in the source (bare `Vec`);
    // `String` is assumed. Confirm against what `get_current_ip` yields.
    pub ips_used: Vec<String>,
}

/// Builds a [`VpnSessionStats`] value for the given VPN integration.
///
/// Placeholder: `vpn` is not inspected yet, and the result always has zero
/// counters and empty lists.
pub async fn collect_stats(
    vpn: &crate::scraper::vpn_integration::VpnIntegration,
) -> VpnSessionStats {
    // TODO: gather real statistics.
    // Production code would have an analytics service for this.
    let tasks_per_session = Vec::new();
    let ips_used = Vec::new();

    VpnSessionStats {
        total_sessions: 0,
        total_tasks: 0,
        tasks_per_session,
        ips_used,
    }
}

/// Prints a summary of `stats` to stdout.
///
/// The average is an integer division of total tasks by total sessions and
/// is reported as 0 when there are no sessions.
pub async fn print_stats(stats: &VpnSessionStats) {
    // `checked_div` yields `None` for a zero divisor, which maps to 0.
    let avg_tasks_per_session = stats
        .total_tasks
        .checked_div(stats.total_sessions)
        .unwrap_or(0);
    let unique_ip_count = stats.ips_used.len();

    println!("\n📊 VPN Session Statistics:");
    println!(" Total sessions: {}", stats.total_sessions);
    println!(" Total tasks: {}", stats.total_tasks);
    println!(" Avg tasks/session: {}", avg_tasks_per_session);
    println!(" Unique IPs: {}", unique_ip_count);
}

// ============================================================================ // EXAMPLE 8: Integration in main.rs // ============================================================================

/// How to bring everything together in main.rs:
///
/// ```ignore
/// #[tokio::main]
/// async fn main() -> Result<()> {
///     // 1. Setup
///     tracing_subscriber::fmt().init();
///     let config = Config::load()?;
///
///     // 2. Initialize the VPN
///     let vpn = VpnIntegration::from_config(&config)?;
///
///     // 3. Create the pool
///     let pool = Arc::new(ChromeDriverPool::new(config.max_parallel_tasks).await?);
///
///     // 4. Updates with VPN
///     economic::run_full_update_with_vpn(&config, &pool, &vpn).await?;
///     corporate::run_full_update_with_vpn(&config, &pool, &vpn).await?;
///
///     Ok(())
/// }
/// ```

// ============================================================================ // EXAMPLE 9: Unit Tests // ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    /// A `VpnIntegration` built by hand with the VPN switched off and no
    /// session manager or automater reports `enabled == false`.
    #[tokio::test]
    async fn test_rotation_trigger() {
        // Hand-built stand-in for a real VPN integration.
        let disabled_vpn = crate::scraper::vpn_integration::VpnIntegration {
            enabled: false,
            session_manager: None,
            automater: None,
        };

        assert!(!disabled_vpn.enabled);
    }
}