added CompanyInfo mapping creation

2025-12-04 13:33:32 +01:00
parent 95fd9ca141
commit ef2393ab70
13 changed files with 965 additions and 696 deletions

View File

@@ -8,7 +8,7 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");
pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
dismiss_overlays(client).await?;
//dismiss_overlays(client).await?;
if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
tab.click().await?;
@@ -18,7 +18,7 @@ pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
Ok(())
}
pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
/*pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
for _ in 0..10 {
let removed: bool = client
.execute(
@@ -39,7 +39,7 @@ pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
sleep(Duration::from_millis(500)).await;
}
Ok(())
}
}*/
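The hunk elides the script body of the helper being disabled here. For context, a plausible reconstruction of the full function; the overlay selectors and the script text are assumptions, not code from this repository:

use fantoccini::Client;
use tokio::time::{sleep, Duration};

// Reconstruction sketch: selectors and script text are hypothetical.
pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
    for _ in 0..10 {
        let removed: bool = client
            .execute(
                r#"
                let hit = false;
                // Remove anything that looks like a consent or ad overlay.
                for (const el of document.querySelectorAll(
                    '[id*="consent"], [class*="overlay"], [class*="modal"]')) {
                    el.remove();
                    hit = true;
                }
                return hit;
                "#,
                vec![],
            )
            .await?
            .as_bool()
            .unwrap_or(false);
        // Stop as soon as a pass removes nothing new.
        if !removed {
            break;
        }
        sleep(Duration::from_millis(500)).await;
    }
    Ok(())
}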
pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
let script = format!(
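The hunk ends at the format! call, so the injected script itself is not shown. As a rough sketch of what such a helper can look like; the input selectors and the script body here are assumptions, not code from this repository:

use fantoccini::Client;

// Sketch only: the real selectors and script are elided in the diff above.
pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
    let script = format!(
        r#"
        const from = document.querySelector('input[name="dateFrom"]');
        const to = document.querySelector('input[name="dateTo"]');
        if (!from || !to) return false;
        from.value = '{start}';
        to.value = '{end}';
        from.dispatchEvent(new Event('change', {{ bubbles: true }}));
        to.dispatchEvent(new Event('change', {{ bubbles: true }}));
        return true;
        "#
    );
    // fantoccini's execute runs the script in the page and returns its value.
    client.execute(&script, vec![]).await?;
    Ok(())
}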

View File

@@ -1,9 +1,19 @@
// src/economic/update.rs
use super::{scraper::*, storage::*, helpers::*, types::*};
use crate::config::Config;
use crate::{config::Config, scraper::webdriver::ScrapeTask};
use crate::scraper::webdriver::ChromeDriverPool;
use chrono::{Local, NaiveDate};
use std::sync::Arc;
pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
/// Runs the full update for economic data, using the provided ChromeDriver pool.
///
/// # Arguments
/// * `config` - The application configuration.
/// * `pool` - Shared pool of ChromeDriver instances for scraping.
///
/// # Errors
/// Returns an error if scraping, loading, or saving fails.
pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
let end_date = config.target_end_date();
@@ -26,36 +36,68 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
println!("Scraping economic events: {}{}", start_date, end_date);
let mut current = start_date;
let mut total_changes = 0;
// Pass the pool to the scraping function
let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;
while current <= end_date {
set_date_range(client, &current, &end_date).await?;
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
let new_events = extract_events(client).await?;
if new_events.is_empty() { break; }
let result = process_batch(&new_events, &mut events, &today_str);
total_changes += result.changes.len();
save_changes(&result.changes).await?;
let next = new_events.iter()
.filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
.max()
.and_then(|d| d.succ_opt())
.map(|d| d.format("%Y-%m-%d").to_string())
.unwrap_or(end_date.clone());
if next > end_date { break; }
current = next;
}
// Process all at once or in batches
let result = process_batch(&new_events_all, &mut events, &today_str);
let total_changes = result.changes.len();
save_changes(&result.changes).await?;
save_optimized_chunks(events).await?;
println!("Economic update complete — {} changes detected", total_changes);
Ok(())
}
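With the new signature the caller owns the browser lifecycle: build the pool once, wrap it in an Arc, and pass it down. A hypothetical call site; ChromeDriverPool::new, its pool-size argument, and Config::load are assumptions about the surrounding crate, not part of this commit:

use std::sync::Arc;
use crate::{config::Config, scraper::webdriver::ChromeDriverPool};

// Hypothetical wiring; constructor names and the pool size are assumed.
async fn update_economic() -> anyhow::Result<()> {
    let config = Config::load()?;
    let pool = Arc::new(ChromeDriverPool::new(4).await?);
    run_full_update(&config, &pool).await
}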
/// Scrapes all economic events from start to end date using a dedicated ScrapeTask with the provided pool.
///
/// This function creates a ScrapeTask to navigate to the Finanzen.net page, prepare it,
/// and then loop through date ranges to extract events.
///
/// # Arguments
/// * `start` - Start date in YYYY-MM-DD.
/// * `end` - End date in YYYY-MM-DD.
/// * `pool` - Shared pool of ChromeDriver instances.
///
/// # Returns
/// A vector of all extracted EconomicEvent structs.
///
/// # Errors
/// Returns an error if task execution fails or extraction issues occur.
pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<Vec<EconomicEvent>> {
let url = "https://www.finanzen.net/termine/wirtschaftsdaten/".to_string();
let start_clone = start.to_string();
let end_clone = end.to_string();
let task = ScrapeTask::new(url, move |client| async move {
goto_and_prepare(&client).await?;
let mut all_events = Vec::new();
let mut current = start_clone;
while current <= end_clone {
set_date_range(&client, &current, &end_clone).await?;
tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
let new_events = extract_events(&client).await?;
if new_events.is_empty() { break; }
all_events.extend(new_events.clone());
let next = new_events.iter()
.filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
.max()
.and_then(|d| d.succ_opt())
.map(|d| d.format("%Y-%m-%d").to_string())
.unwrap_or(end_clone.clone());
if next > end_clone { break; }
current = next;
}
Ok(all_events)
});
// Use the pool for execution
task.execute_with_pool(pool).await
}
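The advance step is the subtle part of the loop above: take the latest date in the extracted batch, move one day forward, and stop once that passes the end date. Zero-padded ISO dates order correctly as plain strings, so the string comparisons above are sound. A standalone illustration of just that step (the function name is illustrative):

use chrono::NaiveDate;

// Mirrors the advance step above: returns the next start date, or None
// once the batch already reaches past `end`.
fn next_start(batch_dates: &[&str], end: &str) -> Option<String> {
    batch_dates
        .iter()
        .filter_map(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
        .max()
        .and_then(|d| d.succ_opt())
        .map(|d| d.format("%Y-%m-%d").to_string())
        .filter(|next| next.as_str() <= end)
}

fn main() {
    let batch = ["2025-12-04", "2025-12-06", "2025-12-05"];
    assert_eq!(next_start(&batch, "2025-12-31").as_deref(), Some("2025-12-07"));
    assert_eq!(next_start(&batch, "2025-12-05"), None); // already past the end
    println!("advance step behaves as expected");
}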
pub fn process_batch(
new_events: &[EconomicEvent],
existing: &mut std::collections::HashMap<String, EconomicEvent>,