Add CompanyInfo mapping creation
@@ -8,7 +8,7 @@ const EXTRACTION_JS: &str = include_str!("extraction_script.js");
 
 pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
     client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
-    dismiss_overlays(client).await?;
+    //dismiss_overlays(client).await?;
 
     if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
         tab.click().await?;
@@ -18,7 +18,7 @@ pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
     Ok(())
 }
 
-pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
+/*pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
     for _ in 0..10 {
         let removed: bool = client
             .execute(
@@ -39,7 +39,7 @@ pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
         sleep(Duration::from_millis(500)).await;
     }
     Ok(())
-}
+}*/
 
 pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
     let script = format!(
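For context, the now-disabled dismiss_overlays follows a common fantoccini pattern: repeatedly execute a JavaScript snippet that removes overlay/consent nodes, and stop once a pass removes nothing. A minimal sketch of that pattern, with an illustrative selector list (the real selectors live in the commented-out body and are not shown in this diff):

use fantoccini::Client;
use tokio::time::{sleep, Duration};

// Sketch only: the selectors are placeholders, not the ones used by this repo.
async fn dismiss_overlays_sketch(client: &Client) -> anyhow::Result<()> {
    let js = r#"
        const nodes = document.querySelectorAll('[id*="consent"], [class*="overlay"]');
        nodes.forEach(n => n.remove());
        return nodes.length > 0;
    "#;
    for _ in 0..10 {
        // execute() returns a serde_json::Value; the script reports whether it removed anything.
        let removed = client.execute(js, vec![]).await?.as_bool().unwrap_or(false);
        if !removed { break; }
        sleep(Duration::from_millis(500)).await;
    }
    Ok(())
}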
@@ -1,9 +1,19 @@
 // src/economic/update.rs
 use super::{scraper::*, storage::*, helpers::*, types::*};
-use crate::config::Config;
+use crate::{config::Config, scraper::webdriver::ScrapeTask};
+use crate::scraper::webdriver::ChromeDriverPool;
 use chrono::{Local, NaiveDate};
+use std::sync::Arc;
 
-pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
+/// Runs the full update for economic data, using the provided ChromeDriver pool.
+///
+/// # Arguments
+/// * `config` - The application configuration.
+/// * `pool` - Shared pool of ChromeDriver instances for scraping.
+///
+/// # Errors
+/// Returns an error if scraping, loading, or saving fails.
+pub async fn run_full_update(config: &Config, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<()> {
     let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string();
     let end_date = config.target_end_date();
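With the signature change, callers now provide the configuration and a shared driver pool instead of a single fantoccini Client. The pool constructor is not shown in this diff, so the wiring below is hypothetical (ChromeDriverPool::new(2) is an assumed API):

use std::sync::Arc;
use crate::config::Config;
use crate::scraper::webdriver::ChromeDriverPool;

// Hypothetical call site; ChromeDriverPool::new(2) is assumed, not confirmed by this diff.
async fn update_economic_data(config: &Config) -> anyhow::Result<()> {
    let pool = Arc::new(ChromeDriverPool::new(2).await?);
    run_full_update(config, &pool).await?;
    Ok(())
}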
@@ -26,36 +36,68 @@ pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> {
 
     println!("Scraping economic events: {} → {}", start_date, end_date);
 
-    let mut current = start_date;
-    let mut total_changes = 0;
-
-    while current <= end_date {
-        set_date_range(client, &current, &end_date).await?;
-        tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
-
-        let new_events = extract_events(client).await?;
-        if new_events.is_empty() { break; }
-
-        let result = process_batch(&new_events, &mut events, &today_str);
-        total_changes += result.changes.len();
-        save_changes(&result.changes).await?;
-
-        let next = new_events.iter()
-            .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
-            .max()
-            .and_then(|d| d.succ_opt())
-            .map(|d| d.format("%Y-%m-%d").to_string())
-            .unwrap_or(end_date.clone());
-
-        if next > end_date { break; }
-        current = next;
-    }
+    // Pass the pool to the scraping function
+    let new_events_all = scrape_all_economic_events(&start_date, &end_date, pool).await?;
+
+    // Process all at once or in batches
+    let result = process_batch(&new_events_all, &mut events, &today_str);
+    let total_changes = result.changes.len();
+    save_changes(&result.changes).await?;
 
     save_optimized_chunks(events).await?;
     println!("Economic update complete — {} changes detected", total_changes);
     Ok(())
 }
+
+/// Scrapes all economic events from start to end date using a dedicated ScrapeTask with the provided pool.
+///
+/// This function creates a ScrapeTask to navigate to the Finanzen.net page, prepare it,
+/// and then loop through date ranges to extract events.
+///
+/// # Arguments
+/// * `start` - Start date in YYYY-MM-DD.
+/// * `end` - End date in YYYY-MM-DD.
+/// * `pool` - Shared pool of ChromeDriver instances.
+///
+/// # Returns
+/// A vector of all extracted EconomicEvent structs.
+///
+/// # Errors
+/// Returns an error if task execution fails or extraction issues occur.
+pub async fn scrape_all_economic_events(start: &str, end: &str, pool: &Arc<ChromeDriverPool>) -> anyhow::Result<Vec<EconomicEvent>> {
+    let url = "https://www.finanzen.net/termine/wirtschaftsdaten/".to_string();
+    let start_clone = start.to_string();
+    let end_clone = end.to_string();
+
+    let task = ScrapeTask::new(url, move |client| async move {
+        goto_and_prepare(&client).await?;
+        let mut all_events = Vec::new();
+        let mut current = start_clone;
+
+        while current <= end_clone {
+            set_date_range(&client, &current, &end_clone).await?;
+            tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;
+            let new_events = extract_events(&client).await?;
+            if new_events.is_empty() { break; }
+            all_events.extend(new_events.clone());
+
+            let next = new_events.iter()
+                .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
+                .max()
+                .and_then(|d| d.succ_opt())
+                .map(|d| d.format("%Y-%m-%d").to_string())
+                .unwrap_or(end_clone.clone());
+
+            if next > end_clone { break; }
+            current = next;
+        }
+        Ok(all_events)
+    });
+
+    // Use the pool for execution
+    task.execute_with_pool(pool).await
+}
 
 pub fn process_batch(
     new_events: &[EconomicEvent],
     existing: &mut std::collections::HashMap<String, EconomicEvent>,
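Both the old and the new loop advance the scraping window the same way: parse every scraped event date, take the latest, step one day forward, and stop once past the end of the range. That step could be lifted into a small helper for testing; a sketch using chrono (the name next_start_date is ours):

use chrono::NaiveDate;

// Returns the day after the latest parseable event date, or `end` if none parse.
fn next_start_date(event_dates: &[String], end: &str) -> String {
    event_dates
        .iter()
        .filter_map(|d| NaiveDate::parse_from_str(d, "%Y-%m-%d").ok())
        .max()
        .and_then(|d| d.succ_opt())
        .map(|d| d.format("%Y-%m-%d").to_string())
        .unwrap_or_else(|| end.to_string())
}

Comparing the resulting YYYY-MM-DD strings with <= and > is safe here because zero-padded ISO dates sort lexicographically in date order.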