adding corporate data to webscraper

This commit is contained in:
2025-11-21 00:17:59 +01:00
parent 0ea3fcc3b5
commit 9d0d15f3f8
18 changed files with 2128 additions and 970 deletions

84
src/economic/scraper.rs Normal file
View File

@@ -0,0 +1,84 @@
// src/economic/scraper.rs
use super::types::{EconomicEvent, ScrapeResult};
use fantoccini::Client;
use tokio::time::{sleep, Duration};
use chrono::{Local, NaiveDate};
const EXTRACTION_JS: &str = include_str!("extraction_script.js");
/// Opens the finanzen.net economic calendar and gets the page ready for scraping.
///
/// After navigating, any blocking overlay is removed via [`dismiss_overlays`].
/// The function then looks for the three-star ("high importance") tab; when it
/// is found it is clicked, a confirmation line is printed, and the function
/// waits two seconds. When the tab cannot be located, that step is skipped
/// without reporting an error.
///
/// # Errors
///
/// Returns an error if navigation, overlay dismissal, or the click on the tab
/// fails.
pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
    let calendar_url = "https://www.finanzen.net/termine/wirtschaftsdaten/";
    let tab_selector = r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#;

    client.goto(calendar_url).await?;
    dismiss_overlays(client).await?;

    // A failed lookup is tolerated on purpose: the tab is treated as optional.
    let lookup = client.find(fantoccini::Locator::Css(tab_selector)).await;
    if let Ok(high_importance_tab) = lookup {
        high_importance_tab.click().await?;
        println!("High importance tab selected");
        // Presumably gives the page time to reload its table — TODO confirm.
        sleep(Duration::from_secs(2)).await;
    }
    Ok(())
}
/// Removes the "Contentpass First Layer" consent iframe from the current page.
///
/// The DOM is polled up to 10 times with a 500 ms pause between attempts, and
/// polling stops as soon as the iframe has been found and detached. If the
/// iframe never appears within that window the function still returns
/// `Ok(())`.
///
/// # Errors
///
/// Returns an error if executing the script in the browser session fails.
pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
    // WebDriver runs the script as a function body, so the outcome must be
    // handed back with a top-level `return`. A bare expression (such as an
    // immediately-invoked arrow function) always yields `null`, which would
    // make the early exit below unreachable.
    const REMOVE_IFRAME_JS: &str = r#"
        const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
        if (iframe && iframe.parentNode) {
            iframe.parentNode.removeChild(iframe);
            return true;
        }
        return false;
    "#;
    const MAX_ATTEMPTS: usize = 10;
    const RETRY_DELAY: Duration = Duration::from_millis(500);

    for _ in 0..MAX_ATTEMPTS {
        let removed = client
            .execute(REMOVE_IFRAME_JS, vec![])
            .await?
            .as_bool()
            .unwrap_or(false);
        if removed {
            break;
        }
        sleep(RETRY_DELAY).await;
    }
    Ok(())
}
/// Writes `start` and `end` into the calendar's from/to date inputs.
///
/// The injected script sets the value of `#dtTeletraderFromDate` and
/// `#dtTeletraderEndDate` and dispatches a bubbling `change` event on each.
/// An input that is not present on the page is skipped. After the script has
/// run, the function waits 1200 ms before returning.
///
/// `start` and `end` are spliced verbatim into single-quoted JavaScript string
/// literals, so they must not contain quotes or other characters that would
/// break out of the literal. The expected date format is not visible here —
/// verify against the caller.
///
/// # Errors
///
/// Returns an error if executing the script in the browser session fails.
pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
    // Presumably lets the page refresh its table after the change events — TODO confirm.
    let settle_time = Duration::from_millis(1200);

    // The template lines stay unindented so the generated script text is unchanged.
    let js = format!(
        r#"
(() => {{
const from = document.querySelector('#dtTeletraderFromDate');
const to = document.querySelector('#dtTeletraderEndDate');
if (from) {{ from.value = '{}'; from.dispatchEvent(new Event('change', {{bubbles: true}})); }}
if (to) {{ to.value = '{}'; to.dispatchEvent(new Event('change', {{bubbles: true}})); }}
return true;
}})()
"#,
        start, end
    );

    client.execute(&js, vec![]).await?;
    sleep(settle_time).await;
    Ok(())
}
/// Runs the bundled extraction script in the browser and converts its result
/// into [`EconomicEvent`] values.
///
/// The script result must be a JSON array. Entries that are not JSON objects
/// are skipped. A field that is missing or is not a string becomes an empty
/// string. `importance` is always set to `"High"`, regardless of what the
/// script returns. The number of extracted events is printed before
/// returning.
///
/// # Errors
///
/// Returns an error if executing the script fails or if its result is not an
/// array.
pub async fn extract_events(client: &Client) -> anyhow::Result<Vec<EconomicEvent>> {
    let result = client.execute(EXTRACTION_JS, vec![]).await?;
    let rows = result
        .as_array()
        .ok_or_else(|| anyhow::anyhow!("Expected array"))?;

    let events: Vec<EconomicEvent> = rows
        .iter()
        .filter_map(|row| row.as_object())
        .map(|obj| {
            // Indexing a JSON object map with `obj["key"]` panics when the key
            // is absent, so fields are looked up with `get` and fall back to
            // an empty string instead.
            let text = |key: &str| {
                obj.get(key)
                    .and_then(|value| value.as_str())
                    .unwrap_or("")
                    .to_string()
            };
            EconomicEvent {
                country: text("country"),
                date: text("date"),
                time: text("time"),
                event: text("event"),
                actual: text("actual"),
                forecast: text("forecast"),
                previous: text("previous"),
                importance: "High".to_string(),
                description: text("description"),
            }
        })
        .collect();

    println!("Extracted {} high-impact events", events.len());
    Ok(events)
}