adding corporate data to webscraper
This commit is contained in:
84
src/economic/scraper.rs
Normal file
84
src/economic/scraper.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
// src/economic/scraper.rs
|
||||
use super::types::{EconomicEvent, ScrapeResult};
|
||||
use fantoccini::Client;
|
||||
use tokio::time::{sleep, Duration};
|
||||
use chrono::{Local, NaiveDate};
|
||||
|
||||
const EXTRACTION_JS: &str = include_str!("extraction_script.js");
|
||||
|
||||
pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> {
|
||||
client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?;
|
||||
dismiss_overlays(client).await?;
|
||||
|
||||
if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
|
||||
tab.click().await?;
|
||||
println!("High importance tab selected");
|
||||
sleep(Duration::from_secs(2)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> {
|
||||
for _ in 0..10 {
|
||||
let removed: bool = client
|
||||
.execute(
|
||||
r#"(() => {
|
||||
const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
|
||||
if (iframe && iframe.parentNode) {
|
||||
iframe.parentNode.removeChild(iframe);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()"#,
|
||||
vec![],
|
||||
)
|
||||
.await?
|
||||
.as_bool()
|
||||
.unwrap_or(false);
|
||||
if removed { break; }
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> {
|
||||
let script = format!(
|
||||
r#"
|
||||
(() => {{
|
||||
const from = document.querySelector('#dtTeletraderFromDate');
|
||||
const to = document.querySelector('#dtTeletraderEndDate');
|
||||
if (from) {{ from.value = '{}'; from.dispatchEvent(new Event('change', {{bubbles: true}})); }}
|
||||
if (to) {{ to.value = '{}'; to.dispatchEvent(new Event('change', {{bubbles: true}})); }}
|
||||
return true;
|
||||
}})()
|
||||
"#,
|
||||
start, end
|
||||
);
|
||||
client.execute(&script, vec![]).await?;
|
||||
sleep(Duration::from_millis(1200)).await;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn extract_events(client: &Client) -> anyhow::Result<Vec<EconomicEvent>> {
|
||||
let result = client.execute(EXTRACTION_JS, vec![]).await?;
|
||||
let array = result.as_array().ok_or_else(|| anyhow::anyhow!("Expected array"))?;
|
||||
|
||||
let mut events = Vec::with_capacity(array.len());
|
||||
for val in array {
|
||||
if let Some(obj) = val.as_object() {
|
||||
events.push(EconomicEvent {
|
||||
country: obj["country"].as_str().unwrap_or("").to_string(),
|
||||
date: obj["date"].as_str().unwrap_or("").to_string(),
|
||||
time: obj["time"].as_str().unwrap_or("").to_string(),
|
||||
event: obj["event"].as_str().unwrap_or("").to_string(),
|
||||
actual: obj["actual"].as_str().unwrap_or("").to_string(),
|
||||
forecast: obj["forecast"].as_str().unwrap_or("").to_string(),
|
||||
previous: obj["previous"].as_str().unwrap_or("").to_string(),
|
||||
importance: "High".to_string(),
|
||||
description: obj["description"].as_str().unwrap_or("").to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
println!("Extracted {} high-impact events", events.len());
|
||||
Ok(events)
|
||||
}
|
||||
Reference in New Issue
Block a user