getting on website; edit date and importance
This commit is contained in:
305
src/main.rs
Normal file
305
src/main.rs
Normal file
@@ -0,0 +1,305 @@
|
||||
use fantoccini::{ClientBuilder, Locator};
|
||||
use serde_json::{Map, Value};
|
||||
use std::{collections::HashMap, process::Command};
|
||||
use tokio::{time::{Duration, sleep}, signal};
|
||||
use futures::future::join_all;
|
||||
|
||||
/// One row of the economic calendar table, stored as the raw cell texts.
///
/// All fields hold unparsed text exactly as read from the page; a missing
/// cell is represented by an empty string.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct EconomicEvent {
    /// Text of table cell index 2.
    country: String,
    /// Currently always left empty by `scrape_events`.
    date: String,
    /// Text of table cell index 0.
    time: String,
    /// Text of table cell index 4 (the event name).
    event: String,
    /// Text of table cell index 7.
    actual: String,
    /// Text of table cell index 6.
    forecast: String,
    /// Text of table cell index 5.
    previous: String,
    /// Text of table cell index 3.
    importance: String,
    /// Text of the first `td p` element in the row following the event row,
    /// or empty when there is none.
    description: String,
}
|
||||
|
||||
/// Launches the bundled ChromeDriver binary listening on `port`.
///
/// Returns the handle of the spawned child process; the caller is
/// responsible for terminating it.
///
/// # Panics
///
/// Panics with "Failed to start ChromeDriver" if the process cannot be spawned.
fn start_chromedriver(port: u16) -> std::process::Child {
    let driver_path = "chromedriver-win64/chromedriver.exe";
    let port_flag = format!("--port={}", port);

    let mut launcher = Command::new(driver_path);
    launcher.arg(port_flag);
    launcher.spawn().expect("Failed to start ChromeDriver")
}
|
||||
|
||||
async fn hide_contentpass_overlay(client: &fantoccini::Client) -> anyhow::Result<()> {
|
||||
for _ in 0..20 {
|
||||
let hidden: bool = client.execute(
|
||||
r#"(() => {
|
||||
const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
|
||||
if (!iframe) return true;
|
||||
iframe.style.display = 'none';
|
||||
iframe.style.visibility = 'hidden';
|
||||
return false;
|
||||
})()"#,
|
||||
vec![]
|
||||
).await?.as_bool().unwrap_or(false);
|
||||
|
||||
if hidden { break; }
|
||||
tokio::time::sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn scrape_events(client: &fantoccini::Client) -> anyhow::Result<Vec<EconomicEvent>> {
|
||||
// Find all rows
|
||||
let rows = client.find_all(Locator::Css(
|
||||
"#TeletraderForm table tbody tr"
|
||||
)).await?;
|
||||
|
||||
let mut events = vec![];
|
||||
let mut empty_count = 0;
|
||||
let mut useful_count = 0;
|
||||
|
||||
let mut i = 0;
|
||||
while i < rows.len() {
|
||||
let cells = rows[i].find_all(Locator::Css("td")).await?;
|
||||
let texts: Vec<String> = join_all(
|
||||
cells.iter().map(|c| async move { c.text().await.unwrap_or_default() })
|
||||
).await;
|
||||
|
||||
let mut description = String::new();
|
||||
// Try to get the description from the next row if it exists
|
||||
if i + 1 < rows.len() {
|
||||
if let Ok(desc_row) = rows[i + 1].find(Locator::Css("td p")).await {
|
||||
description = desc_row.text().await.unwrap_or_default();
|
||||
}
|
||||
}
|
||||
|
||||
let event = EconomicEvent {
|
||||
country: texts.get(2).cloned().unwrap_or_default(),
|
||||
date: "".to_string(),
|
||||
time: texts.get(0).cloned().unwrap_or_default(),
|
||||
event: texts.get(4).cloned().unwrap_or_default(),
|
||||
actual: texts.get(7).cloned().unwrap_or_default(),
|
||||
forecast: texts.get(6).cloned().unwrap_or_default(),
|
||||
previous: texts.get(5).cloned().unwrap_or_default(),
|
||||
importance: texts.get(3).cloned().unwrap_or_default(),
|
||||
description,
|
||||
};
|
||||
|
||||
if event.event.trim().is_empty() && event.country.trim().is_empty() {
|
||||
empty_count += 1;
|
||||
} else {
|
||||
useful_count += 1;
|
||||
}
|
||||
|
||||
events.push(event);
|
||||
i += 2; // skip the description row
|
||||
}
|
||||
|
||||
println!("Total events found: {}", events.len());
|
||||
println!("Useful events: {}", useful_count);
|
||||
println!("Empty events: {}", empty_count);
|
||||
|
||||
for e in events.iter().filter(|ev| !ev.event.trim().is_empty() && !ev.country.trim().is_empty()) {
|
||||
println!("{:?}", e);
|
||||
}
|
||||
|
||||
/*for e in &events {
|
||||
println!("{:#?}", e);
|
||||
}*/
|
||||
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let port = 9515; // pick a port you like
|
||||
let mut chromedriver = start_chromedriver(port);
|
||||
sleep(Duration::from_secs(2)).await; // wait for ChromeDriver to start
|
||||
|
||||
// Chrome options (non-headless so it opens)
|
||||
let caps_value = serde_json::json!({
|
||||
"goog:chromeOptions": {
|
||||
"args": [
|
||||
//"--headless",
|
||||
"--disable-gpu",
|
||||
"--disable-notifications",
|
||||
"--disable-popup-blocking",
|
||||
"--disable-blink-features=AutomationControlled"
|
||||
],
|
||||
"excludeSwitches": ["enable-automation"]
|
||||
}
|
||||
});
|
||||
|
||||
let caps_map: Map<String, Value> = caps_value.as_object()
|
||||
.expect("Capabilities should be a JSON object")
|
||||
.clone();
|
||||
|
||||
let mut client = ClientBuilder::native()
|
||||
.capabilities(caps_map)
|
||||
.connect(&format!("http://localhost:{}", port))
|
||||
.await?;
|
||||
|
||||
// Setup graceful shutdown on Ctrl+C
|
||||
let shutdown_client = client.clone();
|
||||
let shutdown_handle = tokio::spawn(async move {
|
||||
signal::ctrl_c().await.expect("Failed to listen for ctrl+c");
|
||||
println!("\nCtrl+C received, shutting down...");
|
||||
shutdown_client.close().await.ok();
|
||||
chromedriver.kill().ok();
|
||||
std::process::exit(0);
|
||||
});
|
||||
|
||||
// Go to page
|
||||
let url = "https://www.finanzen.net/termine/wirtschaftsdaten/";
|
||||
client.goto(url).await?;
|
||||
|
||||
let _ = client.execute(
|
||||
r#"(() => {
|
||||
const overlay = document.querySelector('iframe[title="Contentpass First Layer"]');
|
||||
if (overlay) {
|
||||
overlay.style.display = "none";
|
||||
overlay.style.visibility = "hidden";
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()"#,
|
||||
vec![]
|
||||
).await;
|
||||
|
||||
for _ in 0..30 {
|
||||
// Check if the iframe exists
|
||||
let overlay_hidden: bool = client.execute(
|
||||
r#"(() => {
|
||||
const iframe = document.querySelector('iframe[title="Contentpass First Layer"]');
|
||||
if (!iframe) return true; // already gone
|
||||
|
||||
// Try clicking button inside iframe via contentWindow
|
||||
try {
|
||||
const btn = iframe.contentWindow.document.querySelector('button');
|
||||
if(btn) btn.click();
|
||||
} catch(e) {}
|
||||
|
||||
// Hide the iframe itself
|
||||
iframe.style.display = 'none';
|
||||
iframe.style.visibility = 'hidden';
|
||||
return false; // still hidden
|
||||
})()"#,
|
||||
vec![]
|
||||
).await.ok()
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
if overlay_hidden {
|
||||
break;
|
||||
}
|
||||
|
||||
sleep(Duration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
client.enter_parent_frame().await.ok();
|
||||
|
||||
// Set start and end dates
|
||||
let start_date = "2024-01-01";
|
||||
let end_date = "2025-01-01";
|
||||
|
||||
let set_dates_script = format!(r#"
|
||||
(() => {{
|
||||
const fromInput = document.querySelector('#dtTeletraderFromDate');
|
||||
const toInput = document.querySelector('#dtTeletraderEndDate');
|
||||
|
||||
if (fromInput) {{
|
||||
fromInput.value = '{}';
|
||||
fromInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
||||
fromInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||
}}
|
||||
|
||||
if (toInput) {{
|
||||
toInput.value = '{}';
|
||||
toInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
||||
toInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||
}}
|
||||
|
||||
return !!fromInput && !!toInput;
|
||||
}})()
|
||||
"#, start_date, end_date);
|
||||
|
||||
let dates_set = client.execute(&set_dates_script, vec![])
|
||||
.await
|
||||
.ok()
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false);
|
||||
|
||||
if dates_set {
|
||||
println!("Dates set successfully from {} to {}", start_date, end_date);
|
||||
} else {
|
||||
println!("Failed to set dates");
|
||||
}
|
||||
|
||||
// Hide any reappearing overlay
|
||||
hide_contentpass_overlay(&client).await?;
|
||||
|
||||
// Wait for the tab to appear and click it
|
||||
if let Ok(_) = client.find(Locator::Css(
|
||||
"#TeletraderForm > article.page-content__item.page-content__item--space.margin-bottom-1\\.00.margin-top-1\\.00-md > div.tab-region > nav > div > div > div.tab__item.tab__item--active"
|
||||
)).await {
|
||||
// Example: click "Hohe Relevanz" tab
|
||||
if let Ok(tab) = client.find(Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await {
|
||||
tab.click().await?;
|
||||
}
|
||||
println!("Importance tab clicked");
|
||||
} else {
|
||||
println!("Importance tab not found");
|
||||
}
|
||||
|
||||
// Wait a bit for the table to load
|
||||
sleep(Duration::from_secs(2)).await;
|
||||
|
||||
// Find all table rows
|
||||
let rows = client.find_all(Locator::Css(
|
||||
"#TeletraderForm table.table tbody tr"
|
||||
)).await?;
|
||||
|
||||
println!("Found {} table rows", rows.len());
|
||||
|
||||
// HashMap to store "Termin" -> description
|
||||
let mut termin_map: HashMap<String, String> = HashMap::new();
|
||||
|
||||
let mut i = 0;
|
||||
while i < rows.len() {
|
||||
let row = &rows[i];
|
||||
|
||||
// Extract all cells
|
||||
let cells = row.find_all(Locator::Css("td")).await?;
|
||||
|
||||
if cells.len() >= 5 {
|
||||
// Get Termin column text
|
||||
let termin_text = cells[4].text().await.unwrap_or_default();
|
||||
|
||||
// Check if next row is a hidden description row
|
||||
if i + 1 < rows.len() {
|
||||
let next_row = &rows[i + 1];
|
||||
let class = next_row.attr("class").await.unwrap_or(None).unwrap_or_default();
|
||||
if class.starts_with("table__td teletrader") {
|
||||
// Get the hidden description
|
||||
let desc_cell = next_row.find(Locator::Css("td")).await?;
|
||||
let desc_text = desc_cell.text().await.unwrap_or_default();
|
||||
termin_map.insert(termin_text.clone(), desc_text);
|
||||
i += 1; // skip next row since it's the hidden description
|
||||
} else {
|
||||
termin_map.insert(termin_text.clone(), "".to_string());
|
||||
}
|
||||
} else {
|
||||
termin_map.insert(termin_text.clone(), "".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let events = scrape_events(&client).await?;
|
||||
|
||||
println!("Collected {} Termin entries", termin_map.len());
|
||||
for (k, v) in &termin_map {
|
||||
println!("{:?} => {:?}", k, v);
|
||||
}
|
||||
|
||||
// Wait for Ctrl+C
|
||||
shutdown_handle.await.ok();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user