moved functions for date parsing together
This commit is contained in:
507
src/main.rs
507
src/main.rs
@@ -1,8 +1,8 @@
|
|||||||
use chrono::{NaiveDate, Duration as ChronoDuration};
|
use chrono::{NaiveDate};
|
||||||
use fantoccini::{ClientBuilder, Locator};
|
use fantoccini::{ClientBuilder, Locator};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
use std::{collections::HashMap, process::Command};
|
use std::{process::Command};
|
||||||
use tokio::{time::{Duration, sleep}, signal};
|
use tokio::{time::{Duration, sleep}, signal};
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
@@ -183,141 +183,201 @@ async fn extract_all_data_via_js(client: &fantoccini::Client) -> anyhow::Result<
|
|||||||
Ok(vec![])
|
Ok(vec![])
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn check_data_consistency(events: &[EconomicEvent]) {
|
async fn set_date_range(client: &fantoccini::Client, start: &str, end: &str) -> anyhow::Result<()> {
|
||||||
println!("\n=== DATA CONSISTENCY CHECKS ===");
|
let set_dates_script = format!(r#"
|
||||||
|
(() => {{
|
||||||
// Count event name occurrences
|
const fromInput = document.querySelector('#dtTeletraderFromDate');
|
||||||
let mut event_names: HashMap<String, usize> = HashMap::new();
|
const toInput = document.querySelector('#dtTeletraderEndDate');
|
||||||
for event in events {
|
|
||||||
*event_names.entry(event.event.clone()).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Detect duplicates
|
if (fromInput) {{
|
||||||
let duplicates: Vec<_> = event_names
|
fromInput.value = '{}';
|
||||||
.iter()
|
fromInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
||||||
.filter(|(_, count)| **count > 1)
|
fromInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||||
.collect();
|
}}
|
||||||
|
|
||||||
if !duplicates.is_empty() {
|
if (toInput) {{
|
||||||
println!("⚠️ Found {} duplicate event names:", duplicates.len());
|
toInput.value = '{}';
|
||||||
for (name, count) in duplicates.iter().take(5) {
|
toInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
||||||
println!(" - '{}' appears {} times", name, count);
|
toInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
||||||
}
|
}}
|
||||||
|
|
||||||
|
return !!fromInput && !!toInput;
|
||||||
|
}})()
|
||||||
|
"#, start, end);
|
||||||
|
|
||||||
|
client.execute(&set_dates_script, vec![]).await?;
|
||||||
|
sleep(Duration::from_millis(1000)).await; // Wait for table to update
|
||||||
|
|
||||||
|
// Now read the values
|
||||||
|
let from_date_value: String = client.execute(
|
||||||
|
r#"return document.querySelector('#dtTeletraderFromDate')?.value;"#,
|
||||||
|
vec![],
|
||||||
|
).await?.as_str().unwrap_or_default().to_string();
|
||||||
|
|
||||||
|
let to_date_value: String = client.execute(
|
||||||
|
r#"return document.querySelector('#dtTeletraderEndDate')?.value;"#,
|
||||||
|
vec![],
|
||||||
|
).await?.as_str().unwrap_or_default().to_string();
|
||||||
|
|
||||||
|
if from_date_value == start && to_date_value == end {
|
||||||
|
println!(" Dates set correctly");
|
||||||
} else {
|
} else {
|
||||||
println!("✅ No duplicate event names found");
|
println!(" ❌ Date not set correctly. Expected: {}-{}, Got: {}-{}",
|
||||||
}
|
start, end, from_date_value, to_date_value);
|
||||||
|
|
||||||
// Check time format consistency
|
|
||||||
let valid_time_format = events.iter()
|
|
||||||
.filter(|e| {
|
|
||||||
// Time should be in format "HH:MM"
|
|
||||||
e.time.len() == 5 &&
|
|
||||||
e.time.chars().nth(2) == Some(':') &&
|
|
||||||
e.time[0..2].chars().all(|c| c.is_ascii_digit()) &&
|
|
||||||
e.time[3..5].chars().all(|c| c.is_ascii_digit())
|
|
||||||
})
|
|
||||||
.count();
|
|
||||||
|
|
||||||
println!("⏰ Valid time formats: {}/{}", valid_time_format, events.len());
|
|
||||||
|
|
||||||
// Check for missing critical data
|
|
||||||
let critical_fields_missing: Vec<_> = events.iter()
|
|
||||||
.enumerate()
|
|
||||||
.filter(|(_, e)| e.event.trim().is_empty() || e.time.trim().is_empty())
|
|
||||||
.map(|(i, e)| (i, e))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if !critical_fields_missing.is_empty() {
|
|
||||||
println!("❌ {} events missing critical fields", critical_fields_missing.len());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn validate_events(events: &[EconomicEvent]) -> anyhow::Result<()> {
|
|
||||||
println!("\n=== EVENT VALIDATION ===");
|
|
||||||
|
|
||||||
// Check if we have any events at all
|
|
||||||
if events.is_empty() {
|
|
||||||
println!("❌ ERROR: No events extracted!");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("📊 Total events: {}", events.len());
|
|
||||||
|
|
||||||
// 1. Check date range compliance
|
|
||||||
let date_range_events: Vec<_> = events.iter()
|
|
||||||
.filter(|e| {
|
|
||||||
// Extract year from German date format "Dienstag, 2. Januar 2024"
|
|
||||||
e.date.contains("2024") || e.date.contains("2025")
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
println!("📅 Events in 2024-2025 range: {}/{}",
|
|
||||||
date_range_events.len(), events.len());
|
|
||||||
|
|
||||||
// 2. Check importance filtering
|
|
||||||
let high_importance_count = events.iter()
|
|
||||||
.filter(|e| e.importance == "High")
|
|
||||||
.count();
|
|
||||||
println!("⭐ High importance events: {}/{}", high_importance_count, events.len());
|
|
||||||
|
|
||||||
// 3. Check data completeness
|
|
||||||
let complete_events = events.iter()
|
|
||||||
.filter(|e| {
|
|
||||||
!e.event.trim().is_empty() &&
|
|
||||||
!e.time.trim().is_empty() &&
|
|
||||||
!e.country.trim().is_empty() &&
|
|
||||||
(!e.actual.trim().is_empty() || !e.forecast.trim().is_empty() || !e.previous.trim().is_empty())
|
|
||||||
})
|
|
||||||
.count();
|
|
||||||
|
|
||||||
println!("✅ Complete events: {}/{}", complete_events, events.len());
|
|
||||||
|
|
||||||
// 4. Check description coverage
|
|
||||||
let events_with_descriptions = events.iter()
|
|
||||||
.filter(|e| !e.description.trim().is_empty())
|
|
||||||
.count();
|
|
||||||
println!("📝 Events with descriptions: {}/{}", events_with_descriptions, events.len());
|
|
||||||
|
|
||||||
// 5. Distribution analysis
|
|
||||||
use std::collections::HashMap;
|
|
||||||
let mut country_distribution: HashMap<String, usize> = HashMap::new();
|
|
||||||
let mut month_distribution: HashMap<String, usize> = HashMap::new();
|
|
||||||
|
|
||||||
for event in events {
|
|
||||||
*country_distribution.entry(event.country.clone()).or_insert(0) += 1;
|
|
||||||
|
|
||||||
// Extract month from German date
|
|
||||||
if let Some(month) = extract_month(&event.date) {
|
|
||||||
*month_distribution.entry(month).or_insert(0) += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
println!("🌍 Country distribution: {:?}", country_distribution);
|
|
||||||
println!("📈 Month distribution: {:?}", month_distribution);
|
|
||||||
|
|
||||||
// 6. Sample output for manual inspection
|
|
||||||
println!("\n🔍 Sample events (first 5):");
|
|
||||||
for event in events.iter().take(5) {
|
|
||||||
println!(" • {} {}: {} - {} (Importance: {})",
|
|
||||||
event.date, event.time, event.country, event.event, event.importance);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_month(date_str: &str) -> Option<String> {
|
fn parse_any_date(date: &str) -> Option<NaiveDate> {
|
||||||
// Extract month from German date format
|
// Attempt ISO first
|
||||||
let months = [
|
if let Ok(d) = NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d") {
|
||||||
"Januar", "Februar", "März", "April", "Mai", "Juni",
|
return Some(d);
|
||||||
"Juli", "August", "September", "Oktober", "November", "Dezember"
|
}
|
||||||
|
|
||||||
|
// Convert German → English once
|
||||||
|
let month_map = [
|
||||||
|
("Januar", "January"), ("Februar", "February"), ("März", "March"),
|
||||||
|
("April", "April"), ("Mai", "May"), ("Juni", "June"),
|
||||||
|
("Juli", "July"), ("August", "August"), ("September", "September"),
|
||||||
|
("Oktober", "October"), ("November", "November"), ("Dezember", "December"),
|
||||||
];
|
];
|
||||||
|
|
||||||
for month in months {
|
let mut english = date.to_string();
|
||||||
if date_str.contains(month) {
|
for (de, en) in month_map {
|
||||||
return Some(month.to_string());
|
english = english.replace(de, en);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try two formats max
|
||||||
|
NaiveDate::parse_from_str(&english, "%A, %d. %B %Y")
|
||||||
|
.or_else(|_| NaiveDate::parse_from_str(&english, "%d. %B %Y"))
|
||||||
|
.ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn calculate_next_start_date(events: &[EconomicEvent]) -> anyhow::Result<String> {
|
||||||
|
let mut dates: Vec<_> = events
|
||||||
|
.iter()
|
||||||
|
.filter_map(|e| parse_any_date(&e.date))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if dates.is_empty() {
|
||||||
|
return Err(anyhow::anyhow!("No parseable dates found"));
|
||||||
|
}
|
||||||
|
|
||||||
|
dates.sort();
|
||||||
|
let next = dates.last().unwrap().succ_opt().unwrap();
|
||||||
|
|
||||||
|
Ok(next.format("%Y-%m-%d").to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extract_month(date: &str) -> Option<String> {
|
||||||
|
parse_any_date(date).map(|d| d.format("%B").to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn count_valid_times(events: &[EconomicEvent]) -> usize {
|
||||||
|
events.iter().filter(|e| {
|
||||||
|
e.time.len() == 5 &&
|
||||||
|
e.time.as_bytes()[2] == b':' &&
|
||||||
|
e.time[..2].chars().all(|c| c.is_ascii_digit()) &&
|
||||||
|
e.time[3..].chars().all(|c| c.is_ascii_digit())
|
||||||
|
}).count()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn missing_critical(e: &EconomicEvent) -> bool {
|
||||||
|
e.event.trim().is_empty() || e.time.trim().is_empty()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_complete(e: &EconomicEvent) -> bool {
|
||||||
|
!(e.event.trim().is_empty() ||
|
||||||
|
e.time.trim().is_empty() ||
|
||||||
|
e.country.trim().is_empty()) &&
|
||||||
|
(!e.actual.trim().is_empty() ||
|
||||||
|
!e.forecast.trim().is_empty() ||
|
||||||
|
!e.previous.trim().is_empty())
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn check_data_consistency(events: &[EconomicEvent]) {
|
||||||
|
println!("\n=== DATA CONSISTENCY CHECKS ===");
|
||||||
|
|
||||||
|
println!("⏰ Valid time formats: {}/{}", count_valid_times(events), events.len());
|
||||||
|
|
||||||
|
let missing: Vec<_> = events.iter().enumerate()
|
||||||
|
.filter(|(_, e)| missing_critical(e))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if !missing.is_empty() {
|
||||||
|
println!("❌ {} events missing critical fields", missing.len());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn validate_events(events: &[EconomicEvent]) -> anyhow::Result<()> {
|
||||||
|
println!("\n=== EVENT VALIDATION ===");
|
||||||
|
|
||||||
|
if events.is_empty() {
|
||||||
|
println!("❌ ERROR: No events extracted!");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("📊 Total events: {}", events.len());
|
||||||
|
|
||||||
|
// 1. Description coverage
|
||||||
|
let desc_count = events.iter()
|
||||||
|
.filter(|e| !e.description.trim().is_empty())
|
||||||
|
.count();
|
||||||
|
|
||||||
|
println!("📝 Events with descriptions: {}/{}", desc_count, events.len());
|
||||||
|
|
||||||
|
// 2. Distributions
|
||||||
|
use std::collections::HashMap;
|
||||||
|
let mut country_dist: HashMap<String, usize> = HashMap::new();
|
||||||
|
let mut month_dist: HashMap<String, usize> = HashMap::new();
|
||||||
|
|
||||||
|
for e in events {
|
||||||
|
*country_dist.entry(e.country.clone()).or_insert(0) += 1;
|
||||||
|
|
||||||
|
if let Some(month) = extract_month(&e.date) {
|
||||||
|
*month_dist.entry(month).or_insert(0) += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
|
||||||
|
println!("🌍 Country distribution: {:?}", country_dist);
|
||||||
|
println!("📈 Month distribution: {:?}", month_dist);
|
||||||
|
|
||||||
|
// 3. Sample events (first 5)
|
||||||
|
println!("\n🔍 Sample events (first 5):");
|
||||||
|
for event in events.iter().take(5) {
|
||||||
|
println!(
|
||||||
|
" • {} {}: {} - {} (Importance: {})",
|
||||||
|
event.date, event.time, event.country, event.event, event.importance
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 4. Completeness check
|
||||||
|
let complete_count = events.iter().filter(|e| is_complete(e)).count();
|
||||||
|
println!(
|
||||||
|
"✅ Complete events: {}/{}",
|
||||||
|
complete_count,
|
||||||
|
events.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
// 5. Date range
|
||||||
|
let (earliest, latest) = calculate_actual_date_range(events);
|
||||||
|
println!("📅 Actual date range: {} to {}", earliest, latest);
|
||||||
|
|
||||||
|
// Final summary
|
||||||
|
println!("\n=== VALIDATION SUMMARY ===");
|
||||||
|
println!(" • Total events: {}", events.len());
|
||||||
|
println!(
|
||||||
|
" • Events with descriptions [%]: {}",
|
||||||
|
(desc_count * 100) / events.len().max(1)
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
" • Complete events [%]: {}",
|
||||||
|
(complete_count * 100) / events.len().max(1)
|
||||||
|
);
|
||||||
|
println!(" • Date range: {} to {}", earliest, latest);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_actual_date_range(events: &[EconomicEvent]) -> (String, String) {
|
fn calculate_actual_date_range(events: &[EconomicEvent]) -> (String, String) {
|
||||||
@@ -369,71 +429,6 @@ fn extract_date_from_german_format(german_date: &str) -> Option<NaiveDate> {
|
|||||||
NaiveDate::parse_from_str(&english_date, "%A, %d. %B %Y").ok()
|
NaiveDate::parse_from_str(&english_date, "%A, %d. %B %Y").ok()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_german_date(german_date: &str) -> Option<NaiveDate> {
|
|
||||||
if german_date.trim().is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Map German month names to numbers
|
|
||||||
let month_map = [
|
|
||||||
("Januar", 1), ("Februar", 2), ("März", 3), ("April", 4),
|
|
||||||
("Mai", 5), ("Juni", 6), ("Juli", 7), ("August", 8),
|
|
||||||
("September", 9), ("Oktober", 10), ("November", 11), ("Dezember", 12)
|
|
||||||
];
|
|
||||||
|
|
||||||
// Parse German format: "Montag, 30. April 2007"
|
|
||||||
let pattern = r"(\d{1,2})\.\s+([a-zA-Zäöüß]+)\s+(\d{4})";
|
|
||||||
let re = regex::Regex::new(pattern).unwrap();
|
|
||||||
|
|
||||||
if let Some(caps) = re.captures(german_date) {
|
|
||||||
let day = caps.get(1).unwrap().as_str().parse::<u32>().ok()?;
|
|
||||||
let german_month = caps.get(2).unwrap().as_str();
|
|
||||||
let year = caps.get(3).unwrap().as_str().parse::<i32>().ok()?;
|
|
||||||
|
|
||||||
// Find the month number
|
|
||||||
let month = month_map.iter()
|
|
||||||
.find(|(name, _)| *name == german_month)
|
|
||||||
.map(|(_, num)| *num)?;
|
|
||||||
|
|
||||||
NaiveDate::from_ymd_opt(year, month, day)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn calculate_next_start_date(events: &[EconomicEvent]) -> Result<String, anyhow::Error> {
|
|
||||||
// Try to find dates in ISO format first
|
|
||||||
let iso_dates: Vec<NaiveDate> = events
|
|
||||||
.iter()
|
|
||||||
.filter_map(|e| NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if !iso_dates.is_empty() {
|
|
||||||
if let Some(latest) = iso_dates.iter().max() {
|
|
||||||
let next_date = (*latest + ChronoDuration::days(1)).format("%Y-%m-%d").to_string();
|
|
||||||
println!("📅 Calculated next start date from ISO: {} (from latest: {})", next_date, latest);
|
|
||||||
return Ok(next_date);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback: parse German dates
|
|
||||||
println!("⚠️ No ISO dates found, trying to parse German dates...");
|
|
||||||
let german_dates: Vec<NaiveDate> = events
|
|
||||||
.iter()
|
|
||||||
.filter_map(|e| parse_german_date(&e.date))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if let Some(latest) = german_dates.iter().max() {
|
|
||||||
let next_date = (*latest + ChronoDuration::days(1)).format("%Y-%m-%d").to_string();
|
|
||||||
println!("📅 Calculated next start date from German: {} (from latest: {})", next_date, latest);
|
|
||||||
Ok(next_date)
|
|
||||||
} else {
|
|
||||||
// Final fallback: use manual date increment
|
|
||||||
println!("❌ No parseable dates found, using manual increment");
|
|
||||||
Err(anyhow::anyhow!("No parseable dates found"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn scrape_all_events_with_chunking(
|
async fn scrape_all_events_with_chunking(
|
||||||
client: &fantoccini::Client,
|
client: &fantoccini::Client,
|
||||||
start_date: &str,
|
start_date: &str,
|
||||||
@@ -452,62 +447,41 @@ async fn scrape_all_events_with_chunking(
|
|||||||
println!("⚠️ Reached maximum attempts ({})", max_attempts);
|
println!("⚠️ Reached maximum attempts ({})", max_attempts);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("🚀 Chunk {}: {} to {}", attempts, current_start, end_date);
|
println!("🚀 Chunk {}: {} to {}", attempts, current_start, end_date);
|
||||||
|
|
||||||
// Set dates for current chunk
|
|
||||||
set_date_range(client, &current_start, end_date).await?;
|
set_date_range(client, &current_start, end_date).await?;
|
||||||
|
|
||||||
// Wait a bit longer for table to load
|
|
||||||
sleep(Duration::from_secs(3)).await;
|
sleep(Duration::from_secs(3)).await;
|
||||||
|
|
||||||
// Extract events
|
let chunk = extract_all_data_via_js(client).await?;
|
||||||
let chunk_events = extract_all_data_via_js(client).await?;
|
if chunk.is_empty() {
|
||||||
|
|
||||||
if chunk_events.is_empty() {
|
|
||||||
println!("✅ No more events found. Completed!");
|
println!("✅ No more events found. Completed!");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add to total
|
|
||||||
let chunk_count = chunk_events.len();
|
|
||||||
all_events.extend(chunk_events.clone());
|
|
||||||
|
|
||||||
println!("📊 Chunk {}: {} events (Total: {})",
|
|
||||||
attempts, chunk_count, all_events.len());
|
|
||||||
|
|
||||||
// Debug: check what dates we got
|
|
||||||
let sample_dates: Vec<&str> = chunk_events.iter()
|
|
||||||
.map(|e| e.date.as_str())
|
|
||||||
.filter(|d| !d.is_empty())
|
|
||||||
.take(3)
|
|
||||||
.collect();
|
|
||||||
println!(" Sample dates in chunk: {:?}", sample_dates);
|
|
||||||
|
|
||||||
// Calculate next start date
|
println!("📊 Chunk {}: {} events (Total: {})",
|
||||||
match calculate_next_start_date(&chunk_events) {
|
attempts, chunk.len(), all_events.len() + chunk.len());
|
||||||
Ok(next_start) => {
|
|
||||||
if next_start > end_date.to_string() {
|
all_events.extend(chunk.clone());
|
||||||
println!("✅ Reached end date. Completed!");
|
|
||||||
break;
|
let next = match calculate_next_start_date(&chunk) {
|
||||||
}
|
Ok(n) => n,
|
||||||
current_start = next_start;
|
|
||||||
}
|
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
println!("❌ Could not calculate next start date. Stopping.");
|
println!("❌ Could not calculate next start date. Stopping.");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if next > end_date.to_string() {
|
||||||
|
println!("✅ Reached end date. Completed!");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Small delay between requests
|
current_start = next;
|
||||||
|
|
||||||
sleep(Duration::from_secs(2)).await;
|
sleep(Duration::from_secs(2)).await;
|
||||||
|
|
||||||
// Export chunk
|
export_chunk(&chunk, attempts, json_export_now.to_string().clone()).await?;
|
||||||
if let Ok(json) = serde_json::to_string_pretty(&chunk_events) {
|
|
||||||
let filename = format!("economic_events_{}_chunk_{}.json", json_export_now, attempts);
|
|
||||||
tokio::fs::write(&filename, json).await?;
|
|
||||||
println!(" Chunk data exported to: {}", filename);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Remove duplicates
|
// Remove duplicates
|
||||||
@@ -527,49 +501,11 @@ async fn scrape_all_events_with_chunking(
|
|||||||
Ok(all_events)
|
Ok(all_events)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn set_date_range(client: &fantoccini::Client, start: &str, end: &str) -> anyhow::Result<()> {
|
async fn export_chunk(chunk: &[EconomicEvent], n: usize, ts: String) -> anyhow::Result<()> {
|
||||||
let set_dates_script = format!(r#"
|
let filename = format!("economic_events_{}_chunk_{}.json", ts, n);
|
||||||
(() => {{
|
let json = serde_json::to_string_pretty(chunk)?;
|
||||||
const fromInput = document.querySelector('#dtTeletraderFromDate');
|
tokio::fs::write(&filename, json).await?;
|
||||||
const toInput = document.querySelector('#dtTeletraderEndDate');
|
println!(" Chunk data exported to: {}", filename);
|
||||||
|
|
||||||
if (fromInput) {{
|
|
||||||
fromInput.value = '{}';
|
|
||||||
fromInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
|
||||||
fromInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
|
||||||
}}
|
|
||||||
|
|
||||||
if (toInput) {{
|
|
||||||
toInput.value = '{}';
|
|
||||||
toInput.dispatchEvent(new Event('input', {{ bubbles: true }}));
|
|
||||||
toInput.dispatchEvent(new Event('change', {{ bubbles: true }}));
|
|
||||||
}}
|
|
||||||
|
|
||||||
return !!fromInput && !!toInput;
|
|
||||||
}})()
|
|
||||||
"#, start, end);
|
|
||||||
|
|
||||||
client.execute(&set_dates_script, vec![]).await?;
|
|
||||||
sleep(Duration::from_millis(1000)).await; // Wait for table to update
|
|
||||||
|
|
||||||
// Now read the values
|
|
||||||
let from_date_value: String = client.execute(
|
|
||||||
r#"return document.querySelector('#dtTeletraderFromDate')?.value;"#,
|
|
||||||
vec![],
|
|
||||||
).await?.as_str().unwrap_or_default().to_string();
|
|
||||||
|
|
||||||
let to_date_value: String = client.execute(
|
|
||||||
r#"return document.querySelector('#dtTeletraderEndDate')?.value;"#,
|
|
||||||
vec![],
|
|
||||||
).await?.as_str().unwrap_or_default().to_string();
|
|
||||||
|
|
||||||
if from_date_value == start && to_date_value == end {
|
|
||||||
println!(" Dates set correctly");
|
|
||||||
} else {
|
|
||||||
println!(" ❌ Date not set correctly. Expected: {}-{}, Got: {}-{}",
|
|
||||||
start, end, from_date_value, to_date_value);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -634,25 +570,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
validate_events(&events).await?;
|
validate_events(&events).await?;
|
||||||
check_data_consistency(&events).await;
|
check_data_consistency(&events).await;
|
||||||
|
|
||||||
// Calculate actual date range from extracted data
|
|
||||||
let actual_date_range = calculate_actual_date_range(&events);
|
|
||||||
let current_date = chrono::Local::now().format("%Y-%m-%d").to_string();
|
|
||||||
|
|
||||||
// Final summary
|
|
||||||
println!("\n🎯 EXTRACTION SUMMARY:");
|
|
||||||
println!(" • Total high-importance events: {}", events.len());
|
|
||||||
println!(" • Requested range: 2007-02-13 to 2025-12-01");
|
|
||||||
println!(" • Actual extracted range: {} to {}", actual_date_range.0, actual_date_range.1);
|
|
||||||
println!(" • Data extracted until: {}", current_date);
|
|
||||||
println!(" • Data quality: {}% complete",
|
|
||||||
(events.iter().filter(|e| !e.event.trim().is_empty()).count() * 100) / events.len().max(1));
|
|
||||||
|
|
||||||
// Check coverage
|
|
||||||
if actual_date_range.1 < "2025-12-01".to_string() {
|
|
||||||
println!("⚠️ WARNING: Did not reach end date. Last extracted date: {}", actual_date_range.1);
|
|
||||||
println!(" • Next run should start from: {}", calculate_next_start_date(&events).unwrap_or_else(|_| actual_date_range.1));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Export for further analysis
|
// Export for further analysis
|
||||||
if let Ok(json) = serde_json::to_string_pretty(&events) {
|
if let Ok(json) = serde_json::to_string_pretty(&events) {
|
||||||
let filename = format!("economic_events_{}_combined.json", chrono::Local::now().format("%Y%m%d_%H%M%S"));
|
let filename = format!("economic_events_{}_combined.json", chrono::Local::now().format("%Y%m%d_%H%M%S"));
|
||||||
|
|||||||
Reference in New Issue
Block a user