restricting collected data to important events (3 yellow stars only)
This commit is contained in:
71
src/main.rs
71
src/main.rs
@@ -45,52 +45,50 @@ async fn dismiss_overlays(client: &fantoccini::Client) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) -> anyhow::Result<Vec<EconomicEvent>> {
|
||||
println!("Extracting events with {}+ stars via JavaScript...", min_stars);
|
||||
async fn extract_all_data_via_js(client: &fantoccini::Client) -> anyhow::Result<Vec<EconomicEvent>> {
|
||||
println!("Extracting ONLY 3-star events via JavaScript...");
|
||||
|
||||
let extraction_script = format!(r#"
|
||||
let extraction_script = r#"
|
||||
const events = [];
|
||||
let currentDate = '';
|
||||
const minStars = {};
|
||||
|
||||
const rows = document.querySelectorAll('#TeletraderForm table tbody tr');
|
||||
|
||||
for (let i = 0; i < rows.length; i++) {{
|
||||
for (let i = 0; i < rows.length; i++) {
|
||||
const row = rows[i];
|
||||
const cells = row.querySelectorAll('td');
|
||||
|
||||
if (cells.length === 1 && cells[0].colSpan === 9) {{
|
||||
if (cells.length === 1 && cells[0].colSpan === 9) {
|
||||
currentDate = cells[0].textContent.trim();
|
||||
continue;
|
||||
}}
|
||||
}
|
||||
|
||||
if (cells.length >= 8) {{
|
||||
if (cells.length >= 8) {
|
||||
const time = cells[0]?.textContent?.trim() || '';
|
||||
const country = cells[2]?.textContent?.trim() || '';
|
||||
const eventName = cells[4]?.textContent?.trim() || '';
|
||||
|
||||
if (!time || !country || !eventName) continue;
|
||||
|
||||
const importanceHtml = cells[3]?.innerHTML || '';
|
||||
const starCount = (importanceHtml.match(/icon--star/g) || []).length;
|
||||
// Count ONLY YELLOW stars (high importance)
|
||||
const importanceCell = cells[3];
|
||||
const yellowStarCount = importanceCell?.querySelectorAll('.icon--star.font-color-yellow').length || 0;
|
||||
|
||||
// Filter by minimum stars
|
||||
if (starCount >= minStars) {{
|
||||
// STRICT FILTER: Only include events with EXACTLY 3 YELLOW stars
|
||||
if (yellowStarCount === 3) {
|
||||
let description = '';
|
||||
if (i + 1 < rows.length) {{
|
||||
if (i + 1 < rows.length) {
|
||||
const nextRow = rows[i + 1];
|
||||
const nextCells = nextRow.querySelectorAll('td');
|
||||
if (nextCells.length === 1 || nextCells[0].colSpan === 8) {{
|
||||
if (nextCells.length === 1 || nextCells[0].colSpan === 8) {
|
||||
const descPara = nextRow.querySelector('p');
|
||||
if (descPara) {{
|
||||
if (descPara) {
|
||||
description = descPara.textContent?.trim() || '';
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const importanceLevel = starCount === 3 ? 'High' : starCount === 2 ? 'Medium' : 'Low';
|
||||
|
||||
events.push({{
|
||||
events.push({
|
||||
country: country,
|
||||
date: currentDate,
|
||||
time: time,
|
||||
@@ -98,17 +96,17 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
||||
actual: cells[7]?.textContent?.trim() || '',
|
||||
forecast: cells[6]?.textContent?.trim() || '',
|
||||
previous: cells[5]?.textContent?.trim() || '',
|
||||
importance: importanceLevel,
|
||||
importance: 'High',
|
||||
description: description
|
||||
}});
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return events;
|
||||
"#, min_stars);
|
||||
"#;
|
||||
|
||||
let result = client.execute(&extraction_script, vec![]).await?;
|
||||
let result = client.execute(extraction_script, vec![]).await?;
|
||||
|
||||
// Parse the JSON result into EconomicEvent structs
|
||||
if let Some(events_array) = result.as_array() {
|
||||
@@ -129,7 +127,7 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
||||
events.push(event);
|
||||
}
|
||||
}
|
||||
println!("Extracted {} events via JavaScript", events.len());
|
||||
println!("Extracted {} events (3 YELLOW stars ONLY) via JavaScript", events.len());
|
||||
return Ok(events);
|
||||
}
|
||||
|
||||
@@ -137,7 +135,7 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
||||
}
|
||||
|
||||
async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyhow::Result<HashMap<String, String>> {
|
||||
println!("Extracting event descriptions via JavaScript (3-star events only)...");
|
||||
println!("Extracting event descriptions via JavaScript (3 YELLOW stars only)...");
|
||||
|
||||
let description_script = r#"
|
||||
const descriptions = {};
|
||||
@@ -154,13 +152,14 @@ async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyho
|
||||
// Find the corresponding event name by looking for the row above
|
||||
let eventRow = descRow.parentElement.previousElementSibling;
|
||||
if (eventRow) {
|
||||
// Check if this is a 3-star event
|
||||
// Check if this is a 3 YELLOW star event
|
||||
const importanceCell = eventRow.querySelector('td:nth-child(4)');
|
||||
if (importanceCell) {
|
||||
const starCount = (importanceCell.innerHTML.match(/icon--star/g) || []).length;
|
||||
// Count ONLY YELLOW stars
|
||||
const yellowStarCount = importanceCell.querySelectorAll('.icon--star.font-color-yellow').length;
|
||||
|
||||
// Only process 3-star events
|
||||
if (starCount === 3) {
|
||||
// Only process events with 3 YELLOW stars
|
||||
if (yellowStarCount === 3) {
|
||||
const eventCell = eventRow.querySelector('td:nth-child(5)');
|
||||
if (eventCell) {
|
||||
const eventName = eventCell.textContent?.trim() || '';
|
||||
@@ -188,7 +187,7 @@ async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyho
|
||||
}
|
||||
}
|
||||
|
||||
println!("Extracted {} event descriptions (3-star only)", event_type_map.len());
|
||||
println!("Extracted {} event descriptions (3 YELLOW stars only)", event_type_map.len());
|
||||
Ok(event_type_map)
|
||||
}
|
||||
|
||||
@@ -531,7 +530,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
// Extract using JavaScript
|
||||
let events = extract_all_data_via_js(&client, 3).await?;
|
||||
let events = extract_all_data_via_js(&client).await?;
|
||||
|
||||
// Extract descriptions using JavaScript
|
||||
let event_type_map = extract_event_descriptions_via_js(&client).await?;
|
||||
|
||||
Reference in New Issue
Block a user