Restrict collected data to important (3-yellow-star) events
This commit is contained in:
19360
economic_events.json
19360
economic_events.json
File diff suppressed because it is too large
Load Diff
71
src/main.rs
71
src/main.rs
@@ -45,52 +45,50 @@ async fn dismiss_overlays(client: &fantoccini::Client) -> anyhow::Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) -> anyhow::Result<Vec<EconomicEvent>> {
|
async fn extract_all_data_via_js(client: &fantoccini::Client) -> anyhow::Result<Vec<EconomicEvent>> {
|
||||||
println!("Extracting events with {}+ stars via JavaScript...", min_stars);
|
println!("Extracting ONLY 3-star events via JavaScript...");
|
||||||
|
|
||||||
let extraction_script = format!(r#"
|
let extraction_script = r#"
|
||||||
const events = [];
|
const events = [];
|
||||||
let currentDate = '';
|
let currentDate = '';
|
||||||
const minStars = {};
|
|
||||||
|
|
||||||
const rows = document.querySelectorAll('#TeletraderForm table tbody tr');
|
const rows = document.querySelectorAll('#TeletraderForm table tbody tr');
|
||||||
|
|
||||||
for (let i = 0; i < rows.length; i++) {{
|
for (let i = 0; i < rows.length; i++) {
|
||||||
const row = rows[i];
|
const row = rows[i];
|
||||||
const cells = row.querySelectorAll('td');
|
const cells = row.querySelectorAll('td');
|
||||||
|
|
||||||
if (cells.length === 1 && cells[0].colSpan === 9) {{
|
if (cells.length === 1 && cells[0].colSpan === 9) {
|
||||||
currentDate = cells[0].textContent.trim();
|
currentDate = cells[0].textContent.trim();
|
||||||
continue;
|
continue;
|
||||||
}}
|
}
|
||||||
|
|
||||||
if (cells.length >= 8) {{
|
if (cells.length >= 8) {
|
||||||
const time = cells[0]?.textContent?.trim() || '';
|
const time = cells[0]?.textContent?.trim() || '';
|
||||||
const country = cells[2]?.textContent?.trim() || '';
|
const country = cells[2]?.textContent?.trim() || '';
|
||||||
const eventName = cells[4]?.textContent?.trim() || '';
|
const eventName = cells[4]?.textContent?.trim() || '';
|
||||||
|
|
||||||
if (!time || !country || !eventName) continue;
|
if (!time || !country || !eventName) continue;
|
||||||
|
|
||||||
const importanceHtml = cells[3]?.innerHTML || '';
|
// Count ONLY YELLOW stars (high importance)
|
||||||
const starCount = (importanceHtml.match(/icon--star/g) || []).length;
|
const importanceCell = cells[3];
|
||||||
|
const yellowStarCount = importanceCell?.querySelectorAll('.icon--star.font-color-yellow').length || 0;
|
||||||
|
|
||||||
// Filter by minimum stars
|
// STRICT FILTER: Only include events with EXACTLY 3 YELLOW stars
|
||||||
if (starCount >= minStars) {{
|
if (yellowStarCount === 3) {
|
||||||
let description = '';
|
let description = '';
|
||||||
if (i + 1 < rows.length) {{
|
if (i + 1 < rows.length) {
|
||||||
const nextRow = rows[i + 1];
|
const nextRow = rows[i + 1];
|
||||||
const nextCells = nextRow.querySelectorAll('td');
|
const nextCells = nextRow.querySelectorAll('td');
|
||||||
if (nextCells.length === 1 || nextCells[0].colSpan === 8) {{
|
if (nextCells.length === 1 || nextCells[0].colSpan === 8) {
|
||||||
const descPara = nextRow.querySelector('p');
|
const descPara = nextRow.querySelector('p');
|
||||||
if (descPara) {{
|
if (descPara) {
|
||||||
description = descPara.textContent?.trim() || '';
|
description = descPara.textContent?.trim() || '';
|
||||||
}}
|
}
|
||||||
}}
|
}
|
||||||
}}
|
}
|
||||||
|
|
||||||
const importanceLevel = starCount === 3 ? 'High' : starCount === 2 ? 'Medium' : 'Low';
|
events.push({
|
||||||
|
|
||||||
events.push({{
|
|
||||||
country: country,
|
country: country,
|
||||||
date: currentDate,
|
date: currentDate,
|
||||||
time: time,
|
time: time,
|
||||||
@@ -98,17 +96,17 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
|||||||
actual: cells[7]?.textContent?.trim() || '',
|
actual: cells[7]?.textContent?.trim() || '',
|
||||||
forecast: cells[6]?.textContent?.trim() || '',
|
forecast: cells[6]?.textContent?.trim() || '',
|
||||||
previous: cells[5]?.textContent?.trim() || '',
|
previous: cells[5]?.textContent?.trim() || '',
|
||||||
importance: importanceLevel,
|
importance: 'High',
|
||||||
description: description
|
description: description
|
||||||
}});
|
});
|
||||||
}}
|
}
|
||||||
}}
|
}
|
||||||
}}
|
}
|
||||||
|
|
||||||
return events;
|
return events;
|
||||||
"#, min_stars);
|
"#;
|
||||||
|
|
||||||
let result = client.execute(&extraction_script, vec![]).await?;
|
let result = client.execute(extraction_script, vec![]).await?;
|
||||||
|
|
||||||
// Parse the JSON result into EconomicEvent structs
|
// Parse the JSON result into EconomicEvent structs
|
||||||
if let Some(events_array) = result.as_array() {
|
if let Some(events_array) = result.as_array() {
|
||||||
@@ -129,7 +127,7 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
|||||||
events.push(event);
|
events.push(event);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
println!("Extracted {} events via JavaScript", events.len());
|
println!("Extracted {} events (3 YELLOW stars ONLY) via JavaScript", events.len());
|
||||||
return Ok(events);
|
return Ok(events);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -137,7 +135,7 @@ async fn extract_all_data_via_js(client: &fantoccini::Client, min_stars: u8) ->
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyhow::Result<HashMap<String, String>> {
|
async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyhow::Result<HashMap<String, String>> {
|
||||||
println!("Extracting event descriptions via JavaScript (3-star events only)...");
|
println!("Extracting event descriptions via JavaScript (3 YELLOW stars only)...");
|
||||||
|
|
||||||
let description_script = r#"
|
let description_script = r#"
|
||||||
const descriptions = {};
|
const descriptions = {};
|
||||||
@@ -154,13 +152,14 @@ async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyho
|
|||||||
// Find the corresponding event name by looking for the row above
|
// Find the corresponding event name by looking for the row above
|
||||||
let eventRow = descRow.parentElement.previousElementSibling;
|
let eventRow = descRow.parentElement.previousElementSibling;
|
||||||
if (eventRow) {
|
if (eventRow) {
|
||||||
// Check if this is a 3-star event
|
// Check if this is a 3 YELLOW star event
|
||||||
const importanceCell = eventRow.querySelector('td:nth-child(4)');
|
const importanceCell = eventRow.querySelector('td:nth-child(4)');
|
||||||
if (importanceCell) {
|
if (importanceCell) {
|
||||||
const starCount = (importanceCell.innerHTML.match(/icon--star/g) || []).length;
|
// Count ONLY YELLOW stars
|
||||||
|
const yellowStarCount = importanceCell.querySelectorAll('.icon--star.font-color-yellow').length;
|
||||||
|
|
||||||
// Only process 3-star events
|
// Only process events with 3 YELLOW stars
|
||||||
if (starCount === 3) {
|
if (yellowStarCount === 3) {
|
||||||
const eventCell = eventRow.querySelector('td:nth-child(5)');
|
const eventCell = eventRow.querySelector('td:nth-child(5)');
|
||||||
if (eventCell) {
|
if (eventCell) {
|
||||||
const eventName = eventCell.textContent?.trim() || '';
|
const eventName = eventCell.textContent?.trim() || '';
|
||||||
@@ -188,7 +187,7 @@ async fn extract_event_descriptions_via_js(client: &fantoccini::Client) -> anyho
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("Extracted {} event descriptions (3-star only)", event_type_map.len());
|
println!("Extracted {} event descriptions (3 YELLOW stars only)", event_type_map.len());
|
||||||
Ok(event_type_map)
|
Ok(event_type_map)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -531,7 +530,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract using JavaScript
|
// Extract using JavaScript
|
||||||
let events = extract_all_data_via_js(&client, 3).await?;
|
let events = extract_all_data_via_js(&client).await?;
|
||||||
|
|
||||||
// Extract descriptions using JavaScript
|
// Extract descriptions using JavaScript
|
||||||
let event_type_map = extract_event_descriptions_via_js(&client).await?;
|
let event_type_map = extract_event_descriptions_via_js(&client).await?;
|
||||||
|
|||||||
Reference in New Issue
Block a user