added creating CompanyInfo mapping
This commit is contained in:
@@ -1,18 +1,17 @@
|
||||
use crate::corporate::openfigi::OpenFigiClient;
|
||||
|
||||
// src/corporate/scraper.rs
|
||||
use super::{types::*, helpers::*};
|
||||
use csv::ReaderBuilder;
|
||||
use super::{types::*, helpers::*, openfigi::*};
|
||||
//use crate::corporate::openfigi::OpenFigiClient;
|
||||
use crate::{scraper::webdriver::*};
|
||||
use fantoccini::{Client, Locator};
|
||||
use scraper::{Html, Selector};
|
||||
use chrono::{DateTime, Duration, NaiveDate, Timelike, Utc};
|
||||
use chrono::{DateTime, Duration, NaiveDate, Utc};
|
||||
use tokio::{time::{Duration as TokioDuration, sleep}};
|
||||
use reqwest::Client as HttpClient;
|
||||
use serde_json::{json, Value};
|
||||
use zip::ZipArchive;
|
||||
use std::fs::File;
|
||||
use std::{collections::HashMap};
|
||||
use std::io::{Read, BufReader};
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
use std::io::{Read};
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
||||
|
||||
@@ -333,28 +332,6 @@ fn exchange_name_to_mic(name: &str) -> String {
|
||||
}.to_string()
|
||||
}
|
||||
|
||||
pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
|
||||
let script = r#"
|
||||
(() => {
|
||||
const agree = document.querySelector('button[name="agree"]');
|
||||
if (agree) {
|
||||
agree.click();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()
|
||||
"#;
|
||||
|
||||
for _ in 0..10 {
|
||||
let done: bool = client.execute(script, vec![]).await?.as_bool().unwrap_or(false);
|
||||
if done {
|
||||
break;
|
||||
}
|
||||
sleep(TokioDuration::from_millis(500)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Fetches earnings events for a ticker using a dedicated ScrapeTask.
|
||||
///
|
||||
/// This function creates and executes a ScrapeTask to navigate to the Yahoo Finance earnings calendar,
|
||||
@@ -368,74 +345,137 @@ pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if the task execution fails, e.g., chromedriver spawn or navigation issues.
|
||||
pub async fn get_earnings_events_task(ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
pub async fn fetch_earnings_with_pool(
|
||||
ticker: &str,
|
||||
pool: &Arc<ChromeDriverPool>,
|
||||
) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
let ticker = ticker.to_string();
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
|
||||
let task: ScrapeTask<Vec<CompanyEvent>> = ScrapeTask::new(
|
||||
url,
|
||||
|client| Box::pin(async move {
|
||||
reject_yahoo_cookies(client).await?;
|
||||
extract_earnings(client).await // Assuming extract_earnings is an async fn that uses client
|
||||
}),
|
||||
);
|
||||
task.execute().await
|
||||
|
||||
let ticker_cloned = ticker.clone();
|
||||
|
||||
pool.execute(url, move |client| {
|
||||
let ticker = ticker_cloned.clone();
|
||||
Box::pin(async move {
|
||||
reject_yahoo_cookies(&client).await?;
|
||||
extract_earnings_events(&client, &ticker).await
|
||||
})
|
||||
}).await
|
||||
}
|
||||
|
||||
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}&offset=0&size=100", ticker);
|
||||
client.goto(&url).await?;
|
||||
dismiss_yahoo_consent(client).await?;
|
||||
/// Extracts earnings events from the currently loaded Yahoo Finance earnings calendar page.
|
||||
///
|
||||
/// This function assumes the client is already navigated to the correct URL (e.g.,
|
||||
/// https://finance.yahoo.com/calendar/earnings?symbol={ticker}) and cookies are handled.
|
||||
///
|
||||
/// It waits for the earnings table, extracts rows, parses cells into CompanyEvent structs,
|
||||
/// and handles date parsing, float parsing, and optional fields.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `client` - The fantoccini Client with the page loaded.
|
||||
/// * `ticker` - The stock ticker symbol for the events.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of CompanyEvent on success.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if:
|
||||
/// - Table or elements not found.
|
||||
/// - Date or float parsing fails.
|
||||
/// - WebDriver operations fail.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// use fantoccini::Client;
|
||||
/// use crate::corporate::scraper::extract_earnings;
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main() -> Result<()> {
|
||||
/// // Assume client is set up and navigated
|
||||
/// let events = extract_earnings(&client, "AAPL").await?;
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> {
|
||||
// Wait for the table to load
|
||||
let table = client
|
||||
.wait()
|
||||
.for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
|
||||
|
||||
loop {
|
||||
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
|
||||
Ok(btn) => {
|
||||
btn.click().await?;
|
||||
sleep(TokioDuration::from_secs(2)).await;
|
||||
}
|
||||
Err(_) => break,
|
||||
// Find all rows in tbody
|
||||
let rows = table
|
||||
.find_all(Locator::Css("tbody tr"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
|
||||
|
||||
let mut events = Vec::with_capacity(rows.len());
|
||||
|
||||
for row in rows {
|
||||
let cells = row
|
||||
.find_all(Locator::Css("td"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
|
||||
|
||||
if cells.len() < 5 {
|
||||
continue; // Skip incomplete rows
|
||||
}
|
||||
}
|
||||
|
||||
let html = client.source().await?;
|
||||
let document = Html::parse_document(&html);
|
||||
let row_sel = Selector::parse("table tbody tr").unwrap();
|
||||
let mut events = Vec::new();
|
||||
// Extract and parse date
|
||||
let date_str = cells[0]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get date text: {}", e))?;
|
||||
let date = parse_yahoo_date(&date_str)
|
||||
.map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
|
||||
.format("%Y-%m-%d")
|
||||
.to_string();
|
||||
|
||||
for row in document.select(&row_sel) {
|
||||
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
|
||||
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
|
||||
.collect();
|
||||
if cols.len() < 6 { continue; }
|
||||
// Extract time, replace "Time Not Supplied" with empty
|
||||
let time = cells[1]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get time text: {}", e))?
|
||||
.replace("Time Not Supplied", "");
|
||||
|
||||
let full_date = &cols[2];
|
||||
let parts: Vec<&str> = full_date.split(" at ").collect();
|
||||
let raw_date = parts[0].trim();
|
||||
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
|
||||
// Extract period
|
||||
let period = cells[2]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get period text: {}", e))?;
|
||||
|
||||
let date = match parse_yahoo_date(raw_date) {
|
||||
Ok(d) => d,
|
||||
Err(_) => continue,
|
||||
};
|
||||
// Parse EPS forecast
|
||||
let eps_forecast_str = cells[3]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
|
||||
let eps_forecast = parse_float(&eps_forecast_str);
|
||||
|
||||
let eps_forecast = parse_float(&cols[3]);
|
||||
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) };
|
||||
// Parse EPS actual
|
||||
let eps_actual_str = cells[4]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
|
||||
let eps_actual = parse_float(&eps_actual_str);
|
||||
|
||||
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
|
||||
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
|
||||
} else { None };
|
||||
|
||||
let time = if time_str.contains("PM") {
|
||||
"AMC".to_string()
|
||||
} else if time_str.contains("AM") {
|
||||
"BMO".to_string()
|
||||
// Parse surprise % if available
|
||||
let surprise_pct = if cells.len() > 5 {
|
||||
let surprise_str = cells[5]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
|
||||
parse_float(&surprise_str)
|
||||
} else {
|
||||
"".to_string()
|
||||
None
|
||||
};
|
||||
|
||||
events.push(CompanyEvent {
|
||||
ticker: ticker.to_string(),
|
||||
date: date.format("%Y-%m-%d").to_string(),
|
||||
date,
|
||||
time,
|
||||
period: "".to_string(),
|
||||
period,
|
||||
eps_forecast,
|
||||
eps_actual,
|
||||
revenue_forecast: None,
|
||||
@@ -445,6 +485,12 @@ pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Re
|
||||
});
|
||||
}
|
||||
|
||||
if events.is_empty() {
|
||||
eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
|
||||
} else {
|
||||
println!("Extracted {} earnings events for {}", events.len(), ticker);
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
@@ -768,57 +814,6 @@ pub async fn load_isin_lei_csv() -> anyhow::Result<HashMap<String, Vec<String>>>
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
pub async fn get_primary_isin_and_name(
|
||||
client: &Client, // Pass your existing Selenium client
|
||||
ticker: &str,
|
||||
) -> anyhow::Result<PrimaryInfo> {
|
||||
// Navigate to the actual quote page (always works)
|
||||
let quote_url = format!("https://finance.yahoo.com/quote/{}", ticker);
|
||||
client.goto("e_url).await?;
|
||||
|
||||
// Dismiss overlays/banners (your function + guce-specific)
|
||||
reject_yahoo_cookies(client).await?;
|
||||
|
||||
// Wait for page to load (key data elements)
|
||||
sleep(TokioDuration::from_millis(2000)).await;
|
||||
|
||||
// Get page HTML and parse
|
||||
let html = client.source().await?;
|
||||
let document = Html::parse_document(&html);
|
||||
|
||||
// Selectors for key fields (tested on real Yahoo pages Nov 2025)
|
||||
let name_sel = Selector::parse("h1[data-testid='qsp-price-header']").unwrap_or_else(|_| Selector::parse("h1").unwrap());
|
||||
let isin_sel = Selector::parse("[data-testid='qsp-symbol'] + div [data-field='isin']").unwrap_or_else(|_| Selector::parse("[data-field='isin']").unwrap());
|
||||
let exchange_sel = Selector::parse("[data-testid='qsp-market'] span").unwrap_or_else(|_| Selector::parse(".TopNav__Exchange").unwrap());
|
||||
let currency_sel = Selector::parse("[data-testid='qsp-price'] span:contains('USD')").unwrap_or_else(|_| Selector::parse(".TopNav__Currency").unwrap()); // Adjust for dynamic
|
||||
|
||||
let name_elem = document.select(&name_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
let isin_elem = document.select(&isin_sel).next().map(|e| e.text().collect::<String>().trim().to_uppercase());
|
||||
let exchange_elem = document.select(&exchange_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
let currency_elem = document.select(¤cy_sel).next().map(|e| e.text().collect::<String>().trim().to_string());
|
||||
|
||||
let name = name_elem.unwrap_or_else(|| ticker.to_string());
|
||||
let isin = isin_elem.unwrap_or_default();
|
||||
let exchange_mic = exchange_elem.unwrap_or_default();
|
||||
let currency = currency_elem.unwrap_or_else(|| "USD".to_string());
|
||||
|
||||
// Validate ISIN
|
||||
let valid_isin = if isin.len() == 12 && isin.chars().all(|c| c.is_alphanumeric()) {
|
||||
isin
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
println!(" → Scraped {}: {} | ISIN: {} | Exchange: {}", ticker, name, valid_isin, exchange_mic);
|
||||
|
||||
Ok(PrimaryInfo {
|
||||
isin: valid_isin,
|
||||
name,
|
||||
exchange_mic,
|
||||
currency,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
|
||||
for _ in 0..10 {
|
||||
let clicked: bool = client
|
||||
|
||||
Reference in New Issue
Block a user