Added company mapping with Yahoo tickers
This commit is contained in:
312
src/corporate/yahoo.rs
Normal file
312
src/corporate/yahoo.rs
Normal file
@@ -0,0 +1,312 @@
|
||||
// src/corporate/yahoo.rs
|
||||
use super::{types::*, helpers::*};
|
||||
use crate::{scraper::webdriver::*, util::{directories::DataPaths}};
|
||||
use event_backtest_engine::logger;
|
||||
use fantoccini::{Client, Locator};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tokio::{time::{Duration as TokioDuration, sleep}};
|
||||
use std::{sync::Arc};
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
/// Mapping existing
|
||||
|
||||
/// getting historical stock price data daily (xxxx - 2025) and hourly (last 30 days)
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub enum YahooTickerResult {
|
||||
Found(String),
|
||||
NotFound,
|
||||
NoResults,
|
||||
AmbiguousResults,
|
||||
}
|
||||
|
||||
impl YahooTickerResult {
|
||||
pub fn to_tagged_string(&self) -> String {
|
||||
match self {
|
||||
YahooTickerResult::Found(ticker) => format!("YAHOO:{}", ticker),
|
||||
YahooTickerResult::NotFound => "YAHOO:NOT_FOUND".to_string(),
|
||||
YahooTickerResult::NoResults => "YAHOO:NO_RESULTS".to_string(),
|
||||
YahooTickerResult::AmbiguousResults => "YAHOO:AMBIGUOUS".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_found(&self) -> bool {
|
||||
matches!(self, YahooTickerResult::Found(_))
|
||||
}
|
||||
|
||||
pub fn get_ticker(&self) -> Option<&str> {
|
||||
match self {
|
||||
YahooTickerResult::Found(ticker) => Some(ticker),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn scrape_ticker_by_isin(
|
||||
pool: &Arc<ChromeDriverPool>,
|
||||
isin: &str,
|
||||
) -> anyhow::Result<YahooTickerResult> {
|
||||
let isin = isin.to_string();
|
||||
pool.execute(format!("https://finance.yahoo.com/lookup?s={}", isin), move |client| {
|
||||
let isin = isin.clone();
|
||||
Box::pin(async move {
|
||||
sleep(TokioDuration::from_millis(1000)).await;
|
||||
reject_yahoo_cookies(&client).await?;
|
||||
sleep(TokioDuration::from_millis(1000)).await;
|
||||
extract_ticker_by_isin(&client, &isin).await
|
||||
})
|
||||
}).await
|
||||
}
|
||||
|
||||
pub async fn extract_ticker_by_isin(
|
||||
client: &Client,
|
||||
_isin: &str,
|
||||
) -> Result<YahooTickerResult> {
|
||||
//let search_url = format!("https://finance.yahoo.com/lookup?s={}", isin);
|
||||
|
||||
// Check for "No results found" message
|
||||
if client.find(Locator::Css(".noData")).await.is_ok() {
|
||||
return Ok(YahooTickerResult::NoResults);
|
||||
}
|
||||
|
||||
// Wait for results table
|
||||
let table = match client
|
||||
.wait()
|
||||
.for_element(Locator::Css("table[data-test='lookup-table']"))
|
||||
.await
|
||||
{
|
||||
Ok(t) => t,
|
||||
Err(_) => return Ok(YahooTickerResult::NoResults),
|
||||
};
|
||||
|
||||
// Find first row
|
||||
let first_row = match table
|
||||
.find(Locator::Css("tbody tr"))
|
||||
.await
|
||||
{
|
||||
Ok(row) => row,
|
||||
Err(_) => return Ok(YahooTickerResult::NoResults),
|
||||
};
|
||||
|
||||
// Extract ticker from first cell
|
||||
let ticker_cell = first_row
|
||||
.find(Locator::Css("td:nth-child(1)"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find ticker cell: {}", e))?;
|
||||
|
||||
let ticker = ticker_cell
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get ticker text: {}", e))?
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if ticker.is_empty() {
|
||||
Ok(YahooTickerResult::NotFound)
|
||||
} else {
|
||||
Ok(YahooTickerResult::Found(ticker))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::Result<Vec<String>> {
|
||||
let corporate_path = paths.data_dir().join("corporate").join("by_name");
|
||||
let companies_file = corporate_path.join("companies.jsonl");
|
||||
let content = tokio::fs::read_to_string(companies_file).await?;
|
||||
let mut tickers = Vec::new();
|
||||
for line in content.lines() {
|
||||
let company: CompanyCrossPlatformInfo = serde_json::from_str(line)?;
|
||||
for (_isin, ticker_vec) in company.isin_tickers_map {
|
||||
tickers.extend(ticker_vec);
|
||||
}
|
||||
}
|
||||
Ok(tickers)
|
||||
}
|
||||
|
||||
/// Fetches earnings events for a ticker using a dedicated ScrapeTask.
|
||||
///
|
||||
/// This function creates and executes a ScrapeTask to navigate to the Yahoo Finance earnings calendar,
|
||||
/// reject cookies, and extract the events.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `ticker` - The stock ticker symbol.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of CompanyEvent structs on success.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if the task execution fails, e.g., chromedriver spawn or navigation issues.
|
||||
pub async fn fetch_earnings_with_pool(
|
||||
pool: &Arc<ChromeDriverPool>,
|
||||
ticker: &str,
|
||||
) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||
let ticker = ticker.to_string();
|
||||
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
|
||||
|
||||
let ticker_cloned = ticker.clone();
|
||||
|
||||
pool.execute(url, move |client| {
|
||||
let ticker = ticker_cloned.clone();
|
||||
Box::pin(async move {
|
||||
reject_yahoo_cookies(&client).await?;
|
||||
extract_earnings_events(&client, &ticker).await
|
||||
})
|
||||
}).await
|
||||
}
|
||||
|
||||
/// Extracts earnings events from the currently loaded Yahoo Finance earnings calendar page.
|
||||
///
|
||||
/// This function assumes the client is already navigated to the correct URL (e.g.,
|
||||
/// https://finance.yahoo.com/calendar/earnings?symbol={ticker}) and cookies are handled.
|
||||
///
|
||||
/// It waits for the earnings table, extracts rows, parses cells into CompanyEvent structs,
|
||||
/// and handles date parsing, float parsing, and optional fields.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `client` - The fantoccini Client with the page loaded.
|
||||
/// * `ticker` - The stock ticker symbol for the events.
|
||||
///
|
||||
/// # Returns
|
||||
/// A vector of CompanyEvent on success.
|
||||
///
|
||||
/// # Errors
|
||||
/// Returns an error if:
|
||||
/// - Table or elements not found.
|
||||
/// - Date or float parsing fails.
|
||||
/// - WebDriver operations fail.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```no_run
|
||||
/// use fantoccini::Client;
|
||||
/// use crate::corporate::scraper::extract_earnings;
|
||||
///
|
||||
/// #[tokio::main]
|
||||
/// async fn main() -> Result<()> {
|
||||
/// // Assume client is set up and navigated
|
||||
/// let events = extract_earnings(&client, "AAPL").await?;
|
||||
/// Ok(())
|
||||
/// }
|
||||
/// ```
|
||||
pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Vec<CompanyEvent>> {
|
||||
// Wait for the table to load
|
||||
let table = client
|
||||
.wait()
|
||||
.for_element(Locator::Css(r#"table[data-test="cal-table"]"#))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find earnings table: {}", e))?;
|
||||
|
||||
// Find all rows in tbody
|
||||
let rows = table
|
||||
.find_all(Locator::Css("tbody tr"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find table rows: {}", e))?;
|
||||
|
||||
let mut events = Vec::with_capacity(rows.len());
|
||||
|
||||
for row in rows {
|
||||
let cells = row
|
||||
.find_all(Locator::Css("td"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find cells in row: {}", e))?;
|
||||
|
||||
if cells.len() < 5 {
|
||||
continue; // Skip incomplete rows
|
||||
}
|
||||
|
||||
// Extract and parse date
|
||||
let date_str = cells[0]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get date text: {}", e))?;
|
||||
let date = parse_yahoo_date(&date_str)
|
||||
.map_err(|e| anyhow!("Failed to parse date '{}': {}", date_str, e))?
|
||||
.format("%Y-%m-%d")
|
||||
.to_string();
|
||||
|
||||
// Extract time, replace "Time Not Supplied" with empty
|
||||
let time = cells[1]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get time text: {}", e))?
|
||||
.replace("Time Not Supplied", "");
|
||||
|
||||
// Extract period
|
||||
let period = cells[2]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get period text: {}", e))?;
|
||||
|
||||
// Parse EPS forecast
|
||||
let eps_forecast_str = cells[3]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get EPS forecast text: {}", e))?;
|
||||
let eps_forecast = parse_float(&eps_forecast_str);
|
||||
|
||||
// Parse EPS actual
|
||||
let eps_actual_str = cells[4]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get EPS actual text: {}", e))?;
|
||||
let eps_actual = parse_float(&eps_actual_str);
|
||||
|
||||
// Parse surprise % if available
|
||||
let surprise_pct = if cells.len() > 5 {
|
||||
let surprise_str = cells[5]
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get surprise text: {}", e))?;
|
||||
parse_float(&surprise_str)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
events.push(CompanyEvent {
|
||||
ticker: ticker.to_string(),
|
||||
date,
|
||||
time,
|
||||
period,
|
||||
eps_forecast,
|
||||
eps_actual,
|
||||
revenue_forecast: None,
|
||||
revenue_actual: None,
|
||||
surprise_pct,
|
||||
source: "Yahoo".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
if events.is_empty() {
|
||||
eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
|
||||
} else {
|
||||
println!("Extracted {} earnings events for {}", events.len(), ticker);
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
}
|
||||
|
||||
/// Rejecting Yahoo Cookies
|
||||
async fn reject_yahoo_cookies(client: &Client) -> anyhow::Result<()> {
|
||||
for _ in 0..10 {
|
||||
let clicked: bool = client
|
||||
.execute(
|
||||
r#"(() => {
|
||||
const btn = document.querySelector('#consent-page .reject-all');
|
||||
if (btn) {
|
||||
btn.click();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
})()"#,
|
||||
vec![],
|
||||
)
|
||||
.await?
|
||||
.as_bool()
|
||||
.unwrap_or(false);
|
||||
|
||||
if clicked { break; }
|
||||
sleep(TokioDuration::from_millis(500)).await;
|
||||
}
|
||||
|
||||
logger::log_info("Rejected Yahoo cookies if button existed").await;
|
||||
Ok(())
|
||||
}
|
||||
Reference in New Issue
Block a user