using chromedriver for collecting corporate earnings
This commit is contained in:
@@ -1,47 +1,81 @@
|
|||||||
// src/corporate/scraper.rs
|
// src/corporate/scraper.rs
|
||||||
use super::types::{CompanyEvent, CompanyPrice};
|
use super::types::{CompanyEvent, CompanyPrice};
|
||||||
use reqwest::Client;
|
use fantoccini::{Client, Locator};
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use chrono::{NaiveDate, Datelike};
|
use chrono::{NaiveDate, Datelike};
|
||||||
|
use tokio::time::{sleep, Duration};
|
||||||
|
|
||||||
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
||||||
|
|
||||||
pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
pub async fn dismiss_yahoo_consent(client: &Client) -> anyhow::Result<()> {
|
||||||
let client = Client::new();
|
let script = r#"
|
||||||
let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings");
|
(() => {
|
||||||
|
const agree = document.querySelector('button[name="agree"]');
|
||||||
|
if (agree) {
|
||||||
|
agree.click();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
})()
|
||||||
|
"#;
|
||||||
|
|
||||||
let text = client
|
for _ in 0..10 {
|
||||||
.get(&url)
|
let done: bool = client.execute(script, vec![]).await?.as_bool().unwrap_or(false);
|
||||||
.header("User-Agent", USER_AGENT)
|
if done {
|
||||||
.send()
|
break;
|
||||||
.await?
|
}
|
||||||
.text()
|
sleep(Duration::from_millis(500)).await;
|
||||||
.await?;
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
let document = Html::parse_document(&text);
|
pub async fn fetch_earnings_history(client: &Client, ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
||||||
let row_sel = Selector::parse(r#"table tbody tr"#).unwrap();
|
let url = format!("https://finance.yahoo.com/calendar/earnings?symbol={}", ticker);
|
||||||
|
client.goto(&url).await?;
|
||||||
|
dismiss_yahoo_consent(client).await?;
|
||||||
|
|
||||||
|
// Load all by clicking "Show More" if present
|
||||||
|
loop {
|
||||||
|
match client.find(Locator::XPath(r#"//button[contains(text(), 'Show More')]"#)).await {
|
||||||
|
Ok(btn) => {
|
||||||
|
btn.click().await?;
|
||||||
|
sleep(Duration::from_secs(2)).await;
|
||||||
|
}
|
||||||
|
Err(_) => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let html = client.source().await?;
|
||||||
|
let document = Html::parse_document(&html);
|
||||||
|
let row_sel = Selector::parse("table tbody tr").unwrap();
|
||||||
let mut events = Vec::new();
|
let mut events = Vec::new();
|
||||||
|
|
||||||
for row in document.select(&row_sel) {
|
for row in document.select(&row_sel) {
|
||||||
let cols: Vec<String> = row.text().map(|s| s.trim().to_string()).collect();
|
let cols: Vec<String> = row.select(&Selector::parse("td").unwrap())
|
||||||
if cols.len() < 4 { continue; }
|
.map(|td| td.text().collect::<Vec<_>>().join(" ").trim().to_string())
|
||||||
|
.collect();
|
||||||
|
if cols.len() < 6 { continue; }
|
||||||
|
|
||||||
|
let full_date = &cols[2];
|
||||||
|
let parts: Vec<&str> = full_date.split(" at ").collect();
|
||||||
|
let raw_date = parts[0].trim();
|
||||||
|
let time_str = if parts.len() > 1 { parts[1].trim() } else { "" };
|
||||||
|
|
||||||
let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]);
|
|
||||||
let date = match parse_yahoo_date(raw_date) {
|
let date = match parse_yahoo_date(raw_date) {
|
||||||
Ok(d) => d,
|
Ok(d) => d,
|
||||||
Err(_) => continue,
|
Err(_) => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
let eps_forecast = parse_float(&cols[1]);
|
let eps_forecast = parse_float(&cols[3]);
|
||||||
let eps_actual = parse_float(&cols[2]);
|
let eps_actual = if cols[4] == "-" { None } else { parse_float(&cols[4]) };
|
||||||
|
|
||||||
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
|
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
|
||||||
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
|
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
|
||||||
} else { None };
|
} else { None };
|
||||||
|
|
||||||
let time = if cols[0].contains("After") || cols[0].contains("AMC") {
|
let time = if time_str.contains("PM") {
|
||||||
"AMC".to_string()
|
"AMC".to_string()
|
||||||
} else if cols[0].contains("Before") || cols[0].contains("BMO") {
|
} else if time_str.contains("AM") {
|
||||||
"BMO".to_string()
|
"BMO".to_string()
|
||||||
} else {
|
} else {
|
||||||
"".to_string()
|
"".to_string()
|
||||||
@@ -51,7 +85,7 @@ pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyE
|
|||||||
ticker: ticker.to_string(),
|
ticker: ticker.to_string(),
|
||||||
date: date.format("%Y-%m-%d").to_string(),
|
date: date.format("%Y-%m-%d").to_string(),
|
||||||
time,
|
time,
|
||||||
period: cols.get(3).cloned().unwrap_or_default(),
|
period: "".to_string(), // No period info available, set to empty
|
||||||
eps_forecast,
|
eps_forecast,
|
||||||
eps_actual,
|
eps_actual,
|
||||||
revenue_forecast: None,
|
revenue_forecast: None,
|
||||||
@@ -64,8 +98,7 @@ pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyE
|
|||||||
Ok(events)
|
Ok(events)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> {
|
pub async fn fetch_price_history(client: &Client, ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> {
|
||||||
let client = Client::new();
|
|
||||||
let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")?
|
let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")?
|
||||||
.and_hms_opt(0, 0, 0).unwrap().and_utc()
|
.and_hms_opt(0, 0, 0).unwrap().and_utc()
|
||||||
.timestamp();
|
.timestamp();
|
||||||
@@ -79,13 +112,8 @@ pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow
|
|||||||
"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true"
|
"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true"
|
||||||
);
|
);
|
||||||
|
|
||||||
let csv = client
|
client.goto(&url).await?;
|
||||||
.get(&url)
|
let csv = client.source().await?;
|
||||||
.header("User-Agent", USER_AGENT)
|
|
||||||
.send()
|
|
||||||
.await?
|
|
||||||
.text()
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
let mut prices = Vec::new();
|
let mut prices = Vec::new();
|
||||||
for line in csv.lines().skip(1) {
|
for line in csv.lines().skip(1) {
|
||||||
@@ -110,7 +138,7 @@ fn parse_float(s: &str) -> Option<f64> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
|
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
|
||||||
NaiveDate::parse_from_str(s, "%b %d, %Y")
|
NaiveDate::parse_from_str(s, "%B %d, %Y")
|
||||||
.or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y"))
|
.or_else(|_| NaiveDate::parse_from_str(s, "%b %d, %Y"))
|
||||||
.map_err(|_| anyhow::anyhow!("Bad date: {s}"))
|
.map_err(|_| anyhow::anyhow!("Bad date: {s}"))
|
||||||
}
|
}
|
||||||
@@ -6,7 +6,7 @@ use chrono::Local;
|
|||||||
use std::collections::{HashMap, HashSet};
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
|
||||||
pub async fn run_full_update(tickers: Vec<String>, config: &Config) -> anyhow::Result<()> {
|
pub async fn run_full_update(client: &fantoccini::Client, tickers: Vec<String>, config: &Config) -> anyhow::Result<()> {
|
||||||
println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date);
|
println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date);
|
||||||
|
|
||||||
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
|
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
|
||||||
@@ -15,13 +15,13 @@ pub async fn run_full_update(tickers: Vec<String>, config: &Config) -> anyhow::R
|
|||||||
print!(" → {:6} ", ticker);
|
print!(" → {:6} ", ticker);
|
||||||
|
|
||||||
// Earnings
|
// Earnings
|
||||||
if let Ok(events) = fetch_earnings_history(&ticker).await {
|
if let Ok(events) = fetch_earnings_history(client, &ticker).await {
|
||||||
merge_and_save_events(&ticker, events.clone()).await?;
|
merge_and_save_events(&ticker, events.clone()).await?;
|
||||||
println!("{} earnings", events.len());
|
println!("{} earnings", events.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prices – now using config.corporate_start_date
|
// Prices – now using config.corporate_start_date
|
||||||
if let Ok(prices) = fetch_price_history(&ticker, &config.corporate_start_date, &today).await {
|
if let Ok(prices) = fetch_price_history(client, &ticker, &config.corporate_start_date, &today).await {
|
||||||
save_prices_for_ticker(&ticker, prices).await?;
|
save_prices_for_ticker(&ticker, prices).await?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// === Corporate Earnings Update ===
|
// === Corporate Earnings Update ===
|
||||||
println!("\nUpdating Corporate Earnings");
|
println!("\nUpdating Corporate Earnings");
|
||||||
let tickers = config::get_tickers();
|
let tickers = config::get_tickers();
|
||||||
corporate::run_full_update(tickers, &config).await?;
|
corporate::run_full_update(&client, tickers, &config).await?;
|
||||||
|
|
||||||
// === Cleanup ===
|
// === Cleanup ===
|
||||||
client.close().await?;
|
client.close().await?;
|
||||||
|
|||||||
Reference in New Issue
Block a user