116 lines
3.7 KiB
Rust
116 lines
3.7 KiB
Rust
// src/corporate/scraper.rs
|
|
use super::types::{CompanyEvent, CompanyPrice};
|
|
use reqwest::Client;
|
|
use scraper::{Html, Selector};
|
|
use chrono::{NaiveDate, Datelike};
|
|
|
|
/// Browser-like `User-Agent` header value attached to the HTTP requests made in this file.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
|
|
|
|
pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
|
|
let client = Client::new();
|
|
let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings");
|
|
|
|
let text = client
|
|
.get(&url)
|
|
.header("User-Agent", USER_AGENT)
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
|
|
let document = Html::parse_document(&text);
|
|
let row_sel = Selector::parse(r#"table tbody tr"#).unwrap();
|
|
let mut events = Vec::new();
|
|
|
|
for row in document.select(&row_sel) {
|
|
let cols: Vec<String> = row.text().map(|s| s.trim().to_string()).collect();
|
|
if cols.len() < 4 { continue; }
|
|
|
|
let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]);
|
|
let date = match parse_yahoo_date(raw_date) {
|
|
Ok(d) => d,
|
|
Err(_) => continue,
|
|
};
|
|
|
|
let eps_forecast = parse_float(&cols[1]);
|
|
let eps_actual = parse_float(&cols[2]);
|
|
|
|
let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
|
|
if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
|
|
} else { None };
|
|
|
|
let time = if cols[0].contains("After") || cols[0].contains("AMC") {
|
|
"AMC".to_string()
|
|
} else if cols[0].contains("Before") || cols[0].contains("BMO") {
|
|
"BMO".to_string()
|
|
} else {
|
|
"".to_string()
|
|
};
|
|
|
|
events.push(CompanyEvent {
|
|
ticker: ticker.to_string(),
|
|
date: date.format("%Y-%m-%d").to_string(),
|
|
time,
|
|
period: cols.get(3).cloned().unwrap_or_default(),
|
|
eps_forecast,
|
|
eps_actual,
|
|
revenue_forecast: None,
|
|
revenue_actual: None,
|
|
surprise_pct,
|
|
source: "Yahoo".to_string(),
|
|
});
|
|
}
|
|
|
|
Ok(events)
|
|
}
|
|
|
|
pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> {
|
|
let client = Client::new();
|
|
let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")?
|
|
.and_hms_opt(0, 0, 0).unwrap().and_utc()
|
|
.timestamp();
|
|
|
|
let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")?
|
|
.succ_opt().unwrap()
|
|
.and_hms_opt(0, 0, 0).unwrap().and_utc()
|
|
.timestamp();
|
|
|
|
let url = format!(
|
|
"https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true"
|
|
);
|
|
|
|
let csv = client
|
|
.get(&url)
|
|
.header("User-Agent", USER_AGENT)
|
|
.send()
|
|
.await?
|
|
.text()
|
|
.await?;
|
|
|
|
let mut prices = Vec::new();
|
|
for line in csv.lines().skip(1) {
|
|
let cols: Vec<&str> = line.split(',').collect();
|
|
if cols.len() < 7 { continue; }
|
|
prices.push(CompanyPrice {
|
|
ticker: ticker.to_string(),
|
|
date: cols[0].to_string(),
|
|
open: cols[1].parse()?,
|
|
high: cols[2].parse()?,
|
|
low: cols[3].parse()?,
|
|
close: cols[4].parse()?,
|
|
adj_close: cols[5].parse()?,
|
|
volume: cols[6].parse()?,
|
|
});
|
|
}
|
|
Ok(prices)
|
|
}
|
|
|
|
/// Converts the text of a numeric table cell into an `f64`.
///
/// Every `--` sequence (presumably the placeholder for a missing value) is
/// removed first, then every comma, and the remainder is parsed as a float.
/// Returns `None` when the cleaned text is not a valid float.
fn parse_float(s: &str) -> Option<f64> {
    // Order matters: dashes are stripped before commas.
    let without_dashes = s.replace("--", "");
    let cleaned: String = without_dashes.chars().filter(|&ch| ch != ',').collect();
    cleaned.parse::<f64>().ok()
}
|
|
|
|
fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
|
|
NaiveDate::parse_from_str(s, "%b %d, %Y")
|
|
.or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y"))
|
|
.map_err(|_| anyhow::anyhow!("Bad date: {s}"))
|
|
} |