adding corporate data to webscraper
8
src/corporate/mod.rs
Normal file
@@ -0,0 +1,8 @@
// src/corporate/mod.rs
pub mod types;
pub mod scraper;
pub mod storage;
pub mod update;

pub use types::*;
pub use update::run_full_update;
116
src/corporate/scraper.rs
Normal file
@@ -0,0 +1,116 @@
// src/corporate/scraper.rs
use super::types::{CompanyEvent, CompanyPrice};
use reqwest::Client;
use scraper::{Html, Selector};
use chrono::NaiveDate;

const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

/// Scrape the earnings-history table from the Yahoo Finance quote page for `ticker`.
pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result<Vec<CompanyEvent>> {
    let client = Client::new();
    let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings");

    let text = client
        .get(&url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await?
        .text()
        .await?;

    let document = Html::parse_document(&text);
    let row_sel = Selector::parse(r#"table tbody tr"#).unwrap();
    let mut events = Vec::new();

    for row in document.select(&row_sel) {
        // Collect the non-empty text cells of the row; whitespace-only text
        // nodes between <td> elements would otherwise shift the column indices.
        let cols: Vec<String> = row
            .text()
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty())
            .collect();
        if cols.len() < 4 { continue; }

        let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]);
        let date = match parse_yahoo_date(raw_date) {
            Ok(d) => d,
            Err(_) => continue, // skip rows whose first cell is not a date
        };

        let eps_forecast = parse_float(&cols[1]);
        let eps_actual = parse_float(&cols[2]);

        let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) {
            if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None }
        } else { None };

        let time = if cols[0].contains("After") || cols[0].contains("AMC") {
            "AMC".to_string()
        } else if cols[0].contains("Before") || cols[0].contains("BMO") {
            "BMO".to_string()
        } else {
            "".to_string()
        };

        events.push(CompanyEvent {
            ticker: ticker.to_string(),
            date: date.format("%Y-%m-%d").to_string(),
            time,
            period: cols.get(3).cloned().unwrap_or_default(),
            eps_forecast,
            eps_actual,
            revenue_forecast: None,
            revenue_actual: None,
            surprise_pct,
            source: "Yahoo".to_string(),
        });
    }

    Ok(events)
}

/// Download daily OHLCV bars for `ticker` between `start` and `end` (inclusive,
/// "YYYY-MM-DD") from the Yahoo Finance CSV download endpoint.
pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result<Vec<CompanyPrice>> {
    let client = Client::new();
    let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")?
        .and_hms_opt(0, 0, 0).unwrap().and_utc()
        .timestamp();

    // Use midnight of the day after `end` so the final day's bar is included.
    let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")?
        .succ_opt().unwrap()
        .and_hms_opt(0, 0, 0).unwrap().and_utc()
        .timestamp();

    let url = format!(
        "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true"
    );

    let csv = client
        .get(&url)
        .header("User-Agent", USER_AGENT)
        .send()
        .await?
        .text()
        .await?;

    let mut prices = Vec::new();
    for line in csv.lines().skip(1) {
        let cols: Vec<&str> = line.split(',').collect();
        if cols.len() < 7 { continue; }
        prices.push(CompanyPrice {
            ticker: ticker.to_string(),
            date: cols[0].to_string(),
            open: cols[1].parse()?,
            high: cols[2].parse()?,
            low: cols[3].parse()?,
            close: cols[4].parse()?,
            adj_close: cols[5].parse()?,
            volume: cols[6].parse()?,
        });
    }
    Ok(prices)
}

/// Parse a numeric cell, treating "--" (Yahoo's placeholder) as missing.
fn parse_float(s: &str) -> Option<f64> {
    s.replace("--", "").replace(",", "").parse::<f64>().ok()
}

fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    NaiveDate::parse_from_str(s, "%b %d, %Y")
        .or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y"))
        .map_err(|_| anyhow::anyhow!("Bad date: {s}"))
}
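A quick way to exercise these two fetchers in isolation is a small binary like the sketch below. It is not part of this commit: the #[tokio::main] entry point (tokio with the macros feature) and the assumption that the module is reachable as crate::corporate from that binary are mine.

// usage sketch (not part of this commit): smoke-test both fetchers for one ticker
use crate::corporate::scraper::{fetch_earnings_history, fetch_price_history};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let events = fetch_earnings_history("AAPL").await?;
    for e in events.iter().take(5) {
        println!("{} {} forecast={:?} actual={:?} surprise={:?}",
                 e.date, e.period, e.eps_forecast, e.eps_actual, e.surprise_pct);
    }

    let prices = fetch_price_history("AAPL", "2024-01-01", "2024-03-31").await?;
    println!("{} daily bars", prices.len());
    Ok(())
}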
64
src/corporate/storage.rs
Normal file
@@ -0,0 +1,64 @@
// src/corporate/storage.rs
use super::types::{CompanyEvent, CompanyPrice};
use std::collections::HashMap;
use tokio::fs;

/// Load all saved events from disk into a map keyed by "ticker|date".
async fn load_all_events_map() -> anyhow::Result<HashMap<String, CompanyEvent>> {
    let mut map = HashMap::new();
    let dir = std::path::Path::new("corporate_events");
    if !dir.exists() {
        return Ok(map);
    }

    let mut entries = fs::read_dir(dir).await?;
    while let Some(entry) = entries.next_entry().await? {
        let path = entry.path();
        if path.extension().and_then(|s| s.to_str()) == Some("json") {
            let content = fs::read_to_string(&path).await?;
            if let Ok(events) = serde_json::from_str::<Vec<CompanyEvent>>(&content) {
                for event in events {
                    let key = format!("{}|{}", event.ticker, event.date);
                    map.insert(key, event);
                }
            }
        }
    }
    Ok(map)
}

/// Merge new events with existing ones and save back to disk.
pub async fn merge_and_save_events(_ticker: &str, new_events: Vec<CompanyEvent>) -> anyhow::Result<()> {
    // The ticker argument is currently unused: every event already carries its
    // ticker, and all events are merged into a single file.
    let mut existing = load_all_events_map().await?;

    // Insert or update, keyed by "ticker|date" so re-scrapes overwrite cleanly
    for event in new_events {
        let key = format!("{}|{}", event.ticker, event.date);
        existing.insert(key, event);
    }

    // Convert back to Vec and save (simple single file for now)
    let all_events: Vec<CompanyEvent> = existing.into_values().collect();
    let dir = std::path::Path::new("corporate_events");
    fs::create_dir_all(dir).await?;
    let path = dir.join("all_events.json");
    let json = serde_json::to_string_pretty(&all_events)?;
    fs::write(&path, json).await?;
    Ok(())
}

/// Save price history for a single ticker (overwrites the old file).
pub async fn save_prices_for_ticker(ticker: &str, prices: Vec<CompanyPrice>) -> anyhow::Result<()> {
    let dir = std::path::Path::new("corporate_prices");
    fs::create_dir_all(dir).await?;
    let path = dir.join(format!("{}.json", ticker));

    // Sort by date so the saved JSON is stable and easy to diff
    let mut prices = prices;
    prices.sort_by_key(|p| p.date.clone());

    let json = serde_json::to_string_pretty(&prices)?;
    fs::write(&path, json).await?;
    Ok(())
}
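Reading the data back is not covered by this commit; a hypothetical read-side helper mirroring save_prices_for_ticker might look like the sketch below (the name load_prices_for_ticker and its empty-result behaviour on a missing file are assumptions, not part of the diff).

// read-back sketch (not part of this commit): mirrors the corporate_prices/{ticker}.json layout
use crate::corporate::types::CompanyPrice;
use tokio::fs;

pub async fn load_prices_for_ticker(ticker: &str) -> anyhow::Result<Vec<CompanyPrice>> {
    let path = std::path::Path::new("corporate_prices").join(format!("{ticker}.json"));
    if !path.exists() {
        // No file yet for this ticker: treat as empty history rather than an error.
        return Ok(Vec::new());
    }
    let content = fs::read_to_string(&path).await?;
    Ok(serde_json::from_str(&content)?)
}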
38
src/corporate/types.rs
Normal file
@@ -0,0 +1,38 @@
// src/corporate/types.rs
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CompanyEvent {
    pub ticker: String,
    pub date: String,              // YYYY-MM-DD
    pub time: String,              // "AMC", "BMO", "TAS", or ""
    pub period: String,            // "Q1 2025", "FY 2024"
    pub eps_forecast: Option<f64>,
    pub eps_actual: Option<f64>,
    pub revenue_forecast: Option<f64>,
    pub revenue_actual: Option<f64>,
    pub surprise_pct: Option<f64>, // (actual - forecast) / |forecast| * 100
    pub source: String,            // "Yahoo"
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyPrice {
    pub ticker: String,
    pub date: String, // YYYY-MM-DD
    pub open: f64,
    pub high: f64,
    pub low: f64,
    pub close: f64,
    pub adj_close: f64,
    pub volume: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompanyEventChange {
    pub ticker: String,
    pub date: String,
    pub field: String, // "time", "eps_forecast", "eps_actual", "new_event"
    pub old_value: String,
    pub new_value: String,
    pub detected_at: String,
}
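Since both storage paths go through serde_json, a round trip shows the JSON shape these structs produce on disk. The values below are made up purely for illustration.

// illustration only (not part of this commit): serde round trip for CompanyEvent
use crate::corporate::types::CompanyEvent;

fn event_json_round_trip() -> anyhow::Result<()> {
    let event = CompanyEvent {
        ticker: "AAPL".to_string(),
        date: "2025-01-30".to_string(),
        time: "AMC".to_string(),
        period: "Q1 2025".to_string(),
        eps_forecast: Some(2.35),
        eps_actual: Some(2.40),
        revenue_forecast: None,
        revenue_actual: None,
        surprise_pct: Some(2.13),
        source: "Yahoo".to_string(),
    };
    let json = serde_json::to_string_pretty(&event)?;
    let back: CompanyEvent = serde_json::from_str(&json)?;
    assert_eq!(event, back); // the PartialEq derive makes the comparison possible
    println!("{json}");
    Ok(())
}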
31
src/corporate/update.rs
Normal file
@@ -0,0 +1,31 @@
// src/corporate/update.rs
use super::{scraper::*, storage::*, types::*};
use crate::config::Config;

/// Fetch earnings and price history for every ticker and persist the results.
pub async fn run_full_update(tickers: Vec<String>, config: &Config) -> anyhow::Result<()> {
    println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date);

    let today = chrono::Local::now().format("%Y-%m-%d").to_string();

    for ticker in tickers {
        print!(" → {:6} ", ticker);
        // Flush so the ticker is visible before the awaits below finish.
        let _ = std::io::Write::flush(&mut std::io::stdout());

        // Earnings
        match fetch_earnings_history(&ticker).await {
            Ok(events) => {
                let count = events.len();
                merge_and_save_events(&ticker, events).await?;
                println!("{count} earnings");
            }
            Err(e) => println!("earnings fetch failed: {e}"),
        }

        // Prices – from config.corporate_start_date up to today
        // (fetch errors are skipped silently for now)
        if let Ok(prices) = fetch_price_history(&ticker, &config.corporate_start_date, &today).await {
            save_prices_for_ticker(&ticker, prices).await?;
        }

        // Be polite to the endpoint between tickers.
        tokio::time::sleep(tokio::time::Duration::from_millis(250)).await;
    }
    Ok(())
}
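Wiring this into a binary could look like the sketch below. The only thing this diff requires of crate::config::Config is the corporate_start_date field (a "YYYY-MM-DD" string); Config::load() is a hypothetical stand-in for however the rest of the app actually builds its config, and the ticker list is a placeholder.

// wiring sketch (not part of this commit)
use crate::config::Config;
use crate::corporate::run_full_update; // re-exported by corporate/mod.rs

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Config::load() is hypothetical; substitute the app's real config setup.
    let config = Config::load()?;
    let tickers = vec!["AAPL".to_string(), "MSFT".to_string(), "NVDA".to_string()];
    run_full_update(tickers, &config).await
}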