Files
WebScraper/src/corporate/helpers.rs
2026-01-12 18:50:44 +01:00

122 lines
3.7 KiB
Rust

// src/corporate/helpers.rs
use super::types::*;
use chrono::{Local, NaiveDate};
use rand::rngs::StdRng;
use rand::prelude::{Rng, SeedableRng, IndexedRandom};
/// Builds the composite lookup key for an event.
///
/// The key is the event's `ticker`, `date` and `time` fields, in that order,
/// joined with `|`.
pub fn event_key(e: &CompanyEventData) -> String {
    let (ticker, date, time) = (&e.ticker, &e.date, &e.time);
    format!("{ticker}|{date}|{time}")
}
/// Compares two snapshots of an event and reports which tracked fields differ.
///
/// Returns one [`CompanyEventChangeData`] per differing field among `time`,
/// `eps_forecast` and `eps_actual`, in that order. Every record carries the
/// ticker and date of `new` and shares a single detection timestamp (local
/// time, formatted as `%Y-%m-%d %H:%M:%S`).
///
/// No changes are reported unless `new.date` is strictly greater than `today`.
/// That check is a plain string comparison, so it presumably relies on both
/// values being zero-padded `YYYY-MM-DD` strings — TODO confirm with callers.
pub fn detect_changes(
    old: &CompanyEventData,
    new: &CompanyEventData,
    today: &str,
) -> Vec<CompanyEventChangeData> {
    let detected_at = Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
    let mut found = Vec::new();

    // Only events dated after `today` are tracked.
    if new.date.as_str() <= today {
        return found;
    }

    // Appends one change record for `field`, stamped with the shared timestamp.
    let mut record = |field: &str, before: String, after: String| {
        found.push(CompanyEventChangeData {
            ticker: new.ticker.clone(),
            date: new.date.clone(),
            field_changed: field.to_string(),
            old_value: before,
            new_value: after,
            detected_at: detected_at.clone(),
        });
    };

    if old.time != new.time {
        record("time", old.time.clone(), new.time.clone());
    }
    // The EPS values are rendered with their `Debug` representation.
    if old.eps_forecast != new.eps_forecast {
        record(
            "eps_forecast",
            format!("{:?}", old.eps_forecast),
            format!("{:?}", new.eps_forecast),
        );
    }
    if old.eps_actual != new.eps_actual {
        record(
            "eps_actual",
            format!("{:?}", old.eps_actual),
            format!("{:?}", new.eps_actual),
        );
    }
    // Add similar for revenue if applicable
    found
}
/// Parses a scraped numeric cell into an `f64`.
///
/// Every `--` sequence and every `,` is removed before parsing, and any
/// leading or trailing whitespace is ignored so that padded cell text such as
/// `" 1,234.5\n"` still parses.
///
/// Returns `None` when nothing parseable remains (for example `"--"`, `""` or
/// non-numeric text).
pub fn parse_float(s: &str) -> Option<f64> {
    let cleaned = s.replace("--", "").replace(',', "");
    // Trim after the replacements so whitespace left next to a removed
    // placeholder is dropped as well.
    cleaned.trim().parse::<f64>().ok()
}
/// Parses a date string into a [`NaiveDate`].
///
/// The `%B %d, %Y` format is tried first, then `%b %d, %Y`; the first
/// successful parse wins.
///
/// # Errors
///
/// Returns a `Bad date: <input>` error when neither format matches.
pub fn parse_yahoo_date(s: &str) -> anyhow::Result<NaiveDate> {
    // Candidate formats, in the order they are attempted.
    const FORMATS: [&str; 2] = ["%B %d, %Y", "%b %d, %Y"];

    FORMATS
        .iter()
        .find_map(|fmt| NaiveDate::parse_from_str(s, fmt).ok())
        .ok_or_else(|| anyhow::anyhow!("Bad date: {s}"))
}
/// Send-safe random range.
///
/// Returns a value drawn from the half-open range `min..max`, using a fresh
/// [`StdRng`] seeded from `rand::rng()` on every call.
///
/// An empty range (`min >= max`) yields `min` rather than panicking, so callers
/// computing bounds dynamically (e.g. jittered delays) cannot crash here.
pub fn random_range(min: u64, max: u64) -> u64 {
    // Sampling from an empty range panics, so handle it up front.
    if min >= max {
        return min;
    }
    let mut rng = StdRng::from_rng(&mut rand::rng());
    rng.random_range(min..max)
}
/// Send-safe random choice.
///
/// Returns a clone of one element of `items`, picked using a fresh [`StdRng`]
/// seeded from `rand::rng()` on every call.
///
/// # Panics
///
/// Panics if `items` is empty; callers must pass at least one element.
pub fn choose_random<T: Clone>(items: &[T]) -> T {
    let mut rng = StdRng::from_rng(&mut rand::rng());
    items
        .choose(&mut rng)
        .expect("choose_random requires a non-empty slice")
        .clone()
}
/// Extract the first valid Yahoo ticker from a company.
///
/// Scans every ticker list in `company.isin_tickers_map` and returns the first
/// entry that starts with `YAHOO:`, with that prefix removed exactly once.
/// The sentinel entries `YAHOO:NO_RESULTS` and `YAHOO:ERROR` are skipped, as is
/// a bare `YAHOO:` with no symbol after it.
///
/// Returns `None` when no entry qualifies.
///
/// NOTE(review): "first" follows the map's iteration order; if the map is a
/// hash map that order is unspecified — confirm whether callers care.
pub fn extract_first_yahoo_ticker(company: &CompanyCrossPlatformData) -> Option<String> {
    company
        .isin_tickers_map
        .values()
        .flatten()
        // `strip_prefix` removes the marker once; non-Yahoo entries yield `None`.
        .filter_map(|ticker| ticker.strip_prefix("YAHOO:"))
        .find(|symbol| !symbol.is_empty() && !matches!(*symbol, "NO_RESULTS" | "ERROR"))
        .map(str::to_string)
}
/// Sanitize a company name for file system use.
///
/// Each occurrence of `/`, `\`, `:`, `*`, `?`, `"`, `<`, `>` or `|` is replaced
/// with `_`; every other character is kept as is.
pub fn sanitize_company_name(name: &str) -> String {
    name.chars()
        .map(|ch| match ch {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
            other => other,
        })
        .collect()
}
/// Load companies from JSONL file
pub async fn load_companies_from_jsonl(
path: &std::path::Path
) -> anyhow::Result<Vec<CompanyCrossPlatformData>> {
let content = tokio::fs::read_to_string(path).await?;
let mut companies = Vec::new();
for line in content.lines() {
if line.trim().is_empty() {
continue;
}
if let Ok(company) = serde_json::from_str::<CompanyCrossPlatformData>(line) {
companies.push(company);
}
}
Ok(companies)
}