added update_rule for incremental change

This commit is contained in:
2025-12-15 23:47:28 +01:00
parent d744769138
commit d26e833d93
10 changed files with 566 additions and 241 deletions

View File

@@ -8,6 +8,7 @@ use tokio::{time::{Duration as TokioDuration, sleep}};
use std::{sync::Arc};
use anyhow::{anyhow, Result};
const YAHOO_COMPANY_EXTRACTION_JS: &str = include_str!("yahoo_company_extraction.js");
/// Mapping existing
/// getting historical stock price data daily (xxxx - 2025) and hourly (last 30 days)
@@ -20,6 +21,16 @@ pub enum YahooTickerResult {
AmbiguousResults,
}
/// Payload returned by the Yahoo company extraction script, deserialized with serde.
///
/// `extract_company_details` builds this from the value produced by executing
/// `YAHOO_COMPANY_EXTRACTION_JS` and then branches on `status`.
#[derive(Debug, Deserialize)]
pub struct ExtractionResult {
/// Outcome tag. The consumer matches on "found", "no_results", "not_found" and
/// "error"; any other value is handled like "no match".
status: String,
/// Ticker symbol, when the script supplied one. Required for a "found" status to
/// yield company details.
ticker: Option<String>,
/// Sector value, when the script supplied one; copied into the resulting details as-is.
sector: Option<String>,
/// Exchange value, when the script supplied one; copied into the resulting details as-is.
exchange: Option<String>,
/// Error text reported by the script; used to build the error for status "error".
/// `#[serde(default)]` makes the field fall back to `None` when the key is missing.
#[serde(default)]
error_message: Option<String>,
}
impl YahooTickerResult {
pub fn to_tagged_string(&self) -> String {
match self {
@@ -42,69 +53,52 @@ impl YahooTickerResult {
}
}
// NOTE(review): this span comes from a rendered commit diff. Removed and added versions
// of a line appear back to back without +/- markers; in each such pair the second line
// looks like the post-commit one (confirm against the repository). The documentation
// below describes the post-commit function `scrape_company_details_by_isin`.
/// Looks up an ISIN through the Yahoo Finance lookup URL and returns the extracted
/// company details, if any.
///
/// Passes the lookup URL built from `isin` to `pool.execute` together with a callback
/// that waits one second, calls `reject_yahoo_cookies`, waits another second and then
/// delegates to `extract_company_details`. Presumably `pool.execute` opens the URL in a
/// pooled browser session before invoking the callback (verify in `ChromeDriverPool`).
///
/// # Errors
/// Returns whatever error `reject_yahoo_cookies`, `extract_company_details` or
/// `pool.execute` itself produces.
pub async fn scrape_ticker_by_isin(
pub async fn scrape_company_details_by_isin(
pool: &Arc<ChromeDriverPool>,
isin: &str,
) -> anyhow::Result<YahooTickerResult> {
) -> anyhow::Result<Option<YahooCompanyDetails>> {
// Owned copy so the `move` closure below does not borrow from the caller.
let isin = isin.to_string();
pool.execute(format!("https://finance.yahoo.com/lookup?s={}", isin), move |client| {
pool.execute(format!("https://finance.yahoo.com/lookup/?s={}", isin), move |client| {
// Clone so the async block can take ownership while the closure keeps its own copy.
let isin = isin.clone();
Box::pin(async move {
// Fixed 1 s pause before touching the page (presumably to let it finish loading).
sleep(TokioDuration::from_millis(1000)).await;
reject_yahoo_cookies(&client).await?;
// Second fixed 1 s pause after the cookie handling, before extraction starts.
sleep(TokioDuration::from_millis(1000)).await;
extract_ticker_by_isin(&client, &isin).await
extract_company_details(&client, &isin).await
})
}).await
}
// NOTE(review): this span comes from a rendered commit diff. The removed body (element
// lookups via `Locator::Css`, returning `YahooTickerResult`) and the added body (script
// execution, returning `Option<YahooCompanyDetails>`) are interleaved without +/- markers.
// The documentation below describes the added, script-based version
// `extract_company_details` (confirm against the repository).
/// Executes `YAHOO_COMPANY_EXTRACTION_JS` through `client` and converts the script's
/// result into optional company details.
///
/// `_isin` is accepted but not read by the script-based body.
///
/// Returns `Ok(Some(details))` only when the script reports status "found" and supplies
/// a ticker. Returns `Ok(None)` for "found" without a ticker, for "no_results", for
/// "not_found" and for any unrecognized status.
///
/// # Errors
/// Fails when executing the script fails, when its result cannot be deserialized into
/// `ExtractionResult`, or when the script reports status "error".
pub async fn extract_ticker_by_isin(
pub async fn extract_company_details(
client: &Client,
_isin: &str,
) -> Result<YahooTickerResult> {
//let search_url = format!("https://finance.yahoo.com/lookup?s={}", isin);
// Check for "No results found" message
if client.find(Locator::Css(".noData")).await.is_ok() {
return Ok(YahooTickerResult::NoResults);
}
// Wait for results table
let table = match client
.wait()
.for_element(Locator::Css("table[data-test='lookup-table']"))
.await
{
Ok(t) => t,
Err(_) => return Ok(YahooTickerResult::NoResults),
};
) -> Result<Option<YahooCompanyDetails>> {
// Execute the JavaScript extraction script
let result = client.execute(YAHOO_COMPANY_EXTRACTION_JS, vec![]).await?;
// Find first row
let first_row = match table
.find(Locator::Css("tbody tr"))
.await
{
Ok(row) => row,
Err(_) => return Ok(YahooTickerResult::NoResults),
};
// Parse the JSON result
let extraction: ExtractionResult = serde_json::from_value(result)
.map_err(|e| anyhow!("Failed to parse extraction result: {}", e))?;
// Extract ticker from first cell
let ticker_cell = first_row
.find(Locator::Css("td:nth-child(1)"))
.await
.map_err(|e| anyhow!("Failed to find ticker cell: {}", e))?;
let ticker = ticker_cell
.text()
.await
.map_err(|e| anyhow!("Failed to get ticker text: {}", e))?
.trim()
.to_string();
if ticker.is_empty() {
Ok(YahooTickerResult::NotFound)
} else {
Ok(YahooTickerResult::Found(ticker))
// Map the script's status tag onto the function result.
match extraction.status.as_str() {
"found" => {
if let Some(ticker) = extraction.ticker {
Ok(Some(YahooCompanyDetails {
ticker,
sector: extraction.sector,
exchange: extraction.exchange,
}))
} else {
// Status says "found" but no ticker was supplied: reported as no match, not as an error.
Ok(None)
}
},
"no_results" => Ok(None),
"not_found" => Ok(None),
"error" => {
// A missing error_message is replaced by a generic text so the error is never empty.
let error_msg = extraction.error_message.unwrap_or_else(|| "Unknown error".to_string());
Err(anyhow!("JavaScript extraction error: {}", error_msg))
},
// Any status value not listed above is silently treated as no match.
_ => Ok(None),
}
}
@@ -276,9 +270,9 @@ pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Ve
}
if events.is_empty() {
eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
logger::log_warn(&format!("Warning: No earnings events extracted for ticker {}", ticker)).await;
} else {
println!("Extracted {} earnings events for {}", events.len(), ticker);
logger::log_info(&format!("Extracted {} earnings events for {}", events.len(), ticker)).await;
}
Ok(events)