added update_rule for incremental change
This commit is contained in:
@@ -8,6 +8,7 @@ use tokio::{time::{Duration as TokioDuration, sleep}};
|
||||
use std::{sync::Arc};
|
||||
use anyhow::{anyhow, Result};
|
||||
|
||||
const YAHOO_COMPANY_EXTRACTION_JS: &str = include_str!("yahoo_company_extraction.js");
|
||||
/// Mapping existing
|
||||
|
||||
/// Gets historical stock price data: daily (xxxx - 2025) and hourly (last 30 days)
|
||||
@@ -20,6 +21,16 @@ pub enum YahooTickerResult {
|
||||
AmbiguousResults,
|
||||
}
|
||||
|
||||
/// Value deserialized (via `serde_json::from_value`) from the result of running
/// the `YAHOO_COMPANY_EXTRACTION_JS` script in `extract_company_details`.
#[derive(Debug, Deserialize)]
|
||||
pub struct ExtractionResult {
|
||||
/// Outcome tag. `extract_company_details` handles "found", "no_results",
/// "not_found" and "error"; any other value is treated as no result.
status: String,
|
||||
/// Ticker symbol. A "found" status without a ticker is treated as no result.
ticker: Option<String>,
|
||||
/// Sector, copied unchanged into `YahooCompanyDetails` when a ticker is found.
sector: Option<String>,
|
||||
/// Exchange, copied unchanged into `YahooCompanyDetails` when a ticker is found.
exchange: Option<String>,
|
||||
/// Message reported when `status` is "error"; the caller substitutes
/// "Unknown error" when it is absent. `#[serde(default)]` allows the key
/// to be omitted from the JSON.
#[serde(default)]
|
||||
error_message: Option<String>,
|
||||
}
|
||||
|
||||
impl YahooTickerResult {
|
||||
pub fn to_tagged_string(&self) -> String {
|
||||
match self {
|
||||
@@ -42,69 +53,52 @@ impl YahooTickerResult {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn scrape_ticker_by_isin(
|
||||
pub async fn scrape_company_details_by_isin(
|
||||
pool: &Arc<ChromeDriverPool>,
|
||||
isin: &str,
|
||||
) -> anyhow::Result<YahooTickerResult> {
|
||||
) -> anyhow::Result<Option<YahooCompanyDetails>> {
|
||||
let isin = isin.to_string();
|
||||
pool.execute(format!("https://finance.yahoo.com/lookup?s={}", isin), move |client| {
|
||||
pool.execute(format!("https://finance.yahoo.com/lookup/?s={}", isin), move |client| {
|
||||
let isin = isin.clone();
|
||||
Box::pin(async move {
|
||||
sleep(TokioDuration::from_millis(1000)).await;
|
||||
reject_yahoo_cookies(&client).await?;
|
||||
sleep(TokioDuration::from_millis(1000)).await;
|
||||
extract_ticker_by_isin(&client, &isin).await
|
||||
extract_company_details(&client, &isin).await
|
||||
})
|
||||
}).await
|
||||
}
|
||||
|
||||
pub async fn extract_ticker_by_isin(
|
||||
pub async fn extract_company_details(
|
||||
client: &Client,
|
||||
_isin: &str,
|
||||
) -> Result<YahooTickerResult> {
|
||||
//let search_url = format!("https://finance.yahoo.com/lookup?s={}", isin);
|
||||
|
||||
// Check for "No results found" message
|
||||
if client.find(Locator::Css(".noData")).await.is_ok() {
|
||||
return Ok(YahooTickerResult::NoResults);
|
||||
}
|
||||
|
||||
// Wait for results table
|
||||
let table = match client
|
||||
.wait()
|
||||
.for_element(Locator::Css("table[data-test='lookup-table']"))
|
||||
.await
|
||||
{
|
||||
Ok(t) => t,
|
||||
Err(_) => return Ok(YahooTickerResult::NoResults),
|
||||
};
|
||||
) -> Result<Option<YahooCompanyDetails>> {
|
||||
// Execute the JavaScript extraction script
|
||||
let result = client.execute(YAHOO_COMPANY_EXTRACTION_JS, vec![]).await?;
|
||||
|
||||
// Find first row
|
||||
let first_row = match table
|
||||
.find(Locator::Css("tbody tr"))
|
||||
.await
|
||||
{
|
||||
Ok(row) => row,
|
||||
Err(_) => return Ok(YahooTickerResult::NoResults),
|
||||
};
|
||||
// Parse the JSON result
|
||||
let extraction: ExtractionResult = serde_json::from_value(result)
|
||||
.map_err(|e| anyhow!("Failed to parse extraction result: {}", e))?;
|
||||
|
||||
// Extract ticker from first cell
|
||||
let ticker_cell = first_row
|
||||
.find(Locator::Css("td:nth-child(1)"))
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to find ticker cell: {}", e))?;
|
||||
|
||||
let ticker = ticker_cell
|
||||
.text()
|
||||
.await
|
||||
.map_err(|e| anyhow!("Failed to get ticker text: {}", e))?
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
if ticker.is_empty() {
|
||||
Ok(YahooTickerResult::NotFound)
|
||||
} else {
|
||||
Ok(YahooTickerResult::Found(ticker))
|
||||
match extraction.status.as_str() {
|
||||
"found" => {
|
||||
if let Some(ticker) = extraction.ticker {
|
||||
Ok(Some(YahooCompanyDetails {
|
||||
ticker,
|
||||
sector: extraction.sector,
|
||||
exchange: extraction.exchange,
|
||||
}))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
},
|
||||
"no_results" => Ok(None),
|
||||
"not_found" => Ok(None),
|
||||
"error" => {
|
||||
let error_msg = extraction.error_message.unwrap_or_else(|| "Unknown error".to_string());
|
||||
Err(anyhow!("JavaScript extraction error: {}", error_msg))
|
||||
},
|
||||
_ => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -276,9 +270,9 @@ pub async fn extract_earnings_events(client: &Client, ticker: &str) -> Result<Ve
|
||||
}
|
||||
|
||||
if events.is_empty() {
|
||||
eprintln!("Warning: No earnings events extracted for ticker {}", ticker);
|
||||
logger::log_warn(&format!("Warning: No earnings events extracted for ticker {}", ticker)).await;
|
||||
} else {
|
||||
println!("Extracted {} earnings events for {}", events.len(), ticker);
|
||||
logger::log_info(&format!("Extracted {} earnings events for {}", events.len(), ticker)).await;
|
||||
}
|
||||
|
||||
Ok(events)
|
||||
|
||||
Reference in New Issue
Block a user