removed crossplatformcompany from types
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
//! This module extracts common patterns used across multiple update modules
|
//! This module extracts common patterns used across multiple update modules
|
||||||
//! to reduce code duplication and improve maintainability.
|
//! to reduce code duplication and improve maintainability.
|
||||||
|
|
||||||
use super::types::CompanyCrossPlatformData;
|
use super::types::CompanyData;
|
||||||
use crate::util::logger;
|
use crate::util::logger;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::path::{Path};
|
use std::path::{Path};
|
||||||
@@ -22,7 +22,7 @@ pub async fn load_checkpoint_with_log<P1, P2>(
|
|||||||
checkpoint_path: P1,
|
checkpoint_path: P1,
|
||||||
log_path: P2,
|
log_path: P2,
|
||||||
checkpoint_desc: &str,
|
checkpoint_desc: &str,
|
||||||
) -> Result<HashMap<String, CompanyCrossPlatformData>>
|
) -> Result<HashMap<String, CompanyData>>
|
||||||
where
|
where
|
||||||
P1: AsRef<Path>,
|
P1: AsRef<Path>,
|
||||||
P2: AsRef<Path>,
|
P2: AsRef<Path>,
|
||||||
@@ -30,7 +30,7 @@ where
|
|||||||
let checkpoint_path = checkpoint_path.as_ref();
|
let checkpoint_path = checkpoint_path.as_ref();
|
||||||
let log_path = log_path.as_ref();
|
let log_path = log_path.as_ref();
|
||||||
|
|
||||||
let mut companies: HashMap<String, CompanyCrossPlatformData> = HashMap::new();
|
let mut companies: HashMap<String, CompanyData> = HashMap::new();
|
||||||
|
|
||||||
// Load checkpoint if it exists
|
// Load checkpoint if it exists
|
||||||
if checkpoint_path.exists() {
|
if checkpoint_path.exists() {
|
||||||
@@ -42,7 +42,7 @@ where
|
|||||||
continue; // Skip incomplete lines
|
continue; // Skip incomplete lines
|
||||||
}
|
}
|
||||||
|
|
||||||
match serde_json::from_str::<CompanyCrossPlatformData>(line) {
|
match serde_json::from_str::<CompanyData>(line) {
|
||||||
Ok(company) => {
|
Ok(company) => {
|
||||||
companies.insert(company.name.clone(), company);
|
companies.insert(company.name.clone(), company);
|
||||||
}
|
}
|
||||||
@@ -65,7 +65,7 @@ where
|
|||||||
continue; // Skip incomplete lines
|
continue; // Skip incomplete lines
|
||||||
}
|
}
|
||||||
|
|
||||||
match serde_json::from_str::<CompanyCrossPlatformData>(line) {
|
match serde_json::from_str::<CompanyData>(line) {
|
||||||
Ok(company) => {
|
Ok(company) => {
|
||||||
companies.insert(company.name.clone(), company);
|
companies.insert(company.name.clone(), company);
|
||||||
replayed += 1;
|
replayed += 1;
|
||||||
@@ -91,7 +91,7 @@ where
|
|||||||
pub async fn consolidate_checkpoint<P1, P2>(
|
pub async fn consolidate_checkpoint<P1, P2>(
|
||||||
checkpoint_path: P1,
|
checkpoint_path: P1,
|
||||||
log_path: P2,
|
log_path: P2,
|
||||||
companies: &HashMap<String, CompanyCrossPlatformData>,
|
companies: &HashMap<String, CompanyData>,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
P1: AsRef<Path>,
|
P1: AsRef<Path>,
|
||||||
|
|||||||
@@ -79,14 +79,16 @@ pub fn choose_random<T: Clone>(items: &[T]) -> T {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Extract first valid Yahoo ticker from company
|
/// Extract first valid Yahoo ticker from company
|
||||||
pub fn extract_first_yahoo_ticker(company: &CompanyCrossPlatformData) -> Option<String> {
|
pub fn extract_first_yahoo_ticker(company: &CompanyData) -> Option<String> {
|
||||||
for tickers in company.isin_tickers_map.values() {
|
if let Some(isin_tickers_map) = &company.isin_tickers_map {
|
||||||
for ticker in tickers {
|
for tickers in isin_tickers_map.values() {
|
||||||
if ticker.starts_with("YAHOO:")
|
for ticker in tickers {
|
||||||
&& ticker != "YAHOO:NO_RESULTS"
|
if ticker.starts_with("YAHOO:")
|
||||||
&& ticker != "YAHOO:ERROR"
|
&& ticker != "YAHOO:NO_RESULTS"
|
||||||
{
|
&& ticker != "YAHOO:ERROR"
|
||||||
return Some(ticker.trim_start_matches("YAHOO:").to_string());
|
{
|
||||||
|
return Some(ticker.trim_start_matches("YAHOO:").to_string());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -109,7 +111,7 @@ pub fn sanitize_company_name(name: &str) -> String {
|
|||||||
/// Load companies from JSONL file
|
/// Load companies from JSONL file
|
||||||
pub async fn load_companies_from_jsonl(
|
pub async fn load_companies_from_jsonl(
|
||||||
path: &std::path::Path
|
path: &std::path::Path
|
||||||
) -> anyhow::Result<Vec<CompanyCrossPlatformData>> {
|
) -> anyhow::Result<Vec<CompanyData>> {
|
||||||
let content = tokio::fs::read_to_string(path).await?;
|
let content = tokio::fs::read_to_string(path).await?;
|
||||||
let mut companies = Vec::new();
|
let mut companies = Vec::new();
|
||||||
|
|
||||||
@@ -117,7 +119,7 @@ pub async fn load_companies_from_jsonl(
|
|||||||
if line.trim().is_empty() {
|
if line.trim().is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if let Ok(company) = serde_json::from_str::<CompanyCrossPlatformData>(line) {
|
if let Ok(company) = serde_json::from_str::<CompanyData>(line) {
|
||||||
companies.push(company);
|
companies.push(company);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -52,12 +52,14 @@ pub struct FigiData {
|
|||||||
/// * Name as primary key (for one institution) -> might have to be changed when the first FigiInfo comes in
|
/// * Name as primary key (for one institution) -> might have to be changed when the first FigiInfo comes in
|
||||||
/// * ISIN as the most liquid / preferred traded security (used for fallback)
|
/// * ISIN as the most liquid / preferred traded security (used for fallback)
|
||||||
/// * securities: Grouped by ISIN, filtered for Common Stock only
|
/// * securities: Grouped by ISIN, filtered for Common Stock only
|
||||||
|
/// * isin_tickers_map: Map of ISINs to their associated tickers across platforms
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct CompanyData{
|
pub struct CompanyData{
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub primary_isin: String,
|
pub primary_isin: String,
|
||||||
pub securities: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiInfo>
|
pub securities: HashMap<String, Vec<FigiData>>, // ISIN -> Vec<FigiInfo>
|
||||||
pub yahoo_company_data: Option<Vec<YahooCompanyData>>,
|
pub yahoo_company_data: Option<Vec<YahooCompanyData>>,
|
||||||
|
pub isin_tickers_map: Option<HashMap<String, Vec<String>>>, // ISIN -> Tickers
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -67,14 +69,6 @@ pub struct YahooCompanyData {
|
|||||||
pub exchange: Option<String>,
|
pub exchange: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
||||||
pub struct CompanyCrossPlatformData {
|
|
||||||
pub name: String,
|
|
||||||
pub isin_tickers_map: HashMap<String, Vec<String>>, // ISIN -> Tickers
|
|
||||||
pub sector: Option<String>,
|
|
||||||
pub exchange: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct WarrantData {
|
pub struct WarrantData {
|
||||||
pub company_name: String, // key in CompanyData
|
pub company_name: String, // key in CompanyData
|
||||||
|
|||||||
@@ -20,14 +20,14 @@ use anyhow::{anyhow, Result};
|
|||||||
|
|
||||||
/// Represents a write command to be serialized through the log writer
|
/// Represents a write command to be serialized through the log writer
|
||||||
enum LogCommand {
|
enum LogCommand {
|
||||||
Write(CompanyCrossPlatformData),
|
Write(CompanyData),
|
||||||
Checkpoint,
|
Checkpoint,
|
||||||
Shutdown,
|
Shutdown,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Result from processing a single company
|
/// Result from processing a single company
|
||||||
struct CompanyProcessResult {
|
struct CompanyProcessResult {
|
||||||
company: CompanyCrossPlatformData,
|
company: CompanyData,
|
||||||
is_update: bool,
|
is_update: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ struct CompanyProcessResult {
|
|||||||
fn company_needs_processing(
|
fn company_needs_processing(
|
||||||
company_name: &str,
|
company_name: &str,
|
||||||
company_info: &CompanyData,
|
company_info: &CompanyData,
|
||||||
existing_companies: &HashMap<String, CompanyCrossPlatformData>,
|
existing_companies: &HashMap<String, CompanyData>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
// If company not in existing data at all, definitely needs processing
|
// If company not in existing data at all, definitely needs processing
|
||||||
let Some(existing_entry) = existing_companies.get(company_name) else {
|
let Some(existing_entry) = existing_companies.get(company_name) else {
|
||||||
@@ -56,20 +56,25 @@ fn company_needs_processing(
|
|||||||
// Check each required ISIN
|
// Check each required ISIN
|
||||||
for isin in required_isins {
|
for isin in required_isins {
|
||||||
// Check if this ISIN exists in the company's ticker map
|
// Check if this ISIN exists in the company's ticker map
|
||||||
if let Some(tickers) = existing_entry.isin_tickers_map.get(&isin) {
|
if let Some(map) = &existing_entry.isin_tickers_map {
|
||||||
// Check if this ISIN has valid Yahoo data
|
if let Some(tickers) = map.get(&isin) {
|
||||||
let has_valid_yahoo = tickers.iter().any(|t| {
|
// Check if this ISIN has valid Yahoo data
|
||||||
t.starts_with("YAHOO:") &&
|
let has_valid_yahoo = tickers.iter().any(|t| {
|
||||||
t != "YAHOO:ERROR" //&& // Error marker means needs retry
|
t.starts_with("YAHOO:") &&
|
||||||
//t != "YAHOO:NO_RESULTS" // This is actually valid (legitimately not found)
|
t != "YAHOO:ERROR" //&& // Error marker means needs retry
|
||||||
});
|
//t != "YAHOO:NO_RESULTS" // This is actually valid (legitimately not found)
|
||||||
|
});
|
||||||
|
|
||||||
// If no valid Yahoo data for this ISIN, company needs processing
|
// If no valid Yahoo data for this ISIN, company needs processing
|
||||||
if !has_valid_yahoo {
|
if !has_valid_yahoo {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// ISIN not in map at all, needs processing
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// ISIN not in map at all, needs processing
|
// No isin_tickers_map at all, needs processing
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -731,7 +736,7 @@ async fn scrape_with_retry(
|
|||||||
async fn process_single_company_validated(
|
async fn process_single_company_validated(
|
||||||
name: String,
|
name: String,
|
||||||
company_info: CompanyData,
|
company_info: CompanyData,
|
||||||
existing_entry: Option<CompanyCrossPlatformData>,
|
existing_entry: Option<CompanyData>,
|
||||||
pool: &Arc<ChromeDriverPool>,
|
pool: &Arc<ChromeDriverPool>,
|
||||||
shutdown_flag: &Arc<AtomicBool>,
|
shutdown_flag: &Arc<AtomicBool>,
|
||||||
) -> anyhow::Result<Option<CompanyProcessResult>> {
|
) -> anyhow::Result<Option<CompanyProcessResult>> {
|
||||||
@@ -746,12 +751,9 @@ async fn process_single_company_validated(
|
|||||||
let mut isin_tickers_map: HashMap<String, Vec<String>> =
|
let mut isin_tickers_map: HashMap<String, Vec<String>> =
|
||||||
existing_entry
|
existing_entry
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|e| e.isin_tickers_map.clone())
|
.and_then(|e| e.isin_tickers_map.clone())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
|
|
||||||
let mut sector = existing_entry.as_ref().and_then(|e| e.sector.clone());
|
|
||||||
let mut exchange = existing_entry.as_ref().and_then(|e| e.exchange.clone());
|
|
||||||
|
|
||||||
// Collect unique ISIN-ticker pairs
|
// Collect unique ISIN-ticker pairs
|
||||||
let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
|
let mut unique_isin_ticker_pairs: HashMap<String, Vec<String>> = HashMap::new();
|
||||||
|
|
||||||
@@ -808,16 +810,6 @@ async fn process_single_company_validated(
|
|||||||
)).await;
|
)).await;
|
||||||
|
|
||||||
tickers.push(format!("YAHOO:{}", details.ticker));
|
tickers.push(format!("YAHOO:{}", details.ticker));
|
||||||
|
|
||||||
if sector.is_none() && details.sector.is_some() {
|
|
||||||
sector = details.sector.clone();
|
|
||||||
logger::log_info(&format!(" Sector: {}", details.sector.as_ref().unwrap())).await;
|
|
||||||
}
|
|
||||||
|
|
||||||
if exchange.is_none() && details.exchange.is_some() {
|
|
||||||
exchange = details.exchange.clone();
|
|
||||||
logger::log_info(&format!(" Exchange: {}", details.exchange.as_ref().unwrap())).await;
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
|
logger::log_warn(&format!("◯ No search results for ISIN {} (company: {})", isin, name)).await;
|
||||||
@@ -866,11 +858,12 @@ async fn process_single_company_validated(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !isin_tickers_map.is_empty() {
|
if !isin_tickers_map.is_empty() {
|
||||||
let company_entry = CompanyCrossPlatformData {
|
let company_entry = CompanyData {
|
||||||
name: name.clone(),
|
name: name.clone(),
|
||||||
isin_tickers_map,
|
primary_isin: company_info.primary_isin.clone(),
|
||||||
sector,
|
securities: company_info.securities.clone(),
|
||||||
exchange,
|
yahoo_company_data: company_info.yahoo_company_data.clone(),
|
||||||
|
isin_tickers_map: Some(isin_tickers_map),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Some(CompanyProcessResult {
|
Ok(Some(CompanyProcessResult {
|
||||||
|
|||||||
@@ -20,15 +20,15 @@ use tokio::sync::mpsc;
|
|||||||
/// Result of processing a single company
|
/// Result of processing a single company
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum CompanyProcessResult {
|
pub enum CompanyProcessResult {
|
||||||
Valid(CompanyCrossPlatformData),
|
Valid(CompanyData),
|
||||||
FilteredLowCap { name: String, market_cap: f64 },
|
FilteredLowCap { name: String, market_cap: f64 },
|
||||||
FilteredNoPrice { name: String },
|
FilteredNoPrice { name: String },
|
||||||
Failed { company: CompanyCrossPlatformData, error: String, is_transient: bool },
|
Failed { company: CompanyData, error: String, is_transient: bool },
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Represents a write command to be serialized through the log writer
|
/// Represents a write command to be serialized through the log writer
|
||||||
enum LogCommand {
|
enum LogCommand {
|
||||||
Write(CompanyCrossPlatformData),
|
Write(CompanyData),
|
||||||
Checkpoint,
|
Checkpoint,
|
||||||
Shutdown,
|
Shutdown,
|
||||||
}
|
}
|
||||||
@@ -81,7 +81,7 @@ pub async fn companies_yahoo_cleansed_no_data(paths: &DataPaths) -> Result<usize
|
|||||||
|
|
||||||
total_count += 1;
|
total_count += 1;
|
||||||
|
|
||||||
let company: CompanyCrossPlatformData = match serde_json::from_str(&line) {
|
let company: CompanyData = match serde_json::from_str(&line) {
|
||||||
Ok(c) => c,
|
Ok(c) => c,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
logger::log_warn(&format!(" Failed to parse company on line {}: {}", total_count, e)).await;
|
logger::log_warn(&format!(" Failed to parse company on line {}: {}", total_count, e)).await;
|
||||||
@@ -90,13 +90,17 @@ pub async fn companies_yahoo_cleansed_no_data(paths: &DataPaths) -> Result<usize
|
|||||||
};
|
};
|
||||||
|
|
||||||
let has_valid_yahoo = company.isin_tickers_map
|
let has_valid_yahoo = company.isin_tickers_map
|
||||||
.values()
|
.as_ref()
|
||||||
.flatten()
|
.map(|map| {
|
||||||
.any(|ticker| {
|
map.values()
|
||||||
ticker.starts_with("YAHOO:")
|
.flatten()
|
||||||
&& ticker != "YAHOO:NO_RESULTS"
|
.any(|ticker| {
|
||||||
&& ticker != "YAHOO:ERROR"
|
ticker.starts_with("YAHOO:")
|
||||||
});
|
&& ticker != "YAHOO:NO_RESULTS"
|
||||||
|
&& ticker != "YAHOO:ERROR"
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.unwrap_or(false);
|
||||||
|
|
||||||
if has_valid_yahoo {
|
if has_valid_yahoo {
|
||||||
let json_line = serde_json::to_string(&company)?;
|
let json_line = serde_json::to_string(&company)?;
|
||||||
@@ -194,7 +198,7 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
|||||||
logger::log_info(" Cleansing companies with low Yahoo profile...").await;
|
logger::log_info(" Cleansing companies with low Yahoo profile...").await;
|
||||||
|
|
||||||
// === RECOVERY PHASE: Load checkpoint + replay log ===
|
// === RECOVERY PHASE: Load checkpoint + replay log ===
|
||||||
let mut existing_companies: HashMap<String, CompanyCrossPlatformData> = HashMap::new();
|
let mut existing_companies: HashMap<String, CompanyData> = HashMap::new();
|
||||||
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
|
let mut processed_names: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||||
|
|
||||||
if checkpoint_path.exists() {
|
if checkpoint_path.exists() {
|
||||||
@@ -206,7 +210,7 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
|||||||
continue; // Skip incomplete lines
|
continue; // Skip incomplete lines
|
||||||
}
|
}
|
||||||
|
|
||||||
match serde_json::from_str::<CompanyCrossPlatformData>(line) {
|
match serde_json::from_str::<CompanyData>(line) {
|
||||||
Ok(company) => {
|
Ok(company) => {
|
||||||
processed_names.insert(company.name.clone());
|
processed_names.insert(company.name.clone());
|
||||||
existing_companies.insert(company.name.clone(), company);
|
existing_companies.insert(company.name.clone(), company);
|
||||||
@@ -229,7 +233,7 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
|||||||
continue; // Skip incomplete lines
|
continue; // Skip incomplete lines
|
||||||
}
|
}
|
||||||
|
|
||||||
match serde_json::from_str::<CompanyCrossPlatformData>(line) {
|
match serde_json::from_str::<CompanyData>(line) {
|
||||||
Ok(company) => {
|
Ok(company) => {
|
||||||
processed_names.insert(company.name.clone());
|
processed_names.insert(company.name.clone());
|
||||||
existing_companies.insert(company.name.clone(), company);
|
existing_companies.insert(company.name.clone(), company);
|
||||||
@@ -251,7 +255,7 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
|||||||
logger::log_info(&format!("Loaded {} companies from input", input_companies.len())).await;
|
logger::log_info(&format!("Loaded {} companies from input", input_companies.len())).await;
|
||||||
|
|
||||||
// === BUILD PENDING LIST (smart skip logic) ===
|
// === BUILD PENDING LIST (smart skip logic) ===
|
||||||
let mut pending: Vec<CompanyCrossPlatformData> = input_companies
|
let mut pending: Vec<CompanyData> = input_companies
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|company| company_needs_processing(company, &existing_companies))
|
.filter(|company| company_needs_processing(company, &existing_companies))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -608,7 +612,7 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
|||||||
|
|
||||||
/// Helper function to spawn a validation task (reduces code duplication)
|
/// Helper function to spawn a validation task (reduces code duplication)
|
||||||
fn spawn_validation_task(
|
fn spawn_validation_task(
|
||||||
company: CompanyCrossPlatformData,
|
company: CompanyData,
|
||||||
yahoo_pool: &Arc<YahooClientPool>,
|
yahoo_pool: &Arc<YahooClientPool>,
|
||||||
paths: &Arc<DataPaths>,
|
paths: &Arc<DataPaths>,
|
||||||
write_tx: &mpsc::Sender<LogCommand>,
|
write_tx: &mpsc::Sender<LogCommand>,
|
||||||
@@ -688,7 +692,7 @@ fn spawn_validation_task(
|
|||||||
|
|
||||||
/// Process a single company with full error categorization
|
/// Process a single company with full error categorization
|
||||||
async fn process_company_with_validation(
|
async fn process_company_with_validation(
|
||||||
company: &CompanyCrossPlatformData,
|
company: &CompanyData,
|
||||||
yahoo_pool: &Arc<YahooClientPool>,
|
yahoo_pool: &Arc<YahooClientPool>,
|
||||||
paths: &DataPaths,
|
paths: &DataPaths,
|
||||||
) -> CompanyProcessResult {
|
) -> CompanyProcessResult {
|
||||||
@@ -897,8 +901,8 @@ async fn save_company_core_data(
|
|||||||
|
|
||||||
/// Check if a company needs processing (validation check)
|
/// Check if a company needs processing (validation check)
|
||||||
fn company_needs_processing(
|
fn company_needs_processing(
|
||||||
company: &CompanyCrossPlatformData,
|
company: &CompanyData,
|
||||||
existing_companies: &HashMap<String, CompanyCrossPlatformData>,
|
existing_companies: &HashMap<String, CompanyData>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
// If company exists in cleaned output, skip it
|
// If company exists in cleaned output, skip it
|
||||||
!existing_companies.contains_key(&company.name)
|
!existing_companies.contains_key(&company.name)
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ enum LogCommand {
|
|||||||
|
|
||||||
/// Type alias for enrichment function
|
/// Type alias for enrichment function
|
||||||
type EnrichmentFn = Arc<
|
type EnrichmentFn = Arc<
|
||||||
dyn Fn(CompanyCrossPlatformData, Arc<YahooClientPool>, DataPaths)
|
dyn Fn(CompanyData, Arc<YahooClientPool>, DataPaths)
|
||||||
-> Pin<Box<dyn Future<Output = anyhow::Result<()>> + Send>>
|
-> Pin<Box<dyn Future<Output = anyhow::Result<()>> + Send>>
|
||||||
+ Send
|
+ Send
|
||||||
+ Sync
|
+ Sync
|
||||||
@@ -104,7 +104,7 @@ pub async fn enrich_companies_with_events(
|
|||||||
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
||||||
|
|
||||||
// Filter companies that need enrichment
|
// Filter companies that need enrichment
|
||||||
let pending_companies: Vec<CompanyCrossPlatformData> = companies
|
let pending_companies: Vec<CompanyData> = companies
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|company| !enriched_companies.contains(&company.name))
|
.filter(|company| !enriched_companies.contains(&company.name))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -283,7 +283,7 @@ async fn track_events_completion(
|
|||||||
|
|
||||||
/// Enrich a single company with event data
|
/// Enrich a single company with event data
|
||||||
async fn enrich_company_with_events(
|
async fn enrich_company_with_events(
|
||||||
company: &CompanyCrossPlatformData,
|
company: &CompanyData,
|
||||||
yahoo_pool: &Arc<YahooClientPool>,
|
yahoo_pool: &Arc<YahooClientPool>,
|
||||||
paths: &DataPaths,
|
paths: &DataPaths,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -438,7 +438,7 @@ pub async fn enrich_companies_with_option(
|
|||||||
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
||||||
|
|
||||||
// Filter companies that need enrichment
|
// Filter companies that need enrichment
|
||||||
let pending_companies: Vec<CompanyCrossPlatformData> = companies
|
let pending_companies: Vec<CompanyData> = companies
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|company| !enriched_companies.contains(&company.name))
|
.filter(|company| !enriched_companies.contains(&company.name))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -605,7 +605,7 @@ async fn track_option_completion(
|
|||||||
|
|
||||||
/// Enrich a single company with option data
|
/// Enrich a single company with option data
|
||||||
async fn enrich_company_with_option(
|
async fn enrich_company_with_option(
|
||||||
company: &CompanyCrossPlatformData,
|
company: &CompanyData,
|
||||||
yahoo_pool: &Arc<YahooClientPool>,
|
yahoo_pool: &Arc<YahooClientPool>,
|
||||||
paths: &DataPaths,
|
paths: &DataPaths,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -697,7 +697,7 @@ pub async fn enrich_companies_with_chart(
|
|||||||
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
logger::log_info(&format!("Found {} companies to process", total_companies)).await;
|
||||||
|
|
||||||
// Filter companies that need enrichment
|
// Filter companies that need enrichment
|
||||||
let pending_companies: Vec<CompanyCrossPlatformData> = companies
|
let pending_companies: Vec<CompanyData> = companies
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(|company| !enriched_companies.contains(&company.name))
|
.filter(|company| !enriched_companies.contains(&company.name))
|
||||||
.collect();
|
.collect();
|
||||||
@@ -864,7 +864,7 @@ async fn track_chart_completion(
|
|||||||
|
|
||||||
/// Enrich a single company with chart data
|
/// Enrich a single company with chart data
|
||||||
async fn enrich_company_with_chart(
|
async fn enrich_company_with_chart(
|
||||||
company: &CompanyCrossPlatformData,
|
company: &CompanyData,
|
||||||
yahoo_pool: &Arc<YahooClientPool>,
|
yahoo_pool: &Arc<YahooClientPool>,
|
||||||
paths: &DataPaths,
|
paths: &DataPaths,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
@@ -1005,7 +1005,7 @@ fn spawn_log_writer(
|
|||||||
/// - `shutdown_flag`: Flag to signal shutdown
|
/// - `shutdown_flag`: Flag to signal shutdown
|
||||||
/// - `enrichment_fn`: The specific enrichment function to call (events, option, chart, etc.)
|
/// - `enrichment_fn`: The specific enrichment function to call (events, option, chart, etc.)
|
||||||
fn spawn_enrichment_task(
|
fn spawn_enrichment_task(
|
||||||
company: CompanyCrossPlatformData,
|
company: CompanyData,
|
||||||
yahoo_pool: Arc<YahooClientPool>,
|
yahoo_pool: Arc<YahooClientPool>,
|
||||||
paths: DataPaths,
|
paths: DataPaths,
|
||||||
processed_count: Arc<AtomicUsize>,
|
processed_count: Arc<AtomicUsize>,
|
||||||
|
|||||||
@@ -655,6 +655,7 @@ fn prepare_common_stock_entry(
|
|||||||
primary_isin,
|
primary_isin,
|
||||||
securities: grouped_by_isin,
|
securities: grouped_by_isin,
|
||||||
yahoo_company_data: None,
|
yahoo_company_data: None,
|
||||||
|
isin_tickers_map: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -303,9 +303,11 @@ pub async fn get_all_tickers_from_companies_jsonl(paths: &DataPaths) -> anyhow::
|
|||||||
let content = tokio::fs::read_to_string(companies_file).await?;
|
let content = tokio::fs::read_to_string(companies_file).await?;
|
||||||
let mut tickers = Vec::new();
|
let mut tickers = Vec::new();
|
||||||
for line in content.lines() {
|
for line in content.lines() {
|
||||||
let company: CompanyCrossPlatformData = serde_json::from_str(line)?;
|
let company: CompanyData = serde_json::from_str(line)?;
|
||||||
for (_isin, ticker_vec) in company.isin_tickers_map {
|
if let Some(isin_tickers_map) = company.isin_tickers_map {
|
||||||
tickers.extend(ticker_vec);
|
for (_isin, ticker_vec) in isin_tickers_map {
|
||||||
|
tickers.extend(ticker_vec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(tickers)
|
Ok(tickers)
|
||||||
|
|||||||
Reference in New Issue
Block a user