added helper functions to reduce bloat

This commit is contained in:
2026-01-09 21:24:18 +01:00
parent ba841248f0
commit c6d301d434
14 changed files with 410 additions and 832 deletions

View File

@@ -1,6 +1,7 @@
// src/corporate/update_companies_cleanse.rs
use super::{helpers::*, types::*};
use crate::config::Config;
use crate::corporate::checkpoint_helpers;
use crate::util::directories::DataPaths;
use crate::util::logger;
use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule};
@@ -271,29 +272,8 @@ pub async fn companies_yahoo_cleansed_low_profile(
logger::log_info(" ✓ All companies already processed").await;
// Consolidate log into checkpoint before exiting
if log_path.exists() {
let log_metadata = tokio::fs::metadata(&log_path).await.ok();
if log_metadata.map(|m| m.len() > 0).unwrap_or(false) {
logger::log_info(" Consolidating update log into checkpoint...").await;
let temp_checkpoint = checkpoint_path.with_extension("tmp");
let mut temp_file = File::create(&temp_checkpoint).await?;
for company in existing_companies.values() {
let json_line = serde_json::to_string(company)?;
temp_file.write_all(json_line.as_bytes()).await?;
temp_file.write_all(b"\n").await?;
}
temp_file.flush().await?;
temp_file.sync_data().await?;
drop(temp_file);
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
tokio::fs::remove_file(&log_path).await.ok();
logger::log_info(&format!(" ✓ Consolidated {} companies", existing_companies.len())).await;
}
if checkpoint_helpers::log_has_content(&log_path).await {
checkpoint_helpers::consolidate_checkpoint(&checkpoint_path, &log_path, &existing_companies).await?;
}
return Ok(existing_companies.len());
@@ -851,37 +831,6 @@ fn is_transient_error(error: &str) -> bool {
true
}
/// Read a JSONL file and decode one company record per line.
///
/// The whole file is read into memory first. Lines that are blank after
/// trimming are ignored, and lines that do not deserialize into a
/// `CompanyCrossPlatformInfo` are dropped without reporting an error, so the
/// returned vector may hold fewer entries than the file has lines.
///
/// # Errors
///
/// Fails only when the file at `path` cannot be read as a UTF-8 string.
async fn load_companies_from_jsonl(path: &std::path::Path) -> anyhow::Result<Vec<CompanyCrossPlatformInfo>> {
    let raw = tokio::fs::read_to_string(path).await?;

    let parsed: Vec<CompanyCrossPlatformInfo> = raw
        .lines()
        .filter(|entry| !entry.trim().is_empty())
        // Best-effort decode: a malformed line is skipped rather than aborting the load.
        .filter_map(|entry| serde_json::from_str::<CompanyCrossPlatformInfo>(entry).ok())
        .collect();

    Ok(parsed)
}
/// Return the first usable Yahoo ticker symbol recorded for `company`.
///
/// Walks every ticker list in `company.isin_tickers_map` (in the map's own
/// iteration order) and looks for an entry that starts with `YAHOO:`. The
/// placeholder entries `YAHOO:NO_RESULTS` and `YAHOO:ERROR` are ignored.
///
/// Returns the symbol with the `YAHOO:` prefix removed, or `None` when no
/// entry yields a non-empty symbol.
fn extract_first_yahoo_ticker(company: &CompanyCrossPlatformInfo) -> Option<String> {
    const YAHOO_PREFIX: &str = "YAHOO:";

    for tickers in company.isin_tickers_map.values() {
        for ticker in tickers {
            if !ticker.starts_with(YAHOO_PREFIX)
                || ticker == "YAHOO:NO_RESULTS"
                || ticker == "YAHOO:ERROR"
            {
                continue;
            }

            // `trim_start_matches` removes the prefix repeatedly; this is kept
            // so that existing non-empty results are unchanged.
            let symbol = ticker.trim_start_matches(YAHOO_PREFIX);

            // An entry consisting only of the prefix (e.g. "YAHOO:") carries no
            // symbol; skip it instead of returning an empty ticker.
            if !symbol.is_empty() {
                return Some(symbol.to_string());
            }
        }
    }

    None
}
fn extract_market_cap(summary: &crate::scraper::yahoo::QuoteSummary) -> f64 {
let price_module = match summary.modules.get("price") {
Some(m) => m,
@@ -946,16 +895,7 @@ async fn save_company_core_data(
) -> anyhow::Result<()> {
use tokio::fs;
let safe_name = company_name
.replace("/", "_")
.replace("\\", "_")
.replace(":", "_")
.replace("*", "_")
.replace("?", "_")
.replace("\"", "_")
.replace("<", "_")
.replace(">", "_")
.replace("|", "_");
let safe_name = sanitize_company_name(company_name);
let company_dir = paths.corporate_dir().join(&safe_name).join("core");
fs::create_dir_all(&company_dir).await?;