added helper functions to reduce bloat
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
// src/corporate/update_companies_cleanse.rs
|
||||
use super::{helpers::*, types::*};
|
||||
use crate::config::Config;
|
||||
use crate::corporate::checkpoint_helpers;
|
||||
use crate::util::directories::DataPaths;
|
||||
use crate::util::logger;
|
||||
use crate::scraper::yahoo::{YahooClientPool, QuoteSummaryModule};
|
||||
@@ -271,29 +272,8 @@ pub async fn companies_yahoo_cleansed_low_profile(
|
||||
logger::log_info(" ✓ All companies already processed").await;
|
||||
|
||||
// Consolidate log into checkpoint before exiting
|
||||
if log_path.exists() {
|
||||
let log_metadata = tokio::fs::metadata(&log_path).await.ok();
|
||||
if log_metadata.map(|m| m.len() > 0).unwrap_or(false) {
|
||||
logger::log_info(" Consolidating update log into checkpoint...").await;
|
||||
|
||||
let temp_checkpoint = checkpoint_path.with_extension("tmp");
|
||||
let mut temp_file = File::create(&temp_checkpoint).await?;
|
||||
|
||||
for company in existing_companies.values() {
|
||||
let json_line = serde_json::to_string(company)?;
|
||||
temp_file.write_all(json_line.as_bytes()).await?;
|
||||
temp_file.write_all(b"\n").await?;
|
||||
}
|
||||
|
||||
temp_file.flush().await?;
|
||||
temp_file.sync_data().await?;
|
||||
drop(temp_file);
|
||||
|
||||
tokio::fs::rename(&temp_checkpoint, &checkpoint_path).await?;
|
||||
tokio::fs::remove_file(&log_path).await.ok();
|
||||
|
||||
logger::log_info(&format!(" ✓ Consolidated {} companies", existing_companies.len())).await;
|
||||
}
|
||||
if checkpoint_helpers::log_has_content(&log_path).await {
|
||||
checkpoint_helpers::consolidate_checkpoint(&checkpoint_path, &log_path, &existing_companies).await?;
|
||||
}
|
||||
|
||||
return Ok(existing_companies.len());
|
||||
@@ -851,37 +831,6 @@ fn is_transient_error(error: &str) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
/// Load companies from JSONL file
|
||||
async fn load_companies_from_jsonl(path: &std::path::Path) -> anyhow::Result<Vec<CompanyCrossPlatformInfo>> {
|
||||
let content = tokio::fs::read_to_string(path).await?;
|
||||
let mut companies = Vec::new();
|
||||
|
||||
for line in content.lines() {
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
if let Ok(company) = serde_json::from_str::<CompanyCrossPlatformInfo>(line) {
|
||||
companies.push(company);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(companies)
|
||||
}
|
||||
|
||||
fn extract_first_yahoo_ticker(company: &CompanyCrossPlatformInfo) -> Option<String> {
|
||||
for tickers in company.isin_tickers_map.values() {
|
||||
for ticker in tickers {
|
||||
if ticker.starts_with("YAHOO:")
|
||||
&& ticker != "YAHOO:NO_RESULTS"
|
||||
&& ticker != "YAHOO:ERROR"
|
||||
{
|
||||
return Some(ticker.trim_start_matches("YAHOO:").to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn extract_market_cap(summary: &crate::scraper::yahoo::QuoteSummary) -> f64 {
|
||||
let price_module = match summary.modules.get("price") {
|
||||
Some(m) => m,
|
||||
@@ -946,16 +895,7 @@ async fn save_company_core_data(
|
||||
) -> anyhow::Result<()> {
|
||||
use tokio::fs;
|
||||
|
||||
let safe_name = company_name
|
||||
.replace("/", "_")
|
||||
.replace("\\", "_")
|
||||
.replace(":", "_")
|
||||
.replace("*", "_")
|
||||
.replace("?", "_")
|
||||
.replace("\"", "_")
|
||||
.replace("<", "_")
|
||||
.replace(">", "_")
|
||||
.replace("|", "_");
|
||||
let safe_name = sanitize_company_name(company_name);
|
||||
|
||||
let company_dir = paths.corporate_dir().join(&safe_name).join("core");
|
||||
fs::create_dir_all(&company_dir).await?;
|
||||
|
||||
Reference in New Issue
Block a user