// src/corporate/atomic_writer.rs
//
// Atomic JSONL writer that prevents partial/corrupted results from being written
|
|
|
|
use anyhow::Result;
|
|
use serde::Serialize;
|
|
use std::collections::HashMap;
|
|
use std::path::PathBuf;
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
use std::sync::Arc;
|
|
use tokio::fs::{File, OpenOptions};
|
|
use tokio::io::AsyncWriteExt;
|
|
use tokio::sync::mpsc;
|
|
|
|
/// Command sent over the channel to the writer service.
///
/// Results are first staged in memory under an `id` and only reach the output
/// file once a matching commit command is processed.
#[derive(Debug)]
pub enum WriteCommand<T> {
    /// Stage a result for writing (held in memory until committed)
    Stage { id: String, data: T },

    /// Commit staged result to disk (atomic write)
    Commit { id: String },

    /// Rollback staged result (discard without writing)
    Rollback { id: String },

    /// Commit all pending staged results and flush
    CommitAll,

    /// Stop the service loop; results still staged at that point are rolled
    /// back (discarded), not written
    Shutdown,
}
|
|
|
|
/// Result of a write operation
#[derive(Debug)]
pub struct WriteResult {
    /// Identifier of the result the operation was about
    pub id: String,
    /// Whether the operation succeeded
    pub success: bool,
    /// Error description, if any
    pub error: Option<String>,
}
|
|
|
|
/// Atomic writer that prevents partial results from being written
|
|
pub struct AtomicJsonlWriter<T> {
|
|
file: File,
|
|
staged: HashMap<String, T>,
|
|
committed_count: usize,
|
|
rollback_count: usize,
|
|
}
|
|
|
|
impl<T: Serialize + Clone> AtomicJsonlWriter<T> {
|
|
pub async fn new(path: PathBuf) -> Result<Self> {
|
|
// Ensure parent directory exists
|
|
if let Some(parent) = path.parent() {
|
|
tokio::fs::create_dir_all(parent).await?;
|
|
}
|
|
|
|
let file = OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&path)
|
|
.await?;
|
|
|
|
crate::util::logger::log_info(&format!(
|
|
"Atomic writer initialized: {:?}",
|
|
path
|
|
)).await;
|
|
|
|
Ok(Self {
|
|
file,
|
|
staged: HashMap::new(),
|
|
committed_count: 0,
|
|
rollback_count: 0,
|
|
})
|
|
}
|
|
|
|
/// Stage data for writing (held in memory, not yet written)
|
|
pub async fn stage(&mut self, id: String, data: T) {
|
|
crate::util::logger::log_info(&format!(
|
|
"Staging result for: {} (total staged: {})",
|
|
id,
|
|
self.staged.len() + 1
|
|
)).await;
|
|
|
|
self.staged.insert(id, data);
|
|
}
|
|
|
|
/// Commit a staged result to disk (atomic write)
|
|
pub async fn commit(&mut self, id: &str) -> Result<()> {
|
|
if let Some(data) = self.staged.remove(id) {
|
|
// Serialize to JSON
|
|
let json_line = serde_json::to_string(&data)?;
|
|
|
|
// Write atomically (single syscall)
|
|
self.file.write_all(json_line.as_bytes()).await?;
|
|
self.file.write_all(b"\n").await?;
|
|
self.file.flush().await?;
|
|
|
|
self.committed_count += 1;
|
|
|
|
crate::util::logger::log_info(&format!(
|
|
"✓ Committed result for: {} (total committed: {})",
|
|
id, self.committed_count
|
|
)).await;
|
|
|
|
Ok(())
|
|
} else {
|
|
Err(anyhow::anyhow!("No staged result found for id: {}", id))
|
|
}
|
|
}
|
|
|
|
/// Rollback a staged result (discard without writing)
|
|
pub async fn rollback(&mut self, id: &str) {
|
|
if self.staged.remove(id).is_some() {
|
|
self.rollback_count += 1;
|
|
|
|
crate::util::logger::log_warn(&format!(
|
|
"⚠ Rolled back result for: {} (total rollbacks: {})",
|
|
id, self.rollback_count
|
|
)).await;
|
|
}
|
|
}
|
|
|
|
/// Commit all staged results
|
|
pub async fn commit_all(&mut self) -> Result<usize> {
|
|
let ids: Vec<String> = self.staged.keys().cloned().collect();
|
|
let mut committed = 0;
|
|
|
|
for id in ids {
|
|
if let Ok(()) = self.commit(&id).await {
|
|
committed += 1;
|
|
}
|
|
}
|
|
|
|
Ok(committed)
|
|
}
|
|
|
|
/// Rollback all staged results (discard everything)
|
|
pub async fn rollback_all(&mut self) -> usize {
|
|
let count = self.staged.len();
|
|
self.staged.clear();
|
|
self.rollback_count += count;
|
|
|
|
crate::util::logger::log_warn(&format!(
|
|
"⚠ Rolled back all {} staged results",
|
|
count
|
|
)).await;
|
|
|
|
count
|
|
}
|
|
|
|
/// Get statistics
|
|
pub fn stats(&self) -> WriterStats {
|
|
WriterStats {
|
|
staged_count: self.staged.len(),
|
|
committed_count: self.committed_count,
|
|
rollback_count: self.rollback_count,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Snapshot of the writer's counters.
#[derive(Debug, Clone)]
pub struct WriterStats {
    /// Results currently staged and not yet committed or rolled back
    pub staged_count: usize,
    /// Results written to disk so far
    pub committed_count: usize,
    /// Staged results discarded so far
    pub rollback_count: usize,
}
|
|
|
|
/// Managed writer service that runs in its own task
|
|
pub struct AtomicWriterService<T> {
|
|
rx: mpsc::UnboundedReceiver<WriteCommand<T>>,
|
|
writer: AtomicJsonlWriter<T>,
|
|
shutdown_flag: Arc<AtomicBool>,
|
|
}
|
|
|
|
impl<T: Serialize + Clone> AtomicWriterService<T> {
|
|
pub async fn new(
|
|
path: PathBuf,
|
|
rx: mpsc::UnboundedReceiver<WriteCommand<T>>,
|
|
shutdown_flag: Arc<AtomicBool>,
|
|
) -> Result<Self> {
|
|
let writer = AtomicJsonlWriter::new(path).await?;
|
|
|
|
Ok(Self {
|
|
rx,
|
|
writer,
|
|
shutdown_flag,
|
|
})
|
|
}
|
|
|
|
/// Main service loop
|
|
pub async fn run(mut self) {
|
|
crate::util::logger::log_info("Atomic writer service started").await;
|
|
|
|
while let Some(cmd) = self.rx.recv().await {
|
|
// Check for shutdown flag
|
|
if self.shutdown_flag.load(Ordering::SeqCst) {
|
|
crate::util::logger::log_warn(
|
|
"Shutdown detected - processing only Commit/Rollback commands"
|
|
).await;
|
|
|
|
// Only process commit/rollback commands during shutdown
|
|
match cmd {
|
|
WriteCommand::Commit { id } => {
|
|
if let Err(e) = self.writer.commit(&id).await {
|
|
crate::util::logger::log_error(&format!(
|
|
"Failed to commit {}: {}",
|
|
id, e
|
|
)).await;
|
|
}
|
|
}
|
|
WriteCommand::Rollback { id } => {
|
|
self.writer.rollback(&id).await;
|
|
}
|
|
WriteCommand::CommitAll => {
|
|
match self.writer.commit_all().await {
|
|
Ok(count) => {
|
|
crate::util::logger::log_info(&format!(
|
|
"Committed {} results during shutdown",
|
|
count
|
|
)).await;
|
|
}
|
|
Err(e) => {
|
|
crate::util::logger::log_error(&format!(
|
|
"Failed to commit all: {}",
|
|
e
|
|
)).await;
|
|
}
|
|
}
|
|
}
|
|
WriteCommand::Shutdown => break,
|
|
_ => {
|
|
// Ignore Stage commands during shutdown
|
|
crate::util::logger::log_warn(
|
|
"Ignoring new Stage command during shutdown"
|
|
).await;
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// Normal operation
|
|
match cmd {
|
|
WriteCommand::Stage { id, data } => {
|
|
self.writer.stage(id, data).await;
|
|
}
|
|
WriteCommand::Commit { id } => {
|
|
if let Err(e) = self.writer.commit(&id).await {
|
|
crate::util::logger::log_error(&format!(
|
|
"Failed to commit {}: {}",
|
|
id, e
|
|
)).await;
|
|
}
|
|
}
|
|
WriteCommand::Rollback { id } => {
|
|
self.writer.rollback(&id).await;
|
|
}
|
|
WriteCommand::CommitAll => {
|
|
match self.writer.commit_all().await {
|
|
Ok(count) => {
|
|
crate::util::logger::log_info(&format!(
|
|
"Committed all {} staged results",
|
|
count
|
|
)).await;
|
|
}
|
|
Err(e) => {
|
|
crate::util::logger::log_error(&format!(
|
|
"Failed to commit all: {}",
|
|
e
|
|
)).await;
|
|
}
|
|
}
|
|
}
|
|
WriteCommand::Shutdown => break,
|
|
}
|
|
}
|
|
|
|
// Final shutdown - rollback any remaining staged items
|
|
let stats = self.writer.stats();
|
|
if stats.staged_count > 0 {
|
|
crate::util::logger::log_warn(&format!(
|
|
"⚠ Shutdown with {} uncommitted results - rolling back",
|
|
stats.staged_count
|
|
)).await;
|
|
|
|
self.writer.rollback_all().await;
|
|
}
|
|
|
|
crate::util::logger::log_info(&format!(
|
|
"Atomic writer service stopped. Final stats: {} committed, {} rolled back",
|
|
stats.committed_count,
|
|
stats.rollback_count
|
|
)).await;
|
|
}
|
|
}
|
|
|
|
/// Handle for sending write commands
|
|
#[derive(Clone)]
|
|
pub struct AtomicWriterHandle<T> {
|
|
tx: mpsc::UnboundedSender<WriteCommand<T>>,
|
|
}
|
|
|
|
impl<T> AtomicWriterHandle<T> {
|
|
pub fn new(tx: mpsc::UnboundedSender<WriteCommand<T>>) -> Self {
|
|
Self { tx }
|
|
}
|
|
|
|
/// Stage data for writing (does not write immediately)
|
|
pub fn stage(&self, id: String, data: T) {
|
|
let _ = self.tx.send(WriteCommand::Stage { id, data });
|
|
}
|
|
|
|
/// Commit staged data to disk
|
|
pub fn commit(&self, id: String) {
|
|
let _ = self.tx.send(WriteCommand::Commit { id });
|
|
}
|
|
|
|
/// Rollback staged data (discard)
|
|
pub fn rollback(&self, id: String) {
|
|
let _ = self.tx.send(WriteCommand::Rollback { id });
|
|
}
|
|
|
|
/// Commit all staged data
|
|
pub fn commit_all(&self) {
|
|
let _ = self.tx.send(WriteCommand::CommitAll);
|
|
}
|
|
|
|
/// Shutdown writer gracefully
|
|
pub fn shutdown(&self) {
|
|
let _ = self.tx.send(WriteCommand::Shutdown);
|
|
}
|
|
}
|
|
|
|
/// Create atomic writer service
|
|
pub async fn create_atomic_writer<T: Serialize + Clone + Send + 'static>(
|
|
path: PathBuf,
|
|
shutdown_flag: Arc<AtomicBool>,
|
|
) -> Result<(AtomicWriterHandle<T>, tokio::task::JoinHandle<()>)> {
|
|
let (tx, rx) = mpsc::unbounded_channel();
|
|
|
|
let service = AtomicWriterService::new(path, rx, shutdown_flag).await?;
|
|
let handle = tokio::spawn(async move {
|
|
service.run().await;
|
|
});
|
|
|
|
Ok((AtomicWriterHandle::new(tx), handle))
|
|
} |