watcheragent/WatcherAgent/src/metrics.rs



//! # Metrics Module
//!
//! This module orchestrates the collection and reporting of hardware and network metrics for WatcherAgent.
//!
//! ## Responsibilities
//! - **Metric Collection:** Gathers real-time statistics from all hardware subsystems (CPU, GPU, RAM, disk, network).
//! - **Reporting:** Periodically sends metrics to the backend server using the API module.
//! - **Error Handling:** Tolerates hardware failures and network errors by logging them and retrying.
//!
//! ## Usage
//! The [`Collector`] struct is instantiated in the main loop and runs as a background task, continuously collecting and reporting metrics.
use std::error::Error;
use std::time::Duration;

use crate::api;
use crate::hardware::network::NetworkMonitor;
use crate::hardware::HardwareInfo;
use crate::models::MetricDto;


/// Main orchestrator for hardware and network metric collection and reporting.
///
/// A `Collector` holds the state that is needed between two collection cycles:
/// a network monitor for bandwidth tracking plus the identity (server ID and
/// IP address) that is stamped onto every [`MetricDto`] it produces.
///
/// All fields are private; construct an instance with [`Collector::new`].
pub struct Collector {
    /// Tracks network usage rates (rx/tx); refreshed on every call to `collect`.
    network_monitor: NetworkMonitor,
    /// Unique server ID assigned by the backend; copied into each metric record.
    server_id: i32,
    /// IP address of the agent; cloned into each metric record.
    ip_address: String,
}


impl Collector {
    /// Pause before the next attempt after a failed collection or a failed send.
    const RETRY_DELAY: Duration = Duration::from_secs(10);

    /// Pause between two successful reports.
    const REPORT_INTERVAL: Duration = Duration::from_secs(20);

    /// Number of consecutive failed sends after which [`Collector::run`] gives up
    /// and hands the last error back to the caller.
    const MAX_CONSECUTIVE_SEND_FAILURES: u32 = 3;

    /// Creates a new `Collector` instance for metric collection and reporting.
    ///
    /// # Arguments
    /// * `server_id` - The server ID assigned by the backend.
    /// * `ip_address` - The IP address of the agent.
    ///
    /// # Returns
    /// A new `Collector` with a freshly created network monitor.
    pub fn new(server_id: i32, ip_address: String) -> Self {
        Self {
            network_monitor: NetworkMonitor::new(),
            server_id,
            ip_address,
        }
    }

    /// Runs the metrics loop: collect, send to the backend, sleep, repeat.
    ///
    /// Both failure modes are retried after [`Self::RETRY_DELAY`]:
    /// * a failed collection is logged and retried indefinitely;
    /// * a failed send is logged and retried with freshly collected metrics.
    ///   Only after [`Self::MAX_CONSECUTIVE_SEND_FAILURES`] failed sends in a row
    ///   is the error propagated, so a single transient network problem no
    ///   longer terminates the reporting task.
    ///
    /// After a successful send the loop sleeps for [`Self::REPORT_INTERVAL`].
    ///
    /// # Arguments
    /// * `base_url` - The base URL of the backend server.
    ///
    /// # Errors
    /// Returns the error of the last send attempt once the consecutive-failure
    /// limit is reached. The loop never finishes with `Ok(())`; under normal
    /// operation this future runs until it is dropped or cancelled.
    pub async fn run(&mut self, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
        let mut consecutive_send_failures: u32 = 0;

        loop {
            println!(
                "[{}] Starting metrics collection...",
                chrono::Local::now().format("%H:%M:%S")
            );

            let metrics = match self.collect().await {
                Ok(metrics) => metrics,
                Err(e) => {
                    eprintln!("Error collecting metrics: {}", e);
                    tokio::time::sleep(Self::RETRY_DELAY).await;
                    continue;
                }
            };

            match api::send_metrics(base_url, &metrics).await {
                Ok(_) => consecutive_send_failures = 0,
                Err(e) => {
                    consecutive_send_failures += 1;
                    if consecutive_send_failures >= Self::MAX_CONSECUTIVE_SEND_FAILURES {
                        // Persistent outage: surface it so the caller can decide what to do.
                        return Err(e.into());
                    }
                    eprintln!(
                        "Error sending metrics (failure {}/{}): {}",
                        consecutive_send_failures,
                        Self::MAX_CONSECUTIVE_SEND_FAILURES,
                        e
                    );
                    tokio::time::sleep(Self::RETRY_DELAY).await;
                    continue;
                }
            }

            tokio::time::sleep(Self::REPORT_INTERVAL).await;
        }
    }

    /// Collects one snapshot of hardware and network metrics.
    ///
    /// Queries [`HardwareInfo::collect`], refreshes the internal network monitor
    /// and packages the readings into a [`MetricDto`]. Any individual reading
    /// that is unavailable (`None`) is reported as the type's default value.
    /// `disk_temp` is always reported as `0.0` because it is not supported.
    ///
    /// # Errors
    /// Returns the error from [`HardwareInfo::collect`] (after logging it) if the
    /// hardware information cannot be gathered.
    pub async fn collect(&mut self) -> Result<MetricDto, Box<dyn Error + Send + Sync>> {
        let hardware = HardwareInfo::collect().await.map_err(|e| {
            eprintln!("Error collecting hardware-infos: {e}");
            e
        })?;

        // Refresh the monitor's internal state on every cycle. The returned
        // value is deliberately ignored, and so is a failure of the update.
        // NOTE(review): net_rx/net_tx below come from the hardware snapshot, not
        // from this monitor - confirm that the monitor's own rates are not the
        // ones that should be reported.
        let _ = self.network_monitor.update_usage();

        Ok(MetricDto {
            server_id: self.server_id,
            ip_address: self.ip_address.clone(),
            cpu_load: hardware.cpu.current_load.unwrap_or_default(),
            cpu_temp: hardware.cpu.current_temp.unwrap_or_default(),
            gpu_load: hardware.gpu.current_load.unwrap_or_default(),
            gpu_temp: hardware.gpu.current_temp.unwrap_or_default(),
            gpu_vram_size: hardware.gpu.vram_total.unwrap_or_default(),
            // NOTE(review): this reads the same field as `gpu_load`, which looks
            // like a copy-paste slip - verify whether a dedicated VRAM usage
            // field of the GPU info should be reported here instead.
            gpu_vram_load: hardware.gpu.current_load.unwrap_or_default(),
            ram_load: hardware.memory.current_load.unwrap_or_default(),
            ram_size: hardware.memory.total_size.unwrap_or_default(),
            disk_size: hardware.disk.total_size.unwrap_or_default(),
            disk_usage: hardware.disk.total_usage.unwrap_or_default(),
            disk_temp: 0.0, // not supported
            net_rx: hardware.network.rx_rate.unwrap_or_default(),
            net_tx: hardware.network.tx_rate.unwrap_or_default(),
        })
    }
}