Files
watcheragent/WatcherAgent/src/metrics.rs
donpat1to 4681e0c694
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 5s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 4s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m7s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 2m56s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 3m40s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m11s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 2s
Rust Cross-Platform Build / Create Tag (push) Successful in 5s
fixed id type
2025-10-01 19:00:34 +02:00

119 lines
4.9 KiB
Rust

/// # Metrics Module
///
/// This module orchestrates the collection and reporting of hardware and network metrics for WatcherAgent.
///
/// ## Responsibilities
/// - **Metric Collection:** Gathers real-time statistics from all hardware subsystems (CPU, GPU, RAM, disk, network).
/// - **Reporting:** Periodically sends metrics to the backend server using the API module.
/// - **Error Handling:** Robust to hardware failures and network errors, with retry logic and logging.
///
/// ## Usage
/// The [`Collector`] struct is instantiated in the main loop and runs as a background task, continuously collecting and reporting metrics.
use std::error::Error;
use std::time::Duration;
use crate::api;
use crate::hardware::network::NetworkMonitor;
use crate::hardware::HardwareInfo;
use crate::models::MetricDto;
/// Main orchestrator for hardware and network metric collection and reporting.
///
/// The `Collector` struct manages the state required to collect metrics and send them to the backend server. It maintains a network monitor for bandwidth tracking, the agent's server ID, and its IP address.
///
/// # Fields
/// - `network_monitor`: Tracks network usage rates (rx/tx).
/// - `server_id`: Unique server ID assigned by the backend.
/// - `ip_address`: IP address of the agent.
pub struct Collector {
network_monitor: NetworkMonitor,
server_id: i32,
ip_address: String,
}
impl Collector {
/// Creates a new `Collector` instance for metric collection and reporting.
///
/// # Arguments
/// * `server_id` - The server ID assigned by the backend.
/// * `ip_address` - The IP address of the agent.
///
/// # Returns
/// A new `Collector` ready to collect and report metrics.
pub fn new(server_id: i32, ip_address: String) -> Self {
Self {
network_monitor: NetworkMonitor::new(),
server_id,
ip_address,
}
}
/// Runs the main metrics collection loop, periodically sending metrics to the backend server.
///
/// This function continuously collects hardware and network metrics, sends them to the backend, and handles errors gracefully. It uses a configurable interval and retries on failures.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if metrics are sent successfully.
pub async fn run(&mut self, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
loop {
println!(
"[{}] Starting metrics collection...",
chrono::Local::now().format("%H:%M:%S")
);
let metrics = match self.collect().await {
Ok(metrics) => metrics,
Err(e) => {
eprintln!("Error collecting metrics: {}", e);
tokio::time::sleep(Duration::from_secs(10)).await;
continue;
}
};
api::send_metrics(base_url, &metrics).await?;
tokio::time::sleep(Duration::from_secs(20)).await;
}
}
/// Collects hardware and network metrics from all subsystems.
///
/// This function queries the hardware module for CPU, GPU, RAM, disk, and network statistics, and packages them into a [`MetricDto`] for reporting.
///
/// # Returns
/// * `Result<MetricDto, Box<dyn Error + Send + Sync>>` - The collected metrics or an error if hardware info is unavailable.
pub async fn collect(&mut self) -> Result<MetricDto, Box<dyn Error + Send + Sync>> {
let hardware = match HardwareInfo::collect().await {
Ok(hw) => hw,
Err(e) => {
eprintln!("Error collecting hardware-infos: {e}");
return Err(e);
}
};
// Collect network usage
let (_, _) = self.network_monitor.update_usage().unwrap_or((0.0, 0.0));
Ok(MetricDto {
server_id: self.server_id,
ip_address: self.ip_address.clone(),
cpu_load: hardware.cpu.current_load.unwrap_or_default(),
cpu_temp: hardware.cpu.current_temp.unwrap_or_default(),
gpu_load: hardware.gpu.current_load.unwrap_or_default(),
gpu_temp: hardware.gpu.current_temp.unwrap_or_default(),
gpu_vram_size: hardware.gpu.vram_total.unwrap_or_default(),
gpu_vram_load: hardware.gpu.current_load.unwrap_or_default(),
ram_load: hardware.memory.current_load.unwrap_or_default(),
ram_size: hardware.memory.total_size.unwrap_or_default(),
disk_size: hardware.disk.total_size.unwrap_or_default(),
disk_usage: hardware.disk.total_usage.unwrap_or_default(),
disk_temp: 0.0, // not supported
net_rx: hardware.network.rx_rate.unwrap_or_default(),
net_tx: hardware.network.tx_rate.unwrap_or_default(),
})
}
}