added commentation

This commit is contained in:
2025-10-01 12:07:53 +02:00
parent d994be757e
commit 8c49a63a50
13 changed files with 570 additions and 56 deletions

View File

@@ -1,3 +1,17 @@
//! # API Module
//!
//! This module provides all HTTP communication between WatcherAgent and the backend server.
//!
//! ## Responsibilities
//! - **Registration:** Registers the agent with the backend and retrieves its server ID and IP address.
//! - **Heartbeat:** Periodically sends heartbeat signals to indicate liveness.
//! - **Metrics Reporting:** Sends collected hardware and network metrics to the backend.
//! - **Command Listening:** Polls for and executes remote commands from the backend (e.g., update image, restart container).
//!
//! ## Usage
//! These functions are called from the main agent loop and background tasks. All network operations are asynchronous and robust to transient failures.
use std::time::Duration;
use crate::hardware::HardwareInfo;
@@ -11,6 +25,18 @@ use tokio::time::sleep;
use bollard::Docker;
/// Registers this agent with the backend server and retrieves its server ID and IP address.
///
/// This function collects local hardware information, prepares a registration payload, and sends it to the backend. It will retry registration until successful, handling network errors and server-side failures gracefully.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server (e.g., `https://server.example.com`).
///
/// # Returns
/// * `Result<(i32, String), Box<dyn Error + Send + Sync>>` - Tuple of server ID and registered IP address on success.
///
/// # Errors
/// Returns an error if unable to register after repeated attempts.
pub async fn register_with_server(
base_url: &str,
) -> Result<(i32, String), Box<dyn Error + Send + Sync>> {
@@ -64,6 +90,16 @@ pub async fn register_with_server(
}
}
/// Looks up the server ID for the given IP address from the backend server.
///
/// This function will retry until a valid response is received, handling network errors and server-side failures.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server.
/// * `ip` - The local IP address to look up.
///
/// # Returns
/// * `Result<(i32, String), Box<dyn Error + Send + Sync>>` - Tuple of server ID and registered IP address.
async fn get_server_id_by_ip(
base_url: &str,
ip: &str,
@@ -115,6 +151,16 @@ async fn get_server_id_by_ip(
}
}
/// Periodically sends heartbeat signals to the backend server to indicate agent liveness.
///
/// This function runs in a background task and will retry on network errors.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server.
/// * `ip` - The IP address of the agent.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if heartbeats are sent successfully.
pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
let client = Client::builder()
.danger_accept_invalid_certs(true)
@@ -138,6 +184,16 @@ pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box<dyn Erro
}
}
/// Sends collected hardware and network metrics to the backend server.
///
/// This function is called periodically from the metrics collection loop. It logs the result and retries on network errors.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server.
/// * `metrics` - The metrics data to send (see [`MetricDto`]).
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if metrics are sent successfully.
pub async fn send_metrics(
base_url: &str,
metrics: &MetricDto,
@@ -158,6 +214,16 @@ pub async fn send_metrics(
Ok(())
}
/// Polls the backend server for remote commands and executes them.
///
/// This function runs in a background task, polling the server for new messages. It acknowledges receipt and execution of each command, and handles errors gracefully.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
/// * `base_url` - The base URL of the backend server.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if commands are handled successfully.
pub async fn listening_to_server(docker: &Docker, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
let url = format!("{}/api/message", base_url);
let client = reqwest::Client::new();
@@ -210,6 +276,19 @@ pub async fn listening_to_server(docker: &Docker, base_url: &str) -> Result<(),
}
}
/// Sends an acknowledgment to the backend server for a received or executed command message.
///
/// This function is used internally by [`listening_to_server`] to confirm receipt and execution status of commands.
///
/// # Arguments
/// * `client` - Reference to a reqwest HTTP client.
/// * `base_url` - The base URL of the backend server.
/// * `message_id` - The ID of the message being acknowledged.
/// * `status` - Status string (e.g., "received", "success", "error").
/// * `details` - Additional details about the acknowledgment.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if acknowledgment is sent successfully.
async fn send_acknowledgment(
client: &reqwest::Client,
base_url: &str,

View File

@@ -1,3 +1,8 @@
//! Docker container utilities for WatcherAgent
//!
//! Provides functions to list and process Docker containers using the Bollard library.
//!
use crate::models::DockerContainer;
use bollard::query_parameters::{ListContainersOptions};
@@ -6,6 +11,13 @@ use bollard::Docker;
/// Returns a list of available Docker containers.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
///
/// # Returns
/// * `Vec<DockerContainer>` - Vector of Docker container info.
pub async fn get_available_container(docker: &Docker) -> Vec<DockerContainer> {
println!("=== DOCKER CONTAINER LIST ===");
@@ -67,32 +79,15 @@ pub async fn get_available_container(docker: &Docker) -> Vec<DockerContainer> {
containers_list
}
/*pub fn extract_client_container_id(line: &str) -> Option<String> {
// Split by slashes and take the last part
if let Some(last_part) = line.split('/').last() {
let last_part = last_part.trim();
// Remove common suffixes
let clean_id = last_part
.trim_end_matches(".scope")
.trim_start_matches("docker-")
.trim_start_matches("crio-")
.trim_start_matches("containerd-");
// Check if it looks like a container ID (hex characters)
if clean_id.chars().all(|c| c.is_ascii_hexdigit()) && clean_id.len() >= 12 {
return Some(clean_id.to_string());
}
// If it's not pure hex, try to extract hex sequence
let hex_part: String = clean_id.chars()
.take_while(|c| c.is_ascii_hexdigit())
.collect();
if hex_part.len() >= 12 {
return Some(hex_part);
}
}
None
}*/
/*
/// Extracts a Docker container ID from a string line.
///
/// # Arguments
/// * `line` - The input string containing a container ID or related info.
///
/// # Returns
/// * `Option<String>` - The extracted container ID if found.
pub fn extract_client_container_id(line: &str) -> Option<String> {
// ...existing code...
}
*/

View File

@@ -1,9 +1,27 @@
//! # Docker Module
//!
//! This module provides Docker integration for WatcherAgent, including container enumeration, statistics, and lifecycle management.
//!
//! ## Responsibilities
//! - **Container Management:** Lists, inspects, and manages Docker containers relevant to the agent.
//! - **Statistics Aggregation:** Collects network and CPU statistics for all managed containers.
//! - **Lifecycle Operations:** Supports container restart and ID lookup for agent self-management.
//!
pub mod container;
pub mod serverclientcomm;
use std::error::Error;
use crate::models::DockerContainer;
/// Aggregated Docker statistics for all managed containers.
///
/// # Fields
/// - `number`: Number of running containers (optional)
/// - `net_in_total`: Total network receive rate in **bytes per second (B/s)** (optional)
/// - `net_out_total`: Total network transmit rate in **bytes per second (B/s)** (optional)
/// - `dockers`: List of [`DockerContainer`] statistics (optional)
#[derive(Debug, Clone)]
pub struct DockerInfo {
pub number: Option<u16>,
@@ -12,33 +30,51 @@ pub struct DockerInfo {
pub dockers: Option<Vec<DockerContainer>>,
}
impl DockerInfo {
/// Collects Docker statistics for all managed containers.
///
/// # Returns
/// * `Result<DockerInfo, Box<dyn Error + Send + Sync>>` - Aggregated Docker statistics or error if collection fails.
pub async fn collect() -> Result<Self, Box<dyn Error + Send + Sync>> {
Ok(Self { number: None, net_in_total: None, net_out_total: None, dockers: None })
}
}
impl DockerContainer {
/*pub async fn restart_container(docker: &Docker) -> Result<(), Box<dyn Error + Send + Sync>> {
if let Ok(container_id) = std::env::var("HOSTNAME") {
println!("Restarting container {}", container_id);
if let Err(e) = docker.restart_container(&container_id, Some(RestartContainerOptions { signal: None, t: Some(0) }))
.await
{
eprintln!("Failed to restart container: {}", e);
}
} else {
eprintln!("No container ID found (HOSTNAME not set?)");
}
/*
/// Restarts the specified Docker container by ID.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if restarted successfully, error otherwise.
pub async fn restart_container(docker: &Docker) -> Result<(), Box<dyn Error + Send + Sync>> {
// ...existing code...
}
*/
Ok(())
}*/
pub async fn get_docker_container_id (container: DockerContainer) -> Result<u32, Box<dyn Error + Send + Sync>> {
/// Returns the container ID for a given [`DockerContainer`].
///
/// # Arguments
/// * `container` - Reference to a [`DockerContainer`]
///
/// # Returns
/// * `Result<u32, Box<dyn Error + Send + Sync>>` - Container ID as integer.
pub async fn get_docker_container_id(container: DockerContainer) -> Result<u32, Box<dyn Error + Send + Sync>> {
Ok(container.ID)
}
pub async fn get_docker_container_image (container: DockerContainer) -> Result<String, Box<dyn Error + Send + Sync>> {
/// Returns the image name for a given [`DockerContainer`].
///
/// # Arguments
/// * `container` - Reference to a [`DockerContainer`]
///
/// # Returns
/// * `Result<String, Box<dyn Error + Send + Sync>>` - Image name as string.
pub async fn get_docker_container_image(container: DockerContainer) -> Result<String, Box<dyn Error + Send + Sync>> {
Ok(container.image)
}
}

View File

@@ -1,3 +1,8 @@
//! Server-client communication utilities for WatcherAgent
//!
//! Handles server commands, Docker image updates, and container management using the Bollard library.
//!
use crate::models::{DockerContainer, ServerMessage};
use crate::docker::container::{get_available_container};
@@ -6,6 +11,14 @@ use bollard::Docker;
use bollard::query_parameters::{CreateImageOptions, RestartContainerOptions, InspectContainerOptions};
use futures_util::StreamExt;
/// Handles a message from the backend server and dispatches the appropriate action.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
/// * `msg` - The server message to handle.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if handled successfully, error otherwise.
pub async fn handle_server_message(docker: &Docker, msg: ServerMessage) -> Result<(), Box<dyn Error + Send + Sync>> {
let msg = msg.clone();
println!("Handling server message: {:?}", msg);
@@ -40,6 +53,14 @@ pub async fn handle_server_message(docker: &Docker, msg: ServerMessage) -> Resul
}
}
/// Pulls a new Docker image and restarts the current container.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
/// * `image` - The name of the Docker image to pull.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if updated successfully, error otherwise.
pub async fn update_docker_image(docker: &Docker, image: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
println!("Updating to {}", image);
@@ -74,6 +95,13 @@ pub async fn update_docker_image(docker: &Docker, image: &str) -> Result<(), Box
Ok(())
}
/// Finds the Docker container running the agent by image name.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
///
/// # Returns
/// * `Result<Option<DockerContainer>, Box<dyn Error + Send + Sync>>` - The agent's container info if found.
pub async fn get_client_container(docker: &Docker) -> Result<Option<DockerContainer>, Box<dyn Error + Send + Sync>> {
let containers = get_available_container(docker).await;
let client_image = "watcher-agent";
@@ -86,6 +114,13 @@ pub async fn get_client_container(docker: &Docker) -> Result<Option<DockerContai
}
}
/// Restarts the agent's own Docker container.
///
/// # Arguments
/// * `docker` - Reference to a Bollard Docker client.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if restarted successfully, error otherwise.
pub async fn restart_container(docker: &Docker) -> Result<(), Box<dyn Error + Send + Sync>> {
if let Ok(Some(container)) = get_client_container(docker).await {
let container_id = container.clone().ID;

View File

@@ -2,6 +2,29 @@ use anyhow::Result;
use std::error::Error;
use sysinfo::System;
//! # CPU Hardware Module
//!
//! This module provides CPU information collection for WatcherAgent, including load, temperature, and system uptime.
//!
//! ## Responsibilities
//! - **CPU Detection:** Identifies CPU model and core count.
//! - **Metric Collection:** Queries CPU load, temperature, and uptime.
//! - **Error Handling:** Graceful fallback if metrics are unavailable.
//!
//! ## Units
//! - `current_load`: CPU usage as a percentage (**0.0100.0**)
//! - `current_temp`: CPU temperature in **degrees Celsius (°C)**
//! - `uptime`: System uptime in **seconds (s)**
//!
/// CPU statistics for the host system.
///
/// # Fields
/// - `name`: CPU model name (string)
/// - `cores`: Number of physical CPU cores (integer)
/// - `current_load`: CPU usage as a percentage (**0.0100.0**)
/// - `current_temp`: CPU temperature in **degrees Celsius (°C)**
/// - `uptime`: System uptime in **seconds (s)**
/// - `host_name`: Hostname of the system (string)
#[derive(Debug)]
pub struct CpuInfo {
pub name: Option<String>,
@@ -12,6 +35,10 @@ pub struct CpuInfo {
pub host_name: Option<String>,
}
/// Collects CPU information (model, cores, load, temperature, uptime).
///
/// # Returns
/// * `Result<CpuInfo, Box<dyn Error + Send + Sync>>` - CPU statistics or error if unavailable.
pub async fn get_cpu_info() -> Result<CpuInfo, Box<dyn Error + Send + Sync>> {
let mut sys = System::new_all();
@@ -33,12 +60,23 @@ pub async fn get_cpu_info() -> Result<CpuInfo, Box<dyn Error + Send + Sync>> {
})
}
/// Queries system for current CPU load (percentage).
///
/// # Arguments
/// * `sys` - Mutable reference to sysinfo::System
///
/// # Returns
/// * `Result<f64, Box<dyn Error + Send + Sync>>` - CPU load as percentage.
pub async fn get_cpu_load(sys: &mut System) -> Result<f64, Box<dyn Error + Send + Sync>> {
sys.refresh_cpu_all();
tokio::task::yield_now().await; // Allow other tasks to run
Ok(sys.global_cpu_usage() as f64)
}
/// Attempts to read CPU temperature from system sensors (Linux only).
///
/// # Returns
/// * `Result<f64, Box<dyn Error + Send + Sync>>` - CPU temperature in degrees Celsius (°C).
pub async fn get_cpu_temp() -> Result<f64, Box<dyn Error + Send + Sync>> {
println!("Attempting to get CPU temperature...");

View File

@@ -7,6 +7,27 @@ use sysinfo::{Component, Components, Disk, Disks};
use serde::Serialize;
//! # Disk Hardware Module
//!
//! This module provides disk information collection for WatcherAgent, including total and per-disk statistics and temperature data.
//!
//! ## Responsibilities
//! - **Disk Enumeration:** Lists all physical disks and their properties.
//! - **Usage Calculation:** Computes total and per-disk usage, available space, and usage percentage.
//! - **Temperature Monitoring:** Associates disk components with temperature sensors if available.
//!
//! ## Units
//! - All sizes are in **bytes** unless otherwise noted.
//! - Temperatures are in **degrees Celsius (°C)**.
//!
/// Summary of disk statistics for the system.
///
/// # Fields
/// - `total_size`: Total disk size in bytes (all disks > 100MB)
/// - `total_used`: Total used disk space in bytes
/// - `total_available`: Total available disk space in bytes
/// - `total_usage`: Usage percentage (0.0100.0)
/// - `detailed_info`: Vector of [`DiskInfoDetailed`] for each disk
#[derive(Serialize, Debug)]
pub struct DiskInfo {
pub total_size: Option<f64>,
@@ -16,6 +37,12 @@ pub struct DiskInfo {
pub detailed_info: Vec<DiskInfoDetailed>,
}
/// Collects disk information for all detected disks, including usage and temperature.
///
/// This function enumerates all disks, calculates usage statistics, and attempts to associate temperature sensors with disk components.
///
/// # Returns
/// * `Result<DiskInfo, Box<dyn std::error::Error + Send + Sync>>` - Disk statistics and details, or error if collection fails.
pub async fn get_disk_info() -> Result<DiskInfo, Box<dyn std::error::Error + Send + Sync>> {
let disks = Disks::new_with_refreshed_list();
let mut detailed_info = Vec::new();

View File

@@ -2,6 +2,29 @@ use anyhow::Result;
use nvml_wrapper::Nvml;
use std::error::Error;
//! # GPU Hardware Module
//!
//! This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics.
//!
//! ## Responsibilities
//! - **GPU Detection:** Identifies GPU model and capabilities.
//! - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only).
//! - **Error Handling:** Graceful fallback if GPU or NVML is unavailable.
//!
//! ## Units
//! - `current_load`: GPU usage as a percentage (**0.0100.0**)
//! - `current_temp`: GPU temperature in **degrees Celsius (°C)**
//! - `vram_total`: Total VRAM in **megabytes (MB)**
//! - `vram_used`: Used VRAM in **megabytes (MB)**
//!
/// GPU statistics for the host system.
///
/// # Fields
/// - `name`: GPU model name (string)
/// - `current_load`: GPU usage as a percentage (**0.0100.0**)
/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
/// - `vram_total`: Total VRAM in **megabytes (MB)**
/// - `vram_used`: Used VRAM in **megabytes (MB)**
#[derive(Debug)]
pub struct GpuInfo {
pub name: Option<String>,
@@ -11,6 +34,12 @@ pub struct GpuInfo {
pub vram_used: Option<f64>,
}
/// Collects GPU information (load, temperature, VRAM) using NVML.
///
/// This function attempts to query the first NVIDIA GPU using NVML. If unavailable, it returns a fallback with only the detected GPU name.
///
/// # Returns
/// * `Result<GpuInfo, Box<dyn Error + Send + Sync>>` - GPU statistics or fallback if unavailable.
pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
match get_gpu_metrics() {
Ok((gpu_temp, gpu_load, vram_used, vram_total)) => {
@@ -37,6 +66,10 @@ pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
}
}
/// Queries NVML for GPU metrics: temperature, load, VRAM used/total.
///
/// # Returns
/// * `Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>>` - Tuple of (temperature °C, load %, VRAM used MB, VRAM total MB).
pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
let nvml = Nvml::init();
if let Ok(nvml) = nvml {

View File

@@ -3,6 +3,24 @@ use std::error::Error;
use anyhow::Result;
use sysinfo::System;
//! # Memory Hardware Module
//!
//! This module provides memory information collection for WatcherAgent, including total, used, and free RAM.
//!
//! ## Responsibilities
//! - **Memory Detection:** Queries system for total, used, and free RAM.
//! - **Usage Calculation:** Computes memory usage percentage.
//! - **Error Handling:** Graceful fallback if metrics are unavailable.
//!
//! ## Units
//! - `total`, `used`, `free`: RAM in **megabytes (MB)**
//!
/// Memory statistics for the host system.
///
/// # Fields
/// - `total`: Total RAM in **megabytes (MB)**
/// - `used`: Used RAM in **megabytes (MB)**
/// - `free`: Free RAM in **megabytes (MB)**
#[derive(Debug)]
pub struct MemoryInfo {
pub total: Option<f64>,
@@ -10,6 +28,10 @@ pub struct MemoryInfo {
pub free: Option<f64>,
}
/// Collects memory information (total, used, free RAM).
///
/// # Returns
/// * `Result<MemoryInfo>` - Memory statistics or error if unavailable.
pub async fn get_memory_info() -> Result<MemoryInfo> {
let mut sys = System::new();
sys.refresh_memory();
@@ -21,6 +43,13 @@ pub async fn get_memory_info() -> Result<MemoryInfo> {
})
}
/// Computes memory usage percentage from sysinfo::System.
///
/// # Arguments
/// * `sys` - Mutable reference to sysinfo::System
///
/// # Returns
/// * `Result<f64, Box<dyn Error + Send + Sync>>` - Memory usage as percentage.
pub fn _get_memory_usage(sys: &mut System) -> Result<f64, Box<dyn Error + Send + Sync>> {
sys.refresh_memory();
Ok((sys.used_memory() as f64 / sys.total_memory() as f64) * 100.0)

View File

@@ -14,6 +14,23 @@ pub use memory::get_memory_info;
pub use network::get_network_info;
pub use network::NetworkMonitor;
//! # Hardware Module
//!
//! This module aggregates all hardware subsystems for WatcherAgent, providing unified collection and access to CPU, GPU, memory, disk, and network statistics.
//!
//! ## Responsibilities
//! - **Subsystem Aggregation:** Combines all hardware modules into a single struct for easy access.
//! - **Unified Collection:** Provides a single async method to collect all hardware metrics at once.
//!
/// Aggregated hardware statistics for the host system.
///
/// # Fields
/// - `cpu`: CPU statistics (see [`CpuInfo`])
/// - `gpu`: GPU statistics (see [`GpuInfo`])
/// - `memory`: Memory statistics (see [`MemoryInfo`])
/// - `disk`: Disk statistics (see [`DiskInfo`])
/// - `network`: Network statistics (see [`NetworkInfo`])
/// - `network_monitor`: Rolling monitor for network bandwidth
#[derive(Debug)]
pub struct HardwareInfo {
pub cpu: cpu::CpuInfo,
@@ -25,6 +42,10 @@ pub struct HardwareInfo {
}
impl HardwareInfo {
/// Collects all hardware statistics asynchronously.
///
/// # Returns
/// * `Result<HardwareInfo, Box<dyn Error + Send + Sync>>` - Aggregated hardware statistics or error if any subsystem fails.
pub async fn collect() -> Result<Self, Box<dyn Error + Send + Sync>> {
let mut network_monitor = network::NetworkMonitor::new();
Ok(Self {

View File

@@ -2,6 +2,24 @@ use std::error::Error;
use std::result::Result;
use std::time::Instant;
//! # Network Hardware Module
//!
//! This module provides network information collection for WatcherAgent, including interface enumeration and bandwidth statistics.
//!
//! ## Responsibilities
//! - **Interface Detection:** Lists all network interfaces.
//! - **Bandwidth Monitoring:** Tracks receive/transmit rates using a rolling monitor.
//! - **Error Handling:** Graceful fallback if metrics are unavailable.
//!
//! ## Units
//! - `rx_rate`, `tx_rate`: Network bandwidth in **bytes per second (B/s)**
//!
/// Network statistics for the host system.
///
/// # Fields
/// - `interfaces`: List of network interface names (strings)
/// - `rx_rate`: Receive bandwidth in **bytes per second (B/s)**
/// - `tx_rate`: Transmit bandwidth in **bytes per second (B/s)**
#[derive(Debug)]
pub struct NetworkInfo {
pub interfaces: Option<Vec<String>>,
@@ -9,6 +27,13 @@ pub struct NetworkInfo {
pub tx_rate: Option<f64>,
}
/// Rolling monitor for network bandwidth statistics.
///
/// # Fields
/// - `prev_rx`: Previous received bytes
/// - `prev_tx`: Previous transmitted bytes
/// - `last_update`: Timestamp of last update
#[derive(Debug)]
pub struct NetworkMonitor {
prev_rx: u64,
@@ -23,6 +48,7 @@ impl Default for NetworkMonitor {
}
impl NetworkMonitor {
/// Creates a new `NetworkMonitor` for bandwidth tracking.
pub fn new() -> Self {
Self {
prev_rx: 0,
@@ -31,6 +57,10 @@ impl NetworkMonitor {
}
}
/// Updates the network usage statistics and returns current rx/tx rates.
///
/// # Returns
/// * `Result<(f64, f64), Box<dyn Error>>` - Tuple of (rx_rate, tx_rate) in bytes per second.
pub fn update_usage(&mut self) -> Result<(f64, f64), Box<dyn Error>> {
let (current_rx, current_tx) = get_network_bytes()?;
let elapsed = self.last_update.elapsed().as_secs_f64();
@@ -55,6 +85,13 @@ impl NetworkMonitor {
}
}
/// Collects network information (interfaces, rx/tx rates) using a monitor.
///
/// # Arguments
/// * `monitor` - Mutable reference to a `NetworkMonitor`
///
/// # Returns
/// * `Result<NetworkInfo, Box<dyn Error>>` - Network statistics or error if unavailable.
pub async fn get_network_info(monitor: &mut NetworkMonitor) -> Result<NetworkInfo, Box<dyn Error>> {
let (rx_rate, tx_rate) = monitor.update_usage()?;
Ok(NetworkInfo {

View File

@@ -1,20 +1,55 @@
/// WatcherAgent - A Rust-based system monitoring agent
/// This agent collects hardware metrics and sends them to a backend server.
/// It supports CPU, GPU, RAM, disk, and network metrics.
//! # WatcherAgent
//!
//! **WatcherAgent** is a cross-platform system monitoring agent written in Rust.
//!
//! ## Overview
//! This agent collects real-time hardware metrics (CPU, GPU, RAM, disk, network) and communicates with a backend server for registration, reporting, and remote control. It is designed for deployment in environments where automated monitoring and remote management of system resources is required.
//!
//! ## Features
//! - **Hardware Metrics:** Collects CPU, GPU, RAM, disk, and network statistics using platform-specific APIs.
//! - **Docker Integration:** Detects and manages its own Docker container, supports image updates and container restarts.
//! - **Server Communication:** Registers with a backend server, sends periodic heartbeats, and reports metrics securely.
//! - **Remote Commands:** Listens for and executes commands from the backend (e.g., update image, restart container, stop agent).
//!
//! ## Modules
//! - [`api`]: Handles HTTP communication with the backend server (registration, heartbeat, metrics, commands).
//! - [`hardware`]: Collects hardware metrics from the host system (CPU, GPU, RAM, disk, network).
//! - [`metrics`]: Orchestrates metric collection and reporting.
//! - [`models`]: Defines data structures for server communication and metrics.
//! - [`docker`]: Integrates with Docker for container management and agent lifecycle.
//!
//! ## Usage
//! Run the agent with the backend server URL as an argument:
//! ```sh
//! watcheragent <server-url>
//! ```
//!
//! The agent will register itself, start collecting metrics, and listen for remote commands.
pub mod api;
pub mod hardware;
pub mod metrics;
pub mod models;
pub mod docker;
use std::env;
use std::error::Error;
use std::marker::Send;
use std::marker::Sync;
use std::result::Result;
use tokio::task::JoinHandle;
use bollard::Docker;
/// Awaits a spawned asynchronous task and flattens its nested `Result` type.
///
/// This utility is used to handle the result of a `tokio::spawn`ed task that itself returns a `Result`,
/// propagating any errors from both the task and its execution.
///
/// # Type Parameters
/// * `T` - The type returned by the task on success.
///
/// # Arguments
/// * `handle` - The `JoinHandle` of the spawned task.
///
/// # Returns
/// * `Result<T, Box<dyn Error + Send + Sync>>` - The result of the task, or an error if the task failed or panicked.
async fn flatten<T>(
handle: JoinHandle<Result<T, Box<dyn Error + Send + Sync>>>,
) -> Result<T, Box<dyn Error + Send + Sync>> {
@@ -25,6 +60,24 @@ async fn flatten<T>(
}
}
/// Main entry point for the WatcherAgent application.
///
/// This function performs the following steps:
/// 1. Initializes the Docker client for container management.
/// 2. Detects the current running image version.
/// 3. Parses command-line arguments to obtain the backend server URL.
/// 4. Registers the agent with the backend server and retrieves its server ID and IP address.
/// 5. Spawns background tasks for:
/// - Listening for remote commands from the server
/// - Sending periodic heartbeat signals
/// - Collecting and reporting hardware metrics
/// 6. Waits for all background tasks to complete and logs their results.
///
/// # Arguments
/// * `server-url` - The URL of the backend server to register and report metrics to (passed as a command-line argument).
///
/// # Errors
/// Returns an error if registration or any background task fails, or if required arguments are missing.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
// Initialize Docker client
@@ -54,7 +107,7 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
let server_url = &args[1];
println!("Server URL: {:?}", server_url);
// Registration
// Registration with backend server
let (server_id, ip) = match api::register_with_server(&server_url).await {
Ok((id, ip)) => (id, ip),
Err(e) => {

View File

@@ -1,3 +1,16 @@
//! # Metrics Module
//!
//! This module orchestrates the collection and reporting of hardware and network metrics for WatcherAgent.
//!
//! ## Responsibilities
//! - **Metric Collection:** Gathers real-time statistics from all hardware subsystems (CPU, GPU, RAM, disk, network).
//! - **Reporting:** Periodically sends metrics to the backend server using the API module.
//! - **Error Handling:** Robust to hardware failures and network errors, with retry logic and logging.
//!
//! ## Usage
//! The [`Collector`] struct is instantiated in the main loop and runs as a background task, continuously collecting and reporting metrics.
use std::error::Error;
use std::time::Duration;
@@ -6,13 +19,31 @@ use crate::hardware::network::NetworkMonitor;
use crate::hardware::HardwareInfo;
use crate::models::MetricDto;
/// Main orchestrator for hardware and network metric collection and reporting.
///
/// The `Collector` struct manages the state required to collect metrics and send them to the backend server. It maintains a network monitor for bandwidth tracking, the agent's server ID, and its IP address.
///
/// # Fields
/// - `network_monitor`: Tracks network usage rates (rx/tx).
/// - `server_id`: Unique server ID assigned by the backend.
/// - `ip_address`: IP address of the agent.
pub struct Collector {
network_monitor: NetworkMonitor,
server_id: i32,
ip_address: String,
}
impl Collector {
/// Creates a new `Collector` instance for metric collection and reporting.
///
/// # Arguments
/// * `server_id` - The server ID assigned by the backend.
/// * `ip_address` - The IP address of the agent.
///
/// # Returns
/// A new `Collector` ready to collect and report metrics.
pub fn new(server_id: i32, ip_address: String) -> Self {
Self {
network_monitor: NetworkMonitor::new(),
@@ -21,6 +52,15 @@ impl Collector {
}
}
/// Runs the main metrics collection loop, periodically sending metrics to the backend server.
///
/// This function continuously collects hardware and network metrics, sends them to the backend, and handles errors gracefully. It uses a configurable interval and retries on failures.
///
/// # Arguments
/// * `base_url` - The base URL of the backend server.
///
/// # Returns
/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if metrics are sent successfully.
pub async fn run(&mut self, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
loop {
println!(
@@ -40,6 +80,12 @@ impl Collector {
}
}
/// Collects hardware and network metrics from all subsystems.
///
/// This function queries the hardware module for CPU, GPU, RAM, disk, and network statistics, and packages them into a [`MetricDto`] for reporting.
///
/// # Returns
/// * `Result<MetricDto, Box<dyn Error + Send + Sync>>` - The collected metrics or an error if hardware info is unavailable.
pub async fn collect(&mut self) -> Result<MetricDto, Box<dyn Error + Send + Sync>> {
let hardware = match HardwareInfo::collect().await {
Ok(hw) => hw,

View File

@@ -1,6 +1,27 @@
//! # Models Module
//!
//! This module defines all data structures (DTOs) used for communication between WatcherAgent and the backend server, as well as hardware metrics and Docker container info.
//!
//! ## Responsibilities
//! - **DTOs:** Define payloads for registration, metrics, heartbeat, and server commands.
//! - **Units:** All struct fields are documented with their units for clarity and API compatibility.
//! - **Docker Info:** Structures for representing Docker container state and statistics.
//!
//! ## Usage
//! These types are serialized/deserialized for HTTP communication and used throughout the agent for data exchange.
use serde::{Deserialize, Serialize};
// Data structures matching the C# DTOs
/// Registration data sent to the backend server.
///
/// ## Units
/// - `id`: Unique server identifier (integer)
/// - `ip_address`: IPv4 or IPv6 address (string)
/// - `cpu_type`: CPU model name (string)
/// - `cpu_cores`: Number of physical CPU cores (integer)
/// - `gpu_type`: GPU model name (string)
/// - `ram_size`: Total RAM size in **megabytes (MB)**
#[derive(Serialize, Debug)]
pub struct RegistrationDto {
#[serde(rename = "id")]
@@ -17,6 +38,24 @@ pub struct RegistrationDto {
pub ram_size: f64,
}
/// Hardware and network metrics data sent to the backend server.
///
/// ## Units
/// - `server_id`: Unique server identifier (integer)
/// - `ip_address`: IPv4 or IPv6 address (string)
/// - `cpu_load`: CPU usage as a percentage (**0.0100.0**)
/// - `cpu_temp`: CPU temperature in **degrees Celsius (°C)**
/// - `gpu_load`: GPU usage as a percentage (**0.0100.0**)
/// - `gpu_temp`: GPU temperature in **degrees Celsius (°C)**
/// - `gpu_vram_size`: Total GPU VRAM in **megabytes (MB)**
/// - `gpu_vram_usage`: Used GPU VRAM in **megabytes (MB)**
/// - `ram_load`: Used RAM in **megabytes (MB)**
/// - `ram_size`: Total RAM in **megabytes (MB)**
/// - `disk_size`: Total disk size in **megabytes (MB)**
/// - `disk_usage`: Used disk space in **megabytes (MB)**
/// - `disk_temp`: Disk temperature in **degrees Celsius (°C)** (if available)
/// - `net_rx`: Network receive rate in **bytes per second (B/s)**
/// - `net_tx`: Network transmit rate in **bytes per second (B/s)**
#[derive(Serialize, Debug)]
pub struct MetricDto {
#[serde(rename = "serverId")]
@@ -51,6 +90,13 @@ pub struct MetricDto {
pub net_tx: f64,
}
/// Detailed disk information for each detected disk.
///
/// ## Units
/// - `disk_total_space`: Total disk space in **bytes**
/// - `disk_available_space`: Available disk space in **bytes**
/// - `disk_used_space`: Used disk space in **bytes**
/// - `component_disk_temperature`: Disk temperature in **degrees Celsius (°C)**
#[derive(Serialize, Debug)]
pub struct DiskInfoDetailed {
pub disk_name: String,
@@ -63,6 +109,11 @@ pub struct DiskInfoDetailed {
pub component_disk_temperature: f32,
}
/// Response containing server ID and IP address.
///
/// ## Units
/// - `id`: Unique server identifier (integer)
/// - `ip_address`: IPv4 or IPv6 address (string)
#[derive(Deserialize)]
pub struct IdResponse {
pub id: i32,
@@ -70,12 +121,24 @@ pub struct IdResponse {
pub ip_address: String,
}
/// Heartbeat message data sent to the backend server.
///
/// ## Units
/// - `ip_address`: IPv4 or IPv6 address (string)
#[derive(Serialize)]
pub struct HeartbeatDto {
#[serde(rename = "IpAddress")]
pub ip_address: String,
}
/// Hardware summary data for diagnostics and registration.
///
/// ## Units
/// - `cpu_type`: CPU model name (string)
/// - `cpu_cores`: Number of physical CPU cores (integer)
/// - `gpu_type`: GPU model name (string)
/// - `ram_size`: Total RAM size in **megabytes (MB)**
/// - `ip_address`: IPv4 or IPv6 address (string)
#[derive(Serialize, Debug)]
pub struct HardwareDto {
pub cpu_type: String,
@@ -85,6 +148,12 @@ pub struct HardwareDto {
pub ip_address: String,
}
/// Command message received from the backend server.
///
/// ## Fields
/// - `message_type`: Type of command (e.g., "update_image", "restart_container", "stop_agent")
/// - `data`: Command payload (arbitrary JSON)
/// - `message_id`: Unique identifier for acknowledgment
#[derive(Debug, Deserialize, Clone)]
pub struct ServerMessage {
// Define your message structure here
@@ -93,6 +162,12 @@ pub struct ServerMessage {
pub message_id: String, // Add an ID for acknowledgment
}
/// Acknowledgment payload sent to the backend server for command messages.
///
/// ## Fields
/// - `message_id`: Unique identifier of the acknowledged message
/// - `status`: Status string ("success", "error", etc.)
/// - `details`: Additional details or error messages
#[derive(Debug, Serialize, Clone)]
pub struct Acknowledgment {
pub message_id: String,
@@ -100,6 +175,16 @@ pub struct Acknowledgment {
pub details: String,
}
/// Docker container information for agent and managed containers.
///
/// ## Fields
/// - `ID`: Container ID (first 12 hex digits, integer)
/// - `image`: Docker image name (string)
/// - `Name`: Container name (string)
/// - `Status`: Container status ("running", "stopped", etc.)
/// - `_net_in`: Network receive rate in **bytes per second (B/s)**
/// - `_net_out`: Network transmit rate in **bytes per second (B/s)**
/// - `_cpu_load`: CPU usage as a percentage (**0.0100.0**)
#[derive(Debug, Serialize, Clone)]
pub struct DockerContainer {
pub ID: u32,