From 8c49a63a500605ad80eb2f6ef58ecca32a5b7e28 Mon Sep 17 00:00:00 2001 From: donpat1to Date: Wed, 1 Oct 2025 12:07:53 +0200 Subject: [PATCH] added commentation --- WatcherAgent/src/api.rs | 79 +++++++++++++++++++ WatcherAgent/src/docker/container.rs | 53 ++++++------- WatcherAgent/src/docker/mod.rs | 68 ++++++++++++---- WatcherAgent/src/docker/serverclientcomm.rs | 35 +++++++++ WatcherAgent/src/hardware/cpu.rs | 38 +++++++++ WatcherAgent/src/hardware/disk.rs | 27 +++++++ WatcherAgent/src/hardware/gpu.rs | 33 ++++++++ WatcherAgent/src/hardware/memory.rs | 29 +++++++ WatcherAgent/src/hardware/mod.rs | 21 +++++ WatcherAgent/src/hardware/network.rs | 37 +++++++++ WatcherAgent/src/main.rs | 73 ++++++++++++++--- WatcherAgent/src/metrics.rs | 46 +++++++++++ WatcherAgent/src/models.rs | 87 ++++++++++++++++++++- 13 files changed, 570 insertions(+), 56 deletions(-) diff --git a/WatcherAgent/src/api.rs b/WatcherAgent/src/api.rs index 6dd99ff..d66f5df 100644 --- a/WatcherAgent/src/api.rs +++ b/WatcherAgent/src/api.rs @@ -1,3 +1,17 @@ + + +//! # API Module +//! +//! This module provides all HTTP communication between WatcherAgent and the backend server. +//! +//! ## Responsibilities +//! - **Registration:** Registers the agent with the backend and retrieves its server ID and IP address. +//! - **Heartbeat:** Periodically sends heartbeat signals to indicate liveness. +//! - **Metrics Reporting:** Sends collected hardware and network metrics to the backend. +//! - **Command Listening:** Polls for and executes remote commands from the backend (e.g., update image, restart container). +//! +//! ## Usage +//! These functions are called from the main agent loop and background tasks. All network operations are asynchronous and robust to transient failures. use std::time::Duration; use crate::hardware::HardwareInfo; @@ -11,6 +25,18 @@ use tokio::time::sleep; use bollard::Docker; +/// Registers this agent with the backend server and retrieves its server ID and IP address. +/// +/// This function collects local hardware information, prepares a registration payload, and sends it to the backend. It will retry registration until successful, handling network errors and server-side failures gracefully. +/// +/// # Arguments +/// * `base_url` - The base URL of the backend server (e.g., `https://server.example.com`). +/// +/// # Returns +/// * `Result<(i32, String), Box>` - Tuple of server ID and registered IP address on success. +/// +/// # Errors +/// Returns an error if unable to register after repeated attempts. pub async fn register_with_server( base_url: &str, ) -> Result<(i32, String), Box> { @@ -64,6 +90,16 @@ pub async fn register_with_server( } } +/// Looks up the server ID for the given IP address from the backend server. +/// +/// This function will retry until a valid response is received, handling network errors and server-side failures. +/// +/// # Arguments +/// * `base_url` - The base URL of the backend server. +/// * `ip` - The local IP address to look up. +/// +/// # Returns +/// * `Result<(i32, String), Box>` - Tuple of server ID and registered IP address. async fn get_server_id_by_ip( base_url: &str, ip: &str, @@ -115,6 +151,16 @@ async fn get_server_id_by_ip( } } +/// Periodically sends heartbeat signals to the backend server to indicate agent liveness. +/// +/// This function runs in a background task and will retry on network errors. +/// +/// # Arguments +/// * `base_url` - The base URL of the backend server. +/// * `ip` - The IP address of the agent. +/// +/// # Returns +/// * `Result<(), Box>` - Ok if heartbeats are sent successfully. pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box> { let client = Client::builder() .danger_accept_invalid_certs(true) @@ -138,6 +184,16 @@ pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box>` - Ok if metrics are sent successfully. pub async fn send_metrics( base_url: &str, metrics: &MetricDto, @@ -158,6 +214,16 @@ pub async fn send_metrics( Ok(()) } +/// Polls the backend server for remote commands and executes them. +/// +/// This function runs in a background task, polling the server for new messages. It acknowledges receipt and execution of each command, and handles errors gracefully. +/// +/// # Arguments +/// * `docker` - Reference to a Bollard Docker client. +/// * `base_url` - The base URL of the backend server. +/// +/// # Returns +/// * `Result<(), Box>` - Ok if commands are handled successfully. pub async fn listening_to_server(docker: &Docker, base_url: &str) -> Result<(), Box> { let url = format!("{}/api/message", base_url); let client = reqwest::Client::new(); @@ -210,6 +276,19 @@ pub async fn listening_to_server(docker: &Docker, base_url: &str) -> Result<(), } } +/// Sends an acknowledgment to the backend server for a received or executed command message. +/// +/// This function is used internally by [`listening_to_server`] to confirm receipt and execution status of commands. +/// +/// # Arguments +/// * `client` - Reference to a reqwest HTTP client. +/// * `base_url` - The base URL of the backend server. +/// * `message_id` - The ID of the message being acknowledged. +/// * `status` - Status string (e.g., "received", "success", "error"). +/// * `details` - Additional details about the acknowledgment. +/// +/// # Returns +/// * `Result<(), Box>` - Ok if acknowledgment is sent successfully. async fn send_acknowledgment( client: &reqwest::Client, base_url: &str, diff --git a/WatcherAgent/src/docker/container.rs b/WatcherAgent/src/docker/container.rs index b65707d..98b159b 100644 --- a/WatcherAgent/src/docker/container.rs +++ b/WatcherAgent/src/docker/container.rs @@ -1,3 +1,8 @@ + +//! Docker container utilities for WatcherAgent +//! +//! Provides functions to list and process Docker containers using the Bollard library. +//! use crate::models::DockerContainer; use bollard::query_parameters::{ListContainersOptions}; @@ -6,6 +11,13 @@ use bollard::Docker; +/// Returns a list of available Docker containers. +/// +/// # Arguments +/// * `docker` - Reference to a Bollard Docker client. +/// +/// # Returns +/// * `Vec` - Vector of Docker container info. pub async fn get_available_container(docker: &Docker) -> Vec { println!("=== DOCKER CONTAINER LIST ==="); @@ -67,32 +79,15 @@ pub async fn get_available_container(docker: &Docker) -> Vec { containers_list } -/*pub fn extract_client_container_id(line: &str) -> Option { - // Split by slashes and take the last part - if let Some(last_part) = line.split('/').last() { - let last_part = last_part.trim(); - - // Remove common suffixes - let clean_id = last_part - .trim_end_matches(".scope") - .trim_start_matches("docker-") - .trim_start_matches("crio-") - .trim_start_matches("containerd-"); - - // Check if it looks like a container ID (hex characters) - if clean_id.chars().all(|c| c.is_ascii_hexdigit()) && clean_id.len() >= 12 { - return Some(clean_id.to_string()); - } - - // If it's not pure hex, try to extract hex sequence - let hex_part: String = clean_id.chars() - .take_while(|c| c.is_ascii_hexdigit()) - .collect(); - - if hex_part.len() >= 12 { - return Some(hex_part); - } - } - - None -}*/ \ No newline at end of file +/* +/// Extracts a Docker container ID from a string line. +/// +/// # Arguments +/// * `line` - The input string containing a container ID or related info. +/// +/// # Returns +/// * `Option` - The extracted container ID if found. +pub fn extract_client_container_id(line: &str) -> Option { + // ...existing code... +} +*/ \ No newline at end of file diff --git a/WatcherAgent/src/docker/mod.rs b/WatcherAgent/src/docker/mod.rs index e199708..1e701ab 100644 --- a/WatcherAgent/src/docker/mod.rs +++ b/WatcherAgent/src/docker/mod.rs @@ -1,9 +1,27 @@ + +//! # Docker Module +//! +//! This module provides Docker integration for WatcherAgent, including container enumeration, statistics, and lifecycle management. +//! +//! ## Responsibilities +//! - **Container Management:** Lists, inspects, and manages Docker containers relevant to the agent. +//! - **Statistics Aggregation:** Collects network and CPU statistics for all managed containers. +//! - **Lifecycle Operations:** Supports container restart and ID lookup for agent self-management. +//! pub mod container; pub mod serverclientcomm; use std::error::Error; use crate::models::DockerContainer; + +/// Aggregated Docker statistics for all managed containers. +/// +/// # Fields +/// - `number`: Number of running containers (optional) +/// - `net_in_total`: Total network receive rate in **bytes per second (B/s)** (optional) +/// - `net_out_total`: Total network transmit rate in **bytes per second (B/s)** (optional) +/// - `dockers`: List of [`DockerContainer`] statistics (optional) #[derive(Debug, Clone)] pub struct DockerInfo { pub number: Option, @@ -12,33 +30,51 @@ pub struct DockerInfo { pub dockers: Option>, } + impl DockerInfo { + /// Collects Docker statistics for all managed containers. + /// + /// # Returns + /// * `Result>` - Aggregated Docker statistics or error if collection fails. pub async fn collect() -> Result> { Ok(Self { number: None, net_in_total: None, net_out_total: None, dockers: None }) } } + impl DockerContainer { - /*pub async fn restart_container(docker: &Docker) -> Result<(), Box> { - if let Ok(container_id) = std::env::var("HOSTNAME") { - println!("Restarting container {}", container_id); - if let Err(e) = docker.restart_container(&container_id, Some(RestartContainerOptions { signal: None, t: Some(0) })) - .await - { - eprintln!("Failed to restart container: {}", e); - } - } else { - eprintln!("No container ID found (HOSTNAME not set?)"); - } + /* + /// Restarts the specified Docker container by ID. + /// + /// # Arguments + /// * `docker` - Reference to a Bollard Docker client + /// + /// # Returns + /// * `Result<(), Box>` - Ok if restarted successfully, error otherwise. + pub async fn restart_container(docker: &Docker) -> Result<(), Box> { + // ...existing code... + } + */ - Ok(()) - }*/ - - pub async fn get_docker_container_id (container: DockerContainer) -> Result> { + /// Returns the container ID for a given [`DockerContainer`]. + /// + /// # Arguments + /// * `container` - Reference to a [`DockerContainer`] + /// + /// # Returns + /// * `Result>` - Container ID as integer. + pub async fn get_docker_container_id(container: DockerContainer) -> Result> { Ok(container.ID) } - pub async fn get_docker_container_image (container: DockerContainer) -> Result> { + /// Returns the image name for a given [`DockerContainer`]. + /// + /// # Arguments + /// * `container` - Reference to a [`DockerContainer`] + /// + /// # Returns + /// * `Result>` - Image name as string. + pub async fn get_docker_container_image(container: DockerContainer) -> Result> { Ok(container.image) } } \ No newline at end of file diff --git a/WatcherAgent/src/docker/serverclientcomm.rs b/WatcherAgent/src/docker/serverclientcomm.rs index 64ed9a0..07d84c5 100644 --- a/WatcherAgent/src/docker/serverclientcomm.rs +++ b/WatcherAgent/src/docker/serverclientcomm.rs @@ -1,3 +1,8 @@ + +//! Server-client communication utilities for WatcherAgent +//! +//! Handles server commands, Docker image updates, and container management using the Bollard library. +//! use crate::models::{DockerContainer, ServerMessage}; use crate::docker::container::{get_available_container}; @@ -6,6 +11,14 @@ use bollard::Docker; use bollard::query_parameters::{CreateImageOptions, RestartContainerOptions, InspectContainerOptions}; use futures_util::StreamExt; +/// Handles a message from the backend server and dispatches the appropriate action. +/// +/// # Arguments +/// * `docker` - Reference to a Bollard Docker client. +/// * `msg` - The server message to handle. +/// +/// # Returns +/// * `Result<(), Box>` - Ok if handled successfully, error otherwise. pub async fn handle_server_message(docker: &Docker, msg: ServerMessage) -> Result<(), Box> { let msg = msg.clone(); println!("Handling server message: {:?}", msg); @@ -40,6 +53,14 @@ pub async fn handle_server_message(docker: &Docker, msg: ServerMessage) -> Resul } } +/// Pulls a new Docker image and restarts the current container. +/// +/// # Arguments +/// * `docker` - Reference to a Bollard Docker client. +/// * `image` - The name of the Docker image to pull. +/// +/// # Returns +/// * `Result<(), Box>` - Ok if updated successfully, error otherwise. pub async fn update_docker_image(docker: &Docker, image: &str) -> Result<(), Box> { println!("Updating to {}", image); @@ -74,6 +95,13 @@ pub async fn update_docker_image(docker: &Docker, image: &str) -> Result<(), Box Ok(()) } +/// Finds the Docker container running the agent by image name. +/// +/// # Arguments +/// * `docker` - Reference to a Bollard Docker client. +/// +/// # Returns +/// * `Result, Box>` - The agent's container info if found. pub async fn get_client_container(docker: &Docker) -> Result, Box> { let containers = get_available_container(docker).await; let client_image = "watcher-agent"; @@ -86,6 +114,13 @@ pub async fn get_client_container(docker: &Docker) -> Result>` - Ok if restarted successfully, error otherwise. pub async fn restart_container(docker: &Docker) -> Result<(), Box> { if let Ok(Some(container)) = get_client_container(docker).await { let container_id = container.clone().ID; diff --git a/WatcherAgent/src/hardware/cpu.rs b/WatcherAgent/src/hardware/cpu.rs index 2c5cc0a..333bc97 100644 --- a/WatcherAgent/src/hardware/cpu.rs +++ b/WatcherAgent/src/hardware/cpu.rs @@ -2,6 +2,29 @@ use anyhow::Result; use std::error::Error; use sysinfo::System; +//! # CPU Hardware Module +//! +//! This module provides CPU information collection for WatcherAgent, including load, temperature, and system uptime. +//! +//! ## Responsibilities +//! - **CPU Detection:** Identifies CPU model and core count. +//! - **Metric Collection:** Queries CPU load, temperature, and uptime. +//! - **Error Handling:** Graceful fallback if metrics are unavailable. +//! +//! ## Units +//! - `current_load`: CPU usage as a percentage (**0.0–100.0**) +//! - `current_temp`: CPU temperature in **degrees Celsius (°C)** +//! - `uptime`: System uptime in **seconds (s)** +//! +/// CPU statistics for the host system. +/// +/// # Fields +/// - `name`: CPU model name (string) +/// - `cores`: Number of physical CPU cores (integer) +/// - `current_load`: CPU usage as a percentage (**0.0–100.0**) +/// - `current_temp`: CPU temperature in **degrees Celsius (°C)** +/// - `uptime`: System uptime in **seconds (s)** +/// - `host_name`: Hostname of the system (string) #[derive(Debug)] pub struct CpuInfo { pub name: Option, @@ -12,6 +35,10 @@ pub struct CpuInfo { pub host_name: Option, } +/// Collects CPU information (model, cores, load, temperature, uptime). +/// +/// # Returns +/// * `Result>` - CPU statistics or error if unavailable. pub async fn get_cpu_info() -> Result> { let mut sys = System::new_all(); @@ -33,12 +60,23 @@ pub async fn get_cpu_info() -> Result> { }) } +/// Queries system for current CPU load (percentage). +/// +/// # Arguments +/// * `sys` - Mutable reference to sysinfo::System +/// +/// # Returns +/// * `Result>` - CPU load as percentage. pub async fn get_cpu_load(sys: &mut System) -> Result> { sys.refresh_cpu_all(); tokio::task::yield_now().await; // Allow other tasks to run Ok(sys.global_cpu_usage() as f64) } +/// Attempts to read CPU temperature from system sensors (Linux only). +/// +/// # Returns +/// * `Result>` - CPU temperature in degrees Celsius (°C). pub async fn get_cpu_temp() -> Result> { println!("Attempting to get CPU temperature..."); diff --git a/WatcherAgent/src/hardware/disk.rs b/WatcherAgent/src/hardware/disk.rs index 2ea0206..afc3727 100644 --- a/WatcherAgent/src/hardware/disk.rs +++ b/WatcherAgent/src/hardware/disk.rs @@ -7,6 +7,27 @@ use sysinfo::{Component, Components, Disk, Disks}; use serde::Serialize; +//! # Disk Hardware Module +//! +//! This module provides disk information collection for WatcherAgent, including total and per-disk statistics and temperature data. +//! +//! ## Responsibilities +//! - **Disk Enumeration:** Lists all physical disks and their properties. +//! - **Usage Calculation:** Computes total and per-disk usage, available space, and usage percentage. +//! - **Temperature Monitoring:** Associates disk components with temperature sensors if available. +//! +//! ## Units +//! - All sizes are in **bytes** unless otherwise noted. +//! - Temperatures are in **degrees Celsius (°C)**. +//! +/// Summary of disk statistics for the system. +/// +/// # Fields +/// - `total_size`: Total disk size in bytes (all disks > 100MB) +/// - `total_used`: Total used disk space in bytes +/// - `total_available`: Total available disk space in bytes +/// - `total_usage`: Usage percentage (0.0–100.0) +/// - `detailed_info`: Vector of [`DiskInfoDetailed`] for each disk #[derive(Serialize, Debug)] pub struct DiskInfo { pub total_size: Option, @@ -16,6 +37,12 @@ pub struct DiskInfo { pub detailed_info: Vec, } +/// Collects disk information for all detected disks, including usage and temperature. +/// +/// This function enumerates all disks, calculates usage statistics, and attempts to associate temperature sensors with disk components. +/// +/// # Returns +/// * `Result>` - Disk statistics and details, or error if collection fails. pub async fn get_disk_info() -> Result> { let disks = Disks::new_with_refreshed_list(); let mut detailed_info = Vec::new(); diff --git a/WatcherAgent/src/hardware/gpu.rs b/WatcherAgent/src/hardware/gpu.rs index 9e667e6..8366c1e 100644 --- a/WatcherAgent/src/hardware/gpu.rs +++ b/WatcherAgent/src/hardware/gpu.rs @@ -2,6 +2,29 @@ use anyhow::Result; use nvml_wrapper::Nvml; use std::error::Error; +//! # GPU Hardware Module +//! +//! This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics. +//! +//! ## Responsibilities +//! - **GPU Detection:** Identifies GPU model and capabilities. +//! - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only). +//! - **Error Handling:** Graceful fallback if GPU or NVML is unavailable. +//! +//! ## Units +//! - `current_load`: GPU usage as a percentage (**0.0–100.0**) +//! - `current_temp`: GPU temperature in **degrees Celsius (°C)** +//! - `vram_total`: Total VRAM in **megabytes (MB)** +//! - `vram_used`: Used VRAM in **megabytes (MB)** +//! +/// GPU statistics for the host system. +/// +/// # Fields +/// - `name`: GPU model name (string) +/// - `current_load`: GPU usage as a percentage (**0.0–100.0**) +/// - `current_temp`: GPU temperature in **degrees Celsius (°C)** +/// - `vram_total`: Total VRAM in **megabytes (MB)** +/// - `vram_used`: Used VRAM in **megabytes (MB)** #[derive(Debug)] pub struct GpuInfo { pub name: Option, @@ -11,6 +34,12 @@ pub struct GpuInfo { pub vram_used: Option, } +/// Collects GPU information (load, temperature, VRAM) using NVML. +/// +/// This function attempts to query the first NVIDIA GPU using NVML. If unavailable, it returns a fallback with only the detected GPU name. +/// +/// # Returns +/// * `Result>` - GPU statistics or fallback if unavailable. pub async fn get_gpu_info() -> Result> { match get_gpu_metrics() { Ok((gpu_temp, gpu_load, vram_used, vram_total)) => { @@ -37,6 +66,10 @@ pub async fn get_gpu_info() -> Result> { } } +/// Queries NVML for GPU metrics: temperature, load, VRAM used/total. +/// +/// # Returns +/// * `Result<(f64, f64, f64, f64), Box>` - Tuple of (temperature °C, load %, VRAM used MB, VRAM total MB). pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box> { let nvml = Nvml::init(); if let Ok(nvml) = nvml { diff --git a/WatcherAgent/src/hardware/memory.rs b/WatcherAgent/src/hardware/memory.rs index 758c57a..1aad068 100644 --- a/WatcherAgent/src/hardware/memory.rs +++ b/WatcherAgent/src/hardware/memory.rs @@ -3,6 +3,24 @@ use std::error::Error; use anyhow::Result; use sysinfo::System; +//! # Memory Hardware Module +//! +//! This module provides memory information collection for WatcherAgent, including total, used, and free RAM. +//! +//! ## Responsibilities +//! - **Memory Detection:** Queries system for total, used, and free RAM. +//! - **Usage Calculation:** Computes memory usage percentage. +//! - **Error Handling:** Graceful fallback if metrics are unavailable. +//! +//! ## Units +//! - `total`, `used`, `free`: RAM in **megabytes (MB)** +//! +/// Memory statistics for the host system. +/// +/// # Fields +/// - `total`: Total RAM in **megabytes (MB)** +/// - `used`: Used RAM in **megabytes (MB)** +/// - `free`: Free RAM in **megabytes (MB)** #[derive(Debug)] pub struct MemoryInfo { pub total: Option, @@ -10,6 +28,10 @@ pub struct MemoryInfo { pub free: Option, } +/// Collects memory information (total, used, free RAM). +/// +/// # Returns +/// * `Result` - Memory statistics or error if unavailable. pub async fn get_memory_info() -> Result { let mut sys = System::new(); sys.refresh_memory(); @@ -21,6 +43,13 @@ pub async fn get_memory_info() -> Result { }) } +/// Computes memory usage percentage from sysinfo::System. +/// +/// # Arguments +/// * `sys` - Mutable reference to sysinfo::System +/// +/// # Returns +/// * `Result>` - Memory usage as percentage. pub fn _get_memory_usage(sys: &mut System) -> Result> { sys.refresh_memory(); Ok((sys.used_memory() as f64 / sys.total_memory() as f64) * 100.0) diff --git a/WatcherAgent/src/hardware/mod.rs b/WatcherAgent/src/hardware/mod.rs index 0b94094..a584d40 100644 --- a/WatcherAgent/src/hardware/mod.rs +++ b/WatcherAgent/src/hardware/mod.rs @@ -14,6 +14,23 @@ pub use memory::get_memory_info; pub use network::get_network_info; pub use network::NetworkMonitor; +//! # Hardware Module +//! +//! This module aggregates all hardware subsystems for WatcherAgent, providing unified collection and access to CPU, GPU, memory, disk, and network statistics. +//! +//! ## Responsibilities +//! - **Subsystem Aggregation:** Combines all hardware modules into a single struct for easy access. +//! - **Unified Collection:** Provides a single async method to collect all hardware metrics at once. +//! +/// Aggregated hardware statistics for the host system. +/// +/// # Fields +/// - `cpu`: CPU statistics (see [`CpuInfo`]) +/// - `gpu`: GPU statistics (see [`GpuInfo`]) +/// - `memory`: Memory statistics (see [`MemoryInfo`]) +/// - `disk`: Disk statistics (see [`DiskInfo`]) +/// - `network`: Network statistics (see [`NetworkInfo`]) +/// - `network_monitor`: Rolling monitor for network bandwidth #[derive(Debug)] pub struct HardwareInfo { pub cpu: cpu::CpuInfo, @@ -25,6 +42,10 @@ pub struct HardwareInfo { } impl HardwareInfo { + /// Collects all hardware statistics asynchronously. + /// + /// # Returns + /// * `Result>` - Aggregated hardware statistics or error if any subsystem fails. pub async fn collect() -> Result> { let mut network_monitor = network::NetworkMonitor::new(); Ok(Self { diff --git a/WatcherAgent/src/hardware/network.rs b/WatcherAgent/src/hardware/network.rs index c5bd603..40b3d2e 100644 --- a/WatcherAgent/src/hardware/network.rs +++ b/WatcherAgent/src/hardware/network.rs @@ -2,6 +2,24 @@ use std::error::Error; use std::result::Result; use std::time::Instant; +//! # Network Hardware Module +//! +//! This module provides network information collection for WatcherAgent, including interface enumeration and bandwidth statistics. +//! +//! ## Responsibilities +//! - **Interface Detection:** Lists all network interfaces. +//! - **Bandwidth Monitoring:** Tracks receive/transmit rates using a rolling monitor. +//! - **Error Handling:** Graceful fallback if metrics are unavailable. +//! +//! ## Units +//! - `rx_rate`, `tx_rate`: Network bandwidth in **bytes per second (B/s)** +//! +/// Network statistics for the host system. +/// +/// # Fields +/// - `interfaces`: List of network interface names (strings) +/// - `rx_rate`: Receive bandwidth in **bytes per second (B/s)** +/// - `tx_rate`: Transmit bandwidth in **bytes per second (B/s)** #[derive(Debug)] pub struct NetworkInfo { pub interfaces: Option>, @@ -9,6 +27,13 @@ pub struct NetworkInfo { pub tx_rate: Option, } + +/// Rolling monitor for network bandwidth statistics. +/// +/// # Fields +/// - `prev_rx`: Previous received bytes +/// - `prev_tx`: Previous transmitted bytes +/// - `last_update`: Timestamp of last update #[derive(Debug)] pub struct NetworkMonitor { prev_rx: u64, @@ -23,6 +48,7 @@ impl Default for NetworkMonitor { } impl NetworkMonitor { + /// Creates a new `NetworkMonitor` for bandwidth tracking. pub fn new() -> Self { Self { prev_rx: 0, @@ -31,6 +57,10 @@ impl NetworkMonitor { } } + /// Updates the network usage statistics and returns current rx/tx rates. + /// + /// # Returns + /// * `Result<(f64, f64), Box>` - Tuple of (rx_rate, tx_rate) in bytes per second. pub fn update_usage(&mut self) -> Result<(f64, f64), Box> { let (current_rx, current_tx) = get_network_bytes()?; let elapsed = self.last_update.elapsed().as_secs_f64(); @@ -55,6 +85,13 @@ impl NetworkMonitor { } } +/// Collects network information (interfaces, rx/tx rates) using a monitor. +/// +/// # Arguments +/// * `monitor` - Mutable reference to a `NetworkMonitor` +/// +/// # Returns +/// * `Result>` - Network statistics or error if unavailable. pub async fn get_network_info(monitor: &mut NetworkMonitor) -> Result> { let (rx_rate, tx_rate) = monitor.update_usage()?; Ok(NetworkInfo { diff --git a/WatcherAgent/src/main.rs b/WatcherAgent/src/main.rs index 499badb..cde9dfd 100644 --- a/WatcherAgent/src/main.rs +++ b/WatcherAgent/src/main.rs @@ -1,20 +1,55 @@ -/// WatcherAgent - A Rust-based system monitoring agent -/// This agent collects hardware metrics and sends them to a backend server. -/// It supports CPU, GPU, RAM, disk, and network metrics. + +//! # WatcherAgent +//! +//! **WatcherAgent** is a cross-platform system monitoring agent written in Rust. +//! +//! ## Overview +//! This agent collects real-time hardware metrics (CPU, GPU, RAM, disk, network) and communicates with a backend server for registration, reporting, and remote control. It is designed for deployment in environments where automated monitoring and remote management of system resources is required. +//! +//! ## Features +//! - **Hardware Metrics:** Collects CPU, GPU, RAM, disk, and network statistics using platform-specific APIs. +//! - **Docker Integration:** Detects and manages its own Docker container, supports image updates and container restarts. +//! - **Server Communication:** Registers with a backend server, sends periodic heartbeats, and reports metrics securely. +//! - **Remote Commands:** Listens for and executes commands from the backend (e.g., update image, restart container, stop agent). +//! +//! ## Modules +//! - [`api`]: Handles HTTP communication with the backend server (registration, heartbeat, metrics, commands). +//! - [`hardware`]: Collects hardware metrics from the host system (CPU, GPU, RAM, disk, network). +//! - [`metrics`]: Orchestrates metric collection and reporting. +//! - [`models`]: Defines data structures for server communication and metrics. +//! - [`docker`]: Integrates with Docker for container management and agent lifecycle. +//! +//! ## Usage +//! Run the agent with the backend server URL as an argument: +//! ```sh +//! watcheragent +//! ``` +//! +//! The agent will register itself, start collecting metrics, and listen for remote commands. + pub mod api; pub mod hardware; pub mod metrics; pub mod models; pub mod docker; - -use std::env; -use std::error::Error; -use std::marker::Send; -use std::marker::Sync; -use std::result::Result; use tokio::task::JoinHandle; use bollard::Docker; + + +/// Awaits a spawned asynchronous task and flattens its nested `Result` type. +/// +/// This utility is used to handle the result of a `tokio::spawn`ed task that itself returns a `Result`, +/// propagating any errors from both the task and its execution. +/// +/// # Type Parameters +/// * `T` - The type returned by the task on success. +/// +/// # Arguments +/// * `handle` - The `JoinHandle` of the spawned task. +/// +/// # Returns +/// * `Result>` - The result of the task, or an error if the task failed or panicked. async fn flatten( handle: JoinHandle>>, ) -> Result> { @@ -25,6 +60,24 @@ async fn flatten( } } +/// Main entry point for the WatcherAgent application. +/// +/// This function performs the following steps: +/// 1. Initializes the Docker client for container management. +/// 2. Detects the current running image version. +/// 3. Parses command-line arguments to obtain the backend server URL. +/// 4. Registers the agent with the backend server and retrieves its server ID and IP address. +/// 5. Spawns background tasks for: +/// - Listening for remote commands from the server +/// - Sending periodic heartbeat signals +/// - Collecting and reporting hardware metrics +/// 6. Waits for all background tasks to complete and logs their results. +/// +/// # Arguments +/// * `server-url` - The URL of the backend server to register and report metrics to (passed as a command-line argument). +/// +/// # Errors +/// Returns an error if registration or any background task fails, or if required arguments are missing. #[tokio::main] async fn main() -> Result<(), Box> { // Initialize Docker client @@ -54,7 +107,7 @@ async fn main() -> Result<(), Box> { let server_url = &args[1]; println!("Server URL: {:?}", server_url); - // Registration + // Registration with backend server let (server_id, ip) = match api::register_with_server(&server_url).await { Ok((id, ip)) => (id, ip), Err(e) => { diff --git a/WatcherAgent/src/metrics.rs b/WatcherAgent/src/metrics.rs index ba3b896..3c3d638 100644 --- a/WatcherAgent/src/metrics.rs +++ b/WatcherAgent/src/metrics.rs @@ -1,3 +1,16 @@ + + +//! # Metrics Module +//! +//! This module orchestrates the collection and reporting of hardware and network metrics for WatcherAgent. +//! +//! ## Responsibilities +//! - **Metric Collection:** Gathers real-time statistics from all hardware subsystems (CPU, GPU, RAM, disk, network). +//! - **Reporting:** Periodically sends metrics to the backend server using the API module. +//! - **Error Handling:** Robust to hardware failures and network errors, with retry logic and logging. +//! +//! ## Usage +//! The [`Collector`] struct is instantiated in the main loop and runs as a background task, continuously collecting and reporting metrics. use std::error::Error; use std::time::Duration; @@ -6,13 +19,31 @@ use crate::hardware::network::NetworkMonitor; use crate::hardware::HardwareInfo; use crate::models::MetricDto; + +/// Main orchestrator for hardware and network metric collection and reporting. +/// +/// The `Collector` struct manages the state required to collect metrics and send them to the backend server. It maintains a network monitor for bandwidth tracking, the agent's server ID, and its IP address. +/// +/// # Fields +/// - `network_monitor`: Tracks network usage rates (rx/tx). +/// - `server_id`: Unique server ID assigned by the backend. +/// - `ip_address`: IP address of the agent. pub struct Collector { network_monitor: NetworkMonitor, server_id: i32, ip_address: String, } + impl Collector { + /// Creates a new `Collector` instance for metric collection and reporting. + /// + /// # Arguments + /// * `server_id` - The server ID assigned by the backend. + /// * `ip_address` - The IP address of the agent. + /// + /// # Returns + /// A new `Collector` ready to collect and report metrics. pub fn new(server_id: i32, ip_address: String) -> Self { Self { network_monitor: NetworkMonitor::new(), @@ -21,6 +52,15 @@ impl Collector { } } + /// Runs the main metrics collection loop, periodically sending metrics to the backend server. + /// + /// This function continuously collects hardware and network metrics, sends them to the backend, and handles errors gracefully. It uses a configurable interval and retries on failures. + /// + /// # Arguments + /// * `base_url` - The base URL of the backend server. + /// + /// # Returns + /// * `Result<(), Box>` - Ok if metrics are sent successfully. pub async fn run(&mut self, base_url: &str) -> Result<(), Box> { loop { println!( @@ -40,6 +80,12 @@ impl Collector { } } + /// Collects hardware and network metrics from all subsystems. + /// + /// This function queries the hardware module for CPU, GPU, RAM, disk, and network statistics, and packages them into a [`MetricDto`] for reporting. + /// + /// # Returns + /// * `Result>` - The collected metrics or an error if hardware info is unavailable. pub async fn collect(&mut self) -> Result> { let hardware = match HardwareInfo::collect().await { Ok(hw) => hw, diff --git a/WatcherAgent/src/models.rs b/WatcherAgent/src/models.rs index 729aaed..ca00f22 100644 --- a/WatcherAgent/src/models.rs +++ b/WatcherAgent/src/models.rs @@ -1,6 +1,27 @@ + + +//! # Models Module +//! +//! This module defines all data structures (DTOs) used for communication between WatcherAgent and the backend server, as well as hardware metrics and Docker container info. +//! +//! ## Responsibilities +//! - **DTOs:** Define payloads for registration, metrics, heartbeat, and server commands. +//! - **Units:** All struct fields are documented with their units for clarity and API compatibility. +//! - **Docker Info:** Structures for representing Docker container state and statistics. +//! +//! ## Usage +//! These types are serialized/deserialized for HTTP communication and used throughout the agent for data exchange. use serde::{Deserialize, Serialize}; -// Data structures matching the C# DTOs +/// Registration data sent to the backend server. +/// +/// ## Units +/// - `id`: Unique server identifier (integer) +/// - `ip_address`: IPv4 or IPv6 address (string) +/// - `cpu_type`: CPU model name (string) +/// - `cpu_cores`: Number of physical CPU cores (integer) +/// - `gpu_type`: GPU model name (string) +/// - `ram_size`: Total RAM size in **megabytes (MB)** #[derive(Serialize, Debug)] pub struct RegistrationDto { #[serde(rename = "id")] @@ -17,6 +38,24 @@ pub struct RegistrationDto { pub ram_size: f64, } +/// Hardware and network metrics data sent to the backend server. +/// +/// ## Units +/// - `server_id`: Unique server identifier (integer) +/// - `ip_address`: IPv4 or IPv6 address (string) +/// - `cpu_load`: CPU usage as a percentage (**0.0–100.0**) +/// - `cpu_temp`: CPU temperature in **degrees Celsius (°C)** +/// - `gpu_load`: GPU usage as a percentage (**0.0–100.0**) +/// - `gpu_temp`: GPU temperature in **degrees Celsius (°C)** +/// - `gpu_vram_size`: Total GPU VRAM in **megabytes (MB)** +/// - `gpu_vram_usage`: Used GPU VRAM in **megabytes (MB)** +/// - `ram_load`: Used RAM in **megabytes (MB)** +/// - `ram_size`: Total RAM in **megabytes (MB)** +/// - `disk_size`: Total disk size in **megabytes (MB)** +/// - `disk_usage`: Used disk space in **megabytes (MB)** +/// - `disk_temp`: Disk temperature in **degrees Celsius (°C)** (if available) +/// - `net_rx`: Network receive rate in **bytes per second (B/s)** +/// - `net_tx`: Network transmit rate in **bytes per second (B/s)** #[derive(Serialize, Debug)] pub struct MetricDto { #[serde(rename = "serverId")] @@ -51,6 +90,13 @@ pub struct MetricDto { pub net_tx: f64, } +/// Detailed disk information for each detected disk. +/// +/// ## Units +/// - `disk_total_space`: Total disk space in **bytes** +/// - `disk_available_space`: Available disk space in **bytes** +/// - `disk_used_space`: Used disk space in **bytes** +/// - `component_disk_temperature`: Disk temperature in **degrees Celsius (°C)** #[derive(Serialize, Debug)] pub struct DiskInfoDetailed { pub disk_name: String, @@ -63,6 +109,11 @@ pub struct DiskInfoDetailed { pub component_disk_temperature: f32, } +/// Response containing server ID and IP address. +/// +/// ## Units +/// - `id`: Unique server identifier (integer) +/// - `ip_address`: IPv4 or IPv6 address (string) #[derive(Deserialize)] pub struct IdResponse { pub id: i32, @@ -70,12 +121,24 @@ pub struct IdResponse { pub ip_address: String, } +/// Heartbeat message data sent to the backend server. +/// +/// ## Units +/// - `ip_address`: IPv4 or IPv6 address (string) #[derive(Serialize)] pub struct HeartbeatDto { #[serde(rename = "IpAddress")] pub ip_address: String, } +/// Hardware summary data for diagnostics and registration. +/// +/// ## Units +/// - `cpu_type`: CPU model name (string) +/// - `cpu_cores`: Number of physical CPU cores (integer) +/// - `gpu_type`: GPU model name (string) +/// - `ram_size`: Total RAM size in **megabytes (MB)** +/// - `ip_address`: IPv4 or IPv6 address (string) #[derive(Serialize, Debug)] pub struct HardwareDto { pub cpu_type: String, @@ -85,6 +148,12 @@ pub struct HardwareDto { pub ip_address: String, } +/// Command message received from the backend server. +/// +/// ## Fields +/// - `message_type`: Type of command (e.g., "update_image", "restart_container", "stop_agent") +/// - `data`: Command payload (arbitrary JSON) +/// - `message_id`: Unique identifier for acknowledgment #[derive(Debug, Deserialize, Clone)] pub struct ServerMessage { // Define your message structure here @@ -93,6 +162,12 @@ pub struct ServerMessage { pub message_id: String, // Add an ID for acknowledgment } +/// Acknowledgment payload sent to the backend server for command messages. +/// +/// ## Fields +/// - `message_id`: Unique identifier of the acknowledged message +/// - `status`: Status string ("success", "error", etc.) +/// - `details`: Additional details or error messages #[derive(Debug, Serialize, Clone)] pub struct Acknowledgment { pub message_id: String, @@ -100,6 +175,16 @@ pub struct Acknowledgment { pub details: String, } +/// Docker container information for agent and managed containers. +/// +/// ## Fields +/// - `ID`: Container ID (first 12 hex digits, integer) +/// - `image`: Docker image name (string) +/// - `Name`: Container name (string) +/// - `Status`: Container status ("running", "stopped", etc.) +/// - `_net_in`: Network receive rate in **bytes per second (B/s)** +/// - `_net_out`: Network transmit rate in **bytes per second (B/s)** +/// - `_cpu_load`: CPU usage as a percentage (**0.0–100.0**) #[derive(Debug, Serialize, Clone)] pub struct DockerContainer { pub ID: u32,