check if checks are needed or needs is enough

added toolcache
update
2025-10-03 13:47:00 +02:00 · 2025-10-03 13:17:48 +02:00 · 2025-10-03 12:22:12 +02:00 · 2025-10-02 00:38:23 +02:00 · 2025-10-01 22:57:06 +02:00 · 2025-10-01 22:42:35 +02:00
16 changed files with 1107 additions and 162 deletions
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -9,11 +9,8 @@ on:
    branches: [ "development", "main" ]

 env:
-  CARGO_TERM_COLOR: always
-  RUST_BACKTRACE: 1
  REGISTRY: git.triggermeelmo.com
  IMAGE_NAME: donpat1to/watcher-agent
-  TAG: ${{ github.ref == 'refs/heads/main' && 'latest' || github.ref == 'refs/heads/development' && 'development' || github.ref_type == 'tag' && github.ref_name || 'pr' }}

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@@ -23,6 +20,8 @@ jobs:
  detect-project:
    name: Detect Rust Project
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    outputs:
      project-dir: ${{ steps.detect.outputs.project-dir }}
      project-name: ${{ steps.detect.outputs.project-name }}
@@ -54,26 +53,13 @@ jobs:
            exit 1
          fi

-  setup-rust:
-    name: Setup Rust Toolchain
-    needs: detect-project
-    if: ${{ !failure() && !cancelled() }}
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      
-      - name: Install Rust
-        uses: dtolnay/rust-toolchain@stable
-        with:
-          toolchain: stable
-          targets: x86_64-unknown-linux-gnu, x86_64-pc-windows-gnu
-          components: rustfmt, clippy
-
  test:
    name: Run Tests
-    needs: [detect-project, setup-rust]
+    needs: [detect-project]
    if: ${{ !failure() && !cancelled() }}
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    steps:
      - uses: actions/checkout@v4
      
@@ -97,9 +83,13 @@ jobs:
  set-tag:
    name: Set Tag Name
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    outputs:
      tag_name: ${{ steps.set_tag.outputs.tag_name }}
    steps:
+      - uses: actions/checkout@v4
+
      - name: Determine next semantic version tag
        id: set_tag
        run: |
@@ -132,7 +122,7 @@ jobs:

 #  audit:
 #    name: Security Audit
-#    needs: [detect-project, setup-rust]
+#    needs: [detect-project]
 #    if: ${{ !failure() && !cancelled() }}
 #    runs-on: ubuntu-latest
 #    steps:
@@ -152,9 +142,11 @@ jobs:

  build:
    name: Build (${{ matrix.target }})
-    needs: [detect-project, setup-rust, test, audit]
+    needs: [detect-project, test]
    if: ${{ !failure() && !cancelled() }}
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    strategy:
      matrix:
        include:
@@ -214,6 +206,8 @@ jobs:
      needs.build.result == 'success' &&
      github.event_name != 'pull_request'
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    environment: production
    steps:
      - uses: actions/checkout@v4
@@ -225,14 +219,14 @@ jobs:
          path: dist/
      
      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
      
      - name: Login to Docker Registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
-          username: ${{ secrets.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
+          username: ${{ secrets.AUTOMATION_USERNAME }}
+          password: ${{ secrets.AUTOMATION_PASSWORD }}

      - name: Build Docker image
        uses: docker/build-push-action@v4
@@ -248,9 +242,13 @@ jobs:

  tag:
    name: Create Tag
-    needs: [build, set-tag]
-    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    needs: [docker-build, build, set-tag]
+    #if: |
+    #  github.event_name == 'push' && 
+    #  needs.docker-build.result == 'success'
    runs-on: ubuntu-latest
+    env:
+      RUNNER_TOOL_CACHE: /toolcache
    steps:
      - uses: actions/checkout@v4
        with:
@@ -271,7 +269,7 @@ jobs:

  summary:
    name: Workflow Summary
-    needs: [test, audit, build, docker-build]
+    needs: [test, build, docker-build]
    if: always()
    runs-on: ubuntu-latest
    steps:
--- a/WatcherAgent/Cargo.toml
+++ b/WatcherAgent/Cargo.toml
@@ -19,9 +19,11 @@ nvml-wrapper = "0.11"
 nvml-wrapper-sys = "0.9.0"
 anyhow = "1.0.98"

-# Docker .env loading
-config = "0.13"
-dotenvy = "0.15"
+regex = "1.11.3"
+
+# Docker API access
+bollard = "0.19"
+futures-util = "0.3"

 [target.'cfg(windows)'.dependencies]
 winapi = { version = "0.3", features = ["winuser", "pdh", "ifmib", "iphlpapi", "winerror" ,"wbemcli", "combaseapi"] }
--- a/WatcherAgent/src/api.rs
+++ b/WatcherAgent/src/api.rs
@@ -1,12 +1,42 @@
+
+
+/// # API Module
+///
+/// This module provides all HTTP communication between WatcherAgent and the backend server.
+///
+/// ## Responsibilities
+/// - **Registration:** Registers the agent with the backend and retrieves its server ID and IP address.
+/// - **Heartbeat:** Periodically sends heartbeat signals to indicate liveness.
+/// - **Metrics Reporting:** Sends collected hardware and network metrics to the backend.
+/// - **Command Listening:** Polls for and executes remote commands from the backend (e.g., update image, restart container).
+///
+/// ## Usage
+/// These functions are called from the main agent loop and background tasks. All network operations are asynchronous and robust to transient failures.
 use std::time::Duration;

 use crate::hardware::HardwareInfo;
-use crate::models::{HeartbeatDto, IdResponse, MetricDto, RegistrationDto};
+use crate::models::{HeartbeatDto, IdResponse, MetricDto, RegistrationDto, ServerMessage, Acknowledgment};
+use crate::docker::serverclientcomm::handle_server_message;
+
 use anyhow::Result;
 use reqwest::{Client, StatusCode};
 use std::error::Error;
 use tokio::time::sleep;

+use bollard::Docker;
+
+/// Registers this agent with the backend server and retrieves its server ID and IP address.
+///
+/// This function collects local hardware information, prepares a registration payload, and sends it to the backend. It will retry registration until successful, handling network errors and server-side failures gracefully.
+///
+/// # Arguments
+/// * `base_url` - The base URL of the backend server (e.g., `https://server.example.com`).
+///
+/// # Returns
+/// * `Result<(i32, String), Box<dyn Error + Send + Sync>>` - Tuple of server ID and registered IP address on success.
+///
+/// # Errors
+/// Returns an error if unable to register after repeated attempts.
 pub async fn register_with_server(
    base_url: &str,
 ) -> Result<(i32, String), Box<dyn Error + Send + Sync>> {
@@ -32,7 +62,7 @@ pub async fn register_with_server(
        cpu_type: hardware.cpu.name.clone().unwrap_or_default(),
        cpu_cores: (hardware.cpu.cores).unwrap_or_default(),
        gpu_type: hardware.gpu.name.clone().unwrap_or_default(),
-        ram_size: hardware.memory.total.unwrap_or_default(),
+        ram_size: hardware.memory.total_size.unwrap_or_default(),
    };

    // Try to register (will retry on failure)
@@ -60,6 +90,16 @@ pub async fn register_with_server(
    }
 }

+/// Looks up the server ID for the given IP address from the backend server.
+///
+/// This function will retry until a valid response is received, handling network errors and server-side failures.
+///
+/// # Arguments
+/// * `base_url` - The base URL of the backend server.
+/// * `ip` - The local IP address to look up.
+///
+/// # Returns
+/// * `Result<(i32, String), Box<dyn Error + Send + Sync>>` - Tuple of server ID and registered IP address.
 async fn get_server_id_by_ip(
    base_url: &str,
    ip: &str,
@@ -111,6 +151,16 @@ async fn get_server_id_by_ip(
    }
 }

+/// Periodically sends heartbeat signals to the backend server to indicate agent liveness.
+///
+/// This function runs in a background task and will retry on network errors.
+///
+/// # Arguments
+/// * `base_url` - The base URL of the backend server.
+/// * `ip` - The IP address of the agent.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if heartbeats are sent successfully.
 pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
    let client = Client::builder()
        .danger_accept_invalid_certs(true)
@@ -134,6 +184,16 @@ pub async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box<dyn Erro
    }
 }

+/// Sends collected hardware and network metrics to the backend server.
+///
+/// This function is called periodically from the metrics collection loop. It logs the result and retries on network errors.
+///
+/// # Arguments
+/// * `base_url` - The base URL of the backend server.
+/// * `metrics` - The metrics data to send (see [`MetricDto`]).
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if metrics are sent successfully.
 pub async fn send_metrics(
    base_url: &str,
    metrics: &MetricDto,
@@ -153,3 +213,108 @@ pub async fn send_metrics(

    Ok(())
 }
+
+/// Polls the backend server for remote commands and executes them.
+///
+/// Every 5 seconds this GETs `{base_url}/api/message`. A parsed command is acknowledged as "received", executed via `handle_server_message`, and then acknowledged again with the outcome. All failures are logged and polling continues.
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+/// * `base_url` - The base URL of the backend server.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - The polling loop has no exit path, so in practice this never returns.
+pub async fn listening_to_server(docker: &Docker, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let url = format!("{}/api/message", base_url);
+    let client = reqwest::Client::new();
+
+    loop {
+        // Get message from server
+        let resp = client.get(&url).send().await;
+
+        match resp {
+            Ok(response) => {
+                if response.status() == reqwest::StatusCode::NO_CONTENT {
+                    // No new messages. 204 is itself a 2xx status, so it has to be checked before is_success().
+                    println!("No new messages from server");
+                } else if response.status().is_success() {
+                    match response.json::<ServerMessage>().await {
+                        Ok(msg) => {
+                            // Acknowledge receipt immediately
+                            if let Err(e) = send_acknowledgment(&client, base_url, &msg.message_id, "received", "Message received successfully").await {
+                                eprintln!("Failed to send receipt acknowledgment: {}", e);
+                            }
+
+                            // Handle the message (cloned because message_id is still needed for the second acknowledgment)
+                            let result = handle_server_message(docker, msg.clone()).await;
+
+                            // Send execution result acknowledgment
+                            let (status, details) = match result {
+                                Ok(_) => ("success", "Message executed successfully".to_string()),
+                                Err(e) => ("error", format!("Execution failed: {}", e)),
+                            };
+
+                            if let Err(e) = send_acknowledgment(&client, base_url, &msg.message_id, status, &details).await {
+                                eprintln!("Failed to send execution acknowledgment: {}", e);
+                            }
+                        }
+                        Err(e) => {
+                            eprintln!("Failed to parse message: {}", e);
+                        }
+                    }
+                } else {
+                    eprintln!("Server returned error status: {}", response.status());
+                }
+            }
+            Err(e) => {
+                eprintln!("Failed to reach server: {}", e);
+            }
+        }
+
+        // Poll every 5 seconds (or use WebSocket for real-time)
+        sleep(Duration::from_secs(5)).await;
+    }
+}
+
+/// Sends an acknowledgment to the backend server for a received or executed command message.
+///
+/// This function is used internally by [`listening_to_server`] to confirm receipt and execution status of commands.
+///
+/// # Arguments
+/// * `client` - Reference to a reqwest HTTP client.
+/// * `base_url` - The base URL of the backend server.
+/// * `message_id` - The ID of the message being acknowledged.
+/// * `status` - Status string (e.g., "received", "success", "error").
+/// * `details` - Additional details about the acknowledgment.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok once the request completes (a non-success HTTP status is only logged); Err if sending the request fails.
+async fn send_acknowledgment(
+    client: &reqwest::Client,
+    base_url: &str,
+    message_id: &str,
+    status: &str,
+    details: &str,
+) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let ack_url = format!("{}/api/acknowledge", base_url);
+    
+    let acknowledgment = Acknowledgment {
+        message_id: message_id.to_string(),
+        status: status.to_string(),
+        details: details.to_string(),
+    };
+    
+    let response = client
+        .post(&ack_url)
+        .json(&acknowledgment)
+        .send()
+        .await?;
+    
+    if response.status().is_success() {
+        println!("Acknowledgment sent successfully for message {}", message_id);
+    } else {
+        eprintln!("Server returned error for acknowledgment: {}", response.status());
+    }
+    
+    Ok(())
+}
--- a/WatcherAgent/src/docker/container.rs
+++ b/WatcherAgent/src/docker/container.rs
@@ -0,0 +1,93 @@
+
+//! Docker container utilities for WatcherAgent
+//!
+//! Provides functions to list and process Docker containers using the Bollard library.
+//!
+use crate::models::DockerContainer;
+
+use bollard::query_parameters::{ListContainersOptions};
+use bollard::Docker;
+
+
+
+
+/// Returns a list of available Docker containers (running and stopped, since `all: true` is requested).
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+///
+/// # Returns
+/// * `Vec<DockerContainer>` - One entry per container that has an ID; empty if listing fails (the error is only logged).
+pub async fn get_available_container(docker: &Docker) -> Vec<DockerContainer> {
+    println!("=== DOCKER CONTAINER LIST ===");
+
+    let options = Some(ListContainersOptions {
+        all: true,
+        ..Default::default()
+    });
+
+    let containers_list = match docker.list_containers(options).await {
+        Ok(containers) => {
+            println!("Available containers ({}):", containers.len());
+            containers.into_iter()
+                .filter_map(|container| {
+                    // Containers without an ID cannot be addressed later, so skip them.
+
+                    let id = container.id?;
+                    let short_id = if id.len() > 12 { &id[..12] } else { &id };
+                    // NOTE(review): byte slicing assumes the ID is ASCII (Docker IDs are expected to be hex).
+
+                    let name = container.names
+                        .and_then(|names| names.into_iter().next())
+                        .map(|name| name.trim_start_matches('/').to_string())
+                        .unwrap_or_else(|| "unknown".to_string());
+
+                    let image = container.image
+                        .as_ref()
+                        .map(|img| img.to_string())
+                        .unwrap_or_else(|| "unknown".to_string());
+
+                    let status = container.status
+                        .as_ref()
+                        .map(|s| match s.to_lowercase().as_str() {
+                            s if s.contains("up") || s.contains("running") => "running".to_string(),
+                            s if s.contains("exited") || s.contains("stopped") => "stopped".to_string(),
+                            _ => s.to_string(),
+                        })
+                        .unwrap_or_else(|| "unknown".to_string());
+
+                    println!("  - ID: {}, Image: {}, Name: {}", short_id, image, name);
+
+                    Some(DockerContainer {
+                        ID: short_id.to_string(),
+                        image,
+                        Name: name,
+                        Status: status,
+                        _net_in: 0.0,
+                        _net_out: 0.0,
+                        _cpu_load: 0.0,
+                    })
+                })
+                .collect()
+        }
+        Err(e) => {
+            eprintln!("Failed to list containers: {}", e);
+            Vec::new()
+        }
+    };
+
+    containers_list
+}
+
+/*
+/// Extracts a Docker container ID from a string line.
+///
+/// # Arguments
+/// * `line` - The input string containing a container ID or related info.
+///
+/// # Returns
+/// * `Option<String>` - The extracted container ID if found.
+pub fn extract_client_container_id(line: &str) -> Option<String> {
+    // ...existing code...
+}
+*/
--- a/WatcherAgent/src/docker/mod.rs
+++ b/WatcherAgent/src/docker/mod.rs
@@ -0,0 +1,80 @@
+
+/// # Docker Module
+///
+/// This module provides Docker integration for WatcherAgent, including container enumeration, statistics, and lifecycle management.
+///
+/// ## Responsibilities
+/// - **Container Management:** Lists, inspects, and manages Docker containers relevant to the agent.
+/// - **Statistics Aggregation:** Collects network and CPU statistics for all managed containers.
+/// - **Lifecycle Operations:** Supports container restart and ID lookup for agent self-management.
+///
+pub mod container;
+pub mod serverclientcomm;
+
+use std::error::Error;
+use crate::models::DockerContainer;
+
+
+/// Aggregated Docker statistics for all managed containers.
+///
+/// # Fields
+/// - `number`: Number of running containers (optional)
+/// - `net_in_total`: Total network receive rate in **bytes per second (B/s)** (optional)
+/// - `net_out_total`: Total network transmit rate in **bytes per second (B/s)** (optional)
+/// - `dockers`: List of [`DockerContainer`] statistics (optional)
+#[derive(Debug, Clone)]
+pub struct DockerInfo {
+    pub number: Option<u16>,
+    pub net_in_total: Option<f64>,
+    pub net_out_total: Option<f64>,
+    pub dockers: Option<Vec<DockerContainer>>,
+}
+
+
+impl DockerInfo {
+    /// Placeholder for Docker statistics collection: currently returns a `DockerInfo` with every field set to `None`.
+    ///
+    /// # Returns
+    /// * `Result<DockerInfo, Box<dyn Error + Send + Sync>>` - Always `Ok` at present; no statistics are gathered yet.
+    pub async fn collect() -> Result<Self, Box<dyn Error + Send + Sync>> {
+        Ok(Self { number: None, net_in_total: None, net_out_total: None, dockers: None })
+    }
+}
+
+
+impl DockerContainer {
+    /*
+    /// Restarts the specified Docker container by ID.
+    ///
+    /// # Arguments
+    /// * `docker` - Reference to a Bollard Docker client
+    ///
+    /// # Returns
+    /// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if restarted successfully, error otherwise.
+    pub async fn restart_container(docker: &Docker) -> Result<(), Box<dyn Error + Send + Sync>> {
+        // ...existing code...
+    }
+    */
+
+    /// Returns the container ID for a given [`DockerContainer`].
+    ///
+    /// # Arguments
+    /// * `container` - The [`DockerContainer`] to read from (taken by value)
+    ///
+    /// # Returns
+    /// * `Result<String, Box<dyn Error + Send + Sync>>` - Container ID as string.
+    pub async fn get_docker_container_id(container: DockerContainer) -> Result<String, Box<dyn Error + Send + Sync>> {
+        Ok(container.ID)
+    }
+
+    /// Returns the image name for a given [`DockerContainer`].
+    ///
+    /// # Arguments
+    /// * `container` - The [`DockerContainer`] to read from (taken by value)
+    ///
+    /// # Returns
+    /// * `Result<String, Box<dyn Error + Send + Sync>>` - Image name as string.
+    pub async fn get_docker_container_image(container: DockerContainer) -> Result<String, Box<dyn Error + Send + Sync>> {
+        Ok(container.image)
+    }
+}
--- a/WatcherAgent/src/docker/serverclientcomm.rs
+++ b/WatcherAgent/src/docker/serverclientcomm.rs
@@ -0,0 +1,138 @@
+
+//! Server-client communication utilities for WatcherAgent
+//!
+//! Handles server commands, Docker image updates, and container management using the Bollard library.
+//!
+use crate::models::{DockerContainer, ServerMessage};
+use crate::docker::container::{get_available_container};
+
+use std::error::Error;
+use bollard::Docker;
+use bollard::query_parameters::{CreateImageOptions, RestartContainerOptions};
+use futures_util::StreamExt;
+
+/// Handles a message from the backend server and dispatches the appropriate action.
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+/// * `msg` - The server message to handle.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if handled successfully; Err for a failed action, a missing `image` field, or an unknown message type.
+pub async fn handle_server_message(docker: &Docker, msg: ServerMessage) -> Result<(), Box<dyn Error + Send + Sync>> {
+    println!("Handling server message: {:?}", msg);
+
+    // Handle different message types
+    match msg.message_type.as_str() {
+        "update_image" => {
+            if let Some(image_name) = msg.data.get("image").and_then(|v| v.as_str()) {
+                println!("Received update command for image: {}", image_name);
+                // Pull the requested image, then restart the agent container
+                update_docker_image(docker, image_name).await?;
+                Ok(())
+            } else {
+                Err("Missing image name in update message".into())
+            }
+        }
+        "restart_container" => {
+            println!("Received restart container command");
+            // Restart the agent's own container
+            restart_container(docker).await?;
+            Ok(())
+        }
+        "stop_agent" => {
+            println!("Received stop agent command");
+            // NOTE: the process exits right here, so this function never returns for
+            // "stop_agent" and the caller cannot send an execution acknowledgment.
+            std::process::exit(0);
+        }
+        _ => {
+            eprintln!("Unknown message type: {}", msg.message_type);
+            Err(format!("Unknown message type: {}", msg.message_type).into())
+        }
+    }
+}
+
+/// Pulls a new Docker image and restarts the current container.
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+/// * `image` - The name of the Docker image to pull.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if the pull and the restart both succeed; Err with the first pull or restart error otherwise.
+pub async fn update_docker_image(docker: &Docker, image: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
+    println!("Updating to {}", image);
+
+    // 1. Pull new image
+    let mut stream = docker.create_image(
+        Some(CreateImageOptions {
+            from_image: Some(image.to_string()),
+            ..Default::default()
+        }),
+        None,
+        None,
+    );
+
+    // Drain the progress stream; the pull is only complete once the stream ends without an error
+    while let Some(result) = stream.next().await {
+        match result {
+            Ok(progress) => {
+                if let Some(status) = progress.status {
+                    println!("Pull status: {}", status);
+                }
+            }
+            Err(e) => {
+                eprintln!("Error pulling image: {}", e);
+                return Err(e.into()); // do not restart (or report success) after a failed pull
+            }
+        }
+    }
+
+    // 2. Restart the current container, surfacing any failure to the caller
+    restart_container(docker).await?;
+
+    Ok(())
+}
+
+/// Finds the Docker container running the agent by image name.
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+///
+/// # Returns
+/// * `Result<Option<DockerContainer>, Box<dyn Error + Send + Sync>>` - First container whose image repository name (registry/namespace prefix and tag/digest ignored) is `watcher-agent`, or `None`.
+pub async fn get_client_container(docker: &Docker) -> Result<Option<DockerContainer>, Box<dyn Error + Send + Sync>> {
+    let containers = get_available_container(docker).await;
+    let client_image = "watcher-agent";
+
+    // Compare only the repository basename so references like "registry/ns/watcher-agent:tag" still match.
+    let repo_name = |image: &str| -> String {
+        let last = image.rsplit('/').next().unwrap_or(image);
+        last.split(|ch: char| ch == ':' || ch == '@').next().unwrap_or(last).to_string()
+    };
+    Ok(containers.iter().find(|c| repo_name(c.image.as_str()) == client_image).cloned())
+}
+
+/// Restarts the agent's own Docker container.
+///
+/// # Arguments
+/// * `docker` - Reference to a Bollard Docker client.
+///
+/// # Returns
+/// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if restarted successfully; Err if the agent container cannot be found or the restart request fails.
+pub async fn restart_container(docker: &Docker) -> Result<(), Box<dyn Error + Send + Sync>> {
+    let container = match get_client_container(docker).await? {
+        Some(container) => container,
+        None => {
+            eprintln!("No container found for the agent image");
+            return Err("No container found for the agent image".into());
+        }
+    };
+    println!("Restarting container {}", container.ID);
+    if let Err(e) = docker.restart_container(&container.ID, Some(RestartContainerOptions { signal: None, t: Some(0) })).await {
+        eprintln!("Failed to restart container: {}", e);
+        return Err(e.into());
+    }
+    Ok(())
+}
--- a/WatcherAgent/src/hardware/cpu.rs
+++ b/WatcherAgent/src/hardware/cpu.rs
@@ -2,6 +2,29 @@ use anyhow::Result;
 use std::error::Error;
 use sysinfo::System;

+/// # CPU Hardware Module
+///
+/// This module provides CPU information collection for WatcherAgent, including load, temperature, and system uptime.
+///
+/// ## Responsibilities
+/// - **CPU Detection:** Identifies CPU model and core count.
+/// - **Metric Collection:** Queries CPU load, temperature, and uptime.
+/// - **Error Handling:** Graceful fallback if metrics are unavailable.
+///
+/// ## Units
+/// - `current_load`: CPU usage as a percentage (**0.0–100.0**)
+/// - `current_temp`: CPU temperature in **degrees Celsius (°C)**
+/// - `uptime`: System uptime in **seconds (s)**
+///
+/// CPU statistics for the host system.
+///
+/// # Fields
+/// - `name`: CPU model name (string)
+/// - `cores`: Number of physical CPU cores (integer)
+/// - `current_load`: CPU usage as a percentage (**0.0–100.0**)
+/// - `current_temp`: CPU temperature in **degrees Celsius (°C)**
+/// - `uptime`: System uptime in **seconds (s)**
+/// - `host_name`: Hostname of the system (string)
 #[derive(Debug)]
 pub struct CpuInfo {
    pub name: Option<String>,
@@ -12,6 +35,10 @@ pub struct CpuInfo {
    pub host_name: Option<String>,
 }

+/// Collects CPU information (model, cores, load, temperature, uptime).
+///
+/// # Returns
+/// * `Result<CpuInfo, Box<dyn Error + Send + Sync>>` - CPU statistics or error if unavailable.
 pub async fn get_cpu_info() -> Result<CpuInfo, Box<dyn Error + Send + Sync>> {
    let mut sys = System::new_all();

@@ -33,12 +60,23 @@ pub async fn get_cpu_info() -> Result<CpuInfo, Box<dyn Error + Send + Sync>> {
    })
 }

+/// Queries system for current CPU load (percentage).
+///
+/// # Arguments
+/// * `sys` - Mutable reference to sysinfo::System
+///
+/// # Returns
+/// * `Result<f64, Box<dyn Error + Send + Sync>>` - CPU load as percentage.
 pub async fn get_cpu_load(sys: &mut System) -> Result<f64, Box<dyn Error + Send + Sync>> {
    sys.refresh_cpu_all();
    tokio::task::yield_now().await; // Allow other tasks to run
    Ok(sys.global_cpu_usage() as f64)
 }

+/// Attempts to read CPU temperature from system sensors (Linux only).
+///
+/// # Returns
+/// * `Result<f64, Box<dyn Error + Send + Sync>>` - CPU temperature in degrees Celsius (°C).
 pub async fn get_cpu_temp() -> Result<f64, Box<dyn Error + Send + Sync>> {
    println!("Attempting to get CPU temperature...");

--- a/WatcherAgent/src/hardware/disk.rs
+++ b/WatcherAgent/src/hardware/disk.rs
@@ -1,130 +1,165 @@
-use std::error::Error;
+use crate::models::DiskInfoDetailed;

+use std::error::Error;
 use anyhow::Result;
 use sysinfo::DiskUsage;
-use sysinfo::{Component, Components, Disk, Disks, System};
+use sysinfo::{Component, Components, Disk, Disks};
+use serde::Serialize;

-#[derive(Debug)]
+
+/// # Disk Hardware Module
+///
+/// This module provides disk information collection for WatcherAgent, including total and per-disk statistics and temperature data.
+///
+/// ## Responsibilities
+/// - **Disk Enumeration:** Lists all physical disks and their properties.
+/// - **Usage Calculation:** Computes total and per-disk usage, available space, and usage percentage.
+/// - **Temperature Monitoring:** Associates disk components with temperature sensors if available.
+///
+/// ## Units
+/// - All sizes are in **bytes** unless otherwise noted.
+/// - Temperatures are in **degrees Celsius (°C)**.
+///
+/// Summary of disk statistics for the system.
+///
+/// # Fields
+/// - `total_size`: Total disk size in bytes (all disks > 100MB)
+/// - `total_used`: Total used disk space in bytes
+/// - `total_available`: Total available disk space in bytes
+/// - `total_usage`: Usage percentage (0.0–100.0)
+/// - `detailed_info`: Vector of [`DiskInfoDetailed`] for each disk
+#[derive(Serialize, Debug)]
 pub struct DiskInfo {
-    pub total: Option<f64>,
-    pub used: Option<f64>,
-    pub free: Option<f64>,
+    pub total_size: Option<f64>,
+    pub total_used: Option<f64>,
+    pub total_available: Option<f64>,
+    pub total_usage: Option<f64>,
+    pub detailed_info: Vec<DiskInfoDetailed>,
 }

-pub async fn get_disk_info() -> Result<DiskInfo> {
+/// Collects disk information for all detected disks, including usage and temperature.
+///
+/// This function enumerates all disks, calculates usage statistics, and attempts to associate temperature sensors with disk components.
+///
+/// # Returns
+/// * `Result<DiskInfo, Box<dyn std::error::Error + Send + Sync>>` - Disk statistics and details, or error if collection fails.
+pub async fn get_disk_info() -> Result<DiskInfo, Box<dyn std::error::Error + Send + Sync>> {
    let disks = Disks::new_with_refreshed_list();
-    let _disk_types = [
-        sysinfo::DiskKind::HDD,
-        sysinfo::DiskKind::SSD,
-        sysinfo::DiskKind::Unknown(0),
-    ];
-
-    let (_, _, _, _) = get_disk_utitlization().unwrap();
-
-    let mut total = 0;
-    let mut used = 0;
+    let mut detailed_info = Vec::new();
    
+    // Collect detailed disk information
    for disk in disks.list() {
-        if disk.total_space() > 100 * 1024 * 1024 {
-            // > 100MB
-            total += disk.total_space();
-            used += disk.total_space() - disk.available_space();
+        if disk.kind() == sysinfo::DiskKind::Unknown(0) {
+            continue;
+        }
+        
+        let disk_used = disk.total_space().saturating_sub(disk.available_space());
+        detailed_info.push(DiskInfoDetailed {
+            disk_name: disk.name().to_string_lossy().into_owned(),
+            disk_kind: format!("{:?}", disk.kind()),
+            disk_total_space: disk.total_space() as f64,
+            disk_available_space: disk.available_space() as f64,
+            disk_used_space: disk_used as f64,
+            disk_mount_point: disk.mount_point().to_string_lossy().into_owned(),
+            component_disk_label: String::new(),
+            component_disk_temperature: 0.0,
+        });
+    }
+    
+    // Get component temperatures
+    let components = Components::new_with_refreshed_list();
+    for component in &components {
+        if let Some(temperature) = component.temperature() {
+            // Match sensors to disks by name; unnamed disks are skipped because "" is contained in every label
+            for disk_info in &mut detailed_info {
+                if !disk_info.disk_name.is_empty() && component.label().contains(&disk_info.disk_name) {
+                    disk_info.component_disk_label = component.label().to_string();
+                    disk_info.component_disk_temperature = temperature;
                }
            }
+        }
+    }
+    
+    // Calculate totals (only disks > 100MB)
+    let (total_size, total_used, total_available) = calculate_disk_totals(&disks);
+    
+    let (total_size, total_used, total_available, total_usage) = if total_size > 0.0 {
+        (total_size, total_used, total_available, (total_used / total_size) * 100.0)
+    } else {
+        match get_disk_info_fallback() {
+            Ok(fallback_data) => fallback_data,
+            Err(_) => (0.0, 0.0, 0.0, 0.0), // Default values if fallback fails
+        }
+    };
    
    Ok(DiskInfo {
-        total: Some(total as f64),
-        used: Some(used as f64),
-        free: Some((total - used) as f64),
+        total_size: if total_size > 0.0 { Some(total_size) } else { None },
+        total_used: if total_used > 0.0 { Some(total_used) } else { None },
+        total_available: if total_available > 0.0 { Some(total_available) } else { None },
+        total_usage: if total_usage > 0.0 { Some(total_usage) } else { None },
+        detailed_info,
    })
 }

-pub fn get_disk_utitlization() -> Result<(f64, f64, f64, f64), Box<dyn Error>> {
-    let mut sys = System::new();
-    sys.refresh_all();
-    let mut count = 0;
-
+fn calculate_disk_totals(disks: &Disks) -> (f64, f64, f64) {
    let mut total_size = 0u64;
    let mut total_used = 0u64;
    let mut total_available = 0u64;
    
-    let disks = Disks::new_with_refreshed_list();
    for disk in disks.list() {
-        // Only print disks with known kind
-        if disk.kind() == sysinfo::DiskKind::Unknown(0) {
-            continue;
-        }
-        println!(
-            "Disk_Name: {:?}:\n---- Disk_Kind: {},\n---- Total: {},\n---- Available: {},\n---- Used: {}, \n---- Mount_Point: {:?}",
-            disk.name(),
-            disk.kind(),
-            disk.total_space(),
-            disk.available_space(),
-            disk.total_space() - disk.available_space(),
-            disk.mount_point()
-        );
-    }
-    let components = Components::new_with_refreshed_list();
-    for component in &components {
-        if let Some(temperature) = component.temperature() {
-            println!(
-                "Component_Label: {}, Temperature: {}°C",
-                component.label(),
-                temperature
-            );
+        if disk.total_space() > 100 * 1024 * 1024 { // only disks larger than 100 MiB count towards the totals
+            total_size += disk.total_space();
+            total_available += disk.available_space();
+            total_used += disk.total_space().saturating_sub(disk.available_space());
        }
    }
    
-    // Berechnungen
-    let total_size = if count > 0 {
-        total_size as f64 // in Bytes
-    } else {
-        // Fallback: Versuche df unter Linux
-        println!("Fallback: Using 'df' command to get disk info.");
-        #[cfg(target_os = "linux")]
-        {
+    (total_size as f64, total_used as f64, total_available as f64)
+}
+
+#[cfg(target_os = "linux")]
+fn get_disk_info_fallback() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
    use std::process::Command;
-            if let Ok(output) = Command::new("df")
+    
+    let output = Command::new("df")
        .arg("-B1")
-                .arg("--output=size,used")
-                .output()
-            {
+        .arg("--output=size,used,avail")
+        .output()?;
+        
    let stdout = String::from_utf8_lossy(&output.stdout);
+    let mut total_size = 0u64;
+    let mut total_used = 0u64;
+    let mut total_available = 0u64;
+    let mut count = 0;
+    
    for line in stdout.lines().skip(1) {
        let parts: Vec<&str> = line.split_whitespace().collect();
-                    if parts.len() == 2 {
-                        if let (Ok(size), Ok(used)) =
-                            (parts[0].parse::<u64>(), parts[1].parse::<u64>())
-                        {
+        if parts.len() >= 3 {
+            if let (Ok(size), Ok(used), Ok(avail)) = (
+                parts[0].parse::<u64>(),
+                parts[1].parse::<u64>(),
+                parts[2].parse::<u64>(),
+            ) {
                total_size += size;
                total_used += used;
+                total_available += avail;
                count += 1;
            }
        }
    }
-                total_size as f64 // in Bytes
-            } else {
-                0.0
-            }
-        }
-        #[cfg(not(target_os = "linux"))]
-        {
-            0.0
-        }
-    };
    
-    let usage = if total_size > 0.0 {
+    let usage = if total_size > 0 {
        (total_used as f64 / total_size as f64) * 100.0
    } else {
        0.0
    };
    
-    Ok((
-        total_size,
-        total_used as f64,
-        total_available as f64,
-        usage as f64,
-    )) // Disk-Temp bleibt 0.0 ohne spezielle Hardware
+    Ok((total_size as f64, total_used as f64, total_available as f64, usage))
+}
+
+#[cfg(not(target_os = "linux"))]
+fn get_disk_info_fallback() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
+    Ok((0.0, 0.0, 0.0, 0.0))
 }

 pub fn _get_disk_temp_for_component(component: &Component) -> Option<f64> {
--- a/WatcherAgent/src/hardware/gpu.rs
+++ b/WatcherAgent/src/hardware/gpu.rs
@@ -2,6 +2,29 @@ use anyhow::Result;
 use nvml_wrapper::Nvml;
 use std::error::Error;

+/// # GPU Hardware Module
+///
+/// This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics.
+///
+/// ## Responsibilities
+/// - **GPU Detection:** Identifies GPU model and capabilities.
+/// - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only).
+/// - **Error Handling:** Graceful fallback if GPU or NVML is unavailable.
+///
+/// ## Units
+/// - `current_load`: GPU usage as a percentage (**0.0–100.0**)
+/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
+/// - `vram_total`: Total VRAM in **bytes**
+/// - `vram_used`: Used VRAM in **bytes**
+///
+/// GPU statistics for the host system.
+///
+/// # Fields
+/// - `name`: GPU model name (string)
+/// - `current_load`: GPU usage as a percentage (**0.0–100.0**)
+/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
+/// - `vram_total`: Total VRAM in **bytes**
+/// - `vram_used`: Used VRAM in **bytes**
 #[derive(Debug)]
 pub struct GpuInfo {
    pub name: Option<String>,
@@ -11,6 +34,12 @@ pub struct GpuInfo {
    pub vram_used: Option<f64>,
 }

+/// Collects GPU information (load, temperature, VRAM) using NVML.
+///
+/// This function attempts to query the first NVIDIA GPU using NVML. If unavailable, it returns a fallback with only the detected GPU name.
+///
+/// # Returns
+/// * `Result<GpuInfo, Box<dyn Error + Send + Sync>>` - GPU statistics or fallback if unavailable.
 pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
    match get_gpu_metrics() {
        Ok((gpu_temp, gpu_load, vram_used, vram_total)) => {
@@ -37,6 +66,10 @@ pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
    }
 }

+/// Queries NVML for GPU metrics: temperature, load, VRAM used/total.
+///
+/// # Returns
+/// * `Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>>` - Tuple of (temperature °C, load %, VRAM used bytes, VRAM total bytes).
 pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
    let nvml = Nvml::init();
    if let Ok(nvml) = nvml {
--- a/WatcherAgent/src/hardware/memory.rs
+++ b/WatcherAgent/src/hardware/memory.rs
@@ -3,25 +3,56 @@ use std::error::Error;
 use anyhow::Result;
 use sysinfo::System;

+/// # Memory Hardware Module
+///
+/// This module provides memory information collection for WatcherAgent, including total, used, and free RAM.
+///
+/// ## Responsibilities
+/// - **Memory Detection:** Queries system for total, used, and free RAM.
+/// - **Usage Calculation:** Computes memory usage percentage.
+/// - **Error Handling:** Graceful fallback if metrics are unavailable.
+///
+/// ## Units
+/// - `total_size`, `used`, `free`: RAM in **bytes**; `current_load`: RAM usage as a percentage (**0.0–100.0**)
+///
+/// Memory statistics for the host system.
+///
+/// # Fields
+/// - `total_size`: Total RAM in **bytes**
+/// - `used`: Used RAM in **bytes**
+/// - `free`: Free RAM in **bytes**
 #[derive(Debug)]
 pub struct MemoryInfo {
-    pub total: Option<f64>,
+    pub total_size: Option<f64>,
    pub used: Option<f64>,
    pub free: Option<f64>,
+    pub current_load: Option<f64>,
 }

-pub async fn get_memory_info() -> Result<MemoryInfo> {
+/// Collects memory information (total, used, free RAM and usage percentage).
+///
+/// # Returns
+/// * `Result<MemoryInfo, Box<dyn Error + Send + Sync>>` - Memory statistics, or the error from `get_memory_usage`.
+pub async fn get_memory_info() -> Result<MemoryInfo, Box<dyn Error + Send + Sync>> {
    let mut sys = System::new();
    sys.refresh_memory();

    Ok(MemoryInfo {
-        total: Some(sys.total_memory() as f64),
+        total_size: Some(sys.total_memory() as f64),
        used: Some(sys.used_memory() as f64),
        free: Some(sys.free_memory() as f64),
+        current_load: Some(get_memory_usage(&mut sys)?), // propagate the error instead of panicking
    })
 }

-pub fn _get_memory_usage(sys: &mut System) -> Result<f64, Box<dyn Error + Send + Sync>> {
+/// Computes memory usage percentage from sysinfo::System.
+///
+/// # Arguments
+/// * `sys` - Mutable reference to sysinfo::System
+///
+/// # Returns
+/// * `Result<f64, Box<dyn Error + Send + Sync>>` - Memory usage as percentage.
+pub fn get_memory_usage(sys: &mut System) -> Result<f64, Box<dyn Error + Send + Sync>> {
    sys.refresh_memory();
    Ok((sys.used_memory() as f64 / sys.total_memory() as f64) * 100.0)
 }
--- a/WatcherAgent/src/hardware/mod.rs
+++ b/WatcherAgent/src/hardware/mod.rs
@@ -14,6 +14,23 @@ pub use memory::get_memory_info;
 pub use network::get_network_info;
 pub use network::NetworkMonitor;

+/// # Hardware Module
+///
+/// This module aggregates all hardware subsystems for WatcherAgent, providing unified collection and access to CPU, GPU, memory, disk, and network statistics.
+///
+/// ## Responsibilities
+/// - **Subsystem Aggregation:** Combines all hardware modules into a single struct for easy access.
+/// - **Unified Collection:** Provides a single async method to collect all hardware metrics at once.
+///
+/// Aggregated hardware statistics for the host system.
+///
+/// # Fields
+/// - `cpu`: CPU statistics (see [`CpuInfo`])
+/// - `gpu`: GPU statistics (see [`GpuInfo`])
+/// - `memory`: Memory statistics (see [`MemoryInfo`])
+/// - `disk`: Disk statistics (see [`DiskInfo`])
+/// - `network`: Network statistics (see [`NetworkInfo`])
+/// - `network_monitor`: Rolling monitor for network bandwidth
 #[derive(Debug)]
 pub struct HardwareInfo {
    pub cpu: cpu::CpuInfo,
@@ -25,6 +42,10 @@ pub struct HardwareInfo {
 }

 impl HardwareInfo {
+    /// Collects all hardware statistics asynchronously.
+    ///
+    /// # Returns
+    /// * `Result<HardwareInfo, Box<dyn Error + Send + Sync>>` - Aggregated hardware statistics or error if any subsystem fails.
    pub async fn collect() -> Result<Self, Box<dyn Error + Send + Sync>> {
        let mut network_monitor = network::NetworkMonitor::new();
        Ok(Self {
--- a/WatcherAgent/src/hardware/network.rs
+++ b/WatcherAgent/src/hardware/network.rs
@@ -2,6 +2,24 @@ use std::error::Error;
 use std::result::Result;
 use std::time::Instant;

+/// # Network Hardware Module
+///
+/// This module provides network information collection for WatcherAgent, including interface enumeration and bandwidth statistics.
+///
+/// ## Responsibilities
+/// - **Interface Detection:** Lists all network interfaces.
+/// - **Bandwidth Monitoring:** Tracks receive/transmit rates using a rolling monitor.
+/// - **Error Handling:** Graceful fallback if metrics are unavailable.
+///
+/// ## Units
+/// - `rx_rate`, `tx_rate`: Network bandwidth in **bytes per second (B/s)**
+///
+/// Network statistics for the host system.
+///
+/// # Fields
+/// - `interfaces`: List of network interface names (strings)
+/// - `rx_rate`: Receive bandwidth in **bytes per second (B/s)**
+/// - `tx_rate`: Transmit bandwidth in **bytes per second (B/s)**
 #[derive(Debug)]
 pub struct NetworkInfo {
    pub interfaces: Option<Vec<String>>,
@@ -9,6 +27,13 @@ pub struct NetworkInfo {
    pub tx_rate: Option<f64>,
 }

+
+/// Rolling monitor for network bandwidth statistics.
+///
+/// # Fields
+/// - `prev_rx`: Previous received bytes
+/// - `prev_tx`: Previous transmitted bytes
+/// - `last_update`: Timestamp of last update
 #[derive(Debug)]
 pub struct NetworkMonitor {
    prev_rx: u64,
@@ -23,6 +48,7 @@ impl Default for NetworkMonitor {
 }

 impl NetworkMonitor {
+    /// Creates a new `NetworkMonitor` for bandwidth tracking.
    pub fn new() -> Self {
        Self {
            prev_rx: 0,
@@ -31,6 +57,10 @@ impl NetworkMonitor {
        }
    }

+    /// Updates the network usage statistics and returns current rx/tx rates.
+    ///
+    /// # Returns
+    /// * `Result<(f64, f64), Box<dyn Error>>` - Tuple of (rx_rate, tx_rate) in bytes per second.
    pub fn update_usage(&mut self) -> Result<(f64, f64), Box<dyn Error>> {
        let (current_rx, current_tx) = get_network_bytes()?;
        let elapsed = self.last_update.elapsed().as_secs_f64();
@@ -55,6 +85,13 @@ impl NetworkMonitor {
    }
 }

+/// Collects network information (interfaces, rx/tx rates) using a monitor.
+///
+/// # Arguments
+/// * `monitor` - Mutable reference to a `NetworkMonitor`
+///
+/// # Returns
+/// * `Result<NetworkInfo, Box<dyn Error>>` - Network statistics or error if unavailable.
 pub async fn get_network_info(monitor: &mut NetworkMonitor) -> Result<NetworkInfo, Box<dyn Error>> {
    let (rx_rate, tx_rate) = monitor.update_usage()?;
    Ok(NetworkInfo {
--- a/WatcherAgent/src/main.rs
+++ b/WatcherAgent/src/main.rs
@@ -1,18 +1,57 @@
-/// WatcherAgent - A Rust-based system monitoring agent
-/// This agent collects hardware metrics and sends them to a backend server.
-/// It supports CPU, GPU, RAM, disk, and network metrics.
+
+//! # WatcherAgent
+//!
+//! **WatcherAgent** is a cross-platform system monitoring agent written in Rust.
+//!
+//! ## Overview
+//! This agent collects real-time hardware metrics (CPU, GPU, RAM, disk, network) and communicates with a backend server for registration, reporting, and remote control. It is designed for deployment in environments where automated monitoring and remote management of system resources is required.
+//!
+//! ## Features
+//! - **Hardware Metrics:** Collects CPU, GPU, RAM, disk, and network statistics using platform-specific APIs.
+//! - **Docker Integration:** Detects and manages its own Docker container, supports image updates and container restarts.
+//! - **Server Communication:** Registers with a backend server, sends periodic heartbeats, and reports metrics securely.
+//! - **Remote Commands:** Listens for and executes commands from the backend (e.g., update image, restart container, stop agent).
+//!
+//! ## Modules
+//! - [`api`]: Handles HTTP communication with the backend server (registration, heartbeat, metrics, commands).
+//! - [`hardware`]: Collects hardware metrics from the host system (CPU, GPU, RAM, disk, network).
+//! - [`metrics`]: Orchestrates metric collection and reporting.
+//! - [`models`]: Defines data structures for server communication and metrics.
+//! - [`docker`]: Integrates with Docker for container management and agent lifecycle.
+//!
+//! ## Usage
+//! Run the agent with the backend server URL as an argument:
+//! ```sh
+//! watcheragent <server-url>
+//! ```
+//!
+//! The agent will register itself, start collecting metrics, and listen for remote commands.
+
 pub mod api;
 pub mod hardware;
 pub mod metrics;
 pub mod models;
-
+pub mod docker;
+use tokio::task::JoinHandle;
+use bollard::Docker;
 use std::env;
 use std::error::Error;
-use std::marker::Send;
-use std::marker::Sync;
-use std::result::Result;
-use tokio::task::JoinHandle;

+
+
+/// Awaits a spawned asynchronous task and flattens its nested `Result` type.
+///
+/// This utility is used to handle the result of a `tokio::spawn`ed task that itself returns a `Result`,
+/// propagating any errors from both the task and its execution.
+///
+/// # Type Parameters
+/// * `T` - The type returned by the task on success.
+///
+/// # Arguments
+/// * `handle` - The `JoinHandle` of the spawned task.
+///
+/// # Returns
+/// * `Result<T, Box<dyn Error + Send + Sync>>` - The result of the task, or an error if the task failed or panicked.
 async fn flatten<T>(
    handle: JoinHandle<Result<T, Box<dyn Error + Send + Sync>>>,
 ) -> Result<T, Box<dyn Error + Send + Sync>> {
@@ -23,20 +62,54 @@ async fn flatten<T>(
    }
 }

+/// Main entry point for the WatcherAgent application.
+///
+/// This function performs the following steps:
+/// 1. Initializes the Docker client for container management.
+/// 2. Detects the current running image version.
+/// 3. Parses command-line arguments to obtain the backend server URL.
+/// 4. Registers the agent with the backend server and retrieves its server ID and IP address.
+/// 5. Spawns background tasks for:
+///     - Listening for remote commands from the server
+///     - Sending periodic heartbeat signals
+///     - Collecting and reporting hardware metrics
+/// 6. Waits for all background tasks to complete and logs their results.
+///
+/// # Arguments
+/// * `server-url` - The URL of the backend server to register and report metrics to (passed as a command-line argument).
+///
+/// # Errors
+/// Returns an error if registration or any background task fails, or if required arguments are missing.
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
-    let args: Vec<String> = env::args().collect();
+    // Initialize Docker client
+    let docker = Docker::connect_with_local_defaults()
+        .map_err(|e| format!("Failed to connect to Docker: {}", e))?;

+    // Get current image version
+    let client_version = match docker::serverclientcomm::get_client_container(&docker).await {
+        Ok(Some(version)) => version.image,
+        Ok(None) => {
+            eprintln!("Warning: No image version found");
+            "unknown".to_string()
+        }
+        Err(e) => {
+            eprintln!("Warning: Could not get current image version: {}", e);
+            "unknown".to_string()
+        }
+    };
+    println!("Client Version: {}", client_version);
+
+    let args: Vec<String> = env::args().collect();
    // args[0] is the binary name, args[1] is the first actual argument
    if args.len() < 2 {
        eprintln!("Usage: {} <server-url>", args[0]);
        return Err("Missing server URL argument".into());
    }
-
    let server_url = &args[1];
    println!("Server URL: {:?}", server_url);

-    // Registration
+    // Registration with backend server
    let (server_id, ip) = match api::register_with_server(&server_url).await {
        Ok((id, ip)) => (id, ip),
        Err(e) => {
@@ -46,6 +119,13 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
    };

    // Start background tasks
+    // Start server listening for commands
+    let listening_handle = tokio::spawn({
+        let docker = docker.clone();
+        let server_url = server_url.to_string();
+        async move { api::listening_to_server(&docker, &server_url).await }
+    });
+
    // Start heartbeat in background
    let heartbeat_handle = tokio::spawn({
        let ip = ip.clone();
@@ -64,14 +144,16 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
        }
    });

-    // Warte auf beide Tasks und prüfe explizit auf Fehler
-    let (heartbeat_handle, metrics_handle) =
-        tokio::try_join!(flatten(heartbeat_handle), flatten(metrics_handle))?;
+    // Wait for all tasks and check for errors
+    let (listening_result, heartbeat_result, metrics_result) = tokio::try_join!(
+        flatten(listening_handle),
+        flatten(heartbeat_handle),
+        flatten(metrics_handle)
+    )?;

-    let (heartbeat, metrics) = (heartbeat_handle, metrics_handle);
    println!(
-        "All tasks completed successfully: {:?}, {:?}.",
-        heartbeat, metrics
+        "All tasks completed: listening={:?}, heartbeat={:?}, metrics={:?}",
+        listening_result, heartbeat_result, metrics_result
    );

    println!("All tasks completed successfully.");
--- a/WatcherAgent/src/metrics.rs
+++ b/WatcherAgent/src/metrics.rs
@@ -1,3 +1,16 @@
+
+
+//! # Metrics Module
+//!
+//! This module orchestrates the collection and reporting of hardware and network metrics for WatcherAgent.
+//!
+//! ## Responsibilities
+//! - **Metric Collection:** Gathers real-time statistics from all hardware subsystems (CPU, GPU, RAM, disk, network).
+//! - **Reporting:** Periodically sends metrics to the backend server using the API module.
+//! - **Error Handling:** Robust to hardware failures and network errors, with retry logic and logging.
+//!
+//! ## Usage
+//! The [`Collector`] struct is instantiated in the main loop and runs as a background task, continuously collecting and reporting metrics.
 use std::error::Error;
 use std::time::Duration;

@@ -6,13 +19,31 @@ use crate::hardware::network::NetworkMonitor;
 use crate::hardware::HardwareInfo;
 use crate::models::MetricDto;

+
+/// Main orchestrator for hardware and network metric collection and reporting.
+///
+/// The `Collector` struct manages the state required to collect metrics and send them to the backend server. It maintains a network monitor for bandwidth tracking, the agent's server ID, and its IP address.
+///
+/// # Fields
+/// - `network_monitor`: Tracks network usage rates (rx/tx).
+/// - `server_id`: Unique server ID assigned by the backend.
+/// - `ip_address`: IP address of the agent.
 pub struct Collector {
    network_monitor: NetworkMonitor,
    server_id: i32,
    ip_address: String,
 }

+
 impl Collector {
+    /// Creates a new `Collector` instance for metric collection and reporting.
+    ///
+    /// # Arguments
+    /// * `server_id` - The server ID assigned by the backend.
+    /// * `ip_address` - The IP address of the agent.
+    ///
+    /// # Returns
+    /// A new `Collector` ready to collect and report metrics.
    pub fn new(server_id: i32, ip_address: String) -> Self {
        Self {
            network_monitor: NetworkMonitor::new(),
@@ -21,6 +52,15 @@ impl Collector {
        }
    }

+    /// Runs the main metrics collection loop, periodically sending metrics to the backend server.
+    ///
+    /// This function continuously collects hardware and network metrics, sends them to the backend, and handles errors gracefully. It uses a configurable interval and retries on failures.
+    ///
+    /// # Arguments
+    /// * `base_url` - The base URL of the backend server.
+    ///
+    /// # Returns
+    /// * `Result<(), Box<dyn Error + Send + Sync>>` - Ok if metrics are sent successfully.
    pub async fn run(&mut self, base_url: &str) -> Result<(), Box<dyn Error + Send + Sync>> {
        loop {
            println!(
@@ -40,6 +80,12 @@ impl Collector {
        }
    }

+    /// Collects hardware and network metrics from all subsystems.
+    ///
+    /// This function queries the hardware module for CPU, GPU, RAM, disk, and network statistics, and packages them into a [`MetricDto`] for reporting.
+    ///
+    /// # Returns
+    /// * `Result<MetricDto, Box<dyn Error + Send + Sync>>` - The collected metrics or an error if hardware info is unavailable.
    pub async fn collect(&mut self) -> Result<MetricDto, Box<dyn Error + Send + Sync>> {
        let hardware = match HardwareInfo::collect().await {
            Ok(hw) => hw,
@@ -59,11 +105,16 @@ impl Collector {
            gpu_load: hardware.gpu.current_load.unwrap_or_default(),
            gpu_temp: hardware.gpu.current_temp.unwrap_or_default(),
            gpu_vram_size: hardware.gpu.vram_total.unwrap_or_default(),
-            gpu_vram_usage: hardware.gpu.vram_used.unwrap_or_default(),
-            ram_load: hardware.memory.used.unwrap_or_default(),
-            ram_size: hardware.memory.total.unwrap_or_default(),
-            disk_size: hardware.disk.total.unwrap_or_default(),
-            disk_usage: hardware.disk.used.unwrap_or_default(),
+            // VRAM load = used / total VRAM as a percentage; 0.0 when either value is
+            // missing or the total is zero (keeps NaN/inf out of the payload).
+            gpu_vram_load: match (hardware.gpu.vram_used, hardware.gpu.vram_total) {
+                (Some(used), Some(total)) if total > 0.0 => (used / total) * 100.0,
+                _ => 0.0,
+            },
+            ram_load: hardware.memory.current_load.unwrap_or_default(),
+            ram_size: hardware.memory.total_size.unwrap_or_default(),
+            disk_size: hardware.disk.total_size.unwrap_or_default(),
+            disk_usage: hardware.disk.total_usage.unwrap_or_default(),
            disk_temp: 0.0, // not supported
            net_rx: hardware.network.rx_rate.unwrap_or_default(),
            net_tx: hardware.network.tx_rate.unwrap_or_default(),
--- a/WatcherAgent/src/models.rs
+++ b/WatcherAgent/src/models.rs
@@ -1,6 +1,27 @@
+
+
+//! # Models Module
+//!
+//! This module defines all data structures (DTOs) used for communication between WatcherAgent and the backend server, as well as hardware metrics and Docker container info.
+//!
+//! ## Responsibilities
+//! - **DTOs:** Define payloads for registration, metrics, heartbeat, and server commands.
+//! - **Units:** All struct fields are documented with their units for clarity and API compatibility.
+//! - **Docker Info:** Structures for representing Docker container state and statistics.
+//!
+//! ## Usage
+//! These types are serialized/deserialized for HTTP communication and used throughout the agent for data exchange.
 use serde::{Deserialize, Serialize};

-// Data structures matching the C# DTOs
+/// Registration data sent to the backend server.
+///
+/// ## Units
+/// - `id`: Unique server identifier (integer)
+/// - `ip_address`: IPv4 or IPv6 address (string)
+/// - `cpu_type`: CPU model name (string)
+/// - `cpu_cores`: Number of physical CPU cores (integer)
+/// - `gpu_type`: GPU model name (string)
+/// - `ram_size`: Total RAM size in **megabytes (MB)**
 #[derive(Serialize, Debug)]
 pub struct RegistrationDto {
    #[serde(rename = "id")]
@@ -17,6 +38,24 @@ pub struct RegistrationDto {
    pub ram_size: f64,
 }

+/// Hardware and network metrics data sent to the backend server.
+///
+/// ## Units
+/// - `server_id`: Unique server identifier (integer)
+/// - `ip_address`: IPv4 or IPv6 address (string)
+/// - `cpu_load`: CPU usage as a percentage (**0.0–100.0**)
+/// - `cpu_temp`: CPU temperature in **degrees Celsius (°C)**
+/// - `gpu_load`: GPU usage as a percentage (**0.0–100.0**)
+/// - `gpu_temp`: GPU temperature in **degrees Celsius (°C)**
+/// - `gpu_vram_size`: Total GPU VRAM in **bytes**
+/// - `gpu_vram_load`: GPU Usage of VRAM as a percentage (**0.0–100.0**)
+/// - `ram_load`: RAM usage as a percentage (**0.0–100.0**)
+/// - `ram_size`: Total RAM in **bytes**
+/// - `disk_size`: Total disk size in **bytes**
+/// - `disk_usage`: Disk usage as a percentage (**0.0–100.0**)
+/// - `disk_temp`: Disk temperature in **degrees Celsius (°C)** (if available)
+/// - `net_rx`: Network receive rate in **bytes per second (B/s)**
+/// - `net_tx`: Network transmit rate in **bytes per second (B/s)**
 #[derive(Serialize, Debug)]
 pub struct MetricDto {
    #[serde(rename = "serverId")]
@@ -33,8 +72,8 @@ pub struct MetricDto {
    pub gpu_temp: f64,
    #[serde(rename = "gpu_Vram_Size")]
    pub gpu_vram_size: f64,
-    #[serde(rename = "gpu_Vram_Usage")]
-    pub gpu_vram_usage: f64,
+    #[serde(rename = "gpu_Vram_Load")]
+    pub gpu_vram_load: f64,
    #[serde(rename = "ram_Load")]
    pub ram_load: f64,
    #[serde(rename = "ram_Size")]
@@ -51,6 +90,30 @@ pub struct MetricDto {
    pub net_tx: f64,
 }

+/// Detailed disk information for each detected disk.
+///
+/// ## Units
+/// - `disk_total_space`: Total disk space in **bytes** (`f64`)
+/// - `disk_available_space`: Available disk space in **bytes** (`f64`)
+/// - `disk_used_space`: Used disk space in **bytes** (`f64`)
+/// - `component_disk_temperature`: Disk temperature in **degrees Celsius (°C)** (`f32`)
+#[derive(Serialize, Debug)]
+pub struct DiskInfoDetailed {
+    pub disk_name: String,
+    pub disk_kind: String,
+    pub disk_total_space: f64,
+    pub disk_available_space: f64,
+    pub disk_used_space: f64,
+    pub disk_mount_point: String,
+    pub component_disk_label: String,
+    pub component_disk_temperature: f32, // NOTE(review): f32 while the size fields are f64 — confirm intentional
+}
+
+/// Response containing server ID and IP address.
+///
+/// ## Units
+/// - `id`: Unique server identifier (integer)
+/// - `ip_address`: IPv4 or IPv6 address (string)
 #[derive(Deserialize)]
 pub struct IdResponse {
    pub id: i32,
@@ -58,12 +121,24 @@ pub struct IdResponse {
    pub ip_address: String,
 }

+/// Heartbeat message data sent to the backend server.
+///
+/// ## Fields
+/// - `ip_address`: IPv4 or IPv6 address (string); serialized under the key `IpAddress`
 #[derive(Serialize)]
 pub struct HeartbeatDto {
    #[serde(rename = "IpAddress")]
    pub ip_address: String,
 }

+/// Hardware summary data for diagnostics and registration.
+///
+/// ## Units
+/// - `cpu_type`: CPU model name (string)
+/// - `cpu_cores`: Number of physical CPU cores (integer)
+/// - `gpu_type`: GPU model name (string)
+/// - `ram_size`: Total RAM size in **megabytes (MB)**
+/// - `ip_address`: IPv4 or IPv6 address (string)
 #[derive(Serialize, Debug)]
 pub struct HardwareDto {
    pub cpu_type: String,
@@ -72,3 +147,51 @@ pub struct HardwareDto {
    pub ram_size: f64,
    pub ip_address: String,
 }
+
+/// Command message received from the backend server.
+///
+/// ## Fields
+/// - `message_type`: Type of command (e.g., "update_image", "restart_container", "stop_agent")
+/// - `data`: Command payload (arbitrary JSON)
+/// - `message_id`: Unique identifier for acknowledgment
+#[derive(Debug, Deserialize, Clone)]
+pub struct ServerMessage {
+    // No serde renames: the incoming keys must match these field names exactly.
+    pub message_type: String,
+    pub data: serde_json::Value,
+    pub message_id: String, // identifies this message when it is acknowledged
+}
+
+/// Acknowledgment payload sent to the backend server for command messages.
+///
+/// ## Fields
+/// - `message_id`: Unique identifier of the acknowledged message
+/// - `status`: Status string ("success", "error", etc.)
+/// - `details`: Additional details or error messages
+#[derive(Debug, Serialize, Clone)]
+pub struct Acknowledgment {
+    pub message_id: String,
+    pub status: String, // free-form string such as "success" or "error"; not enforced by the type
+    pub details: String,
+}
+
+/// Docker container information for agent and managed containers.
+///
+/// ## Fields
+/// - `ID`: Container ID (first 12 hex digits, string)
+/// - `image`: Docker image name (string)
+/// - `Name`: Container name (string)
+/// - `Status`: Container status ("running", "stopped", etc.)
+/// - `_net_in`: Network receive rate in **bytes per second (B/s)**
+/// - `_net_out`: Network transmit rate in **bytes per second (B/s)**
+/// - `_cpu_load`: CPU usage as a percentage (**0.0–100.0**)
+#[derive(Debug, Serialize, Clone)]
+pub struct DockerContainer {
+    pub ID: String, // NOTE(review): no serde rename, so this non-snake-case name is also the serialized key
+    pub image: String,
+    pub Name: String,
+    pub Status: String, // "running", "stopped", or another status string
+    pub _net_in: f64,
+    pub _net_out: f64,
+    pub _cpu_load: f64,
+}
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -0,0 +1,26 @@
+# Watcher agent deployment. Services must live under the top-level
+# `services:` key (Compose Specification); a bare service at the root is rejected.
+services:
+  watcher-agent:
+    image: git.triggermeelmo.com/donpat1to/watcher-agent:development
+    container_name: watcher-agent
+    restart: always
+    privileged: true  # Grants full hardware access (use with caution)
+    env_file: .env
+    pid: "host"
+    volumes:
+      # Mount critical system paths for hardware monitoring
+      - /sys:/sys:ro                                # CPU/GPU temps, sensors
+      - /proc:/proc                                 # Process/CPU stats
+      - /dev:/dev:ro                                # Disk/GPU device access
+      - /var/run/docker.sock:/var/run/docker.sock   # Docker API access
+      - /:/root:ro                                  # Host root filesystem, for the df command
+      # Application volumes
+      - ./config:/app/config:ro
+      - ./logs:/app/logs
+    network_mode: host  # Uses host network (for correct IP/interface detection)
+    healthcheck:
+      test: ["CMD", "/usr/local/bin/WatcherAgent", "healthcheck"]
+      interval: 30s
+      timeout: 3s
+      retries: 3