Compare commits

...

9 Commits

Author SHA1 Message Date
e02914516d removed db files
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 5s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m17s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 3m32s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 4m30s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 4s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m39s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 2s
Rust Cross-Platform Build / Create Tag (push) Successful in 6s
2025-10-29 14:25:09 +01:00
bf90d3ceb9 added docker compose file 2025-10-29 14:24:26 +01:00
a8ccb0521a updated models to parse json better
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 4s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m19s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 3m28s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 4m26s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 4s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m31s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 2s
Rust Cross-Platform Build / Create Tag (push) Successful in 5s
2025-10-29 12:11:30 +01:00
c90a276dca added error handling
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 3s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m8s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 3m4s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 3m55s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 4s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m11s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 2s
Rust Cross-Platform Build / Create Tag (push) Successful in 5s
2025-10-28 11:20:12 +01:00
dc4c23f9d9 remoeved mut input attribute in broadcast docker container
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 4s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m5s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 2m43s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 3m26s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 3s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 1m58s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 1s
Rust Cross-Platform Build / Create Tag (push) Successful in 4s
2025-10-27 23:28:23 +01:00
3182d57539 added documentation for broadcasting docker container 2025-10-27 23:25:30 +01:00
8c1ef7f9f6 removed unused imports
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 4s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m1s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 2m38s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 3m28s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 4s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m2s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 2s
Rust Cross-Platform Build / Create Tag (push) Successful in 4s
2025-10-27 23:07:48 +01:00
16020eea50 added error handling in metrics handle 2025-10-27 23:03:49 +01:00
432a798210 updated models 2025-10-27 21:58:35 +01:00
7 changed files with 266 additions and 78 deletions

1
.env Normal file
View File

@@ -0,0 +1 @@
SERVER_URL=http://localhost:5000

View File

@@ -12,16 +12,16 @@
/// These functions are called from the main agent loop and background tasks. All network operations are asynchronous and robust to transient failures. /// These functions are called from the main agent loop and background tasks. All network operations are asynchronous and robust to transient failures.
use std::time::Duration; use std::time::Duration;
use crate::docker::container;
use crate::docker::serverclientcomm::handle_server_message; use crate::docker::serverclientcomm::handle_server_message;
use crate::hardware::HardwareInfo; use crate::hardware::HardwareInfo;
use crate::models::{ use crate::models::{
Acknowledgment, DockerMetricDto, DockerRegistrationDto, HeartbeatDto, IdResponse, MetricDto, Acknowledgment, DockerContainer, DockerMetricDto, DockerRegistrationDto, HeartbeatDto,
RegistrationDto, ServerMessage, IdResponse, MetricDto, RegistrationDto, ServerMessage,
}; };
use anyhow::Result; use anyhow::Result;
use reqwest::{Client, StatusCode}; use reqwest::{Client, StatusCode};
use serde::Serialize;
use std::error::Error; use std::error::Error;
use tokio::time::sleep; use tokio::time::sleep;
@@ -153,10 +153,45 @@ async fn get_server_id_by_ip(
} }
} }
/// Broadcasts Docker container information to the monitoring server for service discovery.
///
/// This function sends the current Docker container configuration to the server
/// to register available containers and enable service monitoring. It will
/// continuously retry until successful, making it suitable for initial
/// registration scenarios.
///
/// # Arguments
///
/// * `base_url` - The base URL of the monitoring server API (e.g., "https://monitoring.example.com")
/// * `server_id` - The ID of the server to associate the containers with
/// * `container_dto` - Shared (immutable) reference to the Docker container information to broadcast; the caller's value is not modified
///
/// # Returns
///
/// * `Ok(())` - When container information is successfully broadcasted to the server
/// * `Err(Box<dyn Error + Send + Sync>)` - If an unrecoverable error occurs (though the function typically retries on transient failures)
///
/// # Behavior
///
/// This function operates in a retry loop with the following characteristics:
///
/// - **Retry Logic**: Attempts broadcast every 10 seconds until successful
/// - **No mutation**: Clones `container_dto` and sets `server_id` on the copy before sending; the caller's value is left unchanged
/// - **TLS**: Accepts invalid TLS certificates for development environments
/// - **Logging**: Provides detailed console output about broadcast attempts and results
///
/// # Errors
///
/// This function may return an error in the following cases:
///
/// * **HTTP Client Creation**: Failed to create HTTP client with TLS configuration
/// * **Network Issues**: Persistent connection failures to the backend server
/// * **Server Errors**: Backend returns non-success HTTP status codes repeatedly
/// * **JSON Serialization**: Cannot serialize container data (should be rare with proper DTOs)
pub async fn broadcast_docker_containers( pub async fn broadcast_docker_containers(
base_url: &str, base_url: &str,
server_id: u16, server_id: u16,
container_dto: &mut DockerRegistrationDto, container_dto: &DockerRegistrationDto,
) -> Result<(), Box<dyn Error + Send + Sync>> { ) -> Result<(), Box<dyn Error + Send + Sync>> {
// First get local IP // First get local IP
println!("Preparing to broadcast docker containers..."); println!("Preparing to broadcast docker containers...");
@@ -166,8 +201,8 @@ pub async fn broadcast_docker_containers(
.build()?; .build()?;
// Prepare registration data // Prepare registration data
let container_dto = container_dto; let mut broadcast_data = container_dto.clone();
container_dto.server_id = server_id; broadcast_data.server_id = server_id;
// Try to register (will retry on failure) // Try to register (will retry on failure)
loop { loop {

View File

@@ -129,27 +129,84 @@ impl DockerManager {
/// Collects Docker metrics for all containers /// Collects Docker metrics for all containers
pub async fn collect_metrics(&self) -> Result<DockerMetricDto, Box<dyn Error + Send + Sync>> { pub async fn collect_metrics(&self) -> Result<DockerMetricDto, Box<dyn Error + Send + Sync>> {
let containers = self.get_containers().await?; let containers = self.get_containers().await?;
let (cpu_stats, net_stats, mem_stats) = stats::get_container_stats(&self.docker).await?; if let Some(first_container) = containers.first() {
println!("Debug: Testing stats for container {}", first_container.id);
let _ = self.debug_container_stats(&first_container.id).await;
}
// Get stats with proper error handling
let stats_result = stats::get_container_stats(&self.docker).await;
let (cpu_stats, net_stats, mem_stats) = match stats_result {
Ok(stats) => stats,
Err(e) => {
eprintln!("Warning: Failed to get container stats: {}", e);
// Return empty stats instead of failing completely
(Vec::new(), Vec::new(), Vec::new())
}
};
println!(
"Debug: Found {} containers, {} CPU stats, {} network stats, {} memory stats",
containers.len(),
cpu_stats.len(),
net_stats.len(),
mem_stats.len()
);
let container_infos_total: Vec<_> = containers let container_infos_total: Vec<_> = containers
.into_iter() .into_iter()
.map(|container| { .map(|container| {
// Use short ID for matching (first 12 chars)
let container_short_id = if container.id.len() > 12 {
&container.id[..12]
} else {
&container.id
};
let cpu = cpu_stats let cpu = cpu_stats
.iter() .iter()
.find(|c| c.container_id == Some(container.id.clone())) .find(|c| {
c.container_id
.as_ref()
.map(|id| id.starts_with(container_short_id))
.unwrap_or(false)
})
.cloned(); .cloned();
let network = net_stats let network = net_stats
.iter() .iter()
.find(|n| n.container_id == Some(container.id.clone())) .find(|n| {
n.container_id
.as_ref()
.map(|id| id.starts_with(container_short_id))
.unwrap_or(false)
})
.cloned(); .cloned();
let ram = mem_stats let ram = mem_stats
.iter() .iter()
.find(|m| m.container_id == Some(container.id.clone())) .find(|m| {
m.container_id
.as_ref()
.map(|id| id.starts_with(container_short_id))
.unwrap_or(false)
})
.cloned(); .cloned();
// Debug output for this container
if cpu.is_none() || network.is_none() || ram.is_none() {
println!(
"Debug: Container {} - CPU: {:?}, Network: {:?}, RAM: {:?}",
container_short_id,
cpu.is_some(),
network.is_some(),
ram.is_some()
);
}
DockerContainerInfo { DockerContainerInfo {
container: Some(container), container: Some(container),
status: None, // Status can be fetched if needed status: None,
cpu, cpu,
network, network,
ram, ram,
@@ -159,43 +216,57 @@ impl DockerManager {
let container_infos: Vec<DockerCollectMetricDto> = container_infos_total let container_infos: Vec<DockerCollectMetricDto> = container_infos_total
.into_iter() .into_iter()
.map(|info| DockerCollectMetricDto { .filter_map(|info| {
id: Some(info.container.unwrap().id).unwrap_or("".to_string()), let container = match info.container {
cpu: info Some(c) => c,
.cpu None => {
.unwrap() eprintln!("Warning: Container info missing container data, skipping");
.cpu_usage_percent return None;
.map(|load| DockerContainerCpuDto { }
cpu_load: Some(load), };
})
.unwrap_or(DockerContainerCpuDto { cpu_load: None }), // Safely handle CPU data with defaults
ram: info let cpu_dto = if let Some(cpu) = info.cpu {
.ram DockerContainerCpuDto {
.unwrap() cpu_load: cpu.cpu_usage_percent,
.memory_usage_percent }
.map(|load| DockerContainerRamDto { } else {
cpu_load: Some(load), DockerContainerCpuDto { cpu_load: None }
}) };
.unwrap_or(DockerContainerRamDto { cpu_load: None }),
network: DockerContainerNetworkDto { // Safely handle RAM data with defaults
net_in: info let ram_dto = if let Some(ram) = info.ram {
.network DockerContainerRamDto {
.as_ref() ram_load: ram.memory_usage_percent,
.unwrap() }
.rx_bytes } else {
.map(|bytes| bytes as f64) DockerContainerRamDto { ram_load: None }
.or(Some(0.0)), };
net_out: info
.network // Safely handle network data with defaults
.unwrap() let network_dto = if let Some(net) = info.network {
.tx_bytes DockerContainerNetworkDto {
.map(|bytes| bytes as f64) net_in: net.rx_bytes.map(|bytes| bytes as f64),
.or(Some(0.0)), net_out: net.tx_bytes.map(|bytes| bytes as f64),
}, }
} else {
DockerContainerNetworkDto {
net_in: None,
net_out: None,
}
};
Some(DockerCollectMetricDto {
id: container.id,
cpu: cpu_dto,
ram: ram_dto,
network: network_dto,
})
}) })
.collect(); .collect();
let dto = DockerMetricDto { let dto = DockerMetricDto {
server_id: 0, server_id: 0, // This should be set by the caller
containers: serde_json::to_string(&container_infos)?, containers: serde_json::to_string(&container_infos)?,
}; };
@@ -207,13 +278,48 @@ impl DockerManager {
) -> Result<DockerRegistrationDto, Box<dyn Error + Send + Sync>> { ) -> Result<DockerRegistrationDto, Box<dyn Error + Send + Sync>> {
let containers = self.get_containers().await?; let containers = self.get_containers().await?;
let dto = DockerRegistrationDto { let dto = DockerRegistrationDto {
server_id: 0, server_id: 0, // This will be set by the caller
//container_count, containers, // Fallback to empty array
containers: serde_json::to_string(&containers)?,
}; };
Ok(dto) Ok(dto)
} }
/// Debug function to check stats collection for a specific container
pub async fn debug_container_stats(
&self,
container_id: &str,
) -> Result<(), Box<dyn Error + Send + Sync>> {
println!("=== DEBUG STATS FOR CONTAINER {} ===", container_id);
let (cpu_info, net_info, mem_info) =
stats::get_single_container_stats(&self.docker, container_id).await?;
println!("CPU Info: {:?}", cpu_info);
println!("Network Info: {:?}", net_info);
println!("Memory Info: {:?}", mem_info);
// Also try the individual stats functions
println!("--- Individual CPU Stats ---");
match stats::cpu::get_single_container_cpu_stats(&self.docker, container_id).await {
Ok(cpu) => println!("CPU: {:?}", cpu),
Err(e) => println!("CPU Error: {}", e),
}
println!("--- Individual Network Stats ---");
match stats::network::get_single_container_network_stats(&self.docker, container_id).await {
Ok(net) => println!("Network: {:?}", net),
Err(e) => println!("Network Error: {}", e),
}
println!("--- Individual Memory Stats ---");
match stats::ram::get_single_container_memory_stats(&self.docker, container_id).await {
Ok(mem) => println!("Memory: {:?}", mem),
Err(e) => println!("Memory Error: {}", e),
}
Ok(())
}
} }
// Keep these as utility functions if needed, but they should use DockerManager internally // Keep these as utility functions if needed, but they should use DockerManager internally

View File

@@ -31,10 +31,8 @@ pub mod hardware;
pub mod metrics; pub mod metrics;
pub mod models; pub mod models;
use bollard::Docker;
use std::env; use std::env;
use std::error::Error; use std::error::Error;
use std::sync::Arc;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
/// Awaits a spawned asynchronous task and flattens its nested `Result` type. /// Awaits a spawned asynchronous task and flattens its nested `Result` type.
@@ -119,7 +117,7 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
models::DockerRegistrationDto { models::DockerRegistrationDto {
server_id: 0, server_id: 0,
//container_count: 0, --- IGNORE --- //container_count: 0, --- IGNORE ---
containers: "[]".to_string(), containers: Vec::new(),
} }
}; };
let _ = let _ =
@@ -153,7 +151,13 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
let docker_manager = docker_manager.as_ref().cloned().unwrap(); let docker_manager = docker_manager.as_ref().cloned().unwrap();
async move { async move {
let mut collector = metrics::Collector::new(server_id, ip, docker_manager); let mut collector = metrics::Collector::new(server_id, ip, docker_manager);
collector.run(&server_url).await if let Err(e) = collector.run(&server_url).await {
eprintln!("Metrics collection error: {}", e);
// Don't panic, just return the error
Err(e)
} else {
Ok(())
}
} }
}); });

View File

@@ -182,12 +182,10 @@ pub struct Acknowledgment {
/// - `image`: Docker image name (string) /// - `image`: Docker image name (string)
/// - `Name`: Container name (string) /// - `Name`: Container name (string)
/// - `Status`: Container status ("running", "stopped", etc.) /// - `Status`: Container status ("running", "stopped", etc.)
/// - `_net_in`: Network receive rate in **bytes per second (B/s)**
/// - `_net_out`: Network transmit rate in **bytes per second (B/s)**
/// - `_cpu_load`: CPU usage as a percentage (**0.0–100.0**)
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
pub struct DockerRegistrationDto { pub struct DockerRegistrationDto {
/// Unique server identifier (integer) /// Unique server identifier (integer)
#[serde(rename = "Server_id")]
pub server_id: u16, pub server_id: u16,
/// Number of currently running containers /// Number of currently running containers
// pub container_count: usize, --- IGNORE --- // pub container_count: usize, --- IGNORE ---
@@ -200,7 +198,8 @@ pub struct DockerRegistrationDto {
/// id: unique container ID (first 12 hex digits) /// id: unique container ID (first 12 hex digits)
/// image: docker image name /// image: docker image name
/// name: container name /// name: container name
pub containers: String, // Vec<DockerContainer>, #[serde(rename = "Containers")]
pub containers: Vec<DockerContainer>, // Vec<DockerContainer>,
} }
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
@@ -238,7 +237,7 @@ pub struct DockerContainerCpuDto {
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
pub struct DockerContainerRamDto { pub struct DockerContainerRamDto {
pub cpu_load: Option<f64>, pub ram_load: Option<f64>,
} }
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
@@ -260,6 +259,8 @@ pub struct DockerContainerInfo {
#[derive(Debug, Serialize, Clone)] #[derive(Debug, Serialize, Clone)]
pub struct DockerContainer { pub struct DockerContainer {
pub id: String, pub id: String,
#[serde(default)]
pub image: Option<String>, pub image: Option<String>,
#[serde(default)]
pub name: Option<String>, pub name: Option<String>,
} }

View File

@@ -0,0 +1,44 @@
# Docker Compose stack with two services: the Watcher server and the Watcher agent.
# Both services load their environment from the same .env file.
networks:
  # NOTE(review): no service below attaches to this network (watcher uses the
  # default network, watcher-agent uses host networking) - confirm it is needed.
  watcher-network:
    driver: bridge

services:
  # Watcher server, published on host port 5000.
  watcher:
    image: git.triggermeelmo.com/watcher/watcher-server:v0.1.11
    container_name: watcher
    deploy:
      resources:
        limits:
          memory: 200M
    restart: unless-stopped
    env_file: .env
    ports:
      - "5000:5000"
    volumes:
      - ./watcher-volumes/data:/app/persistence
      - ./watcher-volumes/dumps:/app/wwwroot/downloads/sqlite
      - ./watcher-volumes/logs:/app/logs

  # Watcher agent; runs privileged on the host network and PID namespace.
  watcher-agent:
    image: git.triggermeelmo.com/donpat1to/watcher-agent:v0.1.28
    container_name: watcher-agent
    restart: always
    privileged: true  # Grants full hardware access (use with caution)
    env_file: .env
    pid: "host"
    volumes:
      # Mount critical system paths for hardware monitoring
      - /sys:/sys:ro  # CPU/GPU temps, sensors
      - /proc:/proc  # Process/CPU stats
      - /dev:/dev:ro  # Disk/GPU device access
      - /var/run/docker.sock:/var/run/docker.sock  # Docker API access
      - /:/root:ro  # Read-only view of the host root filesystem for the df command
      # Application volumes
      - ./config:/app/config:ro
      - ./logs:/app/logs
    network_mode: host  # Uses host network (for correct IP/interface detection)
    healthcheck:
      test: ["CMD", "/usr/local/bin/WatcherAgent", "healthcheck"]
      interval: 30s
      timeout: 3s
      retries: 3

View File

@@ -1,23 +1,20 @@
watcher-agent: networks:
image: git.triggermeelmo.com/donpat1to/watcher-agent:development watcher-network:
container_name: watcher-agent driver: bridge
restart: always
privileged: true # Grants full hardware access (use with caution) services:
watcher:
image: git.triggermeelmo.com/watcher/watcher-server:v0.1.11
container_name: watcher
deploy:
resources:
limits:
memory: 200M
restart: unless-stopped
env_file: .env env_file: .env
pid: "host" ports:
- "5000:5000"
volumes: volumes:
# Mount critical system paths for hardware monitoring - ./watcher-volumes/data:/app/persistence
- /sys:/sys:ro # CPU/GPU temps, sensors - ./watcher-volumes/dumps:/app/wwwroot/downloads/sqlite
- /proc:/proc # Process/CPU stats - ./watcher-volumes/logs:/app/logs
- /dev:/dev:ro # Disk/GPU device access
- /var/run/docker.sock:/var/run/docker.sock # Docker API access
- /:/root:ro # Access to for df-command
# Application volumes
- ./config:/app/config:ro
- ./logs:/app/logs
network_mode: host # Uses host network (for correct IP/interface detection)
healthcheck:
test: ["CMD", "/usr/local/bin/WatcherAgent", "healthcheck"]
interval: 30s
timeout: 3s
retries: 3