Compare commits
4 Commits
v0.1.27
...
e02914516d
| Author | SHA1 | Date | |
|---|---|---|---|
| e02914516d | |||
| bf90d3ceb9 | |||
| a8ccb0521a | |||
| c90a276dca |
@@ -15,12 +15,13 @@ use std::time::Duration;
|
|||||||
use crate::docker::serverclientcomm::handle_server_message;
|
use crate::docker::serverclientcomm::handle_server_message;
|
||||||
use crate::hardware::HardwareInfo;
|
use crate::hardware::HardwareInfo;
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
Acknowledgment, DockerMetricDto, DockerRegistrationDto, HeartbeatDto, IdResponse, MetricDto,
|
Acknowledgment, DockerContainer, DockerMetricDto, DockerRegistrationDto, HeartbeatDto,
|
||||||
RegistrationDto, ServerMessage,
|
IdResponse, MetricDto, RegistrationDto, ServerMessage,
|
||||||
};
|
};
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use reqwest::{Client, StatusCode};
|
use reqwest::{Client, StatusCode};
|
||||||
|
use serde::Serialize;
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use tokio::time::sleep;
|
use tokio::time::sleep;
|
||||||
|
|
||||||
|
|||||||
@@ -129,27 +129,84 @@ impl DockerManager {
|
|||||||
/// Collects Docker metrics for all containers
|
/// Collects Docker metrics for all containers
|
||||||
pub async fn collect_metrics(&self) -> Result<DockerMetricDto, Box<dyn Error + Send + Sync>> {
|
pub async fn collect_metrics(&self) -> Result<DockerMetricDto, Box<dyn Error + Send + Sync>> {
|
||||||
let containers = self.get_containers().await?;
|
let containers = self.get_containers().await?;
|
||||||
let (cpu_stats, net_stats, mem_stats) = stats::get_container_stats(&self.docker).await?;
|
if let Some(first_container) = containers.first() {
|
||||||
|
println!("Debug: Testing stats for container {}", first_container.id);
|
||||||
|
let _ = self.debug_container_stats(&first_container.id).await;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get stats with proper error handling
|
||||||
|
let stats_result = stats::get_container_stats(&self.docker).await;
|
||||||
|
let (cpu_stats, net_stats, mem_stats) = match stats_result {
|
||||||
|
Ok(stats) => stats,
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Warning: Failed to get container stats: {}", e);
|
||||||
|
// Return empty stats instead of failing completely
|
||||||
|
(Vec::new(), Vec::new(), Vec::new())
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Debug: Found {} containers, {} CPU stats, {} network stats, {} memory stats",
|
||||||
|
containers.len(),
|
||||||
|
cpu_stats.len(),
|
||||||
|
net_stats.len(),
|
||||||
|
mem_stats.len()
|
||||||
|
);
|
||||||
|
|
||||||
let container_infos_total: Vec<_> = containers
|
let container_infos_total: Vec<_> = containers
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|container| {
|
.map(|container| {
|
||||||
|
// Use short ID for matching (first 12 chars)
|
||||||
|
let container_short_id = if container.id.len() > 12 {
|
||||||
|
&container.id[..12]
|
||||||
|
} else {
|
||||||
|
&container.id
|
||||||
|
};
|
||||||
|
|
||||||
let cpu = cpu_stats
|
let cpu = cpu_stats
|
||||||
.iter()
|
.iter()
|
||||||
.find(|c| c.container_id == Some(container.id.clone()))
|
.find(|c| {
|
||||||
|
c.container_id
|
||||||
|
.as_ref()
|
||||||
|
.map(|id| id.starts_with(container_short_id))
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
.cloned();
|
.cloned();
|
||||||
|
|
||||||
let network = net_stats
|
let network = net_stats
|
||||||
.iter()
|
.iter()
|
||||||
.find(|n| n.container_id == Some(container.id.clone()))
|
.find(|n| {
|
||||||
|
n.container_id
|
||||||
|
.as_ref()
|
||||||
|
.map(|id| id.starts_with(container_short_id))
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
.cloned();
|
.cloned();
|
||||||
|
|
||||||
let ram = mem_stats
|
let ram = mem_stats
|
||||||
.iter()
|
.iter()
|
||||||
.find(|m| m.container_id == Some(container.id.clone()))
|
.find(|m| {
|
||||||
|
m.container_id
|
||||||
|
.as_ref()
|
||||||
|
.map(|id| id.starts_with(container_short_id))
|
||||||
|
.unwrap_or(false)
|
||||||
|
})
|
||||||
.cloned();
|
.cloned();
|
||||||
|
|
||||||
|
// Debug output for this container
|
||||||
|
if cpu.is_none() || network.is_none() || ram.is_none() {
|
||||||
|
println!(
|
||||||
|
"Debug: Container {} - CPU: {:?}, Network: {:?}, RAM: {:?}",
|
||||||
|
container_short_id,
|
||||||
|
cpu.is_some(),
|
||||||
|
network.is_some(),
|
||||||
|
ram.is_some()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
DockerContainerInfo {
|
DockerContainerInfo {
|
||||||
container: Some(container),
|
container: Some(container),
|
||||||
status: None, // Status can be fetched if needed
|
status: None,
|
||||||
cpu,
|
cpu,
|
||||||
network,
|
network,
|
||||||
ram,
|
ram,
|
||||||
@@ -160,7 +217,6 @@ impl DockerManager {
|
|||||||
let container_infos: Vec<DockerCollectMetricDto> = container_infos_total
|
let container_infos: Vec<DockerCollectMetricDto> = container_infos_total
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(|info| {
|
.filter_map(|info| {
|
||||||
// Safely handle container extraction
|
|
||||||
let container = match info.container {
|
let container = match info.container {
|
||||||
Some(c) => c,
|
Some(c) => c,
|
||||||
None => {
|
None => {
|
||||||
@@ -190,13 +246,13 @@ impl DockerManager {
|
|||||||
// Safely handle network data with defaults
|
// Safely handle network data with defaults
|
||||||
let network_dto = if let Some(net) = info.network {
|
let network_dto = if let Some(net) = info.network {
|
||||||
DockerContainerNetworkDto {
|
DockerContainerNetworkDto {
|
||||||
net_in: net.rx_bytes.map(|bytes| bytes as f64).or(Some(0.0)),
|
net_in: net.rx_bytes.map(|bytes| bytes as f64),
|
||||||
net_out: net.tx_bytes.map(|bytes| bytes as f64).or(Some(0.0)),
|
net_out: net.tx_bytes.map(|bytes| bytes as f64),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
DockerContainerNetworkDto {
|
DockerContainerNetworkDto {
|
||||||
net_in: Some(0.0),
|
net_in: None,
|
||||||
net_out: Some(0.0),
|
net_out: None,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -223,12 +279,47 @@ impl DockerManager {
|
|||||||
let containers = self.get_containers().await?;
|
let containers = self.get_containers().await?;
|
||||||
let dto = DockerRegistrationDto {
|
let dto = DockerRegistrationDto {
|
||||||
server_id: 0, // This will be set by the caller
|
server_id: 0, // This will be set by the caller
|
||||||
containers: serde_json::to_string(&containers)
|
containers, // Passed as a typed list; serialized together with the DTO
|
||||||
.unwrap_or_else(|_| "[]".to_string()), // Fallback to empty array
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(dto)
|
Ok(dto)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Debug function to check stats collection for a specific container
|
||||||
|
pub async fn debug_container_stats(
|
||||||
|
&self,
|
||||||
|
container_id: &str,
|
||||||
|
) -> Result<(), Box<dyn Error + Send + Sync>> {
|
||||||
|
println!("=== DEBUG STATS FOR CONTAINER {} ===", container_id);
|
||||||
|
|
||||||
|
let (cpu_info, net_info, mem_info) =
|
||||||
|
stats::get_single_container_stats(&self.docker, container_id).await?;
|
||||||
|
|
||||||
|
println!("CPU Info: {:?}", cpu_info);
|
||||||
|
println!("Network Info: {:?}", net_info);
|
||||||
|
println!("Memory Info: {:?}", mem_info);
|
||||||
|
|
||||||
|
// Also try the individual stats functions
|
||||||
|
println!("--- Individual CPU Stats ---");
|
||||||
|
match stats::cpu::get_single_container_cpu_stats(&self.docker, container_id).await {
|
||||||
|
Ok(cpu) => println!("CPU: {:?}", cpu),
|
||||||
|
Err(e) => println!("CPU Error: {}", e),
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("--- Individual Network Stats ---");
|
||||||
|
match stats::network::get_single_container_network_stats(&self.docker, container_id).await {
|
||||||
|
Ok(net) => println!("Network: {:?}", net),
|
||||||
|
Err(e) => println!("Network Error: {}", e),
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("--- Individual Memory Stats ---");
|
||||||
|
match stats::ram::get_single_container_memory_stats(&self.docker, container_id).await {
|
||||||
|
Ok(mem) => println!("Memory: {:?}", mem),
|
||||||
|
Err(e) => println!("Memory Error: {}", e),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Keep these as utility functions if needed, but they should use DockerManager internally
|
// Keep these as utility functions if needed, but they should use DockerManager internally
|
||||||
|
|||||||
@@ -117,7 +117,7 @@ async fn main() -> Result<(), Box<dyn Error + Send + Sync>> {
|
|||||||
models::DockerRegistrationDto {
|
models::DockerRegistrationDto {
|
||||||
server_id: 0,
|
server_id: 0,
|
||||||
//container_count: 0, --- IGNORE ---
|
//container_count: 0, --- IGNORE ---
|
||||||
containers: "[]".to_string(),
|
containers: Vec::new(),
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let _ =
|
let _ =
|
||||||
|
|||||||
@@ -185,6 +185,7 @@ pub struct Acknowledgment {
|
|||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
pub struct DockerRegistrationDto {
|
pub struct DockerRegistrationDto {
|
||||||
/// Unique server identifier (integer)
|
/// Unique server identifier (integer)
|
||||||
|
#[serde(rename = "Server_id")]
|
||||||
pub server_id: u16,
|
pub server_id: u16,
|
||||||
/// Number of currently running containers
|
/// Number of currently running containers
|
||||||
// pub container_count: usize, --- IGNORE ---
|
// pub container_count: usize, --- IGNORE ---
|
||||||
@@ -197,7 +198,8 @@ pub struct DockerRegistrationDto {
|
|||||||
/// id: unique container ID (first 12 hex digits)
|
/// id: unique container ID (first 12 hex digits)
|
||||||
/// image: docker image name
|
/// image: docker image name
|
||||||
/// name: container name
|
/// name: container name
|
||||||
pub containers: String, // Vec<DockerContainer>,
|
#[serde(rename = "Containers")]
|
||||||
|
pub containers: Vec<DockerContainer>, // one entry per container (id, image, name)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
@@ -257,6 +259,8 @@ pub struct DockerContainerInfo {
|
|||||||
#[derive(Debug, Serialize, Clone)]
|
#[derive(Debug, Serialize, Clone)]
|
||||||
pub struct DockerContainer {
|
pub struct DockerContainer {
|
||||||
pub id: String,
|
pub id: String,
|
||||||
|
#[serde(default)]
|
||||||
pub image: Option<String>,
|
pub image: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
pub name: Option<String>,
|
pub name: Option<String>,
|
||||||
}
|
}
|
||||||
|
|||||||
44
docker-compose.example.yaml
Normal file
44
docker-compose.example.yaml
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
networks:
|
||||||
|
watcher-network:
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
services:
|
||||||
|
watcher:
|
||||||
|
image: git.triggermeelmo.com/watcher/watcher-server:v0.1.11
|
||||||
|
container_name: watcher
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 200M
|
||||||
|
restart: unless-stopped
|
||||||
|
env_file: .env
|
||||||
|
ports:
|
||||||
|
- "5000:5000"
|
||||||
|
volumes:
|
||||||
|
- ./watcher-volumes/data:/app/persistence
|
||||||
|
- ./watcher-volumes/dumps:/app/wwwroot/downloads/sqlite
|
||||||
|
- ./watcher-volumes/logs:/app/logs
|
||||||
|
|
||||||
|
watcher-agent:
|
||||||
|
image: git.triggermeelmo.com/donpat1to/watcher-agent:v0.1.28
|
||||||
|
container_name: watcher-agent
|
||||||
|
restart: always
|
||||||
|
privileged: true # Grants full hardware access (use with caution)
|
||||||
|
env_file: .env
|
||||||
|
pid: "host"
|
||||||
|
volumes:
|
||||||
|
# Mount critical system paths for hardware monitoring
|
||||||
|
- /sys:/sys:ro # CPU/GPU temps, sensors
|
||||||
|
- /proc:/proc # Process/CPU stats
|
||||||
|
- /dev:/dev:ro # Disk/GPU device access
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock # Docker API access
|
||||||
|
- /:/root:ro # Read-only access to the host root filesystem for the df command
|
||||||
|
# Application volumes
|
||||||
|
- ./config:/app/config:ro
|
||||||
|
- ./logs:/app/logs
|
||||||
|
network_mode: host # Uses host network (for correct IP/interface detection)
|
||||||
|
healthcheck:
|
||||||
|
test: [ "CMD", "/usr/local/bin/WatcherAgent", "healthcheck" ]
|
||||||
|
interval: 30s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 3
|
||||||
@@ -1,23 +1,20 @@
|
|||||||
watcher-agent:
|
networks:
|
||||||
image: git.triggermeelmo.com/donpat1to/watcher-agent:development
|
watcher-network:
|
||||||
container_name: watcher-agent
|
driver: bridge
|
||||||
restart: always
|
|
||||||
privileged: true # Grants full hardware access (use with caution)
|
services:
|
||||||
|
watcher:
|
||||||
|
image: git.triggermeelmo.com/watcher/watcher-server:v0.1.11
|
||||||
|
container_name: watcher
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 200M
|
||||||
|
restart: unless-stopped
|
||||||
env_file: .env
|
env_file: .env
|
||||||
pid: "host"
|
ports:
|
||||||
|
- "5000:5000"
|
||||||
volumes:
|
volumes:
|
||||||
# Mount critical system paths for hardware monitoring
|
- ./watcher-volumes/data:/app/persistence
|
||||||
- /sys:/sys:ro # CPU/GPU temps, sensors
|
- ./watcher-volumes/dumps:/app/wwwroot/downloads/sqlite
|
||||||
- /proc:/proc # Process/CPU stats
|
- ./watcher-volumes/logs:/app/logs
|
||||||
- /dev:/dev:ro # Disk/GPU device access
|
|
||||||
- /var/run/docker.sock:/var/run/docker.sock # Docker API access
|
|
||||||
- /:/root:ro # Access to for df-command
|
|
||||||
# Application volumes
|
|
||||||
- ./config:/app/config:ro
|
|
||||||
- ./logs:/app/logs
|
|
||||||
network_mode: host # Uses host network (for correct IP/interface detection)
|
|
||||||
healthcheck:
|
|
||||||
test: ["CMD", "/usr/local/bin/WatcherAgent", "healthcheck"]
|
|
||||||
interval: 30s
|
|
||||||
timeout: 3s
|
|
||||||
retries: 3
|
|
||||||
|
|||||||
Reference in New Issue
Block a user