Files
watcheragent/WatcherAgent/src/main.rs

803 lines
24 KiB
Rust

//use chrono::Utc;
use nvml_wrapper::Nvml;
use reqwest::{Client, StatusCode};
use serde::{Deserialize, Serialize};
use std::{error::Error, fs, process::Command, time::Duration};
use sysinfo::{Components, Disks, System};
use tokio::time::{interval, sleep, Instant};
// Data structures matching the C# DTOs
/// Hardware description posted to `/monitoring/register-agent-by-id`.
///
/// All fields are serialized with camelCase keys (`id`, `ipAddress`,
/// `cpuType`, `cpuCores`, `gpuType`, `ramSize`).
#[derive(Serialize, Debug)]
#[serde(rename_all = "camelCase")]
struct RegistrationDto {
    /// Server ID previously handed out by the backend.
    id: i32,
    /// IP address under which the backend knows this machine.
    ip_address: String,
    /// CPU brand string.
    cpu_type: String,
    /// Number of CPUs reported by `sysinfo`.
    cpu_cores: i32,
    /// GPU product name, or a placeholder when none was detected.
    gpu_type: String,
    /// Total RAM as reported by `sysinfo` (bytes).
    ram_size: f64,
}
/// One metrics sample posted to `/monitoring/metric`.
///
/// The wire names mix camelCase and `snake_Pascal` keys, so every field keeps
/// an explicit `rename` instead of a container-level `rename_all`.
#[derive(Serialize, Debug)]
struct MetricDto {
    /// Backend ID of the server this sample belongs to.
    #[serde(rename = "serverId")]
    server_id: i32,

    /// IP address the agent registered with.
    #[serde(rename = "ipAddress")]
    ip_address: String,

    /// Global CPU usage as reported by `sysinfo` (percent).
    #[serde(rename = "cpu_Load")]
    cpu_load: f64,

    /// CPU temperature from `get_cpu_temp`, `0.0` when unavailable.
    #[serde(rename = "cpu_Temp")]
    cpu_temp: f64,

    /// Utilization of GPU 0 via NVML, `0.0` when NVML is unavailable.
    #[serde(rename = "gpu_Load")]
    gpu_load: f64,

    /// Temperature of GPU 0 via NVML, `0.0` when NVML is unavailable.
    #[serde(rename = "gpu_Temp")]
    gpu_temp: f64,

    /// Total video memory of GPU 0 in bytes.
    #[serde(rename = "gpu_Vram_Size")]
    gpu_vram_size: f64,

    /// Used video memory as a percentage of the total.
    #[serde(rename = "gpu_Vram_Usage")]
    gpu_vram_usage: f64,

    /// Used RAM as a percentage of the total.
    #[serde(rename = "ram_Load")]
    ram_load: f64,

    /// Total RAM in bytes.
    #[serde(rename = "ram_Size")]
    ram_size: f64,

    /// Combined size of all counted disks in bytes.
    #[serde(rename = "disk_Size")]
    disk_size: f64,

    /// Used disk space as a percentage of `disk_size`.
    #[serde(rename = "disk_Usage")]
    disk_usage: f64,

    /// Disk temperature; `get_disk_info` currently always reports `0.0`.
    #[serde(rename = "disk_Temp")]
    disk_temp: f64,

    /// Inbound network throughput in bits per second.
    #[serde(rename = "net_In")]
    net_in: f64,

    /// Outbound network throughput in bits per second.
    #[serde(rename = "net_Out")]
    net_out: f64,
}
/// Body returned by `/monitoring/server-id-by-ip` on success.
///
/// Expected JSON keys are `id` and `ipAddress`.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct IdResponse {
    /// Backend ID assigned to this server.
    id: i32,
    /// IP address the backend has on record for that ID.
    ip_address: String,
}
/// Body posted to `/heartbeat/receive`; serialized as `{"IpAddress": "..."}`.
#[derive(Serialize)]
#[serde(rename_all = "PascalCase")]
struct HeartbeatPayload {
    /// IP address identifying the agent that is still alive.
    ip_address: String,
}
/// Static hardware facts about the local machine, gathered once by
/// [`HardwareInfo::collect`] and copied into the registration request.
#[derive(Serialize, Debug)]
struct HardwareInfo {
    /// CPU brand string, or `"Unknown CPU"`.
    cpu_type: String,
    /// Number of CPUs reported by `sysinfo`.
    cpu_cores: i32,
    /// GPU product name, or `"Unknown GPU"`.
    gpu_type: String,
    /// Total RAM as reported by `sysinfo` (bytes).
    ram_size: f64,
    /// Local IP address of this host.
    ip_address: String,
}
/// Previous network counter sample, kept so that consecutive readings can be
/// turned into a throughput rate.
struct NetworkState {
    /// Cumulative received-byte counter seen at the previous sample.
    prev_rx: u64,
    /// Cumulative transmitted-byte counter seen at the previous sample.
    prev_tx: u64,
    /// Moment the previous sample was taken.
    last_update: Instant,
}

impl NetworkState {
    /// Creates an empty state: both counters at zero, timestamped "now".
    fn new() -> Self {
        let created_at = Instant::now();
        NetworkState {
            last_update: created_at,
            prev_tx: 0,
            prev_rx: 0,
        }
    }
}
impl HardwareInfo {
    /// Gathers CPU, GPU, RAM and IP information about the local machine.
    ///
    /// The function is `async` only to keep its call sites uniform; it does
    /// not await anything itself.
    ///
    /// # Errors
    /// Returns an error when the local IP address cannot be determined.
    async fn collect() -> Result<Self, Box<dyn Error>> {
        let mut sys = System::new_all();
        sys.refresh_cpu_all();
        sys.refresh_memory();

        let cpus = sys.cpus();
        // An empty brand string is as useless as a missing CPU entry, so both
        // fall back to the same placeholder.
        let cpu_type = cpus
            .first()
            .map(|cpu| cpu.brand().trim())
            .filter(|brand| !brand.is_empty())
            .unwrap_or("Unknown CPU")
            .to_string();
        // Saturate instead of silently truncating with `as`.
        let cpu_cores = i32::try_from(cpus.len()).unwrap_or(i32::MAX);
        let ram_size = sys.total_memory() as f64;
        let gpu_type = Self::detect_gpu_name();
        let ip_address = local_ip_address::local_ip()?.to_string();

        Ok(Self {
            cpu_type,
            cpu_cores,
            gpu_type,
            ram_size,
            ip_address,
        })
    }

    /// Returns the GPU name from NVML, then from an OS-specific command, and
    /// finally the placeholder `"Unknown GPU"`.
    fn detect_gpu_name() -> String {
        Self::try_nvml_gpu_name()
            .or_else(Self::fallback_gpu_name)
            .unwrap_or_else(|| "Unknown GPU".to_string())
    }

    /// Asks NVML for the name of GPU 0; `None` when NVML or the device is
    /// unavailable.
    fn try_nvml_gpu_name() -> Option<String> {
        let nvml = Nvml::init().ok()?;
        let device = nvml.device_by_index(0).ok()?;
        device.name().ok()
    }

    /// Queries the operating system for a GPU name (`lshw` on Linux, `wmic`
    /// on Windows).
    ///
    /// Returns `None` when the command cannot be run, when its output holds
    /// no usable name, or on platforms without a fallback. The caller supplies
    /// the `"Unknown GPU"` placeholder.
    fn fallback_gpu_name() -> Option<String> {
        #[cfg(target_os = "linux")]
        let name: Option<String> = {
            let output = std::process::Command::new("lshw")
                .args(["-C", "display"])
                .output()
                .ok()?;
            let stdout = String::from_utf8_lossy(&output.stdout);
            // lshw prints indented "key: value" lines; take the first product.
            let product = stdout
                .lines()
                .find_map(|line| line.trim().strip_prefix("product:"))
                .map(|product| product.trim().to_string());
            product
        };

        #[cfg(target_os = "windows")]
        let name: Option<String> = {
            let output = std::process::Command::new("wmic")
                .args(["path", "win32_VideoController", "get", "name"])
                .output()
                .ok()?;
            let stdout = String::from_utf8_lossy(&output.stdout);
            // Line 0 is the "Name" header; the first non-blank line after it
            // is the adapter name.
            let adapter = stdout
                .lines()
                .skip(1)
                .map(str::trim)
                .find(|line| !line.is_empty())
                .map(str::to_string);
            adapter
        };

        // Without this branch the function has no value on other targets and
        // fails to compile there.
        #[cfg(not(any(target_os = "linux", target_os = "windows")))]
        let name: Option<String> = None;

        name.filter(|name| !name.is_empty())
    }
}
/// Looks up the backend ID registered for `ip`, retrying every 10 seconds
/// until the backend answers with a success status.
///
/// Returns the `(id, ip_address)` pair from the response body.
///
/// # Errors
/// Fails when the HTTP client cannot be built, or when a *successful*
/// response cannot be read or parsed as JSON. Transport errors and
/// non-success statuses are logged and retried, never returned.
async fn get_server_id_by_ip(base_url: &str, ip: &str) -> Result<(i32, String), Box<dyn Error>> {
    const RETRY_DELAY: Duration = Duration::from_secs(10);

    // NOTE(review): certificate validation is disabled for this client.
    let client = Client::builder()
        .danger_accept_invalid_certs(true)
        .build()?;
    let url = format!("{}/monitoring/server-id-by-ip?ipAddress={}", base_url, ip);

    loop {
        println!("Attempting to fetch server ID for IP {}...", ip);
        match client.get(&url).send().await {
            Ok(resp) if resp.status().is_success() => {
                let text = resp.text().await?;
                println!("Raw response: {}", text); // Debug output
                let id_resp: IdResponse = match serde_json::from_str(&text) {
                    Ok(parsed) => parsed,
                    Err(e) => {
                        println!("Failed to parse response: {}", e);
                        return Err(e.into());
                    }
                };
                println!(
                    "✅ Received ID {} for IP {}",
                    id_resp.id, id_resp.ip_address
                );
                return Ok((id_resp.id, id_resp.ip_address));
            }
            Ok(resp) if resp.status() == StatusCode::NOT_FOUND => {
                println!(
                    "❌ Server with IP {} not found in database (will retry in 10 seconds)",
                    ip
                );
            }
            Ok(resp) => {
                let status = resp.status();
                // An unreadable error body must not abort the retry loop, so
                // fall back to an empty string, as `register_with_server` does.
                let body = resp.text().await.unwrap_or_default();
                println!("⚠️ Server responded with status: {} - {}", status, body);
            }
            Err(err) => {
                println!("⚠️ Request failed: {} (will retry in 10 seconds)", err);
            }
        }
        sleep(RETRY_DELAY).await;
    }
}
async fn register_with_server(base_url: &str) -> Result<(i32, String), Box<dyn Error>> {
// First get local IP
let ip = local_ip_address::local_ip()?.to_string();
println!("Local IP address detected: {}", ip);
// Get server ID from backend (this will retry until successful)
let (server_id, registered_ip) = get_server_id_by_ip(base_url, &ip).await?;
// Create HTTP client for registration
let client = Client::builder()
.danger_accept_invalid_certs(true)
.build()?;
// Collect hardware info
let hardware = HardwareInfo::collect().await?;
// Prepare registration data
let registration = RegistrationDto {
id: server_id,
ip_address: registered_ip.clone(),
cpu_type: hardware.cpu_type,
cpu_cores: hardware.cpu_cores,
gpu_type: hardware.gpu_type,
ram_size: hardware.ram_size,
};
// Try to register (will retry on failure)
loop {
println!("Attempting to register with server...");
let url = format!("{}/monitoring/register-agent-by-id", base_url);
match client.post(&url).json(&registration).send().await {
Ok(resp) if resp.status().is_success() => {
println!("✅ Successfully registered with server.");
return Ok((server_id, registered_ip));
}
Ok(resp) => {
let status = resp.status();
let text = resp.text().await.unwrap_or_default();
println!(
"⚠️ Registration failed ({}): {} (will retry in 10 seconds)",
status, text
);
}
Err(err) => {
println!("⚠️ Registration error: {} (will retry in 10 seconds)", err);
}
}
sleep(Duration::from_secs(10)).await;
}
}
/// Posts a heartbeat for `ip` to `/heartbeat/receive` every 20 seconds,
/// forever.
///
/// Failed or rejected heartbeats are only logged to stderr.
///
/// # Errors
/// Fails only when the HTTP client cannot be built; once the loop has
/// started the function never returns.
async fn heartbeat_loop(base_url: &str, ip: &str) -> Result<(), Box<dyn Error>> {
    let client = Client::builder()
        .danger_accept_invalid_certs(true)
        .build()?;
    let endpoint = format!("{}/heartbeat/receive", base_url);
    // The payload never changes, so it is built once and reused.
    let payload = HeartbeatPayload {
        ip_address: ip.to_string(),
    };

    loop {
        let outcome = client.post(&endpoint).json(&payload).send().await;
        match outcome {
            Err(e) => eprintln!("Heartbeat error: {}", e),
            Ok(res) => {
                let status = res.status();
                if status.is_success() {
                    println!("Heartbeat sent successfully.");
                } else {
                    eprintln!("Server responded with status: {}", status);
                }
            }
        }
        sleep(Duration::from_secs(20)).await;
    }
}
/// Samples system metrics and ships them to the backend.
struct MetricsCollector {
    /// `sysinfo` handle reused across samples.
    sys: System,
    /// NVML handle; `None` when NVML could not be initialised.
    nvml: Option<Nvml>,
    /// Backend ID attached to every sample.
    server_id: i32,
    /// IP address attached to every sample.
    ip_address: String,
    /// Previous network counters, used to derive throughput.
    network_state: NetworkState,
}
impl MetricsCollector {
    /// Creates a collector for the given server. NVML is initialised on a
    /// best-effort basis; GPU metrics read as zero when it is unavailable.
    fn new(server_id: i32, ip_address: String) -> Self {
        Self {
            sys: System::new(),
            nvml: Nvml::init().ok(),
            server_id,
            ip_address,
            network_state: NetworkState::new(),
        }
    }

    /// Collects a sample every 20 seconds and posts it to
    /// `{base_url}/monitoring/metric`, forever.
    ///
    /// Send failures and non-success responses are logged to stderr and the
    /// loop continues; the function never returns once started.
    async fn collect_and_send_loop(&mut self, base_url: &str) -> Result<(), Box<dyn Error>> {
        let client = Client::new();
        let url = format!("{}/monitoring/metric", base_url);
        let mut ticker = interval(Duration::from_secs(20));
        loop {
            ticker.tick().await;
            let metric = self.collect_metrics();
            println!("Collected metrics: {:?}", metric);
            match client.post(&url).json(&metric).send().await {
                Ok(res) if res.status().is_success() => println!(
                    "✅ Sent metrics for server {} | Status: {}",
                    metric.server_id,
                    res.status()
                ),
                // A rejected sample is not a successful send; report it as such.
                Ok(res) => eprintln!(
                    "❌ Server rejected metrics for server {} | Status: {}",
                    metric.server_id,
                    res.status()
                ),
                Err(err) => eprintln!("❌ Failed to send metrics: {}", err),
            }
        }
    }

    /// Takes one sample of CPU, RAM, disk, GPU and network metrics.
    ///
    /// Any metric that cannot be read is reported as `0.0`.
    fn collect_metrics(&mut self) -> MetricDto {
        self.sys.refresh_all();

        let cpu_load = self.sys.global_cpu_usage() as f64;
        let cpu_temp = get_cpu_temp().unwrap_or(0.0) as f64;

        let ram_size = self.sys.total_memory() as f64;
        let ram_load = Self::percent_of(self.sys.used_memory() as f64, ram_size);

        let (disk_size, disk_usage, disk_temp) = get_disk_info();
        let (gpu_temp, gpu_load, vram_used, vram_total) = self.gpu_metrics();
        let (net_in, net_out) = self.network_rates();

        MetricDto {
            server_id: self.server_id,
            ip_address: self.ip_address.clone(),
            cpu_load,
            cpu_temp,
            gpu_load,
            gpu_temp,
            gpu_vram_size: vram_total,
            gpu_vram_usage: Self::percent_of(vram_used, vram_total),
            ram_load,
            ram_size,
            disk_size,
            disk_usage,
            disk_temp,
            net_in,
            net_out,
        }
    }

    /// Returns `part / whole` as a percentage, or `0.0` when `whole` is not
    /// positive (avoids emitting NaN, which JSON cannot represent).
    fn percent_of(part: f64, whole: f64) -> f64 {
        if whole > 0.0 {
            (part / whole) * 100.0
        } else {
            0.0
        }
    }

    /// Reads `(temperature, load, vram_used_bytes, vram_total_bytes)` for
    /// GPU 0 via NVML; all zeros when NVML or the device is unavailable.
    fn gpu_metrics(&self) -> (f64, f64, f64, f64) {
        use nvml_wrapper::enum_wrappers::device::TemperatureSensor;

        let device = match self.nvml.as_ref().map(|nvml| nvml.device_by_index(0)) {
            Some(Ok(device)) => device,
            _ => return (0.0, 0.0, 0.0, 0.0),
        };

        let temp = device.temperature(TemperatureSensor::Gpu).unwrap_or(0) as f64;
        let load = device
            .utilization_rates()
            .map(|rates| rates.gpu as f64)
            .unwrap_or(0.0);
        let (used, total) = device
            .memory_info()
            .map(|mem| (mem.used as f64, mem.total as f64))
            .unwrap_or((0.0, 0.0));

        (temp, load, used, total)
    }

    /// Samples the cumulative network counters and returns `(in, out)`
    /// throughput in bits per second since the previous sample, then stores
    /// the new counters for the next call.
    fn network_rates(&mut self) -> (f64, f64) {
        let (current_rx, current_tx) = get_network_traffic().unwrap_or((0, 0));
        let elapsed_secs = self.network_state.last_update.elapsed().as_secs_f64();
        self.network_state.last_update = Instant::now();

        let net_in = Self::bits_per_second(self.network_state.prev_rx, current_rx, elapsed_secs);
        let net_out = Self::bits_per_second(self.network_state.prev_tx, current_tx, elapsed_secs);

        self.network_state.prev_rx = current_rx;
        self.network_state.prev_tx = current_tx;
        (net_in, net_out)
    }

    /// Converts two cumulative byte counters into bits per second.
    ///
    /// Returns `0.0` when there is no previous sample (`prev == 0`), when the
    /// counter went backwards (reset or wrap), or when no time has elapsed.
    fn bits_per_second(prev: u64, current: u64, elapsed_secs: f64) -> f64 {
        if prev > 0 && current >= prev && elapsed_secs > 0.0 {
            ((current - prev) as f64 * 8.0) / elapsed_secs
        } else {
            0.0
        }
    }
}
fn get_cpu_temp() -> Option<f32> {
println!("Attempting to get CPU temperature...");
let mut sys = System::new_all();
//let components = Components::new_with_refreshed_list();
sys.refresh_all();
for component in sys.components() {
if let Some(temperature) = component.temperature() {
println!("{temperature}°C");
}
}
Some(0.0) // Placeholder, actual implementation depends on platform
}
/*
#[cfg(target_os = "linux")]
{
// Try several methods in order
// 1. `sensors` command
if let Ok(output) = Command::new("sensors").output() {
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines() {
if line.contains("Package id") || line.contains("Tdie") || line.contains("CPU Temp")
{
if let Some(temp_str) = line
.split('+')
.nth(1)
.and_then(|s| s.split_whitespace().next())
{
if let Ok(temp) = temp_str.replace("°C", "").parse::<f32>() {
return Some(temp);
}
}
}
}
}
// 2. Sysfs (Intel/AMD)
if let Ok(content) = fs::read_to_string("/sys/class/thermal/thermal_zone0/temp") {
if let Ok(temp) = content.trim().parse::<f32>() {
return Some(temp / 1000.0);
}
}
// 3. Alternative sysfs paths
let paths = [
"/sys/class/hwmon/hwmontemp1_input",
"/sys/class/hwmon/hwmondevice/temp1_input",
];
for path_pattern in &paths {
if let Ok(paths) = glob::glob(path_pattern) {
for path in paths.flatten() {
if let Ok(content) = fs::read_to_string(&path) {
if let Ok(temp) = content.trim().parse::<f32>() {
return Some(temp / 1000.0);
}
}
}
}
}
}
#[cfg(target_os = "windows")]
fn get_cpu_temp() -> Option<f32> {
use winapi::um::pdh::{
PdhAddCounter, PdhCollectQueryData, PdhGetFormattedCounterValue, PdhOpenQuery,
PDH_FMT_DOUBLE,
};
use winapi::um::pdhmsg::PDH_FMT_COUNTERVALUE;
unsafe {
let mut query_handle = 0 as u64;
if PdhOpenQuery(std::ptr::null_mut(), 0, &mut query_handle) != 0 {
return None;
}
let mut counter_handle = 0 as u64;
if PdhAddCounter(
query_handle,
"\\Processor Information(_Total)\\Temperature",
0,
&mut counter_handle,
) != 0
{
return None;
}
if PdhCollectQueryData(query_handle) != 0 {
return None;
}
let mut counter_value = PDH_FMT_COUNTERVALUE {
CStatus: 0,
union: PDH_FMT_DOUBLE { doubleValue: 0.0 },
};
if PdhGetFormattedCounterValue(
counter_handle,
PDH_FMT_DOUBLE,
std::ptr::null_mut(),
&mut counter_value,
) != 0
{
return None;
}
Some(counter_value.union.doubleValue as f32)
}
}
None*/
fn get_disk_info() -> (f64, f64, f64) {
let mut sys = System::new();
sys.refresh_all();
//sys.refresh_disks_list();
let mut total_size = 0u64;
let mut total_used = 0u64;
let mut count = 0;
let disks = Disks::new_with_refreshed_list();
for disk in disks.list() {
// Ignoriere CD-ROMs und kleine Systempartitionen
if disk.total_space() > 100 * 1024 * 1024 {
// > 100MB
total_size += disk.total_space();
total_used += disk.total_space() - disk.available_space();
count += 1;
}
}
// Berechnungen
let size_b = if count > 0 {
total_size as f64 // in Bytes
} else {
// Fallback: Versuche df unter Linux
#[cfg(target_os = "linux")]
{
if let Ok(output) = Command::new("df")
.arg("-B1")
.arg("--output=size,used")
.output()
{
let stdout = String::from_utf8_lossy(&output.stdout);
for line in stdout.lines().skip(1) {
let parts: Vec<&str> = line.split_whitespace().collect();
if parts.len() == 2 {
if let (Ok(size), Ok(used)) =
(parts[0].parse::<u64>(), parts[1].parse::<u64>())
{
total_size += size;
total_used += used;
count += 1;
}
}
}
total_size as f64 // in Bytes
} else {
0.0
}
}
#[cfg(not(target_os = "linux"))]
{
0.0
}
};
let usage = if total_size > 0 {
(total_used as f64 / total_size as f64) * 100.0
} else {
0.0
};
(size_b, usage, 0.0) // Disk-Temp bleibt 0.0 ohne spezielle Hardware
}
/// Returns the cumulative `(received, transmitted)` byte counters summed
/// over every interface in the Windows interface table.
///
/// Returns `None` when `GetIfTable` fails or reports a table that does not
/// fit the buffer it was given.
#[cfg(target_os = "windows")]
fn get_network_traffic() -> Option<(u64, u64)> {
    use std::mem::size_of;
    use std::ptr::{addr_of, null_mut};
    use winapi::shared::ifmib::{MIB_IFROW, MIB_IFTABLE};
    use winapi::shared::winerror::ERROR_INSUFFICIENT_BUFFER;
    use winapi::um::iphlpapi::GetIfTable;

    // First call: determine the required buffer size.
    let mut buffer_size = 0u32;
    // SAFETY: a null table pointer together with a zero size is the
    // documented way to query the required size; only `buffer_size` is written.
    let probe = unsafe { GetIfTable(null_mut(), &mut buffer_size, 0) };
    if probe != ERROR_INSUFFICIENT_BUFFER {
        return None;
    }

    // Allocate the buffer as `u64` words: a `Vec<u8>` carries no alignment
    // guarantee, and reading `MIB_IFTABLE` through a misaligned pointer is
    // undefined behaviour.
    let word = size_of::<u64>();
    let mut buffer = vec![0u64; (buffer_size as usize + word - 1) / word];
    let capacity_bytes = buffer.len() * word;
    let if_table = buffer.as_mut_ptr() as *mut MIB_IFTABLE;

    // Second call: fetch the actual table.
    // SAFETY: `if_table` points to `capacity_bytes >= buffer_size` writable,
    // suitably aligned bytes that outlive the call.
    if unsafe { GetIfTable(if_table, &mut buffer_size, 0) } != 0 {
        return None;
    }

    // SAFETY: the call succeeded, so the table header is initialised.
    // `addr_of!` takes the row address without creating a reference to the
    // one-element placeholder array.
    let (entry_count, first_row) = unsafe {
        (
            (*if_table).dwNumEntries as usize,
            addr_of!((*if_table).table) as *const MIB_IFROW,
        )
    };

    // Never trust the reported count beyond what was allocated.
    let header_bytes = first_row as usize - if_table as usize;
    let needed_bytes = entry_count
        .checked_mul(size_of::<MIB_IFROW>())?
        .checked_add(header_bytes)?;
    if needed_bytes > capacity_bytes {
        return None;
    }

    // SAFETY: `first_row` is aligned and, per the check above, `entry_count`
    // rows lie inside `buffer`, which is still alive and not otherwise
    // accessed while `rows` is in use.
    let rows = unsafe { std::slice::from_raw_parts(first_row, entry_count) };

    // Sum the counters over all interfaces.
    let totals = rows.iter().fold((0u64, 0u64), |(rx, tx), row| {
        (rx + u64::from(row.dwInOctets), tx + u64::from(row.dwOutOctets))
    });
    Some(totals)
}
/// Returns the cumulative `(received, transmitted)` byte counters summed
/// over the interfaces listed in `/sys/class/net`.
///
/// Interfaces whose name starts with `lo` or `virbr` are skipped, as are
/// interfaces whose statistics files cannot be read. A counter that fails to
/// parse counts as zero. The result is always `Some`; an unreadable
/// `/sys/class/net` yields `(0, 0)`.
#[cfg(target_os = "linux")]
fn get_network_traffic() -> Option<(u64, u64)> {
    let totals = fs::read_dir("/sys/class/net")
        .into_iter()
        .flatten() // zero or one directory listing
        .flatten() // drop entries that failed to read
        .filter(|entry| {
            // Skip loopback and libvirt bridge interfaces.
            let os_name = entry.file_name();
            let name = os_name.to_string_lossy();
            !(name.starts_with("lo") || name.starts_with("virbr"))
        })
        .filter_map(|entry| {
            // Both counters must be readable for the interface to count.
            let stats_dir = entry.path().join("statistics");
            let rx_raw = fs::read_to_string(stats_dir.join("rx_bytes")).ok()?;
            let tx_raw = fs::read_to_string(stats_dir.join("tx_bytes")).ok()?;
            Some((
                rx_raw.trim().parse::<u64>().unwrap_or(0),
                tx_raw.trim().parse::<u64>().unwrap_or(0),
            ))
        })
        .fold((0u64, 0u64), |(rx_sum, tx_sum), (rx, tx)| {
            (rx_sum + rx, tx_sum + tx)
        });

    Some(totals)
}
/// Agent entry point: register with the backend, start the heartbeat task in
/// the background, then run the metrics loop on the main task.
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
    let server_base_url = "http://localhost:5000";

    // Phase 1: registration (blocks until the backend accepts the agent).
    println!("Starting registration process...");
    let (server_id, ip_address) = register_with_server(server_base_url).await?;

    // Phase 2: heartbeat runs concurrently on its own task.
    let heartbeat_ip = ip_address.clone();
    let heartbeat_handle = tokio::spawn(async move {
        let outcome = heartbeat_loop(server_base_url, &heartbeat_ip).await;
        if let Err(e) = outcome {
            eprintln!("Heartbeat loop failed: {}", e);
        }
    });

    // Phase 3: metrics collection on the current task.
    println!("Starting metrics collection...");
    let mut metrics_collector = MetricsCollector::new(server_id, ip_address);
    metrics_collector
        .collect_and_send_loop(server_base_url)
        .await?;

    heartbeat_handle
        .await
        .map_err(|join_err| join_err.into())
}
/// Smoke tests that run against the real host; they check value ranges
/// rather than exact readings.
#[cfg(test)]
mod tests {
    use super::*;

    /// A reported CPU temperature must fall in a plausible range.
    #[test]
    fn test_cpu_temp() {
        let temp = get_cpu_temp();
        println!("CPU Temperature: {:?}°C", temp);
        // Basic validation - temp should be between 0-100°C if available
        if let Some(t) = temp {
            assert!(
                (0.0..=100.0).contains(&t),
                "CPU temperature out of reasonable range"
            );
        }
    }

    /// Disk size must be non-negative and usage must be a percentage.
    #[test]
    fn test_disk_info() {
        const BYTES_PER_GIB: f64 = 1024.0 * 1024.0 * 1024.0;

        let (size, usage, _temp) = get_disk_info();
        // `size` is in bytes; convert before printing with a size unit.
        println!(
            "Disk Size: {:.2}GiB, Usage: {:.2}%",
            size / BYTES_PER_GIB,
            usage
        );
        assert!(size >= 0.0, "Disk size should be non-negative");
        assert!(
            (0.0..=100.0).contains(&usage),
            "Disk usage should be 0-100%"
        );
    }

    /// Every hardware field must be populated.
    #[tokio::test]
    async fn test_hardware_info() {
        let hardware = HardwareInfo::collect().await.unwrap();
        println!("Hardware Info: {:?}", hardware);
        assert!(
            !hardware.cpu_type.is_empty(),
            "CPU type should not be empty"
        );
        assert!(hardware.cpu_cores > 0, "CPU cores should be positive");
        assert!(
            !hardware.gpu_type.is_empty(),
            "GPU type should not be empty"
        );
        assert!(hardware.ram_size > 0.0, "RAM size should be positive");
        assert!(
            !hardware.ip_address.is_empty(),
            "IP address should not be empty"
        );
    }

    /// A single metrics sample must report sane CPU and RAM values.
    #[tokio::test]
    async fn test_metrics_collector() {
        let mut collector = MetricsCollector::new(1, "127.0.0.1".to_string());
        let metrics = collector.collect_metrics();
        println!("Collected Metrics: {:?}", metrics);
        // Validate basic metrics ranges
        assert!(
            (0.0..=100.0).contains(&metrics.cpu_load),
            "CPU load should be 0-100%"
        );
        assert!(
            (0.0..=100.0).contains(&metrics.ram_load),
            "RAM load should be 0-100%"
        );
        assert!(metrics.ram_size > 0.0, "RAM size should be positive");
    }

    /// Builds a registration DTO from real hardware data.
    ///
    /// Note: exercising the HTTP part would require a mock server; only the
    /// hardware-detection half of the flow is covered here.
    #[tokio::test]
    async fn test_registration_flow() {
        let hardware = HardwareInfo::collect().await.unwrap();
        let registration = RegistrationDto {
            id: 1,
            ip_address: hardware.ip_address.clone(),
            cpu_type: hardware.cpu_type,
            cpu_cores: hardware.cpu_cores,
            gpu_type: hardware.gpu_type,
            ram_size: hardware.ram_size,
        };
        println!("Registration DTO: {:?}", registration);
        assert_eq!(registration.id, 1);
        assert!(!registration.ip_address.is_empty());
    }

    /// Reading a temperature from a missing file must yield `None`.
    #[test]
    fn test_error_handling() {
        // Test with invalid paths
        #[cfg(target_os = "linux")]
        {
            let temp = get_cpu_temp_with_path("/invalid/path");
            assert!(temp.is_none(), "Should handle invalid paths gracefully");
        }
    }

    /// Test helper: reads a millidegree value from `path` and converts it to
    /// degrees; `None` when the file is missing or not numeric.
    #[cfg(target_os = "linux")]
    fn get_cpu_temp_with_path(path: &str) -> Option<f32> {
        fs::read_to_string(path)
            .ok()?
            .trim()
            .parse::<f32>()
            .map(|t| t / 1000.0)
            .ok()
    }
}