use anyhow::Result; use nvml_wrapper::Nvml; use std::error::Error; /// # GPU Hardware Module /// /// This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics. /// /// ## Responsibilities /// - **GPU Detection:** Identifies GPU model and capabilities. /// - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only). /// - **Error Handling:** Graceful fallback if GPU or NVML is unavailable. /// /// ## Units /// - `current_load`: GPU usage as a percentage (**0.0–100.0**) /// - `current_temp`: GPU temperature in **degrees Celsius (°C)** /// - `vram_total`: Total VRAM in **bytes** /// - `vram_used`: Used VRAM in **bytes** /// /// GPU statistics for the host system. /// /// # Fields /// - `name`: GPU model name (string) /// - `current_load`: GPU usage as a percentage (**0.0–100.0**) /// - `current_temp`: GPU temperature in **degrees Celsius (°C)** /// - `vram_total`: Total VRAM in **bytes** /// - `vram_used`: Used VRAM in **bytes** #[derive(Debug)] pub struct GpuInfo { pub name: Option, pub current_load: Option, pub current_temp: Option, pub vram_total: Option, pub vram_used: Option, } /// Collects GPU information (load, temperature, VRAM) using NVML. /// /// This function attempts to query the first NVIDIA GPU using NVML. If unavailable, it returns a fallback with only the detected GPU name. /// /// # Returns /// * `Result>` - GPU statistics or fallback if unavailable. pub async fn get_gpu_info() -> Result> { match get_gpu_metrics() { Ok((gpu_temp, gpu_load, vram_used, vram_total)) => { let gpu_name = detect_gpu_name(); Ok(GpuInfo { name: Some(gpu_name), current_load: Some(gpu_load), current_temp: Some(gpu_temp), vram_total: Some(vram_total), vram_used: Some(vram_used), }) } Err(e) => { // Graceful fallback: log error, return empty/None values eprintln!("GPU info unavailable: {e}"); Ok(GpuInfo { name: Some(detect_gpu_name()), current_load: None, current_temp: None, vram_total: None, vram_used: None, }) } } } /// Queries NVML for GPU metrics: temperature, load, VRAM used/total. /// /// # Returns /// * `Result<(f64, f64, f64, f64), Box>` - Tuple of (temperature °C, load %, VRAM used bytes, VRAM total bytes). pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box> { let nvml = Nvml::init(); if let Ok(nvml) = nvml { if let Ok(device) = nvml.device_by_index(0) { let temp = device .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu) .unwrap_or(0) as f64; let load = device .utilization_rates() .map(|u| u.gpu as f64) .unwrap_or(0.0); let mem = device.memory_info().ok(); let used = mem.clone().map(|m| m.used as f64).unwrap_or(0.0); let total = mem.map(|m| m.total as f64).unwrap_or(0.0); Ok((temp, load, used, total)) } else { Err(anyhow::anyhow!("No NVIDIA GPU found").into()) } } else { Err(anyhow::anyhow!("Failed to initialize NVML").into()) } } fn detect_gpu_name() -> String { try_nvml_gpu_name() .or_else(fallback_gpu_name) .unwrap_or_else(|| "Unknown GPU".to_string()) } fn try_nvml_gpu_name() -> Option { let nvml = Nvml::init().ok()?; let device = nvml.device_by_index(0).ok()?; device.name().ok().map(|s| s.to_string()) } fn fallback_gpu_name() -> Option { #[cfg(target_os = "linux")] { let output = std::process::Command::new("lshw") .args(&["-C", "display"]) .output() .ok()?; String::from_utf8_lossy(&output.stdout) .lines() .find(|l| l.contains("product:")) .map(|l| l.trim().replace("product:", "").trim().to_string()) } #[cfg(target_os = "windows")] { let output = std::process::Command::new("wmic") .args(["path", "win32_VideoController", "get", "name"]) .output() .ok()?; String::from_utf8_lossy(&output.stdout) .lines() .skip(1) // Skip header .find(|s| !s.trim().is_empty()) .map(|s| s.trim().to_string()) } #[cfg(not(any(target_os = "linux", target_os = "windows")))] { None } }