Files
watcheragent/WatcherAgent/src/hardware/gpu.rs
donpat1to 49f1af392d
All checks were successful
Rust Cross-Platform Build / Detect Rust Project (push) Successful in 5s
Rust Cross-Platform Build / Set Tag Name (push) Successful in 5s
Rust Cross-Platform Build / Run Tests (push) Successful in 1m8s
Rust Cross-Platform Build / Build (x86_64-unknown-linux-gnu) (push) Successful in 3m5s
Rust Cross-Platform Build / Build (x86_64-pc-windows-gnu) (push) Successful in 3m56s
Rust Cross-Platform Build / Build and Push Docker Image (push) Successful in 2m24s
Rust Cross-Platform Build / Create Tag (push) Successful in 6s
Rust Cross-Platform Build / Workflow Summary (push) Successful in 1s
fixed units
2025-10-01 13:13:18 +02:00

139 lines
4.7 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

use anyhow::Result;
use nvml_wrapper::Nvml;
use std::error::Error;
/// # GPU Hardware Module
///
/// This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics.
///
/// ## Responsibilities
/// - **GPU Detection:** Identifies GPU model and capabilities.
/// - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only).
/// - **Error Handling:** Graceful fallback if GPU or NVML is unavailable.
///
/// ## Units
/// - `current_load`: GPU usage as a percentage (**0.0–100.0**)
/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
/// - `vram_total`: Total VRAM in **bytes**
/// - `vram_used`: Used VRAM in **bytes**
///
/// GPU statistics for the host system.
///
/// # Fields
/// - `name`: GPU model name (string)
/// - `current_load`: GPU usage as a percentage (**0.0–100.0**)
/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
/// - `vram_total`: Total VRAM in **bytes**
/// - `vram_used`: Used VRAM in **bytes**
// `Clone`, `Default` and `PartialEq` are derived in addition to `Debug` so callers can copy
// snapshots, build an "all unknown" value with `GpuInfo::default()`, and compare readings.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct GpuInfo {
    /// GPU model name, if one could be determined.
    pub name: Option<String>,
    /// GPU utilization as a percentage (0.0 to 100.0); `None` when metrics are unavailable.
    pub current_load: Option<f64>,
    /// GPU temperature in degrees Celsius; `None` when metrics are unavailable.
    pub current_temp: Option<f64>,
    /// Total VRAM in bytes; `None` when metrics are unavailable.
    pub vram_total: Option<f64>,
    /// Used VRAM in bytes; `None` when metrics are unavailable.
    pub vram_used: Option<f64>,
}
/// Builds a [`GpuInfo`] snapshot for the host, degrading gracefully when metrics are unavailable.
///
/// Metrics are read with [`get_gpu_metrics`]. If that call fails, the error is written to
/// stderr and every metric field is left as `None`. The `name` field is always populated
/// through `detect_gpu_name`, which ends in the `"Unknown GPU"` placeholder.
///
/// The body contains no `.await` points; all work runs synchronously when the future is polled.
///
/// # Returns
/// * `Result<GpuInfo, Box<dyn Error + Send + Sync>>` - currently always `Ok`; a metric failure
///   is reported through `None` fields rather than through `Err`.
pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
    // Resolve the metrics first so the failure is logged before the name lookup runs.
    let readings = match get_gpu_metrics() {
        Ok(values) => Some(values),
        Err(e) => {
            // Graceful fallback: report the problem and continue with empty metrics.
            eprintln!("GPU info unavailable: {e}");
            None
        }
    };

    let name = Some(detect_gpu_name());

    // Tuple layout from `get_gpu_metrics`: (temperature, load, VRAM used, VRAM total).
    Ok(GpuInfo {
        name,
        current_load: readings.map(|(_, load, _, _)| load),
        current_temp: readings.map(|(temp, _, _, _)| temp),
        vram_total: readings.map(|(_, _, _, total)| total),
        vram_used: readings.map(|(_, _, used, _)| used),
    })
}
/// Reads live metrics for the first NVIDIA GPU (device index 0) through NVML.
///
/// Individual readings are best-effort: once NVML is initialized and the device is opened,
/// a failing temperature, utilization or memory query is reported as `0.0` instead of
/// failing the whole call.
///
/// # Returns
/// * `Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>>` - Tuple of
///   (temperature °C, load %, VRAM used bytes, VRAM total bytes).
///
/// # Errors
/// Fails when NVML cannot be initialized or device 0 cannot be opened. The underlying NVML
/// error is appended to the message so the actual cause shows up in logs.
pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
    let nvml = Nvml::init().map_err(|e| anyhow::anyhow!("Failed to initialize NVML: {}", e))?;
    let device = nvml
        .device_by_index(0)
        .map_err(|e| anyhow::anyhow!("No NVIDIA GPU found: {}", e))?;

    let temp = device
        .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
        .map(|celsius| celsius as f64)
        .unwrap_or(0.0);
    let load = device
        .utilization_rates()
        .map(|rates| rates.gpu as f64)
        .unwrap_or(0.0);
    // One memory query feeds both values, so no clone of the result is needed.
    let (used, total) = device
        .memory_info()
        .map(|mem| (mem.used as f64, mem.total as f64))
        .unwrap_or((0.0, 0.0));

    Ok((temp, load, used, total))
}
fn detect_gpu_name() -> String {
try_nvml_gpu_name()
.or_else(fallback_gpu_name)
.unwrap_or_else(|| "Unknown GPU".to_string())
}
/// Asks NVML for the name of the device at index 0.
///
/// Returns `None` if NVML cannot be initialized, the device cannot be opened, or the name
/// query fails; the specific error is discarded.
fn try_nvml_gpu_name() -> Option<String> {
    let nvml = match Nvml::init() {
        Ok(handle) => handle,
        Err(_) => return None,
    };
    let first_gpu = match nvml.device_by_index(0) {
        Ok(gpu) => gpu,
        Err(_) => return None,
    };
    let lookup = first_gpu.name();
    match lookup {
        Ok(model) => Some(model.to_string()),
        Err(_) => None,
    }
}
/// Best-effort GPU model lookup through operating-system tooling.
///
/// * **Linux:** runs `lshw -C display` and returns the value of the first non-empty
///   `product:` field.
/// * **Windows:** runs `wmic path win32_VideoController get name` and returns the first
///   non-empty line after the header row.
/// * **Other platforms:** always `None`.
///
/// # Returns
/// `Some(name)` when the tool could be spawned and printed a non-empty model name,
/// otherwise `None`.
fn fallback_gpu_name() -> Option<String> {
    #[cfg(target_os = "linux")]
    {
        let output = std::process::Command::new("lshw")
            .args(["-C", "display"])
            .output()
            .ok()?;
        let report = String::from_utf8_lossy(&output.stdout);
        report
            .lines()
            // Only the leading `product:` key is stripped; the value itself is left intact.
            .filter_map(|line| line.trim().strip_prefix("product:"))
            .map(str::trim)
            // An empty value would otherwise be returned as a name.
            .find(|product| !product.is_empty())
            .map(str::to_owned)
    }
    #[cfg(target_os = "windows")]
    {
        let output = std::process::Command::new("wmic")
            .args(["path", "win32_VideoController", "get", "name"])
            .output()
            .ok()?;
        let listing = String::from_utf8_lossy(&output.stdout);
        listing
            .lines()
            .skip(1) // Skip header
            .map(str::trim)
            .find(|name| !name.is_empty())
            .map(str::to_owned)
    }
    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    {
        None
    }
}