//! # GPU Hardware Module
//!
//! This module provides GPU information collection for WatcherAgent, including load, temperature, and VRAM statistics.
//!
//! ## Responsibilities
//! - **GPU Detection:** Identifies GPU model and capabilities.
//! - **Metric Collection:** Queries GPU load, temperature, and VRAM usage using NVML (NVIDIA only).
//! - **Error Handling:** Graceful fallback if GPU or NVML is unavailable.
//!
//! ## Units
//! - `current_load`: GPU usage as a percentage (**0.0–100.0**)
//! - `current_temp`: GPU temperature in **degrees Celsius (°C)**
//! - `vram_total`: Total VRAM in **bytes**
//! - `vram_used`: Used VRAM in **bytes**

use anyhow::Result;
use nvml_wrapper::Nvml;
use std::error::Error;

/// GPU statistics for the host system.
///
/// # Fields
/// - `name`: GPU model name (string)
/// - `current_load`: GPU usage as a percentage (**0.0–100.0**)
/// - `current_temp`: GPU temperature in **degrees Celsius (°C)**
/// - `vram_total`: Total VRAM in **bytes**
/// - `vram_used`: Used VRAM in **bytes**
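///
/// # Example
///
/// A minimal construction sketch with illustrative placeholder values; real data
/// comes from [`get_gpu_info`]. Every field is an `Option`, so partially available
/// data can be represented. (Marked `ignore` because the values are placeholders.)
///
/// ```ignore
/// let info = GpuInfo {
///     name: Some("NVIDIA GeForce RTX 3080".to_string()),
///     current_load: Some(12.5),           // percent, 0.0–100.0
///     current_temp: Some(48.0),           // °C
///     vram_total: Some(10_737_418_240.0), // bytes (10 GiB)
///     vram_used: Some(2_147_483_648.0),   // bytes (2 GiB)
/// };
/// assert!(info.vram_used <= info.vram_total);
/// ```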
#[derive(Debug)]
pub struct GpuInfo {
    pub name: Option<String>,
    pub current_load: Option<f64>,
    pub current_temp: Option<f64>,
    pub vram_total: Option<f64>,
    pub vram_used: Option<f64>,
}

/// Collects GPU information (load, temperature, VRAM) using NVML.
///
/// This function attempts to query the first NVIDIA GPU using NVML. If unavailable, it returns a fallback with only the detected GPU name.
///
/// # Returns
/// * `Result<GpuInfo, Box<dyn Error + Send + Sync>>` - GPU statistics or fallback if unavailable.
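///
/// # Example
///
/// A minimal usage sketch. The surrounding async runtime and the error-propagation
/// context for `?` are assumed, so the block is not compiled as a doctest:
///
/// ```ignore
/// let gpu = get_gpu_info().await?;
/// if let (Some(name), Some(load)) = (&gpu.name, gpu.current_load) {
///     println!("{name}: {load:.1}% load");
/// }
/// ```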
pub async fn get_gpu_info() -> Result<GpuInfo, Box<dyn Error + Send + Sync>> {
    match get_gpu_metrics() {
        Ok((gpu_temp, gpu_load, vram_used, vram_total)) => {
            let gpu_name = detect_gpu_name();
            Ok(GpuInfo {
                name: Some(gpu_name),
                current_load: Some(gpu_load),
                current_temp: Some(gpu_temp),
                vram_total: Some(vram_total),
                vram_used: Some(vram_used),
            })
        }
        Err(e) => {
            // Graceful fallback: log error, return empty/None values
            eprintln!("GPU info unavailable: {e}");
            Ok(GpuInfo {
                name: Some(detect_gpu_name()),
                current_load: None,
                current_temp: None,
                vram_total: None,
                vram_used: None,
            })
        }
    }
}

/// Queries NVML for GPU metrics: temperature, load, VRAM used/total.
///
/// # Returns
/// * `Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>>` - Tuple of (temperature °C, load %, VRAM used bytes, VRAM total bytes).
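///
/// # Example
///
/// A synchronous usage sketch; the tuple order matches the return description above.
/// Not compiled as a doctest, since success requires an NVIDIA GPU and the NVML
/// runtime library:
///
/// ```ignore
/// match get_gpu_metrics() {
///     Ok((temp_c, load_pct, vram_used, vram_total)) => {
///         println!("GPU: {temp_c} °C, {load_pct} % load, {vram_used}/{vram_total} bytes VRAM");
///     }
///     Err(e) => eprintln!("NVML metrics unavailable: {e}"),
/// }
/// ```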
pub fn get_gpu_metrics() -> Result<(f64, f64, f64, f64), Box<dyn Error + Send + Sync>> {
    let nvml = Nvml::init();
    if let Ok(nvml) = nvml {
        if let Ok(device) = nvml.device_by_index(0) {
            let temp = device
                .temperature(nvml_wrapper::enum_wrappers::device::TemperatureSensor::Gpu)
                .unwrap_or(0) as f64;
            let load = device
                .utilization_rates()
                .map(|u| u.gpu as f64)
                .unwrap_or(0.0);
            let mem = device.memory_info().ok();
            let used = mem.clone().map(|m| m.used as f64).unwrap_or(0.0);
            let total = mem.map(|m| m.total as f64).unwrap_or(0.0);
            Ok((temp, load, used, total))
        } else {
            Err(anyhow::anyhow!("No NVIDIA GPU found").into())
        }
    } else {
        Err(anyhow::anyhow!("Failed to initialize NVML").into())
    }
}

/// Resolves a human-readable GPU name, preferring NVML and falling back to OS tools.
fn detect_gpu_name() -> String {
    try_nvml_gpu_name()
        .or_else(fallback_gpu_name)
        .unwrap_or_else(|| "Unknown GPU".to_string())
}

/// Asks NVML for the name of the first GPU, if NVML and a device are available.
fn try_nvml_gpu_name() -> Option<String> {
    let nvml = Nvml::init().ok()?;
    let device = nvml.device_by_index(0).ok()?;
    device.name().ok().map(|s| s.to_string())
}

/// Platform-specific fallback: parses `lshw` output on Linux and `wmic` output on Windows.
fn fallback_gpu_name() -> Option<String> {
    #[cfg(target_os = "linux")]
    {
        let output = std::process::Command::new("lshw")
            .args(&["-C", "display"])
            .output()
            .ok()?;
        String::from_utf8_lossy(&output.stdout)
            .lines()
            .find(|l| l.contains("product:"))
            .map(|l| l.trim().replace("product:", "").trim().to_string())
    }

    #[cfg(target_os = "windows")]
    {
        let output = std::process::Command::new("wmic")
            .args(["path", "win32_VideoController", "get", "name"])
            .output()
            .ok()?;
        String::from_utf8_lossy(&output.stdout)
            .lines()
            .skip(1) // Skip header
            .find(|s| !s.trim().is_empty())
            .map(|s| s.trim().to_string())
    }

    #[cfg(not(any(target_os = "linux", target_os = "windows")))]
    {
        None
    }
}
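
// A minimal smoke-test sketch for the name-detection fallback chain: it only checks
// that `detect_gpu_name` runs without panicking, with or without an NVIDIA GPU
// present, since the chain ends in the literal "Unknown GPU" fallback.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn detect_gpu_name_does_not_panic() {
        // NVML may be missing entirely; the helper must still return some String.
        let _name = detect_gpu_name();
    }
}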