diff --git a/Cargo.lock b/Cargo.lock index e946152c..8f347de9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1278,9 +1278,11 @@ dependencies = [ "glob", "lazy-regex", "libc", + "log", "num_cpus", "nutype", "nvml-wrapper", + "pretty_env_logger", "serde", "syscalls", "sysconf", diff --git a/lib/process_data/Cargo.toml b/lib/process_data/Cargo.toml index 6f9d641b..effbe376 100644 --- a/lib/process_data/Cargo.toml +++ b/lib/process_data/Cargo.toml @@ -21,9 +21,11 @@ anyhow = "1.0.94" glob = "0.3.1" lazy-regex = "3.3.0" libc = "0.2.167" +log = "0.4.22" num_cpus = "1.16.0" nutype = { version = "0.5.0", features = ["serde"] } nvml-wrapper = "0.10.0" +pretty_env_logger = "0.5" serde = { version = "1.0.215", features = ["serde_derive"] } syscalls = { version = "0.6.18", features = ["all"] } sysconf = "0.3.4" diff --git a/lib/process_data/src/lib.rs b/lib/process_data/src/lib.rs index 3de390bf..1b3a7eb8 100644 --- a/lib/process_data/src/lib.rs +++ b/lib/process_data/src/lib.rs @@ -3,6 +3,7 @@ pub mod pci_slot; use anyhow::{bail, Context, Result}; use glob::glob; use lazy_regex::{lazy_regex, Lazy, Regex}; +use log::{debug, trace, warn}; use nutype::nutype; use nvml_wrapper::enums::device::UsedGpuMemory; use nvml_wrapper::error::NvmlError; @@ -12,8 +13,6 @@ use pci_slot::PciSlot; use serde::{Deserialize, Serialize}; use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::Display; -use std::fs::File; -use std::io::{Read, Write}; use std::os::linux::fs::MetadataExt; use std::path::Path; use std::str::FromStr; @@ -27,10 +26,21 @@ const STAT_SYSTEM_CPU_TIME: usize = 14 - STAT_OFFSET; const STAT_NICE: usize = 18 - STAT_OFFSET; const STAT_STARTTIME: usize = 21 - STAT_OFFSET; +const GPU_DRIVER_NAMES: &[&str] = &["amdgpu", "i915"]; +const NPU_DRIVER_NAMES: &[&str] = &["amdxdna_accel_driver"]; + +const MAJOR: u32 = 226; + static USERS_CACHE: LazyLock> = LazyLock::new(|| unsafe { - uzers::all_users() - .map(|user| (user.uid(), user.name().to_string_lossy().to_string())) - 
.collect() + debug!("Initializing users cache…"); + let users: HashMap = uzers::all_users() + .map(|user| { + trace!("Found user {}", user.name().to_string_lossy()); + (user.uid(), user.name().to_string_lossy().to_string()) + }) + .collect(); + debug!("Found {} users", users.len()); + users }); static PAGESIZE: LazyLock = LazyLock::new(sysconf::pagesize); @@ -52,6 +62,10 @@ static RE_DRM_DRIVER: Lazy = lazy_regex!(r"drm-driver:\s*(.+)"); static RE_DRM_PDEV: Lazy = lazy_regex!(r"drm-pdev:\s*([0-9A-Fa-f]{4}:[0-9A-Fa-f]{2}:[0-9A-Fa-f]{2}\.[0-9A-Fa-f])"); +// AMD only +static RE_DRM_ENGINE_NPU_AMDXDNA: Lazy = + lazy_regex!(r"drm-engine-npu-amdxdna:\s*(\d+)\s*ns"); + // AMD only static RE_DRM_ENGINE_GFX: Lazy = lazy_regex!(r"drm-engine-gfx:\s*(\d+)\s*ns"); @@ -76,19 +90,27 @@ static RE_DRM_ENGINE_RENDER: Lazy = lazy_regex!(r"drm-engine-render:\s*(\ // Intel only static RE_DRM_ENGINE_VIDEO: Lazy = lazy_regex!(r"drm-engine-video:\s*(\d+)\s*ns"); -// v3d only static RE_DRM_TOTAL_MEMORY: Lazy = lazy_regex!(r"drm-total-memory:\s*(\d+)\s*KiB"); -static NVML: Lazy> = Lazy::new(Nvml::init); +static NVML: Lazy> = Lazy::new(|| { + debug!("Initializing connection to NVML…"); + Nvml::init().inspect_err(|err| warn!("Unable to connect to NVML: {err}")) +}); static NVML_DEVICES: Lazy> = Lazy::new(|| { if let Ok(nvml) = NVML.as_ref() { + debug!("Looking for NVIDIA devices…"); let device_count = nvml.device_count().unwrap_or(0); let mut return_vec = Vec::with_capacity(device_count as usize); for i in 0..device_count { if let Ok(gpu) = nvml.device_by_index(i) { if let Ok(pci_slot) = gpu.pci_info().map(|pci_info| pci_info.bus_id) { let pci_slot = PciSlot::from_str(&pci_slot).unwrap(); + debug!( + "Found {} at {}", + gpu.name().unwrap_or("N/A".into()), + pci_slot + ); return_vec.push((pci_slot, gpu)); } } @@ -160,6 +182,13 @@ pub struct GpuUsageStats { pub nvidia: bool, } +/// Represents NPU usage statistics per-process. 
+#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize, Copy)] +pub struct NpuUsageStats { + pub usage: u64, + pub mem: u64, +} + /// Data that could be transferred using `resources-processes`, separated from /// `Process` mainly due to `Icon` not being able to derive `Serialize` and /// `Deserialize`. @@ -184,6 +213,7 @@ pub struct ProcessData { pub timestamp: u64, /// Key: PCI Slot ID of the GPU pub gpu_usage_stats: BTreeMap, + pub npu_usage_stats: BTreeMap, } impl ProcessData { @@ -263,13 +293,6 @@ impl ProcessData { pub fn try_from_path>(proc_path: P) -> Result { let proc_path = proc_path.as_ref(); - let stat = std::fs::read_to_string(proc_path.join("stat"))?; - let statm = std::fs::read_to_string(proc_path.join("statm"))?; - let status = std::fs::read_to_string(proc_path.join("status"))?; - let comm = std::fs::read_to_string(proc_path.join("comm"))?; - let commandline = std::fs::read_to_string(proc_path.join("cmdline"))?; - let io = std::fs::read_to_string(proc_path.join("io")).ok(); - let pid = proc_path .file_name() .context("proc_path terminates in ..")? @@ -277,6 +300,23 @@ impl ProcessData { .context("can't turn OsStr to str")? 
.parse()?; + trace!("Inspecting process {pid}…"); + + trace!("Reading info files…"); + let stat = std::fs::read_to_string(proc_path.join("stat")) + .inspect_err(|err| trace!("Error reading 'stat': {err}"))?; + let statm = std::fs::read_to_string(proc_path.join("statm")) + .inspect_err(|err| trace!("Error reading 'statm': {err}"))?; + let status = std::fs::read_to_string(proc_path.join("status")) + .inspect_err(|err| trace!("Error reading 'status': {err}"))?; + let comm = std::fs::read_to_string(proc_path.join("comm")) + .inspect_err(|err| trace!("Error reading 'comm': {err}"))?; + let commandline = std::fs::read_to_string(proc_path.join("cmdline")) + .inspect_err(|err| trace!("Error reading 'cmdline': {err}"))?; + let io = std::fs::read_to_string(proc_path.join("io")) + .inspect_err(|err| trace!("Error reading 'io': {err}")) + .ok(); + let user = USERS_CACHE .get(&Self::get_uid(proc_path)?) .cloned() @@ -285,7 +325,8 @@ impl ProcessData { let stat = stat .split(')') // since we don't care about the pid or the executable name, split after the executable name to make our life easier .last() - .context("stat doesn't have ')'")? + .context("stat doesn't have ')'") + .inspect_err(|err| trace!("Can't parse 'stat': {err}"))? 
.split(' ') .skip(1) // the first element would be a space, let's ignore that .collect::>(); @@ -298,23 +339,28 @@ impl ProcessData { let parent_pid = stat .get(STAT_PARENT_PID) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse parent pid from 'stat': {err}"))?; let user_cpu_time = stat .get(STAT_USER_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse user cpu time from 'stat': {err}"))?; let system_cpu_time = stat .get(STAT_SYSTEM_CPU_TIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse system cpu time from 'stat': {err}"))?; let nice = stat .get(STAT_NICE) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse nice from 'stat': {err}"))?; let starttime = stat .get(STAT_STARTTIME) .context("wrong stat file format") - .and_then(|x| x.parse().context("couldn't parse stat file content"))?; + .and_then(|x| x.parse().context("couldn't parse stat file content to int")) + .inspect_err(|err| trace!("Can't parse start time from 'stat': {err}"))?; let mut affinity = Vec::with_capacity(*NUM_CPUS); RE_AFFINITY @@ -350,7 +396,8 @@ impl ProcessData { .and_then(|x| { x.parse::() .context("couldn't parse statm file content") - })? + }) + .inspect_err(|err| trace!("Can't parse memory usage from 'statm': {err}"))? 
.saturating_sub( statm .get(2) @@ -363,6 +410,7 @@ impl ProcessData { .saturating_mul(*PAGESIZE); let cgroup = std::fs::read_to_string(proc_path.join("cgroup")) + .inspect_err(|err| trace!("Can't read cgroup: {err}")) .ok() .and_then(Self::sanitize_cgroup); @@ -390,6 +438,8 @@ impl ProcessData { let gpu_usage_stats = Self::gpu_usage_stats(proc_path, pid); + let npu_usage_stats = Self::npu_usage_stats(proc_path, pid).unwrap_or_default(); + let timestamp = unix_as_millis(); Ok(Self { @@ -411,10 +461,91 @@ impl ProcessData { write_bytes, timestamp, gpu_usage_stats, + npu_usage_stats, }) } + /// Returns the fd_num and the plausibility of whether this file might contain drm fdinfo data. + /// This function is cautious and will signal plausibility if there's an error during evaluation. + fn drm_fdinfo_plausible>( + fdinfo_path: P, + pid: libc::pid_t, + seen_fds: &HashSet, + ) -> (bool, usize) { + let fdinfo_path = fdinfo_path.as_ref(); + + // if our fd is 0, 1 or 2 it's probably just a std stream so skip it + let fd_num = fdinfo_path + .file_name() + .and_then(|osstr| osstr.to_str()) + .unwrap_or("0") + .parse::() + .unwrap_or(0); + if fd_num <= 2 { + trace!( + "fdinfo {fd_num} deemed as not plausible. Reason: fd_num ≤ 2 (probably std stream)" + ); + return (false, fd_num); + } + + let _file = std::fs::File::open(&fdinfo_path); + if _file.is_err() { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: File can't be opened"); + return (false, fd_num); + } + let file = _file.unwrap(); + + let _metadata = file.metadata(); + if _metadata.is_err() { + trace!( + "fdinfo {fd_num} deemed as not plausible. Reason: File's metadata can't be read" + ); + return (false, fd_num); + } + let metadata = _metadata.unwrap(); + + if !metadata.is_file() { + trace!("fdinfo {fd_num} deemed as not plausible. 
Reason: Not a file"); + return (false, fd_num); + } + + // Adapted from nvtop's `is_drm_fd()` + // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c + let fd_path = fdinfo_path.to_str().map(|s| s.replace("fdinfo", "fd")); + if let Some(fd_path) = fd_path { + if let Ok(fd_metadata) = std::fs::metadata(fd_path) { + if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: Wrong st_mode"); + return (false, fd_num); + } + let major = unsafe { libc::major(fd_metadata.st_rdev()) }; + if major != MAJOR { + trace!( + "fdinfo {fd_num} deemed as not plausible. Reason: Wrong major (expected: {MAJOR}, got: {major})" + ); + return (false, fd_num); + } + } + } + + // Adapted from nvtop's `processinfo_sweep_fdinfos()` + // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c + // if we've already seen the file this fd refers to, skip + let not_unique = seen_fds.iter().any(|seen_fd| unsafe { + syscalls::syscall!(syscalls::Sysno::kcmp, pid, pid, 0, fd_num, *seen_fd).unwrap_or(0) + == 0 + }); + if not_unique { + trace!("fdinfo {fd_num} deemed as not plausible. Reason: kcmp indicated that we've already seen this file"); + return (false, fd_num); + } + + trace!("fdinfo {fd_num} deemed as plausible"); + (true, fd_num) + } + fn gpu_usage_stats(proc_path: &Path, pid: i32) -> BTreeMap { + trace!("Gathering GPU stats…"); let nvidia_stats = Self::nvidia_gpu_stats_all(pid); let mut other_stats = Self::other_gpu_usage_stats(proc_path, pid).unwrap_or_default(); other_stats.extend(nvidia_stats); @@ -430,180 +561,220 @@ impl ProcessData { let mut seen_fds = HashSet::new(); let mut return_map = BTreeMap::new(); - for entry in std::fs::read_dir(fdinfo_dir)? 
{ - let entry = entry?; + for entry in std::fs::read_dir(fdinfo_dir)?.flatten() { let fdinfo_path = entry.path(); - let _file = std::fs::File::open(&fdinfo_path); - if _file.is_err() { + let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); + if !plausible { continue; } - let mut file = _file.unwrap(); - let _metadata = file.metadata(); - if _metadata.is_err() { - continue; - } - let metadata = _metadata.unwrap(); - - // if our fd is 0, 1 or 2 it's probably just a std stream so skip it - let fd_num = fdinfo_path - .file_name() - .and_then(|osstr| osstr.to_str()) - .unwrap_or("0") - .parse::() - .unwrap_or(0); - if fd_num <= 2 { - continue; - } + seen_fds.insert(fd_num); - if !metadata.is_file() { - continue; + if let Ok((pci_slot, stats)) = Self::read_gpu_fdinfo(&fdinfo_path) { + return_map + .entry(pci_slot) + .and_modify(|existing_value: &mut GpuUsageStats| { + if stats.gfx > existing_value.gfx { + existing_value.gfx = stats.gfx; + } + if stats.dec > existing_value.dec { + existing_value.dec = stats.dec; + } + if stats.enc > existing_value.enc { + existing_value.enc = stats.enc; + } + if stats.mem > existing_value.mem { + existing_value.mem = stats.mem; + } + }) + .or_insert(stats); } + } - // Adapted from nvtop's `is_drm_fd()` - // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c - let fd_path = fdinfo_path.to_str().map(|s| s.replace("fdinfo", "fd")); - if let Some(fd_path) = fd_path { - if let Ok(fd_metadata) = std::fs::metadata(fd_path) { - let major = unsafe { libc::major(fd_metadata.st_rdev()) }; - if (fd_metadata.st_mode() & libc::S_IFMT) != libc::S_IFCHR || major != 226 { - continue; - } - } - } + Ok(return_map) + } - // Adapted from nvtop's `processinfo_sweep_fdinfos()` - // https://github.com/Syllo/nvtop/blob/master/src/extract_processinfo_fdinfo.c - // if we've already seen the file this fd refers to, skip - let not_unique = seen_fds.iter().any(|seen_fd| unsafe { - 
syscalls::syscall!(syscalls::Sysno::kcmp, pid, pid, 0, fd_num, *seen_fd) - .unwrap_or(0) - == 0 - }); - if not_unique { + fn npu_usage_stats(proc_path: &Path, pid: i32) -> Result> { + trace!("Gathering NPU stats…"); + let fdinfo_dir = proc_path.join("fdinfo"); + + let mut seen_fds = HashSet::new(); + + let mut return_map = BTreeMap::new(); + for entry in std::fs::read_dir(fdinfo_dir)?.flatten() { + let fdinfo_path = entry.path(); + + let (plausible, fd_num) = Self::drm_fdinfo_plausible(&fdinfo_path, pid, &seen_fds); + if !plausible { continue; } seen_fds.insert(fd_num); - if let Ok(stats) = Self::read_fdinfo(&mut file, metadata.len() as usize) { + if let Ok((pci_slot, stats)) = Self::read_npu_fdinfo(&fdinfo_path) { return_map - .entry(stats.0) - .and_modify(|existing_value: &mut GpuUsageStats| { - if stats.1.gfx > existing_value.gfx { - existing_value.gfx = stats.1.gfx; + .entry(pci_slot) + .and_modify(|existing_value: &mut NpuUsageStats| { + if stats.usage > existing_value.usage { + existing_value.usage = stats.usage; } - if stats.1.dec > existing_value.dec { - existing_value.dec = stats.1.dec; - } - if stats.1.enc > existing_value.enc { - existing_value.enc = stats.1.enc; - } - if stats.1.mem > existing_value.mem { - existing_value.mem = stats.1.mem; + if stats.mem > existing_value.mem { + existing_value.mem = stats.mem; } }) - .or_insert(stats.1); + .or_insert(stats); } } Ok(return_map) } - fn read_fdinfo( - fdinfo_file: &mut File, - file_size: usize, - ) -> Result<(GpuIdentifier, GpuUsageStats)> { - let mut content = String::with_capacity(file_size); - fdinfo_file.read_to_string(&mut content)?; - fdinfo_file.flush()?; + fn read_npu_fdinfo>(fdinfo_path: P) -> Result<(PciSlot, NpuUsageStats)> { + trace!( + "Reading and parsing {} for NPU stats…", + fdinfo_path.as_ref().to_string_lossy() + ); + + let content = std::fs::read_to_string(fdinfo_path.as_ref())?; + + let pci_slot = RE_DRM_PDEV + .captures(&content) + .and_then(|captures| captures.get(1)) + 
.and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) + .context("can't parse PCI slot ID")?; let driver = RE_DRM_DRIVER .captures(&content) .and_then(|captures| captures.get(1)) - .map(|capture| capture.as_str()); + .map(|capture| capture.as_str()) + .unwrap_or_default(); - if driver.is_some() { - let gpu_identifier = RE_DRM_PDEV - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) - .map(|pci_slot| GpuIdentifier::PciSlot(pci_slot)) - .unwrap_or_default(); + if !NPU_DRIVER_NAMES.contains(&driver) { + trace!("Driver '{driver}' is not known to be NPU-related, skipping"); + bail!("this is not an NPU") + } - let gfx = RE_DRM_ENGINE_GFX - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + let usage = RE_DRM_ENGINE_NPU_AMDXDNA + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); - let render = RE_DRM_ENGINE_RENDER - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + let total_memory = RE_DRM_TOTAL_MEMORY + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); - let compute = RE_DRM_ENGINE_COMPUTE - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + let stats = NpuUsageStats { + usage, + mem: total_memory, + }; - let enc = RE_DRM_ENGINE_ENC - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + trace!("Success reading NPU data for {pci_slot}: {stats:?}"); - let video = RE_DRM_ENGINE_VIDEO - .captures(&content) - .and_then(|captures| 
captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + return Ok((pci_slot, stats)); + } - let dec = RE_DRM_ENGINE_DEC - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default(); + fn read_gpu_fdinfo>(fdinfo_path: P) -> Result<(GpuIdentifier, GpuUsageStats)> { + trace!( + "Reading and parsing {} for GPU stats…", + fdinfo_path.as_ref().to_string_lossy() + ); - let vram = RE_DRM_MEMORY_VRAM - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); + let content = std::fs::read_to_string(fdinfo_path.as_ref())?; - let gtt = RE_DRM_MEMORY_GTT - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); + let driver = RE_DRM_DRIVER + .captures(&content) + .and_then(|captures| captures.get(1)) + .map(|capture| capture.as_str()) + .unwrap_or_default(); - let total_memory = RE_DRM_TOTAL_MEMORY - .captures(&content) - .and_then(|captures| captures.get(1)) - .and_then(|capture| capture.as_str().parse::().ok()) - .unwrap_or_default() - .saturating_mul(1024); - - let stats = GpuUsageStats { - gfx: gfx.saturating_add(render).saturating_add(compute), - mem: vram.saturating_add(gtt).saturating_add(total_memory), - enc: enc.saturating_add(video), - dec, - nvidia: false, - }; - - return Ok((gpu_identifier, stats)); + if !GPU_DRIVER_NAMES.contains(&driver) { + trace!("Driver {driver} is not known to be GPU-related, skipping"); + bail!("this is not a GPU"); } - bail!("unable to find gpu information in this fdinfo"); + let gpu_identifier = RE_DRM_PDEV + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| PciSlot::from_str(capture.as_str()).ok()) + .map(|pci_slot| GpuIdentifier::PciSlot(pci_slot)) + .unwrap_or_default(); + + let 
gfx = RE_DRM_ENGINE_GFX + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let render = RE_DRM_ENGINE_RENDER + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let compute = RE_DRM_ENGINE_COMPUTE + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let enc = RE_DRM_ENGINE_ENC + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let video = RE_DRM_ENGINE_VIDEO + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let dec = RE_DRM_ENGINE_DEC + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default(); + + let vram = RE_DRM_MEMORY_VRAM + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); + + let gtt = RE_DRM_MEMORY_GTT + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); + + let total_memory = RE_DRM_TOTAL_MEMORY + .captures(&content) + .and_then(|captures| captures.get(1)) + .and_then(|capture| capture.as_str().parse::().ok()) + .unwrap_or_default() + .saturating_mul(1024); + + let stats = GpuUsageStats { + gfx: gfx.saturating_add(render).saturating_add(compute), + mem: vram.saturating_add(gtt).saturating_add(total_memory), + enc: enc.saturating_add(video), + dec, + nvidia: false, + }; + + return Ok((gpu_identifier, stats)); } fn nvidia_gpu_stats_all(pid: i32) -> BTreeMap { + trace!("Gathering NVIDIA GPU stats…"); 
+ let mut return_map = BTreeMap::new(); for (pci_slot, _) in NVML_DEVICES.iter() { @@ -616,6 +787,7 @@ impl ProcessData { } fn nvidia_gpu_stats(pid: i32, pci_slot: PciSlot) -> Result { + trace!("Gathering GPU stats for NVIDIA GPU at {pci_slot}…"); let this_process_stats = NVIDIA_PROCESSES_STATS .read() .unwrap() @@ -650,6 +822,7 @@ impl ProcessData { } fn nvidia_process_infos() -> HashMap> { + trace!("Refreshing NVIDIA process infos…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { @@ -663,6 +836,7 @@ impl ProcessData { } fn nvidia_process_stats() -> HashMap> { + trace!("Refreshing NVIDIA process stats…"); let mut return_map = HashMap::new(); for (pci_slot, gpu) in NVML_DEVICES.iter() { diff --git a/src/bin/resources-processes.rs b/src/bin/resources-processes.rs index 64569716..154d01ad 100644 --- a/src/bin/resources-processes.rs +++ b/src/bin/resources-processes.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use log::{info, trace}; use process_data::ProcessData; use ron::ser::PrettyConfig; use std::io::{Read, Write}; @@ -18,6 +19,11 @@ struct Args { } fn main() -> Result<()> { + // Initialize logger + pretty_env_logger::init(); + + info!("Starting resources-processes…"); + let args = Args::parse(); if args.once { @@ -29,12 +35,14 @@ fn main() -> Result<()> { let mut buffer = [0; 1]; std::io::stdin().read_exact(&mut buffer)?; + trace!("Received character"); output(args.ron)?; } } fn output(ron: bool) -> Result<()> { + trace!("Gathering process data…"); let data = ProcessData::all_process_data()?; let encoded = if ron { @@ -50,10 +58,13 @@ fn output(ron: bool) -> Result<()> { let stdout = std::io::stdout(); let mut handle = stdout.lock(); + trace!("Sending content length ({})…", encoded.len()); handle.write_all(&len_byte_array)?; + trace!("Sending content…"); handle.write_all(&encoded)?; + trace!("Flushing…"); handle.flush()?; Ok(()) } diff --git a/src/ui/window.rs b/src/ui/window.rs index 081fe166..f47736ed 100644 --- a/src/ui/window.rs 
+++ b/src/ui/window.rs @@ -598,17 +598,32 @@ impl MainWindow { page.refresh_page(&gpu_data); } - std::mem::drop(apps_context); - /* * Npu */ let npu_pages = imp.npu_pages.borrow(); - for ((_, page), npu_data) in npu_pages.values().zip(npu_data) { + for ((_, page), mut npu_data) in npu_pages.values().zip(npu_data) { let page = page.content().and_downcast::().unwrap(); + + let processes_npu_fraction = apps_context.npu_fraction(npu_data.pci_slot); + npu_data.usage_fraction = Some(f64::max( + npu_data.usage_fraction.unwrap_or(0.0), + processes_npu_fraction.into(), + )); + + if npu_data.total_memory.is_some() { + let processes_npu_memory_fraction = apps_context.npu_mem(npu_data.pci_slot); + npu_data.used_memory = Some(usize::max( + npu_data.used_memory.unwrap_or(0), + processes_npu_memory_fraction as usize, + )); + } + page.refresh_page(&npu_data); } + std::mem::drop(apps_context); + /* * Cpu */ diff --git a/src/utils/app.rs b/src/utils/app.rs index 80fbc4e2..326cab94 100644 --- a/src/utils/app.rs +++ b/src/utils/app.rs @@ -12,7 +12,7 @@ use gtk::{ }; use lazy_regex::{lazy_regex, Lazy, Regex}; use log::{debug, info, trace}; -use process_data::{Containerization, GpuIdentifier, ProcessData}; +use process_data::{pci_slot::PciSlot, Containerization, GpuIdentifier, ProcessData}; use crate::i18n::i18n; @@ -640,6 +640,52 @@ impl AppsContext { .clamp(0.0, 1.0) } + pub fn npu_fraction(&self, pci_slot: PciSlot) -> f32 { + self.processes_iter() + .map(|process| { + ( + &process.data.npu_usage_stats, + &process.npu_usage_stats_last, + process.data.timestamp, + process.timestamp_last, + ) + }) + .map(|(new, old, timestamp, timestamp_last)| { + ( + new.get(&pci_slot), + old.get(&pci_slot), + timestamp, + timestamp_last, + ) + }) + .filter_map(|(new, old, timestamp, timestamp_last)| match (new, old) { + (Some(new), Some(old)) => Some((new, old, timestamp, timestamp_last)), + _ => None, + }) + .map(|(new, old, timestamp, timestamp_last)| { + if old.usage == 0 { + 0.0 + } else { + 
((new.usage.saturating_sub(old.usage) as f32) + / (timestamp.saturating_sub(timestamp_last) as f32)) + .finite_or_default() + / 1_000_000.0 + } + }) + .sum::() + .clamp(0.0, 1.0) + } + + pub fn npu_mem(&self, pci_slot: PciSlot) -> u64 { + self.processes_iter() + .map(|process| process.data.npu_usage_stats.get(&pci_slot)) + .map(|stats| match stats { + Some(stats) => stats.mem, + None => 0, + }) + .sum() + } + fn app_associated_with_process(&self, process: &Process) -> Option { // TODO: tidy this up // ↓ look for whether we can find an ID in the cgroup @@ -797,6 +843,7 @@ impl AppsContext { old_process.read_bytes_last = old_process.data.read_bytes; old_process.write_bytes_last = old_process.data.write_bytes; old_process.gpu_usage_stats_last = old_process.data.gpu_usage_stats.clone(); + old_process.npu_usage_stats_last = old_process.data.npu_usage_stats.clone(); old_process.data = process_data.clone(); } else { diff --git a/src/utils/gpu/nvidia.rs b/src/utils/gpu/nvidia.rs index 65cbe824..48c9cf4d 100644 --- a/src/utils/gpu/nvidia.rs +++ b/src/utils/gpu/nvidia.rs @@ -10,20 +10,16 @@ use process_data::GpuIdentifier; use std::{path::PathBuf, sync::LazyLock}; static NVML: LazyLock> = LazyLock::new(|| { - let nvml = Nvml::init(); - - if let Err(error) = nvml.as_ref() { - warn!("Connection to NVML failed, reason: {error}"); - if *IS_FLATPAK { - warn!("This can occur when the version of the NVIDIA Flatpak runtime (org.freedesktop.Platform.GL.nvidia) \ - and the version of the natively installed NVIDIA driver do not match. Consider updating both your system \ - and Flatpak packages before opening an issue.") - } - } else { - debug!("Successfully connected to NVML"); - } - - nvml + Nvml::init() + .inspect_err(|err| { + warn!("Unable to connect to NVML: {err}"); + if *IS_FLATPAK { + warn!("This can occur when the version of the NVIDIA Flatpak runtime \ + (org.freedesktop.Platform.GL.nvidia) and the version of the natively installed NVIDIA driver do not \ + match. 
Consider updating both your system and Flatpak packages before opening an issue."); + } + }) + .inspect(|_| debug!("Successfully connected to NVML")) }); use crate::utils::{pci::Device, IS_FLATPAK}; diff --git a/src/utils/npu/amd.rs b/src/utils/npu/amd.rs new file mode 100644 index 00000000..8eba9c9a --- /dev/null +++ b/src/utils/npu/amd.rs @@ -0,0 +1,98 @@ +use anyhow::Result; +use process_data::pci_slot::PciSlot; + +use std::path::PathBuf; + +use crate::utils::pci::Device; + +use super::NpuImpl; + +#[derive(Debug, Clone, Default)] + +pub struct AmdNpu { + pub device: Option<&'static Device>, + pub pci_slot: PciSlot, + pub driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, +} + +impl AmdNpu { + pub fn new( + device: Option<&'static Device>, + pci_slot: PciSlot, + driver: String, + sysfs_path: PathBuf, + first_hwmon_path: Option, + ) -> Self { + Self { + device, + pci_slot, + driver, + sysfs_path, + first_hwmon_path, + } + } +} + +impl NpuImpl for AmdNpu { + fn device(&self) -> Option<&'static Device> { + self.device + } + + fn pci_slot(&self) -> PciSlot { + self.pci_slot + } + + fn driver(&self) -> String { + self.driver.clone() + } + + fn sysfs_path(&self) -> PathBuf { + self.sysfs_path.clone() + } + + fn first_hwmon(&self) -> Option { + self.first_hwmon_path.clone() + } + + fn name(&self) -> Result { + self.drm_name() + } + + fn usage(&self) -> Result { + self.drm_usage().map(|usage| usage as f64 / 100.0) + } + + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) + } + + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) + } + + fn temperature(&self) -> Result { + self.hwmon_temperature() + } + + fn power_usage(&self) -> Result { + self.hwmon_power_usage() + } + + fn core_frequency(&self) -> Result { + self.hwmon_core_frequency() + } + + fn memory_frequency(&self) -> Result { + self.hwmon_vram_frequency() + } + + fn power_cap(&self) -> Result { + self.hwmon_power_cap() + } 
+ + fn power_cap_max(&self) -> Result { + self.hwmon_power_cap_max() + } +} diff --git a/src/utils/npu/intel.rs b/src/utils/npu/intel.rs index bf9990a4..62029c05 100644 --- a/src/utils/npu/intel.rs +++ b/src/utils/npu/intel.rs @@ -82,12 +82,12 @@ impl NpuImpl for IntelNpu { Ok((delta_busy_time / delta_timestamp) / 1000.0) } - fn used_vram(&self) -> Result { - self.drm_used_vram().map(|usage| usage as usize) + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { - self.drm_total_vram().map(|usage| usage as usize) + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) } fn temperature(&self) -> Result { diff --git a/src/utils/npu/mod.rs b/src/utils/npu/mod.rs index a263fd87..d1a94c93 100644 --- a/src/utils/npu/mod.rs +++ b/src/utils/npu/mod.rs @@ -1,6 +1,8 @@ +mod amd; mod intel; mod other; +use amd::AmdNpu; use anyhow::{bail, Context, Result}; use log::{debug, info, trace}; use process_data::pci_slot::PciSlot; @@ -21,6 +23,7 @@ use self::{intel::IntelNpu, other::OtherNpu}; use super::pci::Vendor; +pub const VID_AMD: u16 = 4098; pub const VID_INTEL: u16 = 0x8086; #[derive(Debug)] @@ -50,8 +53,8 @@ impl NpuData { let usage_fraction = npu.usage().ok(); - let total_memory = npu.total_vram().ok(); - let used_memory = npu.used_vram().ok(); + let total_memory = npu.total_memory().ok(); + let used_memory = npu.used_memory().ok(); let clock_speed = npu.core_frequency().ok(); let vram_speed = npu.memory_frequency().ok(); @@ -83,6 +86,7 @@ impl NpuData { #[derive(Debug, Clone)] pub enum Npu { + Amd(AmdNpu), Intel(IntelNpu), Other(OtherNpu), } @@ -102,8 +106,8 @@ pub trait NpuImpl { fn name(&self) -> Result; fn usage(&self) -> Result; - fn used_vram(&self) -> Result; - fn total_vram(&self) -> Result; + fn used_memory(&self) -> Result; + fn total_memory(&self) -> Result; fn temperature(&self) -> Result; fn power_usage(&self) -> Result; fn core_frequency(&self) -> Result; @@ 
-151,14 +155,17 @@ pub trait NpuImpl { } fn drm_usage(&self) -> Result { - bail!("usage fallback not implemented") + // This is purely a guess for the future since no NPU driver has actually implemented this statistic + self.read_device_int("npu_busy_percent") } - fn drm_used_vram(&self) -> Result { + fn drm_used_memory(&self) -> Result { + // This is purely a guess for the future since no NPU driver has actually implemented this statistic self.read_device_int("mem_info_vram_used") } - fn drm_total_vram(&self) -> Result { + fn drm_total_memory(&self) -> Result { + // This is purely a guess for the future since no NPU driver has actually implemented this statistic self.read_device_int("mem_info_vram_total") } @@ -268,6 +275,17 @@ impl Npu { )), "Intel", ) + } else if vid == VID_AMD || driver == "amdxdna" { + ( + Npu::Amd(AmdNpu::new( + device, + pci_slot, + driver, + path.to_path_buf(), + hwmon_vec.first().cloned(), + )), + "AMD", + ) } else { ( Npu::Other(OtherNpu::new( @@ -294,6 +312,7 @@ impl Npu { pub fn get_vendor(&self) -> Result<&'static Vendor> { Ok(match self { + Npu::Amd(npu) => npu.device(), Npu::Intel(npu) => npu.device(), Npu::Other(npu) => npu.device(), } @@ -303,6 +322,7 @@ impl Npu { pub fn pci_slot(&self) -> PciSlot { match self { + Npu::Amd(npu) => npu.pci_slot(), Npu::Intel(npu) => npu.pci_slot(), Npu::Other(npu) => npu.pci_slot(), } @@ -310,6 +330,7 @@ impl Npu { pub fn driver(&self) -> String { match self { + Npu::Amd(npu) => npu.driver(), Npu::Intel(npu) => npu.driver(), Npu::Other(npu) => npu.driver(), } @@ -317,6 +338,7 @@ impl Npu { pub fn name(&self) -> Result { match self { + Npu::Amd(npu) => npu.name(), Npu::Intel(npu) => npu.name(), Npu::Other(npu) => npu.name(), } @@ -324,27 +346,31 @@ impl Npu { pub fn usage(&self) -> Result { match self { + Npu::Amd(npu) => npu.usage(), Npu::Intel(npu) => npu.usage(), Npu::Other(npu) => npu.usage(), } } - pub fn used_vram(&self) -> Result { + pub fn used_memory(&self) -> Result { match self { - 
Npu::Intel(npu) => npu.used_vram(), - Npu::Other(npu) => npu.used_vram(), + Npu::Amd(npu) => npu.used_memory(), + Npu::Intel(npu) => npu.used_memory(), + Npu::Other(npu) => npu.used_memory(), } } - pub fn total_vram(&self) -> Result { + pub fn total_memory(&self) -> Result { match self { - Npu::Intel(npu) => npu.total_vram(), - Npu::Other(npu) => npu.total_vram(), + Npu::Amd(npu) => npu.total_memory(), + Npu::Intel(npu) => npu.total_memory(), + Npu::Other(npu) => npu.total_memory(), } } pub fn temperature(&self) -> Result { match self { + Npu::Amd(npu) => npu.temperature(), Npu::Intel(npu) => npu.temperature(), Npu::Other(npu) => npu.temperature(), } @@ -352,6 +378,7 @@ impl Npu { pub fn power_usage(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_usage(), Npu::Intel(npu) => npu.power_usage(), Npu::Other(npu) => npu.power_usage(), } @@ -359,6 +386,7 @@ impl Npu { pub fn core_frequency(&self) -> Result { match self { + Npu::Amd(npu) => npu.core_frequency(), Npu::Intel(npu) => npu.core_frequency(), Npu::Other(npu) => npu.core_frequency(), } @@ -366,6 +394,7 @@ impl Npu { pub fn memory_frequency(&self) -> Result { match self { + Npu::Amd(npu) => npu.memory_frequency(), Npu::Intel(npu) => npu.memory_frequency(), Npu::Other(npu) => npu.memory_frequency(), } @@ -373,6 +402,7 @@ impl Npu { pub fn power_cap(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_cap(), Npu::Intel(npu) => npu.power_cap(), Npu::Other(npu) => npu.power_cap(), } @@ -380,6 +410,7 @@ impl Npu { pub fn power_cap_max(&self) -> Result { match self { + Npu::Amd(npu) => npu.power_cap_max(), Npu::Intel(npu) => npu.power_cap_max(), Npu::Other(npu) => npu.power_cap_max(), } diff --git a/src/utils/npu/other.rs b/src/utils/npu/other.rs index 2104e303..8f851850 100644 --- a/src/utils/npu/other.rs +++ b/src/utils/npu/other.rs @@ -64,12 +64,12 @@ impl NpuImpl for OtherNpu { self.drm_usage().map(|usage| usage as f64 / 100.0) } - fn used_vram(&self) -> Result { - self.drm_used_vram().map(|usage| 
usage as usize) + fn used_memory(&self) -> Result { + self.drm_used_memory().map(|usage| usage as usize) } - fn total_vram(&self) -> Result { - self.drm_total_vram().map(|usage| usage as usize) + fn total_memory(&self) -> Result { + self.drm_total_memory().map(|usage| usage as usize) } fn temperature(&self) -> Result { diff --git a/src/utils/process.rs b/src/utils/process.rs index 83cc865c..fc763a8f 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -1,7 +1,9 @@ use anyhow::{bail, Context, Result}; use config::LIBEXECDIR; use log::{debug, error, info, trace}; -use process_data::{GpuIdentifier, GpuUsageStats, Niceness, ProcessData}; +use process_data::{ + pci_slot::PciSlot, GpuIdentifier, GpuUsageStats, Niceness, NpuUsageStats, ProcessData, +}; use std::{ collections::BTreeMap, ffi::{OsStr, OsString}, @@ -36,18 +38,29 @@ static COMPANION_PROCESS: LazyLock> = LazyLock: let child = if *IS_FLATPAK { debug!("Spawning resources-processes in Flatpak mode ({proxy_path})"); Command::new(FLATPAK_SPAWN) - .args(["--host", proxy_path.as_str()]) + .args([ + &format!( + "--env=RUST_LOG={}", + std::env::var("RUST_LOG").unwrap_or("warn".into()) + ), + "--host", + proxy_path.as_str(), + ]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() } else { debug!("Spawning resources-processes in native mode ({proxy_path})"); Command::new(proxy_path) + .env( + "RUST_LOG", + std::env::var("RUST_LOG").unwrap_or("warn".into()), + ) .stdin(Stdio::piped()) .stdout(Stdio::piped()) - .stderr(Stdio::null()) + .stderr(Stdio::inherit()) .spawn() .unwrap() }; @@ -70,6 +83,7 @@ pub struct Process { pub read_bytes_last: Option, pub write_bytes_last: Option, pub gpu_usage_stats_last: BTreeMap, + pub npu_usage_stats_last: BTreeMap, pub display_name: String, } @@ -172,6 +186,7 @@ impl Process { read_bytes_last, write_bytes_last, gpu_usage_stats_last: Default::default(), + 
npu_usage_stats_last: Default::default(), display_name, } } @@ -478,6 +493,38 @@ impl Process { .sum() } + #[must_use] + pub fn npu_usage(&self) -> f32 { + let mut returned_npu_usage = 0.0; + for (npu, usage) in &self.data.npu_usage_stats { + if let Some(old_usage) = self.npu_usage_stats_last.get(npu) { + let this_npu_usage = if old_usage.usage == 0 { + 0.0 + } else { + ((usage.usage.saturating_sub(old_usage.usage) as f32) + / (self.data.timestamp.saturating_sub(self.timestamp_last) as f32) + .finite_or_default()) + / 1_000_000.0 + }; + + if this_npu_usage > returned_npu_usage { + returned_npu_usage = this_npu_usage; + } + } + } + + returned_npu_usage + } + + #[must_use] + pub fn npu_mem_usage(&self) -> u64 { + self.data + .npu_usage_stats + .values() + .map(|stats| stats.mem) + .sum() + } + #[must_use] pub fn starttime(&self) -> f64 { self.data.starttime as f64 / *TICK_RATE as f64