From b0ab3e99e7976c1662ebe4fac7bcbee5c0574b72 Mon Sep 17 00:00:00 2001 From: Clement Tsang <34804052+ClementTsang@users.noreply.github.com> Date: Sat, 18 Apr 2026 02:00:35 -0400 Subject: [PATCH] bug: address possible memory usage growth in AMD GPU stat gathering (#2042) * driveby changelog update * fix missing cleanup for AMD GPU stat gathering * update changelog * relegate counter to get_amd_vecs --- CHANGELOG.md | 5 ++ src/collection/amd.rs | 117 ++++++++++++++++++++++++++---------------- 2 files changed, 78 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71cb5909..f10e74aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ That said, these are more guidelines rather than hard rules, though the project ### Bug Fixes - [#2035](https://github.com/ClementTsang/bottom/pull/2035): Fix panic when deleting unicode words in search. +- [#2042](https://github.com/ClementTsang/bottom/pull/2042): Address possible memory usage growth for AMD GPU stat gathering on Linux. ### Features @@ -43,6 +44,9 @@ That said, these are more guidelines rather than hard rules, though the project - [#1955](https://github.com/ClementTsang/bottom/pull/1955): Fix mirrored documentation deploy to GitHub Pages. - [#1957](https://github.com/ClementTsang/bottom/pull/1957): Fix CI bug around deploying docs on release. - [#1958](https://github.com/ClementTsang/bottom/pull/1958): Fix cosmetic banner issue on docs page. +- [#1962](https://github.com/ClementTsang/bottom/pull/1962): Add code signing for Windows artifacts. +- [#1986](https://github.com/ClementTsang/bottom/pull/1986): Test and build NetBSD target. +- [#2009](https://github.com/ClementTsang/bottom/pull/2009): Configure CI job to publish to crates.io. - [#2037](https://github.com/ClementTsang/bottom/pull/2037): Update AMD GPU names list. ## [0.12.3] - 2026-01-01 @@ -91,6 +95,7 @@ That said, these are more guidelines rather than hard rules, though the project - [#1891](https://github.com/ClementTsang/bottom/pull/1891): Fix typos in codebase. - [#1896](https://github.com/ClementTsang/bottom/pull/1896): Rename Linux icon to avoid collision with generic "bottom" icon. + - [#1913](https://github.com/ClementTsang/bottom/pull/1913): Add `loongarch64-unknown-linux-gnu` binary build target in CI. - [#1914](https://github.com/ClementTsang/bottom/pull/1914): Add `aarch64-linux-android` binary build target in CI (with diff --git a/src/collection/amd.rs b/src/collection/amd.rs index 026cc5aa..5d097457 100644 --- a/src/collection/amd.rs +++ b/src/collection/amd.rs @@ -1,10 +1,10 @@ mod amd_gpu_marketing; use std::{ + cell::RefCell, fs::{self, read_to_string}, num::NonZeroU64, path::{Path, PathBuf}, - sync::{LazyLock, Mutex}, time::{Duration, Instant}, }; @@ -14,7 +14,7 @@ use super::linux::utils::is_device_awake; use crate::{ app::layout_manager::UsedWidgets, collection::{memory::MemData, processes::Pid}, - utils::int_hash::IntHashMap, + utils::int_hash::{IntHashMap, IntHashSet}, }; // TODO: May be able to clean up some of these, Option for example is a bit @@ -42,9 +42,11 @@ pub struct AmdGpuProc { pub compute_usage: u64, } -// needs previous state for usage calculation -static PROC_DATA: LazyLock>>> = - LazyLock::new(|| Mutex::new(HashMap::default())); +// TODO: This is kind of a hack. +thread_local! { + static PREV_PROC_DATA: RefCell>> = RefCell::new(HashMap::default()); + static LAST_CLEAN_COUNTER: RefCell = const { RefCell::new(0) }; +} fn get_amd_devs() -> Option> { let mut devices = Vec::new(); @@ -336,6 +338,11 @@ pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Opt let mut proc_vec = Vec::with_capacity(num_gpu); let mut total_mem = 0; + PREV_PROC_DATA.with_borrow_mut(|prev_proc_data| { + let device_path_set = device_path_list.iter().cloned().collect::>(); + prev_proc_data.retain(|k, _| device_path_set.contains(k)); + }); + for device_path in device_path_list { let device_name = get_amd_name(&device_path) .unwrap_or(amd_gpu_marketing::AMDGPU_DEFAULT_NAME.to_string()); @@ -358,56 +365,78 @@ pub fn get_amd_vecs(widgets_to_harvest: &UsedWidgets, prev_time: Instant) -> Opt if widgets_to_harvest.use_proc { if let Some(procs) = get_amd_fdinfo(&device_path) { - let mut proc_info = PROC_DATA.lock().expect("mutex is poisoned"); - let prev_fdinfo = proc_info.entry(device_path).or_default(); + PREV_PROC_DATA.with_borrow_mut(|prev_proc_data| { + let prev_fdinfo = prev_proc_data.entry(device_path).or_default(); + let mut seen_pids = IntHashSet::default(); - let mut procs_map = IntHashMap::default(); - for (proc_pid, proc_usage) in procs { - if let Some(prev_usage) = prev_fdinfo.get_mut(&proc_pid) { - // calculate deltas - let gfx_usage = - diff_usage(prev_usage.gfx_usage, proc_usage.gfx_usage, &interval); - let dma_usage = - diff_usage(prev_usage.dma_usage, proc_usage.dma_usage, &interval); - let enc_usage = - diff_usage(prev_usage.enc_usage, proc_usage.enc_usage, &interval); - let dec_usage = - diff_usage(prev_usage.dec_usage, proc_usage.dec_usage, &interval); - let uvd_usage = - diff_usage(prev_usage.uvd_usage, proc_usage.uvd_usage, &interval); - let vcn_usage = - diff_usage(prev_usage.vcn_usage, proc_usage.vcn_usage, &interval); - let vpe_usage = - diff_usage(prev_usage.vpe_usage, proc_usage.vpe_usage, &interval); + let mut procs_map = IntHashMap::default(); + for (proc_pid, proc_usage) in procs { + seen_pids.insert(proc_pid); + if let Some(prev_usage) = prev_fdinfo.get_mut(&proc_pid) { + // calculate deltas + let gfx_usage = + diff_usage(prev_usage.gfx_usage, proc_usage.gfx_usage, &interval); + let dma_usage = + diff_usage(prev_usage.dma_usage, proc_usage.dma_usage, &interval); + let enc_usage = + diff_usage(prev_usage.enc_usage, proc_usage.enc_usage, &interval); + let dec_usage = + diff_usage(prev_usage.dec_usage, proc_usage.dec_usage, &interval); + let uvd_usage = + diff_usage(prev_usage.uvd_usage, proc_usage.uvd_usage, &interval); + let vcn_usage = + diff_usage(prev_usage.vcn_usage, proc_usage.vcn_usage, &interval); + let vpe_usage = + diff_usage(prev_usage.vpe_usage, proc_usage.vpe_usage, &interval); - // combined usage - let gpu_util_wide = gfx_usage - + dma_usage - + enc_usage - + dec_usage - + uvd_usage - + vcn_usage - + vpe_usage; + // combined usage + let gpu_util_wide = gfx_usage + + dma_usage + + enc_usage + + dec_usage + + uvd_usage + + vcn_usage + + vpe_usage; - let gpu_util: u32 = gpu_util_wide.try_into().unwrap_or(0); + let gpu_util: u32 = gpu_util_wide.try_into().unwrap_or(0); - if gpu_util > 0 || proc_usage.vram_usage > 0 { - procs_map.insert(proc_pid, (proc_usage.vram_usage, gpu_util)); + if gpu_util > 0 || proc_usage.vram_usage > 0 { + procs_map.insert(proc_pid, (proc_usage.vram_usage, gpu_util)); + } + + *prev_usage = proc_usage; + } else { + prev_fdinfo.insert(proc_pid, proc_usage); } - - *prev_usage = proc_usage; - } else { - prev_fdinfo.insert(proc_pid, proc_usage); } - } - if !procs_map.is_empty() { - proc_vec.push(procs_map); - } + prev_fdinfo.retain(|k, _| seen_pids.contains(k)); + + if !procs_map.is_empty() { + proc_vec.push(procs_map); + } + }); } } } + // Bit of a hacky way to keep this trimmed. Ain't pretty but it should work. + LAST_CLEAN_COUNTER.with_borrow_mut(|counter| { + *counter += 1; + + if *counter >= 300 { + PREV_PROC_DATA.with_borrow_mut(|prev_proc_data| { + for prev_fdinfo in prev_proc_data.values_mut() { + prev_fdinfo.shrink_to_fit(); + } + + prev_proc_data.shrink_to_fit(); + }); + + *counter = 0; + } + }); + Some(AmdGpuData { memory: (!mem_vec.is_empty()).then_some(mem_vec), procs: (!proc_vec.is_empty()).then_some((total_mem, proc_vec)),