mirror of
https://github.com/henrygd/beszel.git
synced 2026-02-14 04:56:03 +00:00
Compare commits
8 Commits
docker-24-
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
283fa9d5c2 | ||
|
|
7d6c0caafc | ||
|
|
04d54a3efc | ||
|
|
14ecb1b069 | ||
|
|
1f1a448aef | ||
|
|
e816ea143a | ||
|
|
2230097dc7 | ||
|
|
25c77c5664 |
7
.github/workflows/inactivity-actions.yml
vendored
7
.github/workflows/inactivity-actions.yml
vendored
@@ -6,6 +6,7 @@ on:
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
actions: write
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
@@ -48,6 +49,9 @@ jobs:
|
||||
# Action can not skip PRs, set it to 100 years to cover it.
|
||||
days-before-pr-stale: 36524
|
||||
|
||||
# Max issues to process before early exit. Next run resumes from cache. GH API limit: 5000.
|
||||
operations-per-run: 1500
|
||||
|
||||
# Labels
|
||||
stale-issue-label: 'stale'
|
||||
remove-stale-when-updated: true
|
||||
@@ -56,4 +60,5 @@ jobs:
|
||||
|
||||
# Exemptions
|
||||
exempt-assignees: true
|
||||
exempt-milestones: true
|
||||
|
||||
exempt-milestones: true
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -10,6 +10,7 @@ dist
|
||||
*.exe
|
||||
internal/cmd/hub/hub
|
||||
internal/cmd/agent/agent
|
||||
agent.test
|
||||
node_modules
|
||||
build
|
||||
*timestamp*
|
||||
|
||||
40
Makefile
40
Makefile
@@ -3,6 +3,40 @@ OS ?= $(shell go env GOOS)
|
||||
ARCH ?= $(shell go env GOARCH)
|
||||
# Skip building the web UI if true
|
||||
SKIP_WEB ?= false
|
||||
# Controls NVML/glibc agent build tag behavior:
|
||||
# - auto (default): enable on linux/amd64 glibc hosts
|
||||
# - true: always enable
|
||||
# - false: always disable
|
||||
NVML ?= auto
|
||||
|
||||
# Detect glibc host for local linux/amd64 builds.
|
||||
HOST_GLIBC := $(shell \
|
||||
if [ "$(OS)" = "linux" ] && [ "$(ARCH)" = "amd64" ]; then \
|
||||
for p in /lib64/ld-linux-x86-64.so.2 /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 /lib/ld-linux-x86-64.so.2; do \
|
||||
[ -e "$$p" ] && { echo true; exit 0; }; \
|
||||
done; \
|
||||
if command -v ldd >/dev/null 2>&1; then \
|
||||
if ldd --version 2>&1 | tr '[:upper:]' '[:lower:]' | awk '/gnu libc|glibc/{found=1} END{exit !found}'; then \
|
||||
echo true; \
|
||||
else \
|
||||
echo false; \
|
||||
fi; \
|
||||
else \
|
||||
echo false; \
|
||||
fi; \
|
||||
else \
|
||||
echo false; \
|
||||
fi)
|
||||
|
||||
# Enable glibc build tag for NVML on supported Linux builds.
|
||||
AGENT_GO_TAGS :=
|
||||
ifeq ($(NVML),true)
|
||||
AGENT_GO_TAGS := -tags glibc
|
||||
else ifeq ($(NVML),auto)
|
||||
ifeq ($(HOST_GLIBC),true)
|
||||
AGENT_GO_TAGS := -tags glibc
|
||||
endif
|
||||
endif
|
||||
|
||||
# Set executable extension based on target OS
|
||||
EXE_EXT := $(if $(filter windows,$(OS)),.exe,)
|
||||
@@ -54,7 +88,7 @@ fetch-smartctl-conditional:
|
||||
|
||||
# Update build-agent to include conditional .NET build
|
||||
build-agent: tidy build-dotnet-conditional fetch-smartctl-conditional
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build $(AGENT_GO_TAGS) -o ./build/beszel-agent_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/agent
|
||||
|
||||
build-hub: tidy $(if $(filter false,$(SKIP_WEB)),build-web-ui)
|
||||
GOOS=$(OS) GOARCH=$(ARCH) go build -o ./build/beszel_$(OS)_$(ARCH)$(EXE_EXT) -ldflags "-w -s" ./internal/cmd/hub
|
||||
@@ -90,9 +124,9 @@ dev-hub:
|
||||
|
||||
dev-agent:
|
||||
@if command -v entr >/dev/null 2>&1; then \
|
||||
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
find ./internal/cmd/agent/*.go ./agent/*.go | entr -r go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
else \
|
||||
go run github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
go run $(AGENT_GO_TAGS) github.com/henrygd/beszel/internal/cmd/agent; \
|
||||
fi
|
||||
|
||||
build-dotnet:
|
||||
|
||||
95
agent/emmc_common.go
Normal file
95
agent/emmc_common.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// isEmmcBlockName reports whether name is a whole-device MMC block name:
// the literal prefix "mmcblk" followed by one or more ASCII digits.
// Anything with a further suffix (for example the partition "mmcblk0p1")
// is rejected.
func isEmmcBlockName(name string) bool {
	const prefix = "mmcblk"

	// Must be strictly longer than the prefix so at least one digit follows.
	if len(name) <= len(prefix) || name[:len(prefix)] != prefix {
		return false
	}
	for _, r := range name[len(prefix):] {
		isDigit := r >= '0' && r <= '9'
		if !isDigit {
			return false
		}
	}
	return true
}
|
||||
|
||||
// parseHexOrDecByte parses s as a single byte value. Surrounding whitespace is
// ignored. Input starting with "0x" or "0X" is read as hexadecimal; anything
// else is read as decimal. The boolean is false for empty input, a bare hex
// prefix, non-numeric text, or a value that does not fit in 8 bits.
func parseHexOrDecByte(s string) (uint8, bool) {
	text := strings.TrimSpace(s)
	if len(text) == 0 {
		return 0, false
	}

	radix := 10
	if len(text) >= 2 && text[0] == '0' && (text[1] == 'x' || text[1] == 'X') {
		radix, text = 16, text[2:]
	}

	// bitSize 8 makes ParseUint reject anything above 255.
	value, err := strconv.ParseUint(text, radix, 8)
	if err != nil {
		return 0, false
	}
	return uint8(value), true
}
|
||||
|
||||
func parseHexBytePair(s string) (uint8, uint8, bool) {
|
||||
fields := strings.Fields(s)
|
||||
if len(fields) < 2 {
|
||||
return 0, 0, false
|
||||
}
|
||||
a, okA := parseHexOrDecByte(fields[0])
|
||||
b, okB := parseHexOrDecByte(fields[1])
|
||||
if !okA && !okB {
|
||||
return 0, 0, false
|
||||
}
|
||||
return a, b, true
|
||||
}
|
||||
|
||||
// emmcSmartStatus converts a pre-EOL value into a SMART-style status string:
// 0x01 => "PASSED", 0x02 => "WARNING", 0x03 => "FAILED", and anything else
// (including 0) => "UNKNOWN".
func emmcSmartStatus(preEOL uint8) string {
	// Index 0 is intentionally left empty; it falls through to UNKNOWN.
	known := [...]string{0x01: "PASSED", 0x02: "WARNING", 0x03: "FAILED"}
	if preEOL >= 0x01 && int(preEOL) < len(known) {
		return known[preEOL]
	}
	return "UNKNOWN"
}
|
||||
|
||||
// emmcPreEOLString renders a pre-EOL value as two-digit lowercase hex, with a
// label appended for the three recognised values: 0x01 (normal),
// 0x02 (warning) and 0x03 (urgent). Other values are rendered as bare hex.
func emmcPreEOLString(preEOL uint8) string {
	var label string
	switch preEOL {
	case 0x01:
		label = " (normal)"
	case 0x02:
		label = " (warning)"
	case 0x03:
		label = " (urgent)"
	}
	return fmt.Sprintf("0x%02x", preEOL) + label
}
|
||||
|
||||
// emmcLifeTimeString renders a device-life-time estimate byte as two-digit
// lowercase hex plus a human-readable range.
//
// JEDEC eMMC: 0x01..0x0A => 0-100% used in 10% steps, 0x0B => exceeded.
// 0x00 is rendered as "not reported"; values above 0x0B are bare hex.
func emmcLifeTimeString(v uint8) string {
	hex := fmt.Sprintf("0x%02x", v)

	if v == 0 {
		return hex + " (not reported)"
	}
	if v <= 0x0A {
		// Each step covers a 10% band ending at v*10.
		upper := int(v) * 10
		return fmt.Sprintf("%s (%d-%d%% used)", hex, upper-10, upper)
	}
	if v == 0x0B {
		return hex + " (>100% used)"
	}
	return hex
}
|
||||
78
agent/emmc_common_test.go
Normal file
78
agent/emmc_common_test.go
Normal file
@@ -0,0 +1,78 @@
|
||||
package agent
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestParseHexOrDecByte(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want uint8
|
||||
ok bool
|
||||
}{
|
||||
{"0x01", 1, true},
|
||||
{"0X0b", 11, true},
|
||||
{"01", 1, true},
|
||||
{" 3 ", 3, true},
|
||||
{"", 0, false},
|
||||
{"0x", 0, false},
|
||||
{"nope", 0, false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
got, ok := parseHexOrDecByte(tt.in)
|
||||
if ok != tt.ok || got != tt.want {
|
||||
t.Fatalf("parseHexOrDecByte(%q) = (%d,%v), want (%d,%v)", tt.in, got, ok, tt.want, tt.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseHexBytePair(t *testing.T) {
|
||||
a, b, ok := parseHexBytePair("0x01 0x02\n")
|
||||
if !ok || a != 1 || b != 2 {
|
||||
t.Fatalf("parseHexBytePair hex = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
||||
}
|
||||
|
||||
a, b, ok = parseHexBytePair("01 02")
|
||||
if !ok || a != 1 || b != 2 {
|
||||
t.Fatalf("parseHexBytePair dec = (%d,%d,%v), want (1,2,true)", a, b, ok)
|
||||
}
|
||||
|
||||
_, _, ok = parseHexBytePair("0x01")
|
||||
if ok {
|
||||
t.Fatalf("parseHexBytePair short input ok=true, want false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmmcSmartStatus(t *testing.T) {
|
||||
if got := emmcSmartStatus(0x01); got != "PASSED" {
|
||||
t.Fatalf("emmcSmartStatus(0x01) = %q, want PASSED", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x02); got != "WARNING" {
|
||||
t.Fatalf("emmcSmartStatus(0x02) = %q, want WARNING", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x03); got != "FAILED" {
|
||||
t.Fatalf("emmcSmartStatus(0x03) = %q, want FAILED", got)
|
||||
}
|
||||
if got := emmcSmartStatus(0x00); got != "UNKNOWN" {
|
||||
t.Fatalf("emmcSmartStatus(0x00) = %q, want UNKNOWN", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsEmmcBlockName(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
ok bool
|
||||
}{
|
||||
{"mmcblk0", true},
|
||||
{"mmcblk1", true},
|
||||
{"mmcblk10", true},
|
||||
{"mmcblk0p1", false},
|
||||
{"sda", false},
|
||||
{"mmcblk", false},
|
||||
{"mmcblkA", false},
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := isEmmcBlockName(c.name); got != c.ok {
|
||||
t.Fatalf("isEmmcBlockName(%q) = %v, want %v", c.name, got, c.ok)
|
||||
}
|
||||
}
|
||||
}
|
||||
227
agent/emmc_linux.go
Normal file
227
agent/emmc_linux.go
Normal file
@@ -0,0 +1,227 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
// emmcSysfsRoot is a test hook; production value is "/sys".
|
||||
var emmcSysfsRoot = "/sys"
|
||||
|
||||
// emmcHealth is the raw identity and wear information gathered for one eMMC
// block device from its sysfs attributes.
type emmcHealth struct {
	// Identity strings read from the device's "name", "serial" and "prv" files.
	model    string
	serial   string
	revision string

	// capacity is the device size in bytes; 0 when it could not be determined.
	capacity uint64

	// preEOL comes from "pre_eol_info"; lifeA and lifeB are the two life-time
	// estimate bytes. Each is 0 when its source could not be read or parsed.
	preEOL, lifeA, lifeB uint8
}
|
||||
|
||||
func scanEmmcDevices() []*DeviceInfo {
|
||||
blockDir := filepath.Join(emmcSysfsRoot, "class", "block")
|
||||
entries, err := os.ReadDir(blockDir)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := make([]*DeviceInfo, 0, 2)
|
||||
for _, ent := range entries {
|
||||
name := ent.Name()
|
||||
if !isEmmcBlockName(name) {
|
||||
continue
|
||||
}
|
||||
|
||||
deviceDir := filepath.Join(blockDir, name, "device")
|
||||
if !hasEmmcHealthFiles(deviceDir) {
|
||||
continue
|
||||
}
|
||||
|
||||
devPath := filepath.Join("/dev", name)
|
||||
devices = append(devices, &DeviceInfo{
|
||||
Name: devPath,
|
||||
Type: "emmc",
|
||||
InfoName: devPath + " [eMMC]",
|
||||
Protocol: "MMC",
|
||||
})
|
||||
}
|
||||
|
||||
return devices
|
||||
}
|
||||
|
||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
if deviceInfo == nil || deviceInfo.Name == "" {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
base := filepath.Base(deviceInfo.Name)
|
||||
if !isEmmcBlockName(base) && !strings.EqualFold(deviceInfo.Type, "emmc") && !strings.EqualFold(deviceInfo.Type, "mmc") {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
health, ok := readEmmcHealth(base)
|
||||
if !ok {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Normalize the device type to keep pruning logic stable across refreshes.
|
||||
deviceInfo.Type = "emmc"
|
||||
|
||||
key := health.serial
|
||||
if key == "" {
|
||||
key = filepath.Join("/dev", base)
|
||||
}
|
||||
|
||||
status := emmcSmartStatus(health.preEOL)
|
||||
|
||||
attrs := []*smart.SmartAttribute{
|
||||
{
|
||||
Name: "PreEOLInfo",
|
||||
RawValue: uint64(health.preEOL),
|
||||
RawString: emmcPreEOLString(health.preEOL),
|
||||
},
|
||||
{
|
||||
Name: "DeviceLifeTimeEstA",
|
||||
RawValue: uint64(health.lifeA),
|
||||
RawString: emmcLifeTimeString(health.lifeA),
|
||||
},
|
||||
{
|
||||
Name: "DeviceLifeTimeEstB",
|
||||
RawValue: uint64(health.lifeB),
|
||||
RawString: emmcLifeTimeString(health.lifeB),
|
||||
},
|
||||
}
|
||||
|
||||
sm.Lock()
|
||||
defer sm.Unlock()
|
||||
|
||||
if _, exists := sm.SmartDataMap[key]; !exists {
|
||||
sm.SmartDataMap[key] = &smart.SmartData{}
|
||||
}
|
||||
|
||||
data := sm.SmartDataMap[key]
|
||||
data.ModelName = health.model
|
||||
data.SerialNumber = health.serial
|
||||
data.FirmwareVersion = health.revision
|
||||
data.Capacity = health.capacity
|
||||
data.Temperature = 0
|
||||
data.SmartStatus = status
|
||||
data.DiskName = filepath.Join("/dev", base)
|
||||
data.DiskType = "emmc"
|
||||
data.Attributes = attrs
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func readEmmcHealth(blockName string) (emmcHealth, bool) {
|
||||
var out emmcHealth
|
||||
|
||||
if !isEmmcBlockName(blockName) {
|
||||
return out, false
|
||||
}
|
||||
|
||||
deviceDir := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "device")
|
||||
preEOL, okPre := readHexByteFile(filepath.Join(deviceDir, "pre_eol_info"))
|
||||
|
||||
// Some kernels expose EXT_CSD lifetime via "life_time" (two bytes), others as
|
||||
// separate files. Support both.
|
||||
lifeA, lifeB, okLife := readLifeTime(deviceDir)
|
||||
|
||||
if !okPre && !okLife {
|
||||
return out, false
|
||||
}
|
||||
|
||||
out.preEOL = preEOL
|
||||
out.lifeA = lifeA
|
||||
out.lifeB = lifeB
|
||||
|
||||
out.model = readStringFile(filepath.Join(deviceDir, "name"))
|
||||
out.serial = readStringFile(filepath.Join(deviceDir, "serial"))
|
||||
out.revision = readStringFile(filepath.Join(deviceDir, "prv"))
|
||||
|
||||
if capBytes, ok := readBlockCapacityBytes(blockName); ok {
|
||||
out.capacity = capBytes
|
||||
}
|
||||
|
||||
return out, true
|
||||
}
|
||||
|
||||
func readLifeTime(deviceDir string) (uint8, uint8, bool) {
|
||||
if content, ok := readStringFileOK(filepath.Join(deviceDir, "life_time")); ok {
|
||||
a, b, ok := parseHexBytePair(content)
|
||||
return a, b, ok
|
||||
}
|
||||
|
||||
a, okA := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_a"))
|
||||
b, okB := readHexByteFile(filepath.Join(deviceDir, "device_life_time_est_typ_b"))
|
||||
if okA || okB {
|
||||
return a, b, true
|
||||
}
|
||||
return 0, 0, false
|
||||
}
|
||||
|
||||
func readBlockCapacityBytes(blockName string) (uint64, bool) {
|
||||
sizePath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "size")
|
||||
lbsPath := filepath.Join(emmcSysfsRoot, "class", "block", blockName, "queue", "logical_block_size")
|
||||
|
||||
sizeStr, ok := readStringFileOK(sizePath)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
sectors, err := strconv.ParseUint(sizeStr, 10, 64)
|
||||
if err != nil || sectors == 0 {
|
||||
return 0, false
|
||||
}
|
||||
|
||||
lbsStr, ok := readStringFileOK(lbsPath)
|
||||
logicalBlockSize := uint64(512)
|
||||
if ok {
|
||||
if parsed, err := strconv.ParseUint(lbsStr, 10, 64); err == nil && parsed > 0 {
|
||||
logicalBlockSize = parsed
|
||||
}
|
||||
}
|
||||
|
||||
return sectors * logicalBlockSize, true
|
||||
}
|
||||
|
||||
func readHexByteFile(path string) (uint8, bool) {
|
||||
content, ok := readStringFileOK(path)
|
||||
if !ok {
|
||||
return 0, false
|
||||
}
|
||||
b, ok := parseHexOrDecByte(content)
|
||||
return b, ok
|
||||
}
|
||||
|
||||
func readStringFile(path string) string {
|
||||
content, _ := readStringFileOK(path)
|
||||
return content
|
||||
}
|
||||
|
||||
// readStringFileOK reads the whole file at path and returns its content with
// leading and trailing whitespace removed. The boolean is false (and the
// string empty) when the file cannot be read.
func readStringFileOK(path string) (string, bool) {
	raw, err := os.ReadFile(path)
	if err == nil {
		return strings.TrimSpace(string(raw)), true
	}
	return "", false
}
|
||||
|
||||
// hasEmmcHealthFiles reports whether deviceDir contains at least one entry
// named like an eMMC health attribute: "pre_eol_info", "life_time",
// "device_life_time_est_typ_a" or "device_life_time_est_typ_b". It returns
// false when the directory cannot be listed.
func hasEmmcHealthFiles(deviceDir string) bool {
	entries, err := os.ReadDir(deviceDir)
	if err != nil {
		return false
	}
	for i := range entries {
		n := entries[i].Name()
		if n == "pre_eol_info" || n == "life_time" ||
			n == "device_life_time_est_typ_a" || n == "device_life_time_est_typ_b" {
			return true
		}
	}
	return false
}
|
||||
80
agent/emmc_linux_test.go
Normal file
80
agent/emmc_linux_test.go
Normal file
@@ -0,0 +1,80 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/smart"
|
||||
)
|
||||
|
||||
func TestEmmcMockSysfsScanAndCollect(t *testing.T) {
|
||||
tmp := t.TempDir()
|
||||
prev := emmcSysfsRoot
|
||||
emmcSysfsRoot = tmp
|
||||
t.Cleanup(func() { emmcSysfsRoot = prev })
|
||||
|
||||
// Fake: /sys/class/block/mmcblk0
|
||||
mmcDeviceDir := filepath.Join(tmp, "class", "block", "mmcblk0", "device")
|
||||
mmcQueueDir := filepath.Join(tmp, "class", "block", "mmcblk0", "queue")
|
||||
if err := os.MkdirAll(mmcDeviceDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := os.MkdirAll(mmcQueueDir, 0o755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
write := func(path, content string) {
|
||||
t.Helper()
|
||||
if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
write(filepath.Join(mmcDeviceDir, "pre_eol_info"), "0x02\n")
|
||||
write(filepath.Join(mmcDeviceDir, "life_time"), "0x04 0x05\n")
|
||||
write(filepath.Join(mmcDeviceDir, "name"), "H26M52103FMR\n")
|
||||
write(filepath.Join(mmcDeviceDir, "serial"), "01234567\n")
|
||||
write(filepath.Join(mmcDeviceDir, "prv"), "0x08\n")
|
||||
write(filepath.Join(mmcQueueDir, "logical_block_size"), "512\n")
|
||||
write(filepath.Join(tmp, "class", "block", "mmcblk0", "size"), "1024\n") // sectors
|
||||
|
||||
devs := scanEmmcDevices()
|
||||
if len(devs) != 1 {
|
||||
t.Fatalf("scanEmmcDevices() = %d devices, want 1", len(devs))
|
||||
}
|
||||
if devs[0].Name != "/dev/mmcblk0" || devs[0].Type != "emmc" {
|
||||
t.Fatalf("scanEmmcDevices()[0] = %+v, want Name=/dev/mmcblk0 Type=emmc", devs[0])
|
||||
}
|
||||
|
||||
sm := &SmartManager{SmartDataMap: map[string]*smart.SmartData{}}
|
||||
ok, err := sm.collectEmmcHealth(devs[0])
|
||||
if err != nil || !ok {
|
||||
t.Fatalf("collectEmmcHealth() = (ok=%v, err=%v), want (true,nil)", ok, err)
|
||||
}
|
||||
if len(sm.SmartDataMap) != 1 {
|
||||
t.Fatalf("SmartDataMap len=%d, want 1", len(sm.SmartDataMap))
|
||||
}
|
||||
var got *smart.SmartData
|
||||
for _, v := range sm.SmartDataMap {
|
||||
got = v
|
||||
break
|
||||
}
|
||||
if got == nil {
|
||||
t.Fatalf("SmartDataMap value nil")
|
||||
}
|
||||
if got.DiskType != "emmc" || got.DiskName != "/dev/mmcblk0" {
|
||||
t.Fatalf("disk fields = (type=%q name=%q), want (emmc,/dev/mmcblk0)", got.DiskType, got.DiskName)
|
||||
}
|
||||
if got.SmartStatus != "WARNING" {
|
||||
t.Fatalf("SmartStatus=%q, want WARNING", got.SmartStatus)
|
||||
}
|
||||
if got.SerialNumber != "01234567" || got.ModelName == "" || got.Capacity == 0 {
|
||||
t.Fatalf("identity fields = (model=%q serial=%q cap=%d), want non-empty model, serial 01234567, cap>0", got.ModelName, got.SerialNumber, got.Capacity)
|
||||
}
|
||||
if len(got.Attributes) < 3 {
|
||||
t.Fatalf("attributes len=%d, want >= 3", len(got.Attributes))
|
||||
}
|
||||
}
|
||||
14
agent/emmc_stub.go
Normal file
14
agent/emmc_stub.go
Normal file
@@ -0,0 +1,14 @@
|
||||
//go:build !linux
|
||||
|
||||
package agent
|
||||
|
||||
// Non-Linux builds: eMMC health via sysfs is not available.
|
||||
|
||||
func scanEmmcDevices() []*DeviceInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (sm *SmartManager) collectEmmcHealth(deviceInfo *DeviceInfo) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
412
agent/gpu.go
412
agent/gpu.go
@@ -21,13 +21,10 @@ const (
|
||||
// Commands
|
||||
nvidiaSmiCmd string = "nvidia-smi"
|
||||
rocmSmiCmd string = "rocm-smi"
|
||||
amdgpuCmd string = "amdgpu" // internal cmd for sysfs collection
|
||||
tegraStatsCmd string = "tegrastats"
|
||||
nvtopCmd string = "nvtop"
|
||||
noGPUFoundMsg string = "no GPU found - see https://beszel.dev/guide/gpu"
|
||||
|
||||
// Polling intervals
|
||||
nvidiaSmiInterval string = "4" // in seconds
|
||||
tegraStatsInterval string = "3700" // in milliseconds
|
||||
rocmSmiInterval time.Duration = 4300 * time.Millisecond
|
||||
// Command retry and timeout constants
|
||||
retryWaitTime time.Duration = 5 * time.Second
|
||||
maxFailureRetries int = 5
|
||||
@@ -40,13 +37,7 @@ const (
|
||||
// GPUManager manages data collection for GPUs (either Nvidia or AMD)
|
||||
type GPUManager struct {
|
||||
sync.Mutex
|
||||
nvidiaSmi bool
|
||||
rocmSmi bool
|
||||
amdgpu bool
|
||||
tegrastats bool
|
||||
intelGpuStats bool
|
||||
nvml bool
|
||||
GpuDataMap map[string]*system.GPUData
|
||||
GpuDataMap map[string]*system.GPUData
|
||||
// lastAvgData stores the last calculated averages for each GPU
|
||||
// Used when a collection happens before new data arrives (Count == 0)
|
||||
lastAvgData map[string]system.GPUData
|
||||
@@ -87,6 +78,51 @@ type gpuCollector struct {
|
||||
|
||||
var errNoValidData = fmt.Errorf("no valid GPU data found") // Error for missing data
|
||||
|
||||
// collectorSource identifies a selectable GPU collector in GPU_COLLECTOR.
|
||||
type collectorSource string
|
||||
|
||||
const (
|
||||
collectorSourceNVTop collectorSource = collectorSource(nvtopCmd)
|
||||
collectorSourceNVML collectorSource = "nvml"
|
||||
collectorSourceNvidiaSMI collectorSource = collectorSource(nvidiaSmiCmd)
|
||||
collectorSourceIntelGpuTop collectorSource = collectorSource(intelGpuStatsCmd)
|
||||
collectorSourceAmdSysfs collectorSource = "amd_sysfs"
|
||||
collectorSourceRocmSMI collectorSource = collectorSource(rocmSmiCmd)
|
||||
collectorGroupNvidia string = "nvidia"
|
||||
collectorGroupIntel string = "intel"
|
||||
collectorGroupAmd string = "amd"
|
||||
)
|
||||
|
||||
func isValidCollectorSource(source collectorSource) bool {
|
||||
switch source {
|
||||
case collectorSourceNVTop,
|
||||
collectorSourceNVML,
|
||||
collectorSourceNvidiaSMI,
|
||||
collectorSourceIntelGpuTop,
|
||||
collectorSourceAmdSysfs,
|
||||
collectorSourceRocmSMI:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// gpuCapabilities records which GPU tools and sysfs interfaces were detected
// on the host. Each flag only states presence; choosing which collector to
// run is decided elsewhere.
type gpuCapabilities struct {
	hasNvidiaSmi,
	hasRocmSmi,
	hasAmdSysfs,
	hasTegrastats,
	hasIntelGpuTop,
	hasNvtop bool
}
|
||||
|
||||
// collectorDefinition describes one selectable GPU collector.
type collectorDefinition struct {
	// group is the vendor group the collector belongs to; at most one collector
	// per group is started. Empty for collectors that have no vendor group.
	group string
	// available tells whether the host capability backing this collector was detected.
	available bool
	// start launches the collector and reports whether it was started.
	// onFailure may be nil; collectors that support it call it when they fail.
	start func(onFailure func()) bool
	// deprecationWarning, when non-empty, is logged before the collector starts.
	deprecationWarning string
}
|
||||
|
||||
// starts and manages the ongoing collection of GPU data for the specified GPU management utility
|
||||
func (c *gpuCollector) start() {
|
||||
for {
|
||||
@@ -392,93 +428,257 @@ func (gm *GPUManager) storeSnapshot(id string, gpu *system.GPUData, cacheKey uin
|
||||
gm.lastSnapshots[cacheKey][id] = snapshot
|
||||
}
|
||||
|
||||
// detectGPUs checks for the presence of GPU management tools (nvidia-smi, rocm-smi, tegrastats)
|
||||
// in the system path. It sets the corresponding flags in the GPUManager struct if any of these
|
||||
// tools are found. If none of the tools are found, it returns an error indicating that no GPU
|
||||
// management tools are available.
|
||||
func (gm *GPUManager) detectGPUs() error {
|
||||
// discoverGpuCapabilities checks for available GPU tooling and sysfs support.
|
||||
// It only reports capability presence and does not apply policy decisions.
|
||||
func (gm *GPUManager) discoverGpuCapabilities() gpuCapabilities {
|
||||
caps := gpuCapabilities{
|
||||
hasAmdSysfs: gm.hasAmdSysfs(),
|
||||
}
|
||||
if _, err := exec.LookPath(nvidiaSmiCmd); err == nil {
|
||||
gm.nvidiaSmi = true
|
||||
caps.hasNvidiaSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(rocmSmiCmd); err == nil {
|
||||
if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
|
||||
gm.amdgpu = true
|
||||
} else {
|
||||
gm.rocmSmi = true
|
||||
}
|
||||
} else if gm.hasAmdSysfs() {
|
||||
gm.amdgpu = true
|
||||
caps.hasRocmSmi = true
|
||||
}
|
||||
if _, err := exec.LookPath(tegraStatsCmd); err == nil {
|
||||
gm.tegrastats = true
|
||||
gm.nvidiaSmi = false
|
||||
caps.hasTegrastats = true
|
||||
}
|
||||
if _, err := exec.LookPath(intelGpuStatsCmd); err == nil {
|
||||
gm.intelGpuStats = true
|
||||
caps.hasIntelGpuTop = true
|
||||
}
|
||||
if gm.nvidiaSmi || gm.rocmSmi || gm.amdgpu || gm.tegrastats || gm.intelGpuStats || gm.nvml {
|
||||
return nil
|
||||
if _, err := exec.LookPath(nvtopCmd); err == nil {
|
||||
caps.hasNvtop = true
|
||||
}
|
||||
return fmt.Errorf("no GPU found - install nvidia-smi, rocm-smi, or intel_gpu_top")
|
||||
return caps
|
||||
}
|
||||
|
||||
// startCollector starts the appropriate GPU data collector based on the command
|
||||
func (gm *GPUManager) startCollector(command string) {
|
||||
collector := gpuCollector{
|
||||
name: command,
|
||||
bufSize: 10 * 1024,
|
||||
}
|
||||
switch command {
|
||||
case intelGpuStatsCmd:
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectIntelStats(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting Intel GPU data; see https://beszel.dev/guide/gpu", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
func hasAnyGpuCollector(caps gpuCapabilities) bool {
|
||||
return caps.hasNvidiaSmi || caps.hasRocmSmi || caps.hasAmdSysfs || caps.hasTegrastats || caps.hasIntelGpuTop || caps.hasNvtop
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startIntelCollector() {
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectIntelStats(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting Intel GPU data; see https://beszel.dev/guide/gpu", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
}()
|
||||
case nvidiaSmiCmd:
|
||||
collector.cmdArgs = []string{
|
||||
"-l", nvidiaSmiInterval,
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startNvidiaSmiCollector(intervalSeconds string) {
|
||||
collector := gpuCollector{
|
||||
name: nvidiaSmiCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{
|
||||
"-l", intervalSeconds,
|
||||
"--query-gpu=index,name,temperature.gpu,memory.used,memory.total,utilization.gpu,power.draw",
|
||||
"--format=csv,noheader,nounits",
|
||||
}
|
||||
collector.parse = gm.parseNvidiaData
|
||||
go collector.start()
|
||||
case tegraStatsCmd:
|
||||
collector.cmdArgs = []string{"--interval", tegraStatsInterval}
|
||||
collector.parse = gm.getJetsonParser()
|
||||
go collector.start()
|
||||
case amdgpuCmd:
|
||||
go func() {
|
||||
if err := gm.collectAmdStats(); err != nil {
|
||||
slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
|
||||
}
|
||||
}()
|
||||
case rocmSmiCmd:
|
||||
collector.cmdArgs = []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"}
|
||||
collector.parse = gm.parseAmdData
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := collector.collect(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
|
||||
}
|
||||
time.Sleep(rocmSmiInterval)
|
||||
}
|
||||
}()
|
||||
},
|
||||
parse: gm.parseNvidiaData,
|
||||
}
|
||||
go collector.start()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startTegraStatsCollector(intervalMilliseconds string) {
|
||||
collector := gpuCollector{
|
||||
name: tegraStatsCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{"--interval", intervalMilliseconds},
|
||||
parse: gm.getJetsonParser(),
|
||||
}
|
||||
go collector.start()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) startRocmSmiCollector(pollInterval time.Duration) {
|
||||
collector := gpuCollector{
|
||||
name: rocmSmiCmd,
|
||||
bufSize: 10 * 1024,
|
||||
cmdArgs: []string{"--showid", "--showtemp", "--showuse", "--showpower", "--showproductname", "--showmeminfo", "vram", "--json"},
|
||||
parse: gm.parseAmdData,
|
||||
}
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := collector.collect(); err != nil {
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting AMD GPU data via rocm-smi", "err", err)
|
||||
}
|
||||
time.Sleep(pollInterval)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func (gm *GPUManager) collectorDefinitions(caps gpuCapabilities) map[collectorSource]collectorDefinition {
|
||||
return map[collectorSource]collectorDefinition{
|
||||
collectorSourceNVML: {
|
||||
group: collectorGroupNvidia,
|
||||
available: caps.hasNvidiaSmi,
|
||||
start: func(_ func()) bool {
|
||||
return gm.startNvmlCollector()
|
||||
},
|
||||
},
|
||||
collectorSourceNvidiaSMI: {
|
||||
group: collectorGroupNvidia,
|
||||
available: caps.hasNvidiaSmi,
|
||||
start: func(_ func()) bool {
|
||||
gm.startNvidiaSmiCollector("4") // seconds
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceIntelGpuTop: {
|
||||
group: collectorGroupIntel,
|
||||
available: caps.hasIntelGpuTop,
|
||||
start: func(_ func()) bool {
|
||||
gm.startIntelCollector()
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceAmdSysfs: {
|
||||
group: collectorGroupAmd,
|
||||
available: caps.hasAmdSysfs,
|
||||
start: func(_ func()) bool {
|
||||
return gm.startAmdSysfsCollector()
|
||||
},
|
||||
},
|
||||
collectorSourceRocmSMI: {
|
||||
group: collectorGroupAmd,
|
||||
available: caps.hasRocmSmi,
|
||||
deprecationWarning: "rocm-smi is deprecated and may be removed in a future release",
|
||||
start: func(_ func()) bool {
|
||||
gm.startRocmSmiCollector(4300 * time.Millisecond)
|
||||
return true
|
||||
},
|
||||
},
|
||||
collectorSourceNVTop: {
|
||||
available: caps.hasNvtop,
|
||||
start: func(onFailure func()) bool {
|
||||
gm.startNvtopCollector("30", onFailure) // tens of milliseconds
|
||||
return true
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// parseCollectorPriority parses GPU_COLLECTOR and returns valid ordered entries.
|
||||
func parseCollectorPriority(value string) []collectorSource {
|
||||
parts := strings.Split(value, ",")
|
||||
priorities := make([]collectorSource, 0, len(parts))
|
||||
for _, raw := range parts {
|
||||
name := collectorSource(strings.TrimSpace(strings.ToLower(raw)))
|
||||
if !isValidCollectorSource(name) {
|
||||
if name != "" {
|
||||
slog.Warn("Ignoring unknown GPU collector", "collector", name)
|
||||
}
|
||||
continue
|
||||
}
|
||||
priorities = append(priorities, name)
|
||||
}
|
||||
return priorities
|
||||
}
|
||||
|
||||
// startNvmlCollector initializes NVML and starts its polling loop.
|
||||
func (gm *GPUManager) startNvmlCollector() bool {
|
||||
collector := &nvmlCollector{gm: gm}
|
||||
if err := collector.init(); err != nil {
|
||||
slog.Warn("Failed to initialize NVML", "err", err)
|
||||
return false
|
||||
}
|
||||
go collector.start()
|
||||
return true
|
||||
}
|
||||
|
||||
// startAmdSysfsCollector starts AMD GPU collection via sysfs.
|
||||
func (gm *GPUManager) startAmdSysfsCollector() bool {
|
||||
go func() {
|
||||
if err := gm.collectAmdStats(); err != nil {
|
||||
slog.Warn("Error collecting AMD GPU data via sysfs", "err", err)
|
||||
}
|
||||
}()
|
||||
return true
|
||||
}
|
||||
|
||||
// startCollectorsByPriority starts collectors in order with one source per vendor group.
|
||||
func (gm *GPUManager) startCollectorsByPriority(priorities []collectorSource, caps gpuCapabilities) int {
|
||||
definitions := gm.collectorDefinitions(caps)
|
||||
selectedGroups := make(map[string]bool, 3)
|
||||
started := 0
|
||||
for i, source := range priorities {
|
||||
definition, ok := definitions[source]
|
||||
if !ok || !definition.available {
|
||||
continue
|
||||
}
|
||||
// nvtop is not a vendor-specific collector, so should only be used if no other collectors are selected or it is first in GPU_COLLECTOR.
|
||||
if source == collectorSourceNVTop {
|
||||
if len(selectedGroups) > 0 {
|
||||
slog.Warn("Skipping nvtop because other collectors are selected")
|
||||
continue
|
||||
}
|
||||
// if nvtop fails, fall back to remaining collectors.
|
||||
remaining := append([]collectorSource(nil), priorities[i+1:]...)
|
||||
if definition.start(func() {
|
||||
gm.startCollectorsByPriority(remaining, caps)
|
||||
}) {
|
||||
started++
|
||||
return started
|
||||
}
|
||||
}
|
||||
group := definition.group
|
||||
if group == "" || selectedGroups[group] {
|
||||
continue
|
||||
}
|
||||
if definition.deprecationWarning != "" {
|
||||
slog.Warn(definition.deprecationWarning)
|
||||
}
|
||||
if definition.start(nil) {
|
||||
selectedGroups[group] = true
|
||||
started++
|
||||
}
|
||||
}
|
||||
return started
|
||||
}
|
||||
|
||||
// resolveLegacyCollectorPriority builds the default collector order when GPU_COLLECTOR is unset.
|
||||
func (gm *GPUManager) resolveLegacyCollectorPriority(caps gpuCapabilities) []collectorSource {
|
||||
priorities := make([]collectorSource, 0, 4)
|
||||
|
||||
if caps.hasNvidiaSmi && !caps.hasTegrastats {
|
||||
if nvml, _ := GetEnv("NVML"); nvml == "true" {
|
||||
priorities = append(priorities, collectorSourceNVML, collectorSourceNvidiaSMI)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceNvidiaSMI)
|
||||
}
|
||||
}
|
||||
|
||||
if caps.hasRocmSmi {
|
||||
if val, _ := GetEnv("AMD_SYSFS"); val == "true" {
|
||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
||||
} else {
|
||||
priorities = append(priorities, collectorSourceRocmSMI)
|
||||
}
|
||||
} else if caps.hasAmdSysfs {
|
||||
priorities = append(priorities, collectorSourceAmdSysfs)
|
||||
}
|
||||
|
||||
if caps.hasIntelGpuTop {
|
||||
priorities = append(priorities, collectorSourceIntelGpuTop)
|
||||
}
|
||||
|
||||
// Keep nvtop as a legacy last resort only when no vendor collector exists.
|
||||
if len(priorities) == 0 && caps.hasNvtop {
|
||||
priorities = append(priorities, collectorSourceNVTop)
|
||||
}
|
||||
return priorities
|
||||
}
|
||||
|
||||
// NewGPUManager creates and initializes a new GPUManager
|
||||
@@ -487,38 +687,30 @@ func NewGPUManager() (*GPUManager, error) {
|
||||
return nil, nil
|
||||
}
|
||||
var gm GPUManager
|
||||
if err := gm.detectGPUs(); err != nil {
|
||||
return nil, err
|
||||
caps := gm.discoverGpuCapabilities()
|
||||
if !hasAnyGpuCollector(caps) {
|
||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
gm.GpuDataMap = make(map[string]*system.GPUData)
|
||||
|
||||
if gm.nvidiaSmi {
|
||||
if nvml, _ := GetEnv("NVML"); nvml == "true" {
|
||||
gm.nvml = true
|
||||
gm.nvidiaSmi = false
|
||||
collector := &nvmlCollector{gm: &gm}
|
||||
if err := collector.init(); err == nil {
|
||||
go collector.start()
|
||||
} else {
|
||||
slog.Warn("Failed to initialize NVML, falling back to nvidia-smi", "err", err)
|
||||
gm.nvidiaSmi = true
|
||||
gm.startCollector(nvidiaSmiCmd)
|
||||
}
|
||||
} else {
|
||||
gm.startCollector(nvidiaSmiCmd)
|
||||
// Jetson devices should always use tegrastats (ignore GPU_COLLECTOR).
|
||||
if caps.hasTegrastats {
|
||||
gm.startTegraStatsCollector("3700")
|
||||
return &gm, nil
|
||||
}
|
||||
|
||||
// if GPU_COLLECTOR is set, start user-defined collectors.
|
||||
if collectorConfig, ok := GetEnv("GPU_COLLECTOR"); ok && strings.TrimSpace(collectorConfig) != "" {
|
||||
priorities := parseCollectorPriority(collectorConfig)
|
||||
if gm.startCollectorsByPriority(priorities, caps) == 0 {
|
||||
return nil, fmt.Errorf("no configured GPU collectors are available")
|
||||
}
|
||||
return &gm, nil
|
||||
}
|
||||
if gm.rocmSmi {
|
||||
gm.startCollector(rocmSmiCmd)
|
||||
}
|
||||
if gm.amdgpu {
|
||||
gm.startCollector(amdgpuCmd)
|
||||
}
|
||||
if gm.tegrastats {
|
||||
gm.startCollector(tegraStatsCmd)
|
||||
}
|
||||
if gm.intelGpuStats {
|
||||
gm.startCollector(intelGpuStatsCmd)
|
||||
|
||||
// auto-detect and start collectors when GPU_COLLECTOR is unset.
|
||||
if gm.startCollectorsByPriority(gm.resolveLegacyCollectorPriority(caps), caps) == 0 {
|
||||
return nil, fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
|
||||
return &gm, nil
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -15,6 +16,15 @@ import (
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
// amdgpuNameCache memoizes AMDGPU product-name lookups. hits maps a cache key
// to a resolved name, and misses records keys whose lookup already failed.
// The embedded RWMutex is held by the cache helpers while they access the maps.
var amdgpuNameCache = struct {
	sync.RWMutex
	hits   map[string]string
	misses map[string]struct{}
}{
	hits:   map[string]string{},
	misses: map[string]struct{}{},
}
|
||||
|
||||
// hasAmdSysfs returns true if any AMD GPU sysfs nodes are found
|
||||
func (gm *GPUManager) hasAmdSysfs() bool {
|
||||
cards, err := filepath.Glob("/sys/class/drm/card*/device/vendor")
|
||||
@@ -32,6 +42,7 @@ func (gm *GPUManager) hasAmdSysfs() bool {
|
||||
|
||||
// collectAmdStats collects AMD GPU metrics directly from sysfs to avoid the overhead of rocm-smi
|
||||
func (gm *GPUManager) collectAmdStats() error {
|
||||
sysfsPollInterval := 3000 * time.Millisecond
|
||||
cards, err := filepath.Glob("/sys/class/drm/card*")
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -70,10 +81,11 @@ func (gm *GPUManager) collectAmdStats() error {
|
||||
continue
|
||||
}
|
||||
failures = 0
|
||||
time.Sleep(rocmSmiInterval)
|
||||
time.Sleep(sysfsPollInterval)
|
||||
}
|
||||
}
|
||||
|
||||
// isAmdGpu checks whether a DRM card path belongs to AMD vendor ID 0x1002.
|
||||
func isAmdGpu(cardPath string) bool {
|
||||
vendorPath := filepath.Join(cardPath, "device/vendor")
|
||||
vendor, err := os.ReadFile(vendorPath)
|
||||
@@ -91,8 +103,17 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
||||
|
||||
// Read all sysfs values first (no lock needed - these can be slow)
|
||||
usage, usageErr := readSysfsFloat(filepath.Join(devicePath, "gpu_busy_percent"))
|
||||
memUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
|
||||
memTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
|
||||
vramUsed, memUsedErr := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_used"))
|
||||
vramTotal, _ := readSysfsFloat(filepath.Join(devicePath, "mem_info_vram_total"))
|
||||
memUsed := vramUsed
|
||||
memTotal := vramTotal
|
||||
// if gtt is present, add it to the memory used and total (https://github.com/henrygd/beszel/issues/1569#issuecomment-3837640484)
|
||||
if gttUsed, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_used")); err == nil && gttUsed > 0 {
|
||||
if gttTotal, err := readSysfsFloat(filepath.Join(devicePath, "mem_info_gtt_total")); err == nil {
|
||||
memUsed += gttUsed
|
||||
memTotal += gttTotal
|
||||
}
|
||||
}
|
||||
|
||||
var temp, power float64
|
||||
hwmons, _ := filepath.Glob(filepath.Join(devicePath, "hwmon/hwmon*"))
|
||||
@@ -133,6 +154,7 @@ func (gm *GPUManager) updateAmdGpuData(cardPath string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// readSysfsFloat reads and parses a numeric value from a sysfs file.
|
||||
func readSysfsFloat(path string) (float64, error) {
|
||||
val, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
@@ -141,6 +163,110 @@ func readSysfsFloat(path string) (float64, error) {
|
||||
return strconv.ParseFloat(strings.TrimSpace(string(val)), 64)
|
||||
}
|
||||
|
||||
// normalizeHexID canonicalizes a hex ID string: surrounding whitespace is
// removed, the text is lowercased, and a leading "0x" is dropped.
func normalizeHexID(id string) string {
	trimmed := strings.TrimSpace(id)
	lowered := strings.ToLower(trimmed)
	return strings.TrimPrefix(lowered, "0x")
}
|
||||
|
||||
// cacheKeyForAmdgpu returns the name-cache key for a device: "device:revision"
// when a revision is given, otherwise just the device ID.
func cacheKeyForAmdgpu(deviceID, revisionID string) string {
	if revisionID == "" {
		return deviceID
	}
	return deviceID + ":" + revisionID
}
|
||||
|
||||
// lookupAmdgpuNameInFile resolves an AMDGPU name from amdgpu.ids by device/revision.
|
||||
func lookupAmdgpuNameInFile(deviceID, revisionID, filePath string) (name string, exact bool, found bool) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return "", false, false
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
var byDevice string
|
||||
scanner := bufio.NewScanner(file)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if line == "" || strings.HasPrefix(line, "#") {
|
||||
continue
|
||||
}
|
||||
parts := strings.SplitN(line, ",", 3)
|
||||
if len(parts) != 3 {
|
||||
continue
|
||||
}
|
||||
|
||||
dev := normalizeHexID(parts[0])
|
||||
rev := normalizeHexID(parts[1])
|
||||
productName := strings.TrimSpace(parts[2])
|
||||
if dev == "" || productName == "" || dev != deviceID {
|
||||
continue
|
||||
}
|
||||
if byDevice == "" {
|
||||
byDevice = productName
|
||||
}
|
||||
if revisionID != "" && rev == revisionID {
|
||||
return productName, true, true
|
||||
}
|
||||
}
|
||||
if byDevice != "" {
|
||||
return byDevice, false, true
|
||||
}
|
||||
return "", false, false
|
||||
}
|
||||
|
||||
// getCachedAmdgpuName returns cached hit/miss status for the given device/revision.
|
||||
func getCachedAmdgpuName(deviceID, revisionID string) (name string, found bool, done bool) {
|
||||
// Build the list of cache keys to check. We always look up the exact device+revision key.
|
||||
// When revisionID is set, we also look up deviceID alone, since the cache may store a
|
||||
// device-only fallback when we couldn't resolve the exact revision.
|
||||
keys := []string{cacheKeyForAmdgpu(deviceID, revisionID)}
|
||||
if revisionID != "" {
|
||||
keys = append(keys, deviceID)
|
||||
}
|
||||
|
||||
knownMisses := 0
|
||||
amdgpuNameCache.RLock()
|
||||
defer amdgpuNameCache.RUnlock()
|
||||
for _, key := range keys {
|
||||
if name, ok := amdgpuNameCache.hits[key]; ok {
|
||||
return name, true, true
|
||||
}
|
||||
if _, ok := amdgpuNameCache.misses[key]; ok {
|
||||
knownMisses++
|
||||
}
|
||||
}
|
||||
// done=true means "don't bother doing slow lookup": we either found a name (above) or
|
||||
// every key we checked was already a known miss, so we've tried before and failed.
|
||||
return "", false, knownMisses == len(keys)
|
||||
}
|
||||
|
||||
// normalizeAmdgpuName trims surrounding whitespace from an AMDGPU product name
// and removes a trailing " Graphics" suffix.
func normalizeAmdgpuName(name string) string {
	trimmed := strings.TrimSpace(name)
	return strings.TrimSuffix(trimmed, " Graphics")
}
|
||||
|
||||
// cacheAmdgpuName stores a resolved AMDGPU name in the lookup cache.
|
||||
func cacheAmdgpuName(deviceID, revisionID, name string, exact bool) {
|
||||
name = normalizeAmdgpuName(name)
|
||||
amdgpuNameCache.Lock()
|
||||
defer amdgpuNameCache.Unlock()
|
||||
if exact && revisionID != "" {
|
||||
amdgpuNameCache.hits[cacheKeyForAmdgpu(deviceID, revisionID)] = name
|
||||
}
|
||||
amdgpuNameCache.hits[deviceID] = name
|
||||
}
|
||||
|
||||
// cacheMissingAmdgpuName records unresolved device/revision lookups.
|
||||
func cacheMissingAmdgpuName(deviceID, revisionID string) {
|
||||
amdgpuNameCache.Lock()
|
||||
defer amdgpuNameCache.Unlock()
|
||||
amdgpuNameCache.misses[deviceID] = struct{}{}
|
||||
if revisionID != "" {
|
||||
amdgpuNameCache.misses[cacheKeyForAmdgpu(deviceID, revisionID)] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// getAmdGpuName attempts to get a descriptive GPU name.
|
||||
// First tries product_name (rarely available), then looks up the PCI device ID.
|
||||
// Falls back to showing the raw device ID if not found in the lookup table.
|
||||
@@ -152,33 +278,24 @@ func getAmdGpuName(devicePath string) string {
|
||||
|
||||
// Read PCI device ID and look it up
|
||||
if deviceID, err := os.ReadFile(filepath.Join(devicePath, "device")); err == nil {
|
||||
id := strings.TrimPrefix(strings.ToLower(strings.TrimSpace(string(deviceID))), "0x")
|
||||
if name, ok := getRadeonNames()[id]; ok {
|
||||
return fmt.Sprintf("Radeon %s", name)
|
||||
id := normalizeHexID(string(deviceID))
|
||||
revision := ""
|
||||
if revBytes, revErr := os.ReadFile(filepath.Join(devicePath, "revision")); revErr == nil {
|
||||
revision = normalizeHexID(string(revBytes))
|
||||
}
|
||||
|
||||
if name, found, done := getCachedAmdgpuName(id, revision); found {
|
||||
return name
|
||||
} else if !done {
|
||||
if name, exact, ok := lookupAmdgpuNameInFile(id, revision, "/usr/share/libdrm/amdgpu.ids"); ok {
|
||||
cacheAmdgpuName(id, revision, name, exact)
|
||||
return normalizeAmdgpuName(name)
|
||||
}
|
||||
cacheMissingAmdgpuName(id, revision)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("AMD GPU (%s)", id)
|
||||
}
|
||||
|
||||
return "AMD GPU"
|
||||
}
|
||||
|
||||
// getRadeonNames returns the AMD GPU name lookup table, keyed by lowercase PCI
// device ID without the "0x" prefix. The table is built once on first use and
// the same map is returned on every later call.
// Device IDs from https://pci-ids.ucw.cz/read/PC/1002
var getRadeonNames = sync.OnceValue(func() map[string]string {
	entries := [...][2]string{
		{"7550", "RX 9070"},
		{"7590", "RX 9060 XT"},
		{"7551", "AI PRO R9700"},

		{"744c", "RX 7900"},

		{"1681", "680M"},

		{"7448", "PRO W7900"},
		{"745e", "PRO W7800"},
		{"7470", "PRO W7700"},
		{"73e3", "PRO W6600"},
		{"7422", "PRO W6400"},
		{"7341", "PRO W5500"},
	}
	table := make(map[string]string, len(entries))
	for _, entry := range entries {
		table[entry[0]] = entry[1]
	}
	return table
})
|
||||
|
||||
264
agent/gpu_amd_linux_test.go
Normal file
264
agent/gpu_amd_linux_test.go
Normal file
@@ -0,0 +1,264 @@
|
||||
//go:build linux
|
||||
|
||||
package agent
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNormalizeHexID(t *testing.T) {
|
||||
tests := []struct {
|
||||
in string
|
||||
want string
|
||||
}{
|
||||
{"0x1002", "1002"},
|
||||
{"C2", "c2"},
|
||||
{" 15BF ", "15bf"},
|
||||
{"0x15bf", "15bf"},
|
||||
{"", ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
subName := tt.in
|
||||
if subName == "" {
|
||||
subName = "empty_string"
|
||||
}
|
||||
t.Run(subName, func(t *testing.T) {
|
||||
got := normalizeHexID(tt.in)
|
||||
assert.Equal(t, tt.want, got)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCacheKeyForAmdgpu(t *testing.T) {
|
||||
tests := []struct {
|
||||
deviceID string
|
||||
revisionID string
|
||||
want string
|
||||
}{
|
||||
{"1114", "c2", "1114:c2"},
|
||||
{"15bf", "", "15bf"},
|
||||
{"1506", "c1", "1506:c1"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
got := cacheKeyForAmdgpu(tt.deviceID, tt.revisionID)
|
||||
assert.Equal(t, tt.want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadSysfsFloat(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
validPath := filepath.Join(dir, "val")
|
||||
require.NoError(t, os.WriteFile(validPath, []byte(" 42.5 \n"), 0o644))
|
||||
got, err := readSysfsFloat(validPath)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 42.5, got)
|
||||
|
||||
// Integer and scientific
|
||||
sciPath := filepath.Join(dir, "sci")
|
||||
require.NoError(t, os.WriteFile(sciPath, []byte("1e2"), 0o644))
|
||||
got, err = readSysfsFloat(sciPath)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 100.0, got)
|
||||
|
||||
// Missing file
|
||||
_, err = readSysfsFloat(filepath.Join(dir, "missing"))
|
||||
require.Error(t, err)
|
||||
|
||||
// Invalid content
|
||||
badPath := filepath.Join(dir, "bad")
|
||||
require.NoError(t, os.WriteFile(badPath, []byte("not a number"), 0o644))
|
||||
_, err = readSysfsFloat(badPath)
|
||||
require.Error(t, err)
|
||||
}
|
||||
|
||||
func TestIsAmdGpu(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
deviceDir := filepath.Join(dir, "device")
|
||||
require.NoError(t, os.MkdirAll(deviceDir, 0o755))
|
||||
|
||||
// AMD vendor 0x1002 -> true
|
||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x1002\n"), 0o644))
|
||||
assert.True(t, isAmdGpu(dir), "vendor 0x1002 should be AMD")
|
||||
|
||||
// Non-AMD vendor -> false
|
||||
require.NoError(t, os.WriteFile(filepath.Join(deviceDir, "vendor"), []byte("0x10de\n"), 0o644))
|
||||
assert.False(t, isAmdGpu(dir), "vendor 0x10de should not be AMD")
|
||||
|
||||
// Missing vendor file -> false
|
||||
require.NoError(t, os.Remove(filepath.Join(deviceDir, "vendor")))
|
||||
assert.False(t, isAmdGpu(dir), "missing vendor file should be false")
|
||||
}
|
||||
|
||||
func TestAmdgpuNameCacheRoundTrip(t *testing.T) {
|
||||
// Cache a name and retrieve it (unique key to avoid affecting other tests)
|
||||
deviceID, revisionID := "cachedev99", "00"
|
||||
cacheAmdgpuName(deviceID, revisionID, "AMD Test GPU 99 Graphics", true)
|
||||
|
||||
name, found, done := getCachedAmdgpuName(deviceID, revisionID)
|
||||
assert.True(t, found)
|
||||
assert.True(t, done)
|
||||
assert.Equal(t, "AMD Test GPU 99", name)
|
||||
|
||||
// Device-only key also stored
|
||||
name2, found2, _ := getCachedAmdgpuName(deviceID, "")
|
||||
assert.True(t, found2)
|
||||
assert.Equal(t, "AMD Test GPU 99", name2)
|
||||
|
||||
// Cache a miss
|
||||
cacheMissingAmdgpuName("missedev99", "ab")
|
||||
_, found3, done3 := getCachedAmdgpuName("missedev99", "ab")
|
||||
assert.False(t, found3)
|
||||
assert.True(t, done3, "done should be true so caller skips file lookup")
|
||||
}
|
||||
|
||||
func TestUpdateAmdGpuDataWithFakeSysfs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
writeGTT bool
|
||||
wantMemoryUsed float64
|
||||
wantMemoryTotal float64
|
||||
}{
|
||||
{
|
||||
name: "sums vram and gtt when gtt is present",
|
||||
writeGTT: true,
|
||||
wantMemoryUsed: bytesToMegabytes(1073741824 + 536870912),
|
||||
wantMemoryTotal: bytesToMegabytes(2147483648 + 4294967296),
|
||||
},
|
||||
{
|
||||
name: "falls back to vram when gtt is missing",
|
||||
writeGTT: false,
|
||||
wantMemoryUsed: bytesToMegabytes(1073741824),
|
||||
wantMemoryTotal: bytesToMegabytes(2147483648),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
cardPath := filepath.Join(dir, "card0")
|
||||
devicePath := filepath.Join(cardPath, "device")
|
||||
hwmonPath := filepath.Join(devicePath, "hwmon", "hwmon0")
|
||||
require.NoError(t, os.MkdirAll(hwmonPath, 0o755))
|
||||
|
||||
write := func(name, content string) {
|
||||
require.NoError(t, os.WriteFile(filepath.Join(devicePath, name), []byte(content), 0o644))
|
||||
}
|
||||
write("vendor", "0x1002")
|
||||
write("device", "0x1506")
|
||||
write("revision", "0xc1")
|
||||
write("gpu_busy_percent", "25")
|
||||
write("mem_info_vram_used", "1073741824")
|
||||
write("mem_info_vram_total", "2147483648")
|
||||
if tt.writeGTT {
|
||||
write("mem_info_gtt_used", "536870912")
|
||||
write("mem_info_gtt_total", "4294967296")
|
||||
}
|
||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "temp1_input"), []byte("45000"), 0o644))
|
||||
require.NoError(t, os.WriteFile(filepath.Join(hwmonPath, "power1_input"), []byte("20000000"), 0o644))
|
||||
|
||||
// Pre-cache name so getAmdGpuName returns a known value (it uses system amdgpu.ids path)
|
||||
cacheAmdgpuName("1506", "c1", "AMD Radeon 610M Graphics", true)
|
||||
|
||||
gm := &GPUManager{GpuDataMap: make(map[string]*system.GPUData)}
|
||||
ok := gm.updateAmdGpuData(cardPath)
|
||||
require.True(t, ok)
|
||||
|
||||
gpu, ok := gm.GpuDataMap["card0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "AMD Radeon 610M", gpu.Name)
|
||||
assert.Equal(t, 25.0, gpu.Usage)
|
||||
assert.Equal(t, tt.wantMemoryUsed, gpu.MemoryUsed)
|
||||
assert.Equal(t, tt.wantMemoryTotal, gpu.MemoryTotal)
|
||||
assert.Equal(t, 45.0, gpu.Temperature)
|
||||
assert.Equal(t, 20.0, gpu.Power)
|
||||
assert.Equal(t, 1.0, gpu.Count)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestLookupAmdgpuNameInFile(t *testing.T) {
|
||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
deviceID string
|
||||
revisionID string
|
||||
wantName string
|
||||
wantExact bool
|
||||
wantFound bool
|
||||
}{
|
||||
{
|
||||
name: "exact device and revision match",
|
||||
deviceID: "1114",
|
||||
revisionID: "c2",
|
||||
wantName: "AMD Radeon 860M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "exact match 15BF revision 01 returns 760M",
|
||||
deviceID: "15bf",
|
||||
revisionID: "01",
|
||||
wantName: "AMD Radeon 760M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "exact match 15BF revision 00 returns 780M",
|
||||
deviceID: "15bf",
|
||||
revisionID: "00",
|
||||
wantName: "AMD Radeon 780M Graphics",
|
||||
wantExact: true,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "device-only match returns first entry for device",
|
||||
deviceID: "1506",
|
||||
revisionID: "",
|
||||
wantName: "AMD Radeon 610M",
|
||||
wantExact: false,
|
||||
wantFound: true,
|
||||
},
|
||||
{
|
||||
name: "unknown device not found",
|
||||
deviceID: "dead",
|
||||
revisionID: "00",
|
||||
wantName: "",
|
||||
wantExact: false,
|
||||
wantFound: false,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
gotName, gotExact, gotFound := lookupAmdgpuNameInFile(tt.deviceID, tt.revisionID, idsPath)
|
||||
assert.Equal(t, tt.wantName, gotName, "name")
|
||||
assert.Equal(t, tt.wantExact, gotExact, "exact")
|
||||
assert.Equal(t, tt.wantFound, gotFound, "found")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetAmdGpuNameFromIdsFile(t *testing.T) {
|
||||
// Test that getAmdGpuName resolves a name when we can't inject the ids path.
|
||||
// We only verify behavior when product_name is missing and device/revision
|
||||
// would be read from sysfs; the actual lookup uses /usr/share/libdrm/amdgpu.ids.
|
||||
// So this test focuses on normalizeAmdgpuName and that lookupAmdgpuNameInFile
|
||||
// returns the expected name for our test-data file.
|
||||
idsPath := filepath.Join("test-data", "amdgpu.ids")
|
||||
name, exact, found := lookupAmdgpuNameInFile("1435", "ae", idsPath)
|
||||
require.True(t, found)
|
||||
require.True(t, exact)
|
||||
assert.Equal(t, "AMD Custom GPU 0932", name)
|
||||
assert.Equal(t, "AMD Custom GPU 0932", normalizeAmdgpuName(name))
|
||||
|
||||
// " Graphics" suffix is trimmed by normalizeAmdgpuName
|
||||
name2 := "AMD Radeon 860M Graphics"
|
||||
assert.Equal(t, "AMD Radeon 860M", normalizeAmdgpuName(name2))
|
||||
}
|
||||
@@ -13,21 +13,3 @@ func (c *nvmlCollector) init() error {
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) start() {}
|
||||
|
||||
func (c *nvmlCollector) collect() {}
|
||||
|
||||
func openLibrary(name string) (uintptr, error) {
|
||||
return 0, fmt.Errorf("nvml not supported on this platform")
|
||||
}
|
||||
|
||||
func getNVMLPath() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func hasSymbol(lib uintptr, symbol string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (c *nvmlCollector) isGPUActive(bdf string) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
159
agent/gpu_nvtop.go
Normal file
159
agent/gpu_nvtop.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/henrygd/beszel/internal/entities/system"
|
||||
)
|
||||
|
||||
// nvtopSnapshot is one device entry in an nvtop JSON snapshot array.
// Every metric is decoded as an optional string; a nil pointer means the key
// was absent or null, and the update code then leaves that metric untouched.
type nvtopSnapshot struct {
	// DeviceName is the device's reported name; entries with an empty name are skipped.
	DeviceName string `json:"device_name"`
	// Temp is the temperature string (parsed with a trailing "C" stripped).
	Temp *string `json:"temp"`
	// PowerDraw is the power draw string (parsed with a trailing "W" stripped).
	PowerDraw *string `json:"power_draw"`
	// GpuUtil is the utilization string (parsed with a trailing "%" stripped).
	GpuUtil *string `json:"gpu_util"`
	// MemTotal is total memory; the parsed value is passed to bytesToMegabytes.
	MemTotal *string `json:"mem_total"`
	// MemUsed is used memory; the parsed value is passed to bytesToMegabytes.
	MemUsed *string `json:"mem_used"`
}
|
||||
|
||||
// parseNvtopNumber converts an nvtop value string to a float64. Surrounding
// whitespace is trimmed, then one trailing "C", "W" and "%" are stripped in
// that order before parsing. Text that is not a valid number yields 0.
func parseNvtopNumber(raw string) float64 {
	text := strings.TrimSpace(raw)
	for _, unit := range [...]string{"C", "W", "%"} {
		text = strings.TrimSuffix(text, unit)
	}
	// The parse error is deliberately dropped: the value ParseFloat returns
	// alongside it (0 for malformed text) is used as-is.
	number, _ := strconv.ParseFloat(text, 64)
	return number
}
|
||||
|
||||
// parseNvtopData parses a single nvtop JSON snapshot payload.
|
||||
func (gm *GPUManager) parseNvtopData(output []byte) bool {
|
||||
var snapshots []nvtopSnapshot
|
||||
if err := json.Unmarshal(output, &snapshots); err != nil || len(snapshots) == 0 {
|
||||
return false
|
||||
}
|
||||
return gm.updateNvtopSnapshots(snapshots)
|
||||
}
|
||||
|
||||
// updateNvtopSnapshots applies one decoded nvtop snapshot batch to GPU accumulators.
|
||||
func (gm *GPUManager) updateNvtopSnapshots(snapshots []nvtopSnapshot) bool {
|
||||
gm.Lock()
|
||||
defer gm.Unlock()
|
||||
|
||||
valid := false
|
||||
usedIDs := make(map[string]struct{}, len(snapshots))
|
||||
for i, sample := range snapshots {
|
||||
if sample.DeviceName == "" {
|
||||
continue
|
||||
}
|
||||
indexID := "n" + strconv.Itoa(i)
|
||||
id := indexID
|
||||
|
||||
// nvtop ordering can change, so prefer reusing an existing slot with matching device name.
|
||||
if existingByIndex, ok := gm.GpuDataMap[indexID]; ok && existingByIndex.Name != "" && existingByIndex.Name != sample.DeviceName {
|
||||
for existingID, gpu := range gm.GpuDataMap {
|
||||
if !strings.HasPrefix(existingID, "n") {
|
||||
continue
|
||||
}
|
||||
if _, taken := usedIDs[existingID]; taken {
|
||||
continue
|
||||
}
|
||||
if gpu.Name == sample.DeviceName {
|
||||
id = existingID
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := gm.GpuDataMap[id]; !ok {
|
||||
gm.GpuDataMap[id] = &system.GPUData{Name: sample.DeviceName}
|
||||
}
|
||||
gpu := gm.GpuDataMap[id]
|
||||
gpu.Name = sample.DeviceName
|
||||
|
||||
if sample.Temp != nil {
|
||||
gpu.Temperature = parseNvtopNumber(*sample.Temp)
|
||||
}
|
||||
if sample.MemUsed != nil {
|
||||
gpu.MemoryUsed = bytesToMegabytes(parseNvtopNumber(*sample.MemUsed))
|
||||
}
|
||||
if sample.MemTotal != nil {
|
||||
gpu.MemoryTotal = bytesToMegabytes(parseNvtopNumber(*sample.MemTotal))
|
||||
}
|
||||
if sample.GpuUtil != nil {
|
||||
gpu.Usage += parseNvtopNumber(*sample.GpuUtil)
|
||||
}
|
||||
if sample.PowerDraw != nil {
|
||||
gpu.Power += parseNvtopNumber(*sample.PowerDraw)
|
||||
}
|
||||
gpu.Count++
|
||||
usedIDs[id] = struct{}{}
|
||||
valid = true
|
||||
}
|
||||
return valid
|
||||
}
|
||||
|
||||
// collectNvtopStats runs nvtop loop mode and continuously decodes JSON snapshots.
|
||||
func (gm *GPUManager) collectNvtopStats(interval string) error {
|
||||
cmd := exec.Command(nvtopCmd, "-lP", "-d", interval)
|
||||
stdout, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
_ = stdout.Close()
|
||||
if cmd.ProcessState == nil || !cmd.ProcessState.Exited() {
|
||||
_ = cmd.Process.Kill()
|
||||
}
|
||||
_ = cmd.Wait()
|
||||
}()
|
||||
|
||||
decoder := json.NewDecoder(stdout)
|
||||
foundValid := false
|
||||
for {
|
||||
var snapshots []nvtopSnapshot
|
||||
if err := decoder.Decode(&snapshots); err != nil {
|
||||
if err == io.EOF {
|
||||
if foundValid {
|
||||
return nil
|
||||
}
|
||||
return errNoValidData
|
||||
}
|
||||
return err
|
||||
}
|
||||
if gm.updateNvtopSnapshots(snapshots) {
|
||||
foundValid = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// startNvtopCollector starts nvtop collection with retry or fallback callback handling.
|
||||
func (gm *GPUManager) startNvtopCollector(interval string, onFailure func()) {
|
||||
go func() {
|
||||
failures := 0
|
||||
for {
|
||||
if err := gm.collectNvtopStats(interval); err != nil {
|
||||
if onFailure != nil {
|
||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
||||
onFailure()
|
||||
return
|
||||
}
|
||||
failures++
|
||||
if failures > maxFailureRetries {
|
||||
break
|
||||
}
|
||||
slog.Warn("Error collecting GPU data via nvtop", "err", err)
|
||||
time.Sleep(retryWaitTime)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -250,6 +250,100 @@ func TestParseAmdData(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseNvtopData(t *testing.T) {
|
||||
input, err := os.ReadFile("test-data/nvtop.json")
|
||||
require.NoError(t, err)
|
||||
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
valid := gm.parseNvtopData(input)
|
||||
require.True(t, valid)
|
||||
|
||||
g0, ok := gm.GpuDataMap["n0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", g0.Name)
|
||||
assert.Equal(t, 48.0, g0.Temperature)
|
||||
assert.Equal(t, 5.0, g0.Usage)
|
||||
assert.Equal(t, 13.0, g0.Power)
|
||||
assert.Equal(t, bytesToMegabytes(349372416), g0.MemoryUsed)
|
||||
assert.Equal(t, bytesToMegabytes(4294967296), g0.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g0.Count)
|
||||
|
||||
g1, ok := gm.GpuDataMap["n1"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "AMD Radeon 680M", g1.Name)
|
||||
assert.Equal(t, 48.0, g1.Temperature)
|
||||
assert.Equal(t, 12.0, g1.Usage)
|
||||
assert.Equal(t, 9.0, g1.Power)
|
||||
assert.Equal(t, bytesToMegabytes(1213784064), g1.MemoryUsed)
|
||||
assert.Equal(t, bytesToMegabytes(16929173504), g1.MemoryTotal)
|
||||
assert.Equal(t, 1.0, g1.Count)
|
||||
}
|
||||
|
||||
func TestUpdateNvtopSnapshotsKeepsDeviceAssociationWhenOrderChanges(t *testing.T) {
|
||||
strPtr := func(s string) *string { return &s }
|
||||
|
||||
gm := &GPUManager{
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
|
||||
firstBatch := []nvtopSnapshot{
|
||||
{
|
||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
GpuUtil: strPtr("20%"),
|
||||
PowerDraw: strPtr("10W"),
|
||||
},
|
||||
{
|
||||
DeviceName: "AMD Radeon 680M",
|
||||
GpuUtil: strPtr("30%"),
|
||||
PowerDraw: strPtr("20W"),
|
||||
},
|
||||
}
|
||||
secondBatchSwapped := []nvtopSnapshot{
|
||||
{
|
||||
DeviceName: "AMD Radeon 680M",
|
||||
GpuUtil: strPtr("40%"),
|
||||
PowerDraw: strPtr("25W"),
|
||||
},
|
||||
{
|
||||
DeviceName: "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
GpuUtil: strPtr("50%"),
|
||||
PowerDraw: strPtr("15W"),
|
||||
},
|
||||
}
|
||||
|
||||
require.True(t, gm.updateNvtopSnapshots(firstBatch))
|
||||
require.True(t, gm.updateNvtopSnapshots(secondBatchSwapped))
|
||||
|
||||
nvidia := gm.GpuDataMap["n0"]
|
||||
require.NotNil(t, nvidia)
|
||||
assert.Equal(t, "NVIDIA GeForce RTX 3050 Ti Laptop GPU", nvidia.Name)
|
||||
assert.Equal(t, 70.0, nvidia.Usage)
|
||||
assert.Equal(t, 25.0, nvidia.Power)
|
||||
assert.Equal(t, 2.0, nvidia.Count)
|
||||
|
||||
amd := gm.GpuDataMap["n1"]
|
||||
require.NotNil(t, amd)
|
||||
assert.Equal(t, "AMD Radeon 680M", amd.Name)
|
||||
assert.Equal(t, 70.0, amd.Usage)
|
||||
assert.Equal(t, 45.0, amd.Power)
|
||||
assert.Equal(t, 2.0, amd.Count)
|
||||
}
|
||||
|
||||
func TestParseCollectorPriority(t *testing.T) {
|
||||
got := parseCollectorPriority(" nvml, nvidia-smi, intel_gpu_top, amd_sysfs, nvtop, rocm-smi, bad ")
|
||||
want := []collectorSource{
|
||||
collectorSourceNVML,
|
||||
collectorSourceNvidiaSMI,
|
||||
collectorSourceIntelGpuTop,
|
||||
collectorSourceAmdSysfs,
|
||||
collectorSourceNVTop,
|
||||
collectorSourceRocmSMI,
|
||||
}
|
||||
assert.Equal(t, want, got)
|
||||
}
|
||||
|
||||
func TestParseJetsonData(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
@@ -987,36 +1081,35 @@ func TestCalculateGPUAverage(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestDetectGPUs(t *testing.T) {
|
||||
func TestGPUCapabilitiesAndLegacyPriority(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
// Set up temp dir with the commands
|
||||
tempDir := t.TempDir()
|
||||
os.Setenv("PATH", tempDir)
|
||||
hasAmdSysfs := (&GPUManager{}).hasAmdSysfs()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
setupCommands func() error
|
||||
setupCommands func(string) error
|
||||
wantNvidiaSmi bool
|
||||
wantRocmSmi bool
|
||||
wantTegrastats bool
|
||||
wantNvtop bool
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "nvidia-smi not available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(_ string) error {
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: false,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "nvidia-smi available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "nvidia-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
@@ -1028,29 +1121,14 @@ echo "test"`
|
||||
wantNvidiaSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantRocmSmi: false,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "rocm-smi available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "rocm-smi")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: true,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "tegrastats available",
|
||||
setupCommands: func() error {
|
||||
path := filepath.Join(tempDir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
@@ -1059,12 +1137,47 @@ echo "test"`
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: true,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "tegrastats available",
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "tegrastats")
|
||||
script := `#!/bin/sh
|
||||
echo "test"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: true,
|
||||
wantNvtop: false,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nvtop available",
|
||||
setupCommands: func(tempDir string) error {
|
||||
path := filepath.Join(tempDir, "nvtop")
|
||||
script := `#!/bin/sh
|
||||
echo "[]"`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
wantNvidiaSmi: false,
|
||||
wantRocmSmi: false,
|
||||
wantTegrastats: false,
|
||||
wantNvtop: true,
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "no gpu tools available",
|
||||
setupCommands: func() error {
|
||||
setupCommands: func(_ string) error {
|
||||
os.Setenv("PATH", "")
|
||||
return nil
|
||||
},
|
||||
@@ -1074,29 +1187,53 @@ echo "test"`
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if err := tt.setupCommands(); err != nil {
|
||||
tempDir := t.TempDir()
|
||||
os.Setenv("PATH", tempDir)
|
||||
if err := tt.setupCommands(tempDir); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
gm := &GPUManager{}
|
||||
err := gm.detectGPUs()
|
||||
caps := gm.discoverGpuCapabilities()
|
||||
var err error
|
||||
if !hasAnyGpuCollector(caps) {
|
||||
err = fmt.Errorf(noGPUFoundMsg)
|
||||
}
|
||||
priorities := gm.resolveLegacyCollectorPriority(caps)
|
||||
hasPriority := func(source collectorSource) bool {
|
||||
for _, s := range priorities {
|
||||
if s == source {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
gotNvidiaSmi := hasPriority(collectorSourceNvidiaSMI)
|
||||
gotRocmSmi := hasPriority(collectorSourceRocmSMI)
|
||||
gotTegrastats := caps.hasTegrastats
|
||||
gotNvtop := caps.hasNvtop
|
||||
|
||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gm.nvidiaSmi, gm.rocmSmi, gm.tegrastats)
|
||||
t.Logf("nvidiaSmi: %v, rocmSmi: %v, tegrastats: %v", gotNvidiaSmi, gotRocmSmi, gotTegrastats)
|
||||
|
||||
if tt.wantErr {
|
||||
wantErr := tt.wantErr
|
||||
if hasAmdSysfs && (tt.name == "nvidia-smi not available" || tt.name == "no gpu tools available") {
|
||||
wantErr = false
|
||||
}
|
||||
if wantErr {
|
||||
assert.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.wantNvidiaSmi, gm.nvidiaSmi)
|
||||
assert.Equal(t, tt.wantRocmSmi, gm.rocmSmi)
|
||||
assert.Equal(t, tt.wantTegrastats, gm.tegrastats)
|
||||
assert.Equal(t, tt.wantNvidiaSmi, gotNvidiaSmi)
|
||||
assert.Equal(t, tt.wantRocmSmi, gotRocmSmi)
|
||||
assert.Equal(t, tt.wantTegrastats, gotTegrastats)
|
||||
assert.Equal(t, tt.wantNvtop, gotNvtop)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestStartCollector(t *testing.T) {
|
||||
func TestCollectorStartHelpers(t *testing.T) {
|
||||
// Save original PATH
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
@@ -1181,6 +1318,27 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "nvtop collector",
|
||||
command: "nvtop",
|
||||
setup: func(t *testing.T) error {
|
||||
path := filepath.Join(dir, "nvtop")
|
||||
script := `#!/bin/sh
|
||||
echo '[{"device_name":"NVIDIA Test GPU","temp":"52C","power_draw":"31W","gpu_util":"37%","mem_total":"4294967296","mem_used":"536870912","processes":[]}]'`
|
||||
if err := os.WriteFile(path, []byte(script), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
validate: func(t *testing.T, gm *GPUManager) {
|
||||
gpu, exists := gm.GpuDataMap["n0"]
|
||||
assert.True(t, exists)
|
||||
if exists {
|
||||
assert.Equal(t, "NVIDIA Test GPU", gpu.Name)
|
||||
assert.Equal(t, 52.0, gpu.Temperature)
|
||||
}
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
@@ -1193,13 +1351,157 @@ echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000m
|
||||
GpuDataMap: make(map[string]*system.GPUData),
|
||||
}
|
||||
}
|
||||
tt.gm.startCollector(tt.command)
|
||||
switch tt.command {
|
||||
case nvidiaSmiCmd:
|
||||
tt.gm.startNvidiaSmiCollector("4")
|
||||
case rocmSmiCmd:
|
||||
tt.gm.startRocmSmiCollector(4300 * time.Millisecond)
|
||||
case tegraStatsCmd:
|
||||
tt.gm.startTegraStatsCollector("3700")
|
||||
case nvtopCmd:
|
||||
tt.gm.startNvtopCollector("30", nil)
|
||||
default:
|
||||
t.Fatalf("unknown test command %q", tt.command)
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond) // Give collector time to run
|
||||
tt.validate(t, tt.gm)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityNvtopFallback(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvtop,nvidia-smi")
|
||||
|
||||
nvtopPath := filepath.Join(dir, "nvtop")
|
||||
nvtopScript := `#!/bin/sh
|
||||
echo 'not-json'`
|
||||
require.NoError(t, os.WriteFile(nvtopPath, []byte(nvtopScript), 0755))
|
||||
|
||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
||||
nvidiaScript := `#!/bin/sh
|
||||
echo "0, NVIDIA Priority GPU, 45, 512, 2048, 12, 25"`
|
||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Priority GPU", gpu.Name)
|
||||
assert.Equal(t, 45.0, gpu.Temperature)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityMixedCollectors(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "intel_gpu_top,rocm-smi")
|
||||
|
||||
intelPath := filepath.Join(dir, "intel_gpu_top")
|
||||
intelScript := `#!/bin/sh
|
||||
echo "Freq MHz IRQ RC6 Power W IMC MiB/s RCS VCS"
|
||||
echo " req act /s % gpu pkg rd wr % se wa % se wa"
|
||||
echo "226 223 338 58 2.00 2.69 1820 965 0.00 0 0 0.00 0 0"
|
||||
echo "189 187 412 67 1.80 2.45 1950 823 8.50 2 1 15.00 1 0"
|
||||
`
|
||||
require.NoError(t, os.WriteFile(intelPath, []byte(intelScript), 0755))
|
||||
|
||||
rocmPath := filepath.Join(dir, "rocm-smi")
|
||||
rocmScript := `#!/bin/sh
|
||||
echo '{"card0": {"Temperature (Sensor edge) (C)": "49.0", "Current Socket Graphics Package Power (W)": "28.159", "GPU use (%)": "0", "VRAM Total Memory (B)": "536870912", "VRAM Total Used Memory (B)": "445550592", "Card Series": "Rembrandt [Radeon 680M]", "GUID": "34756"}}'
|
||||
`
|
||||
require.NoError(t, os.WriteFile(rocmPath, []byte(rocmScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
_, intelOk := gm.GpuDataMap["i0"]
|
||||
_, amdOk := gm.GpuDataMap["34756"]
|
||||
assert.True(t, intelOk)
|
||||
assert.True(t, amdOk)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerPriorityNvmlFallbackToNvidiaSmi(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvml,nvidia-smi")
|
||||
|
||||
nvidiaPath := filepath.Join(dir, "nvidia-smi")
|
||||
nvidiaScript := `#!/bin/sh
|
||||
echo "0, NVIDIA Fallback GPU, 41, 256, 1024, 8, 14"`
|
||||
require.NoError(t, os.WriteFile(nvidiaPath, []byte(nvidiaScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(150 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "Fallback GPU", gpu.Name)
|
||||
}
|
||||
|
||||
func TestNewGPUManagerConfiguredCollectorsMustStart(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
|
||||
t.Run("configured valid collector unavailable", func(t *testing.T) {
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
||||
gm, err := NewGPUManager()
|
||||
require.Nil(t, gm)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
||||
})
|
||||
|
||||
t.Run("configured collector list has only unknown entries", func(t *testing.T) {
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "bad,unknown")
|
||||
gm, err := NewGPUManager()
|
||||
require.Nil(t, gm)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "no configured GPU collectors are available")
|
||||
})
|
||||
}
|
||||
|
||||
func TestNewGPUManagerJetsonIgnoresCollectorConfig(t *testing.T) {
|
||||
origPath := os.Getenv("PATH")
|
||||
defer os.Setenv("PATH", origPath)
|
||||
|
||||
dir := t.TempDir()
|
||||
os.Setenv("PATH", dir)
|
||||
t.Setenv("BESZEL_AGENT_GPU_COLLECTOR", "nvidia-smi")
|
||||
|
||||
tegraPath := filepath.Join(dir, "tegrastats")
|
||||
tegraScript := `#!/bin/sh
|
||||
echo "11-14-2024 22:54:33 RAM 1024/4096MB GR3D_FREQ 80% tj@70C VDD_GPU_SOC 1000mW"`
|
||||
require.NoError(t, os.WriteFile(tegraPath, []byte(tegraScript), 0755))
|
||||
|
||||
gm, err := NewGPUManager()
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, gm)
|
||||
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
gpu, ok := gm.GpuDataMap["0"]
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "GPU", gpu.Name)
|
||||
}
|
||||
|
||||
// TestAccumulationTableDriven tests the accumulation behavior for all three GPU types
|
||||
func TestAccumulation(t *testing.T) {
|
||||
type expectedGPUValues struct {
|
||||
|
||||
@@ -28,7 +28,7 @@ type SmartManager struct {
|
||||
SmartDevices []*DeviceInfo
|
||||
refreshMutex sync.Mutex
|
||||
lastScanTime time.Time
|
||||
binPath string
|
||||
smartctlPath string
|
||||
excludedDevices map[string]struct{}
|
||||
}
|
||||
|
||||
@@ -170,27 +170,35 @@ func (sm *SmartManager) ScanDevices(force bool) error {
|
||||
configuredDevices = parsedDevices
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, sm.binPath, "--scan", "-j")
|
||||
output, err := cmd.Output()
|
||||
|
||||
var (
|
||||
scanErr error
|
||||
scannedDevices []*DeviceInfo
|
||||
hasValidScan bool
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
scanErr = err
|
||||
} else {
|
||||
scannedDevices, hasValidScan = sm.parseScan(output)
|
||||
if !hasValidScan {
|
||||
scanErr = errNoValidSmartData
|
||||
if sm.smartctlPath != "" {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, "--scan", "-j")
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
scanErr = err
|
||||
} else {
|
||||
scannedDevices, hasValidScan = sm.parseScan(output)
|
||||
if !hasValidScan {
|
||||
scanErr = errNoValidSmartData
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add eMMC devices (Linux only) by reading sysfs health fields. This does not
|
||||
// require smartctl and does not scan the whole device.
|
||||
if emmcDevices := scanEmmcDevices(); len(emmcDevices) > 0 {
|
||||
scannedDevices = append(scannedDevices, emmcDevices...)
|
||||
hasValidScan = true
|
||||
}
|
||||
|
||||
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
|
||||
finalDevices = sm.filterExcludedDevices(finalDevices)
|
||||
sm.updateSmartDevices(finalDevices)
|
||||
@@ -442,6 +450,18 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
return errNoValidSmartData
|
||||
}
|
||||
|
||||
// eMMC health is not exposed via SMART on Linux, but the kernel provides
|
||||
// wear / EOL indicators via sysfs. Prefer that path when available.
|
||||
if deviceInfo != nil {
|
||||
if ok, err := sm.collectEmmcHealth(deviceInfo); ok {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if sm.smartctlPath == "" {
|
||||
return errNoValidSmartData
|
||||
}
|
||||
|
||||
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
|
||||
|
||||
// Check if we have any existing data for this device
|
||||
@@ -452,7 +472,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
|
||||
// Try with -n standby first if we have existing data
|
||||
args := sm.smartctlArgs(deviceInfo, hasExistingData)
|
||||
cmd := exec.CommandContext(ctx, sm.binPath, args...)
|
||||
cmd := exec.CommandContext(ctx, sm.smartctlPath, args...)
|
||||
output, err := cmd.CombinedOutput()
|
||||
|
||||
// Check if device is in standby (exit status 2)
|
||||
@@ -465,7 +485,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel2()
|
||||
args = sm.smartctlArgs(deviceInfo, false)
|
||||
cmd = exec.CommandContext(ctx2, sm.binPath, args...)
|
||||
cmd = exec.CommandContext(ctx2, sm.smartctlPath, args...)
|
||||
output, err = cmd.CombinedOutput()
|
||||
}
|
||||
|
||||
@@ -482,7 +502,7 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
|
||||
ctx3, cancel3 := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer cancel3()
|
||||
args = sm.smartctlArgs(deviceInfo, false)
|
||||
cmd = exec.CommandContext(ctx3, sm.binPath, args...)
|
||||
cmd = exec.CommandContext(ctx3, sm.smartctlPath, args...)
|
||||
output, err = cmd.CombinedOutput()
|
||||
hasValidData = sm.parseSmartOutput(deviceInfo, output)
|
||||
|
||||
@@ -1123,10 +1143,15 @@ func NewSmartManager() (*SmartManager, error) {
|
||||
}
|
||||
sm.refreshExcludedDevices()
|
||||
path, err := sm.detectSmartctl()
|
||||
slog.Debug("smartctl", "path", path, "err", err)
|
||||
if err != nil {
|
||||
// Keep the previous fail-fast behavior unless this Linux host exposes
|
||||
// eMMC health via sysfs, in which case smartctl is optional.
|
||||
if runtime.GOOS == "linux" && len(scanEmmcDevices()) > 0 {
|
||||
return sm, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
slog.Debug("smartctl", "path", path)
|
||||
sm.binPath = path
|
||||
sm.smartctlPath = path
|
||||
return sm, nil
|
||||
}
|
||||
|
||||
700
agent/test-data/amdgpu.ids
Normal file
700
agent/test-data/amdgpu.ids
Normal file
@@ -0,0 +1,700 @@
|
||||
# List of AMDGPU IDs
|
||||
#
|
||||
# Syntax:
|
||||
# device_id, revision_id, product_name <-- single tab after comma
|
||||
|
||||
1.0.0
|
||||
1114, C2, AMD Radeon 860M Graphics
|
||||
1114, C3, AMD Radeon 840M Graphics
|
||||
1114, D2, AMD Radeon 860M Graphics
|
||||
1114, D3, AMD Radeon 840M Graphics
|
||||
1309, 00, AMD Radeon R7 Graphics
|
||||
130A, 00, AMD Radeon R6 Graphics
|
||||
130B, 00, AMD Radeon R4 Graphics
|
||||
130C, 00, AMD Radeon R7 Graphics
|
||||
130D, 00, AMD Radeon R6 Graphics
|
||||
130E, 00, AMD Radeon R5 Graphics
|
||||
130F, 00, AMD Radeon R7 Graphics
|
||||
130F, D4, AMD Radeon R7 Graphics
|
||||
130F, D5, AMD Radeon R7 Graphics
|
||||
130F, D6, AMD Radeon R7 Graphics
|
||||
130F, D7, AMD Radeon R7 Graphics
|
||||
1313, 00, AMD Radeon R7 Graphics
|
||||
1313, D4, AMD Radeon R7 Graphics
|
||||
1313, D5, AMD Radeon R7 Graphics
|
||||
1313, D6, AMD Radeon R7 Graphics
|
||||
1315, 00, AMD Radeon R5 Graphics
|
||||
1315, D4, AMD Radeon R5 Graphics
|
||||
1315, D5, AMD Radeon R5 Graphics
|
||||
1315, D6, AMD Radeon R5 Graphics
|
||||
1315, D7, AMD Radeon R5 Graphics
|
||||
1316, 00, AMD Radeon R5 Graphics
|
||||
1318, 00, AMD Radeon R5 Graphics
|
||||
131B, 00, AMD Radeon R4 Graphics
|
||||
131C, 00, AMD Radeon R7 Graphics
|
||||
131D, 00, AMD Radeon R6 Graphics
|
||||
1435, AE, AMD Custom GPU 0932
|
||||
1506, C1, AMD Radeon 610M
|
||||
1506, C2, AMD Radeon 610M
|
||||
1506, C3, AMD Radeon 610M
|
||||
1506, C4, AMD Radeon 610M
|
||||
150E, C1, AMD Radeon 890M Graphics
|
||||
150E, C4, AMD Radeon 890M Graphics
|
||||
150E, C5, AMD Radeon 890M Graphics
|
||||
150E, C6, AMD Radeon 890M Graphics
|
||||
150E, D1, AMD Radeon 890M Graphics
|
||||
150E, D2, AMD Radeon 890M Graphics
|
||||
150E, D3, AMD Radeon 890M Graphics
|
||||
1586, C1, Radeon 8060S Graphics
|
||||
1586, C2, Radeon 8050S Graphics
|
||||
1586, C4, Radeon 8050S Graphics
|
||||
1586, D1, Radeon 8060S Graphics
|
||||
1586, D2, Radeon 8050S Graphics
|
||||
1586, D4, Radeon 8050S Graphics
|
||||
1586, D5, Radeon 8040S Graphics
|
||||
15BF, 00, AMD Radeon 780M Graphics
|
||||
15BF, 01, AMD Radeon 760M Graphics
|
||||
15BF, 02, AMD Radeon 780M Graphics
|
||||
15BF, 03, AMD Radeon 760M Graphics
|
||||
15BF, C1, AMD Radeon 780M Graphics
|
||||
15BF, C2, AMD Radeon 780M Graphics
|
||||
15BF, C3, AMD Radeon 760M Graphics
|
||||
15BF, C4, AMD Radeon 780M Graphics
|
||||
15BF, C5, AMD Radeon 740M Graphics
|
||||
15BF, C6, AMD Radeon 780M Graphics
|
||||
15BF, C7, AMD Radeon 780M Graphics
|
||||
15BF, C8, AMD Radeon 760M Graphics
|
||||
15BF, C9, AMD Radeon 780M Graphics
|
||||
15BF, CA, AMD Radeon 740M Graphics
|
||||
15BF, CB, AMD Radeon 760M Graphics
|
||||
15BF, CC, AMD Radeon 740M Graphics
|
||||
15BF, CD, AMD Radeon 760M Graphics
|
||||
15BF, CF, AMD Radeon 780M Graphics
|
||||
15BF, D0, AMD Radeon 780M Graphics
|
||||
15BF, D1, AMD Radeon 780M Graphics
|
||||
15BF, D2, AMD Radeon 780M Graphics
|
||||
15BF, D3, AMD Radeon 780M Graphics
|
||||
15BF, D4, AMD Radeon 780M Graphics
|
||||
15BF, D5, AMD Radeon 760M Graphics
|
||||
15BF, D6, AMD Radeon 760M Graphics
|
||||
15BF, D7, AMD Radeon 780M Graphics
|
||||
15BF, D8, AMD Radeon 740M Graphics
|
||||
15BF, D9, AMD Radeon 780M Graphics
|
||||
15BF, DA, AMD Radeon 780M Graphics
|
||||
15BF, DB, AMD Radeon 760M Graphics
|
||||
15BF, DC, AMD Radeon 760M Graphics
|
||||
15BF, DD, AMD Radeon 780M Graphics
|
||||
15BF, DE, AMD Radeon 740M Graphics
|
||||
15BF, DF, AMD Radeon 760M Graphics
|
||||
15BF, F0, AMD Radeon 760M Graphics
|
||||
15C8, C1, AMD Radeon 740M Graphics
|
||||
15C8, C2, AMD Radeon 740M Graphics
|
||||
15C8, C3, AMD Radeon 740M Graphics
|
||||
15C8, C4, AMD Radeon 740M Graphics
|
||||
15C8, D1, AMD Radeon 740M Graphics
|
||||
15C8, D2, AMD Radeon 740M Graphics
|
||||
15C8, D3, AMD Radeon 740M Graphics
|
||||
15C8, D4, AMD Radeon 740M Graphics
|
||||
15D8, 00, AMD Radeon RX Vega 8 Graphics WS
|
||||
15D8, 91, AMD Radeon Vega 3 Graphics
|
||||
15D8, 91, AMD Ryzen Embedded R1606G with Radeon Vega Gfx
|
||||
15D8, 92, AMD Radeon Vega 3 Graphics
|
||||
15D8, 92, AMD Ryzen Embedded R1505G with Radeon Vega Gfx
|
||||
15D8, 93, AMD Radeon Vega 1 Graphics
|
||||
15D8, A1, AMD Radeon Vega 10 Graphics
|
||||
15D8, A2, AMD Radeon Vega 8 Graphics
|
||||
15D8, A3, AMD Radeon Vega 6 Graphics
|
||||
15D8, A4, AMD Radeon Vega 3 Graphics
|
||||
15D8, B1, AMD Radeon Vega 10 Graphics
|
||||
15D8, B2, AMD Radeon Vega 8 Graphics
|
||||
15D8, B3, AMD Radeon Vega 6 Graphics
|
||||
15D8, B4, AMD Radeon Vega 3 Graphics
|
||||
15D8, C1, AMD Radeon Vega 10 Graphics
|
||||
15D8, C2, AMD Radeon Vega 8 Graphics
|
||||
15D8, C3, AMD Radeon Vega 6 Graphics
|
||||
15D8, C4, AMD Radeon Vega 3 Graphics
|
||||
15D8, C5, AMD Radeon Vega 3 Graphics
|
||||
15D8, C8, AMD Radeon Vega 11 Graphics
|
||||
15D8, C9, AMD Radeon Vega 8 Graphics
|
||||
15D8, CA, AMD Radeon Vega 11 Graphics
|
||||
15D8, CB, AMD Radeon Vega 8 Graphics
|
||||
15D8, CC, AMD Radeon Vega 3 Graphics
|
||||
15D8, CE, AMD Radeon Vega 3 Graphics
|
||||
15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx
|
||||
15D8, D1, AMD Radeon Vega 10 Graphics
|
||||
15D8, D2, AMD Radeon Vega 8 Graphics
|
||||
15D8, D3, AMD Radeon Vega 6 Graphics
|
||||
15D8, D4, AMD Radeon Vega 3 Graphics
|
||||
15D8, D8, AMD Radeon Vega 11 Graphics
|
||||
15D8, D9, AMD Radeon Vega 8 Graphics
|
||||
15D8, DA, AMD Radeon Vega 11 Graphics
|
||||
15D8, DB, AMD Radeon Vega 3 Graphics
|
||||
15D8, DB, AMD Radeon Vega 8 Graphics
|
||||
15D8, DC, AMD Radeon Vega 3 Graphics
|
||||
15D8, DD, AMD Radeon Vega 3 Graphics
|
||||
15D8, DE, AMD Radeon Vega 3 Graphics
|
||||
15D8, DF, AMD Radeon Vega 3 Graphics
|
||||
15D8, E3, AMD Radeon Vega 3 Graphics
|
||||
15D8, E4, AMD Ryzen Embedded R1102G with Radeon Vega Gfx
|
||||
15DD, 81, AMD Ryzen Embedded V1807B with Radeon Vega Gfx
|
||||
15DD, 82, AMD Ryzen Embedded V1756B with Radeon Vega Gfx
|
||||
15DD, 83, AMD Ryzen Embedded V1605B with Radeon Vega Gfx
|
||||
15DD, 84, AMD Radeon Vega 6 Graphics
|
||||
15DD, 85, AMD Ryzen Embedded V1202B with Radeon Vega Gfx
|
||||
15DD, 86, AMD Radeon Vega 11 Graphics
|
||||
15DD, 88, AMD Radeon Vega 8 Graphics
|
||||
15DD, C1, AMD Radeon Vega 11 Graphics
|
||||
15DD, C2, AMD Radeon Vega 8 Graphics
|
||||
15DD, C3, AMD Radeon Vega 3 / 10 Graphics
|
||||
15DD, C4, AMD Radeon Vega 8 Graphics
|
||||
15DD, C5, AMD Radeon Vega 3 Graphics
|
||||
15DD, C6, AMD Radeon Vega 11 Graphics
|
||||
15DD, C8, AMD Radeon Vega 8 Graphics
|
||||
15DD, C9, AMD Radeon Vega 11 Graphics
|
||||
15DD, CA, AMD Radeon Vega 8 Graphics
|
||||
15DD, CB, AMD Radeon Vega 3 Graphics
|
||||
15DD, CC, AMD Radeon Vega 6 Graphics
|
||||
15DD, CE, AMD Radeon Vega 3 Graphics
|
||||
15DD, CF, AMD Radeon Vega 3 Graphics
|
||||
15DD, D0, AMD Radeon Vega 10 Graphics
|
||||
15DD, D1, AMD Radeon Vega 8 Graphics
|
||||
15DD, D3, AMD Radeon Vega 11 Graphics
|
||||
15DD, D5, AMD Radeon Vega 8 Graphics
|
||||
15DD, D6, AMD Radeon Vega 11 Graphics
|
||||
15DD, D7, AMD Radeon Vega 8 Graphics
|
||||
15DD, D8, AMD Radeon Vega 3 Graphics
|
||||
15DD, D9, AMD Radeon Vega 6 Graphics
|
||||
15DD, E1, AMD Radeon Vega 3 Graphics
|
||||
15DD, E2, AMD Radeon Vega 3 Graphics
|
||||
163F, AE, AMD Custom GPU 0405
|
||||
163F, E1, AMD Custom GPU 0405
|
||||
164E, D8, AMD Radeon 610M
|
||||
164E, D9, AMD Radeon 610M
|
||||
164E, DA, AMD Radeon 610M
|
||||
164E, DB, AMD Radeon 610M
|
||||
164E, DC, AMD Radeon 610M
|
||||
1681, 06, AMD Radeon 680M
|
||||
1681, 07, AMD Radeon 660M
|
||||
1681, 0A, AMD Radeon 680M
|
||||
1681, 0B, AMD Radeon 660M
|
||||
1681, C7, AMD Radeon 680M
|
||||
1681, C8, AMD Radeon 680M
|
||||
1681, C9, AMD Radeon 660M
|
||||
1900, 01, AMD Radeon 780M Graphics
|
||||
1900, 02, AMD Radeon 760M Graphics
|
||||
1900, 03, AMD Radeon 780M Graphics
|
||||
1900, 04, AMD Radeon 760M Graphics
|
||||
1900, 05, AMD Radeon 780M Graphics
|
||||
1900, 06, AMD Radeon 780M Graphics
|
||||
1900, 07, AMD Radeon 760M Graphics
|
||||
1900, B0, AMD Radeon 780M Graphics
|
||||
1900, B1, AMD Radeon 780M Graphics
|
||||
1900, B2, AMD Radeon 780M Graphics
|
||||
1900, B3, AMD Radeon 780M Graphics
|
||||
1900, B4, AMD Radeon 780M Graphics
|
||||
1900, B5, AMD Radeon 780M Graphics
|
||||
1900, B6, AMD Radeon 780M Graphics
|
||||
1900, B7, AMD Radeon 760M Graphics
|
||||
1900, B8, AMD Radeon 760M Graphics
|
||||
1900, B9, AMD Radeon 780M Graphics
|
||||
1900, BA, AMD Radeon 780M Graphics
|
||||
1900, BB, AMD Radeon 780M Graphics
|
||||
1900, C0, AMD Radeon 780M Graphics
|
||||
1900, C1, AMD Radeon 760M Graphics
|
||||
1900, C2, AMD Radeon 780M Graphics
|
||||
1900, C3, AMD Radeon 760M Graphics
|
||||
1900, C4, AMD Radeon 780M Graphics
|
||||
1900, C5, AMD Radeon 780M Graphics
|
||||
1900, C6, AMD Radeon 760M Graphics
|
||||
1900, C7, AMD Radeon 780M Graphics
|
||||
1900, C8, AMD Radeon 760M Graphics
|
||||
1900, C9, AMD Radeon 780M Graphics
|
||||
1900, CA, AMD Radeon 760M Graphics
|
||||
1900, CB, AMD Radeon 780M Graphics
|
||||
1900, CC, AMD Radeon 780M Graphics
|
||||
1900, CD, AMD Radeon 760M Graphics
|
||||
1900, CE, AMD Radeon 780M Graphics
|
||||
1900, CF, AMD Radeon 760M Graphics
|
||||
1900, D0, AMD Radeon 780M Graphics
|
||||
1900, D1, AMD Radeon 760M Graphics
|
||||
1900, D2, AMD Radeon 780M Graphics
|
||||
1900, D3, AMD Radeon 760M Graphics
|
||||
1900, D4, AMD Radeon 780M Graphics
|
||||
1900, D5, AMD Radeon 780M Graphics
|
||||
1900, D6, AMD Radeon 760M Graphics
|
||||
1900, D7, AMD Radeon 780M Graphics
|
||||
1900, D8, AMD Radeon 760M Graphics
|
||||
1900, D9, AMD Radeon 780M Graphics
|
||||
1900, DA, AMD Radeon 760M Graphics
|
||||
1900, DB, AMD Radeon 780M Graphics
|
||||
1900, DC, AMD Radeon 780M Graphics
|
||||
1900, DD, AMD Radeon 760M Graphics
|
||||
1900, DE, AMD Radeon 780M Graphics
|
||||
1900, DF, AMD Radeon 760M Graphics
|
||||
1900, F0, AMD Radeon 780M Graphics
|
||||
1900, F1, AMD Radeon 780M Graphics
|
||||
1900, F2, AMD Radeon 780M Graphics
|
||||
1901, C1, AMD Radeon 740M Graphics
|
||||
1901, C2, AMD Radeon 740M Graphics
|
||||
1901, C3, AMD Radeon 740M Graphics
|
||||
1901, C6, AMD Radeon 740M Graphics
|
||||
1901, C7, AMD Radeon 740M Graphics
|
||||
1901, C8, AMD Radeon 740M Graphics
|
||||
1901, C9, AMD Radeon 740M Graphics
|
||||
1901, CA, AMD Radeon 740M Graphics
|
||||
1901, D1, AMD Radeon 740M Graphics
|
||||
1901, D2, AMD Radeon 740M Graphics
|
||||
1901, D3, AMD Radeon 740M Graphics
|
||||
1901, D4, AMD Radeon 740M Graphics
|
||||
1901, D5, AMD Radeon 740M Graphics
|
||||
1901, D6, AMD Radeon 740M Graphics
|
||||
1901, D7, AMD Radeon 740M Graphics
|
||||
1901, D8, AMD Radeon 740M Graphics
|
||||
6600, 00, AMD Radeon HD 8600 / 8700M
|
||||
6600, 81, AMD Radeon R7 M370
|
||||
6601, 00, AMD Radeon HD 8500M / 8700M
|
||||
6604, 00, AMD Radeon R7 M265 Series
|
||||
6604, 81, AMD Radeon R7 M350
|
||||
6605, 00, AMD Radeon R7 M260 Series
|
||||
6605, 81, AMD Radeon R7 M340
|
||||
6606, 00, AMD Radeon HD 8790M
|
||||
6607, 00, AMD Radeon R5 M240
|
||||
6608, 00, AMD FirePro W2100
|
||||
6610, 00, AMD Radeon R7 200 Series
|
||||
6610, 81, AMD Radeon R7 350
|
||||
6610, 83, AMD Radeon R5 340
|
||||
6610, 87, AMD Radeon R7 200 Series
|
||||
6611, 00, AMD Radeon R7 200 Series
|
||||
6611, 87, AMD Radeon R7 200 Series
|
||||
6613, 00, AMD Radeon R7 200 Series
|
||||
6617, 00, AMD Radeon R7 240 Series
|
||||
6617, 87, AMD Radeon R7 200 Series
|
||||
6617, C7, AMD Radeon R7 240 Series
|
||||
6640, 00, AMD Radeon HD 8950
|
||||
6640, 80, AMD Radeon R9 M380
|
||||
6646, 00, AMD Radeon R9 M280X
|
||||
6646, 80, AMD Radeon R9 M385
|
||||
6646, 80, AMD Radeon R9 M470X
|
||||
6647, 00, AMD Radeon R9 M200X Series
|
||||
6647, 80, AMD Radeon R9 M380
|
||||
6649, 00, AMD FirePro W5100
|
||||
6658, 00, AMD Radeon R7 200 Series
|
||||
665C, 00, AMD Radeon HD 7700 Series
|
||||
665D, 00, AMD Radeon R7 200 Series
|
||||
665F, 81, AMD Radeon R7 360 Series
|
||||
6660, 00, AMD Radeon HD 8600M Series
|
||||
6660, 81, AMD Radeon R5 M335
|
||||
6660, 83, AMD Radeon R5 M330
|
||||
6663, 00, AMD Radeon HD 8500M Series
|
||||
6663, 83, AMD Radeon R5 M320
|
||||
6664, 00, AMD Radeon R5 M200 Series
|
||||
6665, 00, AMD Radeon R5 M230 Series
|
||||
6665, 83, AMD Radeon R5 M320
|
||||
6665, C3, AMD Radeon R5 M435
|
||||
6666, 00, AMD Radeon R5 M200 Series
|
||||
6667, 00, AMD Radeon R5 M200 Series
|
||||
666F, 00, AMD Radeon HD 8500M
|
||||
66A1, 02, AMD Instinct MI60 / MI50
|
||||
66A1, 06, AMD Radeon Pro VII
|
||||
66AF, C1, AMD Radeon VII
|
||||
6780, 00, AMD FirePro W9000
|
||||
6784, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6788, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
678A, 00, AMD FirePro W8000
|
||||
6798, 00, AMD Radeon R9 200 / HD 7900 Series
|
||||
6799, 00, AMD Radeon HD 7900 Series
|
||||
679A, 00, AMD Radeon HD 7900 Series
|
||||
679B, 00, AMD Radeon HD 7900 Series
|
||||
679E, 00, AMD Radeon HD 7800 Series
|
||||
67A0, 00, AMD Radeon FirePro W9100
|
||||
67A1, 00, AMD Radeon FirePro W8100
|
||||
67B0, 00, AMD Radeon R9 200 Series
|
||||
67B0, 80, AMD Radeon R9 390 Series
|
||||
67B1, 00, AMD Radeon R9 200 Series
|
||||
67B1, 80, AMD Radeon R9 390 Series
|
||||
67B9, 00, AMD Radeon R9 200 Series
|
||||
67C0, 00, AMD Radeon Pro WX 7100 Graphics
|
||||
67C0, 80, AMD Radeon E9550
|
||||
67C2, 01, AMD Radeon Pro V7350x2
|
||||
67C2, 02, AMD Radeon Pro V7300X
|
||||
67C4, 00, AMD Radeon Pro WX 7100 Graphics
|
||||
67C4, 80, AMD Radeon E9560 / E9565 Graphics
|
||||
67C7, 00, AMD Radeon Pro WX 5100 Graphics
|
||||
67C7, 80, AMD Radeon E9390 Graphics
|
||||
67D0, 01, AMD Radeon Pro V7350x2
|
||||
67D0, 02, AMD Radeon Pro V7300X
|
||||
67DF, C0, AMD Radeon Pro 580X
|
||||
67DF, C1, AMD Radeon RX 580 Series
|
||||
67DF, C2, AMD Radeon RX 570 Series
|
||||
67DF, C3, AMD Radeon RX 580 Series
|
||||
67DF, C4, AMD Radeon RX 480 Graphics
|
||||
67DF, C5, AMD Radeon RX 470 Graphics
|
||||
67DF, C6, AMD Radeon RX 570 Series
|
||||
67DF, C7, AMD Radeon RX 480 Graphics
|
||||
67DF, CF, AMD Radeon RX 470 Graphics
|
||||
67DF, D7, AMD Radeon RX 470 Graphics
|
||||
67DF, E0, AMD Radeon RX 470 Series
|
||||
67DF, E1, AMD Radeon RX 590 Series
|
||||
67DF, E3, AMD Radeon RX Series
|
||||
67DF, E7, AMD Radeon RX 580 Series
|
||||
67DF, EB, AMD Radeon Pro 580X
|
||||
67DF, EF, AMD Radeon RX 570 Series
|
||||
67DF, F7, AMD Radeon RX P30PH
|
||||
67DF, FF, AMD Radeon RX 470 Series
|
||||
67E0, 00, AMD Radeon Pro WX Series
|
||||
67E3, 00, AMD Radeon Pro WX 4100
|
||||
67E8, 00, AMD Radeon Pro WX Series
|
||||
67E8, 01, AMD Radeon Pro WX Series
|
||||
67E8, 80, AMD Radeon E9260 Graphics
|
||||
67EB, 00, AMD Radeon Pro V5300X
|
||||
67EF, C0, AMD Radeon RX Graphics
|
||||
67EF, C1, AMD Radeon RX 460 Graphics
|
||||
67EF, C2, AMD Radeon Pro Series
|
||||
67EF, C3, AMD Radeon RX Series
|
||||
67EF, C5, AMD Radeon RX 460 Graphics
|
||||
67EF, C7, AMD Radeon RX Graphics
|
||||
67EF, CF, AMD Radeon RX 460 Graphics
|
||||
67EF, E0, AMD Radeon RX 560 Series
|
||||
67EF, E1, AMD Radeon RX Series
|
||||
67EF, E2, AMD Radeon RX 560X
|
||||
67EF, E3, AMD Radeon RX Series
|
||||
67EF, E5, AMD Radeon RX 560 Series
|
||||
67EF, E7, AMD Radeon RX 560 Series
|
||||
67EF, EF, AMD Radeon 550 Series
|
||||
67EF, FF, AMD Radeon RX 460 Graphics
|
||||
67FF, C0, AMD Radeon Pro 465
|
||||
67FF, C1, AMD Radeon RX 560 Series
|
||||
67FF, CF, AMD Radeon RX 560 Series
|
||||
67FF, EF, AMD Radeon RX 560 Series
|
||||
67FF, FF, AMD Radeon RX 550 Series
|
||||
6800, 00, AMD Radeon HD 7970M
|
||||
6801, 00, AMD Radeon HD 8970M
|
||||
6806, 00, AMD Radeon R9 M290X
|
||||
6808, 00, AMD FirePro W7000
|
||||
6808, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6809, 00, ATI FirePro W5000
|
||||
6810, 00, AMD Radeon R9 200 Series
|
||||
6810, 81, AMD Radeon R9 370 Series
|
||||
6811, 00, AMD Radeon R9 200 Series
|
||||
6811, 81, AMD Radeon R7 370 Series
|
||||
6818, 00, AMD Radeon HD 7800 Series
|
||||
6819, 00, AMD Radeon HD 7800 Series
|
||||
6820, 00, AMD Radeon R9 M275X
|
||||
6820, 81, AMD Radeon R9 M375
|
||||
6820, 83, AMD Radeon R9 M375X
|
||||
6821, 00, AMD Radeon R9 M200X Series
|
||||
6821, 83, AMD Radeon R9 M370X
|
||||
6821, 87, AMD Radeon R7 M380
|
||||
6822, 00, AMD Radeon E8860
|
||||
6823, 00, AMD Radeon R9 M200X Series
|
||||
6825, 00, AMD Radeon HD 7800M Series
|
||||
6826, 00, AMD Radeon HD 7700M Series
|
||||
6827, 00, AMD Radeon HD 7800M Series
|
||||
6828, 00, AMD FirePro W600
|
||||
682B, 00, AMD Radeon HD 8800M Series
|
||||
682B, 87, AMD Radeon R9 M360
|
||||
682C, 00, AMD FirePro W4100
|
||||
682D, 00, AMD Radeon HD 7700M Series
|
||||
682F, 00, AMD Radeon HD 7700M Series
|
||||
6830, 00, AMD Radeon 7800M Series
|
||||
6831, 00, AMD Radeon 7700M Series
|
||||
6835, 00, AMD Radeon R7 Series / HD 9000 Series
|
||||
6837, 00, AMD Radeon HD 7700 Series
|
||||
683D, 00, AMD Radeon HD 7700 Series
|
||||
683F, 00, AMD Radeon HD 7700 Series
|
||||
684C, 00, ATI FirePro V (FireGL V) Graphics Adapter
|
||||
6860, 00, AMD Radeon Instinct MI25
|
||||
6860, 01, AMD Radeon Instinct MI25
|
||||
6860, 02, AMD Radeon Instinct MI25
|
||||
6860, 03, AMD Radeon Pro V340
|
||||
6860, 04, AMD Radeon Instinct MI25x2
|
||||
6860, 07, AMD Radeon Pro V320
|
||||
6861, 00, AMD Radeon Pro WX 9100
|
||||
6862, 00, AMD Radeon Pro SSG
|
||||
6863, 00, AMD Radeon Vega Frontier Edition
|
||||
6864, 03, AMD Radeon Pro V340
|
||||
6864, 04, AMD Radeon Instinct MI25x2
|
||||
6864, 05, AMD Radeon Pro V340
|
||||
6868, 00, AMD Radeon Pro WX 8200
|
||||
686C, 00, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 01, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 02, AMD Radeon Instinct MI25 MxGPU
|
||||
686C, 03, AMD Radeon Pro V340 MxGPU
|
||||
686C, 04, AMD Radeon Instinct MI25x2 MxGPU
|
||||
686C, 05, AMD Radeon Pro V340L MxGPU
|
||||
686C, 06, AMD Radeon Instinct MI25 MxGPU
|
||||
687F, 01, AMD Radeon RX Vega
|
||||
687F, C0, AMD Radeon RX Vega
|
||||
687F, C1, AMD Radeon RX Vega
|
||||
687F, C3, AMD Radeon RX Vega
|
||||
687F, C7, AMD Radeon RX Vega
|
||||
6900, 00, AMD Radeon R7 M260
|
||||
6900, 81, AMD Radeon R7 M360
|
||||
6900, 83, AMD Radeon R7 M340
|
||||
6900, C1, AMD Radeon R5 M465 Series
|
||||
6900, C3, AMD Radeon R5 M445 Series
|
||||
6900, D1, AMD Radeon 530 Series
|
||||
6900, D3, AMD Radeon 530 Series
|
||||
6901, 00, AMD Radeon R5 M255
|
||||
6902, 00, AMD Radeon Series
|
||||
6907, 00, AMD Radeon R5 M255
|
||||
6907, 87, AMD Radeon R5 M315
|
||||
6920, 00, AMD Radeon R9 M395X
|
||||
6920, 01, AMD Radeon R9 M390X
|
||||
6921, 00, AMD Radeon R9 M390X
|
||||
6929, 00, AMD FirePro S7150
|
||||
6929, 01, AMD FirePro S7100X
|
||||
692B, 00, AMD FirePro W7100
|
||||
6938, 00, AMD Radeon R9 200 Series
|
||||
6938, F0, AMD Radeon R9 200 Series
|
||||
6938, F1, AMD Radeon R9 380 Series
|
||||
6939, 00, AMD Radeon R9 200 Series
|
||||
6939, F0, AMD Radeon R9 200 Series
|
||||
6939, F1, AMD Radeon R9 380 Series
|
||||
694C, C0, AMD Radeon RX Vega M GH Graphics
|
||||
694E, C0, AMD Radeon RX Vega M GL Graphics
|
||||
6980, 00, AMD Radeon Pro WX 3100
|
||||
6981, 00, AMD Radeon Pro WX 3200 Series
|
||||
6981, 01, AMD Radeon Pro WX 3200 Series
|
||||
6981, 10, AMD Radeon Pro WX 3200 Series
|
||||
6985, 00, AMD Radeon Pro WX 3100
|
||||
6986, 00, AMD Radeon Pro WX 2100
|
||||
6987, 80, AMD Embedded Radeon E9171
|
||||
6987, C0, AMD Radeon 550X Series
|
||||
6987, C1, AMD Radeon RX 640
|
||||
6987, C3, AMD Radeon 540X Series
|
||||
6987, C7, AMD Radeon 540
|
||||
6995, 00, AMD Radeon Pro WX 2100
|
||||
6997, 00, AMD Radeon Pro WX 2100
|
||||
699F, 81, AMD Embedded Radeon E9170 Series
|
||||
699F, C0, AMD Radeon 500 Series
|
||||
699F, C1, AMD Radeon 540 Series
|
||||
699F, C3, AMD Radeon 500 Series
|
||||
699F, C7, AMD Radeon RX 550 / 550 Series
|
||||
699F, C9, AMD Radeon 540
|
||||
6FDF, E7, AMD Radeon RX 590 GME
|
||||
6FDF, EF, AMD Radeon RX 580 2048SP
|
||||
7300, C1, AMD FirePro S9300 x2
|
||||
7300, C8, AMD Radeon R9 Fury Series
|
||||
7300, C9, AMD Radeon Pro Duo
|
||||
7300, CA, AMD Radeon R9 Fury Series
|
||||
7300, CB, AMD Radeon R9 Fury Series
|
||||
7312, 00, AMD Radeon Pro W5700
|
||||
731E, C6, AMD Radeon RX 5700XTB
|
||||
731E, C7, AMD Radeon RX 5700B
|
||||
731F, C0, AMD Radeon RX 5700 XT 50th Anniversary
|
||||
731F, C1, AMD Radeon RX 5700 XT
|
||||
731F, C2, AMD Radeon RX 5600M
|
||||
731F, C3, AMD Radeon RX 5700M
|
||||
731F, C4, AMD Radeon RX 5700
|
||||
731F, C5, AMD Radeon RX 5700 XT
|
||||
731F, CA, AMD Radeon RX 5600 XT
|
||||
731F, CB, AMD Radeon RX 5600 OEM
|
||||
7340, C1, AMD Radeon RX 5500M
|
||||
7340, C3, AMD Radeon RX 5300M
|
||||
7340, C5, AMD Radeon RX 5500 XT
|
||||
7340, C7, AMD Radeon RX 5500
|
||||
7340, C9, AMD Radeon RX 5500XTB
|
||||
7340, CF, AMD Radeon RX 5300
|
||||
7341, 00, AMD Radeon Pro W5500
|
||||
7347, 00, AMD Radeon Pro W5500M
|
||||
7360, 41, AMD Radeon Pro 5600M
|
||||
7360, C3, AMD Radeon Pro V520
|
||||
7362, C1, AMD Radeon Pro V540
|
||||
7362, C3, AMD Radeon Pro V520
|
||||
738C, 01, AMD Instinct MI100
|
||||
73A1, 00, AMD Radeon Pro V620
|
||||
73A3, 00, AMD Radeon Pro W6800
|
||||
73A5, C0, AMD Radeon RX 6950 XT
|
||||
73AE, 00, AMD Radeon Pro V620 MxGPU
|
||||
73AF, C0, AMD Radeon RX 6900 XT
|
||||
73BF, C0, AMD Radeon RX 6900 XT
|
||||
73BF, C1, AMD Radeon RX 6800 XT
|
||||
73BF, C3, AMD Radeon RX 6800
|
||||
73DF, C0, AMD Radeon RX 6750 XT
|
||||
73DF, C1, AMD Radeon RX 6700 XT
|
||||
73DF, C2, AMD Radeon RX 6800M
|
||||
73DF, C3, AMD Radeon RX 6800M
|
||||
73DF, C5, AMD Radeon RX 6700 XT
|
||||
73DF, CF, AMD Radeon RX 6700M
|
||||
73DF, D5, AMD Radeon RX 6750 GRE 12GB
|
||||
73DF, D7, AMD TDC-235
|
||||
73DF, DF, AMD Radeon RX 6700
|
||||
73DF, E5, AMD Radeon RX 6750 GRE 12GB
|
||||
73DF, FF, AMD Radeon RX 6700
|
||||
73E0, 00, AMD Radeon RX 6600M
|
||||
73E1, 00, AMD Radeon Pro W6600M
|
||||
73E3, 00, AMD Radeon Pro W6600
|
||||
73EF, C0, AMD Radeon RX 6800S
|
||||
73EF, C1, AMD Radeon RX 6650 XT
|
||||
73EF, C2, AMD Radeon RX 6700S
|
||||
73EF, C3, AMD Radeon RX 6650M
|
||||
73EF, C4, AMD Radeon RX 6650M XT
|
||||
73FF, C1, AMD Radeon RX 6600 XT
|
||||
73FF, C3, AMD Radeon RX 6600M
|
||||
73FF, C7, AMD Radeon RX 6600
|
||||
73FF, CB, AMD Radeon RX 6600S
|
||||
73FF, CF, AMD Radeon RX 6600 LE
|
||||
73FF, DF, AMD Radeon RX 6750 GRE 10GB
|
||||
7408, 00, AMD Instinct MI250X
|
||||
740C, 01, AMD Instinct MI250X / MI250
|
||||
740F, 02, AMD Instinct MI210
|
||||
7421, 00, AMD Radeon Pro W6500M
|
||||
7422, 00, AMD Radeon Pro W6400
|
||||
7423, 00, AMD Radeon Pro W6300M
|
||||
7423, 01, AMD Radeon Pro W6300
|
||||
7424, 00, AMD Radeon RX 6300
|
||||
743F, C1, AMD Radeon RX 6500 XT
|
||||
743F, C3, AMD Radeon RX 6500
|
||||
743F, C3, AMD Radeon RX 6500M
|
||||
743F, C7, AMD Radeon RX 6400
|
||||
743F, C8, AMD Radeon RX 6500M
|
||||
743F, CC, AMD Radeon 6550S
|
||||
743F, CE, AMD Radeon RX 6450M
|
||||
743F, CF, AMD Radeon RX 6300M
|
||||
743F, D3, AMD Radeon RX 6550M
|
||||
743F, D7, AMD Radeon RX 6400
|
||||
7448, 00, AMD Radeon Pro W7900
|
||||
7449, 00, AMD Radeon Pro W7800 48GB
|
||||
744A, 00, AMD Radeon Pro W7900 Dual Slot
|
||||
744B, 00, AMD Radeon Pro W7900D
|
||||
744C, C8, AMD Radeon RX 7900 XTX
|
||||
744C, CC, AMD Radeon RX 7900 XT
|
||||
744C, CE, AMD Radeon RX 7900 GRE
|
||||
744C, CF, AMD Radeon RX 7900M
|
||||
745E, CC, AMD Radeon Pro W7800
|
||||
7460, 00, AMD Radeon Pro V710
|
||||
7461, 00, AMD Radeon Pro V710 MxGPU
|
||||
7470, 00, AMD Radeon Pro W7700
|
||||
747E, C8, AMD Radeon RX 7800 XT
|
||||
747E, D8, AMD Radeon RX 7800M
|
||||
747E, DB, AMD Radeon RX 7700
|
||||
747E, FF, AMD Radeon RX 7700 XT
|
||||
7480, 00, AMD Radeon Pro W7600
|
||||
7480, C0, AMD Radeon RX 7600 XT
|
||||
7480, C1, AMD Radeon RX 7700S
|
||||
7480, C2, AMD Radeon RX 7650 GRE
|
||||
7480, C3, AMD Radeon RX 7600S
|
||||
7480, C7, AMD Radeon RX 7600M XT
|
||||
7480, CF, AMD Radeon RX 7600
|
||||
7481, C7, AMD Steam Machine
|
||||
7483, CF, AMD Radeon RX 7600M
|
||||
7489, 00, AMD Radeon Pro W7500
|
||||
7499, 00, AMD Radeon Pro W7400
|
||||
7499, C0, AMD Radeon RX 7400
|
||||
7499, C1, AMD Radeon RX 7300
|
||||
74A0, 00, AMD Instinct MI300A
|
||||
74A1, 00, AMD Instinct MI300X
|
||||
74A2, 00, AMD Instinct MI308X
|
||||
74A5, 00, AMD Instinct MI325X
|
||||
74A8, 00, AMD Instinct MI308X HF
|
||||
74A9, 00, AMD Instinct MI300X HF
|
||||
74B5, 00, AMD Instinct MI300X VF
|
||||
74B6, 00, AMD Instinct MI308X
|
||||
74BD, 00, AMD Instinct MI300X HF
|
||||
7550, C0, AMD Radeon RX 9070 XT
|
||||
7550, C2, AMD Radeon RX 9070 GRE
|
||||
7550, C3, AMD Radeon RX 9070
|
||||
7551, C0, AMD Radeon AI PRO R9700
|
||||
7590, C0, AMD Radeon RX 9060 XT
|
||||
7590, C7, AMD Radeon RX 9060
|
||||
75A0, C0, AMD Instinct MI350X
|
||||
75A3, C0, AMD Instinct MI355X
|
||||
75B0, C0, AMD Instinct MI350X VF
|
||||
75B3, C0, AMD Instinct MI355X VF
|
||||
9830, 00, AMD Radeon HD 8400 / R3 Series
|
||||
9831, 00, AMD Radeon HD 8400E
|
||||
9832, 00, AMD Radeon HD 8330
|
||||
9833, 00, AMD Radeon HD 8330E
|
||||
9834, 00, AMD Radeon HD 8210
|
||||
9835, 00, AMD Radeon HD 8210E
|
||||
9836, 00, AMD Radeon HD 8200 / R3 Series
|
||||
9837, 00, AMD Radeon HD 8280E
|
||||
9838, 00, AMD Radeon HD 8200 / R3 series
|
||||
9839, 00, AMD Radeon HD 8180
|
||||
983D, 00, AMD Radeon HD 8250
|
||||
9850, 00, AMD Radeon R3 Graphics
|
||||
9850, 03, AMD Radeon R3 Graphics
|
||||
9850, 40, AMD Radeon R2 Graphics
|
||||
9850, 45, AMD Radeon R3 Graphics
|
||||
9851, 00, AMD Radeon R4 Graphics
|
||||
9851, 01, AMD Radeon R5E Graphics
|
||||
9851, 05, AMD Radeon R5 Graphics
|
||||
9851, 06, AMD Radeon R5E Graphics
|
||||
9851, 40, AMD Radeon R4 Graphics
|
||||
9851, 45, AMD Radeon R5 Graphics
|
||||
9852, 00, AMD Radeon R2 Graphics
|
||||
9852, 40, AMD Radeon E1 Graphics
|
||||
9853, 00, AMD Radeon R2 Graphics
|
||||
9853, 01, AMD Radeon R4E Graphics
|
||||
9853, 03, AMD Radeon R2 Graphics
|
||||
9853, 05, AMD Radeon R1E Graphics
|
||||
9853, 06, AMD Radeon R1E Graphics
|
||||
9853, 07, AMD Radeon R1E Graphics
|
||||
9853, 08, AMD Radeon R1E Graphics
|
||||
9853, 40, AMD Radeon R2 Graphics
|
||||
9854, 00, AMD Radeon R3 Graphics
|
||||
9854, 01, AMD Radeon R3E Graphics
|
||||
9854, 02, AMD Radeon R3 Graphics
|
||||
9854, 05, AMD Radeon R2 Graphics
|
||||
9854, 06, AMD Radeon R4 Graphics
|
||||
9854, 07, AMD Radeon R3 Graphics
|
||||
9855, 02, AMD Radeon R6 Graphics
|
||||
9855, 05, AMD Radeon R4 Graphics
|
||||
9856, 00, AMD Radeon R2 Graphics
|
||||
9856, 01, AMD Radeon R2E Graphics
|
||||
9856, 02, AMD Radeon R2 Graphics
|
||||
9856, 05, AMD Radeon R1E Graphics
|
||||
9856, 06, AMD Radeon R2 Graphics
|
||||
9856, 07, AMD Radeon R1E Graphics
|
||||
9856, 08, AMD Radeon R1E Graphics
|
||||
9856, 13, AMD Radeon R1E Graphics
|
||||
9874, 81, AMD Radeon R6 Graphics
|
||||
9874, 84, AMD Radeon R7 Graphics
|
||||
9874, 85, AMD Radeon R6 Graphics
|
||||
9874, 87, AMD Radeon R5 Graphics
|
||||
9874, 88, AMD Radeon R7E Graphics
|
||||
9874, 89, AMD Radeon R6E Graphics
|
||||
9874, C4, AMD Radeon R7 Graphics
|
||||
9874, C5, AMD Radeon R6 Graphics
|
||||
9874, C6, AMD Radeon R6 Graphics
|
||||
9874, C7, AMD Radeon R5 Graphics
|
||||
9874, C8, AMD Radeon R7 Graphics
|
||||
9874, C9, AMD Radeon R7 Graphics
|
||||
9874, CA, AMD Radeon R5 Graphics
|
||||
9874, CB, AMD Radeon R5 Graphics
|
||||
9874, CC, AMD Radeon R7 Graphics
|
||||
9874, CD, AMD Radeon R7 Graphics
|
||||
9874, CE, AMD Radeon R5 Graphics
|
||||
9874, E1, AMD Radeon R7 Graphics
|
||||
9874, E2, AMD Radeon R7 Graphics
|
||||
9874, E3, AMD Radeon R7 Graphics
|
||||
9874, E4, AMD Radeon R7 Graphics
|
||||
9874, E5, AMD Radeon R5 Graphics
|
||||
9874, E6, AMD Radeon R5 Graphics
|
||||
98E4, 80, AMD Radeon R5E Graphics
|
||||
98E4, 81, AMD Radeon R4E Graphics
|
||||
98E4, 83, AMD Radeon R2E Graphics
|
||||
98E4, 84, AMD Radeon R2E Graphics
|
||||
98E4, 86, AMD Radeon R1E Graphics
|
||||
98E4, C0, AMD Radeon R4 Graphics
|
||||
98E4, C1, AMD Radeon R5 Graphics
|
||||
98E4, C2, AMD Radeon R4 Graphics
|
||||
98E4, C4, AMD Radeon R5 Graphics
|
||||
98E4, C6, AMD Radeon R5 Graphics
|
||||
98E4, C8, AMD Radeon R4 Graphics
|
||||
98E4, C9, AMD Radeon R4 Graphics
|
||||
98E4, CA, AMD Radeon R5 Graphics
|
||||
98E4, D0, AMD Radeon R2 Graphics
|
||||
98E4, D1, AMD Radeon R2 Graphics
|
||||
98E4, D2, AMD Radeon R2 Graphics
|
||||
98E4, D4, AMD Radeon R2 Graphics
|
||||
98E4, D9, AMD Radeon R5 Graphics
|
||||
98E4, DA, AMD Radeon R5 Graphics
|
||||
98E4, DB, AMD Radeon R3 Graphics
|
||||
98E4, E1, AMD Radeon R3 Graphics
|
||||
98E4, E2, AMD Radeon R3 Graphics
|
||||
98E4, E9, AMD Radeon R4 Graphics
|
||||
98E4, EA, AMD Radeon R4 Graphics
|
||||
98E4, EB, AMD Radeon R3 Graphics
|
||||
98E4, EB, AMD Radeon R4 Graphics
|
||||
34
agent/test-data/nvtop.json
Normal file
34
agent/test-data/nvtop.json
Normal file
@@ -0,0 +1,34 @@
|
||||
[
|
||||
{
|
||||
"device_name": "NVIDIA GeForce RTX 3050 Ti Laptop GPU",
|
||||
"gpu_clock": "1485MHz",
|
||||
"mem_clock": "6001MHz",
|
||||
"temp": "48C",
|
||||
"fan_speed": null,
|
||||
"power_draw": "13W",
|
||||
"gpu_util": "5%",
|
||||
"encode": "0%",
|
||||
"decode": "0%",
|
||||
"mem_util": "8%",
|
||||
"mem_total": "4294967296",
|
||||
"mem_used": "349372416",
|
||||
"mem_free": "3945594880",
|
||||
"processes" : []
|
||||
},
|
||||
{
|
||||
"device_name": "AMD Radeon 680M",
|
||||
"gpu_clock": "2200MHz",
|
||||
"mem_clock": "2400MHz",
|
||||
"temp": "48C",
|
||||
"fan_speed": "CPU Fan",
|
||||
"power_draw": "9W",
|
||||
"gpu_util": "12%",
|
||||
"encode": null,
|
||||
"decode": "0%",
|
||||
"mem_util": "7%",
|
||||
"mem_total": "16929173504",
|
||||
"mem_used": "1213784064",
|
||||
"mem_free": "15715389440",
|
||||
"processes" : []
|
||||
}
|
||||
]
|
||||
@@ -2,18 +2,18 @@ package alerts
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/pocketbase/pocketbase/core"
|
||||
)
|
||||
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state changes from PASSED to FAILED.
|
||||
// handleSmartDeviceAlert sends alerts when a SMART device state worsens into WARNING/FAILED.
|
||||
// This is automatic and does not require user opt-in.
|
||||
func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
oldState := e.Record.Original().GetString("state")
|
||||
newState := e.Record.GetString("state")
|
||||
|
||||
// Only alert when transitioning from PASSED to FAILED
|
||||
if oldState != "PASSED" || newState != "FAILED" {
|
||||
if !shouldSendSmartDeviceAlert(oldState, newState) {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
@@ -32,14 +32,15 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
systemName := systemRecord.GetString("name")
|
||||
deviceName := e.Record.GetString("name")
|
||||
model := e.Record.GetString("model")
|
||||
statusLabel := smartStateLabel(newState)
|
||||
|
||||
// Build alert message
|
||||
title := fmt.Sprintf("SMART failure on %s: %s \U0001F534", systemName, deviceName)
|
||||
title := fmt.Sprintf("SMART %s on %s: %s %s", statusLabel, systemName, deviceName, smartStateEmoji(newState))
|
||||
var message string
|
||||
if model != "" {
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to FAILED", deviceName, model)
|
||||
message = fmt.Sprintf("Disk %s (%s) SMART status changed to %s", deviceName, model, newState)
|
||||
} else {
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to FAILED", deviceName)
|
||||
message = fmt.Sprintf("Disk %s SMART status changed to %s", deviceName, newState)
|
||||
}
|
||||
|
||||
// Get users associated with the system
|
||||
@@ -65,3 +66,42 @@ func (am *AlertManager) handleSmartDeviceAlert(e *core.RecordEvent) error {
|
||||
return e.Next()
|
||||
}
|
||||
|
||||
// shouldSendSmartDeviceAlert reports whether a SMART state change from
// oldState to newState warrants an alert.
//
// An alert is sent only when the previous state is a recognized one (PASSED,
// WARNING or FAILED) and the new state is strictly more severe. Recoveries,
// unchanged states, and transitions out of an unrecognized state are ignored.
func shouldSendSmartDeviceAlert(oldState, newState string) bool {
	oldSeverity := smartStateSeverity(oldState)
	newSeverity := smartStateSeverity(newState)

	// Severity 0 means "unrecognized". Requiring oldSeverity >= 1 keeps a
	// transition such as UNKNOWN -> FAILED from alerting, and requiring a
	// strictly higher new severity filters out recoveries and no-op updates.
	return oldSeverity >= 1 && newSeverity > oldSeverity
}

// smartStateSeverity ranks a SMART state string so states can be compared:
// PASSED is 1, WARNING is 2 and FAILED is 3. Any other value (including the
// empty string) ranks as 0. Matching is case-sensitive.
func smartStateSeverity(state string) int {
	switch state {
	case "PASSED":
		return 1
	case "WARNING":
		return 2
	case "FAILED":
		return 3
	default:
		return 0
	}
}
|
||||
|
||||
// smartStateEmoji returns the indicator emoji appended to a SMART alert
// title: an orange circle (U+1F7E0) for WARNING and a red circle (U+1F534)
// for every other state.
func smartStateEmoji(state string) string {
	switch state {
	case "WARNING":
		return "\U0001F7E0"
	default:
		return "\U0001F534"
	}
}
|
||||
|
||||
// smartStateLabel returns the word used for a SMART state in alert titles.
// FAILED is rendered as "failure"; any other state is rendered as its
// lower-cased name (for example WARNING becomes "warning").
func smartStateLabel(state string) string {
	switch state {
	case "FAILED":
		return "failure"
	default:
		return strings.ToLower(state)
	}
}
|
||||
|
||||
@@ -58,6 +58,74 @@ func TestSmartDeviceAlert(t *testing.T) {
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertPassedToWarning(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "PASSED",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "WARNING")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed to WARNING")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART warning on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "WARNING")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertWarningToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
|
||||
system, err := beszelTests.CreateRecord(hub, "systems", map[string]any{
|
||||
"name": "test-system",
|
||||
"users": []string{user.Id},
|
||||
"host": "127.0.0.1",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err := beszelTests.CreateRecord(hub, "smart_devices", map[string]any{
|
||||
"system": system.Id,
|
||||
"name": "/dev/mmcblk0",
|
||||
"model": "eMMC",
|
||||
"state": "WARNING",
|
||||
})
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
assert.EqualValues(t, 1, hub.TestMailer.TotalSend(), "should have 1 email sent after state changed from WARNING to FAILED")
|
||||
lastMessage := hub.TestMailer.LastMessage()
|
||||
assert.Contains(t, lastMessage.Subject, "SMART failure on test-system")
|
||||
assert.Contains(t, lastMessage.Text, "FAILED")
|
||||
}
|
||||
|
||||
func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
hub, user := beszelTests.GetHubWithUser(t)
|
||||
defer hub.Cleanup()
|
||||
@@ -83,7 +151,8 @@ func TestSmartDeviceAlertNoAlertOnNonPassedToFailed(t *testing.T) {
|
||||
smartDevice, err = hub.FindRecordById("smart_devices", smartDevice.Id)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert
|
||||
// Update the state from UNKNOWN to FAILED - should NOT trigger alert.
|
||||
// We only alert from known healthy/degraded states.
|
||||
smartDevice.Set("state", "FAILED")
|
||||
err = hub.Save(smartDevice)
|
||||
assert.NoError(t, err)
|
||||
|
||||
@@ -23,6 +23,9 @@ COPY --from=builder /agent /agent
|
||||
# this is so we don't need to create the /tmp directory in the scratch container
|
||||
COPY --from=builder /tmp /tmp
|
||||
|
||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
||||
|
||||
# Ensure data persistence across container recreations
|
||||
VOLUME ["/var/lib/beszel-agent"]
|
||||
|
||||
|
||||
@@ -20,6 +20,9 @@ RUN rm -rf /tmp/*
|
||||
FROM alpine:3.23
|
||||
COPY --from=builder /agent /agent
|
||||
|
||||
# AMD GPU name lookup (used by agent on Linux when /usr/share/libdrm/amdgpu.ids is read)
|
||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
||||
|
||||
RUN apk add --no-cache smartmontools
|
||||
|
||||
# Ensure data persistence across container recreations
|
||||
|
||||
@@ -37,6 +37,9 @@ RUN apt-get update && apt-get install -y \
|
||||
FROM nvidia/cuda:12.2.2-base-ubuntu22.04
|
||||
COPY --from=builder /agent /agent
|
||||
|
||||
# AMD GPU name lookup (used by agent on hybrid laptops when /usr/share/libdrm/amdgpu.ids is read)
|
||||
COPY --from=builder /app/agent/test-data/amdgpu.ids /usr/share/libdrm/amdgpu.ids
|
||||
|
||||
# Copy smartmontools binaries and config files
|
||||
COPY --from=smartmontools-builder /usr/sbin/smartctl /usr/sbin/smartctl
|
||||
|
||||
|
||||
@@ -54,36 +54,34 @@ export default function ContainersTable({ systemId }: { systemId?: string }) {
|
||||
fields: "id,name,image,cpu,memory,net,health,status,system,updated",
|
||||
filter: systemId ? pb.filter("system={:system}", { system: systemId }) : undefined,
|
||||
})
|
||||
.then(
|
||||
({ items }) => {
|
||||
if (items.length === 0) {
|
||||
setData((curItems) => {
|
||||
if (systemId) {
|
||||
return curItems?.filter((item) => item.system !== systemId) ?? []
|
||||
}
|
||||
return []
|
||||
})
|
||||
return
|
||||
}
|
||||
.then(({ items }) => {
|
||||
if (items.length === 0) {
|
||||
setData((curItems) => {
|
||||
const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
|
||||
const containerIds = new Set()
|
||||
const newItems = []
|
||||
for (const item of items) {
|
||||
if (Math.abs(lastUpdated - item.updated) < 70_000) {
|
||||
containerIds.add(item.id)
|
||||
newItems.push(item)
|
||||
}
|
||||
if (systemId) {
|
||||
return curItems?.filter((item) => item.system !== systemId) ?? []
|
||||
}
|
||||
for (const item of curItems ?? []) {
|
||||
if (!containerIds.has(item.id) && lastUpdated - item.updated < 70_000) {
|
||||
newItems.push(item)
|
||||
}
|
||||
}
|
||||
return newItems
|
||||
return []
|
||||
})
|
||||
return
|
||||
}
|
||||
)
|
||||
setData((curItems) => {
|
||||
const lastUpdated = Math.max(items[0].updated, items.at(-1)?.updated ?? 0)
|
||||
const containerIds = new Set()
|
||||
const newItems = []
|
||||
for (const item of items) {
|
||||
if (Math.abs(lastUpdated - item.updated) < 70_000) {
|
||||
containerIds.add(item.id)
|
||||
newItems.push(item)
|
||||
}
|
||||
}
|
||||
for (const item of curItems ?? []) {
|
||||
if (!containerIds.has(item.id) && lastUpdated - item.updated < 70_000) {
|
||||
newItems.push(item)
|
||||
}
|
||||
}
|
||||
return newItems
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// initial load
|
||||
@@ -285,7 +283,7 @@ async function getInfoHtml(container: ContainerRecord): Promise<string> {
|
||||
])
|
||||
try {
|
||||
info = JSON.stringify(JSON.parse(info), null, 2)
|
||||
} catch (_) { }
|
||||
} catch (_) {}
|
||||
return info ? highlighter.codeToHtml(info, { lang: "json", theme: syntaxTheme }) : t`No results.`
|
||||
} catch (error) {
|
||||
console.error(error)
|
||||
@@ -342,12 +340,12 @@ function ContainerSheet({
|
||||
setLogsDisplay("")
|
||||
setInfoDisplay("")
|
||||
if (!container) return
|
||||
; (async () => {
|
||||
const [logsHtml, infoHtml] = await Promise.all([getLogsHtml(container), getInfoHtml(container)])
|
||||
setLogsDisplay(logsHtml)
|
||||
setInfoDisplay(infoHtml)
|
||||
setTimeout(scrollLogsToBottom, 20)
|
||||
})()
|
||||
;(async () => {
|
||||
const [logsHtml, infoHtml] = await Promise.all([getLogsHtml(container), getInfoHtml(container)])
|
||||
setLogsDisplay(logsHtml)
|
||||
setInfoDisplay(infoHtml)
|
||||
setTimeout(scrollLogsToBottom, 20)
|
||||
})()
|
||||
}, [container])
|
||||
|
||||
return (
|
||||
@@ -473,7 +471,7 @@ const ContainerTableRow = memo(function ContainerTableRow({
|
||||
{row.getVisibleCells().map((cell) => (
|
||||
<TableCell
|
||||
key={cell.id}
|
||||
className="py-0"
|
||||
className="py-0 ps-4.5"
|
||||
style={{
|
||||
height: virtualRow.size,
|
||||
}}
|
||||
|
||||
@@ -19,7 +19,7 @@ import { FreeBsdIcon, TuxIcon, WebSocketIcon, WindowsIcon } from "@/components/u
|
||||
import { Separator } from "@/components/ui/separator"
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
|
||||
import { ConnectionType, connectionTypeLabels, Os, SystemStatus } from "@/lib/enums"
|
||||
import { cn, formatBytes, getHostDisplayValue, secondsToString, toFixedFloat } from "@/lib/utils"
|
||||
import { cn, formatBytes, getHostDisplayValue, secondsToUptimeString, toFixedFloat } from "@/lib/utils"
|
||||
import type { ChartData, SystemDetailsRecord, SystemRecord } from "@/types"
|
||||
|
||||
export default function InfoBar({
|
||||
@@ -77,14 +77,6 @@ export default function InfoBar({
|
||||
},
|
||||
}
|
||||
|
||||
let uptime: string
|
||||
if (system.info.u < 3600) {
|
||||
uptime = secondsToString(system.info.u, "minute")
|
||||
} else if (system.info.u < 360000) {
|
||||
uptime = secondsToString(system.info.u, "hour")
|
||||
} else {
|
||||
uptime = secondsToString(system.info.u, "day")
|
||||
}
|
||||
const info = [
|
||||
{ value: getHostDisplayValue(system), Icon: GlobeIcon },
|
||||
{
|
||||
@@ -94,7 +86,7 @@ export default function InfoBar({
|
||||
// hide if hostname is same as host or name
|
||||
hide: hostname === system.host || hostname === system.name,
|
||||
},
|
||||
{ value: uptime, Icon: ClockArrowUp, label: t`Uptime`, hide: !system.info.u },
|
||||
{ value: secondsToUptimeString(system.info.u), Icon: ClockArrowUp, label: t`Uptime`, hide: !system.info.u },
|
||||
osInfo[os],
|
||||
{
|
||||
value: cpuModel,
|
||||
|
||||
@@ -174,8 +174,8 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
<HeaderButton column={column} name={t({ message: "Power On", comment: "Power On Time" })} Icon={Clock} />
|
||||
),
|
||||
cell: ({ getValue }) => {
|
||||
const hours = (getValue() ?? 0) as number
|
||||
if (!hours && hours !== 0) {
|
||||
const hours = getValue() as number | undefined
|
||||
if (hours == null) {
|
||||
return <div className="text-sm text-muted-foreground ms-1.5">N/A</div>
|
||||
}
|
||||
const seconds = hours * 3600
|
||||
@@ -195,7 +195,7 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
),
|
||||
cell: ({ getValue }) => {
|
||||
const cycles = getValue() as number | undefined
|
||||
if (!cycles && cycles !== 0) {
|
||||
if (cycles == null) {
|
||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
||||
}
|
||||
return <span className="ms-1.5">{cycles.toLocaleString()}</span>
|
||||
@@ -206,7 +206,11 @@ export const columns: ColumnDef<SmartDeviceRecord>[] = [
|
||||
invertSorting: true,
|
||||
header: ({ column }) => <HeaderButton column={column} name={t`Temp`} Icon={ThermometerIcon} />,
|
||||
cell: ({ getValue }) => {
|
||||
const { value, unit } = formatTemperature(getValue() as number)
|
||||
const temp = getValue() as number | null | undefined
|
||||
if (!temp) {
|
||||
return <div className="text-muted-foreground ms-1.5">N/A</div>
|
||||
}
|
||||
const { value, unit } = formatTemperature(temp)
|
||||
return <span className="ms-1.5">{`${value} ${unit}`}</span>
|
||||
},
|
||||
},
|
||||
@@ -304,41 +308,41 @@ export default function DisksTable({ systemId }: { systemId?: string }) {
|
||||
? { fields: SMART_DEVICE_FIELDS, filter: pb.filter("system = {:system}", { system: systemId }) }
|
||||
: { fields: SMART_DEVICE_FIELDS }
|
||||
|
||||
; (async () => {
|
||||
try {
|
||||
unsubscribe = await pb.collection("smart_devices").subscribe(
|
||||
"*",
|
||||
(event) => {
|
||||
const record = event.record as SmartDeviceRecord
|
||||
setSmartDevices((currentDevices) => {
|
||||
const devices = currentDevices ?? []
|
||||
const matchesSystemScope = !systemId || record.system === systemId
|
||||
;(async () => {
|
||||
try {
|
||||
unsubscribe = await pb.collection("smart_devices").subscribe(
|
||||
"*",
|
||||
(event) => {
|
||||
const record = event.record as SmartDeviceRecord
|
||||
setSmartDevices((currentDevices) => {
|
||||
const devices = currentDevices ?? []
|
||||
const matchesSystemScope = !systemId || record.system === systemId
|
||||
|
||||
if (event.action === "delete") {
|
||||
return devices.filter((device) => device.id !== record.id)
|
||||
}
|
||||
if (event.action === "delete") {
|
||||
return devices.filter((device) => device.id !== record.id)
|
||||
}
|
||||
|
||||
if (!matchesSystemScope) {
|
||||
// Record moved out of scope; ensure it disappears locally.
|
||||
return devices.filter((device) => device.id !== record.id)
|
||||
}
|
||||
if (!matchesSystemScope) {
|
||||
// Record moved out of scope; ensure it disappears locally.
|
||||
return devices.filter((device) => device.id !== record.id)
|
||||
}
|
||||
|
||||
const existingIndex = devices.findIndex((device) => device.id === record.id)
|
||||
if (existingIndex === -1) {
|
||||
return [record, ...devices]
|
||||
}
|
||||
const existingIndex = devices.findIndex((device) => device.id === record.id)
|
||||
if (existingIndex === -1) {
|
||||
return [record, ...devices]
|
||||
}
|
||||
|
||||
const next = [...devices]
|
||||
next[existingIndex] = record
|
||||
return next
|
||||
})
|
||||
},
|
||||
pbOptions
|
||||
)
|
||||
} catch (error) {
|
||||
console.error("Failed to subscribe to SMART device updates:", error)
|
||||
}
|
||||
})()
|
||||
const next = [...devices]
|
||||
next[existingIndex] = record
|
||||
return next
|
||||
})
|
||||
},
|
||||
pbOptions
|
||||
)
|
||||
} catch (error) {
|
||||
console.error("Failed to subscribe to SMART device updates:", error)
|
||||
}
|
||||
})()
|
||||
|
||||
return () => {
|
||||
unsubscribe?.()
|
||||
|
||||
@@ -35,7 +35,7 @@ import {
|
||||
formatTemperature,
|
||||
getMeterState,
|
||||
parseSemVer,
|
||||
secondsToString,
|
||||
secondsToUptimeString,
|
||||
} from "@/lib/utils"
|
||||
import { batteryStateTranslations } from "@/lib/i18n"
|
||||
import type { SystemRecord } from "@/types"
|
||||
@@ -154,11 +154,7 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
||||
{name}
|
||||
</Link>
|
||||
</span>
|
||||
<Link
|
||||
href={linkUrl}
|
||||
className="inset-0 absolute size-full"
|
||||
aria-label={name}
|
||||
></Link>
|
||||
<Link href={linkUrl} className="inset-0 absolute size-full" aria-label={name}></Link>
|
||||
</>
|
||||
)
|
||||
},
|
||||
@@ -382,20 +378,13 @@ export function SystemsTableColumns(viewMode: "table" | "grid"): ColumnDef<Syste
|
||||
size: 50,
|
||||
Icon: ClockArrowUp,
|
||||
header: sortableHeader,
|
||||
hideSort: true,
|
||||
cell(info) {
|
||||
const uptime = info.getValue() as number
|
||||
if (!uptime) {
|
||||
return null
|
||||
}
|
||||
let formatted: string
|
||||
if (uptime < 3600) {
|
||||
formatted = secondsToString(uptime, "minute")
|
||||
} else if (uptime < 360000) {
|
||||
formatted = secondsToString(uptime, "hour")
|
||||
} else {
|
||||
formatted = secondsToString(uptime, "day")
|
||||
}
|
||||
return <span className="tabular-nums whitespace-nowrap">{formatted}</span>
|
||||
return <span className="tabular-nums whitespace-nowrap">{secondsToUptimeString(uptime)}</span>
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -479,9 +468,9 @@ function TableCellWithMeter(info: CellContext<SystemRecord, unknown>) {
|
||||
const meterClass = cn(
|
||||
"h-full",
|
||||
(info.row.original.status !== SystemStatus.Up && STATUS_COLORS.paused) ||
|
||||
(threshold === MeterState.Good && STATUS_COLORS.up) ||
|
||||
(threshold === MeterState.Warn && STATUS_COLORS.pending) ||
|
||||
STATUS_COLORS.down
|
||||
(threshold === MeterState.Good && STATUS_COLORS.up) ||
|
||||
(threshold === MeterState.Warn && STATUS_COLORS.pending) ||
|
||||
STATUS_COLORS.down
|
||||
)
|
||||
return (
|
||||
<div className="flex gap-2 items-center tabular-nums tracking-tight w-full">
|
||||
@@ -593,7 +582,7 @@ export function IndicatorDot({ system, className }: { system: SystemRecord; clas
|
||||
return (
|
||||
<span
|
||||
className={cn("shrink-0 size-2 rounded-full", className)}
|
||||
// style={{ marginBottom: "-1px" }}
|
||||
// style={{ marginBottom: "-1px" }}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
@@ -434,7 +434,7 @@ const SystemTableRow = memo(
|
||||
width: cell.column.getSize(),
|
||||
height: virtualRow.size,
|
||||
}}
|
||||
className="py-0"
|
||||
className="py-0 ps-4.5"
|
||||
>
|
||||
{flexRender(cell.column.columnDef.cell, cell.getContext())}
|
||||
</TableCell>
|
||||
|
||||
@@ -465,4 +465,15 @@ export function secondsToString(seconds: number, unit: "hour" | "minute" | "day"
|
||||
case "day":
|
||||
return plural(count, { one: `${countString} day`, other: `${countString} days` })
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Format an uptime in seconds as a human-readable string, picking the unit by magnitude:
 * minutes when below 3600 seconds, hours when below 360000 seconds, and days otherwise.
 * The formatting itself is delegated to `secondsToString`.
 */
export function secondsToUptimeString(seconds: number): string {
	// Default to the coarsest unit; narrow it only when the value falls under a threshold.
	let unit: "minute" | "hour" | "day" = "day"
	if (seconds < 3600) {
		unit = "minute"
	} else if (seconds < 360000) {
		unit = "hour"
	}
	return secondsToString(seconds, unit)
}
|
||||
@@ -51,7 +51,7 @@ The [quick start guide](https://beszel.dev/guide/getting-started) and other docu
|
||||
- **GPU usage / power draw** - Nvidia, AMD, and Intel.
|
||||
- **Battery** - Host system battery charge.
|
||||
- **Containers** - Status and metrics of all running Docker / Podman containers.
|
||||
- **S.M.A.R.T.** - Host system disk health.
|
||||
- **S.M.A.R.T.** - Host system disk health (includes eMMC wear/EOL via Linux sysfs when available).
|
||||
|
||||
## Help and discussion
|
||||
|
||||
|
||||
Reference in New Issue
Block a user