Compare commits

..

3 Commits

Author SHA1 Message Date
Sven van Ginkel
de1ad26fc2 Add extra disks to system table (#1365) 2025-11-12 14:17:07 -05:00
henrygd
a7ef1235f4 specify latest tag for non-alpine agent image
also change capitalization for gpu alert
2025-11-11 16:18:54 -05:00
henrygd
f64a361c60 add EXCLUDE_SMART env var (#1392) 2025-11-11 16:05:00 -05:00
8 changed files with 307 additions and 39 deletions

View File

@@ -33,6 +33,7 @@ jobs:
password_secret: DOCKERHUB_TOKEN
tags: |
type=raw,value=edge
type=raw,value=latest
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}
@@ -99,6 +100,7 @@ jobs:
password_secret: GITHUB_TOKEN
tags: |
type=raw,value=edge
type=raw,value=latest
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=semver,pattern={{major}}

View File

@@ -166,7 +166,7 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
}
data.Stats.ExtraFs = make(map[string]*system.FsStats)
data.Info.ExtraFsPct = make(map[string]float64)
data.Info.ExtraFsPct = make(map[string]system.ExtraFsInfo)
for name, stats := range a.fsStats {
if !stats.Root && stats.DiskTotal > 0 {
// Use custom name if available, otherwise use device name
@@ -177,8 +177,8 @@ func (a *Agent) gatherStats(cacheTimeMs uint16) *system.CombinedData {
data.Stats.ExtraFs[key] = stats
// Add percentage info to Info struct for dashboard
if stats.DiskTotal > 0 {
pct := twoDecimals((stats.DiskUsed / stats.DiskTotal) * 100)
data.Info.ExtraFsPct[key] = pct
pct := (stats.DiskUsed / stats.DiskTotal) * 100
data.Info.ExtraFsPct[key] = system.ExtraFsInfo{DiskPct: pct}
}
}
}

View File

@@ -24,11 +24,12 @@ import (
// SmartManager manages data collection for SMART devices
type SmartManager struct {
sync.Mutex
SmartDataMap map[string]*smart.SmartData
SmartDevices []*DeviceInfo
refreshMutex sync.Mutex
lastScanTime time.Time
binPath string
SmartDataMap map[string]*smart.SmartData
SmartDevices []*DeviceInfo
refreshMutex sync.Mutex
lastScanTime time.Time
binPath string
excludedDevices map[string]struct{}
}
type scanOutput struct {
@@ -185,6 +186,7 @@ func (sm *SmartManager) ScanDevices(force bool) error {
}
finalDevices := mergeDeviceLists(currentDevices, scannedDevices, configuredDevices)
finalDevices = sm.filterExcludedDevices(finalDevices)
sm.updateSmartDevices(finalDevices)
if len(finalDevices) == 0 {
@@ -232,6 +234,47 @@ func (sm *SmartManager) parseConfiguredDevices(config string) ([]*DeviceInfo, er
return devices, nil
}
// refreshExcludedDevices rebuilds sm.excludedDevices from the EXCLUDE_SMART
// value returned by GetEnv. The value is read as a comma-separated list: every
// entry is trimmed of surrounding whitespace, blank entries are dropped, and
// repeated names collapse into one set member. Any previous set is replaced,
// so an empty value leaves an empty, non-nil map.
func (sm *SmartManager) refreshExcludedDevices() {
	// The second result of GetEnv is discarded; an empty value simply yields
	// an empty set below.
	envList, _ := GetEnv("EXCLUDE_SMART")

	names := make(map[string]struct{})
	for part := range strings.SplitSeq(envList, ",") {
		if name := strings.TrimSpace(part); name != "" {
			names[name] = struct{}{}
		}
	}
	sm.excludedDevices = names
}
// isExcludedDevice reports whether deviceName is a member of the exclusion set
// held in sm.excludedDevices. A nil or empty set excludes nothing.
func (sm *SmartManager) isExcludedDevice(deviceName string) bool {
	exclusions := sm.excludedDevices
	_, found := exclusions[deviceName]
	return found
}
// filterExcludedDevices returns the devices that are not listed in
// sm.excludedDevices.
//
// A nil input yields an empty, non-nil slice. When the exclusion set is empty
// the input slice is handed back unchanged (no copy, no per-entry checks).
// Otherwise a new slice is built that keeps the input order and drops nil
// entries, entries with an empty Name, and entries whose Name is excluded.
func (sm *SmartManager) filterExcludedDevices(devices []*DeviceInfo) []*DeviceInfo {
	switch {
	case devices == nil:
		return []*DeviceInfo{}
	case len(sm.excludedDevices) == 0:
		return devices
	}

	kept := make([]*DeviceInfo, 0, len(devices))
	for _, dev := range devices {
		if dev == nil || dev.Name == "" {
			continue
		}
		if _, excluded := sm.excludedDevices[dev.Name]; !excluded {
			kept = append(kept, dev)
		}
	}
	return kept
}
// detectSmartOutputType inspects sections that are unique to each smartctl
// JSON schema (NVMe, ATA/SATA, SCSI) to determine which parser should be used
// when the reported device type is ambiguous or missing.
@@ -378,6 +421,10 @@ func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte)
// Uses -n standby to avoid waking up sleeping disks, but bypasses standby mode
// for initial data collection when no cached data exists
func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
if deviceInfo != nil && sm.isExcludedDevice(deviceInfo.Name) {
return errNoValidSmartData
}
// slog.Info("collecting SMART data", "device", deviceInfo.Name, "type", deviceInfo.Type, "has_existing_data", sm.hasDataForDevice(deviceInfo.Name))
// Check if we have any existing data for this device
@@ -409,10 +456,10 @@ func (sm *SmartManager) CollectSmart(deviceInfo *DeviceInfo) error {
if !hasValidData {
if err != nil {
slog.Debug("smartctl failed", "device", deviceInfo.Name, "err", err)
slog.Info("smartctl failed", "device", deviceInfo.Name, "err", err)
return err
}
slog.Debug("no valid SMART data found", "device", deviceInfo.Name)
slog.Info("no valid SMART data found", "device", deviceInfo.Name)
return errNoValidSmartData
}
@@ -915,6 +962,7 @@ func NewSmartManager() (*SmartManager, error) {
sm := &SmartManager{
SmartDataMap: make(map[string]*smart.SmartData),
}
sm.refreshExcludedDevices()
path, err := sm.detectSmartctl()
if err != nil {
slog.Debug(err.Error())

View File

@@ -588,3 +588,195 @@ func TestIsVirtualDeviceScsi(t *testing.T) {
})
}
}
// TestRefreshExcludedDevices verifies that refreshExcludedDevices turns the
// EXCLUDE_SMART environment variable into the expected exclusion set: entries
// are split on commas, trimmed, de-duplicated, and blank entries are ignored.
func TestRefreshExcludedDevices(t *testing.T) {
	tests := []struct {
		name         string
		envValue     string
		expectedDevs map[string]struct{}
	}{
		{
			name:         "empty env",
			envValue:     "",
			expectedDevs: map[string]struct{}{},
		},
		{
			name:     "single device",
			envValue: "/dev/sda",
			expectedDevs: map[string]struct{}{
				"/dev/sda": {},
			},
		},
		{
			name:     "multiple devices",
			envValue: "/dev/sda,/dev/sdb,/dev/nvme0",
			expectedDevs: map[string]struct{}{
				"/dev/sda":   {},
				"/dev/sdb":   {},
				"/dev/nvme0": {},
			},
		},
		{
			name:     "devices with whitespace",
			envValue: " /dev/sda , /dev/sdb , /dev/nvme0 ",
			expectedDevs: map[string]struct{}{
				"/dev/sda":   {},
				"/dev/sdb":   {},
				"/dev/nvme0": {},
			},
		},
		{
			name:     "duplicate devices",
			envValue: "/dev/sda,/dev/sdb,/dev/sda",
			expectedDevs: map[string]struct{}{
				"/dev/sda": {},
				"/dev/sdb": {},
			},
		},
		{
			name:     "empty entries and whitespace",
			envValue: "/dev/sda,, /dev/sdb , , ",
			expectedDevs: map[string]struct{}{
				"/dev/sda": {},
				"/dev/sdb": {},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// t.Setenv records the variable's current state and restores it
			// when the subtest ends, so nothing done here leaks into other
			// tests running in the same process.
			t.Setenv("EXCLUDE_SMART", tt.envValue)
			if tt.envValue == "" {
				// The empty case must observe the variable as unset, not
				// merely empty. The cleanup registered by t.Setenv above
				// still restores the original value afterwards.
				if err := os.Unsetenv("EXCLUDE_SMART"); err != nil {
					t.Fatalf("unsetting EXCLUDE_SMART: %v", err)
				}
			}

			sm := &SmartManager{}
			sm.refreshExcludedDevices()

			assert.Equal(t, tt.expectedDevs, sm.excludedDevices)
		})
	}
}
// TestIsExcludedDevice checks isExcludedDevice against a fixed exclusion set,
// covering names that are in the set, names that are not, and the empty name.
func TestIsExcludedDevice(t *testing.T) {
	manager := &SmartManager{
		excludedDevices: map[string]struct{}{
			"/dev/sda":   {},
			"/dev/nvme0": {},
		},
	}

	cases := []struct {
		name       string
		deviceName string
		want       bool
	}{
		{name: "excluded device sda", deviceName: "/dev/sda", want: true},
		{name: "excluded device nvme0", deviceName: "/dev/nvme0", want: true},
		{name: "non-excluded device sdb", deviceName: "/dev/sdb", want: false},
		{name: "non-excluded device nvme1", deviceName: "/dev/nvme1", want: false},
		{name: "empty device name", deviceName: "", want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := manager.isExcludedDevice(tc.deviceName)
			assert.Equal(t, tc.want, got)
		})
	}
}
// TestFilterExcludedDevices exercises filterExcludedDevices with empty,
// partial and complete exclusion sets, a nil input slice, and an input that
// contains nil and unnamed entries.
func TestFilterExcludedDevices(t *testing.T) {
	// devs builds a non-nil device list with one entry per name.
	devs := func(names ...string) []*DeviceInfo {
		list := make([]*DeviceInfo, 0, len(names))
		for _, n := range names {
			list = append(list, &DeviceInfo{Name: n})
		}
		return list
	}
	// exclude builds a non-nil exclusion set from the given names.
	exclude := func(names ...string) map[string]struct{} {
		set := make(map[string]struct{}, len(names))
		for _, n := range names {
			set[n] = struct{}{}
		}
		return set
	}

	cases := []struct {
		name     string
		excluded map[string]struct{}
		input    []*DeviceInfo
		want     []*DeviceInfo
		wantLen  int
	}{
		{
			name:     "no exclusions",
			excluded: exclude(),
			input:    devs("/dev/sda", "/dev/sdb", "/dev/nvme0"),
			want:     devs("/dev/sda", "/dev/sdb", "/dev/nvme0"),
			wantLen:  3,
		},
		{
			name:     "some devices excluded",
			excluded: exclude("/dev/sda", "/dev/nvme0"),
			input:    devs("/dev/sda", "/dev/sdb", "/dev/nvme0", "/dev/nvme1"),
			want:     devs("/dev/sdb", "/dev/nvme1"),
			wantLen:  2,
		},
		{
			name:     "all devices excluded",
			excluded: exclude("/dev/sda", "/dev/sdb"),
			input:    devs("/dev/sda", "/dev/sdb"),
			want:     devs(),
			wantLen:  0,
		},
		{
			name:     "nil devices",
			excluded: exclude(),
			input:    nil,
			want:     devs(),
			wantLen:  0,
		},
		{
			name:     "filter nil and empty name devices",
			excluded: exclude("/dev/sda"),
			input:    []*DeviceInfo{{Name: "/dev/sda"}, nil, {Name: ""}, {Name: "/dev/sdb"}},
			want:     devs("/dev/sdb"),
			wantLen:  1,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			manager := &SmartManager{excludedDevices: tc.excluded}

			got := manager.filterExcludedDevices(tc.input)

			assert.Len(t, got, tc.wantLen)
			assert.Equal(t, tc.want, got)
		})
	}
}

View File

@@ -281,9 +281,9 @@ func (am *AlertManager) sendSystemAlert(alert SystemAlertData) {
alert.name = after + "m Load"
}
// make title alert name lowercase if not CPU
// make title alert name lowercase if not CPU or GPU
titleAlertName := alert.name
if titleAlertName != "CPU" {
if titleAlertName != "CPU" && titleAlertName != "GPU" {
titleAlertName = strings.ToLower(titleAlertName)
}

View File

@@ -99,6 +99,10 @@ type FsStats struct {
MaxDiskWriteBytes uint64 `json:"wbm,omitempty" cbor:"-"`
}
// ExtraFsInfo contains summary info for extra filesystems in the system info.
// It is the value type of the Info.ExtraFsPct map.
type ExtraFsInfo struct {
	// DiskPct is the filesystem's disk usage as a percentage; it is encoded
	// as "dp" in JSON and under integer key 0 in CBOR.
	DiskPct float64 `json:"dp" cbor:"0,keyasint"`
}
type NetIoStats struct {
BytesRecv uint64
@@ -147,7 +151,7 @@ type Info struct {
// TODO: remove load fields in future release in favor of load avg array
LoadAvg [3]float64 `json:"la,omitempty" cbor:"19,keyasint"`
ConnectionType ConnectionType `json:"ct,omitempty" cbor:"20,keyasint,omitempty,omitzero"`
ExtraFsPct map[string]float64 `json:"efs,omitempty" cbor:"21,keyasint,omitempty"`
ExtraFsPct map[string]ExtraFsInfo `json:"efsp,omitempty" cbor:"21,keyasint,omitempty"`
}
// Final data structure to return to the hub

View File

@@ -358,66 +358,84 @@ function TableCellWithMeter(info: CellContext<SystemRecord, unknown>) {
function DiskCellWithMultiple(info: CellContext<SystemRecord, unknown>) {
const { info: sysInfo, status } = info.row.original
const rootDiskPct = sysInfo.dp
const extraFsData = sysInfo.efs
const extraFsData = sysInfo.efsp
const extraFsCount = extraFsData ? Object.keys(extraFsData).length : 0
function getMeterClass(pct: number) {
const threshold = getMeterState(pct)
return cn(
"h-full",
(status !== SystemStatus.Up && STATUS_COLORS.paused) ||
(threshold === MeterState.Good && STATUS_COLORS.up) ||
(threshold === MeterState.Warn && STATUS_COLORS.pending) ||
STATUS_COLORS.down
)
}
const threshold = getMeterState(rootDiskPct)
const meterClass = cn(
"h-full",
(status !== SystemStatus.Up && STATUS_COLORS.paused) ||
(threshold === MeterState.Good && STATUS_COLORS.up) ||
(threshold === MeterState.Warn && STATUS_COLORS.pending) ||
STATUS_COLORS.down
)
// No extra disks - show simple meter
if (extraFsCount === 0) {
return TableCellWithMeter(info)
return (
<div className="flex gap-2 items-center tabular-nums tracking-tight w-full">
<span className="min-w-8 shrink-0">{decimalString(rootDiskPct, rootDiskPct >= 10 ? 1 : 2)}%</span>
<span className="flex-1 min-w-8 grid bg-muted h-[1em] rounded-sm overflow-hidden">
<span className={meterClass} style={{ width: `${rootDiskPct}%` }}></span>
</span>
</div>
)
}
// Has extra disks - show with tooltip
return (
<Tooltip>
<TooltipTrigger asChild>
<Link href={getPagePath($router, "system", { id: info.row.original.id })} tabIndex={-1} className="flex flex-col gap-0.5 w-full relative z-10">
<div className="flex flex-col gap-0.5 w-full cursor-help relative z-10">
<div className="flex gap-2 items-center tabular-nums tracking-tight">
<span className="min-w-8 shrink-0">{decimalString(rootDiskPct, rootDiskPct >= 10 ? 1 : 2)}%</span>
<span className="flex-1 min-w-8 grid bg-muted h-[1em] rounded-sm overflow-hidden">
<span className={getMeterClass(rootDiskPct)} style={{ width: `${rootDiskPct}%` }}></span>
{extraFsData && Object.entries(extraFsData).slice(0, 2).map(([_name, pct], index) => (
<span key={index} className={getMeterClass(pct)} style={{ width: `${pct}%` }}></span>
))}
<span className={meterClass} style={{ width: `${rootDiskPct}%` }}></span>
</span>
</div>
</Link>
<div className="text-[0.7rem] text-muted-foreground ps-0.5">+{extraFsCount} more</div>
</div>
</TooltipTrigger>
<TooltipContent side="right" className="max-w-xs pb-2">
<TooltipContent side="right" className="max-w-xs">
<div className="flex flex-col gap-2 py-1">
<div className="flex items-center gap-2 text-xs font-medium">
<HardDriveIcon className="size-3" />
<span>{t`All Disks`}</span>
</div>
<div className="flex flex-col gap-1.5">
<div className="flex flex-col gap-0.5">
<div className="text-[0.65rem] text-muted-foreground uppercase tabular-nums">{t`Root`}</div>
<div className="text-[0.65rem] text-muted-foreground uppercase tracking-wider">{t`Root`}</div>
<div className="flex gap-2 items-center tabular-nums text-xs">
<span className="min-w-7">{decimalString(rootDiskPct, rootDiskPct >= 10 ? 1 : 2)}%</span>
<span className="flex-1 min-w-12 grid bg-muted/50 h-2 rounded-sm overflow-hidden">
<span className={getMeterClass(rootDiskPct)} style={{ width: `${rootDiskPct}%` }}></span>
<span className="flex-1 min-w-12 grid bg-muted/50 h-1.5 rounded-sm overflow-hidden">
<span className={meterClass} style={{ width: `${rootDiskPct}%` }}></span>
</span>
</div>
</div>
{extraFsData && Object.entries(extraFsData).map(([name, pct]) => {
{extraFsData && Object.entries(extraFsData).map(([name, fs]) => {
const pct = fs.dp
const fsThreshold = getMeterState(pct)
const fsMeterClass = cn(
"h-full",
(status !== SystemStatus.Up && STATUS_COLORS.paused) ||
(fsThreshold === MeterState.Good && STATUS_COLORS.up) ||
(fsThreshold === MeterState.Warn && STATUS_COLORS.pending) ||
STATUS_COLORS.down
)
return (
<div key={name} className="flex flex-col gap-0.5">
<div className="text-[0.65rem] text-muted-foreground uppercase tracking-wider truncate">{name}</div>
<div className="flex gap-2 items-center tabular-nums text-xs">
<span className="min-w-7">{decimalString(pct, pct >= 10 ? 1 : 2)}%</span>
<span className="flex-1 min-w-12 grid bg-muted/50 h-2 rounded-sm overflow-hidden">
<span className={getMeterClass(pct)} style={{ width: `${pct}%` }}></span>
<span className="flex-1 min-w-12 grid bg-muted/50 h-1.5 rounded-sm overflow-hidden">
<span className={fsMeterClass} style={{ width: `${pct}%` }}></span>
</span>
</div>
</div>
)
})}
</div>
</div>
</TooltipContent>
</Tooltip>
)

View File

@@ -78,9 +78,13 @@ export interface SystemInfo {
/** connection type */
ct?: ConnectionType
/** extra filesystem percentages */
efs?: Record<string, number>
efsp?: Record<string, ExtraFsInfo>
}
/** Summary info for one extra filesystem; the value type of `SystemInfo.efsp`. */
export interface ExtraFsInfo {
	/** disk usage percent */
	dp: number
}
export interface SystemStats {
/** cpu percent */