From 783ed9f456d77014955f636e4d79e9dd3b80a76e Mon Sep 17 00:00:00 2001 From: henrygd Date: Tue, 28 Oct 2025 14:01:45 -0400 Subject: [PATCH] cache smartctl scan results for 10 min w/ force option also add support for sntrealtek --- agent/handlers.go | 2 +- agent/smart.go | 31 +++++++++++++++++++++---------- agent/smart_test.go | 39 +++++++++++++++------------------------ 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/agent/handlers.go b/agent/handlers.go index c7040056..341dfdf7 100644 --- a/agent/handlers.go +++ b/agent/handlers.go @@ -168,7 +168,7 @@ func (h *GetSmartDataHandler) Handle(hctx *HandlerContext) error { // return empty map to indicate no data return hctx.SendResponse(map[string]smart.SmartData{}, hctx.RequestID) } - if err := hctx.Agent.smartManager.Refresh(); err != nil { + if err := hctx.Agent.smartManager.Refresh(false); err != nil { slog.Debug("smart refresh failed", "err", err) } data := hctx.Agent.smartManager.GetCurrentData() diff --git a/agent/smart.go b/agent/smart.go index e2e56175..c9bb82d6 100644 --- a/agent/smart.go +++ b/agent/smart.go @@ -22,6 +22,7 @@ type SmartManager struct { SmartDataMap map[string]*smart.SmartData SmartDevices []*DeviceInfo refreshMutex sync.Mutex + lastScanTime time.Time } type scanOutput struct { @@ -42,12 +43,12 @@ type DeviceInfo struct { var errNoValidSmartData = fmt.Errorf("no valid SMART data found") // Error for missing data -// Refresh updates SMART data for all known devices on demand. 
-func (sm *SmartManager) Refresh() error { +// Refresh updates SMART data for all known devices; forceScan bypasses the cached device scan +func (sm *SmartManager) Refresh(forceScan bool) error { sm.refreshMutex.Lock() defer sm.refreshMutex.Unlock() - scanErr := sm.ScanDevices() + scanErr := sm.ScanDevices(forceScan) if scanErr != nil { slog.Debug("smartctl scan failed", "err", scanErr) } @@ -129,7 +130,12 @@ func (sm *SmartManager) GetCurrentData() map[string]smart.SmartData { // Scan devices using `smartctl --scan -j` // If scan fails, return error // If scan succeeds, parse the output and update the SmartDevices slice -func (sm *SmartManager) ScanDevices() error { +func (sm *SmartManager) ScanDevices(force bool) error { + if !force && time.Since(sm.lastScanTime) < 10*time.Minute { + return nil + } + sm.lastScanTime = time.Now() + if configuredDevices, ok := GetEnv("SMART_DEVICES"); ok { config := strings.TrimSpace(configuredDevices) if config == "" { @@ -232,7 +238,7 @@ func (sm *SmartManager) parseSmartOutput(deviceInfo *DeviceInfo, output []byte) Parse func([]byte) (bool, int) Alias []string }{ - {Type: "nvme", Parse: sm.parseSmartForNvme, Alias: []string{"sntasmedia"}}, + {Type: "nvme", Parse: sm.parseSmartForNvme, Alias: []string{"sntasmedia", "sntrealtek"}}, {Type: "sat", Parse: sm.parseSmartForSata, Alias: []string{"ata"}}, {Type: "scsi", Parse: sm.parseSmartForScsi}, } @@ -368,11 +374,18 @@ func (sm *SmartManager) parseScan(output []byte) bool { sm.SmartDevices = append(sm.SmartDevices, deviceInfo) scannedDeviceNameMap[device.Name] = true } - // remove devices that are not in the scan - for key := range sm.SmartDataMap { - if _, ok := scannedDeviceNameMap[key]; !ok { + // remove cached entries whose device path no longer appears in the scan + for key, data := range sm.SmartDataMap { + if data == nil { delete(sm.SmartDataMap, key) + continue } + + if _, ok := scannedDeviceNameMap[data.DiskName]; ok { + continue + } + + delete(sm.SmartDataMap, key) } return true @@ -424,7 +437,6 @@ func (sm 
*SmartManager) parseSmartForSata(output []byte) (bool, int) { sm.Lock() defer sm.Unlock() - // get device name (e.g. /dev/sda) keyName := data.SerialNumber // if device does not exist in SmartDataMap, initialize it @@ -572,7 +584,6 @@ func (sm *SmartManager) parseSmartForNvme(output []byte) (bool, int) { sm.Lock() defer sm.Unlock() - // get device name (e.g. /dev/nvme0) keyName := data.SerialNumber // if device does not exist in SmartDataMap, initialize it diff --git a/agent/smart_test.go b/agent/smart_test.go index 48d8409e..bb0ab424 100644 --- a/agent/smart_test.go +++ b/agent/smart_test.go @@ -38,27 +38,14 @@ func TestParseSmartForScsi(t *testing.T) { t.Fatalf("expected smart data entry for serial 9YHSDH9B") } - if deviceData.ModelName != "YADRO WUH721414AL4204" { - t.Fatalf("unexpected model name: %s", deviceData.ModelName) - } - if deviceData.FirmwareVersion != "C240" { - t.Fatalf("unexpected firmware version: %s", deviceData.FirmwareVersion) - } - if deviceData.DiskName != "/dev/sde" { - t.Fatalf("unexpected disk name: %s", deviceData.DiskName) - } - if deviceData.DiskType != "scsi" { - t.Fatalf("unexpected disk type: %s", deviceData.DiskType) - } - if deviceData.Temperature != 34 { - t.Fatalf("unexpected temperature: %d", deviceData.Temperature) - } - if deviceData.SmartStatus != "PASSED" { - t.Fatalf("unexpected SMART status: %s", deviceData.SmartStatus) - } - if deviceData.Capacity != 14000519643136 { - t.Fatalf("unexpected capacity: %d", deviceData.Capacity) - } + assert.Equal(t, "YADRO WUH721414AL4204", deviceData.ModelName) + assert.Equal(t, "9YHSDH9B", deviceData.SerialNumber) + assert.Equal(t, "C240", deviceData.FirmwareVersion) + assert.Equal(t, "/dev/sde", deviceData.DiskName) + assert.Equal(t, "scsi", deviceData.DiskType) + assert.EqualValues(t, 34, deviceData.Temperature) + assert.Equal(t, "PASSED", deviceData.SmartStatus) + assert.EqualValues(t, 14000519643136, deviceData.Capacity) if len(deviceData.Attributes) == 0 { t.Fatalf("expected 
attributes to be populated") @@ -316,7 +303,8 @@ func TestResolveRefreshError(t *testing.T) { func TestParseScan(t *testing.T) { sm := &SmartManager{ SmartDataMap: map[string]*smart.SmartData{ - "/dev/sdb": {}, + "serial-active": {DiskName: "/dev/sda"}, + "serial-stale": {DiskName: "/dev/sdb"}, }, } @@ -336,8 +324,11 @@ func TestParseScan(t *testing.T) { assert.Equal(t, "/dev/nvme0", sm.SmartDevices[1].Name) assert.Equal(t, "nvme", sm.SmartDevices[1].Type) - _, exists := sm.SmartDataMap["/dev/sdb"] - assert.False(t, exists, "stale smart data entry should be removed") + _, activeExists := sm.SmartDataMap["serial-active"] + assert.True(t, activeExists, "active smart data should be preserved when device path remains") + + _, staleExists := sm.SmartDataMap["serial-stale"] + assert.False(t, staleExists, "stale smart data entry should be removed when device path disappears") } func assertAttrValue(t *testing.T, attributes []*smart.SmartAttribute, name string, expected uint64) {