diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 5e503afa4..5ce5b2b44 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -111,6 +111,10 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## without a password. # use_sudo = false + ## Adds an extra tag "device_type", which can be used to differentiate + ## multiple disks behind the same controller (e.g., MegaRAID). + # tag_with_device_type = false + ## Skip checking disks in this power mode. Defaults to ## "standby" to not wake up disks that have stopped rotating. ## See --nocheck in the man pages for smartctl. @@ -182,6 +186,7 @@ execute this script. - tags: - capacity - device + - device_type (only emitted if `tag_with_device_type` is set to `true`) - enabled - model - serial_no @@ -201,6 +206,7 @@ execute this script. - tags: - capacity - device + - device_type (only emitted if `tag_with_device_type` is set to `true`) - enabled - fail - flags diff --git a/plugins/inputs/smart/sample.conf b/plugins/inputs/smart/sample.conf index c8631836d..d141b8804 100644 --- a/plugins/inputs/smart/sample.conf +++ b/plugins/inputs/smart/sample.conf @@ -17,6 +17,10 @@ ## without a password. # use_sudo = false + ## Adds an extra tag "device_type", which can be used to differentiate + ## multiple disks behind the same controller (e.g., MegaRAID). + # tag_with_device_type = false + ## Skip checking disks in this power mode. Defaults to ## "standby" to not wake up disks that have stopped rotating. ## See --nocheck in the man pages for smartctl. diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index f12fb840c..55c9318c5 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -353,18 +353,19 @@ var ( // Smart plugin reads metrics from storage devices supporting S.M.A.R.T. type Smart struct { - Path string `toml:"path" deprecated:"1.16.0;use 'path_smartctl' instead"` - PathSmartctl string `toml:"path_smartctl"` - PathNVMe string `toml:"path_nvme"` - Nocheck string `toml:"nocheck"` - EnableExtensions []string `toml:"enable_extensions"` - Attributes bool `toml:"attributes"` - Excludes []string `toml:"excludes"` - Devices []string `toml:"devices"` - UseSudo bool `toml:"use_sudo"` - Timeout config.Duration `toml:"timeout"` - ReadMethod string `toml:"read_method"` - Log telegraf.Logger `toml:"-"` + Path string `toml:"path" deprecated:"1.16.0;use 'path_smartctl' instead"` + PathSmartctl string `toml:"path_smartctl"` + PathNVMe string `toml:"path_nvme"` + Nocheck string `toml:"nocheck"` + EnableExtensions []string `toml:"enable_extensions"` + Attributes bool `toml:"attributes"` + Excludes []string `toml:"excludes"` + Devices []string `toml:"devices"` + UseSudo bool `toml:"use_sudo"` + TagWithDeviceType bool `toml:"tag_with_device_type"` + Timeout config.Duration `toml:"timeout"` + ReadMethod string `toml:"read_method"` + Log telegraf.Logger `toml:"-"` } type nvmeDevice struct { @@ -741,8 +742,16 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai } deviceTags := map[string]string{} - deviceNode := strings.Split(device, " ")[0] - deviceTags["device"] = path.Base(deviceNode) + if m.TagWithDeviceType { + deviceNode := strings.SplitN(device, " ", 2) + deviceTags["device"] = path.Base(deviceNode[0]) + if len(deviceNode) == 2 && deviceNode[1] != "" { + deviceTags["device_type"] = strings.TrimPrefix(deviceNode[1], "-d ") + } + } else { + deviceNode := strings.Split(device, " ")[0] + deviceTags["device"] = path.Base(deviceNode) + } deviceFields := make(map[string]interface{}) deviceFields["exit_status"] = exitStatus @@ -798,7 +807,7 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai if m.Attributes { //add power mode - keys := [...]string{"device", "model", "serial_no", "wwn", "capacity", "enabled", "power"} + keys := [...]string{"device", "device_type", "model", "serial_no", "wwn", "capacity", "enabled", "power"} for _, key := range keys { if value, ok := deviceTags[key]; ok { tags[key] = value diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index e0858dad4..14c0496a8 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -2,6 +2,7 @@ package smart import ( "errors" + "fmt" "sync" "testing" "time" @@ -313,6 +314,39 @@ func TestGatherSSDRaid(t *testing.T) { require.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") } +func TestGatherDeviceTypeTag(t *testing.T) { + runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) { + switch args[0] { + case "--scan": + return nil, errors.New("scan command should not be run, since devices are provided in config") + case "--info": + switch args[len(args)-1] { + case "megaraid,0": + return []byte(smartctlMegaraidInfo1), nil + case "megaraid,1": + return []byte(smartctlMegaraidInfo2), nil + default: + return nil, fmt.Errorf("unexpected device type %q", args[len(args)-1]) + } + default: + return nil, fmt.Errorf("unexpected command %q", args[0]) + } + } + + s := newSmart() + s.Devices = []string{"/dev/bus/0 -d megaraid,0", "/dev/bus/0 -d megaraid,1"} + s.TagWithDeviceType = true + + acc := testutil.Accumulator{} + + err := s.Gather(&acc) + require.NoError(t, err) + require.NoError(t, errors.Join(acc.Errors...)) + + result := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, testSmartctlDeviceTypeTag, result, testutil.SortMetrics(), testutil.IgnoreTime()) +} + func TestGatherNVMe(t *testing.T) { runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) { return []byte(smartctlNVMeInfoData), nil @@ -792,6 +826,45 @@ var ( mockModel = "INTEL SSDPEDABCDEFG" mockSerial = "CVFT5123456789ABCD" + testSmartctlDeviceTypeTag = []telegraf.Metric{ + testutil.MustMetric( + "smart_device", + map[string]string{ + "capacity": "600000000000", + "device": "0", + "device_type": "megaraid,0", + "enabled": "Enabled", + "model": "ST3450857SS", + "power": "ACTIVE", + "serial_no": "xxx", + }, + map[string]any{ + "exit_status": int64(0), + "health_ok": true, + "temp_c": int64(37), + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_device", + map[string]string{ + "capacity": "600000000000", + "device": "0", + "device_type": "megaraid,1", + "enabled": "Enabled", + "model": "ST3450857SS", + "power": "ACTIVE", + "serial_no": "xxx", + }, + map[string]any{ + "exit_status": int64(0), + "health_ok": true, + "temp_c": int64(47), + }, + time.Unix(0, 0), + ), + } + testSmartctlNVMeAttributes = []telegraf.Metric{ testutil.MustMetric("smart_device", map[string]string{ @@ -2237,6 +2310,93 @@ Selective self-test flags (0x0): After scanning selected spans, do NOT read-scan remainder of disk. If Selective self-test is pending on power-up, resume after 0 minute delay. ` + + smartctlMegaraidInfo1 = `smartctl 7.3 2022-02-28 r5338 [x86_64-linux-6.2.16-12-pve] (local build) +Copyright (C) 2002-22, Bruce Allen, Christian Franke, www.smartmontools.org + +=== START OF INFORMATION SECTION === +Vendor: SEAGATE +Product: ST3450857SS +Revision: ES12 +Compliance: SPC-3 +User Capacity: 600,000,000,000 bytes [600 GB] +Logical block size: 512 bytes +Rotation Rate: 15000 rpm +Form Factor: 3.5 inches +Logical Unit id: 0x6000c60641d10397 +Serial number: xxx +Device type: disk +Transport protocol: SAS (SPL-4) +Local Time is: Fri Jan 12 11:43:49 2024 CET +SMART support is: Available - device has SMART capability. +SMART support is: Enabled +Temperature Warning: Disabled or Not Supported +Power mode is: ACTIVE + +=== START OF READ SMART DATA SECTION === +SMART Health Status: OK + +Current Drive Temperature: 37 C +Drive Trip Temperature: 63 C + +Accumulated power on time, hours:minutes 16003:18 +Elements in grown defect list: 0 + +Vendor (Seagate Cache) information + Blocks sent to initiator = 3000000000 + Blocks received from initiator = 3000000000 + Blocks read from cache and sent to initiator = 3000000000 + Number of read and write commands whose size <= segment size = 3000000000 + Number of read and write commands whose size > segment size = 300 + +Vendor (Seagate/Hitachi) factory information + number of hours powered up = 30000.30 + number of minutes until next internal SMART test = 7 +` + + smartctlMegaraidInfo2 = `smartctl 7.3 2022-02-28 r5338 [x86_64-linux-6.2.16-12-pve] (local build) +Copyright (C) 2002-22, Bruce Allen, Christian Franke, www.smartmontools.org + +=== START OF INFORMATION SECTION === +Vendor: SEAGATE +Product: ST3450857SS +Revision: ES12 +Compliance: SPC-3 +User Capacity: 600,000,000,000 bytes [600 GB] +Logical block size: 512 bytes +Rotation Rate: 15000 rpm +Form Factor: 3.5 inches +Logical Unit id: 0x6000c60641d10497 +Serial number: xxx +Device type: disk +Transport protocol: SAS (SPL-4) +Local Time is: Fri Jan 12 11:44:49 2024 CET +SMART support is: Available - device has SMART capability. +SMART support is: Enabled +Temperature Warning: Disabled or Not Supported +Power mode is: ACTIVE + +=== START OF READ SMART DATA SECTION === +SMART Health Status: OK + +Current Drive Temperature: 47 C +Drive Trip Temperature: 64 C + +Accumulated power on time, hours:minutes 16004:18 +Elements in grown defect list: 0 + +Vendor (Seagate Cache) information + Blocks sent to initiator = 4000000000 + Blocks received from initiator = 4000000000 + Blocks read from cache and sent to initiator = 4000000000 + Number of read and write commands whose size <= segment size = 4000000000 + Number of read and write commands whose size > segment size = 400 + +Vendor (Seagate/Hitachi) factory information + number of hours powered up = 30000.30 + number of minutes until next internal SMART test = 7 +` + smartctlNVMeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build) Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org