feat(inputs.smart): Add a "device_type" tag to differentiate disks behind a RAID controller (#14613)
Co-authored-by: Thomas Delbende <thomas.delbende@bleemeo.com>
This commit is contained in:
parent
9878eba241
commit
439df813ec
|
|
@ -111,6 +111,10 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## without a password.
|
||||
# use_sudo = false
|
||||
|
||||
## Adds an extra tag "device_type", which can be used to differentiate
|
||||
## multiple disks behind the same controller (e.g., MegaRAID).
|
||||
# tag_with_device_type = false
|
||||
|
||||
## Skip checking disks in this power mode. Defaults to
|
||||
## "standby" to not wake up disks that have stopped rotating.
|
||||
## See --nocheck in the man pages for smartctl.
|
||||
|
|
@ -182,6 +186,7 @@ execute this script.
|
|||
- tags:
|
||||
- capacity
|
||||
- device
|
||||
- device_type (only emitted if `tag_with_device_type` is set to `true`)
|
||||
- enabled
|
||||
- model
|
||||
- serial_no
|
||||
|
|
@ -201,6 +206,7 @@ execute this script.
|
|||
- tags:
|
||||
- capacity
|
||||
- device
|
||||
- device_type (only emitted if `tag_with_device_type` is set to `true`)
|
||||
- enabled
|
||||
- fail
|
||||
- flags
|
||||
|
|
|
|||
|
|
@ -17,6 +17,10 @@
|
|||
## without a password.
|
||||
# use_sudo = false
|
||||
|
||||
## Adds an extra tag "device_type", which can be used to differentiate
|
||||
## multiple disks behind the same controller (e.g., MegaRAID).
|
||||
# tag_with_device_type = false
|
||||
|
||||
## Skip checking disks in this power mode. Defaults to
|
||||
## "standby" to not wake up disks that have stopped rotating.
|
||||
## See --nocheck in the man pages for smartctl.
|
||||
|
|
|
|||
|
|
@ -353,18 +353,19 @@ var (
|
|||
|
||||
// Smart plugin reads metrics from storage devices supporting S.M.A.R.T.
|
||||
type Smart struct {
|
||||
Path string `toml:"path" deprecated:"1.16.0;use 'path_smartctl' instead"`
|
||||
PathSmartctl string `toml:"path_smartctl"`
|
||||
PathNVMe string `toml:"path_nvme"`
|
||||
Nocheck string `toml:"nocheck"`
|
||||
EnableExtensions []string `toml:"enable_extensions"`
|
||||
Attributes bool `toml:"attributes"`
|
||||
Excludes []string `toml:"excludes"`
|
||||
Devices []string `toml:"devices"`
|
||||
UseSudo bool `toml:"use_sudo"`
|
||||
Timeout config.Duration `toml:"timeout"`
|
||||
ReadMethod string `toml:"read_method"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
Path string `toml:"path" deprecated:"1.16.0;use 'path_smartctl' instead"`
|
||||
PathSmartctl string `toml:"path_smartctl"`
|
||||
PathNVMe string `toml:"path_nvme"`
|
||||
Nocheck string `toml:"nocheck"`
|
||||
EnableExtensions []string `toml:"enable_extensions"`
|
||||
Attributes bool `toml:"attributes"`
|
||||
Excludes []string `toml:"excludes"`
|
||||
Devices []string `toml:"devices"`
|
||||
UseSudo bool `toml:"use_sudo"`
|
||||
TagWithDeviceType bool `toml:"tag_with_device_type"`
|
||||
Timeout config.Duration `toml:"timeout"`
|
||||
ReadMethod string `toml:"read_method"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
}
|
||||
|
||||
type nvmeDevice struct {
|
||||
|
|
@ -741,8 +742,16 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai
|
|||
}
|
||||
|
||||
deviceTags := map[string]string{}
|
||||
deviceNode := strings.Split(device, " ")[0]
|
||||
deviceTags["device"] = path.Base(deviceNode)
|
||||
if m.TagWithDeviceType {
|
||||
deviceNode := strings.SplitN(device, " ", 2)
|
||||
deviceTags["device"] = path.Base(deviceNode[0])
|
||||
if len(deviceNode) == 2 && deviceNode[1] != "" {
|
||||
deviceTags["device_type"] = strings.TrimPrefix(deviceNode[1], "-d ")
|
||||
}
|
||||
} else {
|
||||
deviceNode := strings.Split(device, " ")[0]
|
||||
deviceTags["device"] = path.Base(deviceNode)
|
||||
}
|
||||
deviceFields := make(map[string]interface{})
|
||||
deviceFields["exit_status"] = exitStatus
|
||||
|
||||
|
|
@ -798,7 +807,7 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai
|
|||
|
||||
if m.Attributes {
|
||||
//add power mode
|
||||
keys := [...]string{"device", "model", "serial_no", "wwn", "capacity", "enabled", "power"}
|
||||
keys := [...]string{"device", "device_type", "model", "serial_no", "wwn", "capacity", "enabled", "power"}
|
||||
for _, key := range keys {
|
||||
if value, ok := deviceTags[key]; ok {
|
||||
tags[key] = value
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package smart
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
|
@ -313,6 +314,39 @@ func TestGatherSSDRaid(t *testing.T) {
|
|||
require.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||
}
|
||||
|
||||
func TestGatherDeviceTypeTag(t *testing.T) {
|
||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||
switch args[0] {
|
||||
case "--scan":
|
||||
return nil, errors.New("scan command should not be run, since devices are provided in config")
|
||||
case "--info":
|
||||
switch args[len(args)-1] {
|
||||
case "megaraid,0":
|
||||
return []byte(smartctlMegaraidInfo1), nil
|
||||
case "megaraid,1":
|
||||
return []byte(smartctlMegaraidInfo2), nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected device type %q", args[len(args)-1])
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected command %q", args[0])
|
||||
}
|
||||
}
|
||||
|
||||
s := newSmart()
|
||||
s.Devices = []string{"/dev/bus/0 -d megaraid,0", "/dev/bus/0 -d megaraid,1"}
|
||||
s.TagWithDeviceType = true
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
|
||||
err := s.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, errors.Join(acc.Errors...))
|
||||
|
||||
result := acc.GetTelegrafMetrics()
|
||||
testutil.RequireMetricsEqual(t, testSmartctlDeviceTypeTag, result, testutil.SortMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestGatherNVMe(t *testing.T) {
|
||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||
return []byte(smartctlNVMeInfoData), nil
|
||||
|
|
@ -792,6 +826,45 @@ var (
|
|||
mockModel = "INTEL SSDPEDABCDEFG"
|
||||
mockSerial = "CVFT5123456789ABCD"
|
||||
|
||||
testSmartctlDeviceTypeTag = []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"smart_device",
|
||||
map[string]string{
|
||||
"capacity": "600000000000",
|
||||
"device": "0",
|
||||
"device_type": "megaraid,0",
|
||||
"enabled": "Enabled",
|
||||
"model": "ST3450857SS",
|
||||
"power": "ACTIVE",
|
||||
"serial_no": "xxx",
|
||||
},
|
||||
map[string]any{
|
||||
"exit_status": int64(0),
|
||||
"health_ok": true,
|
||||
"temp_c": int64(37),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric(
|
||||
"smart_device",
|
||||
map[string]string{
|
||||
"capacity": "600000000000",
|
||||
"device": "0",
|
||||
"device_type": "megaraid,1",
|
||||
"enabled": "Enabled",
|
||||
"model": "ST3450857SS",
|
||||
"power": "ACTIVE",
|
||||
"serial_no": "xxx",
|
||||
},
|
||||
map[string]any{
|
||||
"exit_status": int64(0),
|
||||
"health_ok": true,
|
||||
"temp_c": int64(47),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
|
||||
testSmartctlNVMeAttributes = []telegraf.Metric{
|
||||
testutil.MustMetric("smart_device",
|
||||
map[string]string{
|
||||
|
|
@ -2237,6 +2310,93 @@ Selective self-test flags (0x0):
|
|||
After scanning selected spans, do NOT read-scan remainder of disk.
|
||||
If Selective self-test is pending on power-up, resume after 0 minute delay.
|
||||
`
|
||||
|
||||
smartctlMegaraidInfo1 = `smartctl 7.3 2022-02-28 r5338 [x86_64-linux-6.2.16-12-pve] (local build)
|
||||
Copyright (C) 2002-22, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||
|
||||
=== START OF INFORMATION SECTION ===
|
||||
Vendor: SEAGATE
|
||||
Product: ST3450857SS
|
||||
Revision: ES12
|
||||
Compliance: SPC-3
|
||||
User Capacity: 600,000,000,000 bytes [600 GB]
|
||||
Logical block size: 512 bytes
|
||||
Rotation Rate: 15000 rpm
|
||||
Form Factor: 3.5 inches
|
||||
Logical Unit id: 0x6000c60641d10397
|
||||
Serial number: xxx
|
||||
Device type: disk
|
||||
Transport protocol: SAS (SPL-4)
|
||||
Local Time is: Fri Jan 12 11:43:49 2024 CET
|
||||
SMART support is: Available - device has SMART capability.
|
||||
SMART support is: Enabled
|
||||
Temperature Warning: Disabled or Not Supported
|
||||
Power mode is: ACTIVE
|
||||
|
||||
=== START OF READ SMART DATA SECTION ===
|
||||
SMART Health Status: OK
|
||||
|
||||
Current Drive Temperature: 37 C
|
||||
Drive Trip Temperature: 63 C
|
||||
|
||||
Accumulated power on time, hours:minutes 16003:18
|
||||
Elements in grown defect list: 0
|
||||
|
||||
Vendor (Seagate Cache) information
|
||||
Blocks sent to initiator = 3000000000
|
||||
Blocks received from initiator = 3000000000
|
||||
Blocks read from cache and sent to initiator = 3000000000
|
||||
Number of read and write commands whose size <= segment size = 3000000000
|
||||
Number of read and write commands whose size > segment size = 300
|
||||
|
||||
Vendor (Seagate/Hitachi) factory information
|
||||
number of hours powered up = 30000.30
|
||||
number of minutes until next internal SMART test = 7
|
||||
`
|
||||
|
||||
smartctlMegaraidInfo2 = `smartctl 7.3 2022-02-28 r5338 [x86_64-linux-6.2.16-12-pve] (local build)
|
||||
Copyright (C) 2002-22, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||
|
||||
=== START OF INFORMATION SECTION ===
|
||||
Vendor: SEAGATE
|
||||
Product: ST3450857SS
|
||||
Revision: ES12
|
||||
Compliance: SPC-3
|
||||
User Capacity: 600,000,000,000 bytes [600 GB]
|
||||
Logical block size: 512 bytes
|
||||
Rotation Rate: 15000 rpm
|
||||
Form Factor: 3.5 inches
|
||||
Logical Unit id: 0x6000c60641d10497
|
||||
Serial number: xxx
|
||||
Device type: disk
|
||||
Transport protocol: SAS (SPL-4)
|
||||
Local Time is: Fri Jan 12 11:44:49 2024 CET
|
||||
SMART support is: Available - device has SMART capability.
|
||||
SMART support is: Enabled
|
||||
Temperature Warning: Disabled or Not Supported
|
||||
Power mode is: ACTIVE
|
||||
|
||||
=== START OF READ SMART DATA SECTION ===
|
||||
SMART Health Status: OK
|
||||
|
||||
Current Drive Temperature: 47 C
|
||||
Drive Trip Temperature: 64 C
|
||||
|
||||
Accumulated power on time, hours:minutes 16004:18
|
||||
Elements in grown defect list: 0
|
||||
|
||||
Vendor (Seagate Cache) information
|
||||
Blocks sent to initiator = 4000000000
|
||||
Blocks received from initiator = 4000000000
|
||||
Blocks read from cache and sent to initiator = 4000000000
|
||||
Number of read and write commands whose size <= segment size = 4000000000
|
||||
Number of read and write commands whose size > segment size = 400
|
||||
|
||||
Vendor (Seagate/Hitachi) factory information
|
||||
number of hours powered up = 30000.30
|
||||
number of minutes until next internal SMART test = 7
|
||||
`
|
||||
|
||||
smartctlNVMeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build)
|
||||
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue