feat(inputs.smart): Collect SSD endurance information where available in smartctl (#11391)
This commit is contained in:
parent
0ef5df50af
commit
fa0c9c937e
|
|
@ -180,10 +180,13 @@ execute this script.
|
|||
- fields:
|
||||
- exit_status
|
||||
- health_ok
|
||||
- media_wearout_indicator
|
||||
- percent_lifetime_remain
|
||||
- read_error_rate
|
||||
- seek_error
|
||||
- temp_c
|
||||
- udma_crc_errors
|
||||
- wear_leveling_count
|
||||
|
||||
- smart_attribute:
|
||||
- tags:
|
||||
|
|
|
|||
|
|
@ -87,6 +87,14 @@ var (
|
|||
"199": "udma_crc_errors",
|
||||
}
|
||||
|
||||
// There are some fields we're interested in which use the vendor specific device ids
|
||||
// so we need to be able to match on name instead
|
||||
deviceFieldNames = map[string]string{
|
||||
"Percent_Lifetime_Remain": "percent_lifetime_remain",
|
||||
"Wear_Leveling_Count": "wear_leveling_count",
|
||||
"Media_Wearout_Indicator": "media_wearout_indicator",
|
||||
}
|
||||
|
||||
// to obtain metrics from smartctl
|
||||
sasNVMeAttributes = map[string]struct {
|
||||
ID string
|
||||
|
|
@ -150,6 +158,10 @@ var (
|
|||
Name: "Percentage_Used",
|
||||
Parse: parsePercentageInt,
|
||||
},
|
||||
"Percentage used endurance indicator": {
|
||||
Name: "Percentage_Used",
|
||||
Parse: parsePercentageInt,
|
||||
},
|
||||
"Data Units Read": {
|
||||
Name: "Data_Units_Read",
|
||||
Parse: parseDataUnits,
|
||||
|
|
@ -817,6 +829,16 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai
|
|||
deviceFields[field] = val
|
||||
}
|
||||
}
|
||||
|
||||
if len(attr) > 4 {
|
||||
// If the attribute name matches one in deviceFieldNames
|
||||
// save the value to a field
|
||||
if field, ok := deviceFieldNames[attr[2]]; ok {
|
||||
if val, err := parseRawValue(attr[4]); err == nil {
|
||||
deviceFields[field] = val
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// what was found is not a vendor attribute
|
||||
if matches := sasNVMeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ func TestGatherAttributes(t *testing.T) {
|
|||
err := s.Gather(&acc)
|
||||
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, 66, acc.NFields(), "Wrong number of fields gathered")
|
||||
|
||||
for _, test := range testsAda0Attributes {
|
||||
acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags)
|
||||
|
|
@ -171,7 +171,7 @@ func TestGatherNoAttributes(t *testing.T) {
|
|||
err := s.Gather(&acc)
|
||||
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, 8, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, 9, acc.NFields(), "Wrong number of fields gathered")
|
||||
acc.AssertDoesNotContainMeasurement(t, "smart_attribute")
|
||||
|
||||
for _, test := range testsAda0Device {
|
||||
|
|
@ -264,6 +264,23 @@ func TestGatherHtSAS(t *testing.T) {
|
|||
testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestGatherLongFormEnduranceAttrib(t *testing.T) {
|
||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||
return []byte(mockHGST), nil
|
||||
}
|
||||
|
||||
var (
|
||||
acc = &testutil.Accumulator{}
|
||||
wg = &sync.WaitGroup{}
|
||||
)
|
||||
|
||||
wg.Add(1)
|
||||
|
||||
sampleSmart.gatherDisk(acc, "", wg)
|
||||
assert.Equal(t, 7, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, uint64(5), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||
}
|
||||
|
||||
func TestGatherSSD(t *testing.T) {
|
||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||
return []byte(ssdInfoData), nil
|
||||
|
|
@ -276,7 +293,7 @@ func TestGatherSSD(t *testing.T) {
|
|||
|
||||
wg.Add(1)
|
||||
sampleSmart.gatherDisk(acc, "", wg)
|
||||
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, 106, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||
}
|
||||
|
||||
|
|
@ -292,7 +309,7 @@ func TestGatherSSDRaid(t *testing.T) {
|
|||
|
||||
wg.Add(1)
|
||||
sampleSmart.gatherDisk(acc, "", wg)
|
||||
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, 75, acc.NFields(), "Wrong number of fields gathered")
|
||||
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||
}
|
||||
|
||||
|
|
@ -1394,11 +1411,12 @@ var (
|
|||
}{
|
||||
{
|
||||
map[string]interface{}{
|
||||
"exit_status": int(0),
|
||||
"health_ok": bool(true),
|
||||
"read_error_rate": int64(0),
|
||||
"temp_c": int64(34),
|
||||
"udma_crc_errors": int64(0),
|
||||
"exit_status": int(0),
|
||||
"health_ok": bool(true),
|
||||
"read_error_rate": int64(0),
|
||||
"temp_c": int64(34),
|
||||
"udma_crc_errors": int64(0),
|
||||
"wear_leveling_count": int64(185),
|
||||
},
|
||||
map[string]string{
|
||||
"device": "ada0",
|
||||
|
|
@ -1810,6 +1828,52 @@ ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
|||
|||____ S speed/performance
|
||||
||_____ O updated online
|
||||
|______ P prefailure warning
|
||||
`
|
||||
|
||||
mockHGST = `
|
||||
smartctl 6.6 2016-05-31 r4324 [x86_64-linux-4.9.0-3-amd64] (local build)
|
||||
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||
|
||||
=== START OF INFORMATION SECTION ===
|
||||
Vendor: HGST
|
||||
Product: HUSMM1640ASS200
|
||||
Revision: A360
|
||||
Compliance: SPC-4
|
||||
User Capacity: 400,088,457,216 bytes [400 GB]
|
||||
Logical block size: 512 bytes
|
||||
Physical block size: 4096 bytes
|
||||
LU is resource provisioned, LBPRZ=1
|
||||
Rotation Rate: Solid State Device
|
||||
Form Factor: 2.5 inches
|
||||
Logical Unit id: 0x5000cca04ec26364
|
||||
Serial number: ZZZZZZZZZ
|
||||
Device type: disk
|
||||
Transport protocol: SAS (SPL-3)
|
||||
Local Time is: Mon Nov 6 10:20:33 2017 CET
|
||||
SMART support is: Available - device has SMART capability.
|
||||
SMART support is: Enabled
|
||||
Temperature Warning: Enabled
|
||||
Read Cache is: Enabled
|
||||
Writeback Cache is: Enabled
|
||||
|
||||
=== START OF READ SMART DATA SECTION ===
|
||||
SMART Health Status: OK
|
||||
|
||||
Percentage used endurance indicator: 0%
|
||||
Current Drive Temperature: 28 C
|
||||
Drive Trip Temperature: 70 C
|
||||
|
||||
Manufactured in week 30 of year 2017
|
||||
Specified cycle count over device lifetime: 0
|
||||
Accumulated start-stop cycles: 0
|
||||
Specified load-unload count over device lifetime: 0
|
||||
Accumulated load-unload cycles: 0
|
||||
defect list format 6 unknown
|
||||
Elements in grown defect list: 0
|
||||
|
||||
Vendor (Seagate) cache information
|
||||
Blocks sent to initiator = 3400674574336
|
||||
|
||||
`
|
||||
|
||||
htSASInfoData = `smartctl 6.6 2016-05-31 r4324 [x86_64-linux-4.15.18-12-pve] (local build)
|
||||
|
|
|
|||
Loading…
Reference in New Issue