feat(inputs.smart): Collect SSD endurance information where available in smartctl (#11391)
This commit is contained in:
parent
0ef5df50af
commit
fa0c9c937e
|
|
@ -180,10 +180,13 @@ execute this script.
|
||||||
- fields:
|
- fields:
|
||||||
- exit_status
|
- exit_status
|
||||||
- health_ok
|
- health_ok
|
||||||
|
- media_wearout_indicator
|
||||||
|
- percent_lifetime_remain
|
||||||
- read_error_rate
|
- read_error_rate
|
||||||
- seek_error
|
- seek_error
|
||||||
- temp_c
|
- temp_c
|
||||||
- udma_crc_errors
|
- udma_crc_errors
|
||||||
|
- wear_leveling_count
|
||||||
|
|
||||||
- smart_attribute:
|
- smart_attribute:
|
||||||
- tags:
|
- tags:
|
||||||
|
|
|
||||||
|
|
@ -87,6 +87,14 @@ var (
|
||||||
"199": "udma_crc_errors",
|
"199": "udma_crc_errors",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// There are some fields we're interested in which use vendor-specific device IDs,
|
||||||
|
// so we need to be able to match on name instead
|
||||||
|
deviceFieldNames = map[string]string{
|
||||||
|
"Percent_Lifetime_Remain": "percent_lifetime_remain",
|
||||||
|
"Wear_Leveling_Count": "wear_leveling_count",
|
||||||
|
"Media_Wearout_Indicator": "media_wearout_indicator",
|
||||||
|
}
|
||||||
|
|
||||||
// to obtain metrics from smartctl
|
// to obtain metrics from smartctl
|
||||||
sasNVMeAttributes = map[string]struct {
|
sasNVMeAttributes = map[string]struct {
|
||||||
ID string
|
ID string
|
||||||
|
|
@ -150,6 +158,10 @@ var (
|
||||||
Name: "Percentage_Used",
|
Name: "Percentage_Used",
|
||||||
Parse: parsePercentageInt,
|
Parse: parsePercentageInt,
|
||||||
},
|
},
|
||||||
|
"Percentage used endurance indicator": {
|
||||||
|
Name: "Percentage_Used",
|
||||||
|
Parse: parsePercentageInt,
|
||||||
|
},
|
||||||
"Data Units Read": {
|
"Data Units Read": {
|
||||||
Name: "Data_Units_Read",
|
Name: "Data_Units_Read",
|
||||||
Parse: parseDataUnits,
|
Parse: parseDataUnits,
|
||||||
|
|
@ -817,6 +829,16 @@ func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.Wai
|
||||||
deviceFields[field] = val
|
deviceFields[field] = val
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(attr) > 4 {
|
||||||
|
// If the attribute name matches one in deviceFieldNames
|
||||||
|
// save the value to a field
|
||||||
|
if field, ok := deviceFieldNames[attr[2]]; ok {
|
||||||
|
if val, err := parseRawValue(attr[4]); err == nil {
|
||||||
|
deviceFields[field] = val
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// what was found is not a vendor attribute
|
// what was found is not a vendor attribute
|
||||||
if matches := sasNVMeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
if matches := sasNVMeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
err := s.Gather(&acc)
|
err := s.Gather(&acc)
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 66, acc.NFields(), "Wrong number of fields gathered")
|
||||||
|
|
||||||
for _, test := range testsAda0Attributes {
|
for _, test := range testsAda0Attributes {
|
||||||
acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags)
|
acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags)
|
||||||
|
|
@ -171,7 +171,7 @@ func TestGatherNoAttributes(t *testing.T) {
|
||||||
err := s.Gather(&acc)
|
err := s.Gather(&acc)
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, 8, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 9, acc.NFields(), "Wrong number of fields gathered")
|
||||||
acc.AssertDoesNotContainMeasurement(t, "smart_attribute")
|
acc.AssertDoesNotContainMeasurement(t, "smart_attribute")
|
||||||
|
|
||||||
for _, test := range testsAda0Device {
|
for _, test := range testsAda0Device {
|
||||||
|
|
@ -264,6 +264,23 @@ func TestGatherHtSAS(t *testing.T) {
|
||||||
testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime())
|
testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGatherLongFormEnduranceAttrib(t *testing.T) {
|
||||||
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
|
return []byte(mockHGST), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
acc = &testutil.Accumulator{}
|
||||||
|
wg = &sync.WaitGroup{}
|
||||||
|
)
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
|
||||||
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
|
assert.Equal(t, 7, acc.NFields(), "Wrong number of fields gathered")
|
||||||
|
assert.Equal(t, uint64(5), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
|
}
|
||||||
|
|
||||||
func TestGatherSSD(t *testing.T) {
|
func TestGatherSSD(t *testing.T) {
|
||||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
return []byte(ssdInfoData), nil
|
return []byte(ssdInfoData), nil
|
||||||
|
|
@ -276,7 +293,7 @@ func TestGatherSSD(t *testing.T) {
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
sampleSmart.gatherDisk(acc, "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 106, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -292,7 +309,7 @@ func TestGatherSSDRaid(t *testing.T) {
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
sampleSmart.gatherDisk(acc, "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 75, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1394,11 +1411,12 @@ var (
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
map[string]interface{}{
|
map[string]interface{}{
|
||||||
"exit_status": int(0),
|
"exit_status": int(0),
|
||||||
"health_ok": bool(true),
|
"health_ok": bool(true),
|
||||||
"read_error_rate": int64(0),
|
"read_error_rate": int64(0),
|
||||||
"temp_c": int64(34),
|
"temp_c": int64(34),
|
||||||
"udma_crc_errors": int64(0),
|
"udma_crc_errors": int64(0),
|
||||||
|
"wear_leveling_count": int64(185),
|
||||||
},
|
},
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"device": "ada0",
|
"device": "ada0",
|
||||||
|
|
@ -1810,6 +1828,52 @@ ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
||||||
|||____ S speed/performance
|
|||____ S speed/performance
|
||||||
||_____ O updated online
|
||_____ O updated online
|
||||||
|______ P prefailure warning
|
|______ P prefailure warning
|
||||||
|
`
|
||||||
|
|
||||||
|
mockHGST = `
|
||||||
|
smartctl 6.6 2016-05-31 r4324 [x86_64-linux-4.9.0-3-amd64] (local build)
|
||||||
|
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||||
|
|
||||||
|
=== START OF INFORMATION SECTION ===
|
||||||
|
Vendor: HGST
|
||||||
|
Product: HUSMM1640ASS200
|
||||||
|
Revision: A360
|
||||||
|
Compliance: SPC-4
|
||||||
|
User Capacity: 400,088,457,216 bytes [400 GB]
|
||||||
|
Logical block size: 512 bytes
|
||||||
|
Physical block size: 4096 bytes
|
||||||
|
LU is resource provisioned, LBPRZ=1
|
||||||
|
Rotation Rate: Solid State Device
|
||||||
|
Form Factor: 2.5 inches
|
||||||
|
Logical Unit id: 0x5000cca04ec26364
|
||||||
|
Serial number: ZZZZZZZZZ
|
||||||
|
Device type: disk
|
||||||
|
Transport protocol: SAS (SPL-3)
|
||||||
|
Local Time is: Mon Nov 6 10:20:33 2017 CET
|
||||||
|
SMART support is: Available - device has SMART capability.
|
||||||
|
SMART support is: Enabled
|
||||||
|
Temperature Warning: Enabled
|
||||||
|
Read Cache is: Enabled
|
||||||
|
Writeback Cache is: Enabled
|
||||||
|
|
||||||
|
=== START OF READ SMART DATA SECTION ===
|
||||||
|
SMART Health Status: OK
|
||||||
|
|
||||||
|
Percentage used endurance indicator: 0%
|
||||||
|
Current Drive Temperature: 28 C
|
||||||
|
Drive Trip Temperature: 70 C
|
||||||
|
|
||||||
|
Manufactured in week 30 of year 2017
|
||||||
|
Specified cycle count over device lifetime: 0
|
||||||
|
Accumulated start-stop cycles: 0
|
||||||
|
Specified load-unload count over device lifetime: 0
|
||||||
|
Accumulated load-unload cycles: 0
|
||||||
|
defect list format 6 unknown
|
||||||
|
Elements in grown defect list: 0
|
||||||
|
|
||||||
|
Vendor (Seagate) cache information
|
||||||
|
Blocks sent to initiator = 3400674574336
|
||||||
|
|
||||||
`
|
`
|
||||||
|
|
||||||
htSASInfoData = `smartctl 6.6 2016-05-31 r4324 [x86_64-linux-4.15.18-12-pve] (local build)
|
htSASInfoData = `smartctl 6.6 2016-05-31 r4324 [x86_64-linux-4.15.18-12-pve] (local build)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue