feat: Add SMART plugin concurrency configuration option, nvme-cli v1.14+ support and lint fixes. (#10150)
This commit is contained in:
parent
0e237774f1
commit
d4475b7d08
|
|
@ -112,6 +112,14 @@ smartctl --scan -d nvme
|
||||||
|
|
||||||
## Timeout for the cli command to complete.
|
## Timeout for the cli command to complete.
|
||||||
# timeout = "30s"
|
# timeout = "30s"
|
||||||
|
|
||||||
|
## Optionally call smartctl and nvme-cli with a specific concurrency policy.
|
||||||
|
## By default, smartctl and nvme-cli are called in separate threads (goroutines) to gather disk attributes.
|
||||||
|
## Some devices (e.g. disks in RAID arrays) may have access limitations that require sequential reading of
|
||||||
|
## SMART data - one individual array drive at the time. In such case please set this configuration option
|
||||||
|
## to "sequential" to get readings for all drives.
|
||||||
|
## valid options: concurrent, sequential
|
||||||
|
# read_method = "concurrent"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Permissions
|
## Permissions
|
||||||
|
|
@ -235,13 +243,27 @@ the DEVICE (name of the device could be taken from the previous command):
|
||||||
smartctl --info --health --attributes --tolerance=verypermissive --nocheck NOCHECK --format=brief -d DEVICE
|
smartctl --info --health --attributes --tolerance=verypermissive --nocheck NOCHECK --format=brief -d DEVICE
|
||||||
```
|
```
|
||||||
|
|
||||||
If you try to gather vendor specific metrics, please provide this commad
|
If you try to gather vendor specific metrics, please provide this command
|
||||||
and replace vendor and device to match your case:
|
and replace vendor and device to match your case:
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
nvme VENDOR smart-log-add DEVICE
|
nvme VENDOR smart-log-add DEVICE
|
||||||
```
|
```
|
||||||
|
|
||||||
|
If you have specified a devices array in the configuration file and Telegraf only shows data from one device, you should
|
||||||
|
change the plugin configuration to sequentially gather disk attributes instead of collecting them in separate threads
|
||||||
|
(goroutines). To do this, find `read_method` in the plugin configuration and change it to `sequential`:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
## Optionally call smartctl and nvme-cli with a specific concurrency policy.
|
||||||
|
## By default, smartctl and nvme-cli are called in separate threads (goroutines) to gather disk attributes.
|
||||||
|
## Some devices (e.g. disks in RAID arrays) may have access limitations that require sequential reading of
|
||||||
|
## SMART data - one individual array drive at the time. In such case please set this configuration option
|
||||||
|
## to "sequential" to get readings for all drives.
|
||||||
|
## valid options: concurrent, sequential
|
||||||
|
read_method = "sequential"
|
||||||
|
```
|
||||||
|
|
||||||
## Example SMART Plugin Outputs
|
## Example SMART Plugin Outputs
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
|
||||||
|
|
@ -43,8 +43,8 @@ var (
|
||||||
// PASSED, FAILED, UNKNOWN
|
// PASSED, FAILED, UNKNOWN
|
||||||
smartOverallHealth = regexp.MustCompile(`^(SMART overall-health self-assessment test result|SMART Health Status):\s+(\w+).*$`)
|
smartOverallHealth = regexp.MustCompile(`^(SMART overall-health self-assessment test result|SMART Health Status):\s+(\w+).*$`)
|
||||||
|
|
||||||
// sasNvmeAttr is a SAS or NVME SMART attribute
|
// sasNVMeAttr is a SAS or NVMe SMART attribute
|
||||||
sasNvmeAttr = regexp.MustCompile(`^([^:]+):\s+(.+)$`)
|
sasNVMeAttr = regexp.MustCompile(`^([^:]+):\s+(.+)$`)
|
||||||
|
|
||||||
// ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
// ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
|
||||||
// 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
// 1 Raw_Read_Error_Rate -O-RC- 200 200 000 - 0
|
||||||
|
|
@ -53,14 +53,26 @@ var (
|
||||||
attribute = regexp.MustCompile(`^\s*([0-9]+)\s(\S+)\s+([-P][-O][-S][-R][-C][-K])\s+([0-9]+)\s+([0-9]+)\s+([0-9-]+)\s+([-\w]+)\s+([\w\+\.]+).*$`)
|
attribute = regexp.MustCompile(`^\s*([0-9]+)\s(\S+)\s+([-P][-O][-S][-R][-C][-K])\s+([0-9]+)\s+([0-9]+)\s+([0-9-]+)\s+([-\w]+)\s+([\w\+\.]+).*$`)
|
||||||
|
|
||||||
// Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff
|
// Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff
|
||||||
|
// nvme version 1.14+ metrics:
|
||||||
|
// ID KEY Normalized Raw
|
||||||
|
// 0xab program_fail_count 100 0
|
||||||
|
|
||||||
|
// nvme deprecated metric format:
|
||||||
// key normalized raw
|
// key normalized raw
|
||||||
// program_fail_count : 100% 0
|
// program_fail_count : 100% 0
|
||||||
intelExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\w\s]+)%(.+)`)
|
|
||||||
|
// REGEX pattern supports deprecated metrics (nvme-cli version below 1.14) and metrics from nvme-cli 1.14 (and above).
|
||||||
|
intelExpressionPattern = regexp.MustCompile(`^([A-Za-z0-9_\s]+)[:|\s]+(\d+)[%|\s]+(.+)`)
|
||||||
|
|
||||||
// vid : 0x8086
|
// vid : 0x8086
|
||||||
// sn : CFGT53260XSP8011P
|
// sn : CFGT53260XSP8011P
|
||||||
nvmeIDCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`)
|
nvmeIDCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`)
|
||||||
|
|
||||||
|
// Format from nvme-cli 1.14 (and above) gives ID and KEY, this regex is for separating id from key.
|
||||||
|
// ID KEY
|
||||||
|
// 0xab program_fail_count
|
||||||
|
nvmeIDSeparatePattern = regexp.MustCompile(`^([A-Za-z0-9_]+)(.+)`)
|
||||||
|
|
||||||
deviceFieldIds = map[string]string{
|
deviceFieldIds = map[string]string{
|
||||||
"1": "read_error_rate",
|
"1": "read_error_rate",
|
||||||
"7": "seek_error_rate",
|
"7": "seek_error_rate",
|
||||||
|
|
@ -70,7 +82,7 @@ var (
|
||||||
}
|
}
|
||||||
|
|
||||||
// to obtain metrics from smartctl
|
// to obtain metrics from smartctl
|
||||||
sasNvmeAttributes = map[string]struct {
|
sasNVMeAttributes = map[string]struct {
|
||||||
ID string
|
ID string
|
||||||
Name string
|
Name string
|
||||||
Parse func(fields, deviceFields map[string]interface{}, str string) error
|
Parse func(fields, deviceFields map[string]interface{}, str string) error
|
||||||
|
|
@ -213,12 +225,51 @@ var (
|
||||||
Parse: parseTemperatureSensor,
|
Parse: parseTemperatureSensor,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
// To obtain Intel specific metrics from nvme-cli version 1.14 and above.
|
||||||
// to obtain Intel specific metrics from nvme-cli
|
|
||||||
intelAttributes = map[string]struct {
|
intelAttributes = map[string]struct {
|
||||||
ID string
|
ID string
|
||||||
Name string
|
Name string
|
||||||
Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error
|
Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error
|
||||||
|
}{
|
||||||
|
"program_fail_count": {
|
||||||
|
Name: "Program_Fail_Count",
|
||||||
|
},
|
||||||
|
"erase_fail_count": {
|
||||||
|
Name: "Erase_Fail_Count",
|
||||||
|
},
|
||||||
|
"wear_leveling_count": { // previously: "wear_leveling"
|
||||||
|
Name: "Wear_Leveling_Count",
|
||||||
|
},
|
||||||
|
"e2e_error_detect_count": { // previously: "end_to_end_error_detection_count"
|
||||||
|
Name: "End_To_End_Error_Detection_Count",
|
||||||
|
},
|
||||||
|
"crc_error_count": {
|
||||||
|
Name: "Crc_Error_Count",
|
||||||
|
},
|
||||||
|
"media_wear_percentage": { // previously: "timed_workload_media_wear"
|
||||||
|
Name: "Media_Wear_Percentage",
|
||||||
|
},
|
||||||
|
"host_reads": {
|
||||||
|
Name: "Host_Reads",
|
||||||
|
},
|
||||||
|
"timed_work_load": { // previously: "timed_workload_timer"
|
||||||
|
Name: "Timed_Workload_Timer",
|
||||||
|
},
|
||||||
|
"thermal_throttle_status": {
|
||||||
|
Name: "Thermal_Throttle_Status",
|
||||||
|
},
|
||||||
|
"retry_buff_overflow_count": { // previously: "retry_buffer_overflow_count"
|
||||||
|
Name: "Retry_Buffer_Overflow_Count",
|
||||||
|
},
|
||||||
|
"pll_lock_loss_counter": { // previously: "pll_lock_loss_count"
|
||||||
|
Name: "Pll_Lock_Loss_Count",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// to obtain Intel specific metrics from nvme-cli
|
||||||
|
intelAttributesDeprecatedFormat = map[string]struct {
|
||||||
|
ID string
|
||||||
|
Name string
|
||||||
|
Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error
|
||||||
}{
|
}{
|
||||||
"program_fail_count": {
|
"program_fail_count": {
|
||||||
Name: "Program_Fail_Count",
|
Name: "Program_Fail_Count",
|
||||||
|
|
@ -269,6 +320,8 @@ var (
|
||||||
Parse: parseBytesWritten,
|
Parse: parseBytesWritten,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
knownReadMethods = []string{"concurrent", "sequential"}
|
||||||
)
|
)
|
||||||
|
|
||||||
// Smart plugin reads metrics from storage devices supporting S.M.A.R.T.
|
// Smart plugin reads metrics from storage devices supporting S.M.A.R.T.
|
||||||
|
|
@ -283,6 +336,7 @@ type Smart struct {
|
||||||
Devices []string `toml:"devices"`
|
Devices []string `toml:"devices"`
|
||||||
UseSudo bool `toml:"use_sudo"`
|
UseSudo bool `toml:"use_sudo"`
|
||||||
Timeout config.Duration `toml:"timeout"`
|
Timeout config.Duration `toml:"timeout"`
|
||||||
|
ReadMethod string `toml:"read_method"`
|
||||||
Log telegraf.Logger `toml:"-"`
|
Log telegraf.Logger `toml:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -333,11 +387,20 @@ var sampleConfig = `
|
||||||
|
|
||||||
## Timeout for the cli command to complete.
|
## Timeout for the cli command to complete.
|
||||||
# timeout = "30s"
|
# timeout = "30s"
|
||||||
|
|
||||||
|
## Optionally call smartctl and nvme-cli with a specific concurrency policy.
|
||||||
|
## By default, smartctl and nvme-cli are called in separate threads (goroutines) to gather disk attributes.
|
||||||
|
## Some devices (e.g. disks in RAID arrays) may have access limitations that require sequential reading of
|
||||||
|
## SMART data - one individual array drive at the time. In such case please set this configuration option
|
||||||
|
## to "sequential" to get readings for all drives.
|
||||||
|
## valid options: concurrent, sequential
|
||||||
|
# read_method = "concurrent"
|
||||||
`
|
`
|
||||||
|
|
||||||
func newSmart() *Smart {
|
func newSmart() *Smart {
|
||||||
return &Smart{
|
return &Smart{
|
||||||
Timeout: config.Duration(time.Second * 30),
|
Timeout: config.Duration(time.Second * 30),
|
||||||
|
ReadMethod: "concurrent",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -368,6 +431,10 @@ func (m *Smart) Init() error {
|
||||||
m.PathNVMe, _ = exec.LookPath("nvme")
|
m.PathNVMe, _ = exec.LookPath("nvme")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !contains(knownReadMethods, m.ReadMethod) {
|
||||||
|
return fmt.Errorf("provided read method `%s` is not valid", m.ReadMethod)
|
||||||
|
}
|
||||||
|
|
||||||
err := validatePath(m.PathSmartctl)
|
err := validatePath(m.PathSmartctl)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.PathSmartctl = ""
|
m.PathSmartctl = ""
|
||||||
|
|
@ -404,9 +471,9 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
NVMeDevices := distinguishNVMeDevices(devicesFromConfig, scannedNVMeDevices)
|
nvmeDevices := distinguishNVMeDevices(devicesFromConfig, scannedNVMeDevices)
|
||||||
|
|
||||||
m.getVendorNVMeAttributes(acc, NVMeDevices)
|
m.getVendorNVMeAttributes(acc, nvmeDevices)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -434,28 +501,28 @@ func (m *Smart) scanAllDevices(ignoreExcludes bool) ([]string, []string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// this will return only NVMe devices
|
// this will return only NVMe devices
|
||||||
NVMeDevices, err := m.scanDevices(ignoreExcludes, "--scan", "--device=nvme")
|
nvmeDevices, err := m.scanDevices(ignoreExcludes, "--scan", "--device=nvme")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, err
|
return nil, nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// to handle all versions of smartctl this will return only non NVMe devices
|
// to handle all versions of smartctl this will return only non NVMe devices
|
||||||
nonNVMeDevices := difference(devices, NVMeDevices)
|
nonNVMeDevices := difference(devices, nvmeDevices)
|
||||||
return NVMeDevices, nonNVMeDevices, nil
|
return nvmeDevices, nonNVMeDevices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func distinguishNVMeDevices(userDevices []string, availableNVMeDevices []string) []string {
|
func distinguishNVMeDevices(userDevices []string, availableNVMeDevices []string) []string {
|
||||||
var NVMeDevices []string
|
var nvmeDevices []string
|
||||||
|
|
||||||
for _, userDevice := range userDevices {
|
for _, userDevice := range userDevices {
|
||||||
for _, NVMeDevice := range availableNVMeDevices {
|
for _, availableNVMeDevice := range availableNVMeDevices {
|
||||||
// double check. E.g. in case when nvme0 is equal nvme0n1, will check if "nvme0" part is present.
|
// double check. E.g. in case when nvme0 is equal nvme0n1, will check if "nvme0" part is present.
|
||||||
if strings.Contains(NVMeDevice, userDevice) || strings.Contains(userDevice, NVMeDevice) {
|
if strings.Contains(availableNVMeDevice, userDevice) || strings.Contains(userDevice, availableNVMeDevice) {
|
||||||
NVMeDevices = append(NVMeDevices, userDevice)
|
nvmeDevices = append(nvmeDevices, userDevice)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NVMeDevices
|
return nvmeDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan for S.M.A.R.T. devices from smartctl
|
// Scan for S.M.A.R.T. devices from smartctl
|
||||||
|
|
@ -506,69 +573,86 @@ func excludedDev(excludes []string, deviceLine string) bool {
|
||||||
func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
|
func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(len(devices))
|
wg.Add(len(devices))
|
||||||
|
|
||||||
for _, device := range devices {
|
for _, device := range devices {
|
||||||
go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.PathSmartctl, m.Nocheck, device, &wg)
|
switch m.ReadMethod {
|
||||||
|
case "concurrent":
|
||||||
|
go m.gatherDisk(acc, device, &wg)
|
||||||
|
case "sequential":
|
||||||
|
m.gatherDisk(acc, device, &wg)
|
||||||
|
default:
|
||||||
|
wg.Done()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []string) {
|
func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []string) {
|
||||||
NVMeDevices := getDeviceInfoForNVMeDisks(acc, devices, m.PathNVMe, m.Timeout, m.UseSudo)
|
nvmeDevices := getDeviceInfoForNVMeDisks(acc, devices, m.PathNVMe, m.Timeout, m.UseSudo)
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
|
|
||||||
for _, device := range NVMeDevices {
|
for _, device := range nvmeDevices {
|
||||||
if contains(m.EnableExtensions, "auto-on") {
|
if contains(m.EnableExtensions, "auto-on") {
|
||||||
|
// nolint:revive // one case switch on purpose to demonstrate potential extensions
|
||||||
switch device.vendorID {
|
switch device.vendorID {
|
||||||
case intelVID:
|
case intelVID:
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
switch m.ReadMethod {
|
||||||
|
case "concurrent":
|
||||||
|
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
|
case "sequential":
|
||||||
|
gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
|
default:
|
||||||
|
wg.Done()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else if contains(m.EnableExtensions, "Intel") && device.vendorID == intelVID {
|
} else if contains(m.EnableExtensions, "Intel") && device.vendorID == intelVID {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
switch m.ReadMethod {
|
||||||
|
case "concurrent":
|
||||||
|
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
|
case "sequential":
|
||||||
|
gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
|
default:
|
||||||
|
wg.Done()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout config.Duration, useSudo bool) []nvmeDevice {
|
func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout config.Duration, useSudo bool) []nvmeDevice {
|
||||||
var NVMeDevices []nvmeDevice
|
var nvmeDevices []nvmeDevice
|
||||||
|
|
||||||
for _, device := range devices {
|
for _, device := range devices {
|
||||||
vid, sn, mn, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo)
|
newDevice, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
acc.AddError(fmt.Errorf("cannot find device info for %s device", device))
|
acc.AddError(fmt.Errorf("cannot find device info for %s device", device))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
newDevice := nvmeDevice{
|
nvmeDevices = append(nvmeDevices, newDevice)
|
||||||
name: device,
|
|
||||||
vendorID: vid,
|
|
||||||
model: mn,
|
|
||||||
serialNumber: sn,
|
|
||||||
}
|
|
||||||
NVMeDevices = append(NVMeDevices, newDevice)
|
|
||||||
}
|
}
|
||||||
return NVMeDevices
|
return nvmeDevices
|
||||||
}
|
}
|
||||||
|
|
||||||
func gatherNVMeDeviceInfo(nvme, device string, timeout config.Duration, useSudo bool) (string, string, string, error) {
|
func gatherNVMeDeviceInfo(nvme, deviceName string, timeout config.Duration, useSudo bool) (device nvmeDevice, err error) {
|
||||||
args := []string{"id-ctrl"}
|
args := []string{"id-ctrl"}
|
||||||
args = append(args, strings.Split(device, " ")...)
|
args = append(args, strings.Split(deviceName, " ")...)
|
||||||
out, err := runCmd(timeout, useSudo, nvme, args...)
|
out, err := runCmd(timeout, useSudo, nvme, args...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", "", "", err
|
return device, err
|
||||||
}
|
}
|
||||||
outStr := string(out)
|
outStr := string(out)
|
||||||
|
device, err = findNVMeDeviceInfo(outStr)
|
||||||
vid, sn, mn, err := findNVMeDeviceInfo(outStr)
|
if err != nil {
|
||||||
|
return device, err
|
||||||
return vid, sn, mn, err
|
}
|
||||||
|
device.name = deviceName
|
||||||
|
return device, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func findNVMeDeviceInfo(output string) (string, string, string, error) {
|
func findNVMeDeviceInfo(output string) (nvmeDevice, error) {
|
||||||
scanner := bufio.NewScanner(strings.NewReader(output))
|
scanner := bufio.NewScanner(strings.NewReader(output))
|
||||||
var vid, sn, mn string
|
var vid, sn, mn string
|
||||||
|
|
||||||
|
|
@ -580,7 +664,7 @@ func findNVMeDeviceInfo(output string) (string, string, string, error) {
|
||||||
matches[2] = strings.TrimSpace(matches[2])
|
matches[2] = strings.TrimSpace(matches[2])
|
||||||
if matches[1] == "vid" {
|
if matches[1] == "vid" {
|
||||||
if _, err := fmt.Sscanf(matches[2], "%s", &vid); err != nil {
|
if _, err := fmt.Sscanf(matches[2], "%s", &vid); err != nil {
|
||||||
return "", "", "", err
|
return nvmeDevice{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if matches[1] == "sn" {
|
if matches[1] == "sn" {
|
||||||
|
|
@ -591,7 +675,13 @@ func findNVMeDeviceInfo(output string) (string, string, string, error) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return vid, sn, mn, nil
|
|
||||||
|
newDevice := nvmeDevice{
|
||||||
|
vendorID: vid,
|
||||||
|
model: mn,
|
||||||
|
serialNumber: sn,
|
||||||
|
}
|
||||||
|
return newDevice, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo bool, nvme string, device nvmeDevice, wg *sync.WaitGroup) {
|
func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo bool, nvme string, device nvmeDevice, wg *sync.WaitGroup) {
|
||||||
|
|
@ -619,10 +709,31 @@ func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout config.Duration, uses
|
||||||
tags["model"] = device.model
|
tags["model"] = device.model
|
||||||
tags["serial_no"] = device.serialNumber
|
tags["serial_no"] = device.serialNumber
|
||||||
|
|
||||||
if matches := intelExpressionPattern.FindStringSubmatch(line); len(matches) > 3 {
|
// Create struct to initialize later with intel attributes.
|
||||||
matches[1] = strings.TrimSpace(matches[1])
|
var (
|
||||||
|
attr = struct {
|
||||||
|
ID string
|
||||||
|
Name string
|
||||||
|
Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error
|
||||||
|
}{}
|
||||||
|
attrExists bool
|
||||||
|
)
|
||||||
|
|
||||||
|
if matches := intelExpressionPattern.FindStringSubmatch(line); len(matches) > 3 && len(matches[1]) > 1 {
|
||||||
|
// Check if nvme shows metrics in deprecated format or in format with ID.
|
||||||
|
// Based on that, an attribute map with metrics is chosen.
|
||||||
|
// If string has more than one character it means it has KEY there, otherwise it's empty string ("").
|
||||||
|
if separatedIDAndKey := nvmeIDSeparatePattern.FindStringSubmatch(matches[1]); len(strings.TrimSpace(separatedIDAndKey[2])) > 1 {
|
||||||
|
matches[1] = strings.TrimSpace(separatedIDAndKey[2])
|
||||||
|
attr, attrExists = intelAttributes[matches[1]]
|
||||||
|
} else {
|
||||||
|
matches[1] = strings.TrimSpace(matches[1])
|
||||||
|
attr, attrExists = intelAttributesDeprecatedFormat[matches[1]]
|
||||||
|
}
|
||||||
|
|
||||||
matches[3] = strings.TrimSpace(matches[3])
|
matches[3] = strings.TrimSpace(matches[3])
|
||||||
if attr, ok := intelAttributes[matches[1]]; ok {
|
|
||||||
|
if attrExists {
|
||||||
tags["name"] = attr.Name
|
tags["name"] = attr.Name
|
||||||
if attr.ID != "" {
|
if attr.ID != "" {
|
||||||
tags["id"] = attr.ID
|
tags["id"] = attr.ID
|
||||||
|
|
@ -641,18 +752,18 @@ func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout config.Duration, uses
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func gatherDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) {
|
func (m *Smart) gatherDisk(acc telegraf.Accumulator, device string, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
// smartctl 5.41 & 5.42 are broken regarding handling of --nocheck/-n
|
// smartctl 5.41 & 5.42 are broken regarding handling of --nocheck/-n
|
||||||
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", nocheck, "--format=brief"}
|
args := []string{"--info", "--health", "--attributes", "--tolerance=verypermissive", "-n", m.Nocheck, "--format=brief"}
|
||||||
args = append(args, strings.Split(device, " ")...)
|
args = append(args, strings.Split(device, " ")...)
|
||||||
out, e := runCmd(timeout, usesudo, smartctl, args...)
|
out, e := runCmd(m.Timeout, m.UseSudo, m.PathSmartctl, args...)
|
||||||
outStr := string(out)
|
outStr := string(out)
|
||||||
|
|
||||||
// Ignore all exit statuses except if it is a command line parse error
|
// Ignore all exit statuses except if it is a command line parse error
|
||||||
exitStatus, er := exitStatus(e)
|
exitStatus, er := exitStatus(e)
|
||||||
if er != nil {
|
if er != nil {
|
||||||
acc.AddError(fmt.Errorf("failed to run command '%s %s': %s - %s", smartctl, strings.Join(args, " "), e, outStr))
|
acc.AddError(fmt.Errorf("failed to run command '%s %s': %s - %s", m.PathSmartctl, strings.Join(args, " "), e, outStr))
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -712,7 +823,7 @@ func gatherDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo, coll
|
||||||
tags := map[string]string{}
|
tags := map[string]string{}
|
||||||
fields := make(map[string]interface{})
|
fields := make(map[string]interface{})
|
||||||
|
|
||||||
if collectAttributes {
|
if m.Attributes {
|
||||||
//add power mode
|
//add power mode
|
||||||
keys := [...]string{"device", "model", "serial_no", "wwn", "capacity", "enabled", "power"}
|
keys := [...]string{"device", "model", "serial_no", "wwn", "capacity", "enabled", "power"}
|
||||||
for _, key := range keys {
|
for _, key := range keys {
|
||||||
|
|
@ -724,8 +835,8 @@ func gatherDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo, coll
|
||||||
|
|
||||||
attr := attribute.FindStringSubmatch(line)
|
attr := attribute.FindStringSubmatch(line)
|
||||||
if len(attr) > 1 {
|
if len(attr) > 1 {
|
||||||
// attribute has been found, add it only if collectAttributes is true
|
// attribute has been found, add it only if m.Attributes is true
|
||||||
if collectAttributes {
|
if m.Attributes {
|
||||||
tags["id"] = attr[1]
|
tags["id"] = attr[1]
|
||||||
tags["name"] = attr[2]
|
tags["name"] = attr[2]
|
||||||
tags["flags"] = attr[3]
|
tags["flags"] = attr[3]
|
||||||
|
|
@ -758,8 +869,8 @@ func gatherDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo, coll
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// what was found is not a vendor attribute
|
// what was found is not a vendor attribute
|
||||||
if matches := sasNvmeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
if matches := sasNVMeAttr.FindStringSubmatch(line); len(matches) > 2 {
|
||||||
if attr, ok := sasNvmeAttributes[matches[1]]; ok {
|
if attr, ok := sasNVMeAttributes[matches[1]]; ok {
|
||||||
tags["name"] = attr.Name
|
tags["name"] = attr.Name
|
||||||
if attr.ID != "" {
|
if attr.ID != "" {
|
||||||
tags["id"] = attr.ID
|
tags["id"] = attr.ID
|
||||||
|
|
@ -774,8 +885,8 @@ func gatherDisk(acc telegraf.Accumulator, timeout config.Duration, usesudo, coll
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// if the field is classified as an attribute, only add it
|
// if the field is classified as an attribute, only add it
|
||||||
// if collectAttributes is true
|
// if m.Attributes is true
|
||||||
if collectAttributes {
|
if m.Attributes {
|
||||||
acc.AddFields("smart_attribute", fields, tags)
|
acc.AddFields("smart_attribute", fields, tags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -972,13 +1083,13 @@ func parseTemperatureSensor(fields, _ map[string]interface{}, str string) error
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func validatePath(path string) error {
|
func validatePath(filePath string) error {
|
||||||
pathInfo, err := os.Stat(path)
|
pathInfo, err := os.Stat(filePath)
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(err) {
|
||||||
return fmt.Errorf("provided path does not exist: [%s]", path)
|
return fmt.Errorf("provided path does not exist: [%s]", filePath)
|
||||||
}
|
}
|
||||||
if mode := pathInfo.Mode(); !mode.IsRegular() {
|
if mode := pathInfo.Mode(); !mode.IsRegular() {
|
||||||
return fmt.Errorf("provided path does not point to a regular file: [%s]", path)
|
return fmt.Errorf("provided path does not point to a regular file: [%s]", filePath)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,11 +24,11 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
if args[0] == "--info" && args[7] == "/dev/ada0" {
|
if args[0] == "--info" && args[7] == "/dev/ada0" {
|
||||||
return []byte(mockInfoAttributeData), nil
|
return []byte(mockInfoAttributeData), nil
|
||||||
} else if args[0] == "--info" && args[7] == "/dev/nvme0" {
|
} else if args[0] == "--info" && args[7] == "/dev/nvme0" {
|
||||||
return []byte(smartctlNvmeInfoData), nil
|
return []byte(smartctlNVMeInfoData), nil
|
||||||
} else if args[0] == "--scan" && len(args) == 1 {
|
} else if args[0] == "--scan" && len(args) == 1 {
|
||||||
return []byte(mockScanData), nil
|
return []byte(mockScanData), nil
|
||||||
} else if args[0] == "--scan" && len(args) >= 2 && args[1] == "--device=nvme" {
|
} else if args[0] == "--scan" && len(args) >= 2 && args[1] == "--device=nvme" {
|
||||||
return []byte(mockScanNvmeData), nil
|
return []byte(mockScanNVMeData), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, errors.New("command not found")
|
return nil, errors.New("command not found")
|
||||||
|
|
@ -45,7 +45,7 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
s.PathSmartctl = "smartctl"
|
s.PathSmartctl = "smartctl"
|
||||||
s.PathNVMe = ""
|
s.PathNVMe = ""
|
||||||
|
|
||||||
t.Run("Only non nvme device", func(t *testing.T) {
|
t.Run("Only non NVMe device", func(t *testing.T) {
|
||||||
s.Devices = []string{"/dev/ada0"}
|
s.Devices = []string{"/dev/ada0"}
|
||||||
var acc testutil.Accumulator
|
var acc testutil.Accumulator
|
||||||
|
|
||||||
|
|
@ -62,7 +62,7 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
t.Run("Only nvme device", func(t *testing.T) {
|
t.Run("Only NVMe device", func(t *testing.T) {
|
||||||
s.Devices = []string{"/dev/nvme0"}
|
s.Devices = []string{"/dev/nvme0"}
|
||||||
var acc testutil.Accumulator
|
var acc testutil.Accumulator
|
||||||
|
|
||||||
|
|
@ -71,12 +71,78 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, 32, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 32, acc.NFields(), "Wrong number of fields gathered")
|
||||||
|
|
||||||
testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(),
|
testutil.RequireMetricsEqual(t, testSmartctlNVMeAttributes, acc.GetTelegrafMetrics(),
|
||||||
testutil.SortMetrics(), testutil.IgnoreTime())
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGatherInParallelMode(t *testing.T) {
|
||||||
|
s := newSmart()
|
||||||
|
s.Attributes = true
|
||||||
|
s.PathSmartctl = "smartctl"
|
||||||
|
s.PathNVMe = "nvmeIdentifyController"
|
||||||
|
s.EnableExtensions = append(s.EnableExtensions, "auto-on")
|
||||||
|
s.Devices = []string{"/dev/nvme0"}
|
||||||
|
|
||||||
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
|
if len(args) > 0 {
|
||||||
|
if args[0] == "--info" && args[7] == "/dev/ada0" {
|
||||||
|
return []byte(mockInfoAttributeData), nil
|
||||||
|
} else if args[0] == "--info" && args[7] == "/dev/nvmeIdentifyController" {
|
||||||
|
return []byte(smartctlNVMeInfoData), nil
|
||||||
|
} else if args[0] == "--scan" && len(args) == 1 {
|
||||||
|
return []byte(mockScanData), nil
|
||||||
|
} else if args[0] == "--scan" && len(args) >= 2 && args[1] == "--device=nvme" {
|
||||||
|
return []byte(mockScanNVMeData), nil
|
||||||
|
} else if args[0] == "intel" && args[1] == "smart-log-add" {
|
||||||
|
return []byte(nvmeIntelInfoDataMetricsFormat), nil
|
||||||
|
} else if args[0] == "id-ctrl" {
|
||||||
|
return []byte(nvmeIdentifyController), nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, errors.New("command not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("Gather NVMe device info in goroutine", func(t *testing.T) {
|
||||||
|
acc := &testutil.Accumulator{}
|
||||||
|
s.ReadMethod = "concurrent"
|
||||||
|
|
||||||
|
err := s.Gather(acc)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
result := acc.GetTelegrafMetrics()
|
||||||
|
testutil.RequireMetricsEqual(t, testIntelNVMeNewFormatAttributes, result,
|
||||||
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Gather NVMe device info sequentially", func(t *testing.T) {
|
||||||
|
acc := &testutil.Accumulator{}
|
||||||
|
s.ReadMethod = "sequential"
|
||||||
|
|
||||||
|
err := s.Gather(acc)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
result := acc.GetTelegrafMetrics()
|
||||||
|
testutil.RequireMetricsEqual(t, testIntelNVMeNewFormatAttributes, result,
|
||||||
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Gather NVMe device info - not known read method", func(t *testing.T) {
|
||||||
|
acc := &testutil.Accumulator{}
|
||||||
|
s.ReadMethod = "horizontally"
|
||||||
|
|
||||||
|
err := s.Init()
|
||||||
|
require.Error(t, err)
|
||||||
|
|
||||||
|
err = s.Gather(acc)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
result := acc.GetTelegrafMetrics()
|
||||||
|
testutil.RequireMetricsEqual(t, []telegraf.Metric{}, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func TestGatherNoAttributes(t *testing.T) {
|
func TestGatherNoAttributes(t *testing.T) {
|
||||||
s := newSmart()
|
s := newSmart()
|
||||||
s.Attributes = false
|
s.Attributes = false
|
||||||
|
|
@ -90,9 +156,9 @@ func TestGatherNoAttributes(t *testing.T) {
|
||||||
} else if args[0] == "--info" && args[7] == "/dev/ada0" {
|
} else if args[0] == "--info" && args[7] == "/dev/ada0" {
|
||||||
return []byte(mockInfoAttributeData), nil
|
return []byte(mockInfoAttributeData), nil
|
||||||
} else if args[0] == "--info" && args[7] == "/dev/nvme0" {
|
} else if args[0] == "--info" && args[7] == "/dev/nvme0" {
|
||||||
return []byte(smartctlNvmeInfoData), nil
|
return []byte(smartctlNVMeInfoData), nil
|
||||||
} else if args[0] == "--scan" && args[1] == "--device=nvme" {
|
} else if args[0] == "--scan" && args[1] == "--device=nvme" {
|
||||||
return []byte(mockScanNvmeData), nil
|
return []byte(mockScanNVMeData), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil, errors.New("command not found")
|
return nil, errors.New("command not found")
|
||||||
|
|
@ -111,7 +177,7 @@ func TestGatherNoAttributes(t *testing.T) {
|
||||||
for _, test := range testsAda0Device {
|
for _, test := range testsAda0Device {
|
||||||
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
||||||
}
|
}
|
||||||
for _, test := range testNvmeDevice {
|
for _, test := range testNVMeDevice {
|
||||||
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
@ -123,6 +189,16 @@ func TestExcludedDev(t *testing.T) {
|
||||||
assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.")
|
assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
sampleSmart = Smart{
|
||||||
|
PathSmartctl: "",
|
||||||
|
Nocheck: "",
|
||||||
|
Attributes: true,
|
||||||
|
UseSudo: true,
|
||||||
|
Timeout: config.Duration(time.Second * 30),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
func TestGatherSATAInfo(t *testing.T) {
|
func TestGatherSATAInfo(t *testing.T) {
|
||||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
return []byte(hgstSATAInfoData), nil
|
return []byte(hgstSATAInfoData), nil
|
||||||
|
|
@ -134,7 +210,8 @@ func TestGatherSATAInfo(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
|
||||||
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
@ -150,7 +227,7 @@ func TestGatherSATAInfo65(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
@ -166,7 +243,7 @@ func TestGatherHgstSAS(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
@ -182,7 +259,7 @@ func TestGatherHtSAS(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
|
|
||||||
testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime())
|
testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
}
|
}
|
||||||
|
|
@ -198,7 +275,7 @@ func TestGatherSSD(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
@ -214,14 +291,14 @@ func TestGatherSSDRaid(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "", wg)
|
sampleSmart.gatherDisk(acc, "", wg)
|
||||||
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
|
assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered")
|
||||||
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGatherNvme(t *testing.T) {
|
func TestGatherNVMe(t *testing.T) {
|
||||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
return []byte(smartctlNvmeInfoData), nil
|
return []byte(smartctlNVMeInfoData), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -230,15 +307,15 @@ func TestGatherNvme(t *testing.T) {
|
||||||
)
|
)
|
||||||
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "nvme0", wg)
|
sampleSmart.gatherDisk(acc, "nvme0", wg)
|
||||||
|
|
||||||
testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(),
|
testutil.RequireMetricsEqual(t, testSmartctlNVMeAttributes, acc.GetTelegrafMetrics(),
|
||||||
testutil.SortMetrics(), testutil.IgnoreTime())
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGatherIntelNvme(t *testing.T) {
|
func TestGatherIntelNVMeMetrics(t *testing.T) {
|
||||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
return []byte(nvmeIntelInfoData), nil
|
return []byte(nvmeIntelInfoDataMetricsFormat), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -255,17 +332,40 @@ func TestGatherIntelNvme(t *testing.T) {
|
||||||
gatherIntelNVMeDisk(acc, config.Duration(time.Second*30), true, "", device, wg)
|
gatherIntelNVMeDisk(acc, config.Duration(time.Second*30), true, "", device, wg)
|
||||||
|
|
||||||
result := acc.GetTelegrafMetrics()
|
result := acc.GetTelegrafMetrics()
|
||||||
testutil.RequireMetricsEqual(t, testIntelInvmeAttributes, result,
|
testutil.RequireMetricsEqual(t, testIntelNVMeNewFormatAttributes, result,
|
||||||
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGatherIntelNVMeDeprecatedFormatMetrics(t *testing.T) {
|
||||||
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
|
return []byte(nvmeIntelInfoDataDeprecatedMetricsFormat), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
acc = &testutil.Accumulator{}
|
||||||
|
wg = &sync.WaitGroup{}
|
||||||
|
device = nvmeDevice{
|
||||||
|
name: "nvme0",
|
||||||
|
model: mockModel,
|
||||||
|
serialNumber: mockSerial,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
gatherIntelNVMeDisk(acc, config.Duration(time.Second*30), true, "", device, wg)
|
||||||
|
|
||||||
|
result := acc.GetTelegrafMetrics()
|
||||||
|
testutil.RequireMetricsEqual(t, testIntelNVMeAttributes, result,
|
||||||
testutil.SortMetrics(), testutil.IgnoreTime())
|
testutil.SortMetrics(), testutil.IgnoreTime())
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_findVIDFromNVMeOutput(t *testing.T) {
|
func Test_findVIDFromNVMeOutput(t *testing.T) {
|
||||||
vid, sn, mn, err := findNVMeDeviceInfo(nvmeIdentifyController)
|
device, err := findNVMeDeviceInfo(nvmeIdentifyController)
|
||||||
|
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
assert.Equal(t, "0x8086", vid)
|
assert.Equal(t, "0x8086", device.vendorID)
|
||||||
assert.Equal(t, "CVFT5123456789ABCD", sn)
|
assert.Equal(t, "CVFT5123456789ABCD", device.serialNumber)
|
||||||
assert.Equal(t, "INTEL SSDPEDABCDEFG", mn)
|
assert.Equal(t, "INTEL SSDPEDABCDEFG", device.model)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_checkForNVMeDevices(t *testing.T) {
|
func Test_checkForNVMeDevices(t *testing.T) {
|
||||||
|
|
@ -293,7 +393,7 @@ func Test_difference(t *testing.T) {
|
||||||
|
|
||||||
func Test_integerOverflow(t *testing.T) {
|
func Test_integerOverflow(t *testing.T) {
|
||||||
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
runCmd = func(timeout config.Duration, sudo bool, command string, args ...string) ([]byte, error) {
|
||||||
return []byte(smartctlNvmeInfoDataWithOverflow), nil
|
return []byte(smartctlNVMeInfoDataWithOverflow), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -303,7 +403,8 @@ func Test_integerOverflow(t *testing.T) {
|
||||||
|
|
||||||
t.Run("If data raw_value is out of int64 range, there should be no metrics for that attribute", func(t *testing.T) {
|
t.Run("If data raw_value is out of int64 range, there should be no metrics for that attribute", func(t *testing.T) {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
gatherDisk(acc, config.Duration(time.Second*30), true, true, "", "", "nvme0", wg)
|
|
||||||
|
sampleSmart.gatherDisk(acc, "nvme0", wg)
|
||||||
|
|
||||||
result := acc.GetTelegrafMetrics()
|
result := acc.GetTelegrafMetrics()
|
||||||
testutil.RequireMetricsEqual(t, testOverflowAttributes, result,
|
testutil.RequireMetricsEqual(t, testOverflowAttributes, result,
|
||||||
|
|
@ -656,7 +757,7 @@ var (
|
||||||
mockModel = "INTEL SSDPEDABCDEFG"
|
mockModel = "INTEL SSDPEDABCDEFG"
|
||||||
mockSerial = "CVFT5123456789ABCD"
|
mockSerial = "CVFT5123456789ABCD"
|
||||||
|
|
||||||
testSmartctlNvmeAttributes = []telegraf.Metric{
|
testSmartctlNVMeAttributes = []telegraf.Metric{
|
||||||
testutil.MustMetric("smart_device",
|
testutil.MustMetric("smart_device",
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"device": "nvme0",
|
"device": "nvme0",
|
||||||
|
|
@ -1045,7 +1146,7 @@ var (
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
testNvmeDevice = []struct {
|
testNVMeDevice = []struct {
|
||||||
fields map[string]interface{}
|
fields map[string]interface{}
|
||||||
tags map[string]string
|
tags map[string]string
|
||||||
}{
|
}{
|
||||||
|
|
@ -1063,7 +1164,7 @@ var (
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
testIntelInvmeAttributes = []telegraf.Metric{
|
testIntelNVMeAttributes = []telegraf.Metric{
|
||||||
testutil.MustMetric("smart_attribute",
|
testutil.MustMetric("smart_attribute",
|
||||||
map[string]string{
|
map[string]string{
|
||||||
"device": "nvme0",
|
"device": "nvme0",
|
||||||
|
|
@ -1257,11 +1358,146 @@ var (
|
||||||
time.Now(),
|
time.Now(),
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
testIntelNVMeNewFormatAttributes = []telegraf.Metric{
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Program_Fail_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Erase_Fail_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Wear_Leveling_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": int64(700090417315),
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "End_To_End_Error_Detection_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Crc_Error_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 13,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Media_Wear_Percentage",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 552,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Host_Reads",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 73,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Timed_Workload_Timer",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": int64(2343038),
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Thermal_Throttle_Status",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Retry_Buffer_Overflow_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
testutil.MustMetric("smart_attribute",
|
||||||
|
map[string]string{
|
||||||
|
"device": "nvme0",
|
||||||
|
"serial_no": mockSerial,
|
||||||
|
"model": mockModel,
|
||||||
|
"name": "Pll_Lock_Loss_Count",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"raw_value": 0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
}
|
||||||
// smartctl --scan
|
// smartctl --scan
|
||||||
mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device`
|
mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device`
|
||||||
|
|
||||||
// smartctl --scan -d nvme
|
// smartctl --scan -d nvme
|
||||||
mockScanNvmeData = `/dev/nvme0 -d nvme # /dev/nvme0, NVMe device`
|
mockScanNVMeData = `/dev/nvme0 -d nvme # /dev/nvme0, NVMe device`
|
||||||
|
|
||||||
// smartctl --info --health --attributes --tolerance=verypermissive -n standby --format=brief [DEVICE]
|
// smartctl --info --health --attributes --tolerance=verypermissive -n standby --format=brief [DEVICE]
|
||||||
mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build)
|
mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build)
|
||||||
|
|
@ -1670,7 +1906,7 @@ Selective self-test flags (0x0):
|
||||||
After scanning selected spans, do NOT read-scan remainder of disk.
|
After scanning selected spans, do NOT read-scan remainder of disk.
|
||||||
If Selective self-test is pending on power-up, resume after 0 minute delay.
|
If Selective self-test is pending on power-up, resume after 0 minute delay.
|
||||||
`
|
`
|
||||||
smartctlNvmeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build)
|
smartctlNVMeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build)
|
||||||
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org
|
||||||
|
|
||||||
=== START OF INFORMATION SECTION ===
|
=== START OF INFORMATION SECTION ===
|
||||||
|
|
@ -1720,14 +1956,14 @@ Temperature Sensor 7: 44 C
|
||||||
Temperature Sensor 8: 43 C
|
Temperature Sensor 8: 43 C
|
||||||
`
|
`
|
||||||
|
|
||||||
smartctlNvmeInfoDataWithOverflow = `
|
smartctlNVMeInfoDataWithOverflow = `
|
||||||
Temperature Sensor 1: 9223372036854775808 C
|
Temperature Sensor 1: 9223372036854775808 C
|
||||||
Temperature Sensor 2: -9223372036854775809 C
|
Temperature Sensor 2: -9223372036854775809 C
|
||||||
Temperature Sensor 3: 9223372036854775807 C
|
Temperature Sensor 3: 9223372036854775807 C
|
||||||
Temperature Sensor 4: -9223372036854775808 C
|
Temperature Sensor 4: -9223372036854775808 C
|
||||||
`
|
`
|
||||||
|
|
||||||
nvmeIntelInfoData = `Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff
|
nvmeIntelInfoDataDeprecatedMetricsFormat = `Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff
|
||||||
key normalized raw
|
key normalized raw
|
||||||
program_fail_count : 100% 0
|
program_fail_count : 100% 0
|
||||||
erase_fail_count : 100% 0
|
erase_fail_count : 100% 0
|
||||||
|
|
@ -1742,6 +1978,20 @@ retry_buffer_overflow_count : 100% 0
|
||||||
pll_lock_loss_count : 100% 0
|
pll_lock_loss_count : 100% 0
|
||||||
nand_bytes_written : 0% sectors: 0
|
nand_bytes_written : 0% sectors: 0
|
||||||
host_bytes_written : 0% sectors: 0
|
host_bytes_written : 0% sectors: 0
|
||||||
|
`
|
||||||
|
nvmeIntelInfoDataMetricsFormat = `Additional Smart Log for NVME device:nvme0n1 namespace-id:ffffffff
|
||||||
|
ID KEY Normalized Raw
|
||||||
|
0xab program_fail_count 100 0
|
||||||
|
0xac erase_fail_count 100 0
|
||||||
|
0xad wear_leveling_count 100 700090417315
|
||||||
|
0xb8 e2e_error_detect_count 100 0
|
||||||
|
0xc7 crc_error_count 100 13
|
||||||
|
0xe2 media_wear_percentage 100 552
|
||||||
|
0xe3 host_reads 100 73
|
||||||
|
0xe4 timed_work_load 100 2343038
|
||||||
|
0xea thermal_throttle_status 100 0
|
||||||
|
0xf0 retry_buff_overflow_count 100 0
|
||||||
|
0xf3 pll_lock_loss_counter 100 0
|
||||||
`
|
`
|
||||||
|
|
||||||
nvmeIdentifyController = `NVME Identify Controller:
|
nvmeIdentifyController = `NVME Identify Controller:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue