diff --git a/plugins/inputs/smart/README.md b/plugins/inputs/smart/README.md index 47320aeac..dec58e3f9 100644 --- a/plugins/inputs/smart/README.md +++ b/plugins/inputs/smart/README.md @@ -1,6 +1,6 @@ # S.M.A.R.T. Input Plugin -Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs)[1] that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. +Get metrics using the command line utility `smartctl` for S.M.A.R.T. (Self-Monitoring, Analysis and Reporting Technology) storage devices. SMART is a monitoring system included in computer hard disk drives (HDDs) and solid-state drives (SSDs) that detects and reports on various indicators of drive reliability, with the intent of enabling the anticipation of hardware failures. See smartmontools (https://www.smartmontools.org/). SMART information is separated between different measurements: `smart_device` is used for general information, while `smart_attribute` stores the detailed attribute information if `attributes = true` is enabled in the plugin configuration. @@ -19,57 +19,98 @@ smartctl --info --attributes --health -n --format=brief This plugin supports _smartmontools_ version 5.41 and above, but v. 5.41 and v. 5.42 might require setting `nocheck`, see the comment in the sample configuration. +Also, NVMe capabilities were introduced in version 6.5. To enable SMART on a storage device run: ``` smartctl -s on ``` +## NVMe vendor specific attributes -### Configuration +For NVMe disks, the plugin can use the command line utility `nvme-cli`, which provides +easy access to vendor specific attributes. +This plugin supports nvme-cli version 1.5 and above (https://github.com/linux-nvme/nvme-cli). 
+If `nvme-cli` is not installed, NVMe vendor specific metrics will not be gathered. + +Vendor specific SMART metrics for NVMe disks may be reported from the following `nvme` command: + +``` +nvme VENDOR smart-log-add DEVICE +``` + +Note that vendor plugins for `nvme-cli` could require different naming convention and report format. + +To see the installed plugin extensions, which depend on the nvme-cli version, look at the bottom of: +``` +nvme help +``` + +To gather disk vendor id (vid) `id-ctrl` could be used: +``` +nvme id-ctrl DEVICE +``` +The association between a vid and a company can be found here: https://pcisig.com/membership/member-companies. + +Whether a device is NVMe or non-NVMe is determined using: +``` +smartctl --scan +``` +and: +``` +smartctl --scan -d nvme +``` + +## Configuration ```toml # Read metrics from storage devices supporting S.M.A.R.T. [[inputs.smart]] - ## Optionally specify the path to the smartctl executable - # path = "/usr/bin/smartctl" - - ## On most platforms smartctl requires root access. - ## Setting 'use_sudo' to true will make use of sudo to run smartctl. - ## Sudo must be configured to to allow the telegraf user to run smartctl - ## without a password. - # use_sudo = false - - ## Skip checking disks in this power mode. Defaults to - ## "standby" to not wake up disks that have stoped rotating. - ## See --nocheck in the man pages for smartctl. - ## smartctl version 5.41 and 5.42 have faulty detection of - ## power mode and might require changing this value to - ## "never" depending on your disks. - # nocheck = "standby" - - ## Gather all returned S.M.A.R.T. attribute metrics and the detailed - ## information from each drive into the `smart_attribute` measurement. - # attributes = false - - ## Optionally specify devices to exclude from reporting. - # excludes = [ "/dev/pass6" ] - - ## Optionally specify devices and device type, if unset - ## a scan (smartctl --scan) for S.M.A.R.T. 
devices will - ## done and all found will be included except for the - ## excluded in excludes. - # devices = [ "/dev/ada0 -d atacam" ] - - ## Timeout for the smartctl command to complete. - # timeout = "30s" + ## Optionally specify the path to the smartctl executable + # path_smartctl = "/usr/bin/smartctl" + + ## Optionally specify the path to the nvme-cli executable + # path_nvme = "/usr/bin/nvme" + + ## Optionally specify if vendor specific attributes should be propagated for NVMe disk case + ## ["auto-on"] - automatically find and enable additional vendor specific disk info + ## ["vendor1", "vendor2", ...] - e.g. "Intel" enable additional Intel specific disk info + # enable_extensions = ["auto-on"] + + ## On most platforms used cli utilities requires root access. + ## Setting 'use_sudo' to true will make use of sudo to run smartctl or nvme-cli. + ## Sudo must be configured to allow the telegraf user to run smartctl or nvme-cli + ## without a password. + # use_sudo = false + + ## Skip checking disks in this power mode. Defaults to + ## "standby" to not wake up disks that have stopped rotating. + ## See --nocheck in the man pages for smartctl. + ## smartctl version 5.41 and 5.42 have faulty detection of + ## power mode and might require changing this value to + ## "never" depending on your disks. + # nocheck = "standby" + + ## Gather all returned S.M.A.R.T. attribute metrics and the detailed + ## information from each drive into the 'smart_attribute' measurement. + # attributes = false + + ## Optionally specify devices to exclude from reporting if disks auto-discovery is performed. + # excludes = [ "/dev/pass6" ] + + ## Optionally specify devices and device type, if unset + ## a scan (smartctl --scan and smartctl --scan -d nvme) for S.M.A.R.T. devices will be done + ## and all found will be included except for the excluded in excludes. + # devices = [ "/dev/ada0 -d atacam", "/dev/nvme0"] + + ## Timeout for the cli command to complete. 
+ # timeout = "30s" ``` -### Permissions - -It's important to note that this plugin references smartctl, which may require additional permissions to execute successfully. -Depending on the user/group permissions of the telegraf user executing this plugin, you may need to use sudo. +## Permissions +It's important to note that this plugin references smartctl and nvme-cli, which may require additional permissions to execute successfully. +Depending on the user/group permissions of the telegraf user executing this plugin, you may need to use sudo. You will need the following in your telegraf config: ```toml @@ -80,13 +121,20 @@ You will need the following in your telegraf config: You will also need to update your sudoers file: ```bash $ visudo -# Add the following line: +# For smartctl add the following lines: Cmnd_Alias SMARTCTL = /usr/bin/smartctl telegraf ALL=(ALL) NOPASSWD: SMARTCTL Defaults!SMARTCTL !logfile, !syslog, !pam_session -``` -### Metrics +# For nvme-cli add the following lines: +Cmnd_Alias NVME = /path/to/nvme +telegraf ALL=(ALL) NOPASSWD: NVME +Defaults!NVME !logfile, !syslog, !pam_session +``` +To run smartctl or nvme with `sudo`, a wrapper script can be created. `path_smartctl` or +`path_nvme` in the configuration should then be set to point to this script. + +## Metrics - smart_device: - tags: @@ -135,37 +183,44 @@ The interpretation of the tag `flags` is: #### Exit Status -The `exit_status` field captures the exit status of the smartctl command which +The `exit_status` field captures the exit status of the cli utility command that was run, which is defined by a bitmask. For the interpretation of the bitmask see the man page for -smartctl. - -#### Device Names +smartctl or nvme-cli. +## Device Names Device names, e.g., `/dev/sda`, are *not persistent*, and may be -subject to change across reboots or system changes. Instead, you can the +subject to change across reboots or system changes. 
Instead, you can use the *World Wide Name* (WWN) or serial number to identify devices. On Linux block devices can be referenced by the WWN in the following location: `/dev/disk/by-id/`. - -To run `smartctl` with `sudo` create a wrapper script and use `path` in -the configuration to execute that. - -### Troubleshooting +## Troubleshooting +If you expect to see more SMART metrics than this plugin shows, be sure to use a version +of the smartctl or nvme-cli utility that is able to gather the desired data. Also, check +your device capabilities, because not all SMART metrics are mandatory. +For example, the number of temperature sensors depends on the device specification. If this plugin is not working as expected for your SMART enabled device, please run these commands and include the output in a bug report: + +For non NVMe devices (from smartctl version >= 7.0 this will also return NVMe devices by default): ``` smartctl --scan ``` - +For NVMe devices: +``` +smartctl --scan -d nvme +``` Run the following command replacing your configuration setting for NOCHECK and -the DEVICE from the previous command: +the DEVICE (name of the device could be taken from the previous command): ``` smartctl --info --health --attributes --tolerance=verypermissive --nocheck NOCHECK --format=brief -d DEVICE ``` - -### Example Output - +If you are trying to gather vendor specific metrics, please also provide the output of this command, +replacing VENDOR and DEVICE to match your case: +``` +nvme VENDOR smart-log-add DEVICE +``` +## Example SMART Plugin Outputs ``` smart_device,enabled=Enabled,host=mbpro.local,device=rdisk0,model=APPLE\ SSD\ SM0512F,serial_no=S1K5NYCD964433,wwn=5002538655584d30,capacity=500277790720 udma_crc_errors=0i,exit_status=0i,health_ok=true,read_error_rate=0i,temp_c=40i 1502536854000000000 smart_attribute,capacity=500277790720,device=rdisk0,enabled=Enabled,fail=-,flags=-O-RC-,host=mbpro.local,id=199,model=APPLE\ SSD\ 
SM0512F,name=UDMA_CRC_Error_Count,serial_no=S1K5NYCD964433,wwn=5002538655584d30 exit_status=0i,raw_value=0i,threshold=0i,value=200i,worst=200i 1502536854000000000 diff --git a/plugins/inputs/smart/smart.go b/plugins/inputs/smart/smart.go index 63d16aad3..0c7924027 100644 --- a/plugins/inputs/smart/smart.go +++ b/plugins/inputs/smart/smart.go @@ -3,6 +3,7 @@ package smart import ( "bufio" "fmt" + "os" "os/exec" "path" "regexp" @@ -11,12 +12,15 @@ import ( "sync" "syscall" "time" + "unicode" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" ) +const IntelVID = "0x8086" + var ( // Device Model: APPLE SSD SM256E // Product: HUH721212AL5204 @@ -27,7 +31,7 @@ var ( // LU WWN Device Id: 5 002538 655584d30 wwnInfo = regexp.MustCompile("^LU WWN Device Id:\\s+(.*)$") // User Capacity: 251,000,193,024 bytes [251 GB] - usercapacityInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$") + userCapacityInfo = regexp.MustCompile("^User Capacity:\\s+([0-9,]+)\\s+bytes.*$") // SMART support is: Enabled smartEnabledInfo = regexp.MustCompile("^SMART support is:\\s+(\\w+)$") // SMART overall-health self-assessment test result: PASSED @@ -44,6 +48,15 @@ var ( // 192 Power-Off_Retract_Count -O--C- 097 097 000 - 14716 attribute = regexp.MustCompile("^\\s*([0-9]+)\\s(\\S+)\\s+([-P][-O][-S][-R][-C][-K])\\s+([0-9]+)\\s+([0-9]+)\\s+([0-9-]+)\\s+([-\\w]+)\\s+([\\w\\+\\.]+).*$") + // Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff + // key normalized raw + // program_fail_count : 100% 0 + intelExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\w\s]+)%(.+)`) + + // vid : 0x8086 + // sn : CFGT53260XSP8011P + nvmeIdCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`) + deviceFieldIds = map[string]string{ "1": "read_error_rate", "7": "seek_error_rate", @@ -52,6 +65,7 @@ var ( "199": "udma_crc_errors", } + // to obtain metrics from smartctl sasNvmeAttributes = map[string]struct { 
ID string Name string @@ -146,31 +160,154 @@ var ( Name: "Critical_Temperature_Time", Parse: parseCommaSeparatedInt, }, + "Thermal Temp. 1 Transition Count": { + Name: "Thermal_Management_T1_Trans_Count", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 2 Transition Count": { + Name: "Thermal_Management_T2_Trans_Count", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 1 Total Time": { + Name: "Thermal_Management_T1_Total_Time", + Parse: parseCommaSeparatedInt, + }, + "Thermal Temp. 2 Total Time": { + Name: "Thermal_Management_T2_Total_Time", + Parse: parseCommaSeparatedInt, + }, + "Temperature Sensor 1": { + Name: "Temperature_Sensor_1", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 2": { + Name: "Temperature_Sensor_2", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 3": { + Name: "Temperature_Sensor_3", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 4": { + Name: "Temperature_Sensor_4", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 5": { + Name: "Temperature_Sensor_5", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 6": { + Name: "Temperature_Sensor_6", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 7": { + Name: "Temperature_Sensor_7", + Parse: parseTemperatureSensor, + }, + "Temperature Sensor 8": { + Name: "Temperature_Sensor_8", + Parse: parseTemperatureSensor, + }, + } + + // to obtain Intel specific metrics from nvme-cli + intelAttributes = map[string]struct { + ID string + Name string + Parse func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error + }{ + "program_fail_count": { + Name: "Program_Fail_Count", + }, + "erase_fail_count": { + Name: "Erase_Fail_Count", + }, + "end_to_end_error_detection_count": { + Name: "End_To_End_Error_Detection_Count", + }, + "crc_error_count": { + Name: "Crc_Error_Count", + }, + "retry_buffer_overflow_count": { + Name: "Retry_Buffer_Overflow_Count", + }, + "wear_leveling": { + Name: 
"Wear_Leveling", + Parse: parseWearLeveling, + }, + "timed_workload_media_wear": { + Name: "Timed_Workload_Media_Wear", + Parse: parseTimedWorkload, + }, + "timed_workload_host_reads": { + Name: "Timed_Workload_Host_Reads", + Parse: parseTimedWorkload, + }, + "timed_workload_timer": { + Name: "Timed_Workload_Timer", + Parse: func(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + return parseCommaSeparatedIntWithAccumulator(acc, fields, tags, strings.TrimSuffix(str, " min")) + }, + }, + "thermal_throttle_status": { + Name: "Thermal_Throttle_Status", + Parse: parseThermalThrottle, + }, + "pll_lock_loss_count": { + Name: "Pll_Lock_Loss_Count", + }, + "nand_bytes_written": { + Name: "Nand_Bytes_Written", + Parse: parseBytesWritten, + }, + "host_bytes_written": { + Name: "Host_Bytes_Written", + Parse: parseBytesWritten, + }, } ) +type NVMeDevice struct { + name string + vendorID string + model string + serialNumber string +} + type Smart struct { - Path string - Nocheck string - Attributes bool - Excludes []string - Devices []string - UseSudo bool - Timeout internal.Duration + Path string `toml:"path"` //deprecated - to keep backward compatibility + PathSmartctl string `toml:"path_smartctl"` + PathNVMe string `toml:"path_nvme"` + Nocheck string `toml:"nocheck"` + EnableExtensions []string `toml:"enable_extensions"` + Attributes bool `toml:"attributes"` + Excludes []string `toml:"excludes"` + Devices []string `toml:"devices"` + UseSudo bool `toml:"use_sudo"` + Timeout internal.Duration `toml:"timeout"` + Log telegraf.Logger `toml:"-"` } var sampleConfig = ` ## Optionally specify the path to the smartctl executable - # path = "/usr/bin/smartctl" + # path_smartctl = "/usr/bin/smartctl" - ## On most platforms smartctl requires root access. - ## Setting 'use_sudo' to true will make use of sudo to run smartctl. 
- ## Sudo must be configured to to allow the telegraf user to run smartctl + ## Optionally specify the path to the nvme-cli executable + # path_nvme = "/usr/bin/nvme" + + ## Optionally specify if vendor specific attributes should be propagated for NVMe disk case + ## ["auto-on"] - automatically find and enable additional vendor specific disk info + ## ["vendor1", "vendor2", ...] - e.g. "Intel" enable additional Intel specific disk info + # enable_extensions = ["auto-on"] + + ## On most platforms used cli utilities requires root access. + ## Setting 'use_sudo' to true will make use of sudo to run smartctl or nvme-cli. + ## Sudo must be configured to allow the telegraf user to run smartctl or nvme-cli ## without a password. # use_sudo = false ## Skip checking disks in this power mode. Defaults to - ## "standby" to not wake up disks that have stoped rotating. + ## "standby" to not wake up disks that have stopped rotating. ## See --nocheck in the man pages for smartctl. ## smartctl version 5.41 and 5.42 have faulty detection of ## power mode and might require changing this value to @@ -181,16 +318,15 @@ var sampleConfig = ` ## information from each drive into the 'smart_attribute' measurement. # attributes = false - ## Optionally specify devices to exclude from reporting. + ## Optionally specify devices to exclude from reporting if disks auto-discovery is performed. # excludes = [ "/dev/pass6" ] ## Optionally specify devices and device type, if unset - ## a scan (smartctl --scan) for S.M.A.R.T. devices will - ## done and all found will be included except for the - ## excluded in excludes. - # devices = [ "/dev/ada0 -d atacam" ] + ## a scan (smartctl --scan and smartctl --scan -d nvme) for S.M.A.R.T. devices will be done + ## and all found will be included except for the excluded in excludes. + # devices = [ "/dev/ada0 -d atacam", "/dev/nvme0"] - ## Timeout for the smartctl command to complete. + ## Timeout for the cli command to complete. 
# timeout = "30s" ` @@ -208,22 +344,159 @@ func (m *Smart) Description() string { return "Read metrics from storage devices supporting S.M.A.R.T." } -func (m *Smart) Gather(acc telegraf.Accumulator) error { - if len(m.Path) == 0 { - return fmt.Errorf("smartctl not found: verify that smartctl is installed and that smartctl is in your PATH") +func (m *Smart) Init() error { + //if deprecated `path` (to smartctl binary) is provided in config and `path_smartctl` override does not exist + if len(m.Path) > 0 && len(m.PathSmartctl) == 0 { + m.PathSmartctl = m.Path } - devices := m.Devices - if len(devices) == 0 { - var err error - devices, err = m.scan() - if err != nil { - return err + //if `path_smartctl` is not provided in config, try to find smartctl binary in PATH + if len(m.PathSmartctl) == 0 { + m.PathSmartctl, _ = exec.LookPath("smartctl") + } + + //if `path_nvme` is not provided in config, try to find nvme binary in PATH + if len(m.PathNVMe) == 0 { + m.PathNVMe, _ = exec.LookPath("nvme") + } + + err := validatePath(m.PathSmartctl) + if err != nil { + m.PathSmartctl = "" + //without smartctl, plugin will not be able to gather basic metrics + return fmt.Errorf("smartctl not found: verify that smartctl is installed and it is in your PATH (or specified in config): %s", err.Error()) + } + + err = validatePath(m.PathNVMe) + if err != nil { + m.PathNVMe = "" + //without nvme, plugin will not be able to gather vendor specific attributes (but it can work without it) + m.Log.Warnf("nvme not found: verify that nvme is installed and it is in your PATH (or specified in config) to gather vendor specific attributes: %s", err.Error()) + } + + return nil +} + +func (m *Smart) Gather(acc telegraf.Accumulator) error { + var err error + var scannedNVMeDevices []string + var scannedNonNVMeDevices []string + + devicesFromConfig := m.Devices + isNVMe := len(m.PathNVMe) != 0 + isVendorExtension := len(m.EnableExtensions) != 0 + + if len(m.Devices) != 0 { + devicesFromConfig = 
excludeWrongDeviceNames(devicesFromConfig) + + m.getAttributes(acc, devicesFromConfig) + + // if nvme-cli is present, vendor specific attributes can be gathered + if isVendorExtension && isNVMe { + scannedNVMeDevices, scannedNonNVMeDevices, err = m.scanAllDevices(true) + if err != nil { + return err + } + NVMeDevices := distinguishNVMeDevices(devicesFromConfig, scannedNVMeDevices) + + m.getVendorNVMeAttributes(acc, NVMeDevices) + } + return nil + } + scannedNVMeDevices, scannedNonNVMeDevices, err = m.scanAllDevices(false) + if err != nil { + return err + } + var devicesFromScan []string + devicesFromScan = append(devicesFromScan, scannedNVMeDevices...) + devicesFromScan = append(devicesFromScan, scannedNonNVMeDevices...) + + m.getAttributes(acc, devicesFromScan) + if isVendorExtension && isNVMe { + m.getVendorNVMeAttributes(acc, scannedNVMeDevices) + } + return nil +} + +// validate and exclude not correct config device names to avoid unwanted behaviours +func excludeWrongDeviceNames(devices []string) []string { + validSigns := map[string]struct{}{ + " ": {}, + "/": {}, + "\\": {}, + "-": {}, + ",": {}, + } + var wrongDevices []string + + for _, device := range devices { + for _, char := range device { + if unicode.IsLetter(char) || unicode.IsNumber(char) { + continue + } + if _, exist := validSigns[string(char)]; exist { + continue + } + wrongDevices = append(wrongDevices, device) } } + return difference(devices, wrongDevices) +} - m.getAttributes(acc, devices) - return nil +func (m *Smart) scanAllDevices(ignoreExcludes bool) ([]string, []string, error) { + // this will return all devices (including NVMe devices) for smartctl version >= 7.0 + // for older versions this will return non NVMe devices + devices, err := m.scanDevices(ignoreExcludes, "--scan") + if err != nil { + return nil, nil, err + } + + // this will return only NVMe devices + NVMeDevices, err := m.scanDevices(ignoreExcludes, "--scan", "--device=nvme") + if err != nil { + return nil, nil, err + } + 
+ // to handle all versions of smartctl this will return only non NVMe devices + nonNVMeDevices := difference(devices, NVMeDevices) + return NVMeDevices, nonNVMeDevices, nil +} + +func distinguishNVMeDevices(userDevices []string, availableNVMeDevices []string) []string { + var NVMeDevices []string + + for _, userDevice := range userDevices { + for _, NVMeDevice := range availableNVMeDevices { + // double check. E.g. in case when nvme0 is equal nvme0n1, will check if "nvme0" part is present. + if strings.Contains(NVMeDevice, userDevice) || strings.Contains(userDevice, NVMeDevice) { + NVMeDevices = append(NVMeDevices, userDevice) + } + } + } + return NVMeDevices +} + +// Scan for S.M.A.R.T. devices from smartctl +func (m *Smart) scanDevices(ignoreExcludes bool, scanArgs ...string) ([]string, error) { + out, err := runCmd(m.Timeout, m.UseSudo, m.PathSmartctl, scanArgs...) + if err != nil { + return []string{}, fmt.Errorf("failed to run command '%s %s': %s - %s", m.PathSmartctl, scanArgs, err, string(out)) + } + var devices []string + for _, line := range strings.Split(string(out), "\n") { + dev := strings.Split(line, " ") + if len(dev) <= 1 { + continue + } + if !ignoreExcludes { + if !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) { + devices = append(devices, strings.TrimSpace(dev[0])) + } + } else { + devices = append(devices, strings.TrimSpace(dev[0])) + } + } + return devices, nil } // Wrap with sudo @@ -235,23 +508,6 @@ var runCmd = func(timeout internal.Duration, sudo bool, command string, args ... return internal.CombinedOutputTimeout(cmd, timeout.Duration) } -// Scan for S.M.A.R.T. 
devices -func (m *Smart) scan() ([]string, error) { - out, err := runCmd(m.Timeout, m.UseSudo, m.Path, "--scan") - if err != nil { - return []string{}, fmt.Errorf("failed to run command '%s --scan': %s - %s", m.Path, err, string(out)) - } - - devices := []string{} - for _, line := range strings.Split(string(out), "\n") { - dev := strings.Split(line, " ") - if len(dev) > 1 && !excludedDev(m.Excludes, strings.TrimSpace(dev[0])) { - devices = append(devices, strings.TrimSpace(dev[0])) - } - } - return devices, nil -} - func excludedDev(excludes []string, deviceLine string) bool { device := strings.Split(deviceLine, " ") if len(device) != 0 { @@ -270,21 +526,137 @@ func (m *Smart) getAttributes(acc telegraf.Accumulator, devices []string) { wg.Add(len(devices)) for _, device := range devices { - go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.Path, m.Nocheck, device, &wg) + go gatherDisk(acc, m.Timeout, m.UseSudo, m.Attributes, m.PathSmartctl, m.Nocheck, device, &wg) } wg.Wait() } -// Command line parse errors are denoted by the exit code having the 0 bit set. -// All other errors are drive/communication errors and should be ignored. 
-func exitStatus(err error) (int, error) { - if exiterr, ok := err.(*exec.ExitError); ok { - if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { - return status.ExitStatus(), nil +func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []string) { + NVMeDevices := getDeviceInfoForNVMeDisks(acc, devices, m.PathNVMe, m.Timeout, m.UseSudo) + + var wg sync.WaitGroup + + for _, device := range NVMeDevices { + if contains(m.EnableExtensions, "auto-on") { + switch device.vendorID { + case IntelVID: + wg.Add(1) + go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg) + } + } else if contains(m.EnableExtensions, "Intel") && device.vendorID == IntelVID { + wg.Add(1) + go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg) + } + } + wg.Wait() +} + +func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout internal.Duration, useSudo bool) []NVMeDevice { + var NVMeDevices []NVMeDevice + + for _, device := range devices { + vid, sn, mn, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo) + if err != nil { + acc.AddError(fmt.Errorf("cannot find device info for %s device", device)) + continue + } + newDevice := NVMeDevice{ + name: device, + vendorID: vid, + model: mn, + serialNumber: sn, + } + NVMeDevices = append(NVMeDevices, newDevice) + } + return NVMeDevices +} + +func gatherNVMeDeviceInfo(nvme, device string, timeout internal.Duration, useSudo bool) (string, string, string, error) { + args := []string{"id-ctrl"} + args = append(args, strings.Split(device, " ")...) + out, err := runCmd(timeout, useSudo, nvme, args...) 
+ if err != nil { + return "", "", "", err + } + outStr := string(out) + + vid, sn, mn, err := findNVMeDeviceInfo(outStr) + + return vid, sn, mn, err +} + +func findNVMeDeviceInfo(output string) (string, string, string, error) { + scanner := bufio.NewScanner(strings.NewReader(output)) + var vid, sn, mn string + + for scanner.Scan() { + line := scanner.Text() + + if matches := nvmeIdCtrlExpressionPattern.FindStringSubmatch(line); len(matches) > 2 { + matches[1] = strings.TrimSpace(matches[1]) + matches[2] = strings.TrimSpace(matches[2]) + if matches[1] == "vid" { + if _, err := fmt.Sscanf(matches[2], "%s", &vid); err != nil { + return "", "", "", err + } + } + if matches[1] == "sn" { + sn = matches[2] + } + if matches[1] == "mn" { + mn = matches[2] + } + } + } + return vid, sn, mn, nil +} + +func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo bool, nvme string, device NVMeDevice, wg *sync.WaitGroup) { + defer wg.Done() + + args := []string{"intel", "smart-log-add"} + args = append(args, strings.Split(device.name, " ")...) + out, e := runCmd(timeout, usesudo, nvme, args...) 
+ outStr := string(out) + + _, er := exitStatus(e) + if er != nil { + acc.AddError(fmt.Errorf("failed to run command '%s %s': %s - %s", nvme, strings.Join(args, " "), e, outStr)) + return + } + + scanner := bufio.NewScanner(strings.NewReader(outStr)) + + for scanner.Scan() { + line := scanner.Text() + tags := map[string]string{} + fields := make(map[string]interface{}) + + tags["device"] = path.Base(device.name) + tags["model"] = device.model + tags["serial_no"] = device.serialNumber + + if matches := intelExpressionPattern.FindStringSubmatch(line); len(matches) > 3 { + matches[1] = strings.TrimSpace(matches[1]) + matches[3] = strings.TrimSpace(matches[3]) + if attr, ok := intelAttributes[matches[1]]; ok { + tags["name"] = attr.Name + if attr.ID != "" { + tags["id"] = attr.ID + } + + parse := parseCommaSeparatedIntWithAccumulator + if attr.Parse != nil { + parse = attr.Parse + } + + if err := parse(acc, fields, tags, matches[3]); err != nil { + continue + } + } } } - return 0, err } func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, collectAttributes bool, smartctl, nocheck, device string, wg *sync.WaitGroup) { @@ -328,7 +700,7 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co deviceTags["wwn"] = strings.Replace(wwn[1], " ", "", -1) } - capacity := usercapacityInfo.FindStringSubmatch(line) + capacity := userCapacityInfo.FindStringSubmatch(line) if len(capacity) > 1 { deviceTags["capacity"] = strings.Replace(capacity[1], ",", "", -1) } @@ -340,7 +712,7 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co health := smartOverallHealth.FindStringSubmatch(line) if len(health) > 2 { - deviceFields["health_ok"] = (health[2] == "PASSED" || health[2] == "OK") + deviceFields["health_ok"] = health[2] == "PASSED" || health[2] == "OK" } tags := map[string]string{} @@ -418,6 +790,40 @@ func gatherDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo, co 
acc.AddFields("smart_device", deviceFields, deviceTags) } +// Command line parse errors are denoted by the exit code having the 0 bit set. +// All other errors are drive/communication errors and should be ignored. +func exitStatus(err error) (int, error) { + if exiterr, ok := err.(*exec.ExitError); ok { + if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + return status.ExitStatus(), nil + } + } + return 0, err +} + +func contains(args []string, element string) bool { + for _, arg := range args { + if arg == element { + return true + } + } + return false +} + +func difference(a, b []string) []string { + mb := make(map[string]struct{}, len(b)) + for _, x := range b { + mb[x] = struct{}{} + } + var diff []string + for _, x := range a { + if _, found := mb[x]; !found { + diff = append(diff, x) + } + } + return diff +} + func parseRawValue(rawVal string) (int64, error) { // Integer if i, err := strconv.ParseInt(rawVal, 10, 64); err == nil { @@ -428,7 +834,7 @@ func parseRawValue(rawVal string) (int64, error) { unit := regexp.MustCompile("^(.*)([hms])$") parts := strings.Split(rawVal, "+") if len(parts) == 0 { - return 0, fmt.Errorf("Couldn't parse RAW_VALUE '%s'", rawVal) + return 0, fmt.Errorf("couldn't parse RAW_VALUE '%s'", rawVal) } duration := int64(0) @@ -452,6 +858,63 @@ func parseRawValue(rawVal string) (int64, error) { return duration, nil } +func parseBytesWritten(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var value int64 + + if _, err := fmt.Sscanf(str, "sectors: %d", &value); err != nil { + return err + } + fields["raw_value"] = value + acc.AddFields("smart_attribute", fields, tags) + return nil +} + +func parseThermalThrottle(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var percentage float64 + var count int64 + + if _, err := fmt.Sscanf(str, "%f%%, cnt: %d", &percentage, &count); err != nil { + return err + } + + 
fields["raw_value"] = percentage + tags["name"] = "Thermal_Throttle_Status_Prc" + acc.AddFields("smart_attribute", fields, tags) + + fields["raw_value"] = count + tags["name"] = "Thermal_Throttle_Status_Cnt" + acc.AddFields("smart_attribute", fields, tags) + + return nil +} + +func parseWearLeveling(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var min, max, avg int64 + + if _, err := fmt.Sscanf(str, "min: %d, max: %d, avg: %d", &min, &max, &avg); err != nil { + return err + } + values := []int64{min, max, avg} + for i, submetricName := range []string{"Min", "Max", "Avg"} { + fields["raw_value"] = values[i] + tags["name"] = fmt.Sprintf("Wear_Leveling_%s", submetricName) + acc.AddFields("smart_attribute", fields, tags) + } + + return nil +} + +func parseTimedWorkload(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + var value float64 + + if _, err := fmt.Sscanf(str, "%f", &value); err != nil { + return err + } + fields["raw_value"] = value + acc.AddFields("smart_attribute", fields, tags) + return nil +} + func parseInt(str string) int64 { if i, err := strconv.ParseInt(str, 10, 64); err == nil { return i @@ -460,6 +923,7 @@ func parseInt(str string) int64 { } func parseCommaSeparatedInt(fields, _ map[string]interface{}, str string) error { + str = strings.Join(strings.Fields(str), "") i, err := strconv.ParseInt(strings.Replace(str, ",", "", -1), 10, 64) if err != nil { return err @@ -479,6 +943,17 @@ func parseDataUnits(fields, deviceFields map[string]interface{}, str string) err return parseCommaSeparatedInt(fields, deviceFields, units) } +func parseCommaSeparatedIntWithAccumulator(acc telegraf.Accumulator, fields map[string]interface{}, tags map[string]string, str string) error { + i, err := strconv.ParseInt(strings.Replace(str, ",", "", -1), 10, 64) + if err != nil { + return err + } + + fields["raw_value"] = i + acc.AddFields("smart_attribute", fields, 
tags) + return nil +} + func parseTemperature(fields, deviceFields map[string]interface{}, str string) error { var temp int64 if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil { @@ -491,13 +966,34 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e return nil } +func parseTemperatureSensor(fields, deviceFields map[string]interface{}, str string) error { + var temp int64 + if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil { + return err + } + + fields["raw_value"] = temp + + return nil +} + +func validatePath(path string) error { + pathInfo, err := os.Stat(path) + if os.IsNotExist(err) { + return fmt.Errorf("provided path does not exist: [%s]", path) + } + if mode := pathInfo.Mode(); !mode.IsRegular() { + return fmt.Errorf("provided path does not point to a regular file: [%s]", path) + } + return nil +} + func init() { + // Set LC_NUMERIC to uniform numeric output from cli tools + _ = os.Setenv("LC_NUMERIC", "en_US.UTF-8") + inputs.Add("smart", func() telegraf.Input { m := NewSmart() - path, _ := exec.LookPath("smartctl") - if len(path) > 0 { - m.Path = path - } m.Nocheck = "standby" return m }) diff --git a/plugins/inputs/smart/smart_test.go b/plugins/inputs/smart/smart_test.go index 465ce9317..00d8cf072 100644 --- a/plugins/inputs/smart/smart_test.go +++ b/plugins/inputs/smart/smart_test.go @@ -15,30 +15,394 @@ import ( func TestGatherAttributes(t *testing.T) { s := NewSmart() - s.Path = "smartctl" s.Attributes = true assert.Equal(t, time.Second*30, s.Timeout.Duration) - var acc testutil.Accumulator - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { if len(args) > 0 { - if args[0] == "--scan" { - return []byte(mockScanData), nil - } else if args[0] == "--info" { + if args[0] == "--info" && args[7] == "/dev/ada0" { return []byte(mockInfoAttributeData), nil + } else if args[0] == "--info" && args[7] == "/dev/nvme0" { + return []byte(smartctlNvmeInfoData), nil + } else if 
args[0] == "--scan" && len(args) == 1 { + return []byte(mockScanData), nil + } else if args[0] == "--scan" && len(args) >= 2 && args[1] == "--device=nvme" { + return []byte(mockScanNvmeData), nil } } return nil, errors.New("command not found") } - err := s.Gather(&acc) + t.Run("Wrong path to smartctl", func(t *testing.T) { + s.PathSmartctl = "this_path_to_smartctl_does_not_exist" + err := s.Init() - require.NoError(t, err) - assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered") + assert.Error(t, err) + }) - var testsAda0Attributes = []struct { + t.Run("Smartctl presence", func(t *testing.T) { + s.PathSmartctl = "smartctl" + s.PathNVMe = "" + + t.Run("Only non nvme device", func(t *testing.T) { + s.Devices = []string{"/dev/ada0"} + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 65, acc.NFields(), "Wrong number of fields gathered") + + for _, test := range testsAda0Attributes { + acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags) + } + + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + }) + t.Run("Only nvme device", func(t *testing.T) { + s.Devices = []string{"/dev/nvme0"} + var acc testutil.Accumulator + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 32, acc.NFields(), "Wrong number of fields gathered") + + testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(), + testutil.SortMetrics(), testutil.IgnoreTime()) + }) + }) +} + +func TestGatherNoAttributes(t *testing.T) { + s := NewSmart() + s.Attributes = false + + assert.Equal(t, time.Second*30, s.Timeout.Duration) + + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + if len(args) > 0 { + if args[0] == "--scan" && len(args) == 1 { + return []byte(mockScanData), nil + } else if args[0] == "--info" && args[7] == "/dev/ada0" { + return 
[]byte(mockInfoAttributeData), nil + } else if args[0] == "--info" && args[7] == "/dev/nvme0" { + return []byte(smartctlNvmeInfoData), nil + } else if args[0] == "--scan" && args[1] == "--device=nvme" { + return []byte(mockScanNvmeData), nil + } + } + return nil, errors.New("command not found") + } + + t.Run("scan for devices", func(t *testing.T) { + var acc testutil.Accumulator + s.PathSmartctl = "smartctl" + + err := s.Gather(&acc) + + require.NoError(t, err) + assert.Equal(t, 8, acc.NFields(), "Wrong number of fields gathered") + acc.AssertDoesNotContainMeasurement(t, "smart_attribute") + + for _, test := range testsAda0Device { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + for _, test := range testNvmeDevice { + acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) + } + }) +} + +func TestExcludedDev(t *testing.T) { + assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.") + assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.") + assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.") +} + +func TestGatherSATAInfo(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSATAInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherSATAInfo65(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSATAInfoData65), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + 
) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherHgstSAS(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(hgstSASInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherHtSAS(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(htSASInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + + testutil.RequireMetricsEqual(t, testHtsasAtributtes, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime()) +} + +func TestGatherSSD(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(ssdInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherSSDRaid(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(ssdRaidInfoData), nil + } + + var ( + 
acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) + assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") + assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") +} + +func TestGatherNvme(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(smartctlNvmeInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "nvme0", wg) + + testutil.RequireMetricsEqual(t, testSmartctlNvmeAttributes, acc.GetTelegrafMetrics(), + testutil.SortMetrics(), testutil.IgnoreTime()) +} + +func TestGatherIntelNvme(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(nvmeIntelInfoData), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + device = NVMeDevice{ + name: "nvme0", + model: mockModel, + serialNumber: mockSerial, + } + ) + + wg.Add(1) + gatherIntelNVMeDisk(acc, internal.Duration{Duration: time.Second * 30}, true, "", device, wg) + + result := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, testIntelInvmeAttributes, result, + testutil.SortMetrics(), testutil.IgnoreTime()) +} + +func Test_findVIDFromNVMeOutput(t *testing.T) { + vid, sn, mn, err := findNVMeDeviceInfo(nvmeIdentifyController) + + assert.Nil(t, err) + assert.Equal(t, "0x8086", vid) + assert.Equal(t, "CVFT5123456789ABCD", sn) + assert.Equal(t, "INTEL SSDPEDABCDEFG", mn) +} + +func Test_checkForNVMeDevices(t *testing.T) { + devices := []string{"sda1", "nvme0", "sda2", "nvme2"} + expectedNVMeDevices := []string{"nvme0", "nvme2"} + resultNVMeDevices := distinguishNVMeDevices(devices, expectedNVMeDevices) + assert.Equal(t, expectedNVMeDevices, 
resultNVMeDevices) +} + +func Test_excludeWrongDeviceNames(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1", "/dev/sda ; ./suspicious_script.sh"} + validDevices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1"} + result := excludeWrongDeviceNames(devices) + assert.Equal(t, validDevices, result) +} + +func Test_contains(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme1"} + device := "/dev/nvme1" + deviceNotIncluded := "/dev/nvme5" + assert.True(t, contains(devices, device)) + assert.False(t, contains(devices, deviceNotIncluded)) +} + +func Test_difference(t *testing.T) { + devices := []string{"/dev/sda", "/dev/nvme1", "/dev/nvme2"} + secondDevices := []string{"/dev/sda", "/dev/nvme1"} + expected := []string{"/dev/nvme2"} + result := difference(devices, secondDevices) + assert.Equal(t, expected, result) +} + +func Test_integerOverflow(t *testing.T) { + runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { + return []byte(smartctlNvmeInfoDataWithOverflow), nil + } + + var ( + acc = &testutil.Accumulator{} + wg = &sync.WaitGroup{} + ) + + t.Run("If data raw_value is out of int64 range, there should be no metrics for that attribute", func(t *testing.T) { + wg.Add(1) + gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "nvme0", wg) + + result := acc.GetTelegrafMetrics() + testutil.RequireMetricsEqual(t, testOverflowAttributes, result, + testutil.SortMetrics(), testutil.IgnoreTime()) + }) +} + +var ( + testOverflowAttributes = []telegraf.Metric{ + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Temperature_Sensor_3", + }, + map[string]interface{}{ + "raw_value": int64(9223372036854775807), + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": "nvme0", + "name": "Temperature_Sensor_4", + }, + map[string]interface{}{ 
+ "raw_value": int64(-9223372036854775808), + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_device", + map[string]string{ + "device": "nvme0", + }, + map[string]interface{}{ + "exit_status": 0, + }, + time.Unix(0, 0), + ), + } + + testHtsasAtributtes = []telegraf.Metric{ + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "id": "194", + "model": "HUC103030CSS600", + "name": "Temperature_Celsius", + }, + map[string]interface{}{ + "raw_value": 36, + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_attribute", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "id": "4", + "model": "HUC103030CSS600", + "name": "Start_Stop_Count", + }, + map[string]interface{}{ + "raw_value": 47, + }, + time.Unix(0, 0), + ), + testutil.MustMetric( + "smart_device", + map[string]string{ + "device": ".", + "serial_no": "PDWAR9GE", + "enabled": "Enabled", + "model": "HUC103030CSS600", + }, + map[string]interface{}{ + "exit_status": 0, + "health_ok": true, + "temp_c": 36, + }, + time.Unix(0, 0), + ), + } + + testsAda0Attributes = []struct { fields map[string]interface{} tags map[string]string }{ @@ -296,249 +660,13 @@ func TestGatherAttributes(t *testing.T) { }, } - for _, test := range testsAda0Attributes { - acc.AssertContainsTaggedFields(t, "smart_attribute", test.fields, test.tags) - } + mockModel = "INTEL SSDPEDABCDEFG" + mockSerial = "CVFT5123456789ABCD" - var testsAda0Device = []struct { - fields map[string]interface{} - tags map[string]string - }{ - { - map[string]interface{}{ - "exit_status": int(0), - "health_ok": bool(true), - "read_error_rate": int64(0), - "temp_c": int64(34), - "udma_crc_errors": int64(0), - }, - map[string]string{ - "device": "ada0", - "model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "wwn": "5002538043584d30", - "enabled": "Enabled", - "capacity": "251000193024", - }, - }, - } - - for _, test := range 
testsAda0Device { - acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) - } -} - -func TestGatherNoAttributes(t *testing.T) { - s := NewSmart() - s.Path = "smartctl" - s.Attributes = false - - assert.Equal(t, time.Second*30, s.Timeout.Duration) - - // overwriting exec commands with mock commands - var acc testutil.Accumulator - - err := s.Gather(&acc) - - require.NoError(t, err) - assert.Equal(t, 5, acc.NFields(), "Wrong number of fields gathered") - acc.AssertDoesNotContainMeasurement(t, "smart_attribute") - - var testsAda0Device = []struct { - fields map[string]interface{} - tags map[string]string - }{ - { - map[string]interface{}{ - "exit_status": int(0), - "health_ok": bool(true), - "read_error_rate": int64(0), - "temp_c": int64(34), - "udma_crc_errors": int64(0), - }, - map[string]string{ - "device": "ada0", - "model": "APPLE SSD SM256E", - "serial_no": "S0X5NZBC422720", - "wwn": "5002538043584d30", - "enabled": "Enabled", - "capacity": "251000193024", - }, - }, - } - - for _, test := range testsAda0Device { - acc.AssertContainsTaggedFields(t, "smart_device", test.fields, test.tags) - } -} - -func TestExcludedDev(t *testing.T) { - assert.Equal(t, true, excludedDev([]string{"/dev/pass6"}, "/dev/pass6 -d atacam"), "Should be excluded.") - assert.Equal(t, false, excludedDev([]string{}, "/dev/pass6 -d atacam"), "Shouldn't be excluded.") - assert.Equal(t, false, excludedDev([]string{"/dev/pass6"}, "/dev/pass1 -d atacam"), "Shouldn't be excluded.") -} - -func TestGatherSATAInfo(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSATAInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 101, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(20), acc.NMetrics(), "Wrong number 
of metrics gathered") -} - -func TestGatherSATAInfo65(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSATAInfoData65), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 91, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(18), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherHgstSAS(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(hgstSASInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 6, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(4), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherHtSAS(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(htSASInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - - expected := []telegraf.Metric{ - testutil.MustMetric( - "smart_attribute", - map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", - "id": "194", - "model": "HUC103030CSS600", - "name": "Temperature_Celsius", - }, - map[string]interface{}{ - "raw_value": 36, - }, - time.Unix(0, 0), - ), - testutil.MustMetric( - "smart_attribute", - map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", - "id": "4", - "model": "HUC103030CSS600", - "name": "Start_Stop_Count", - }, - map[string]interface{}{ 
- "raw_value": 47, - }, - time.Unix(0, 0), - ), - testutil.MustMetric( - "smart_device", - map[string]string{ - "device": ".", - "serial_no": "PDWAR9GE", - "enabled": "Enabled", - "model": "HUC103030CSS600", - }, - map[string]interface{}{ - "exit_status": 0, - "health_ok": true, - "temp_c": 36, - }, - time.Unix(0, 0), - ), - } - - testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.SortMetrics(), testutil.IgnoreTime()) -} - -func TestGatherSSD(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(ssdInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 105, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(26), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherSSDRaid(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(ssdRaidInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - assert.Equal(t, 74, acc.NFields(), "Wrong number of fields gathered") - assert.Equal(t, uint64(15), acc.NMetrics(), "Wrong number of metrics gathered") -} - -func TestGatherNvme(t *testing.T) { - runCmd = func(timeout internal.Duration, sudo bool, command string, args ...string) ([]byte, error) { - return []byte(nvmeInfoData), nil - } - - var ( - acc = &testutil.Accumulator{} - wg = &sync.WaitGroup{} - ) - - wg.Add(1) - gatherDisk(acc, internal.Duration{Duration: time.Second * 30}, true, true, "", "", "", wg) - - expected := []telegraf.Metric{ + testSmartctlNvmeAttributes = []telegraf.Metric{ testutil.MustMetric("smart_device", map[string]string{ - 
"device": ".", + "device": "nvme0", "model": "TS128GMTE850", "serial_no": "D704940282?", }, @@ -551,7 +679,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "id": "9", "name": "Power_On_Hours", "serial_no": "D704940282?", @@ -564,7 +692,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "id": "12", "name": "Power_Cycle_Count", "serial_no": "D704940282?", @@ -577,7 +705,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Media_and_Data_Integrity_Errors", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -589,7 +717,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Error_Information_Log_Entries", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -601,7 +729,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Available_Spare", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -613,7 +741,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Available_Spare_Threshold", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -625,7 +753,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "id": "194", "name": "Temperature_Celsius", "serial_no": "D704940282?", @@ -638,7 +766,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Critical_Warning", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -650,7 +778,7 @@ 
func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Percentage_Used", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -662,7 +790,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Data_Units_Read", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -674,7 +802,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Data_Units_Written", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -686,7 +814,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Host_Read_Commands", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -698,7 +826,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Host_Write_Commands", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -710,7 +838,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Controller_Busy_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -722,7 +850,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Unsafe_Shutdowns", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -734,7 +862,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", map[string]string{ - "device": ".", + "device": "nvme0", "name": "Warning_Temperature_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -746,7 +874,7 @@ func TestGatherNvme(t *testing.T) { ), testutil.MustMetric("smart_attribute", 
map[string]string{ - "device": ".", + "device": "nvme0", "name": "Critical_Temperature_Time", "serial_no": "D704940282?", "model": "TS128GMTE850", @@ -755,18 +883,393 @@ func TestGatherNvme(t *testing.T) { "raw_value": int64(7), }, time.Now(), + ), testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_1", + }, + map[string]interface{}{ + "raw_value": int64(57), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_2", + }, + map[string]interface{}{ + "raw_value": int64(50), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_3", + }, + map[string]interface{}{ + "raw_value": int64(44), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_4", + }, + map[string]interface{}{ + "raw_value": int64(43), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_5", + }, + map[string]interface{}{ + "raw_value": int64(57), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_6", + }, + map[string]interface{}{ + "raw_value": int64(50), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_7", + }, + map[string]interface{}{ + "raw_value": 
int64(44), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Temperature_Sensor_8", + }, + map[string]interface{}{ + "raw_value": int64(43), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Thermal_Management_T1_Trans_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Thermal_Management_T2_Trans_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Thermal_Management_T1_Total_Time", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": "D704940282?", + "model": "TS128GMTE850", + "name": "Thermal_Management_T2_Total_Time", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), ), } - testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), - testutil.SortMetrics(), testutil.IgnoreTime()) -} + testsAda0Device = []struct { + fields map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + "health_ok": bool(true), + "read_error_rate": int64(0), + "temp_c": int64(34), + "udma_crc_errors": int64(0), + }, + map[string]string{ + "device": "ada0", + "model": "APPLE SSD SM256E", + "serial_no": "S0X5NZBC422720", + "wwn": "5002538043584d30", + "enabled": "Enabled", + "capacity": "251000193024", + }, + }, + } -// smartctl output -var ( + testNvmeDevice = []struct { + fields 
map[string]interface{} + tags map[string]string + }{ + { + map[string]interface{}{ + "exit_status": int(0), + "temp_c": int64(38), + "health_ok": true, + }, + map[string]string{ + "device": "nvme0", + "model": "TS128GMTE850", + "serial_no": "D704940282?", + }, + }, + } + + testIntelInvmeAttributes = []telegraf.Metric{ + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Program_Fail_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Erase_Fail_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "End_To_End_Error_Detection_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Crc_Error_Count", + }, + map[string]interface{}{ + "raw_value": 13, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Retry_Buffer_Overflow_Count", + }, + map[string]interface{}{ + "raw_value": 0, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Min", + }, + map[string]interface{}{ + "raw_value": 39, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Max", + }, + map[string]interface{}{ + "raw_value": 40, + }, + time.Now(), + ), + 
testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Wear_Leveling_Avg", + }, + map[string]interface{}{ + "raw_value": 39, + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Media_Wear", + }, + map[string]interface{}{ + "raw_value": float64(0.13), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Host_Reads", + }, + map[string]interface{}{ + "raw_value": float64(71), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Timed_Workload_Timer", + }, + map[string]interface{}{ + "raw_value": int64(1612952), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Thermal_Throttle_Status_Prc", + }, + map[string]interface{}{ + "raw_value": float64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Thermal_Throttle_Status_Cnt", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Pll_Lock_Loss_Count", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Nand_Bytes_Written", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + 
testutil.MustMetric("smart_attribute", + map[string]string{ + "device": "nvme0", + "serial_no": mockSerial, + "model": mockModel, + "name": "Host_Bytes_Written", + }, + map[string]interface{}{ + "raw_value": int64(0), + }, + time.Now(), + ), + } // smartctl --scan - mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device -` + mockScanData = `/dev/ada0 -d atacam # /dev/ada0, ATA device` + + // smartctl --scan -d nvme + mockScanNvmeData = `/dev/nvme0 -d nvme # /dev/nvme0, NVMe device` + // smartctl --info --health --attributes --tolerance=verypermissive -n standby --format=brief [DEVICE] mockInfoAttributeData = `smartctl 6.5 2016-05-07 r4318 [Darwin 16.4.0 x86_64] (local build) Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org @@ -1174,8 +1677,7 @@ Selective self-test flags (0x0): After scanning selected spans, do NOT read-scan remainder of disk. If Selective self-test is pending on power-up, resume after 0 minute delay. ` - - nvmeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build) + smartctlNvmeInfoData = `smartctl 6.5 2016-05-07 r4318 [x86_64-linux-4.1.27-gvt-yocto-standard] (local build) Copyright (C) 2002-16, Bruce Allen, Christian Franke, www.smartmontools.org === START OF INFORMATION SECTION === @@ -1211,5 +1713,108 @@ Media and Data Integrity Errors: 0 Error Information Log Entries: 119,699 Warning Comp. Temperature Time: 11 Critical Comp. Temperature Time: 7 +Thermal Temp. 1 Transition Count: 0 +Thermal Temp. 2 Transition Count: 0 +Thermal Temp. 1 Total Time: 0 +Thermal Temp. 
2 Total Time: 0 +Temperature Sensor 1: 57 C +Temperature Sensor 2: 50 C +Temperature Sensor 3: 44 C +Temperature Sensor 4: 43 C +Temperature Sensor 5: 57 C +Temperature Sensor 6: 50 C +Temperature Sensor 7: 44 C +Temperature Sensor 8: 43 C +` + + smartctlNvmeInfoDataWithOverflow = ` +Temperature Sensor 1: 9223372036854775808 C +Temperature Sensor 2: -9223372036854775809 C +Temperature Sensor 3: 9223372036854775807 C +Temperature Sensor 4: -9223372036854775808 C +` + + nvmeIntelInfoData = `Additional Smart Log for NVME device:nvme0 namespace-id:ffffffff +key normalized raw +program_fail_count : 100% 0 +erase_fail_count : 100% 0 +wear_leveling : 100% min: 39, max: 40, avg: 39 +end_to_end_error_detection_count: 100% 0 +crc_error_count : 100% 13 +timed_workload_media_wear : 100% 0.130% +timed_workload_host_reads : 100% 71% +timed_workload_timer : 100% 1612952 min +thermal_throttle_status : 100% 0%, cnt: 0 +retry_buffer_overflow_count : 100% 0 +pll_lock_loss_count : 100% 0 +nand_bytes_written : 0% sectors: 0 +host_bytes_written : 0% sectors: 0 +` + + nvmeIdentifyController = `NVME Identify Controller: +vid : 0x8086 +ssvid : 0x8086 +sn : CVFT5123456789ABCD +mn : INTEL SSDPEDABCDEFG +fr : 8DV10131 +rab : 0 +ieee : 5cd2e4 +cmic : 0 +mdts : 5 +cntlid : 0 +ver : 0 +rtd3r : 0 +rtd3e : 0 +<<<<<<< HEAD +oaes : 0 +ctratt : 0 +oacs : 0x6 +acl : 3 +aerl : 3 +frmw : 0x2 +lpa : 0 +elpe : 63 +npss : 0 +avscc : 0 +apsta : 0 +wctemp : 0 +cctemp : 0 +mtfa : 0 +hmpre : 0 +hmmin : 0 +tnvmcap : 0 +unvmcap : 0 +rpmbs : 0 +edstt : 0 +dsto : 0 +fwug : 0 +kas : 0 +hctma : 0 +mntmt : 0 +mxtmt : 0 +sanicap : 0 +hmminds : 0 +hmmaxd : 0 +sqes : 0x66 +cqes : 0x44 +maxcmd : 0 +nn : 1 +oncs : 0x6 +fuses : 0 +fna : 0x7 +vwc : 0 +awun : 0 +awupf : 0 +nvscc : 0 +acwu : 0 +sgls : 0 +subnqn : +ioccsz : 0 +iorcsz : 0 +icdoff : 0 +ctrattr : 0 +msdbd : 0 +ps 0 : mp:25.00W operational enlat:0 exlat:0 rrt:0 rrl:0 + rwt:0 rwl:0 idle_power:- active_power:- ` )