Fix SMART plugin to recognize all devices from config (#8374)
This commit is contained in:
parent
3523652e30
commit
ff0a8c2d87
|
|
@ -12,14 +12,13 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
"unicode"
|
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/internal"
|
"github.com/influxdata/telegraf/internal"
|
||||||
"github.com/influxdata/telegraf/plugins/inputs"
|
"github.com/influxdata/telegraf/plugins/inputs"
|
||||||
)
|
)
|
||||||
|
|
||||||
const IntelVID = "0x8086"
|
const intelVID = "0x8086"
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// Device Model: APPLE SSD SM256E
|
// Device Model: APPLE SSD SM256E
|
||||||
|
|
@ -55,7 +54,7 @@ var (
|
||||||
|
|
||||||
// vid : 0x8086
|
// vid : 0x8086
|
||||||
// sn : CFGT53260XSP8011P
|
// sn : CFGT53260XSP8011P
|
||||||
nvmeIdCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`)
|
nvmeIDCtrlExpressionPattern = regexp.MustCompile(`^([\w\s]+):([\s\w]+)`)
|
||||||
|
|
||||||
deviceFieldIds = map[string]string{
|
deviceFieldIds = map[string]string{
|
||||||
"1": "read_error_rate",
|
"1": "read_error_rate",
|
||||||
|
|
@ -267,13 +266,7 @@ var (
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
type NVMeDevice struct {
|
// Smart plugin reads metrics from storage devices supporting S.M.A.R.T.
|
||||||
name string
|
|
||||||
vendorID string
|
|
||||||
model string
|
|
||||||
serialNumber string
|
|
||||||
}
|
|
||||||
|
|
||||||
type Smart struct {
|
type Smart struct {
|
||||||
Path string `toml:"path"` //deprecated - to keep backward compatibility
|
Path string `toml:"path"` //deprecated - to keep backward compatibility
|
||||||
PathSmartctl string `toml:"path_smartctl"`
|
PathSmartctl string `toml:"path_smartctl"`
|
||||||
|
|
@ -288,6 +281,13 @@ type Smart struct {
|
||||||
Log telegraf.Logger `toml:"-"`
|
Log telegraf.Logger `toml:"-"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type nvmeDevice struct {
|
||||||
|
name string
|
||||||
|
vendorID string
|
||||||
|
model string
|
||||||
|
serialNumber string
|
||||||
|
}
|
||||||
|
|
||||||
var sampleConfig = `
|
var sampleConfig = `
|
||||||
## Optionally specify the path to the smartctl executable
|
## Optionally specify the path to the smartctl executable
|
||||||
# path_smartctl = "/usr/bin/smartctl"
|
# path_smartctl = "/usr/bin/smartctl"
|
||||||
|
|
@ -330,20 +330,23 @@ var sampleConfig = `
|
||||||
# timeout = "30s"
|
# timeout = "30s"
|
||||||
`
|
`
|
||||||
|
|
||||||
func NewSmart() *Smart {
|
func newSmart() *Smart {
|
||||||
return &Smart{
|
return &Smart{
|
||||||
Timeout: internal.Duration{Duration: time.Second * 30},
|
Timeout: internal.Duration{Duration: time.Second * 30},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SampleConfig returns sample configuration for this plugin.
|
||||||
func (m *Smart) SampleConfig() string {
|
func (m *Smart) SampleConfig() string {
|
||||||
return sampleConfig
|
return sampleConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Description returns the plugin description.
|
||||||
func (m *Smart) Description() string {
|
func (m *Smart) Description() string {
|
||||||
return "Read metrics from storage devices supporting S.M.A.R.T."
|
return "Read metrics from storage devices supporting S.M.A.R.T."
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Init performs one time setup of the plugin and returns an error if the configuration is invalid.
|
||||||
func (m *Smart) Init() error {
|
func (m *Smart) Init() error {
|
||||||
//if deprecated `path` (to smartctl binary) is provided in config and `path_smartctl` override does not exist
|
//if deprecated `path` (to smartctl binary) is provided in config and `path_smartctl` override does not exist
|
||||||
if len(m.Path) > 0 && len(m.PathSmartctl) == 0 {
|
if len(m.Path) > 0 && len(m.PathSmartctl) == 0 {
|
||||||
|
|
@ -377,6 +380,7 @@ func (m *Smart) Init() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gather takes in an accumulator and adds the metrics that the SMART tools gather.
|
||||||
func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||||
var err error
|
var err error
|
||||||
var scannedNVMeDevices []string
|
var scannedNVMeDevices []string
|
||||||
|
|
@ -387,8 +391,6 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||||
isVendorExtension := len(m.EnableExtensions) != 0
|
isVendorExtension := len(m.EnableExtensions) != 0
|
||||||
|
|
||||||
if len(m.Devices) != 0 {
|
if len(m.Devices) != 0 {
|
||||||
devicesFromConfig = excludeWrongDeviceNames(devicesFromConfig)
|
|
||||||
|
|
||||||
m.getAttributes(acc, devicesFromConfig)
|
m.getAttributes(acc, devicesFromConfig)
|
||||||
|
|
||||||
// if nvme-cli is present, vendor specific attributes can be gathered
|
// if nvme-cli is present, vendor specific attributes can be gathered
|
||||||
|
|
@ -418,31 +420,6 @@ func (m *Smart) Gather(acc telegraf.Accumulator) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// validate config device names and exclude the invalid ones to avoid unwanted behaviours
|
|
||||||
func excludeWrongDeviceNames(devices []string) []string {
|
|
||||||
validSigns := map[string]struct{}{
|
|
||||||
" ": {},
|
|
||||||
"/": {},
|
|
||||||
"\\": {},
|
|
||||||
"-": {},
|
|
||||||
",": {},
|
|
||||||
}
|
|
||||||
var wrongDevices []string
|
|
||||||
|
|
||||||
for _, device := range devices {
|
|
||||||
for _, char := range device {
|
|
||||||
if unicode.IsLetter(char) || unicode.IsNumber(char) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if _, exist := validSigns[string(char)]; exist {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
wrongDevices = append(wrongDevices, device)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return difference(devices, wrongDevices)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m *Smart) scanAllDevices(ignoreExcludes bool) ([]string, []string, error) {
|
func (m *Smart) scanAllDevices(ignoreExcludes bool) ([]string, []string, error) {
|
||||||
// this will return all devices (including NVMe devices) for smartctl version >= 7.0
|
// this will return all devices (including NVMe devices) for smartctl version >= 7.0
|
||||||
// for older versions this will return non NVMe devices
|
// for older versions this will return non NVMe devices
|
||||||
|
|
@ -540,11 +517,11 @@ func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []stri
|
||||||
for _, device := range NVMeDevices {
|
for _, device := range NVMeDevices {
|
||||||
if contains(m.EnableExtensions, "auto-on") {
|
if contains(m.EnableExtensions, "auto-on") {
|
||||||
switch device.vendorID {
|
switch device.vendorID {
|
||||||
case IntelVID:
|
case intelVID:
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
}
|
}
|
||||||
} else if contains(m.EnableExtensions, "Intel") && device.vendorID == IntelVID {
|
} else if contains(m.EnableExtensions, "Intel") && device.vendorID == intelVID {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
go gatherIntelNVMeDisk(acc, m.Timeout, m.UseSudo, m.PathNVMe, device, &wg)
|
||||||
}
|
}
|
||||||
|
|
@ -552,8 +529,8 @@ func (m *Smart) getVendorNVMeAttributes(acc telegraf.Accumulator, devices []stri
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout internal.Duration, useSudo bool) []NVMeDevice {
|
func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme string, timeout internal.Duration, useSudo bool) []nvmeDevice {
|
||||||
var NVMeDevices []NVMeDevice
|
var NVMeDevices []nvmeDevice
|
||||||
|
|
||||||
for _, device := range devices {
|
for _, device := range devices {
|
||||||
vid, sn, mn, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo)
|
vid, sn, mn, err := gatherNVMeDeviceInfo(nvme, device, timeout, useSudo)
|
||||||
|
|
@ -561,7 +538,7 @@ func getDeviceInfoForNVMeDisks(acc telegraf.Accumulator, devices []string, nvme
|
||||||
acc.AddError(fmt.Errorf("cannot find device info for %s device", device))
|
acc.AddError(fmt.Errorf("cannot find device info for %s device", device))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
newDevice := NVMeDevice{
|
newDevice := nvmeDevice{
|
||||||
name: device,
|
name: device,
|
||||||
vendorID: vid,
|
vendorID: vid,
|
||||||
model: mn,
|
model: mn,
|
||||||
|
|
@ -593,7 +570,7 @@ func findNVMeDeviceInfo(output string) (string, string, string, error) {
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := scanner.Text()
|
line := scanner.Text()
|
||||||
|
|
||||||
if matches := nvmeIdCtrlExpressionPattern.FindStringSubmatch(line); len(matches) > 2 {
|
if matches := nvmeIDCtrlExpressionPattern.FindStringSubmatch(line); len(matches) > 2 {
|
||||||
matches[1] = strings.TrimSpace(matches[1])
|
matches[1] = strings.TrimSpace(matches[1])
|
||||||
matches[2] = strings.TrimSpace(matches[2])
|
matches[2] = strings.TrimSpace(matches[2])
|
||||||
if matches[1] == "vid" {
|
if matches[1] == "vid" {
|
||||||
|
|
@ -612,7 +589,7 @@ func findNVMeDeviceInfo(output string) (string, string, string, error) {
|
||||||
return vid, sn, mn, nil
|
return vid, sn, mn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo bool, nvme string, device NVMeDevice, wg *sync.WaitGroup) {
|
func gatherIntelNVMeDisk(acc telegraf.Accumulator, timeout internal.Duration, usesudo bool, nvme string, device nvmeDevice, wg *sync.WaitGroup) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
args := []string{"intel", "smart-log-add"}
|
args := []string{"intel", "smart-log-add"}
|
||||||
|
|
@ -966,7 +943,7 @@ func parseTemperature(fields, deviceFields map[string]interface{}, str string) e
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseTemperatureSensor(fields, deviceFields map[string]interface{}, str string) error {
|
func parseTemperatureSensor(fields, _ map[string]interface{}, str string) error {
|
||||||
var temp int64
|
var temp int64
|
||||||
if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil {
|
if _, err := fmt.Sscanf(str, "%d C", &temp); err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
@ -993,7 +970,7 @@ func init() {
|
||||||
_ = os.Setenv("LC_NUMERIC", "en_US.UTF-8")
|
_ = os.Setenv("LC_NUMERIC", "en_US.UTF-8")
|
||||||
|
|
||||||
inputs.Add("smart", func() telegraf.Input {
|
inputs.Add("smart", func() telegraf.Input {
|
||||||
m := NewSmart()
|
m := newSmart()
|
||||||
m.Nocheck = "standby"
|
m.Nocheck = "standby"
|
||||||
return m
|
return m
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestGatherAttributes(t *testing.T) {
|
func TestGatherAttributes(t *testing.T) {
|
||||||
s := NewSmart()
|
s := newSmart()
|
||||||
s.Attributes = true
|
s.Attributes = true
|
||||||
|
|
||||||
assert.Equal(t, time.Second*30, s.Timeout.Duration)
|
assert.Equal(t, time.Second*30, s.Timeout.Duration)
|
||||||
|
|
@ -78,7 +78,7 @@ func TestGatherAttributes(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGatherNoAttributes(t *testing.T) {
|
func TestGatherNoAttributes(t *testing.T) {
|
||||||
s := NewSmart()
|
s := newSmart()
|
||||||
s.Attributes = false
|
s.Attributes = false
|
||||||
|
|
||||||
assert.Equal(t, time.Second*30, s.Timeout.Duration)
|
assert.Equal(t, time.Second*30, s.Timeout.Duration)
|
||||||
|
|
@ -244,7 +244,7 @@ func TestGatherIntelNvme(t *testing.T) {
|
||||||
var (
|
var (
|
||||||
acc = &testutil.Accumulator{}
|
acc = &testutil.Accumulator{}
|
||||||
wg = &sync.WaitGroup{}
|
wg = &sync.WaitGroup{}
|
||||||
device = NVMeDevice{
|
device = nvmeDevice{
|
||||||
name: "nvme0",
|
name: "nvme0",
|
||||||
model: mockModel,
|
model: mockModel,
|
||||||
serialNumber: mockSerial,
|
serialNumber: mockSerial,
|
||||||
|
|
@ -275,13 +275,6 @@ func Test_checkForNVMeDevices(t *testing.T) {
|
||||||
assert.Equal(t, expectedNVMeDevices, resultNVMeDevices)
|
assert.Equal(t, expectedNVMeDevices, resultNVMeDevices)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_excludeWrongDeviceNames(t *testing.T) {
|
|
||||||
devices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1", "/dev/sda ; ./suspicious_script.sh"}
|
|
||||||
validDevices := []string{"/dev/sda", "/dev/nvme -d nvme", "/dev/sda1 -d megaraid,1"}
|
|
||||||
result := excludeWrongDeviceNames(devices)
|
|
||||||
assert.Equal(t, validDevices, result)
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_contains(t *testing.T) {
|
func Test_contains(t *testing.T) {
|
||||||
devices := []string{"/dev/sda", "/dev/nvme1"}
|
devices := []string{"/dev/sda", "/dev/nvme1"}
|
||||||
device := "/dev/nvme1"
|
device := "/dev/nvme1"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue