feat: add inputs.mdstat to gather from /proc/mdstat collection (#9101)

This commit is contained in:
John Seekins 2021-08-31 16:04:32 -06:00 committed by GitHub
parent 2370d39e89
commit 435c2a6e33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 515 additions and 0 deletions

View File

@ -265,6 +265,7 @@ For documentation on the latest development code see the [documentation index][d
* [mailchimp](./plugins/inputs/mailchimp)
* [marklogic](./plugins/inputs/marklogic)
* [mcrouter](./plugins/inputs/mcrouter)
* [mdstat](./plugins/inputs/mdstat)
* [memcached](./plugins/inputs/memcached)
* [mem](./plugins/inputs/mem)
* [mesos](./plugins/inputs/mesos)

View File

@ -101,6 +101,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/mailchimp"
_ "github.com/influxdata/telegraf/plugins/inputs/marklogic"
_ "github.com/influxdata/telegraf/plugins/inputs/mcrouter"
_ "github.com/influxdata/telegraf/plugins/inputs/mdstat"
_ "github.com/influxdata/telegraf/plugins/inputs/mem"
_ "github.com/influxdata/telegraf/plugins/inputs/memcached"
_ "github.com/influxdata/telegraf/plugins/inputs/mesos"

View File

@ -0,0 +1,49 @@
# mdstat Input Plugin
The mdstat plugin gathers statistics about any Linux MD RAID arrays configured on the host
by reading /proc/mdstat. For a full list of available fields see the
/proc/mdstat section of the [proc man page](http://man7.org/linux/man-pages/man5/proc.5.html).
For a better idea of what each field represents, see the
[mdstat page on the Linux RAID wiki](https://raid.wiki.kernel.org/index.php/Mdstat).
Stat collection based on Prometheus' mdstat collection library at https://github.com/prometheus/procfs/blob/master/mdstat.go
### Configuration:
```toml
# Get md array statistics from /proc/mdstat
[[inputs.mdstat]]
## Sets file path
## If not specified, then default is /proc/mdstat
# file_name = "/proc/mdstat"
```
### Measurements & Fields:
- mdstat
- BlocksSynced (if the array is rebuilding/checking, this is the count of blocks that have been scanned; it is 0 while a resync is pending or delayed, and otherwise equals BlocksTotal)
- BlocksSyncedFinishTime (the expected finish time of the rebuild scan, listed in minutes remaining)
- BlocksSyncedPct (the percentage of the rebuild scan that has completed)
- BlocksSyncedSpeed (the current speed the rebuild is running at, listed in K/sec)
- BlocksTotal (the total count of blocks in the array)
- DisksActive (the number of disks that are currently considered healthy in the array)
- DisksFailed (the current count of failed disks in the array)
- DisksSpare (the current count of "spare" disks in the array)
- DisksTotal (total count of disks in the array)
- DisksDown (the count of disks marked down, i.e. shown as `_` in the array's `[UU_]` status map)
### Tags:
- mdstat
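- ActivityState (`active` or `inactive`; replaced by `recovering`, `resyncing` or `checking` while the array reports one of those operations)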
- Devices (comma separated list of devices that make up the array)
- Name (name of the array)
### Example Output:
```
$ telegraf --config ~/ws/telegraf.conf --input-filter mdstat --test
* Plugin: mdstat, Collection 1
> mdstat,ActivityState=active,Devices=sdm1\,sdn1,Name=md1 BlocksSynced=231299072i,BlocksSyncedFinishTime=0,BlocksSyncedPct=0,BlocksSyncedSpeed=0,BlocksTotal=231299072i,DisksActive=2i,DisksFailed=0i,DisksSpare=0i,DisksTotal=2i,DisksDown=0i 1617814276000000000
> mdstat,ActivityState=active,Devices=sdm5\,sdn5,Name=md2 BlocksSynced=2996224i,BlocksSyncedFinishTime=0,BlocksSyncedPct=0,BlocksSyncedSpeed=0,BlocksTotal=2996224i,DisksActive=2i,DisksFailed=0i,DisksSpare=0i,DisksTotal=2i,DisksDown=0i 1617814276000000000
```

View File

@ -0,0 +1,313 @@
// +build linux
// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Code has been changed since initial import.
package mdstat
import (
"fmt"
"io/ioutil"
"os"
"regexp"
"sort"
"strconv"
"strings"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// Defaults used to locate the mdstat file when no file_name is configured.
const (
// defaultHostProc is the proc directory used when the envProc environment
// variable is unset or empty.
defaultHostProc = "/proc"
// envProc is the name of the environment variable that overrides
// defaultHostProc.
envProc = "HOST_PROC"
)
// Regular expressions used to pick values out of mdstat lines. They are
// compiled once, when the package is loaded.
var (
// statusLineRE captures, in order: the block count, the two numbers of the
// "[n/m]" pair (used as total and active disks), and the "[U_]" disk map.
statusLineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[([U_]+)\]`)
// recoveryLineBlocksRE captures the first number of a "(done/total)" pair.
recoveryLineBlocksRE = regexp.MustCompile(`\((\d+)/\d+\)`)
// recoveryLinePctRE captures the text between "= " and "%".
recoveryLinePctRE = regexp.MustCompile(`= (.+)%`)
// recoveryLineFinishRE captures the text between "finish=" and "min".
recoveryLineFinishRE = regexp.MustCompile(`finish=(.+)min`)
// recoveryLineSpeedRE captures the text between "speed=" and the last
// upper-case letter that follows it (the match is greedy).
recoveryLineSpeedRE = regexp.MustCompile(`speed=(.+)[A-Z]`)
// componentDeviceRE captures the device name that precedes a "[N]" suffix.
componentDeviceRE = regexp.MustCompile(`(.*)\[\d+\]`)
)
// statusLine holds the values evalStatusLine extracts for one array.
type statusLine struct {
// active is the second number of the "[n/m]" pair; for raid0/linear arrays
// it is the number of "[" found on the device line.
active int64
// total is the first number of the "[n/m]" pair; for raid0/linear arrays it
// equals active.
total int64
// size is the leading number of the status line (the block count).
size int64
// down is the number of "_" characters in the "[U_]" disk map.
down int64
}
// recoveryLine holds the progress values evalRecoveryLine extracts from a
// recovery/resync/check line.
type recoveryLine struct {
// syncedBlocks is the first number of the "(done/total)" pair.
syncedBlocks int64
// pct is the number found between "= " and "%".
pct float64
// finish is the number found between "finish=" and "min".
finish float64
// speed is the number found after "speed=", before its unit letter.
speed float64
}
// MdstatConf is the configuration of the mdstat input plugin.
type MdstatConf struct {
// FileName is the path of the mdstat file to read. When it is empty,
// getProcMdstat reads "mdstat" from the proc directory instead.
FileName string `toml:"file_name"`
}
// Description returns a one-line summary of what the plugin collects.
func (k *MdstatConf) Description() string {
return "Get md array statistics from /proc/mdstat"
}
// mdSampleConfig is the configuration snippet returned by SampleConfig.
var mdSampleConfig = `
## Sets file path
## If not specified, then default is /proc/mdstat
# file_name = "/proc/mdstat"
`
// SampleConfig returns the example configuration held in mdSampleConfig.
func (k *MdstatConf) SampleConfig() string {
return mdSampleConfig
}
// evalStatusLine extracts the block count and disk counters of one array.
//
// deviceLine is the array header line ("mdX : active raid1 ...") and
// statusLineStr is the line that follows it ("N blocks ... [n/m] [UU_]").
//
// The block count is always the first field of statusLineStr. For raid0 and
// linear arrays the disk counters are the number of "[" on the header line;
// for inactive arrays they stay zero; for everything else they come from the
// "[n/m] [U_]" section of the status line.
//
// On error the returned value holds only what was parsed before the failure.
func evalStatusLine(deviceLine, statusLineStr string) (statusLine, error) {
	tokens := strings.Fields(statusLineStr)
	if len(tokens) == 0 {
		return statusLine{}, fmt.Errorf("statusLine empty? %q", statusLineStr)
	}

	blocks, err := strconv.ParseInt(tokens[0], 10, 64)
	if err != nil {
		return statusLine{}, fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err)
	}

	parsed := statusLine{size: blocks}

	switch {
	case strings.Contains(deviceLine, "raid0"), strings.Contains(deviceLine, "linear"):
		// In the device line, only disks have a number associated with them in [].
		members := int64(strings.Count(deviceLine, "["))
		parsed.active, parsed.total = members, members
		return parsed, nil
	case strings.Contains(deviceLine, "inactive"):
		return parsed, nil
	}

	groups := statusLineRE.FindStringSubmatch(statusLineStr)
	if len(groups) != 5 {
		return parsed, fmt.Errorf("couldn't find all the substring matches: %s", statusLineStr)
	}

	configured, err := strconv.ParseInt(groups[2], 10, 64)
	if err != nil {
		return parsed, fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err)
	}
	parsed.total = configured

	inUse, err := strconv.ParseInt(groups[3], 10, 64)
	if err != nil {
		return parsed, fmt.Errorf("unexpected statusLine %q: %w", statusLineStr, err)
	}
	parsed.active = inUse
	parsed.down = int64(strings.Count(groups[4], "_"))

	return parsed, nil
}
// evalRecoveryLine extracts the progress values from a recovery, resync or
// check line such as
//
//	[==>..] check = 94.3% (10620027200/11251451904) finish=101.6min speed=103517K/sec
//
// The values are parsed in the order blocks, percentage, finish time, speed.
// On error the returned value holds only the values parsed before the failing
// one; the remaining members are zero.
func evalRecoveryLine(recoveryLineStr string) (recoveryLine, error) {
	var progress recoveryLine

	// Completed blocks: first number of the "(done/total)" pair.
	blockGroups := recoveryLineBlocksRE.FindStringSubmatch(recoveryLineStr)
	if len(blockGroups) != 2 {
		return progress, fmt.Errorf("unexpected recoveryLine matching syncedBlocks: %s", recoveryLineStr)
	}
	done, err := strconv.ParseInt(blockGroups[1], 10, 64)
	if err != nil {
		return progress, fmt.Errorf("error parsing int from recoveryLine %q: %w", recoveryLineStr, err)
	}
	progress.syncedBlocks = done

	// Percentage complete.
	pctGroups := recoveryLinePctRE.FindStringSubmatch(recoveryLineStr)
	if len(pctGroups) != 2 {
		return progress, fmt.Errorf("unexpected recoveryLine matching percentage: %s", recoveryLineStr)
	}
	percent, err := strconv.ParseFloat(pctGroups[1], 64)
	if err != nil {
		return progress, fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err)
	}
	progress.pct = percent

	// Estimated time left, in minutes.
	finishGroups := recoveryLineFinishRE.FindStringSubmatch(recoveryLineStr)
	if len(finishGroups) != 2 {
		return progress, fmt.Errorf("unexpected recoveryLine matching est. finish time: %s", recoveryLineStr)
	}
	minutes, err := strconv.ParseFloat(finishGroups[1], 64)
	if err != nil {
		return progress, fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err)
	}
	progress.finish = minutes

	// Current speed.
	speedGroups := recoveryLineSpeedRE.FindStringSubmatch(recoveryLineStr)
	if len(speedGroups) != 2 {
		return progress, fmt.Errorf("unexpected recoveryLine matching speed: %s", recoveryLineStr)
	}
	rate, err := strconv.ParseFloat(speedGroups[1], 64)
	if err != nil {
		return progress, fmt.Errorf("error parsing float from recoveryLine %q: %w", recoveryLineStr, err)
	}
	progress.speed = rate

	return progress, nil
}
// evalComponentDevices returns the sorted, comma-separated names of the
// component devices listed on an array header line.
//
// deviceFields is the header line split on whitespace, i.e.
// ["mdX", ":", state, ...]. Every field from index 3 onwards that looks like
// "name[N]" contributes its name; anything else (the personality of an active
// array, "(read-only)" markers) does not match and is ignored. Scanning starts
// at index 3 rather than 4 because an inactive array has no personality field,
// so its first member sits directly after the state.
//
// Returns "" when no component device is found.
func evalComponentDevices(deviceFields []string) string {
	if len(deviceFields) <= 3 {
		return ""
	}

	mdComponentDevices := make([]string, 0, len(deviceFields)-3)
	for _, field := range deviceFields[3:] {
		match := componentDeviceRE.FindStringSubmatch(field)
		if match == nil {
			continue
		}
		mdComponentDevices = append(mdComponentDevices, match[1])
	}

	// Ensure no churn on tag ordering change
	sort.Strings(mdComponentDevices)
	return strings.Join(mdComponentDevices, ",")
}
// Gather reads the mdstat file and adds one "mdstat" metric per MD array to
// acc.
//
// Every line that is not blank, does not start with a space and is not the
// "Personalities" or "unused" line is treated as an array header. The line
// after it is parsed as the status line, and the next one or two lines are
// inspected for recovery, resync or check progress.
//
// A file with fewer than three lines produces no metrics and no error.
// An error is returned when the file cannot be read, when a header has fewer
// than three fields, when fewer than three lines follow a header, or when the
// status or progress line cannot be parsed. Metrics added before the error
// remain in acc.
func (k *MdstatConf) Gather(acc telegraf.Accumulator) error {
	data, err := k.getProcMdstat()
	if err != nil {
		return err
	}

	lines := strings.Split(string(data), "\n")
	// empty file should return nothing
	if len(lines) < 3 {
		return nil
	}

	for i, line := range lines {
		// Continuation lines (status, progress, bitmap) are indented; only
		// array header lines start in the first column.
		if strings.TrimSpace(line) == "" || line[0] == ' ' ||
			strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") {
			continue
		}

		deviceFields := strings.Fields(line)
		if len(deviceFields) < 3 {
			return fmt.Errorf("not enough fields in mdline (expected at least 3): %s", line)
		}
		// The code below indexes up to lines[i+3], so that many lines must exist.
		if len(lines) <= i+3 {
			return fmt.Errorf("not enough lines after mdline (expected at least 3): %s", line)
		}

		mdName := deviceFields[0] // mdx
		state := deviceFields[2]  // active or inactive

		/*
			Failed disks have the suffix (F) & Spare disks have the suffix (S).
			Failed disks may also not be marked separately...
		*/
		fail := int64(strings.Count(line, "(F)"))
		spare := int64(strings.Count(line, "(S)"))

		sts, err := evalStatusLine(lines[i], lines[i+1])
		if err != nil {
			return fmt.Errorf("error parsing md device lines: %w", err)
		}

		syncLineIdx := i + 2
		if strings.Contains(lines[i+2], "bitmap") { // skip bitmap line
			syncLineIdx++
		}
		syncLine := lines[syncLineIdx]

		// If device is syncing at the moment, get the number of currently
		// synced bytes, otherwise that number equals the size of the device.
		var rcvry recoveryLine
		rcvry.syncedBlocks = sts.size

		recovering := strings.Contains(syncLine, "recovery")
		resyncing := strings.Contains(syncLine, "resync")
		checking := strings.Contains(syncLine, "check")

		// Append recovery and resyncing state info.
		if recovering || resyncing || checking {
			// When several keywords appear, recovery wins over check, which
			// wins over resync.
			switch {
			case recovering:
				state = "recovering"
			case checking:
				state = "checking"
			default:
				state = "resyncing"
			}

			// Handle case when resync=PENDING or resync=DELAYED.
			if strings.Contains(syncLine, "PENDING") || strings.Contains(syncLine, "DELAYED") {
				rcvry.syncedBlocks = 0
			} else {
				rcvry, err = evalRecoveryLine(syncLine)
				if err != nil {
					return fmt.Errorf("error parsing sync line in md device %q: %w", mdName, err)
				}
			}
		}

		fields := map[string]interface{}{
			"DisksActive":            sts.active,
			"DisksFailed":            fail,
			"DisksSpare":             spare,
			"DisksTotal":             sts.total,
			"DisksDown":              sts.down,
			"BlocksTotal":            sts.size,
			"BlocksSynced":           rcvry.syncedBlocks,
			"BlocksSyncedPct":        rcvry.pct,
			"BlocksSyncedFinishTime": rcvry.finish,
			"BlocksSyncedSpeed":      rcvry.speed,
		}
		tags := map[string]string{
			"Name":          mdName,
			"ActivityState": state,
			"Devices":       evalComponentDevices(deviceFields),
		}
		acc.AddFields("mdstat", fields, tags)
	}

	return nil
}
// getProcMdstat returns the raw contents of the mdstat file.
//
// The file is k.FileName when that is set; otherwise it is "mdstat" inside the
// directory returned by proc(envProc, defaultHostProc). A missing file is
// reported with a dedicated "does not exist" error; any other stat or read
// failure is returned unchanged.
func (k *MdstatConf) getProcMdstat() ([]byte, error) {
	mdStatFile := k.FileName
	if mdStatFile == "" {
		mdStatFile = proc(envProc, defaultHostProc) + "/mdstat"
	}

	_, statErr := os.Stat(mdStatFile)
	switch {
	case os.IsNotExist(statErr):
		return nil, fmt.Errorf("mdstat: %s does not exist", mdStatFile)
	case statErr != nil:
		return nil, statErr
	}

	contents, readErr := ioutil.ReadFile(mdStatFile)
	if readErr != nil {
		return nil, readErr
	}
	return contents, nil
}
// init registers the plugin with inputs.Add under the name "mdstat"; the
// registered factory returns a new, zero-valued MdstatConf.
func init() {
inputs.Add("mdstat", func() telegraf.Input { return &MdstatConf{} })
}
// proc returns the value of the environment variable named env, or path when
// that variable is unset or empty.
func proc(env, path string) string {
	override := os.Getenv(env)
	if override == "" {
		// nothing configured in the environment: use the default
		return path
	}
	return override
}

View File

@ -0,0 +1,3 @@
// +build !linux
package mdstat

View File

@ -0,0 +1,148 @@
// +build linux
package mdstat
import (
"io/ioutil"
"os"
"testing"
"github.com/influxdata/telegraf/testutil"
"github.com/stretchr/testify/assert"
)
// TestFullMdstatProcFile gathers from the mdStatFileFull fixture and expects
// a metric carrying the progress values of the array that is being checked.
func TestFullMdstatProcFile(t *testing.T) {
	path := makeFakeMDStatFile([]byte(mdStatFileFull))
	defer os.Remove(path)

	plugin := MdstatConf{FileName: path}
	var acc testutil.Accumulator
	assert.NoError(t, plugin.Gather(&acc))

	expected := map[string]interface{}{
		"DisksActive":            int64(12),
		"DisksFailed":            int64(0),
		"DisksSpare":             int64(0),
		"DisksTotal":             int64(12),
		"DisksDown":              int64(0),
		"BlocksTotal":            int64(11251451904),
		"BlocksSynced":           int64(10620027200),
		"BlocksSyncedPct":        float64(94.3),
		"BlocksSyncedFinishTime": float64(101.6),
		"BlocksSyncedSpeed":      float64(103517),
	}
	acc.AssertContainsFields(t, "mdstat", expected)
}
// TestFailedDiskMdStatProcFile1 gathers from the mdStatFileFailedDisk fixture
// and expects a metric reporting three of four disks active and one down.
func TestFailedDiskMdStatProcFile1(t *testing.T) {
	path := makeFakeMDStatFile([]byte(mdStatFileFailedDisk))
	defer os.Remove(path)

	plugin := MdstatConf{FileName: path}
	var acc testutil.Accumulator
	assert.NoError(t, plugin.Gather(&acc))

	expected := map[string]interface{}{
		"DisksActive":            int64(3),
		"DisksFailed":            int64(0),
		"DisksSpare":             int64(0),
		"DisksTotal":             int64(4),
		"DisksDown":              int64(1),
		"BlocksTotal":            int64(5860144128),
		"BlocksSynced":           int64(5860144128),
		"BlocksSyncedPct":        float64(0),
		"BlocksSyncedFinishTime": float64(0),
		"BlocksSyncedSpeed":      float64(0),
	}
	acc.AssertContainsFields(t, "mdstat", expected)
}
// TestEmptyMdStatProcFile1 checks that a file without any array is gathered
// without error.
func TestEmptyMdStatProcFile1(t *testing.T) {
	path := makeFakeMDStatFile([]byte(mdStatFileEmpty))
	defer os.Remove(path)

	plugin := MdstatConf{FileName: path}
	var acc testutil.Accumulator
	assert.NoError(t, plugin.Gather(&acc))
}
// TestInvalidMdStatProcFile1 checks that Gather reports an error for the
// malformed mdStatFileInvalid fixture.
func TestInvalidMdStatProcFile1(t *testing.T) {
	path := makeFakeMDStatFile([]byte(mdStatFileInvalid))
	defer os.Remove(path)

	plugin := MdstatConf{FileName: path}
	var acc testutil.Accumulator
	assert.Error(t, plugin.Gather(&acc))
}
// mdStatFileFull is an mdstat file with three arrays: md2 is in the middle of
// a check, md1 and md0 are idle.
//
// The status, progress and bitmap lines must start with whitespace: Gather
// tells them apart from array header lines by their leading space.
const mdStatFileFull = `
Personalities : [raid1] [raid10] [linear] [multipath] [raid0] [raid6] [raid5] [raid4]
md2 : active raid10 sde[2] sdl[9] sdf[3] sdk[8] sdh[5] sdd[1] sdg[4] sdn[11] sdm[10] sdj[7] sdc[0] sdi[6]
      11251451904 blocks super 1.2 512K chunks 2 near-copies [12/12] [UUUUUUUUUUUU]
      [==================>..] check = 94.3% (10620027200/11251451904) finish=101.6min speed=103517K/sec
      bitmap: 35/84 pages [140KB], 65536KB chunk
md1 : active raid1 sdb2[2] sda2[0]
      5909504 blocks super 1.2 [2/2] [UU]
md0 : active raid1 sdb1[2] sda1[0]
      244005888 blocks super 1.2 [2/2] [UU]
      bitmap: 1/2 pages [4KB], 65536KB chunk
unused devices: <none>
`
// mdStatFileFailedDisk is an mdstat file with one raid5 array that is running
// with three of its four disks ("[4/3] [UUU_]").
//
// The status and bitmap lines must start with whitespace: Gather tells them
// apart from array header lines by their leading space.
const mdStatFileFailedDisk = `
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
md0 : active raid5 sdd1[3] sdb1[1] sda1[0]
      5860144128 blocks super 1.2 level 5, 64k chunk, algorithm 2 [4/3] [UUU_]
      bitmap: 8/15 pages [32KB], 65536KB chunk
unused devices: <none>
`
// mdStatFileEmpty is an mdstat file without any array: it only holds the
// "Personalities" and "unused devices" lines.
const mdStatFileEmpty = `
Personalities :
unused devices: <none>
`
// mdStatFileInvalid is an mdstat file whose first array entry ("mdf1: testman
// actve") is malformed and is not followed by a status line, so gathering from
// it is expected to fail.
//
// The status and bitmap lines of the well-formed md0 entry must start with
// whitespace: Gather tells them apart from array header lines by their
// leading space.
const mdStatFileInvalid = `
Personalities :
mdf1: testman actve
md0 : active raid1 sdb1[2] sda1[0]
      244005888 blocks super 1.2 [2/2] [UU]
      bitmap: 1/2 pages [4KB], 65536KB chunk
unused devices: <none>
`
// makeFakeMDStatFile writes content to a new temporary file whose name starts
// with "mdstat" and returns the path of that file. The caller is responsible
// for removing it. Any failure to create, write or close the file panics.
func makeFakeMDStatFile(content []byte) (filename string) {
	must := func(err error) {
		if err != nil {
			panic(err)
		}
	}

	tmp, err := ioutil.TempFile("", "mdstat")
	must(err)

	_, err = tmp.Write(content)
	must(err)

	must(tmp.Close())

	return tmp.Name()
}