fix: pool detection and metrics gathering for ZFS >= 2.1.x (#10099)

This commit is contained in:
Aaron Wood 2021-12-06 07:37:34 -08:00 committed by GitHub
parent cc397279f5
commit 666bfe33a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 170 additions and 27 deletions

View File

@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics):
- wcnt (integer, count) - wcnt (integer, count)
- rcnt (integer, count) - rcnt (integer, count)
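For ZFS >= 2.1.x the kstat format has changed significantly: pool metrics are read from the per-dataset `objset-*` files instead of the pool's `io` file, and the following fields are reported: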
- zfs_pool
- writes (integer, count)
- nwritten (integer, bytes)
- reads (integer, count)
- nread (integer, bytes)
- nunlinks (integer, count)
- nunlinked (integer, count)
On FreeBSD: On FreeBSD:
- zfs_pool - zfs_pool
@ -229,6 +239,7 @@ On FreeBSD:
- Pool metrics (`zfs_pool`) will have the following tag: - Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for. - pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only) - health - the health status of the pool. (FreeBSD only)
- dataset - with the name of the dataset which the metrics are for. (Linux with ZFS >= 2.1.x only)
- Dataset metrics (`zfs_dataset`) will have the following tag: - Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for. - dataset - with the name of the dataset which the metrics are for.

View File

@ -4,6 +4,7 @@
package zfs package zfs
import ( import (
"errors"
"fmt" "fmt"
"path/filepath" "path/filepath"
"strconv" "strconv"
@ -14,22 +15,56 @@ import (
"github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs"
) )
// metricsVersion identifies which kstat file layout the pool metrics are read from.
type metricsVersion uint8
const (
// unknown is the zero value; probeVersion returns it when globbing fails.
unknown metricsVersion = iota
// v1 is the legacy layout, used as the fallback: one "io" file per pool directory.
v1
// v2 is the ZFS >= 2.1.x layout: "objset-*" files inside each pool directory.
v2
)
type poolInfo struct { type poolInfo struct {
name string name string
ioFilename string ioFilename string
version metricsVersion
} }
func getPools(kstatPath string) []poolInfo { func probeVersion(kstatPath string) (metricsVersion, []string, error) {
poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath))
// From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed.
// Because of this we need to determine how to fallback differently.
if err != nil {
return unknown, poolsDirs, err
}
if len(poolsDirs) > 0 {
return v2, poolsDirs, nil
}
// Fallback to the old kstat in case of an older ZFS version.
poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath))
if err != nil {
return unknown, poolsDirs, err
}
return v1, poolsDirs, nil
}
func getPools(kstatPath string) ([]poolInfo, error) {
pools := make([]poolInfo, 0) pools := make([]poolInfo, 0)
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io") version, poolsDirs, err := probeVersion(kstatPath)
if err != nil {
return nil, err
}
for _, poolDir := range poolsDirs { for _, poolDir := range poolsDirs {
poolDirSplit := strings.Split(poolDir, "/") poolDirSplit := strings.Split(poolDir, "/")
pool := poolDirSplit[len(poolDirSplit)-2] pool := poolDirSplit[len(poolDirSplit)-2]
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir}) pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version})
} }
return pools return pools, nil
} }
func getTags(pools []poolInfo) map[string]string { func getTags(pools []poolInfo) map[string]string {
@ -45,36 +80,99 @@ func getTags(pools []poolInfo) map[string]string {
return map[string]string{"pools": poolNames} return map[string]string{"pools": poolNames}
} }
// gather validates the overall shape of a kstat file and splits its second
// and third lines into whitespace-separated columns.
//
// lines is the file content, one entry per line, and fileLines is the exact
// number of lines the caller expects. On success the columns of lines[1] are
// returned as keys and the columns of lines[2] as values.
//
// An error is returned when the number of lines differs from fileLines, when
// the file is too short to contain both rows, or when the two rows do not have
// the same number of columns.
func gather(lines []string, fileLines int) ([]string, []string, error) {
	// lines[1] and lines[2] are indexed below, so anything shorter than three
	// lines must be rejected even when it matches fileLines; otherwise a caller
	// passing a small fileLines would trigger an index-out-of-range panic.
	const minLines = 3
	if len(lines) != fileLines || len(lines) < minLines {
		return nil, nil, errors.New("expected lines in kstat does not match")
	}

	keys := strings.Fields(lines[1])
	values := strings.Fields(lines[2])
	if len(keys) != len(values) {
		return nil, nil, fmt.Errorf("key and value count don't match Keys:%v Values:%v", keys, values)
	}

	return keys, values, nil
}
// gatherV1 parses the legacy three-line pool kstat file: a header line, a row
// of field names and a row of values. It returns the values keyed by field
// name, each parsed as a base-10 int64, or the first validation or parse
// error encountered.
func gatherV1(lines []string) (map[string]interface{}, error) {
	const expectedLines = 3

	names, rawValues, err := gather(lines, expectedLines)
	if err != nil {
		return nil, err
	}

	parsed := make(map[string]interface{})
	for idx, name := range names {
		number, parseErr := strconv.ParseInt(rawValues[idx], 10, 64)
		if parseErr != nil {
			return nil, parseErr
		}
		parsed[name] = number
	}

	return parsed, nil
}
// gatherV2 parses an objset-* kstat file as exposed by ZFS >= 2.1.x. Each file
// has a format looking like this:
//
//	36 1 0x01 7 2160 5214787391 73405258558961
//	name                            type data
//	dataset_name                    7    rpool/ROOT/pve-1
//	writes                          4    409570
//	nwritten                        4    2063419969
//	reads                           4    22108699
//	nread                           4    63067280992
//	nunlinks                        4    13849
//	nunlinked                       4    13848
//
// For explanation of the first line's values see
// https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
//
// The third column of the dataset_name row is stored in tags["dataset"]; every
// following row contributes one field named after its first column whose
// value is the third column parsed as a base-10 int64.
//
// An error is returned when the file does not have exactly nine lines, when
// any row has fewer than three columns, or when a value is not an integer.
// tags is only modified when parsing succeeds.
func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
	const (
		fileLines  = 9 // header + column names + dataset_name + six counters
		firstStat  = 3 // index of the first counter row
		minColumns = 3 // name, type, data
	)

	// gather returns the columns of lines[2], i.e. the dataset_name row.
	_, datasetRow, err := gather(lines, fileLines)
	if err != nil {
		return nil, err
	}
	// gather only guarantees that rows 1 and 2 have the same column count,
	// not that the data column actually exists.
	if len(datasetRow) < minColumns {
		return nil, fmt.Errorf("malformed dataset_name line in kstat: %q", lines[2])
	}

	fields := make(map[string]interface{}, len(lines)-firstStat)
	for _, line := range lines[firstStat:] {
		lineFields := strings.Fields(line)
		if len(lineFields) < minColumns {
			return nil, fmt.Errorf("malformed line in kstat: %q", line)
		}

		value, err := strconv.ParseInt(lineFields[2], 10, 64)
		if err != nil {
			return nil, err
		}
		fields[lineFields[0]] = value
	}

	tags["dataset"] = datasetRow[2]
	return fields, nil
}
func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error { func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
lines, err := internal.ReadLines(pool.ioFilename) lines, err := internal.ReadLines(pool.ioFilename)
if err != nil { if err != nil {
return err return err
} }
if len(lines) != 3 { var fields map[string]interface{}
var gatherErr error
tags := map[string]string{"pool": pool.name}
switch pool.version {
case v1:
fields, gatherErr = gatherV1(lines)
case v2:
fields, gatherErr = gatherV2(lines, tags)
case unknown:
return errors.New("Unknown metrics version detected")
}
if gatherErr != nil {
return err return err
} }
keys := strings.Fields(lines[1]) acc.AddFields("zfs_pool", fields, tags)
values := strings.Fields(lines[2])
keyCount := len(keys)
if keyCount != len(values) {
return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
}
tag := map[string]string{"pool": pool.name}
fields := make(map[string]interface{})
for i := 0; i < keyCount; i++ {
value, err := strconv.ParseInt(values[i], 10, 64)
if err != nil {
return err
}
fields[keys[i]] = value
}
acc.AddFields("zfs_pool", fields, tag)
return nil return nil
} }
@ -93,10 +191,10 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatPath = "/proc/spl/kstat/zfs" kstatPath = "/proc/spl/kstat/zfs"
} }
pools := getPools(kstatPath) pools, err := getPools(kstatPath)
tags := getTags(pools) tags := getTags(pools)
if z.PoolMetrics { if z.PoolMetrics && err == nil {
for _, pool := range pools { for _, pool := range pools {
err := gatherPoolStats(pool, acc) err := gatherPoolStats(pool, acc)
if err != nil { if err != nil {

View File

@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0 1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
` `
// objsetContents is a sample objset-* kstat file in the ZFS >= 2.1.x layout:
// a header line, the "name type data" column row, the dataset_name row and six
// counter rows. The pool metrics test writes it to HOME/objset-0x20a; the
// counters match the values returned by getPoolMetricsNewFormat.
const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512
name type data
dataset_name 7 HOME
writes 4 978
nwritten 4 6450688
reads 4 22
nread 4 1884160
nunlinks 4 14148
nunlinked 4 14147
`
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445 const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
name type data name type data
zil_commit_count 4 77 zil_commit_count 4 77
@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) {
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644)
require.NoError(t, err)
acc.Metrics = nil
err = z.Gather(&acc)
require.NoError(t, err)
tags["dataset"] = "HOME"
poolMetrics = getPoolMetricsNewFormat()
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
err = os.RemoveAll(os.TempDir() + "/telegraf") err = os.RemoveAll(os.TempDir() + "/telegraf")
require.NoError(t, err) require.NoError(t, err)
} }
@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} {
"rcnt": int64(0), "rcnt": int64(0),
} }
} }
// getPoolMetricsNewFormat returns the zfs_pool fields expected from parsing
// the objsetContents fixture (ZFS >= 2.1.x objset-* layout).
func getPoolMetricsNewFormat() map[string]interface{} {
	expected := make(map[string]interface{}, 6)

	// Operation counters.
	expected["reads"] = int64(22)
	expected["writes"] = int64(978)
	expected["nunlinks"] = int64(14148)
	expected["nunlinked"] = int64(14147)

	// Byte counters.
	expected["nread"] = int64(1884160)
	expected["nwritten"] = int64(6450688)

	return expected
}