fix: pool detection and metrics gathering for ZFS >= 2.1.x (#10099)
This commit is contained in:
parent
cc397279f5
commit
666bfe33a2
|
|
@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics):
|
|||
- wcnt (integer, count)
|
||||
- rcnt (integer, count)
|
||||
|
||||
For ZFS >= 2.1.x the format has changed significantly:
|
||||
|
||||
- zfs_pool
|
||||
- writes (integer, count)
|
||||
- nwritten (integer, bytes)
|
||||
- reads (integer, count)
|
||||
- nread (integer, bytes)
|
||||
- nunlinks (integer, count)
|
||||
- nunlinked (integer, count)
|
||||
|
||||
On FreeBSD:
|
||||
|
||||
- zfs_pool
|
||||
|
|
@ -229,6 +239,7 @@ On FreeBSD:
|
|||
- Pool metrics (`zfs_pool`) will have the following tag:
|
||||
- pool - with the name of the pool which the metrics are for.
|
||||
- health - the health status of the pool. (FreeBSD only)
|
||||
- dataset - ZFS >= 2.1.x only. (Linux only)
|
||||
|
||||
- Dataset metrics (`zfs_dataset`) will have the following tag:
|
||||
- dataset - with the name of the dataset which the metrics are for.
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
package zfs
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
|
@ -14,22 +15,56 @@ import (
|
|||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
type metricsVersion uint8
|
||||
|
||||
const (
|
||||
unknown metricsVersion = iota
|
||||
v1
|
||||
v2
|
||||
)
|
||||
|
||||
type poolInfo struct {
|
||||
name string
|
||||
ioFilename string
|
||||
version metricsVersion
|
||||
}
|
||||
|
||||
func getPools(kstatPath string) []poolInfo {
|
||||
func probeVersion(kstatPath string) (metricsVersion, []string, error) {
|
||||
poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath))
|
||||
|
||||
// From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed.
|
||||
// Because of this we need to determine how to fallback differently.
|
||||
if err != nil {
|
||||
return unknown, poolsDirs, err
|
||||
}
|
||||
|
||||
if len(poolsDirs) > 0 {
|
||||
return v2, poolsDirs, nil
|
||||
}
|
||||
|
||||
// Fallback to the old kstat in case of an older ZFS version.
|
||||
poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath))
|
||||
if err != nil {
|
||||
return unknown, poolsDirs, err
|
||||
}
|
||||
|
||||
return v1, poolsDirs, nil
|
||||
}
|
||||
|
||||
func getPools(kstatPath string) ([]poolInfo, error) {
|
||||
pools := make([]poolInfo, 0)
|
||||
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io")
|
||||
version, poolsDirs, err := probeVersion(kstatPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, poolDir := range poolsDirs {
|
||||
poolDirSplit := strings.Split(poolDir, "/")
|
||||
pool := poolDirSplit[len(poolDirSplit)-2]
|
||||
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir})
|
||||
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version})
|
||||
}
|
||||
|
||||
return pools
|
||||
return pools, nil
|
||||
}
|
||||
|
||||
func getTags(pools []poolInfo) map[string]string {
|
||||
|
|
@ -45,36 +80,99 @@ func getTags(pools []poolInfo) map[string]string {
|
|||
return map[string]string{"pools": poolNames}
|
||||
}
|
||||
|
||||
func gather(lines []string, fileLines int) ([]string, []string, error) {
|
||||
if len(lines) != fileLines {
|
||||
return nil, nil, errors.New("expected lines in kstat does not match")
|
||||
}
|
||||
|
||||
keys := strings.Fields(lines[1])
|
||||
values := strings.Fields(lines[2])
|
||||
if len(keys) != len(values) {
|
||||
return nil, nil, fmt.Errorf("key and value count don't match Keys:%v Values:%v", keys, values)
|
||||
}
|
||||
|
||||
return keys, values, nil
|
||||
}
|
||||
|
||||
func gatherV1(lines []string) (map[string]interface{}, error) {
|
||||
fileLines := 3
|
||||
keys, values, err := gather(lines, fileLines)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fields := make(map[string]interface{})
|
||||
for i := 0; i < len(keys); i++ {
|
||||
value, err := strconv.ParseInt(values[i], 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fields[keys[i]] = value
|
||||
}
|
||||
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
// New way of collection. Each objset-* file in ZFS >= 2.1.x has a format looking like this:
|
||||
// 36 1 0x01 7 2160 5214787391 73405258558961
|
||||
// name type data
|
||||
// dataset_name 7 rpool/ROOT/pve-1
|
||||
// writes 4 409570
|
||||
// nwritten 4 2063419969
|
||||
// reads 4 22108699
|
||||
// nread 4 63067280992
|
||||
// nunlinks 4 13849
|
||||
// nunlinked 4 13848
|
||||
//
|
||||
// For explanation of the first line's values see https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
|
||||
func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
|
||||
fileLines := 9
|
||||
_, _, err := gather(lines, fileLines)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tags["dataset"] = strings.Fields(lines[2])[2]
|
||||
fields := make(map[string]interface{})
|
||||
for i := 3; i < len(lines); i++ {
|
||||
lineFields := strings.Fields(lines[i])
|
||||
fieldName := lineFields[0]
|
||||
fieldData := lineFields[2]
|
||||
value, err := strconv.ParseInt(fieldData, 10, 64)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fields[fieldName] = value
|
||||
}
|
||||
|
||||
return fields, nil
|
||||
}
|
||||
|
||||
func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
|
||||
lines, err := internal.ReadLines(pool.ioFilename)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(lines) != 3 {
|
||||
var fields map[string]interface{}
|
||||
var gatherErr error
|
||||
tags := map[string]string{"pool": pool.name}
|
||||
switch pool.version {
|
||||
case v1:
|
||||
fields, gatherErr = gatherV1(lines)
|
||||
case v2:
|
||||
fields, gatherErr = gatherV2(lines, tags)
|
||||
case unknown:
|
||||
return errors.New("Unknown metrics version detected")
|
||||
}
|
||||
|
||||
if gatherErr != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
keys := strings.Fields(lines[1])
|
||||
values := strings.Fields(lines[2])
|
||||
|
||||
keyCount := len(keys)
|
||||
|
||||
if keyCount != len(values) {
|
||||
return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
|
||||
}
|
||||
|
||||
tag := map[string]string{"pool": pool.name}
|
||||
fields := make(map[string]interface{})
|
||||
for i := 0; i < keyCount; i++ {
|
||||
value, err := strconv.ParseInt(values[i], 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
fields[keys[i]] = value
|
||||
}
|
||||
acc.AddFields("zfs_pool", fields, tag)
|
||||
|
||||
acc.AddFields("zfs_pool", fields, tags)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -93,10 +191,10 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
|
|||
kstatPath = "/proc/spl/kstat/zfs"
|
||||
}
|
||||
|
||||
pools := getPools(kstatPath)
|
||||
pools, err := getPools(kstatPath)
|
||||
tags := getTags(pools)
|
||||
|
||||
if z.PoolMetrics {
|
||||
if z.PoolMetrics && err == nil {
|
||||
for _, pool := range pools {
|
||||
err := gatherPoolStats(pool, acc)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628
|
|||
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
|
||||
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
|
||||
`
|
||||
const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512
|
||||
name type data
|
||||
dataset_name 7 HOME
|
||||
writes 4 978
|
||||
nwritten 4 6450688
|
||||
reads 4 22
|
||||
nread 4 1884160
|
||||
nunlinks 4 14148
|
||||
nunlinked 4 14147
|
||||
`
|
||||
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
|
||||
name type data
|
||||
zil_commit_count 4 77
|
||||
|
|
@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) {
|
|||
|
||||
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
|
||||
|
||||
err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
acc.Metrics = nil
|
||||
|
||||
err = z.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
tags["dataset"] = "HOME"
|
||||
|
||||
poolMetrics = getPoolMetricsNewFormat()
|
||||
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
|
||||
|
||||
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
|
@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} {
|
|||
"rcnt": int64(0),
|
||||
}
|
||||
}
|
||||
|
||||
func getPoolMetricsNewFormat() map[string]interface{} {
|
||||
return map[string]interface{}{
|
||||
"nread": int64(1884160),
|
||||
"nunlinked": int64(14147),
|
||||
"nunlinks": int64(14148),
|
||||
"nwritten": int64(6450688),
|
||||
"reads": int64(22),
|
||||
"writes": int64(978),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue