fix: pool detection and metrics gathering for ZFS >= 2.1.x (#10099)
This commit is contained in:
parent
cc397279f5
commit
666bfe33a2
|
|
@ -202,6 +202,16 @@ On Linux (reference: kstat accumulated time and queue length statistics):
|
||||||
- wcnt (integer, count)
|
- wcnt (integer, count)
|
||||||
- rcnt (integer, count)
|
- rcnt (integer, count)
|
||||||
|
|
||||||
|
For ZFS >= 2.1.x the format has changed significantly:
|
||||||
|
|
||||||
|
- zfs_pool
|
||||||
|
- writes (integer, count)
|
||||||
|
- nwritten (integer, bytes)
|
||||||
|
- reads (integer, count)
|
||||||
|
- nread (integer, bytes)
|
||||||
|
- nunlinks (integer, count)
|
||||||
|
- nunlinked (integer, count)
|
||||||
|
|
||||||
On FreeBSD:
|
On FreeBSD:
|
||||||
|
|
||||||
- zfs_pool
|
- zfs_pool
|
||||||
|
|
@ -229,6 +239,7 @@ On FreeBSD:
|
||||||
- Pool metrics (`zfs_pool`) will have the following tag:
|
- Pool metrics (`zfs_pool`) will have the following tag:
|
||||||
- pool - with the name of the pool which the metrics are for.
|
- pool - with the name of the pool which the metrics are for.
|
||||||
- health - the health status of the pool. (FreeBSD only)
|
- health - the health status of the pool. (FreeBSD only)
|
||||||
|
- dataset - the name of the dataset the metrics belong to; ZFS >= 2.1.x only. (Linux only)
|
||||||
|
|
||||||
- Dataset metrics (`zfs_dataset`) will have the following tag:
|
- Dataset metrics (`zfs_dataset`) will have the following tag:
|
||||||
- dataset - with the name of the dataset which the metrics are for.
|
- dataset - with the name of the dataset which the metrics are for.
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@
|
||||||
package zfs
|
package zfs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
|
@ -14,22 +15,56 @@ import (
|
||||||
"github.com/influxdata/telegraf/plugins/inputs"
|
"github.com/influxdata/telegraf/plugins/inputs"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// metricsVersion identifies which kstat layout a pool's statistics file
// uses: ZFS < 2.1.x exposes a per-pool "io" file, while ZFS >= 2.1.x
// exposes per-dataset "objset-*" files instead.
type metricsVersion uint8

const (
	// unknown means version probing failed; gathering must be aborted.
	unknown metricsVersion = iota
	// v1 is the pre-2.1.x per-pool "io" kstat format.
	v1
	// v2 is the ZFS >= 2.1.x per-dataset "objset-*" kstat format.
	v2
)
|
||||||
|
|
||||||
// poolInfo describes a single ZFS pool discovered under the kstat path,
// recording which kstat file to read and which format that file is in.
type poolInfo struct {
	name       string         // pool name, taken from the kstat directory name
	ioFilename string         // full path of the kstat file to parse for this pool
	version    metricsVersion // layout of the kstat file (v1 "io" vs v2 "objset-*")
}
|
||||||
|
|
||||||
func getPools(kstatPath string) []poolInfo {
|
func probeVersion(kstatPath string) (metricsVersion, []string, error) {
|
||||||
|
poolsDirs, err := filepath.Glob(fmt.Sprintf("%s/*/objset-*", kstatPath))
|
||||||
|
|
||||||
|
// From the docs: the only possible returned error is ErrBadPattern, when pattern is malformed.
|
||||||
|
// Because of this we need to determine how to fallback differently.
|
||||||
|
if err != nil {
|
||||||
|
return unknown, poolsDirs, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(poolsDirs) > 0 {
|
||||||
|
return v2, poolsDirs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to the old kstat in case of an older ZFS version.
|
||||||
|
poolsDirs, err = filepath.Glob(fmt.Sprintf("%s/*/io", kstatPath))
|
||||||
|
if err != nil {
|
||||||
|
return unknown, poolsDirs, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return v1, poolsDirs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func getPools(kstatPath string) ([]poolInfo, error) {
|
||||||
pools := make([]poolInfo, 0)
|
pools := make([]poolInfo, 0)
|
||||||
poolsDirs, _ := filepath.Glob(kstatPath + "/*/io")
|
version, poolsDirs, err := probeVersion(kstatPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
for _, poolDir := range poolsDirs {
|
for _, poolDir := range poolsDirs {
|
||||||
poolDirSplit := strings.Split(poolDir, "/")
|
poolDirSplit := strings.Split(poolDir, "/")
|
||||||
pool := poolDirSplit[len(poolDirSplit)-2]
|
pool := poolDirSplit[len(poolDirSplit)-2]
|
||||||
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir})
|
pools = append(pools, poolInfo{name: pool, ioFilename: poolDir, version: version})
|
||||||
}
|
}
|
||||||
|
|
||||||
return pools
|
return pools, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTags(pools []poolInfo) map[string]string {
|
func getTags(pools []poolInfo) map[string]string {
|
||||||
|
|
@ -45,36 +80,99 @@ func getTags(pools []poolInfo) map[string]string {
|
||||||
return map[string]string{"pools": poolNames}
|
return map[string]string{"pools": poolNames}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// gather validates a kstat file's line count and splits its header/value
// rows into whitespace-separated fields: lines[1] carries the field names
// and lines[2] the corresponding values, and both must have the same arity.
func gather(lines []string, fileLines int) ([]string, []string, error) {
	if fileLines != len(lines) {
		return nil, nil, errors.New("expected lines in kstat does not match")
	}

	keys, values := strings.Fields(lines[1]), strings.Fields(lines[2])
	if len(values) != len(keys) {
		return nil, nil, fmt.Errorf("key and value count don't match Keys:%v Values:%v", keys, values)
	}

	return keys, values, nil
}
|
||||||
|
|
||||||
|
func gatherV1(lines []string) (map[string]interface{}, error) {
|
||||||
|
fileLines := 3
|
||||||
|
keys, values, err := gather(lines, fileLines)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fields := make(map[string]interface{})
|
||||||
|
for i := 0; i < len(keys); i++ {
|
||||||
|
value, err := strconv.ParseInt(values[i], 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fields[keys[i]] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// New way of collection. Each objset-* file in ZFS >= 2.1.x has a format looking like this:
|
||||||
|
// 36 1 0x01 7 2160 5214787391 73405258558961
|
||||||
|
// name type data
|
||||||
|
// dataset_name 7 rpool/ROOT/pve-1
|
||||||
|
// writes 4 409570
|
||||||
|
// nwritten 4 2063419969
|
||||||
|
// reads 4 22108699
|
||||||
|
// nread 4 63067280992
|
||||||
|
// nunlinks 4 13849
|
||||||
|
// nunlinked 4 13848
|
||||||
|
//
|
||||||
|
// For explanation of the first line's values see https://github.com/openzfs/zfs/blob/master/module/os/linux/spl/spl-kstat.c#L61
|
||||||
|
func gatherV2(lines []string, tags map[string]string) (map[string]interface{}, error) {
|
||||||
|
fileLines := 9
|
||||||
|
_, _, err := gather(lines, fileLines)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
tags["dataset"] = strings.Fields(lines[2])[2]
|
||||||
|
fields := make(map[string]interface{})
|
||||||
|
for i := 3; i < len(lines); i++ {
|
||||||
|
lineFields := strings.Fields(lines[i])
|
||||||
|
fieldName := lineFields[0]
|
||||||
|
fieldData := lineFields[2]
|
||||||
|
value, err := strconv.ParseInt(fieldData, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
fields[fieldName] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
return fields, nil
|
||||||
|
}
|
||||||
|
|
||||||
func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
|
func gatherPoolStats(pool poolInfo, acc telegraf.Accumulator) error {
|
||||||
lines, err := internal.ReadLines(pool.ioFilename)
|
lines, err := internal.ReadLines(pool.ioFilename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(lines) != 3 {
|
var fields map[string]interface{}
|
||||||
|
var gatherErr error
|
||||||
|
tags := map[string]string{"pool": pool.name}
|
||||||
|
switch pool.version {
|
||||||
|
case v1:
|
||||||
|
fields, gatherErr = gatherV1(lines)
|
||||||
|
case v2:
|
||||||
|
fields, gatherErr = gatherV2(lines, tags)
|
||||||
|
case unknown:
|
||||||
|
return errors.New("Unknown metrics version detected")
|
||||||
|
}
|
||||||
|
|
||||||
|
if gatherErr != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
keys := strings.Fields(lines[1])
|
acc.AddFields("zfs_pool", fields, tags)
|
||||||
values := strings.Fields(lines[2])
|
|
||||||
|
|
||||||
keyCount := len(keys)
|
|
||||||
|
|
||||||
if keyCount != len(values) {
|
|
||||||
return fmt.Errorf("Key and value count don't match Keys:%v Values:%v", keys, values)
|
|
||||||
}
|
|
||||||
|
|
||||||
tag := map[string]string{"pool": pool.name}
|
|
||||||
fields := make(map[string]interface{})
|
|
||||||
for i := 0; i < keyCount; i++ {
|
|
||||||
value, err := strconv.ParseInt(values[i], 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
fields[keys[i]] = value
|
|
||||||
}
|
|
||||||
acc.AddFields("zfs_pool", fields, tag)
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -93,10 +191,10 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
|
||||||
kstatPath = "/proc/spl/kstat/zfs"
|
kstatPath = "/proc/spl/kstat/zfs"
|
||||||
}
|
}
|
||||||
|
|
||||||
pools := getPools(kstatPath)
|
pools, err := getPools(kstatPath)
|
||||||
tags := getTags(pools)
|
tags := getTags(pools)
|
||||||
|
|
||||||
if z.PoolMetrics {
|
if z.PoolMetrics && err == nil {
|
||||||
for _, pool := range pools {
|
for _, pool := range pools {
|
||||||
err := gatherPoolStats(pool, acc)
|
err := gatherPoolStats(pool, acc)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
||||||
|
|
@ -119,6 +119,16 @@ const poolIoContents = `11 3 0x00 1 80 2225326830828 32953476980628
|
||||||
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
|
nread nwritten reads writes wtime wlentime wupdate rtime rlentime rupdate wcnt rcnt
|
||||||
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
|
1884160 6450688 22 978 272187126 2850519036 2263669418655 424226814 2850519036 2263669871823 0 0
|
||||||
`
|
`
|
||||||
|
const objsetContents = `36 1 0x01 7 2160 5214787391 74985931356512
|
||||||
|
name type data
|
||||||
|
dataset_name 7 HOME
|
||||||
|
writes 4 978
|
||||||
|
nwritten 4 6450688
|
||||||
|
reads 4 22
|
||||||
|
nread 4 1884160
|
||||||
|
nunlinks 4 14148
|
||||||
|
nunlinked 4 14147
|
||||||
|
`
|
||||||
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
|
const zilContents = `7 1 0x01 14 672 34118481334 437444452158445
|
||||||
name type data
|
name type data
|
||||||
zil_commit_count 4 77
|
zil_commit_count 4 77
|
||||||
|
|
@ -219,6 +229,19 @@ func TestZfsPoolMetrics(t *testing.T) {
|
||||||
|
|
||||||
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
|
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
|
||||||
|
|
||||||
|
err = os.WriteFile(testKstatPath+"/HOME/objset-0x20a", []byte(objsetContents), 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
acc.Metrics = nil
|
||||||
|
|
||||||
|
err = z.Gather(&acc)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tags["dataset"] = "HOME"
|
||||||
|
|
||||||
|
poolMetrics = getPoolMetricsNewFormat()
|
||||||
|
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
|
||||||
|
|
||||||
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
@ -477,3 +500,14 @@ func getPoolMetrics() map[string]interface{} {
|
||||||
"rcnt": int64(0),
|
"rcnt": int64(0),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getPoolMetricsNewFormat returns the fields expected when parsing the
// objset-* fixture used by the ZFS >= 2.1.x pool-metrics test.
func getPoolMetricsNewFormat() map[string]interface{} {
	expected := map[string]interface{}{
		"nread":     int64(1884160),
		"nunlinked": int64(14147),
		"nunlinks":  int64(14148),
		"nwritten":  int64(6450688),
		"reads":     int64(22),
		"writes":    int64(978),
	}
	return expected
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue