Add dataset metrics to zfs input (#8383)

This commit is contained in:
Enzo Hamelin 2020-11-27 19:58:32 +01:00 committed by GitHub
parent 42eacb3a42
commit ef91f96de9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 10 deletions

View File

@ -2,7 +2,7 @@
This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on
Linux and FreeBSD. It gets ZFS stat from `/proc/spl/kstat/zfs` on Linux and
from `sysctl` and `zpool` on FreeBSD.
from `sysctl`, `zfs` and `zpool` on FreeBSD.
### Configuration:
@ -22,11 +22,14 @@ from `sysctl` and `zpool` on FreeBSD.
## By default, don't gather zpool stats
# poolMetrics = false
## By default, don't gather dataset stats
# datasetMetrics = false
```
### Measurements & Fields:
By default this plugin collects metrics about ZFS internals and pool.
By default this plugin collects metrics about ZFS internals, pool and dataset.
These metrics are either counters or measure sizes
in bytes. These metrics will be in the `zfs` measurement with the field
names listed below.
@ -34,6 +37,9 @@ names listed bellow.
If `poolMetrics` is enabled then additional metrics will be gathered for
each pool.
If `datasetMetrics` is enabled then additional metrics will be gathered for
each dataset.
- zfs
With fields listed below.
@ -206,21 +212,34 @@ On FreeBSD:
- size (integer, bytes)
- fragmentation (integer, percent)
#### Dataset Metrics (optional, only on FreeBSD)
- zfs_dataset
- avail (integer, bytes)
- used (integer, bytes)
- usedsnap (integer, bytes)
- usedds (integer, bytes)
### Tags:
- ZFS stats (`zfs`) will have the following tag:
- pools - A `::` concatenated list of all ZFS pools on the machine.
- datasets - A `::` concatenated list of all ZFS datasets on the machine.
- Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only)
- Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for.
### Example Output:
```
$ ./telegraf --config telegraf.conf --input-filter zfs --test
* Plugin: zfs, Collection 1
> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908
> zfs_dataset,dataset=zata avail=10741741326336,used=8564135526400,usedsnap=0,usedds=90112
> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2
_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908
```

View File

@ -1,14 +1,22 @@
package zfs
import (
"github.com/influxdata/telegraf"
)
// Sysctl runs sysctl(8) for one kstat metric group and returns its output
// lines. Injected as a field so tests can substitute a mock.
type Sysctl func(metric string) ([]string, error)

// Zpool runs `zpool list` and returns its output lines, one pool per line.
// Injected as a field so tests can substitute a mock.
type Zpool func() ([]string, error)

// Zdataset runs `zfs list` for the given property names and returns its
// output lines, one dataset per line. Injected as a field so tests can
// substitute a mock.
type Zdataset func(properties []string) ([]string, error)
type Zfs struct {
KstatPath string
KstatMetrics []string
PoolMetrics bool
sysctl Sysctl
zpool Zpool
KstatPath string
KstatMetrics []string
PoolMetrics bool
DatasetMetrics bool
sysctl Sysctl
zpool Zpool
zdataset Zdataset
Log telegraf.Logger `toml:"-"`
}
var sampleConfig = `
@ -24,6 +32,8 @@ var sampleConfig = `
# "dmu_tx", "fm", "vdev_mirror_stats", "zfetchstats", "zil"]
## By default, don't gather zpool stats
# poolMetrics = false
## By default, don't gather zdataset stats
# datasetMetrics = false
`
func (z *Zfs) SampleConfig() string {
@ -31,5 +41,5 @@ func (z *Zfs) SampleConfig() string {
}
func (z *Zfs) Description() string {
return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools"
return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, pools and datasets"
}

View File

@ -87,6 +87,47 @@ func (z *Zfs) gatherPoolStats(acc telegraf.Accumulator) (string, error) {
return strings.Join(pools, "::"), nil
}
// gatherDatasetStats queries `zfs list` for every dataset and returns the
// dataset names joined with "::" (used as the "datasets" tag on the main
// zfs measurement).
//
// When z.DatasetMetrics is enabled it additionally emits one "zfs_dataset"
// point per dataset, tagged with the dataset name and carrying the numeric
// properties (avail, used, usedsnap, usedds) as int64 fields. Lines with an
// unexpected column count are logged and skipped; a non-numeric field value
// aborts the gather with an error.
func (z *Zfs) gatherDatasetStats(acc telegraf.Accumulator) (string, error) {
	// First column is the dataset name; the remainder are parsed as int64.
	properties := []string{"name", "avail", "used", "usedsnap", "usedds"}

	lines, err := z.zdataset(properties)
	if err != nil {
		return "", err
	}

	datasets := make([]string, 0, len(lines))
	for _, cols := range lines {
		col := strings.Split(cols, "\t")
		// The name is recorded even for malformed lines so the "datasets"
		// tag always lists every dataset reported by `zfs list`.
		datasets = append(datasets, col[0])

		if !z.DatasetMetrics {
			continue
		}
		if len(col) != len(properties) {
			z.Log.Warnf("Invalid number of columns for line: %s", cols)
			continue
		}

		tags := map[string]string{"dataset": col[0]}
		fields := make(map[string]interface{}, len(properties)-1)
		for i, key := range properties[1:] {
			value, err := strconv.ParseInt(col[i+1], 10, 64)
			if err != nil {
				return "", fmt.Errorf("parsing %s %q: %w", key, col[i+1], err)
			}
			fields[key] = value
		}
		acc.AddFields("zfs_dataset", fields, tags)
	}

	return strings.Join(datasets, "::"), nil
}
func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatMetrics := z.KstatMetrics
if len(kstatMetrics) == 0 {
@ -99,6 +140,11 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
return err
}
tags["pools"] = poolNames
datasetNames, err := z.gatherDatasetStats(acc)
if err != nil {
return err
}
tags["datasets"] = datasetNames
fields := make(map[string]interface{})
for _, metric := range kstatMetrics {
@ -137,6 +183,10 @@ func zpool() ([]string, error) {
return run("zpool", []string{"list", "-Hp", "-o", "name,health,size,alloc,free,fragmentation,capacity,dedupratio"}...)
}
// zdataset shells out to `zfs list` and returns its raw output lines:
// -H suppresses the header, -p prints exact (parseable) numeric values,
// and -o selects the requested property columns.
func zdataset(properties []string) ([]string, error) {
	args := []string{"list", "-Hp", "-o", strings.Join(properties, ",")}
	return run("zfs", args...)
}
func sysctl(metric string) ([]string, error) {
return run("sysctl", []string{"-q", fmt.Sprintf("kstat.zfs.misc.%s", metric)}...)
}
@ -144,8 +194,9 @@ func sysctl(metric string) ([]string, error) {
func init() {
inputs.Add("zfs", func() telegraf.Input {
return &Zfs{
sysctl: sysctl,
zpool: zpool,
sysctl: sysctl,
zpool: zpool,
zdataset: zdataset,
}
})
}

View File

@ -31,6 +31,18 @@ func mock_zpool_unavail() ([]string, error) {
return zpool_output_unavail, nil
}
// Canned `zfs list` output used by the dataset-metrics tests: one line per
// dataset with columns name, avail, used, usedsnap, usedds.
// NOTE(review): gatherDatasetStats splits lines on "\t" — these literals
// must be tab-separated; confirm the tabs survived copy/paste into this file.
// $ zfs list -Hp -o name,avail,used,usedsnap,usedds
var zdataset_output = []string{
"zata 10741741326336 8564135526400 0 90112",
"zata/home 10741741326336 2498560 212992 2285568",
"zata/import 10741741326336 196608 81920 114688",
"zata/storage 10741741326336 8556084379648 3601138999296 4954945380352",
}
// mock_zdataset stands in for the real zdataset helper in tests, returning
// the canned `zfs list` output. It must match the Zdataset function type
// (func([]string) ([]string, error)) so it can be assigned to the zdataset
// field of Zfs; the requested properties are ignored.
func mock_zdataset(properties []string) ([]string, error) {
	return zdataset_output, nil
}
// sysctl -q kstat.zfs.misc.arcstats
// sysctl -q kstat.zfs.misc.vdev_cache_stats
@ -126,6 +138,39 @@ func TestZfsPoolMetrics_unavail(t *testing.T) {
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
}
// TestZfsDatasetMetrics verifies that zfs_dataset points are emitted only
// when DatasetMetrics is enabled, and that the fields reported for a
// dataset match the mocked `zfs list` output.
func TestZfsDatasetMetrics(t *testing.T) {
	var acc testutil.Accumulator

	// DatasetMetrics defaults to false: no zfs_dataset measurement.
	z := &Zfs{
		KstatMetrics: []string{"vdev_cache_stats"},
		sysctl:       mock_sysctl,
		zdataset:     mock_zdataset,
	}
	require.NoError(t, z.Gather(&acc))
	require.False(t, acc.HasMeasurement("zfs_dataset"))

	acc.Metrics = nil

	// With DatasetMetrics enabled, the "zata" dataset must be reported
	// with all of its mocked field values.
	z = &Zfs{
		KstatMetrics:   []string{"vdev_cache_stats"},
		DatasetMetrics: true,
		sysctl:         mock_sysctl,
		zdataset:       mock_zdataset,
	}
	require.NoError(t, z.Gather(&acc))

	tags := map[string]string{
		"dataset": "zata",
	}
	acc.AssertContainsTaggedFields(t, "zfs_dataset", getZataDatasetMetrics(), tags)
}
func TestZfsGeneratesMetrics(t *testing.T) {
var acc testutil.Accumulator
@ -178,6 +223,15 @@ func getTemp2PoolMetrics() map[string]interface{} {
}
}
// getZataDatasetMetrics returns the field set expected for the "zata"
// dataset in the mocked `zfs list` output.
func getZataDatasetMetrics() map[string]interface{} {
	fields := make(map[string]interface{}, 4)
	fields["avail"] = int64(10741741326336)
	fields["used"] = int64(8564135526400)
	fields["usedsnap"] = int64(0)
	fields["usedds"] = int64(90112)
	return fields
}
func getKstatMetricsVdevOnly() map[string]interface{} {
return map[string]interface{}{
"vdev_cache_stats_misses": int64(87789),