Add dataset metrics to zfs input (#8383)

This commit is contained in:
Enzo Hamelin 2020-11-27 19:58:32 +01:00 committed by GitHub
parent 42eacb3a42
commit ef91f96de9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 144 additions and 10 deletions

View File

@ -2,7 +2,7 @@
This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on This ZFS plugin provides metrics from your ZFS filesystems. It supports ZFS on
Linux and FreeBSD. It gets ZFS stat from `/proc/spl/kstat/zfs` on Linux and Linux and FreeBSD. It gets ZFS stat from `/proc/spl/kstat/zfs` on Linux and
from `sysctl` and `zpool` on FreeBSD. from `sysctl`, `zfs` and `zpool` on FreeBSD.
### Configuration: ### Configuration:
@ -22,11 +22,14 @@ from `sysctl` and `zpool` on FreeBSD.
## By default, don't gather zpool stats ## By default, don't gather zpool stats
# poolMetrics = false # poolMetrics = false
## By default, don't gather dataset stats
# datasetMetrics = false
``` ```
### Measurements & Fields: ### Measurements & Fields:
By default this plugin collects metrics about ZFS internals and pool. By default this plugin collects metrics about ZFS internals, pool and dataset.
These metrics are either counters or measure sizes These metrics are either counters or measure sizes
in bytes. These metrics will be in the `zfs` measurement with the field in bytes. These metrics will be in the `zfs` measurement with the field
names listed below. names listed below.
@ -34,6 +37,9 @@ names listed bellow.
If `poolMetrics` is enabled then additional metrics will be gathered for If `poolMetrics` is enabled then additional metrics will be gathered for
each pool. each pool.
If `datasetMetrics` is enabled then additional metrics will be gathered for
each dataset.
- zfs - zfs
With fields listed below. With fields listed below.
@ -206,21 +212,34 @@ On FreeBSD:
- size (integer, bytes) - size (integer, bytes)
- fragmentation (integer, percent) - fragmentation (integer, percent)
#### Dataset Metrics (optional, only on FreeBSD)
- zfs_dataset
- avail (integer, bytes)
- used (integer, bytes)
- usedsnap (integer, bytes)
- usedds (integer, bytes)
### Tags: ### Tags:
- ZFS stats (`zfs`) will have the following tag: - ZFS stats (`zfs`) will have the following tag:
- pools - A `::` concatenated list of all ZFS pools on the machine. - pools - A `::` concatenated list of all ZFS pools on the machine.
- datasets - A `::` concatenated list of all ZFS datasets on the machine.
- Pool metrics (`zfs_pool`) will have the following tag: - Pool metrics (`zfs_pool`) will have the following tag:
- pool - with the name of the pool which the metrics are for. - pool - with the name of the pool which the metrics are for.
- health - the health status of the pool. (FreeBSD only) - health - the health status of the pool. (FreeBSD only)
- Dataset metrics (`zfs_dataset`) will have the following tag:
- dataset - with the name of the dataset which the metrics are for.
### Example Output: ### Example Output:
``` ```
$ ./telegraf --config telegraf.conf --input-filter zfs --test $ ./telegraf --config telegraf.conf --input-filter zfs --test
* Plugin: zfs, Collection 1 * Plugin: zfs, Collection 1
> zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908 > zfs_pool,health=ONLINE,pool=zroot allocated=1578590208i,capacity=2i,dedupratio=1,fragmentation=1i,free=64456531968i,size=66035122176i 1464473103625653908
> zfs_dataset,dataset=zata avail=10741741326336i,used=8564135526400i,usedsnap=0i,usedds=90112i
> zfs,pools=zroot arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2
_write_trylock_fail=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908 > zfs,pools=zroot 
arcstats_allocated=4167764i,arcstats_anon_evictable_data=0i,arcstats_anon_evictable_metadata=0i,arcstats_anon_size=16896i,arcstats_arc_meta_limit=10485760i,arcstats_arc_meta_max=115269568i,arcstats_arc_meta_min=8388608i,arcstats_arc_meta_used=51977456i,arcstats_c=16777216i,arcstats_c_max=41943040i,arcstats_c_min=16777216i,arcstats_data_size=0i,arcstats_deleted=1699340i,arcstats_demand_data_hits=14836131i,arcstats_demand_data_misses=2842945i,arcstats_demand_hit_predictive_prefetch=0i,arcstats_demand_metadata_hits=1655006i,arcstats_demand_metadata_misses=830074i,arcstats_duplicate_buffers=0i,arcstats_duplicate_buffers_size=0i,arcstats_duplicate_reads=123i,arcstats_evict_l2_cached=0i,arcstats_evict_l2_eligible=332172623872i,arcstats_evict_l2_ineligible=6168576i,arcstats_evict_l2_skip=0i,arcstats_evict_not_enough=12189444i,arcstats_evict_skip=195190764i,arcstats_hash_chain_max=2i,arcstats_hash_chains=10i,arcstats_hash_collisions=43134i,arcstats_hash_elements=2268i,arcstats_hash_elements_max=6136i,arcstats_hdr_size=565632i,arcstats_hits=16515778i,arcstats_l2_abort_lowmem=0i,arcstats_l2_asize=0i,arcstats_l2_cdata_free_on_write=0i,arcstats_l2_cksum_bad=0i,arcstats_l2_compress_failures=0i,arcstats_l2_compress_successes=0i,arcstats_l2_compress_zeros=0i,arcstats_l2_evict_l1cached=0i,arcstats_l2_evict_lock_retry=0i,arcstats_l2_evict_reading=0i,arcstats_l2_feeds=0i,arcstats_l2_free_on_write=0i,arcstats_l2_hdr_size=0i,arcstats_l2_hits=0i,arcstats_l2_io_error=0i,arcstats_l2_misses=0i,arcstats_l2_read_bytes=0i,arcstats_l2_rw_clash=0i,arcstats_l2_size=0i,arcstats_l2_write_buffer_bytes_scanned=0i,arcstats_l2_write_buffer_iter=0i,arcstats_l2_write_buffer_list_iter=0i,arcstats_l2_write_buffer_list_null_iter=0i,arcstats_l2_write_bytes=0i,arcstats_l2_write_full=0i,arcstats_l2_write_in_l2=0i,arcstats_l2_write_io_in_progress=0i,arcstats_l2_write_not_cacheable=380i,arcstats_l2_write_passed_headroom=0i,arcstats_l2_write_pios=0i,arcstats_l2_write_spa_mismatch=0i,arcstats_l2_write_trylock_fai
l=0i,arcstats_l2_writes_done=0i,arcstats_l2_writes_error=0i,arcstats_l2_writes_lock_retry=0i,arcstats_l2_writes_sent=0i,arcstats_memory_throttle_count=0i,arcstats_metadata_size=17014784i,arcstats_mfu_evictable_data=0i,arcstats_mfu_evictable_metadata=16384i,arcstats_mfu_ghost_evictable_data=5723648i,arcstats_mfu_ghost_evictable_metadata=10709504i,arcstats_mfu_ghost_hits=1315619i,arcstats_mfu_ghost_size=16433152i,arcstats_mfu_hits=7646611i,arcstats_mfu_size=305152i,arcstats_misses=3676993i,arcstats_mru_evictable_data=0i,arcstats_mru_evictable_metadata=0i,arcstats_mru_ghost_evictable_data=0i,arcstats_mru_ghost_evictable_metadata=80896i,arcstats_mru_ghost_hits=324250i,arcstats_mru_ghost_size=80896i,arcstats_mru_hits=8844526i,arcstats_mru_size=16693248i,arcstats_mutex_miss=354023i,arcstats_other_size=34397040i,arcstats_p=4172800i,arcstats_prefetch_data_hits=0i,arcstats_prefetch_data_misses=0i,arcstats_prefetch_metadata_hits=24641i,arcstats_prefetch_metadata_misses=3974i,arcstats_size=51977456i,arcstats_sync_wait_for_async=0i,vdev_cache_stats_delegations=779i,vdev_cache_stats_hits=323123i,vdev_cache_stats_misses=59929i,zfetchstats_hits=0i,zfetchstats_max_streams=0i,zfetchstats_misses=0i 1464473103634124908
``` ```

View File

@ -1,14 +1,22 @@
package zfs package zfs
import (
"github.com/influxdata/telegraf"
)
type Sysctl func(metric string) ([]string, error) type Sysctl func(metric string) ([]string, error)
type Zpool func() ([]string, error) type Zpool func() ([]string, error)
type Zdataset func(properties []string) ([]string, error)
type Zfs struct { type Zfs struct {
KstatPath string KstatPath string
KstatMetrics []string KstatMetrics []string
PoolMetrics bool PoolMetrics bool
sysctl Sysctl DatasetMetrics bool
zpool Zpool sysctl Sysctl
zpool Zpool
zdataset Zdataset
Log telegraf.Logger `toml:"-"`
} }
var sampleConfig = ` var sampleConfig = `
@ -24,6 +32,8 @@ var sampleConfig = `
# "dmu_tx", "fm", "vdev_mirror_stats", "zfetchstats", "zil"] # "dmu_tx", "fm", "vdev_mirror_stats", "zfetchstats", "zil"]
## By default, don't gather zpool stats ## By default, don't gather zpool stats
# poolMetrics = false # poolMetrics = false
## By default, don't gather zdataset stats
# datasetMetrics = false
` `
func (z *Zfs) SampleConfig() string { func (z *Zfs) SampleConfig() string {
@ -31,5 +41,5 @@ func (z *Zfs) SampleConfig() string {
} }
func (z *Zfs) Description() string { func (z *Zfs) Description() string {
return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, and pools" return "Read metrics of ZFS from arcstats, zfetchstats, vdev_cache_stats, pools and datasets"
} }

View File

@ -87,6 +87,47 @@ func (z *Zfs) gatherPoolStats(acc telegraf.Accumulator) (string, error) {
return strings.Join(pools, "::"), nil return strings.Join(pools, "::"), nil
} }
// gatherDatasetStats lists the ZFS datasets through z.zdataset and returns
// their names joined with "::".
//
// Each line returned by z.zdataset is expected to hold the requested
// properties (name, avail, used, usedsnap, usedds) as tab-separated columns.
// The first column of every line is always collected as a dataset name.
//
// When z.DatasetMetrics is set, one "zfs_dataset" point is additionally added
// to acc per line, tagged with dataset=<name> and carrying the remaining
// columns as int64 fields. A line with an unexpected column count is logged
// and skipped. A column that is not a base-10 integer aborts the gather: an
// empty string and an error wrapping the strconv failure are returned, and
// points already added for earlier lines stay in acc.
func (z *Zfs) gatherDatasetStats(acc telegraf.Accumulator) (string, error) {
	properties := []string{"name", "avail", "used", "usedsnap", "usedds"}

	lines, err := z.zdataset(properties)
	if err != nil {
		return "", err
	}

	datasets := make([]string, 0, len(lines))
	for _, line := range lines {
		// Split once per line and reuse the columns for both the name list
		// and the optional per-dataset point.
		col := strings.Split(line, "\t")
		datasets = append(datasets, col[0])

		if !z.DatasetMetrics {
			continue
		}
		if len(col) != len(properties) {
			z.Log.Warnf("Invalid number of columns for line: %s", line)
			continue
		}

		tags := map[string]string{"dataset": col[0]}
		fields := make(map[string]interface{}, len(properties)-1)
		// properties[0] is the name, so field i lives in column i+1.
		for i, key := range properties[1:] {
			raw := col[i+1]
			value, err := strconv.ParseInt(raw, 10, 64)
			if err != nil {
				// Message text is kept as-is for compatibility; %w keeps the
				// underlying error reachable through errors.Is / errors.As.
				return "", fmt.Errorf("Error parsing %s %q: %w", key, raw, err)
			}
			fields[key] = value
		}
		acc.AddFields("zfs_dataset", fields, tags)
	}

	return strings.Join(datasets, "::"), nil
}
func (z *Zfs) Gather(acc telegraf.Accumulator) error { func (z *Zfs) Gather(acc telegraf.Accumulator) error {
kstatMetrics := z.KstatMetrics kstatMetrics := z.KstatMetrics
if len(kstatMetrics) == 0 { if len(kstatMetrics) == 0 {
@ -99,6 +140,11 @@ func (z *Zfs) Gather(acc telegraf.Accumulator) error {
return err return err
} }
tags["pools"] = poolNames tags["pools"] = poolNames
datasetNames, err := z.gatherDatasetStats(acc)
if err != nil {
return err
}
tags["datasets"] = datasetNames
fields := make(map[string]interface{}) fields := make(map[string]interface{})
for _, metric := range kstatMetrics { for _, metric := range kstatMetrics {
@ -137,6 +183,10 @@ func zpool() ([]string, error) {
return run("zpool", []string{"list", "-Hp", "-o", "name,health,size,alloc,free,fragmentation,capacity,dedupratio"}...) return run("zpool", []string{"list", "-Hp", "-o", "name,health,size,alloc,free,fragmentation,capacity,dedupratio"}...)
} }
// zdataset runs `zfs list -Hp -o <properties>` with the given property names
// joined by commas and returns whatever run yields for that command.
// Per zfs-list(8), -H selects header-less, tab-separated output and -p prints
// exact numeric values.
func zdataset(properties []string) ([]string, error) {
	columns := strings.Join(properties, ",")
	args := []string{"list", "-Hp", "-o", columns}
	return run("zfs", args...)
}
func sysctl(metric string) ([]string, error) { func sysctl(metric string) ([]string, error) {
return run("sysctl", []string{"-q", fmt.Sprintf("kstat.zfs.misc.%s", metric)}...) return run("sysctl", []string{"-q", fmt.Sprintf("kstat.zfs.misc.%s", metric)}...)
} }
@ -144,8 +194,9 @@ func sysctl(metric string) ([]string, error) {
func init() { func init() {
inputs.Add("zfs", func() telegraf.Input { inputs.Add("zfs", func() telegraf.Input {
return &Zfs{ return &Zfs{
sysctl: sysctl, sysctl: sysctl,
zpool: zpool, zpool: zpool,
zdataset: zdataset,
} }
}) })
} }

View File

@ -31,6 +31,18 @@ func mock_zpool_unavail() ([]string, error) {
return zpool_output_unavail, nil return zpool_output_unavail, nil
} }
// zdataset_output is canned output of
//
//	$ zfs list -Hp -o name,avail,used,usedsnap,usedds
//
// with one entry per dataset. Columns are tab-separated; the tabs are written
// as explicit \t escapes so that editors or formatters cannot silently turn
// them into spaces, which the plugin's tab-based column split would reject.
var zdataset_output = []string{
	"zata\t10741741326336\t8564135526400\t0\t90112",
	"zata/home\t10741741326336\t2498560\t212992\t2285568",
	"zata/import\t10741741326336\t196608\t81920\t114688",
	"zata/storage\t10741741326336\t8556084379648\t3601138999296\t4954945380352",
}
// mock_zdataset is a test stand-in for the zfs command wrapper. It ignores the
// requested properties and always returns the canned zdataset_output lines.
// The parameter is required so the function matches the Zdataset type
// (func(properties []string) ([]string, error)) and can be assigned to
// Zfs.zdataset.
func mock_zdataset(properties []string) ([]string, error) {
	return zdataset_output, nil
}
// sysctl -q kstat.zfs.misc.arcstats // sysctl -q kstat.zfs.misc.arcstats
// sysctl -q kstat.zfs.misc.vdev_cache_stats // sysctl -q kstat.zfs.misc.vdev_cache_stats
@ -126,6 +138,39 @@ func TestZfsPoolMetrics_unavail(t *testing.T) {
acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags) acc.AssertContainsTaggedFields(t, "zfs_pool", poolMetrics, tags)
} }
// TestZfsDatasetMetrics verifies that zfs_dataset points are emitted only when
// DatasetMetrics is enabled, and that the "zata" dataset is reported with the
// expected fields.
func TestZfsDatasetMetrics(t *testing.T) {
	var acc testutil.Accumulator

	// Both runs use the same mocks; only the DatasetMetrics switch differs.
	newPlugin := func(datasetMetrics bool) *Zfs {
		return &Zfs{
			KstatMetrics:   []string{"vdev_cache_stats"},
			DatasetMetrics: datasetMetrics,
			sysctl:         mock_sysctl,
			zdataset:       mock_zdataset,
		}
	}

	// Dataset metrics disabled: no zfs_dataset measurement may be produced.
	z := newPlugin(false)
	err := z.Gather(&acc)
	require.NoError(t, err)
	require.False(t, acc.HasMeasurement("zfs_dataset"))
	acc.Metrics = nil

	// Dataset metrics enabled: the "zata" dataset must carry all its fields.
	z = newPlugin(true)
	err = z.Gather(&acc)
	require.NoError(t, err)

	wantTags := map[string]string{"dataset": "zata"}
	wantFields := getZataDatasetMetrics()
	acc.AssertContainsTaggedFields(t, "zfs_dataset", wantFields, wantTags)
}
func TestZfsGeneratesMetrics(t *testing.T) { func TestZfsGeneratesMetrics(t *testing.T) {
var acc testutil.Accumulator var acc testutil.Accumulator
@ -178,6 +223,15 @@ func getTemp2PoolMetrics() map[string]interface{} {
} }
} }
// getZataDatasetMetrics returns the int64 fields expected on the zfs_dataset
// point of the "zata" dataset, matching the first line of the canned
// `zfs list` output used by the tests.
func getZataDatasetMetrics() map[string]interface{} {
	expected := make(map[string]interface{}, 4)
	expected["avail"] = int64(10741741326336)
	expected["used"] = int64(8564135526400)
	expected["usedsnap"] = int64(0)
	expected["usedds"] = int64(90112)
	return expected
}
func getKstatMetricsVdevOnly() map[string]interface{} { func getKstatMetricsVdevOnly() map[string]interface{} {
return map[string]interface{}{ return map[string]interface{}{
"vdev_cache_stats_misses": int64(87789), "vdev_cache_stats_misses": int64(87789),