feat(inputs.kernel): Collect KSM metrics (#13728)
This commit is contained in:
parent
8b032b73ee
commit
0cf7d23090
|
|
@ -5,14 +5,13 @@ This plugin is only available on Linux.
|
|||
The kernel plugin gathers info about the kernel that doesn't fit into other
|
||||
plugins. In general, it is the statistics available in `/proc/stat` that are not
|
||||
covered by other plugins as well as the value of
|
||||
`/proc/sys/kernel/random/entropy_avail`
|
||||
`/proc/sys/kernel/random/entropy_avail` and, optionally, Kernel Samepage Merging (KSM) statistics.
|
||||
|
||||
The metrics are documented in `man proc` under the `/proc/stat` section.
|
||||
The metrics are documented in `man 5 proc` under the `/proc/stat` section, as well as in `man 4 random` (for `entropy_avail`).
|
||||
|
||||
```text
|
||||
|
||||
|
||||
/proc/sys/kernel/random/entropy_avail
|
||||
Contains the value of available entropy
|
||||
|
||||
|
|
@ -40,6 +39,12 @@ processes 86031
|
|||
Number of forks since boot.
|
||||
```
|
||||
|
||||
Kernel Samepage Merging is generally documented in the [kernel documentation][1] and
|
||||
the available metrics exposed via sysfs are documented in the [admin guide][2].
|
||||
|
||||
[1]: https://www.kernel.org/doc/html/latest/mm/ksm.html
|
||||
[2]: https://www.kernel.org/doc/html/latest/admin-guide/mm/ksm.html#ksm-daemon-sysfs-interface
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
|
|
@ -52,10 +57,13 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Get kernel statistics from /proc/stat
|
||||
# Plugin to collect various Linux kernel statistics.
|
||||
# This plugin ONLY supports Linux
|
||||
[[inputs.kernel]]
|
||||
# no configuration
|
||||
## Additional gather options
|
||||
## Possible options include:
|
||||
## * ksm - kernel same-page merging
|
||||
# collect = []
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
|
@ -68,9 +76,26 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
- interrupts (integer, `intr`)
|
||||
- processes_forked (integer, `processes`)
|
||||
- entropy_avail (integer, `entropy_available`)
|
||||
- ksm_full_scans (integer, how many times all mergeable areas have been scanned, `full_scans`)
|
||||
- ksm_max_page_sharing (integer, maximum sharing allowed for each KSM page, `max_page_sharing`)
|
||||
- ksm_merge_across_nodes (integer, whether pages should be merged across NUMA nodes, `merge_across_nodes`)
|
||||
- ksm_pages_shared (integer, how many shared pages are being used, `pages_shared`)
|
||||
- ksm_pages_sharing (integer, how many more sites are sharing the shared pages, `pages_sharing`)
|
||||
- ksm_pages_to_scan (integer, how many pages to scan before ksmd goes to sleep, `pages_to_scan`)
|
||||
- ksm_pages_unshared (integer, how many pages unique but repeatedly checked for merging, `pages_unshared`)
|
||||
- ksm_pages_volatile (integer, how many pages changing too fast to be placed in a tree, `pages_volatile`)
|
||||
- ksm_run (integer, whether ksm is running or not, `run`)
|
||||
- ksm_sleep_millisecs (integer, how many milliseconds ksmd should sleep between scans, `sleep_millisecs`)
|
||||
- ksm_stable_node_chains (integer, the number of KSM pages that hit the max_page_sharing limit, `stable_node_chains`)
|
||||
- ksm_stable_node_chains_prune_millisecs (integer, how frequently KSM checks the metadata of the pages that hit the deduplication limit, `stable_node_chains_prune_millisecs`)
|
||||
- ksm_stable_node_dups (integer, number of duplicated KSM pages, `stable_node_dups`)
|
||||
- ksm_use_zero_pages (integer, whether empty pages should be treated specially, `use_zero_pages`)
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
kernel entropy_available=2469i,boot_time=1457505775i,context_switches=2626618i,disk_pages_in=5741i,disk_pages_out=1808i,interrupts=1472736i,processes_forked=10673i 1457613402960879816
|
||||
kernel boot_time=1690487872i,context_switches=321398652i,entropy_avail=256i,interrupts=141868628i,processes_forked=946492i 1691339564000000000
|
||||
|
||||
kernel boot_time=1690487872i,context_switches=321252729i,entropy_avail=256i,interrupts=141783427i,ksm_full_scans=0i,ksm_max_page_sharing=256i,ksm_merge_across_nodes=1i,ksm_pages_shared=0i,ksm_pages_sharing=0i,ksm_pages_to_scan=100i,ksm_pages_unshared=0i,ksm_pages_volatile=0i,ksm_run=0i,ksm_sleep_millisecs=20i,ksm_stable_node_chains=0i,ksm_stable_node_chains_prune_millisecs=2000i,ksm_stable_node_dups=0i,ksm_use_zero_pages=0i,processes_forked=946467i 1691339522000000000
|
||||
|
||||
```
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ import (
|
|||
_ "embed"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
|
|
@ -28,8 +28,27 @@ var (
|
|||
)
|
||||
|
||||
// Kernel is the input plugin state: the user-facing "collect" option plus the
// paths of the files and directory the plugin reads its statistics from.
type Kernel struct {
	// ConfigCollect lists the optional collectors to enable (TOML key
	// "collect"), for example "ksm".
	ConfigCollect []string `toml:"collect"`

	// optCollect is ConfigCollect turned into a set by Init.
	optCollect map[string]bool
	// statFile is the path of the stat file read by Gather.
	statFile string
	// entropyStatFile is the path of the file holding the entropy value.
	entropyStatFile string
	// ksmStatsDir is the directory holding one file per KSM statistic.
	ksmStatsDir string
}

// Init converts ConfigCollect into the optCollect set and, when the "ksm"
// collector is enabled, verifies that the KSM statistics directory can be
// stat'ed.
//
// Entries in ConfigCollect that Init does not recognise are accepted without
// error. An error is returned when "ksm" is enabled and the directory is
// missing, or when it cannot be inspected for any other reason (for example
// a permission problem or a non-directory path component).
func (k *Kernel) Init() error {
	k.optCollect = make(map[string]bool, len(k.ConfigCollect))
	for _, v := range k.ConfigCollect {
		k.optCollect[v] = true
	}

	if !k.optCollect["ksm"] {
		return nil
	}

	_, err := os.Stat(k.ksmStatsDir)
	if os.IsNotExist(err) {
		// ksm probably not enabled in the kernel, bail out early
		return fmt.Errorf("directory %q does not exist. Is KSM enabled in this kernel?", k.ksmStatsDir)
	}
	if err != nil {
		// Any other stat failure would otherwise only surface later, on every
		// Gather call; report it once at start-up instead.
		return fmt.Errorf("checking directory %q: %w", k.ksmStatsDir, err)
	}

	return nil
}
|
||||
|
||||
func (*Kernel) SampleConfig() string {
|
||||
|
|
@ -37,18 +56,12 @@ func (*Kernel) SampleConfig() string {
|
|||
}
|
||||
|
||||
func (k *Kernel) Gather(acc telegraf.Accumulator) error {
|
||||
data, err := k.getProcStat()
|
||||
data, err := k.getProcValueBytes(k.statFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entropyData, err := os.ReadFile(k.entropyStatFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
entropyString := string(entropyData)
|
||||
entropyValue, err := strconv.ParseInt(strings.TrimSpace(entropyString), 10, 64)
|
||||
entropyValue, err := k.getProcValueInt(k.entropyStatFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -98,31 +111,78 @@ func (k *Kernel) Gather(acc telegraf.Accumulator) error {
|
|||
}
|
||||
}
|
||||
|
||||
if k.optCollect["ksm"] {
|
||||
stats := []string{
|
||||
"full_scans", "max_page_sharing",
|
||||
"merge_across_nodes", "pages_shared",
|
||||
"pages_sharing", "pages_to_scan",
|
||||
"pages_unshared", "pages_volatile",
|
||||
"run", "sleep_millisecs",
|
||||
"stable_node_chains", "stable_node_chains_prune_millisecs",
|
||||
"stable_node_dups", "use_zero_pages",
|
||||
}
|
||||
// these exist in very recent Linux versions only, but useful to include if there.
|
||||
extraStats := []string{"general_profit"}
|
||||
|
||||
for _, f := range stats {
|
||||
m, err := k.getProcValueInt(filepath.Join(k.ksmStatsDir, f))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fields["ksm_"+f] = m
|
||||
}
|
||||
|
||||
for _, f := range extraStats {
|
||||
m, err := k.getProcValueInt(filepath.Join(k.ksmStatsDir, f))
|
||||
if err != nil {
|
||||
// if an extraStats metric doesn't exist in our kernel version, ignore it.
|
||||
continue
|
||||
}
|
||||
|
||||
fields["ksm_"+f] = m
|
||||
}
|
||||
}
|
||||
acc.AddCounter("kernel", fields, map[string]string{})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (k *Kernel) getProcStat() ([]byte, error) {
|
||||
if _, err := os.Stat(k.statFile); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("kernel: %s does not exist", k.statFile)
|
||||
func (k *Kernel) getProcValueBytes(path string) ([]byte, error) {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
return nil, fmt.Errorf("Path %q does not exist", path)
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
data, err := os.ReadFile(k.statFile)
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to read from %q: %w", path, err)
|
||||
}
|
||||
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (k *Kernel) getProcValueInt(path string) (int64, error) {
|
||||
data, err := k.getProcValueBytes(path)
|
||||
if err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
m, err := strconv.ParseInt(string(bytes.TrimSpace(data)), 10, 64)
|
||||
if err != nil {
|
||||
return -1, fmt.Errorf("failed to parse %q as an integer: %w", data, err)
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("kernel", func() telegraf.Input {
|
||||
return &Kernel{
|
||||
statFile: "/proc/stat",
|
||||
entropyStatFile: "/proc/sys/kernel/random/entropy_avail",
|
||||
ksmStatsDir: "/sys/kernel/mm/ksm",
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,12 +4,37 @@ package kernel
|
|||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/metric"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestGetProcValueInt(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
entropyStatFile: "testdata/entropy_stat_file_full",
|
||||
}
|
||||
|
||||
d, err := k.getProcValueInt(k.entropyStatFile)
|
||||
require.NoError(t, err)
|
||||
require.IsType(t, int64(1), d)
|
||||
}
|
||||
|
||||
func TestGetProcValueByte(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
entropyStatFile: "testdata/entropy_stat_file_full",
|
||||
}
|
||||
|
||||
d, err := k.getProcValueBytes(k.entropyStatFile)
|
||||
require.NoError(t, err)
|
||||
require.IsType(t, []byte("test"), d)
|
||||
}
|
||||
|
||||
func TestFullProcFile(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
|
|
@ -19,16 +44,24 @@ func TestFullProcFile(t *testing.T) {
|
|||
acc := testutil.Accumulator{}
|
||||
require.NoError(t, k.Gather(&acc))
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"boot_time": int64(1457505775),
|
||||
"context_switches": int64(2626618),
|
||||
"disk_pages_in": int64(5741),
|
||||
"disk_pages_out": int64(1808),
|
||||
"interrupts": int64(1472736),
|
||||
"processes_forked": int64(10673),
|
||||
"entropy_avail": int64(1024),
|
||||
expected := []telegraf.Metric{
|
||||
metric.New(
|
||||
"kernel",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"boot_time": int64(1457505775),
|
||||
"context_switches": int64(2626618),
|
||||
"disk_pages_in": int64(5741),
|
||||
"disk_pages_out": int64(1808),
|
||||
"interrupts": int64(1472736),
|
||||
"processes_forked": int64(10673),
|
||||
"entropy_avail": int64(1024),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
1,
|
||||
),
|
||||
}
|
||||
acc.AssertContainsFields(t, "kernel", fields)
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestPartialProcFile(t *testing.T) {
|
||||
|
|
@ -73,7 +106,7 @@ func TestInvalidProcFile2(t *testing.T) {
|
|||
acc := testutil.Accumulator{}
|
||||
err := k.Gather(&acc)
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "no such file")
|
||||
require.Contains(t, err.Error(), "does not exist")
|
||||
}
|
||||
|
||||
func TestNoProcFile(t *testing.T) {
|
||||
|
|
@ -86,3 +119,101 @@ func TestNoProcFile(t *testing.T) {
|
|||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "does not exist")
|
||||
}
|
||||
|
||||
func TestInvalidCollectOption(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
entropyStatFile: "testdata/entropy_stat_file_full",
|
||||
ConfigCollect: []string{"invalidOption"},
|
||||
}
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
|
||||
require.NoError(t, k.Init())
|
||||
require.NoError(t, k.Gather(&acc))
|
||||
}
|
||||
|
||||
func TestKsmEnabledValidKsmDirectory(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
entropyStatFile: "testdata/entropy_stat_file_full",
|
||||
ksmStatsDir: "testdata/ksm/valid",
|
||||
ConfigCollect: []string{"ksm"},
|
||||
}
|
||||
|
||||
require.NoError(t, k.Init())
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
require.NoError(t, k.Gather(&acc))
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
metric.New(
|
||||
"kernel",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"boot_time": int64(1457505775),
|
||||
"context_switches": int64(2626618),
|
||||
"disk_pages_in": int64(5741),
|
||||
"disk_pages_out": int64(1808),
|
||||
"interrupts": int64(1472736),
|
||||
"processes_forked": int64(10673),
|
||||
"entropy_avail": int64(1024),
|
||||
"ksm_full_scans": int64(123),
|
||||
"ksm_max_page_sharing": int64(10000),
|
||||
"ksm_merge_across_nodes": int64(1),
|
||||
"ksm_pages_shared": int64(12922),
|
||||
"ksm_pages_sharing": int64(28384),
|
||||
"ksm_pages_to_scan": int64(12928),
|
||||
"ksm_pages_unshared": int64(92847),
|
||||
"ksm_pages_volatile": int64(2824171),
|
||||
"ksm_run": int64(1),
|
||||
"ksm_sleep_millisecs": int64(1000),
|
||||
"ksm_stable_node_chains": int64(0),
|
||||
"ksm_stable_node_chains_prune_millisecs": int64(0),
|
||||
"ksm_stable_node_dups": int64(0),
|
||||
"ksm_use_zero_pages": int64(1),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
1,
|
||||
),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestKSMEnabledMissingFile(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "/proc/stat",
|
||||
entropyStatFile: "/proc/sys/kernel/random/entropy_avail",
|
||||
ksmStatsDir: "testdata/ksm/missing",
|
||||
ConfigCollect: []string{"ksm"},
|
||||
}
|
||||
|
||||
require.NoError(t, k.Init())
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
require.ErrorContains(t, k.Gather(&acc), "does not exist")
|
||||
}
|
||||
|
||||
func TestKSMEnabledWrongDir(t *testing.T) {
|
||||
k := Kernel{
|
||||
ksmStatsDir: "testdata/this_file_does_not_exist",
|
||||
ConfigCollect: []string{"ksm"},
|
||||
}
|
||||
|
||||
require.ErrorContains(t, k.Init(), "Is KSM enabled in this kernel?")
|
||||
}
|
||||
|
||||
func TestKSMDisabledNoKSMTags(t *testing.T) {
|
||||
k := Kernel{
|
||||
statFile: "testdata/stat_file_full",
|
||||
entropyStatFile: "testdata/entropy_stat_file_full",
|
||||
ksmStatsDir: "testdata/this_file_does_not_exist",
|
||||
ConfigCollect: []string{},
|
||||
}
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
|
||||
require.NoError(t, k.Init())
|
||||
require.NoError(t, k.Gather(&acc))
|
||||
require.False(t, acc.HasField("kernel", "ksm_run"))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,7 @@
|
|||
# Get kernel statistics from /proc/stat
|
||||
# Plugin to collect various Linux kernel statistics.
|
||||
# This plugin ONLY supports Linux
|
||||
[[inputs.kernel]]
|
||||
# no configuration
|
||||
## Additional gather options
|
||||
## Possible options include:
|
||||
## * ksm - kernel same-page merging
|
||||
# collect = []
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
123A
|
||||
|
|
@ -0,0 +1 @@
|
|||
10000
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
12922
|
||||
|
|
@ -0,0 +1 @@
|
|||
28384
|
||||
|
|
@ -0,0 +1 @@
|
|||
12928
|
||||
|
|
@ -0,0 +1 @@
|
|||
92847
|
||||
|
|
@ -0,0 +1 @@
|
|||
2824171
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
1000
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
1
plugins/inputs/kernel/testdata/ksm/invalid/stable_node_chains_prune_millisecs
vendored
Normal file
1
plugins/inputs/kernel/testdata/ksm/invalid/stable_node_chains_prune_millisecs
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
123
|
||||
|
|
@ -0,0 +1 @@
|
|||
10000
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
12922
|
||||
|
|
@ -0,0 +1 @@
|
|||
28384
|
||||
|
|
@ -0,0 +1 @@
|
|||
12928
|
||||
|
|
@ -0,0 +1 @@
|
|||
92847
|
||||
|
|
@ -0,0 +1 @@
|
|||
2824171
|
||||
|
|
@ -0,0 +1 @@
|
|||
1000
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
1
plugins/inputs/kernel/testdata/ksm/missing/stable_node_chains_prune_millisecs
vendored
Normal file
1
plugins/inputs/kernel/testdata/ksm/missing/stable_node_chains_prune_millisecs
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
123
|
||||
|
|
@ -0,0 +1 @@
|
|||
10000
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
12922
|
||||
|
|
@ -0,0 +1 @@
|
|||
28384
|
||||
|
|
@ -0,0 +1 @@
|
|||
12928
|
||||
|
|
@ -0,0 +1 @@
|
|||
92847
|
||||
|
|
@ -0,0 +1 @@
|
|||
2824171
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
|
|
@ -0,0 +1 @@
|
|||
1000
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1
|
||||
Loading…
Reference in New Issue