feat: new input Hugepages plugin (#10763)

This commit is contained in:
Paweł Żak 2022-03-16 17:00:45 +01:00 committed by GitHub
parent 8701ed173a
commit 7e652fdd00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 675 additions and 0 deletions

View File

@ -67,6 +67,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/http_listener_v2"
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/hugepages"
_ "github.com/influxdata/telegraf/plugins/inputs/icinga2"
_ "github.com/influxdata/telegraf/plugins/inputs/infiniband"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"

View File

@ -0,0 +1,67 @@
# Hugepages Input Plugin
Huge pages are a Linux memory management feature that reduces the overhead of
Translation Lookaside Buffer (TLB) lookups on machines with large amounts of memory by using larger
memory pages. This plugin gathers statistics from the kernel's persistent huge pages (hugetlb) pools.
Consult <https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html> for more details.
## Configuration
```toml
# Gathers huge pages measurements.
[[inputs.hugepages]]
## Supported huge page types:
## - "root" - based on root huge page control directory: /sys/kernel/mm/hugepages
## - "per_node" - based on per NUMA node directories: /sys/devices/system/node/node[0-9]*/hugepages
## - "meminfo" - based on /proc/meminfo file
# types = ["root", "per_node"]
```
## Measurements
**The following measurements are supported by the Hugepages plugin:**
- hugepages_root (gathered from root huge page control directory: `/sys/kernel/mm/hugepages`)
- tags:
- size_kb (integer, kB)
- fields:
- free (integer)
- mempolicy (integer)
- overcommit (integer)
- reserved (integer)
- surplus (integer)
- total (integer)
- hugepages_per_node (gathered from per NUMA node directories: `/sys/devices/system/node/node[0-9]*/hugepages`)
- tags:
- size_kb (integer, kB)
- node (integer)
- fields:
- free (integer)
- surplus (integer)
- total (integer)
- hugepages_meminfo (gathered from `/proc/meminfo` file)
- The fields `total`, `free`, `reserved`, and `surplus` are counts of pages of default size. Fields with suffix `_kb` are in kilobytes.
- fields:
- anonymous_kb (integer, kB)
- file_kb (integer, kB)
- free (integer)
- reserved (integer)
- shared_kb (integer, kB)
- size_kb (integer, kB)
- surplus (integer)
- tlb_kb (integer, kB)
- total (integer)
## Example Output
```text
$ ./telegraf -config telegraf.conf -input-filter hugepages -test
> hugepages_root,host=ubuntu,size_kb=1048576 free=0i,mempolicy=8i,overcommit=0i,reserved=0i,surplus=0i,total=8i 1646258020000000000
> hugepages_root,host=ubuntu,size_kb=2048 free=883i,mempolicy=2048i,overcommit=0i,reserved=0i,surplus=0i,total=2048i 1646258020000000000
> hugepages_per_node,host=ubuntu,size_kb=1048576,node=0 free=0i,surplus=0i,total=4i 1646258020000000000
> hugepages_per_node,host=ubuntu,size_kb=2048,node=0 free=434i,surplus=0i,total=1024i 1646258020000000000
> hugepages_per_node,host=ubuntu,size_kb=1048576,node=1 free=0i,surplus=0i,total=4i 1646258020000000000
> hugepages_per_node,host=ubuntu,size_kb=2048,node=1 free=449i,surplus=0i,total=1024i 1646258020000000000
> hugepages_meminfo,host=ubuntu anonymous_kb=0i,file_kb=0i,free=883i,reserved=0i,shared_kb=0i,size_kb=2048i,surplus=0i,tlb_kb=12582912i,total=2048i 1646258020000000000
```

View File

@ -0,0 +1,287 @@
//go:build linux
// +build linux
package hugepages
import (
"bytes"
"fmt"
"io/ioutil"
"path/filepath"
"strconv"
"strings"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
// Filesystem locations, configuration keywords, and the sample config
// used by the hugepages plugin.
const (
	// path to root huge page control directory
	rootHugepagePath = "/sys/kernel/mm/hugepages"
	// path where per NUMA node statistics are kept
	numaNodePath = "/sys/devices/system/node"
	// path to the meminfo file
	meminfoPath = "/proc/meminfo"

	// valid values for the "types" configuration option
	rootHugepages    = "root"
	perNodeHugepages = "per_node"
	meminfoHugepages = "meminfo"

	// sample configuration returned by SampleConfig
	hugepagesSampleConfig = `
  ## Supported huge page types:
  ##   - "root" - based on root huge page control directory: /sys/kernel/mm/hugepages
  ##   - "per_node" - based on per NUMA node directories: /sys/devices/system/node/node[0-9]*/hugepages
  ##   - "meminfo" - based on /proc/meminfo file
  # types = ["root", "per_node"]
`
)
var (
	// byte constants used when splitting and trimming raw file contents
	newlineByte = []byte("\n")
	colonByte   = []byte(":")

	// hugepagesMetricsRoot maps file names found under
	// /sys/kernel/mm/hugepages/hugepages-*/ to the field names reported in
	// the "hugepages_root" measurement.
	hugepagesMetricsRoot = map[string]string{
		"free_hugepages":          "free",
		"nr_hugepages":            "total",
		"nr_hugepages_mempolicy":  "mempolicy",
		"nr_overcommit_hugepages": "overcommit",
		"resv_hugepages":          "reserved",
		"surplus_hugepages":       "surplus",
	}

	// hugepagesMetricsPerNUMANode maps file names found under
	// /sys/devices/system/node/node*/hugepages/hugepages-*/ to the field
	// names reported in the "hugepages_per_node" measurement.
	hugepagesMetricsPerNUMANode = map[string]string{
		"free_hugepages":    "free",
		"nr_hugepages":      "total",
		"surplus_hugepages": "surplus",
	}

	// hugepagesMetricsFromMeminfo maps /proc/meminfo keys to the field
	// names reported in the "hugepages_meminfo" measurement.
	hugepagesMetricsFromMeminfo = map[string]string{
		"HugePages_Total": "total",
		"HugePages_Free":  "free",
		"HugePages_Rsvd":  "reserved",
		"HugePages_Surp":  "surplus",
		"Hugepagesize":    "size_kb",
		"Hugetlb":         "tlb_kb",
		"AnonHugePages":   "anonymous_kb",
		"ShmemHugePages":  "shared_kb",
		"FileHugePages":   "file_kb",
	}
)
// Hugepages is a Telegraf input plugin that gathers huge pages statistics
// from sysfs and /proc/meminfo.
type Hugepages struct {
	// Types selects which statistics to gather; valid entries are
	// "root", "per_node", and "meminfo". When nil, defaults apply
	// (see parseHugepagesConfig).
	Types []string `toml:"types"`

	// flags derived from Types during Init
	gatherRoot    bool
	gatherPerNode bool
	gatherMeminfo bool

	// filesystem paths are kept as fields (initialized from the package
	// constants in Init) so tests can point them at fixture directories
	rootHugepagePath string
	numaNodePath     string
	meminfoPath      string
}
// Description returns a one-sentence description of the plugin.
func (h *Hugepages) Description() string {
	return "Gathers huge pages measurements."
}
// SampleConfig returns the plugin's sample configuration snippet.
func (h *Hugepages) SampleConfig() string {
	return hugepagesSampleConfig
}
// Init validates the configured types and installs the default
// filesystem paths the gatherers read from.
func (h *Hugepages) Init() error {
	if err := h.parseHugepagesConfig(); err != nil {
		return err
	}

	h.rootHugepagePath = rootHugepagePath
	h.numaNodePath = numaNodePath
	h.meminfoPath = meminfoPath

	return nil
}
// Gather collects the enabled sets of huge pages statistics and adds them
// to the accumulator. It stops at the first gatherer that fails.
func (h *Hugepages) Gather(acc telegraf.Accumulator) error {
	if h.gatherRoot {
		if err := h.gatherRootStats(acc); err != nil {
			// %w keeps the underlying error available to errors.Is/As
			return fmt.Errorf("gathering root stats failed: %w", err)
		}
	}

	if h.gatherPerNode {
		if err := h.gatherStatsPerNode(acc); err != nil {
			return fmt.Errorf("gathering per node stats failed: %w", err)
		}
	}

	if h.gatherMeminfo {
		if err := h.gatherStatsFromMeminfo(acc); err != nil {
			return fmt.Errorf("gathering meminfo stats failed: %w", err)
		}
	}

	return nil
}
// gatherRootStats collects hugepages statistics from the root huge page
// control directory and reports them as the "hugepages_root" measurement.
func (h *Hugepages) gatherRootStats(acc telegraf.Accumulator) error {
	return h.gatherFromHugepagePath(acc, "hugepages_"+rootHugepages, h.rootHugepagePath, hugepagesMetricsRoot, nil)
}
// gatherStatsPerNode collects hugepages statistics for every NUMA node
// directory (node[0-9]*) and reports them as "hugepages_per_node",
// tagged with the node number.
func (h *Hugepages) gatherStatsPerNode(acc telegraf.Accumulator) error {
	entries, err := ioutil.ReadDir(h.numaNodePath)
	if err != nil {
		return err
	}

	// read metrics from: node*/hugepages/hugepages-*/*
	for _, entry := range entries {
		name := entry.Name()
		if !entry.IsDir() || !strings.HasPrefix(name, "node") {
			continue
		}

		// skip directories whose suffix is not a numeric node ID
		nodeID := strings.TrimPrefix(name, "node")
		if _, err := strconv.Atoi(nodeID); err != nil {
			continue
		}

		tags := map[string]string{
			"node": nodeID,
		}
		hugepagesDir := filepath.Join(h.numaNodePath, name, "hugepages")
		if err := h.gatherFromHugepagePath(acc, "hugepages_"+perNodeHugepages, hugepagesDir, hugepagesMetricsPerNUMANode, tags); err != nil {
			return err
		}
	}
	return nil
}
// gatherFromHugepagePath reads hugepages statistics from a single huge page
// control directory (path/hugepages-<size>kB/<file>) and emits one point per
// page size. Only files whose names appear in fileFilter are read; the map
// translates file names to field names. defaultTags (may be nil) are copied
// onto every emitted point in addition to the "size_kb" tag.
func (h *Hugepages) gatherFromHugepagePath(acc telegraf.Accumulator, measurement, path string, fileFilter map[string]string, defaultTags map[string]string) error {
	// read metrics from: hugepages/hugepages-*/*
	sizeDirs, err := ioutil.ReadDir(path)
	if err != nil {
		return fmt.Errorf("reading root dir failed: %v", err)
	}

	for _, sizeDir := range sizeDirs {
		dirName := sizeDir.Name()
		if !sizeDir.IsDir() || !strings.HasPrefix(dirName, "hugepages-") {
			continue
		}

		// directory names look like "hugepages-2048kB"; keep only the size
		size := strings.TrimPrefix(strings.TrimSuffix(dirName, "kB"), "hugepages-")
		if _, err := strconv.Atoi(size); err != nil {
			continue
		}

		statsDir := filepath.Join(path, dirName)
		statFiles, err := ioutil.ReadDir(statsDir)
		if err != nil {
			return fmt.Errorf("reading metric dir failed: %v", err)
		}

		fields := make(map[string]interface{})
		for _, statFile := range statFiles {
			fieldName, wanted := fileFilter[statFile.Name()]
			if !statFile.Mode().IsRegular() || !wanted {
				continue
			}

			statPath := filepath.Join(statsDir, statFile.Name())
			raw, err := ioutil.ReadFile(statPath)
			if err != nil {
				return err
			}

			value, err := strconv.Atoi(string(bytes.TrimSuffix(raw, newlineByte)))
			if err != nil {
				return fmt.Errorf("failed to convert content of '%s': %v", statPath, err)
			}
			fields[fieldName] = value
		}

		// nothing matched in this size directory; emit no point for it
		if len(fields) == 0 {
			continue
		}

		tags := make(map[string]string)
		for key, value := range defaultTags {
			tags[key] = value
		}
		tags["size_kb"] = size

		acc.AddFields(measurement, fields, tags)
	}
	return nil
}
// gatherStatsFromMeminfo parses the hugepages-related lines of the meminfo
// file and reports them as a single "hugepages_meminfo" point.
func (h *Hugepages) gatherStatsFromMeminfo(acc telegraf.Accumulator) error {
	content, err := ioutil.ReadFile(h.meminfoPath)
	if err != nil {
		return err
	}

	fields := make(map[string]interface{})
	for _, line := range bytes.Split(content, newlineByte) {
		parts := bytes.Fields(line)
		if len(parts) < 2 {
			continue
		}

		// meminfo lines look like "HugePages_Total:    2048 [kB]"
		key := string(bytes.TrimSuffix(parts[0], colonByte))
		name, wanted := hugepagesMetricsFromMeminfo[key]
		if !wanted {
			continue
		}

		value, err := strconv.Atoi(string(parts[1]))
		if err != nil {
			return fmt.Errorf("failed to convert content of '%s': %v", key, err)
		}
		fields[name] = value
	}

	acc.AddFields("hugepages_"+meminfoHugepages, fields, map[string]string{})
	return nil
}
// parseHugepagesConfig translates the configured Types slice into the
// gather* flags. A nil slice enables the defaults ("root" and "meminfo");
// an explicitly empty slice or an unknown type name is a configuration
// error.
func (h *Hugepages) parseHugepagesConfig() error {
	// default
	if h.Types == nil {
		h.gatherRoot = true
		h.gatherMeminfo = true
		return nil
	}

	// empty array
	if len(h.Types) == 0 {
		return fmt.Errorf("plugin was configured with nothing to read")
	}

	for _, pagesType := range h.Types {
		switch pagesType {
		case rootHugepages:
			h.gatherRoot = true
		case perNodeHugepages:
			h.gatherPerNode = true
		case meminfoHugepages:
			h.gatherMeminfo = true
		default:
			return fmt.Errorf("provided hugepages type `%s` is not valid", pagesType)
		}
	}
	return nil
}
// init registers the plugin with Telegraf's input plugin registry.
func init() {
	inputs.Add("hugepages", func() telegraf.Input {
		return &Hugepages{}
	})
}

View File

@ -0,0 +1,4 @@
//go:build !linux
// +build !linux
package hugepages

View File

@ -0,0 +1,228 @@
//go:build linux
// +build linux
package hugepages
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf/testutil"
)
// TestInit covers parseHugepagesConfig behavior via Init: defaults for a
// nil Types slice, rejection of an explicitly empty slice, flag mapping for
// valid type names, and rejection of unknown type names.
func TestInit(t *testing.T) {
	t.Run("when no config is provided then all fields should be set to default values", func(t *testing.T) {
		h := Hugepages{}
		err := h.Init()

		require.NoError(t, err)
		// defaults: "root" and "meminfo" enabled, "per_node" disabled
		require.True(t, h.gatherRoot)
		require.False(t, h.gatherPerNode)
		require.True(t, h.gatherMeminfo)
		// paths are initialized from the package constants
		require.Equal(t, rootHugepagePath, h.rootHugepagePath)
		require.Equal(t, numaNodePath, h.numaNodePath)
		require.Equal(t, meminfoPath, h.meminfoPath)
	})

	t.Run("when empty hugepages types is provided then plugin should fail to initialize", func(t *testing.T) {
		h := Hugepages{Types: []string{}}
		err := h.Init()

		require.Error(t, err)
		require.Contains(t, err.Error(), "plugin was configured with nothing to read")
	})

	t.Run("when valid hugepages types is provided then proper flags should be set", func(t *testing.T) {
		h := Hugepages{Types: []string{"root", "per_node", "meminfo"}}
		err := h.Init()

		require.NoError(t, err)
		require.True(t, h.gatherRoot)
		require.True(t, h.gatherPerNode)
		require.True(t, h.gatherMeminfo)
	})

	t.Run("when hugepages types contains not supported value then plugin should fail to initialize", func(t *testing.T) {
		h := Hugepages{Types: []string{"root", "per_node", "linux_hdd", "meminfo"}}
		err := h.Init()

		require.Error(t, err)
		require.Contains(t, err.Error(), "provided hugepages type")
	})
}
// TestGather exercises the three gatherers against fixture directories under
// ./testdata: "valid" fixtures verify the emitted fields/tags, "invalid"
// fixtures verify error handling and the skip-on-bad-naming behavior.
// Fixture paths are injected directly into the struct, bypassing Init.
func TestGather(t *testing.T) {
	t.Run("when root hugepages type is enabled then gather all root metrics successfully", func(t *testing.T) {
		h := Hugepages{
			rootHugepagePath: "./testdata/valid/mm/hugepages",
			gatherRoot:       true,
		}

		acc := &testutil.Accumulator{}
		require.NoError(t, h.Gather(acc))

		// one point per page-size directory: 2048 kB ...
		expectedFields := map[string]interface{}{
			"free":       883,
			"reserved":   0,
			"surplus":    0,
			"mempolicy":  2048,
			"total":      2048,
			"overcommit": 0,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_root", expectedFields, map[string]string{"size_kb": "2048"})

		// ... and 1048576 kB (1 GiB)
		expectedFields = map[string]interface{}{
			"free":       0,
			"reserved":   0,
			"surplus":    0,
			"mempolicy":  8,
			"total":      8,
			"overcommit": 0,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_root", expectedFields, map[string]string{"size_kb": "1048576"})
	})

	t.Run("when per node hugepages type is enabled then gather all per node metrics successfully", func(t *testing.T) {
		h := Hugepages{
			numaNodePath:  "./testdata/valid/node",
			gatherPerNode: true,
		}

		acc := &testutil.Accumulator{}
		require.NoError(t, h.Gather(acc))

		// one point per (node, page size) pair: node 0 / 2048 kB
		expectedFields := map[string]interface{}{
			"free":    434,
			"surplus": 0,
			"total":   1024,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_per_node", expectedFields, map[string]string{"size_kb": "2048", "node": "0"})

		// node 1 / 2048 kB
		expectedFields = map[string]interface{}{
			"free":    449,
			"surplus": 0,
			"total":   1024,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_per_node", expectedFields, map[string]string{"size_kb": "2048", "node": "1"})

		// node 0 / 1048576 kB
		expectedFields = map[string]interface{}{
			"free":    0,
			"surplus": 0,
			"total":   4,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_per_node", expectedFields, map[string]string{"size_kb": "1048576", "node": "0"})

		// node 1 / 1048576 kB
		expectedFields = map[string]interface{}{
			"free":    0,
			"surplus": 0,
			"total":   4,
		}
		acc.AssertContainsTaggedFields(t, "hugepages_per_node", expectedFields, map[string]string{"size_kb": "1048576", "node": "1"})
	})

	t.Run("when meminfo hugepages type is enabled then gather all meminfo metrics successfully", func(t *testing.T) {
		h := Hugepages{
			meminfoPath:   "./testdata/valid/meminfo",
			gatherMeminfo: true,
		}

		acc := &testutil.Accumulator{}
		require.NoError(t, h.Gather(acc))

		// all hugepages-related meminfo keys land in a single point
		expectedFields := map[string]interface{}{
			"anonymous_kb": 0,
			"shared_kb":    0,
			"file_kb":      0,
			"total":        2048,
			"free":         883,
			"reserved":     0,
			"surplus":      0,
			"size_kb":      2048,
			"tlb_kb":       12582912,
		}
		acc.AssertContainsFields(t, "hugepages_meminfo", expectedFields)
	})

	t.Run("when root hugepages type is enabled but path is invalid then return error", func(t *testing.T) {
		h := Hugepages{
			rootHugepagePath: "./testdata/not_existing_path",
			gatherRoot:       true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})

	t.Run("when root hugepages type is enabled but files/directories don't have proper naming then gather no metrics", func(t *testing.T) {
		h := Hugepages{
			rootHugepagePath: "./testdata/invalid/1/node0/hugepages",
			gatherRoot:       true,
		}

		acc := &testutil.Accumulator{}
		// misnamed entries are skipped silently, not treated as errors
		require.NoError(t, h.Gather(acc))
		require.Nil(t, acc.Metrics)
	})

	t.Run("when root hugepages type is enabled but metric file doesn't contain number then return error", func(t *testing.T) {
		h := Hugepages{
			rootHugepagePath: "./testdata/invalid/2/node1/hugepages",
			gatherRoot:       true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})

	t.Run("when per node hugepages type is enabled but path is invalid then return error", func(t *testing.T) {
		h := Hugepages{
			numaNodePath:  "./testdata/not_existing_path",
			gatherPerNode: true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})

	t.Run("when per node hugepages type is enabled but files/directories don't have proper naming then gather no metrics", func(t *testing.T) {
		h := Hugepages{
			numaNodePath:  "./testdata/invalid/1",
			gatherPerNode: true,
		}

		acc := &testutil.Accumulator{}
		require.NoError(t, h.Gather(acc))
		require.Nil(t, acc.Metrics)
	})

	t.Run("when per node hugepages type is enabled but metric file doesn't contain number then return error", func(t *testing.T) {
		h := Hugepages{
			numaNodePath:  "./testdata/invalid/2/",
			gatherPerNode: true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})

	t.Run("when meminfo hugepages type is enabled but path is invalid then return error", func(t *testing.T) {
		h := Hugepages{
			meminfoPath:   "./testdata/not_existing_path",
			gatherMeminfo: true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})

	t.Run("when per node hugepages type is enabled but any metric doesn't contain number then return error", func(t *testing.T) {
		h := Hugepages{
			meminfoPath:   "./testdata/invalid/meminfo",
			gatherMeminfo: true,
		}

		acc := &testutil.Accumulator{}
		require.Error(t, h.Gather(acc))
	})
}

View File

@ -0,0 +1 @@
whatever

View File

@ -0,0 +1,8 @@
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
HugePages_Total: 2048
HugePages_Free: sixtynine
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 12582912 kB

View File

@ -0,0 +1,51 @@
MemTotal: 264026876 kB
MemFree: 260102944 kB
MemAvailable: 260015468 kB
Buffers: 115268 kB
Cached: 1203416 kB
SwapCached: 0 kB
Active: 599752 kB
Inactive: 950072 kB
Active(anon): 2740 kB
Inactive(anon): 224176 kB
Active(file): 597012 kB
Inactive(file): 725896 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 8388604 kB
SwapFree: 8388604 kB
Dirty: 0 kB
Writeback: 0 kB
AnonPages: 231220 kB
Mapped: 317748 kB
Shmem: 5848 kB
KReclaimable: 170796 kB
Slab: 347860 kB
SReclaimable: 170796 kB
SUnreclaim: 177064 kB
KernelStack: 13776 kB
PageTables: 10756 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 140139896 kB
Committed_AS: 2661568 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 264276 kB
VmallocChunk: 0 kB
Percpu: 40896 kB
HardwareCorrupted: 0 kB
AnonHugePages: 0 kB
ShmemHugePages: 0 kB
ShmemPmdMapped: 0 kB
FileHugePages: 0 kB
FilePmdMapped: 0 kB
HugePages_Total: 2048
HugePages_Free: 883
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 2048 kB
Hugetlb: 12582912 kB
DirectMap4k: 312056 kB
DirectMap2M: 6930432 kB
DirectMap1G: 263192576 kB