feat(inputs.linux_cpu): Add plugin to collect CPU metrics on Linux (#8988)
This commit is contained in:
parent
45abba836f
commit
7f3395f148
|
|
@ -0,0 +1,5 @@
|
|||
//go:build !custom || inputs || inputs.linux_cpu
|
||||
|
||||
package all
|
||||
|
||||
import _ "github.com/influxdata/telegraf/plugins/inputs/linux_cpu" // register plugin
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
# Linux CPU Input Plugin
|
||||
|
||||
The `linux_cpu` plugin gathers CPU metrics exposed on Linux-based systems.
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Provides Linux CPU metrics
|
||||
[[inputs.linux_cpu]]
|
||||
## Path for sysfs filesystem.
|
||||
## See https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt
|
||||
## Defaults:
|
||||
# host_sys = "/sys"
|
||||
|
||||
## CPU metrics collected by the plugin.
|
||||
## Supported options:
|
||||
## "cpufreq", "thermal"
|
||||
## Defaults:
|
||||
# metrics = ["cpufreq"]
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
The following tags are emitted by the plugin under the name `linux_cpu`:
|
||||
|
||||
| Tag | Description |
|
||||
|-------|-----------------------|
|
||||
| `cpu` | Identifier of the CPU |
|
||||
|
||||
The following fields are emitted by the plugin when selecting `cpufreq`:
|
||||
|
||||
| Metric name (field) | Description | Units |
|
||||
|---------------------|------------------------------------------------------------|-------|
|
||||
| `scaling_cur_freq` | Current frequency of the CPU as determined by CPUFreq | KHz |
|
||||
| `scaling_min_freq` | Minimum frequency the governor can scale to | KHz |
|
||||
| `scaling_max_freq` | Maximum frequency the governor can scale to | KHz |
|
||||
| `cpuinfo_cur_freq` | Current frequency of the CPU as determined by the hardware | KHz |
|
||||
| `cpuinfo_min_freq` | Minimum operating frequency of the CPU | KHz |
|
||||
| `cpuinfo_max_freq` | Maximum operating frequency of the CPU | KHz |
|
||||
|
||||
The following fields are emitted by the plugin when selecting `thermal`:
|
||||
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----------------------|-------------------------------------------------------------|-------|
|
||||
| `throttle_count` | Number of thermal throttle events reported by the CPU | |
|
||||
| `throttle_max_time` | Maximum amount of time CPU was in throttled state | ms |
|
||||
| `throttle_total_time` | Cumulative time during which the CPU was in throttled state | ms |
|
||||
|
||||
## Example Output
|
||||
|
||||
```shell
|
||||
> linux_cpu,cpu=0,host=go scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=803157i,scaling_min_freq=400000i 1617621150000000000
|
||||
> linux_cpu,cpu=1,host=go throttle_total_time=0i,scaling_cur_freq=802939i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
|
||||
> linux_cpu,cpu=10,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=838343i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
|
||||
> linux_cpu,cpu=11,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800054i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
|
||||
> linux_cpu,cpu=2,host=go throttle_total_time=0i,scaling_cur_freq=800404i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
|
||||
> linux_cpu,cpu=3,host=go throttle_total_time=0i,scaling_cur_freq=800126i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i 1617621150000000000
|
||||
> linux_cpu,cpu=4,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800359i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
|
||||
> linux_cpu,cpu=5,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=800093i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
|
||||
> linux_cpu,cpu=6,host=go cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=741646i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i 1617621150000000000
|
||||
> linux_cpu,cpu=7,host=go scaling_cur_freq=700006i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i 1617621150000000000
|
||||
> linux_cpu,cpu=8,host=go throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=700046i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i,throttle_count=0i 1617621150000000000
|
||||
> linux_cpu,cpu=9,host=go throttle_count=0i,throttle_max_time=0i,throttle_total_time=0i,scaling_cur_freq=700075i,scaling_min_freq=400000i,scaling_max_freq=4700000i,cpuinfo_min_freq=400000i,cpuinfo_max_freq=4700000i 1617621150000000000
|
||||
```
|
||||
|
|
@ -0,0 +1,214 @@
|
|||
//go:build linux
|
||||
|
||||
package linux_cpu
|
||||
|
||||
import (
|
||||
_ "embed"
|
||||
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal/choice"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
const (
	// defaultHostSys is the sysfs root used when host_sys is not configured.
	defaultHostSys = "/sys"

	// Names of the metric groups that may be listed in the "metrics" option.
	cpufreq = "cpufreq"
	thermal = "thermal"
)
|
||||
|
||||
type LinuxCPU struct {
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
PathSysfs string `toml:"host_sys"`
|
||||
Metrics []string `toml:"metrics"`
|
||||
cpus []cpu
|
||||
}
|
||||
|
||||
// cpu describes one discovered CPU directory and the property files that
// are read for it on every gather.
type cpu struct {
	// id is the directory name with the "cpu" prefix removed.
	id string
	// path is the CPU directory matched by the discovery glob.
	path string
	// props maps a field name to the file that holds its value.
	props map[string]string
}
|
||||
|
||||
// prop describes a single property file that may be collected for a CPU.
type prop struct {
	// name is the field name the value is reported under.
	name string
	// path is the file location relative to the CPU directory.
	path string
	// optional marks properties whose absence does not disqualify the CPU.
	optional bool
}
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
func (g *LinuxCPU) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (g *LinuxCPU) Init() error {
|
||||
if g.PathSysfs == "" {
|
||||
g.PathSysfs = defaultHostSys
|
||||
}
|
||||
|
||||
if len(g.Metrics) == 0 {
|
||||
// The user has not enabled any of the metrics
|
||||
return fmt.Errorf("no metrics selected")
|
||||
}
|
||||
|
||||
cpus, err := g.discoverCpus()
|
||||
if err != nil {
|
||||
return err
|
||||
} else if len(cpus) == 0 {
|
||||
// Although the user has specified metrics to collect, `discoverCpus` failed to find the required metrics
|
||||
return fmt.Errorf("no CPUs detected to track")
|
||||
}
|
||||
g.cpus = cpus
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *LinuxCPU) Gather(acc telegraf.Accumulator) error {
|
||||
for _, cpu := range g.cpus {
|
||||
fields := make(map[string]interface{})
|
||||
tags := map[string]string{"cpu": cpu.id}
|
||||
|
||||
failed := false
|
||||
for name, propPath := range cpu.props {
|
||||
v, err := readUintFromFile(propPath)
|
||||
if err != nil {
|
||||
acc.AddError(err)
|
||||
failed = true
|
||||
break
|
||||
}
|
||||
|
||||
fields[name] = v
|
||||
}
|
||||
|
||||
if !failed {
|
||||
acc.AddFields("linux_cpu", fields, tags)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (g *LinuxCPU) discoverCpus() ([]cpu, error) {
|
||||
var cpus []cpu
|
||||
|
||||
glob := path.Join(g.PathSysfs, "devices/system/cpu/cpu[0-9]*")
|
||||
cpuDirs, err := filepath.Glob(glob)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(cpuDirs) == 0 {
|
||||
return nil, fmt.Errorf("no CPUs detected at: %s", glob)
|
||||
}
|
||||
|
||||
for _, dir := range cpuDirs {
|
||||
_, cpuName := filepath.Split(dir)
|
||||
cpuNum := strings.TrimPrefix(cpuName, "cpu")
|
||||
|
||||
cpu := cpu{
|
||||
id: cpuNum,
|
||||
path: dir,
|
||||
props: make(map[string]string),
|
||||
}
|
||||
|
||||
var props []prop
|
||||
|
||||
if choice.Contains(cpufreq, g.Metrics) {
|
||||
props = append(props,
|
||||
prop{name: "scaling_cur_freq", path: "cpufreq/scaling_cur_freq", optional: false},
|
||||
prop{name: "scaling_min_freq", path: "cpufreq/scaling_min_freq", optional: false},
|
||||
prop{name: "scaling_max_freq", path: "cpufreq/scaling_max_freq", optional: false},
|
||||
prop{name: "cpuinfo_cur_freq", path: "cpufreq/cpuinfo_cur_freq", optional: true},
|
||||
prop{name: "cpuinfo_min_freq", path: "cpufreq/cpuinfo_min_freq", optional: true},
|
||||
prop{name: "cpuinfo_max_freq", path: "cpufreq/cpuinfo_max_freq", optional: true},
|
||||
)
|
||||
}
|
||||
|
||||
if choice.Contains(thermal, g.Metrics) {
|
||||
props = append(
|
||||
props,
|
||||
prop{name: "throttle_count", path: "thermal_throttle/core_throttle_count", optional: false},
|
||||
prop{name: "throttle_max_time", path: "thermal_throttle/core_throttle_max_time_ms", optional: false},
|
||||
prop{name: "throttle_total_time", path: "thermal_throttle/core_throttle_total_time_ms", optional: false},
|
||||
)
|
||||
}
|
||||
|
||||
var failed = false
|
||||
for _, prop := range props {
|
||||
propPath := filepath.Join(dir, prop.path)
|
||||
err := validatePath(propPath)
|
||||
if err != nil {
|
||||
if prop.optional {
|
||||
continue
|
||||
}
|
||||
|
||||
g.Log.Warnf("Failed to load property %s: %v", propPath, err)
|
||||
failed = true
|
||||
break
|
||||
}
|
||||
|
||||
cpu.props[prop.name] = propPath
|
||||
}
|
||||
|
||||
if len(cpu.props) == 0 {
|
||||
g.Log.Warnf("No properties enabled/loaded for CPU %s", cpuNum)
|
||||
failed = true
|
||||
}
|
||||
|
||||
if !failed {
|
||||
cpus = append(cpus, cpu)
|
||||
}
|
||||
}
|
||||
return cpus, nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("linux_cpu", func() telegraf.Input {
|
||||
return &LinuxCPU{
|
||||
Metrics: []string{"cpufreq"},
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// validatePath checks that the property file at propPath can be opened for
// reading. It returns nil on success, a "does not exist" error when the file
// is missing, and otherwise an error that wraps the underlying open failure
// so callers can inspect it with errors.Is / errors.As.
func validatePath(propPath string) error {
	f, err := os.Open(propPath)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("CPU property does not exist: [%s]", propPath)
		}
		return fmt.Errorf("cannot get system information for CPU property: [%s] - %w", propPath, err)
	}

	_ = f.Close() // File is not written to, closing should be safe
	return nil
}
|
||||
|
||||
// readUintFromFile opens the file at propPath and parses its content as a
// base-10 unsigned 64-bit integer.
//
// Only the first 22 bytes are read; a uint64 has at most 20 decimal digits.
// Leading and trailing whitespace, including the trailing newline, is
// stripped before parsing, so a value is parsed correctly whether or not the
// file ends with a newline.
//
// An error is returned when the file cannot be opened or read, when it is
// empty, or when its content is not a valid unsigned integer.
func readUintFromFile(propPath string) (uint64, error) {
	f, err := os.Open(propPath)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	buffer := make([]byte, 22)

	n, err := f.Read(buffer)
	if err != nil && err != io.EOF {
		return 0, fmt.Errorf("error on reading file, err: %w", err)
	}
	if n == 0 {
		return 0, fmt.Errorf("error on reading file, file is empty")
	}

	// Trim instead of dropping the last byte, which is not always a newline.
	return strconv.ParseUint(strings.TrimSpace(string(buffer[:n])), 10, 64)
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
//go:build !linux
|
||||
|
||||
package linux_cpu
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
//go:build linux
|
||||
|
||||
package linux_cpu
|
||||
|
||||
import (
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNoMetrics(t *testing.T) {
|
||||
plugin := &LinuxCPU{}
|
||||
require.Error(t, plugin.Init())
|
||||
}
|
||||
|
||||
func TestNoCPUs(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"cpufreq"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
}
|
||||
|
||||
func TestNoCPUMetrics(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu0/cpufreq", os.ModePerm))
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"cpufreq"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
}
|
||||
|
||||
func TestGatherCPUFreq(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu0/cpufreq", os.ModePerm))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", []byte("250\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_min_freq", []byte("100\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", []byte("255\n"), 0644))
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu1/cpufreq", os.ModePerm))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu1/cpufreq/scaling_cur_freq", []byte("123\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu1/cpufreq/scaling_min_freq", []byte("80\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu1/cpufreq/scaling_max_freq", []byte("230\n"), 0644))
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"cpufreq"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
tags1 := map[string]string{
|
||||
"cpu": "0",
|
||||
}
|
||||
|
||||
tags2 := map[string]string{
|
||||
"cpu": "1",
|
||||
}
|
||||
|
||||
fields1 := map[string]interface{}{
|
||||
"scaling_cur_freq": uint64(250),
|
||||
"scaling_min_freq": uint64(100),
|
||||
"scaling_max_freq": uint64(255),
|
||||
}
|
||||
|
||||
fields2 := map[string]interface{}{
|
||||
"scaling_cur_freq": uint64(123),
|
||||
"scaling_min_freq": uint64(80),
|
||||
"scaling_max_freq": uint64(230),
|
||||
}
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "linux_cpu", fields1, tags1)
|
||||
acc.AssertContainsTaggedFields(t, "linux_cpu", fields2, tags2)
|
||||
}
|
||||
|
||||
func TestGatherThermal(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu0/thermal_throttle", os.ModePerm))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", []byte("250\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/thermal_throttle/core_throttle_max_time_ms", []byte("100\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/thermal_throttle/core_throttle_total_time_ms", []byte("255\n"), 0644))
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"thermal"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
acc.AssertContainsFields(t, "linux_cpu", map[string]interface{}{
|
||||
"throttle_count": uint64(250),
|
||||
"throttle_max_time": uint64(100),
|
||||
"throttle_total_time": uint64(255),
|
||||
})
|
||||
}
|
||||
|
||||
func TestGatherPropertyRemoved(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu0/cpufreq", os.ModePerm))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", []byte("250\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_min_freq", []byte("100\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", []byte("255\n"), 0644))
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"cpufreq"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
// Remove one of the properties
|
||||
require.NoError(t, os.RemoveAll(td+"/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"))
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
tags1 := map[string]string{
|
||||
"cpu": "0",
|
||||
}
|
||||
|
||||
fields1 := map[string]interface{}{
|
||||
"scaling_cur_freq": uint64(250),
|
||||
"scaling_min_freq": uint64(100),
|
||||
"scaling_max_freq": uint64(255),
|
||||
}
|
||||
|
||||
acc.AssertDoesNotContainsTaggedFields(t, "linux_cpu", fields1, tags1)
|
||||
require.NotEmpty(t, acc.Errors)
|
||||
}
|
||||
|
||||
func TestGatherPropertyInvalid(t *testing.T) {
|
||||
td := t.TempDir()
|
||||
|
||||
require.NoError(t, os.MkdirAll(td+"/devices/system/cpu/cpu0/cpufreq", os.ModePerm))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq", []byte("ABC\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_min_freq", []byte("100\n"), 0644))
|
||||
require.NoError(t, os.WriteFile(td+"/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", []byte("255\n"), 0644))
|
||||
|
||||
plugin := &LinuxCPU{
|
||||
Log: testutil.Logger{Name: "LinuxCPUPluginTest"},
|
||||
Metrics: []string{"cpufreq"},
|
||||
PathSysfs: td,
|
||||
}
|
||||
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
tags1 := map[string]string{
|
||||
"cpu": "0",
|
||||
}
|
||||
|
||||
fields1 := map[string]interface{}{
|
||||
"scaling_cur_freq": uint64(250),
|
||||
"scaling_min_freq": uint64(100),
|
||||
"scaling_max_freq": uint64(255),
|
||||
}
|
||||
|
||||
acc.AssertDoesNotContainsTaggedFields(t, "linux_cpu", fields1, tags1)
|
||||
require.NotEmpty(t, acc.Errors)
|
||||
}
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
# Provides Linux CPU metrics
|
||||
[[inputs.linux_cpu]]
|
||||
## Path for sysfs filesystem.
|
||||
## See https://www.kernel.org/doc/Documentation/filesystems/sysfs.txt
|
||||
## Defaults:
|
||||
# host_sys = "/sys"
|
||||
|
||||
## CPU metrics collected by the plugin.
|
||||
## Supported options:
|
||||
## "cpufreq", "thermal"
|
||||
## Defaults:
|
||||
# metrics = ["cpufreq"]
|
||||
Loading…
Reference in New Issue