Update procstat to support cgroup globs & include systemd unit children (Copy of #7890) (#9488)

This commit is contained in:
Sebastian Spaink 2021-07-27 16:51:56 -05:00 committed by GitHub
parent 80829b3b5a
commit 5843b27d75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 155 additions and 62 deletions

View File

@ -26,8 +26,9 @@ Processes can be selected for monitoring using one of several methods:
# pattern = "nginx" # pattern = "nginx"
## user as argument for pgrep (ie, pgrep -u <user>) ## user as argument for pgrep (ie, pgrep -u <user>)
# user = "nginx" # user = "nginx"
## Systemd unit name ## Systemd unit name, supports globs when include_systemd_children is set to true
# systemd_unit = "nginx.service" # systemd_unit = "nginx.service"
# systemd_all = true
## CGroup name or path ## CGroup name or path
# cgroup = "systemd/system.slice/nginx.service" # cgroup = "systemd/system.slice/nginx.service"
@ -80,6 +81,7 @@ the `win_perf_counters` input plugin as a more mature alternative.
- user (when selected) - user (when selected)
- systemd_unit (when defined) - systemd_unit (when defined)
- cgroup (when defined) - cgroup (when defined)
- cgroup_full (when cgroup or systemd_unit is used with glob)
- win_service (when defined) - win_service (when defined)
- fields: - fields:
- child_major_faults (int) - child_major_faults (int)

View File

@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"os"
"os/exec" "os/exec"
"path/filepath" "path/filepath"
"runtime" "runtime"
@ -24,19 +25,20 @@ var (
type PID int32 type PID int32
type Procstat struct { type Procstat struct {
PidFinder string `toml:"pid_finder"` PidFinder string `toml:"pid_finder"`
PidFile string `toml:"pid_file"` PidFile string `toml:"pid_file"`
Exe string Exe string
Pattern string Pattern string
Prefix string Prefix string
CmdLineTag bool `toml:"cmdline_tag"` CmdLineTag bool `toml:"cmdline_tag"`
ProcessName string ProcessName string
User string User string
SystemdUnit string SystemdUnit string `toml:"systemd_unit"`
CGroup string `toml:"cgroup"` IncludeSystemdChildren bool `toml:"include_systemd_children"`
PidTag bool CGroup string `toml:"cgroup"`
WinService string `toml:"win_service"` PidTag bool
Mode string WinService string `toml:"win_service"`
Mode string
solarisMode bool solarisMode bool
@ -56,9 +58,10 @@ var sampleConfig = `
# pattern = "nginx" # pattern = "nginx"
## user as argument for pgrep (ie, pgrep -u <user>) ## user as argument for pgrep (ie, pgrep -u <user>)
# user = "nginx" # user = "nginx"
## Systemd unit name ## Systemd unit name, supports globs when include_systemd_children is set to true
# systemd_unit = "nginx.service" # systemd_unit = "nginx.service"
## CGroup name or path # include_systemd_children = false
## CGroup name or path, supports globs
# cgroup = "systemd/system.slice/nginx.service" # cgroup = "systemd/system.slice/nginx.service"
## Windows service name ## Windows service name
@ -100,6 +103,12 @@ func (p *Procstat) Description() string {
return "Monitor process cpu and memory usage" return "Monitor process cpu and memory usage"
} }
type PidsTags struct {
PIDS []PID
Tags map[string]string
Err error
}
func (p *Procstat) Gather(acc telegraf.Accumulator) error { func (p *Procstat) Gather(acc telegraf.Accumulator) error {
if p.createPIDFinder == nil { if p.createPIDFinder == nil {
switch p.PidFinder { switch p.PidFinder {
@ -116,33 +125,48 @@ func (p *Procstat) Gather(acc telegraf.Accumulator) error {
p.createProcess = defaultProcess p.createProcess = defaultProcess
} }
pids, tags, err := p.findPids() pidCount := 0
now := time.Now() now := time.Now()
newProcs := make(map[PID]Process, len(p.procs))
pidTags := p.findPids()
for _, pidTag := range pidTags {
pids := pidTag.PIDS
tags := pidTag.Tags
err := pidTag.Err
pidCount += len(pids)
if err != nil {
fields := map[string]interface{}{
"pid_count": 0,
"running": 0,
"result_code": 1,
}
tags := map[string]string{
"pid_finder": p.PidFinder,
"result": "lookup_error",
}
acc.AddFields("procstat_lookup", fields, tags, now)
return err
}
if err != nil { err = p.updateProcesses(pids, tags, p.procs, newProcs)
fields := map[string]interface{}{ if err != nil {
"pid_count": 0, acc.AddError(fmt.Errorf("procstat getting process, exe: [%s] pidfile: [%s] pattern: [%s] user: [%s] %s",
"running": 0, p.Exe, p.PidFile, p.Pattern, p.User, err.Error()))
"result_code": 1,
} }
tags := map[string]string{
"pid_finder": p.PidFinder,
"result": "lookup_error",
}
acc.AddFields("procstat_lookup", fields, tags, now)
return err
} }
p.procs = p.updateProcesses(pids, tags, p.procs) p.procs = newProcs
for _, proc := range p.procs { for _, proc := range p.procs {
p.addMetric(proc, acc, now) p.addMetric(proc, acc, now)
} }
fields := map[string]interface{}{ fields := map[string]interface{}{
"pid_count": len(pids), "pid_count": pidCount,
"running": len(p.procs), "running": len(p.procs),
"result_code": 0, "result_code": 0,
} }
tags := make(map[string]string)
tags["pid_finder"] = p.PidFinder tags["pid_finder"] = p.PidFinder
tags["result"] = "success" tags["result"] = "success"
acc.AddFields("procstat_lookup", fields, tags, now) acc.AddFields("procstat_lookup", fields, tags, now)
@ -183,9 +207,9 @@ func (p *Procstat) addMetric(proc Process, acc telegraf.Accumulator, t time.Time
//If cmd_line tag is true and it is not already set add cmdline as a tag //If cmd_line tag is true and it is not already set add cmdline as a tag
if p.CmdLineTag { if p.CmdLineTag {
if _, ok := proc.Tags()["cmdline"]; !ok { if _, ok := proc.Tags()["cmdline"]; !ok {
Cmdline, err := proc.Cmdline() cmdline, err := proc.Cmdline()
if err == nil { if err == nil {
proc.Tags()["cmdline"] = Cmdline proc.Tags()["cmdline"] = cmdline
} }
} }
} }
@ -313,9 +337,7 @@ func (p *Procstat) addMetric(proc Process, acc telegraf.Accumulator, t time.Time
} }
// Update monitored Processes // Update monitored Processes
func (p *Procstat) updateProcesses(pids []PID, tags map[string]string, prevInfo map[PID]Process) map[PID]Process { func (p *Procstat) updateProcesses(pids []PID, tags map[string]string, prevInfo map[PID]Process, procs map[PID]Process) error {
procs := make(map[PID]Process, len(prevInfo))
for _, pid := range pids { for _, pid := range pids {
info, ok := prevInfo[pid] info, ok := prevInfo[pid]
if ok { if ok {
@ -350,8 +372,7 @@ func (p *Procstat) updateProcesses(pids []PID, tags map[string]string, prevInfo
} }
} }
} }
return nil
return procs
} }
// Create and return PIDGatherer lazily // Create and return PIDGatherer lazily
@ -367,16 +388,34 @@ func (p *Procstat) getPIDFinder() (PIDFinder, error) {
} }
// Get matching PIDs and their initial tags // Get matching PIDs and their initial tags
func (p *Procstat) findPids() ([]PID, map[string]string, error) { func (p *Procstat) findPids() []PidsTags {
var pidTags []PidsTags
if p.SystemdUnit != "" {
groups := p.systemdUnitPIDs()
return groups
} else if p.CGroup != "" {
groups := p.cgroupPIDs()
return groups
} else {
f, err := p.getPIDFinder()
if err != nil {
pidTags = append(pidTags, PidsTags{nil, nil, err})
return pidTags
}
pids, tags, err := p.SimpleFindPids(f)
pidTags = append(pidTags, PidsTags{pids, tags, err})
}
return pidTags
}
// Get matching PIDs and their initial tags
func (p *Procstat) SimpleFindPids(f PIDFinder) ([]PID, map[string]string, error) {
var pids []PID var pids []PID
tags := make(map[string]string) tags := make(map[string]string)
var err error var err error
f, err := p.getPIDFinder()
if err != nil {
return nil, nil, err
}
if p.PidFile != "" { if p.PidFile != "" {
pids, err = f.PidFile(p.PidFile) pids, err = f.PidFile(p.PidFile)
tags = map[string]string{"pidfile": p.PidFile} tags = map[string]string{"pidfile": p.PidFile}
@ -389,12 +428,6 @@ func (p *Procstat) findPids() ([]PID, map[string]string, error) {
} else if p.User != "" { } else if p.User != "" {
pids, err = f.UID(p.User) pids, err = f.UID(p.User)
tags = map[string]string{"user": p.User} tags = map[string]string{"user": p.User}
} else if p.SystemdUnit != "" {
pids, err = p.systemdUnitPIDs()
tags = map[string]string{"systemd_unit": p.SystemdUnit}
} else if p.CGroup != "" {
pids, err = p.cgroupPIDs()
tags = map[string]string{"cgroup": p.CGroup}
} else if p.WinService != "" { } else if p.WinService != "" {
pids, err = p.winServicePIDs() pids, err = p.winServicePIDs()
tags = map[string]string{"win_service": p.WinService} tags = map[string]string{"win_service": p.WinService}
@ -408,8 +441,23 @@ func (p *Procstat) findPids() ([]PID, map[string]string, error) {
// execCommand is so tests can mock out exec.Command usage. // execCommand is so tests can mock out exec.Command usage.
var execCommand = exec.Command var execCommand = exec.Command
func (p *Procstat) systemdUnitPIDs() ([]PID, error) { func (p *Procstat) systemdUnitPIDs() []PidsTags {
if p.IncludeSystemdChildren {
p.CGroup = fmt.Sprintf("systemd/system.slice/%s", p.SystemdUnit)
return p.cgroupPIDs()
}
var pidTags []PidsTags
pids, err := p.simpleSystemdUnitPIDs()
tags := map[string]string{"systemd_unit": p.SystemdUnit}
pidTags = append(pidTags, PidsTags{pids, tags, err})
return pidTags
}
func (p *Procstat) simpleSystemdUnitPIDs() ([]PID, error) {
var pids []PID var pids []PID
cmd := execCommand("systemctl", "show", p.SystemdUnit) cmd := execCommand("systemctl", "show", p.SystemdUnit)
out, err := cmd.Output() out, err := cmd.Output()
if err != nil { if err != nil {
@ -426,23 +474,48 @@ func (p *Procstat) systemdUnitPIDs() ([]PID, error) {
if len(kv[1]) == 0 || bytes.Equal(kv[1], []byte("0")) { if len(kv[1]) == 0 || bytes.Equal(kv[1], []byte("0")) {
return nil, nil return nil, nil
} }
pid, err := strconv.ParseInt(string(kv[1]), 10, 32) pid, err := strconv.Atoi(string(kv[1]))
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid pid '%s'", kv[1]) return nil, fmt.Errorf("invalid pid '%s'", kv[1])
} }
pids = append(pids, PID(pid)) pids = append(pids, PID(pid))
} }
return pids, nil return pids, nil
} }
func (p *Procstat) cgroupPIDs() ([]PID, error) { func (p *Procstat) cgroupPIDs() []PidsTags {
var pids []PID var pidTags []PidsTags
procsPath := p.CGroup procsPath := p.CGroup
if procsPath[0] != '/' { if procsPath[0] != '/' {
procsPath = "/sys/fs/cgroup/" + procsPath procsPath = "/sys/fs/cgroup/" + procsPath
} }
procsPath = filepath.Join(procsPath, "cgroup.procs") items, err := filepath.Glob(procsPath)
if err != nil {
pidTags = append(pidTags, PidsTags{nil, nil, fmt.Errorf("glob failed '%s'", err)})
return pidTags
}
for _, item := range items {
pids, err := p.singleCgroupPIDs(item)
tags := map[string]string{"cgroup": p.CGroup, "cgroup_full": item}
pidTags = append(pidTags, PidsTags{pids, tags, err})
}
return pidTags
}
func (p *Procstat) singleCgroupPIDs(path string) ([]PID, error) {
var pids []PID
ok, err := isDir(path)
if err != nil {
return nil, err
}
if !ok {
return nil, fmt.Errorf("not a directory %s", path)
}
procsPath := filepath.Join(path, "cgroup.procs")
out, err := ioutil.ReadFile(procsPath) out, err := ioutil.ReadFile(procsPath)
if err != nil { if err != nil {
return nil, err return nil, err
@ -461,6 +534,14 @@ func (p *Procstat) cgroupPIDs() ([]PID, error) {
return pids, nil return pids, nil
} }
func isDir(path string) (bool, error) {
result, err := os.Stat(path)
if err != nil {
return false, err
}
return result.IsDir(), nil
}
func (p *Procstat) winServicePIDs() ([]PID, error) { func (p *Procstat) winServicePIDs() ([]PID, error) {
var pids []PID var pids []PID

View File

@ -369,10 +369,15 @@ func TestGather_systemdUnitPIDs(t *testing.T) {
createPIDFinder: pidFinder([]PID{}), createPIDFinder: pidFinder([]PID{}),
SystemdUnit: "TestGather_systemdUnitPIDs", SystemdUnit: "TestGather_systemdUnitPIDs",
} }
pids, tags, err := p.findPids() pidsTags := p.findPids()
require.NoError(t, err) for _, pidsTag := range pidsTags {
assert.Equal(t, []PID{11408}, pids) pids := pidsTag.PIDS
assert.Equal(t, "TestGather_systemdUnitPIDs", tags["systemd_unit"]) tags := pidsTag.Tags
err := pidsTag.Err
require.NoError(t, err)
assert.Equal(t, []PID{11408}, pids)
assert.Equal(t, "TestGather_systemdUnitPIDs", tags["systemd_unit"])
}
} }
func TestGather_cgroupPIDs(t *testing.T) { func TestGather_cgroupPIDs(t *testing.T) {
@ -390,10 +395,15 @@ func TestGather_cgroupPIDs(t *testing.T) {
createPIDFinder: pidFinder([]PID{}), createPIDFinder: pidFinder([]PID{}),
CGroup: td, CGroup: td,
} }
pids, tags, err := p.findPids() pidsTags := p.findPids()
require.NoError(t, err) for _, pidsTag := range pidsTags {
assert.Equal(t, []PID{1234, 5678}, pids) pids := pidsTag.PIDS
assert.Equal(t, td, tags["cgroup"]) tags := pidsTag.Tags
err := pidsTag.Err
require.NoError(t, err)
assert.Equal(t, []PID{1234, 5678}, pids)
assert.Equal(t, td, tags["cgroup"])
}
} }
func TestProcstatLookupMetric(t *testing.T) { func TestProcstatLookupMetric(t *testing.T) {