fix(inputs.procstat): Do not report dead processes as running for orphan PID files (#15260)

This commit is contained in:
Sven Rebhan 2024-05-02 14:46:36 -04:00 committed by GitHub
parent 60cf9772a7
commit 920f92fc53
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 51 additions and 24 deletions

View File

@ -7,28 +7,32 @@ import (
"strconv"
"strings"
"github.com/influxdata/telegraf/filter"
"github.com/shirou/gopsutil/v3/process"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
)
type Filter struct {
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Name string `toml:"name"`
PidFiles []string `toml:"pid_files"`
SystemdUnits []string `toml:"systemd_units"`
SupervisorUnits []string `toml:"supervisor_units"`
WinService []string `toml:"win_services"`
CGroups []string `toml:"cgroups"`
Patterns []string `toml:"patterns"`
Users []string `toml:"users"`
Executables []string `toml:"executables"`
ProcessNames []string `toml:"process_names"`
RecursionDepth int `toml:"recursion_depth"`
Log telegraf.Logger `toml:"-"`
filterSupervisorUnit string
filterCmds []*regexp.Regexp
filterUser filter.Filter
filterExecutable filter.Filter
filterProcessName filter.Filter
finder *processFinder
}
func (f *Filter) Init() error {
@ -80,6 +84,8 @@ func (f *Filter) Init() error {
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
}
// Setup the process finder
f.finder = newProcessFinder(f.Log)
return nil
}
@ -89,7 +95,7 @@ func (f *Filter) ApplyFilter() ([]processGroup, error) {
var groups []processGroup
switch {
case len(f.PidFiles) > 0:
g, err := findByPidFiles(f.PidFiles)
g, err := f.finder.findByPidFiles(f.PidFiles)
if err != nil {
return nil, err
}

View File

@ -149,6 +149,7 @@ func (p *Procstat) Init() error {
// New-style operations
for i := range p.Filter {
p.Filter[i].Log = p.Log
if err := p.Filter[i].Init(); err != nil {
return fmt.Errorf("initializing filter %d failed: %w", i, err)
}
@ -200,17 +201,23 @@ func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
}
count += len(r.PIDs)
for _, pid := range r.PIDs {
// Check if the process is still running
proc, err := p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it or it
// might be delivered from a non-checking source like a PID file
// of a dead process.
continue
}
// Use the cached processes as we need the existing instances
// to compute delta-metrics (e.g. cpu-usage).
proc, found := p.processes[pid]
if !found {
if cached, found := p.processes[pid]; found {
proc = cached
} else {
// We've found a process that was not recorded before so add it
// to the list of processes
proc, err = p.createProcess(pid)
if err != nil {
// No problem; process may have ended after we found it
continue
}
// Assumption: if a process has no name, it probably does not exist
if name, _ := proc.Name(); name == "" {
continue

View File

@ -8,10 +8,23 @@ import (
"strconv"
"strings"
"github.com/influxdata/telegraf"
"github.com/shirou/gopsutil/v3/process"
)
func findByPidFiles(paths []string) ([]processGroup, error) {
type processFinder struct {
errPidFiles map[string]bool
log telegraf.Logger
}
func newProcessFinder(log telegraf.Logger) *processFinder {
return &processFinder{
errPidFiles: make(map[string]bool),
log: log,
}
}
func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
groups := make([]processGroup, 0, len(paths))
for _, path := range paths {
buf, err := os.ReadFile(path)
@ -24,8 +37,9 @@ func findByPidFiles(paths []string) ([]processGroup, error) {
}
p, err := process.NewProcess(int32(pid))
if err != nil {
return nil, fmt.Errorf("failed to find process for PID %d of file %q: %w", pid, path, err)
if err != nil && !f.errPidFiles[path] {
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
f.errPidFiles[path] = true
}
groups = append(groups, processGroup{
processes: []*process.Process{p},
@ -46,7 +60,7 @@ func findByCgroups(cgroups []string) ([]processGroup, error) {
files, err := filepath.Glob(path)
if err != nil {
return nil, fmt.Errorf("failed to determin files for cgroup %q: %w", cgroup, err)
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
}
for _, fpath := range files {