fix(inputs.procstat): Do not report dead processes as running for orphan PID files (#15260)
This commit is contained in:
parent
60cf9772a7
commit
920f92fc53
|
|
@ -7,28 +7,32 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf/filter"
|
||||
"github.com/shirou/gopsutil/v3/process"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/filter"
|
||||
)
|
||||
|
||||
type Filter struct {
|
||||
Name string `toml:"name"`
|
||||
PidFiles []string `toml:"pid_files"`
|
||||
SystemdUnits []string `toml:"systemd_units"`
|
||||
SupervisorUnits []string `toml:"supervisor_units"`
|
||||
WinService []string `toml:"win_services"`
|
||||
CGroups []string `toml:"cgroups"`
|
||||
Patterns []string `toml:"patterns"`
|
||||
Users []string `toml:"users"`
|
||||
Executables []string `toml:"executables"`
|
||||
ProcessNames []string `toml:"process_names"`
|
||||
RecursionDepth int `toml:"recursion_depth"`
|
||||
Name string `toml:"name"`
|
||||
PidFiles []string `toml:"pid_files"`
|
||||
SystemdUnits []string `toml:"systemd_units"`
|
||||
SupervisorUnits []string `toml:"supervisor_units"`
|
||||
WinService []string `toml:"win_services"`
|
||||
CGroups []string `toml:"cgroups"`
|
||||
Patterns []string `toml:"patterns"`
|
||||
Users []string `toml:"users"`
|
||||
Executables []string `toml:"executables"`
|
||||
ProcessNames []string `toml:"process_names"`
|
||||
RecursionDepth int `toml:"recursion_depth"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
filterSupervisorUnit string
|
||||
filterCmds []*regexp.Regexp
|
||||
filterUser filter.Filter
|
||||
filterExecutable filter.Filter
|
||||
filterProcessName filter.Filter
|
||||
finder *processFinder
|
||||
}
|
||||
|
||||
func (f *Filter) Init() error {
|
||||
|
|
@ -80,6 +84,8 @@ func (f *Filter) Init() error {
|
|||
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
|
||||
}
|
||||
|
||||
// Setup the process finder
|
||||
f.finder = newProcessFinder(f.Log)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -89,7 +95,7 @@ func (f *Filter) ApplyFilter() ([]processGroup, error) {
|
|||
var groups []processGroup
|
||||
switch {
|
||||
case len(f.PidFiles) > 0:
|
||||
g, err := findByPidFiles(f.PidFiles)
|
||||
g, err := f.finder.findByPidFiles(f.PidFiles)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,6 +149,7 @@ func (p *Procstat) Init() error {
|
|||
|
||||
// New-style operations
|
||||
for i := range p.Filter {
|
||||
p.Filter[i].Log = p.Log
|
||||
if err := p.Filter[i].Init(); err != nil {
|
||||
return fmt.Errorf("initializing filter %d failed: %w", i, err)
|
||||
}
|
||||
|
|
@ -200,17 +201,23 @@ func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
|
|||
}
|
||||
count += len(r.PIDs)
|
||||
for _, pid := range r.PIDs {
|
||||
// Check if the process is still running
|
||||
proc, err := p.createProcess(pid)
|
||||
if err != nil {
|
||||
// No problem; process may have ended after we found it or it
|
||||
// might be delivered from a non-checking source like a PID file
|
||||
// of a dead process.
|
||||
continue
|
||||
}
|
||||
|
||||
// Use the cached processes as we need the existing instances
|
||||
// to compute delta-metrics (e.g. cpu-usage).
|
||||
proc, found := p.processes[pid]
|
||||
if !found {
|
||||
if cached, found := p.processes[pid]; found {
|
||||
proc = cached
|
||||
} else {
|
||||
// We've found a process that was not recorded before so add it
|
||||
// to the list of processes
|
||||
proc, err = p.createProcess(pid)
|
||||
if err != nil {
|
||||
// No problem; process may have ended after we found it
|
||||
continue
|
||||
}
|
||||
|
||||
// Assumption: if a process has no name, it probably does not exist
|
||||
if name, _ := proc.Name(); name == "" {
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -8,10 +8,23 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/shirou/gopsutil/v3/process"
|
||||
)
|
||||
|
||||
func findByPidFiles(paths []string) ([]processGroup, error) {
|
||||
type processFinder struct {
|
||||
errPidFiles map[string]bool
|
||||
log telegraf.Logger
|
||||
}
|
||||
|
||||
func newProcessFinder(log telegraf.Logger) *processFinder {
|
||||
return &processFinder{
|
||||
errPidFiles: make(map[string]bool),
|
||||
log: log,
|
||||
}
|
||||
}
|
||||
|
||||
func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
|
||||
groups := make([]processGroup, 0, len(paths))
|
||||
for _, path := range paths {
|
||||
buf, err := os.ReadFile(path)
|
||||
|
|
@ -24,8 +37,9 @@ func findByPidFiles(paths []string) ([]processGroup, error) {
|
|||
}
|
||||
|
||||
p, err := process.NewProcess(int32(pid))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to find process for PID %d of file %q: %w", pid, path, err)
|
||||
if err != nil && !f.errPidFiles[path] {
|
||||
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
|
||||
f.errPidFiles[path] = true
|
||||
}
|
||||
groups = append(groups, processGroup{
|
||||
processes: []*process.Process{p},
|
||||
|
|
@ -46,7 +60,7 @@ func findByCgroups(cgroups []string) ([]processGroup, error) {
|
|||
|
||||
files, err := filepath.Glob(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to determin files for cgroup %q: %w", cgroup, err)
|
||||
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
|
||||
}
|
||||
|
||||
for _, fpath := range files {
|
||||
|
|
|
|||
Loading…
Reference in New Issue