fix(inputs.procstat): Do not report dead processes as running for orphan PID files (#15260)
This commit is contained in:
parent
60cf9772a7
commit
920f92fc53
|
|
@ -7,28 +7,32 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/influxdata/telegraf/filter"
|
|
||||||
"github.com/shirou/gopsutil/v3/process"
|
"github.com/shirou/gopsutil/v3/process"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/filter"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Filter struct {
|
type Filter struct {
|
||||||
Name string `toml:"name"`
|
Name string `toml:"name"`
|
||||||
PidFiles []string `toml:"pid_files"`
|
PidFiles []string `toml:"pid_files"`
|
||||||
SystemdUnits []string `toml:"systemd_units"`
|
SystemdUnits []string `toml:"systemd_units"`
|
||||||
SupervisorUnits []string `toml:"supervisor_units"`
|
SupervisorUnits []string `toml:"supervisor_units"`
|
||||||
WinService []string `toml:"win_services"`
|
WinService []string `toml:"win_services"`
|
||||||
CGroups []string `toml:"cgroups"`
|
CGroups []string `toml:"cgroups"`
|
||||||
Patterns []string `toml:"patterns"`
|
Patterns []string `toml:"patterns"`
|
||||||
Users []string `toml:"users"`
|
Users []string `toml:"users"`
|
||||||
Executables []string `toml:"executables"`
|
Executables []string `toml:"executables"`
|
||||||
ProcessNames []string `toml:"process_names"`
|
ProcessNames []string `toml:"process_names"`
|
||||||
RecursionDepth int `toml:"recursion_depth"`
|
RecursionDepth int `toml:"recursion_depth"`
|
||||||
|
Log telegraf.Logger `toml:"-"`
|
||||||
|
|
||||||
filterSupervisorUnit string
|
filterSupervisorUnit string
|
||||||
filterCmds []*regexp.Regexp
|
filterCmds []*regexp.Regexp
|
||||||
filterUser filter.Filter
|
filterUser filter.Filter
|
||||||
filterExecutable filter.Filter
|
filterExecutable filter.Filter
|
||||||
filterProcessName filter.Filter
|
filterProcessName filter.Filter
|
||||||
|
finder *processFinder
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Filter) Init() error {
|
func (f *Filter) Init() error {
|
||||||
|
|
@ -80,6 +84,8 @@ func (f *Filter) Init() error {
|
||||||
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
|
return fmt.Errorf("compiling process-names filter for %q failed: %w", f.Name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Setup the process finder
|
||||||
|
f.finder = newProcessFinder(f.Log)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -89,7 +95,7 @@ func (f *Filter) ApplyFilter() ([]processGroup, error) {
|
||||||
var groups []processGroup
|
var groups []processGroup
|
||||||
switch {
|
switch {
|
||||||
case len(f.PidFiles) > 0:
|
case len(f.PidFiles) > 0:
|
||||||
g, err := findByPidFiles(f.PidFiles)
|
g, err := f.finder.findByPidFiles(f.PidFiles)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -149,6 +149,7 @@ func (p *Procstat) Init() error {
|
||||||
|
|
||||||
// New-style operations
|
// New-style operations
|
||||||
for i := range p.Filter {
|
for i := range p.Filter {
|
||||||
|
p.Filter[i].Log = p.Log
|
||||||
if err := p.Filter[i].Init(); err != nil {
|
if err := p.Filter[i].Init(); err != nil {
|
||||||
return fmt.Errorf("initializing filter %d failed: %w", i, err)
|
return fmt.Errorf("initializing filter %d failed: %w", i, err)
|
||||||
}
|
}
|
||||||
|
|
@ -200,17 +201,23 @@ func (p *Procstat) gatherOld(acc telegraf.Accumulator) error {
|
||||||
}
|
}
|
||||||
count += len(r.PIDs)
|
count += len(r.PIDs)
|
||||||
for _, pid := range r.PIDs {
|
for _, pid := range r.PIDs {
|
||||||
|
// Check if the process is still running
|
||||||
|
proc, err := p.createProcess(pid)
|
||||||
|
if err != nil {
|
||||||
|
// No problem; process may have ended after we found it or it
|
||||||
|
// might be delivered from a non-checking source like a PID file
|
||||||
|
// of a dead process.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Use the cached processes as we need the existing instances
|
// Use the cached processes as we need the existing instances
|
||||||
// to compute delta-metrics (e.g. cpu-usage).
|
// to compute delta-metrics (e.g. cpu-usage).
|
||||||
proc, found := p.processes[pid]
|
if cached, found := p.processes[pid]; found {
|
||||||
if !found {
|
proc = cached
|
||||||
|
} else {
|
||||||
// We've found a process that was not recorded before so add it
|
// We've found a process that was not recorded before so add it
|
||||||
// to the list of processes
|
// to the list of processes
|
||||||
proc, err = p.createProcess(pid)
|
|
||||||
if err != nil {
|
|
||||||
// No problem; process may have ended after we found it
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Assumption: if a process has no name, it probably does not exist
|
// Assumption: if a process has no name, it probably does not exist
|
||||||
if name, _ := proc.Name(); name == "" {
|
if name, _ := proc.Name(); name == "" {
|
||||||
continue
|
continue
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,23 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/shirou/gopsutil/v3/process"
|
"github.com/shirou/gopsutil/v3/process"
|
||||||
)
|
)
|
||||||
|
|
||||||
func findByPidFiles(paths []string) ([]processGroup, error) {
|
type processFinder struct {
|
||||||
|
errPidFiles map[string]bool
|
||||||
|
log telegraf.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
func newProcessFinder(log telegraf.Logger) *processFinder {
|
||||||
|
return &processFinder{
|
||||||
|
errPidFiles: make(map[string]bool),
|
||||||
|
log: log,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *processFinder) findByPidFiles(paths []string) ([]processGroup, error) {
|
||||||
groups := make([]processGroup, 0, len(paths))
|
groups := make([]processGroup, 0, len(paths))
|
||||||
for _, path := range paths {
|
for _, path := range paths {
|
||||||
buf, err := os.ReadFile(path)
|
buf, err := os.ReadFile(path)
|
||||||
|
|
@ -24,8 +37,9 @@ func findByPidFiles(paths []string) ([]processGroup, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
p, err := process.NewProcess(int32(pid))
|
p, err := process.NewProcess(int32(pid))
|
||||||
if err != nil {
|
if err != nil && !f.errPidFiles[path] {
|
||||||
return nil, fmt.Errorf("failed to find process for PID %d of file %q: %w", pid, path, err)
|
f.log.Errorf("failed to find process for PID %d of file %q: %v", pid, path, err)
|
||||||
|
f.errPidFiles[path] = true
|
||||||
}
|
}
|
||||||
groups = append(groups, processGroup{
|
groups = append(groups, processGroup{
|
||||||
processes: []*process.Process{p},
|
processes: []*process.Process{p},
|
||||||
|
|
@ -46,7 +60,7 @@ func findByCgroups(cgroups []string) ([]processGroup, error) {
|
||||||
|
|
||||||
files, err := filepath.Glob(path)
|
files, err := filepath.Glob(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to determin files for cgroup %q: %w", cgroup, err)
|
return nil, fmt.Errorf("failed to determine files for cgroup %q: %w", cgroup, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, fpath := range files {
|
for _, fpath := range files {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue