fix(inputs.systemd_units): Revert to only gather loaded units by default (#15108)
This commit is contained in:
parent
8d8ee533ce
commit
084d49dc9b
12
CHANGELOG.md
12
CHANGELOG.md
|
|
@ -1,6 +1,18 @@
|
||||||
<!-- markdownlint-disable MD024 -->
|
<!-- markdownlint-disable MD024 -->
|
||||||
# Changelog
|
# Changelog
|
||||||
|
|
||||||
|
## Unreleased
|
||||||
|
|
||||||
|
### Important Changes
|
||||||
|
|
||||||
|
- [PR #15108](https://github.com/influxdata/telegraf/pull/15108) reverts the
|
||||||
|
behavior of `inputs.systemd_units` back to pre-v1.30.0 to only collect units
|
||||||
|
already loaded by systemd, i.e. not collecting disabled or static units. This
|
||||||
|
was necessary because using unspecific filters will cause significant load on
|
||||||
|
the system as systemd needs to read all unit-files matching the pattern in
|
||||||
|
each gather cycle. If you use specific patterns and want to collect non-loaded
|
||||||
|
units, please set the `collect_disabled_units` option to `true`.
|
||||||
|
|
||||||
## v1.30.1 [2024-04-01]
|
## v1.30.1 [2024-04-01]
|
||||||
|
|
||||||
### Bugfixes
|
### Bugfixes
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,12 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||||
## automount, swap, timer, path, slice and scope
|
## automount, swap, timer, path, slice and scope
|
||||||
# unittype = "service"
|
# unittype = "service"
|
||||||
|
|
||||||
|
## Collect also units not loaded by systemd, i.e. disabled or static units
|
||||||
|
## Enabling this feature might introduce significant load when used with
|
||||||
|
## unspecific patterns (such as '*') as systemd will need to load all
|
||||||
|
## matching unit files.
|
||||||
|
# collect_disabled_units = false
|
||||||
|
|
||||||
## Collect detailed information for the units
|
## Collect detailed information for the units
|
||||||
# details = false
|
# details = false
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,12 @@
|
||||||
## automount, swap, timer, path, slice and scope
|
## automount, swap, timer, path, slice and scope
|
||||||
# unittype = "service"
|
# unittype = "service"
|
||||||
|
|
||||||
|
## Collect also units not loaded by systemd, i.e. disabled or static units
|
||||||
|
## Enabling this feature might introduce significant load when used with
|
||||||
|
## unspecific patterns (such as '*') as systemd will need to load all
|
||||||
|
## matching unit files.
|
||||||
|
# collect_disabled_units = false
|
||||||
|
|
||||||
## Collect detailed information for the units
|
## Collect detailed information for the units
|
||||||
# details = false
|
# details = false
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,11 +15,12 @@ var sampleConfig string
|
||||||
|
|
||||||
// SystemdUnits is a telegraf plugin to gather systemd unit status
|
// SystemdUnits is a telegraf plugin to gather systemd unit status
|
||||||
type SystemdUnits struct {
|
type SystemdUnits struct {
|
||||||
Pattern string `toml:"pattern"`
|
Pattern string `toml:"pattern"`
|
||||||
UnitType string `toml:"unittype"`
|
UnitType string `toml:"unittype"`
|
||||||
Details bool `toml:"details"`
|
Details bool `toml:"details"`
|
||||||
Timeout config.Duration `toml:"timeout"`
|
CollectDisabled bool `toml:"collect_disabled_units"`
|
||||||
Log telegraf.Logger `toml:"-"`
|
Timeout config.Duration `toml:"timeout"`
|
||||||
|
Log telegraf.Logger `toml:"-"`
|
||||||
archParams
|
archParams
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -115,14 +115,6 @@ var subMap = map[string]int{
|
||||||
"elapsed": 0x00a0,
|
"elapsed": 0x00a0,
|
||||||
}
|
}
|
||||||
|
|
||||||
type unitInfo struct {
|
|
||||||
name string
|
|
||||||
state dbus.UnitStatus
|
|
||||||
properties map[string]interface{}
|
|
||||||
unitFileState string
|
|
||||||
unitFilePreset string
|
|
||||||
}
|
|
||||||
|
|
||||||
type client interface {
|
type client interface {
|
||||||
Connected() bool
|
Connected() bool
|
||||||
Close()
|
Close()
|
||||||
|
|
@ -135,9 +127,10 @@ type client interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type archParams struct {
|
type archParams struct {
|
||||||
client client
|
client client
|
||||||
pattern []string
|
pattern []string
|
||||||
filter filter.Filter
|
filter filter.Filter
|
||||||
|
unitTypeDBus string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SystemdUnits) Init() error {
|
func (s *SystemdUnits) Init() error {
|
||||||
|
|
@ -156,7 +149,7 @@ func (s *SystemdUnits) Init() error {
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("invalid 'unittype' %q", s.UnitType)
|
return fmt.Errorf("invalid 'unittype' %q", s.UnitType)
|
||||||
}
|
}
|
||||||
s.UnitType = strings.ToUpper(s.UnitType[0:1]) + strings.ToLower(s.UnitType[1:])
|
s.unitTypeDBus = strings.ToUpper(s.UnitType[0:1]) + strings.ToLower(s.UnitType[1:])
|
||||||
|
|
||||||
s.pattern = strings.Split(s.Pattern, " ")
|
s.pattern = strings.Split(s.Pattern, " ")
|
||||||
f, err := filter.Compile(s.pattern)
|
f, err := filter.Compile(s.pattern)
|
||||||
|
|
@ -205,15 +198,18 @@ func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error {
|
||||||
return fmt.Errorf("listing loaded units failed: %w", err)
|
return fmt.Errorf("listing loaded units failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// List all unit files matching the pattern to also get disabled units
|
var files []dbus.UnitFile
|
||||||
list := []string{"enabled", "disabled", "static"}
|
if s.CollectDisabled {
|
||||||
files, err := s.client.ListUnitFilesByPatternsContext(ctx, list, s.pattern)
|
// List all unit files matching the pattern to also get disabled units
|
||||||
if err != nil {
|
list := []string{"enabled", "disabled", "static"}
|
||||||
return fmt.Errorf("listing unit files failed: %w", err)
|
files, err = s.client.ListUnitFilesByPatternsContext(ctx, list, s.pattern)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("listing unit files failed: %w", err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect all matching units, the loaded ones and the disabled ones
|
// Collect all matching units, the loaded ones and the disabled ones
|
||||||
states := make([]dbus.UnitStatus, 0, len(files))
|
states := make([]dbus.UnitStatus, 0, len(loaded))
|
||||||
|
|
||||||
// Match all loaded units first
|
// Match all loaded units first
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
|
|
@ -235,124 +231,95 @@ func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error {
|
||||||
|
|
||||||
// Now split the unit-files into disabled ones and static ones, ignore
|
// Now split the unit-files into disabled ones and static ones, ignore
|
||||||
// enabled units as those are already contained in the "loaded" list.
|
// enabled units as those are already contained in the "loaded" list.
|
||||||
disabled := make([]string, 0, len(files))
|
if len(files) > 0 {
|
||||||
static := make([]string, 0, len(files))
|
disabled := make([]string, 0, len(files))
|
||||||
for _, f := range files {
|
static := make([]string, 0, len(files))
|
||||||
name := path.Base(f.Path)
|
for _, f := range files {
|
||||||
|
name := path.Base(f.Path)
|
||||||
|
|
||||||
switch f.Type {
|
switch f.Type {
|
||||||
case "disabled":
|
case "disabled":
|
||||||
if seen[name] {
|
if seen[name] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[name] = true
|
seen[name] = true
|
||||||
|
|
||||||
// Detect disabled multi-instance units and declare them as static
|
// Detect disabled multi-instance units and declare them as static
|
||||||
_, suffix, found := strings.Cut(name, "@")
|
_, suffix, found := strings.Cut(name, "@")
|
||||||
instance, _, _ := strings.Cut(suffix, ".")
|
instance, _, _ := strings.Cut(suffix, ".")
|
||||||
if found && instance == "" {
|
if found && instance == "" {
|
||||||
|
static = append(static, name)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
disabled = append(disabled, name)
|
||||||
|
case "static":
|
||||||
|
// Make sure we filter already loaded static multi-instance units
|
||||||
|
instance := name
|
||||||
|
if strings.Contains(name, "@") {
|
||||||
|
prefix, _, _ := strings.Cut(name, "@")
|
||||||
|
suffix := path.Ext(name)
|
||||||
|
instance = prefix + "@" + suffix
|
||||||
|
}
|
||||||
|
if seen[instance] || seen[name] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[instance] = true
|
||||||
static = append(static, name)
|
static = append(static, name)
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
disabled = append(disabled, name)
|
|
||||||
case "static":
|
|
||||||
// Make sure we filter already loaded static multi-instance units
|
|
||||||
instance := name
|
|
||||||
if strings.Contains(name, "@") {
|
|
||||||
prefix, _, _ := strings.Cut(name, "@")
|
|
||||||
suffix := path.Ext(name)
|
|
||||||
instance = prefix + "@" + suffix
|
|
||||||
}
|
|
||||||
if seen[instance] || seen[name] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[instance] = true
|
|
||||||
static = append(static, name)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resolve the disabled and remaining static units
|
|
||||||
disabledStates, err := s.client.ListUnitsByNamesContext(ctx, disabled)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("listing unit states failed: %w", err)
|
|
||||||
}
|
|
||||||
states = append(states, disabledStates...)
|
|
||||||
|
|
||||||
// Add special information about unused static units
|
|
||||||
for _, name := range static {
|
|
||||||
if !strings.EqualFold(strings.TrimPrefix(path.Ext(name), "."), s.UnitType) {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
|
|
||||||
states = append(states, dbus.UnitStatus{
|
// Resolve the disabled and remaining static units
|
||||||
Name: name,
|
disabledStates, err := s.client.ListUnitsByNamesContext(ctx, disabled)
|
||||||
LoadState: "stub",
|
if err != nil {
|
||||||
ActiveState: "inactive",
|
return fmt.Errorf("listing unit states failed: %w", err)
|
||||||
SubState: "dead",
|
}
|
||||||
})
|
states = append(states, disabledStates...)
|
||||||
|
|
||||||
|
// Add special information about unused static units
|
||||||
|
for _, name := range static {
|
||||||
|
if !strings.EqualFold(strings.TrimPrefix(path.Ext(name), "."), s.UnitType) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
states = append(states, dbus.UnitStatus{
|
||||||
|
Name: name,
|
||||||
|
LoadState: "stub",
|
||||||
|
ActiveState: "inactive",
|
||||||
|
SubState: "dead",
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge the unit information into one struct
|
// Merge the unit information into one struct
|
||||||
units := make([]unitInfo, 0, len(states))
|
|
||||||
for _, state := range states {
|
for _, state := range states {
|
||||||
// Filter units of the wrong type
|
// Filter units of the wrong type
|
||||||
props, err := s.client.GetUnitTypePropertiesContext(ctx, state.Name, s.UnitType)
|
if idx := strings.LastIndex(state.Name, "."); idx < 0 || state.Name[idx+1:] != s.UnitType {
|
||||||
if err != nil {
|
continue
|
||||||
// Skip units returning "Unknown interface" errors as those indicate
|
|
||||||
// that the unit is of the wrong type.
|
|
||||||
if strings.Contains(err.Error(), "Unknown interface") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// For other units we make up properties, usually those are
|
|
||||||
// disabled multi-instance units
|
|
||||||
props = map[string]interface{}{
|
|
||||||
"StatusErrno": int64(-1),
|
|
||||||
"NRestarts": uint64(0),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u := unitInfo{
|
|
||||||
name: state.Name,
|
|
||||||
state: state,
|
|
||||||
properties: props,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get required unit file properties
|
|
||||||
if v, err := s.client.GetUnitPropertyContext(ctx, state.Name, "UnitFileState"); err == nil {
|
|
||||||
u.unitFileState = strings.Trim(v.Value.String(), `'"`)
|
|
||||||
}
|
|
||||||
if v, err := s.client.GetUnitPropertyContext(ctx, state.Name, "UnitFilePreset"); err == nil {
|
|
||||||
u.unitFilePreset = strings.Trim(v.Value.String(), `'"`)
|
|
||||||
}
|
|
||||||
|
|
||||||
units = append(units, u)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the metrics
|
|
||||||
for _, u := range units {
|
|
||||||
// Map the state names to numerical values
|
// Map the state names to numerical values
|
||||||
load, ok := loadMap[u.state.LoadState]
|
load, ok := loadMap[state.LoadState]
|
||||||
if !ok {
|
if !ok {
|
||||||
acc.AddError(fmt.Errorf("parsing field 'load' failed, value not in map: %s", u.state.LoadState))
|
acc.AddError(fmt.Errorf("parsing field 'load' failed, value not in map: %s", state.LoadState))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
active, ok := activeMap[u.state.ActiveState]
|
active, ok := activeMap[state.ActiveState]
|
||||||
if !ok {
|
if !ok {
|
||||||
acc.AddError(fmt.Errorf("parsing field field 'active' failed, value not in map: %s", u.state.ActiveState))
|
acc.AddError(fmt.Errorf("parsing field 'active' failed, value not in map: %s", state.ActiveState))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
subState, ok := subMap[u.state.SubState]
|
subState, ok := subMap[state.SubState]
|
||||||
if !ok {
|
if !ok {
|
||||||
acc.AddError(fmt.Errorf("parsing field field 'sub' failed, value not in map: %s", u.state.SubState))
|
acc.AddError(fmt.Errorf("parsing field 'sub' failed, value not in map: %s", state.SubState))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create the metric
|
// Create the metric
|
||||||
tags := map[string]string{
|
tags := map[string]string{
|
||||||
"name": u.name,
|
"name": state.Name,
|
||||||
"load": u.state.LoadState,
|
"load": state.LoadState,
|
||||||
"active": u.state.ActiveState,
|
"active": state.ActiveState,
|
||||||
"sub": u.state.SubState,
|
"sub": state.SubState,
|
||||||
}
|
}
|
||||||
|
|
||||||
fields := map[string]interface{}{
|
fields := map[string]interface{}{
|
||||||
|
|
@ -362,17 +329,42 @@ func (s *SystemdUnits) Gather(acc telegraf.Accumulator) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if s.Details {
|
if s.Details {
|
||||||
tags["state"] = u.unitFileState
|
properties, err := s.client.GetUnitTypePropertiesContext(ctx, state.Name, s.unitTypeDBus)
|
||||||
tags["preset"] = u.unitFilePreset
|
if err != nil {
|
||||||
|
// Skip units returning "Unknown interface" errors as those indicate
|
||||||
|
// that the unit is of the wrong type.
|
||||||
|
if strings.Contains(err.Error(), "Unknown interface") {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// For other units we make up properties, usually those are
|
||||||
|
// disabled multi-instance units
|
||||||
|
properties = map[string]interface{}{
|
||||||
|
"StatusErrno": int64(-1),
|
||||||
|
"NRestarts": uint64(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fields["status_errno"] = u.properties["StatusErrno"]
|
// Get required unit file properties
|
||||||
fields["restarts"] = u.properties["NRestarts"]
|
var unitFileState string
|
||||||
fields["pid"] = u.properties["MainPID"]
|
if v, err := s.client.GetUnitPropertyContext(ctx, state.Name, "UnitFileState"); err == nil {
|
||||||
fields["mem_current"] = u.properties["MemoryCurrent"]
|
unitFileState = strings.Trim(v.Value.String(), `'"`)
|
||||||
fields["mem_peak"] = u.properties["MemoryPeak"]
|
}
|
||||||
fields["swap_current"] = u.properties["MemorySwapCurrent"]
|
var unitFilePreset string
|
||||||
fields["swap_peak"] = u.properties["MemorySwapPeak"]
|
if v, err := s.client.GetUnitPropertyContext(ctx, state.Name, "UnitFilePreset"); err == nil {
|
||||||
fields["mem_avail"] = u.properties["MemoryAvailable"]
|
unitFilePreset = strings.Trim(v.Value.String(), `'"`)
|
||||||
|
}
|
||||||
|
|
||||||
|
tags["state"] = unitFileState
|
||||||
|
tags["preset"] = unitFilePreset
|
||||||
|
|
||||||
|
fields["status_errno"] = properties["StatusErrno"]
|
||||||
|
fields["restarts"] = properties["NRestarts"]
|
||||||
|
fields["pid"] = properties["MainPID"]
|
||||||
|
fields["mem_current"] = properties["MemoryCurrent"]
|
||||||
|
fields["mem_peak"] = properties["MemoryPeak"]
|
||||||
|
fields["swap_current"] = properties["MemorySwapCurrent"]
|
||||||
|
fields["swap_peak"] = properties["MemorySwapPeak"]
|
||||||
|
fields["mem_avail"] = properties["MemoryAvailable"]
|
||||||
|
|
||||||
// Sanitize unset memory fields
|
// Sanitize unset memory fields
|
||||||
for k, value := range fields {
|
for k, value := range fields {
|
||||||
|
|
|
||||||
|
|
@ -715,9 +715,10 @@ func TestMultiInstance(t *testing.T) {
|
||||||
// Setup plugin. Do NOT call Start() as this would connect to
|
// Setup plugin. Do NOT call Start() as this would connect to
|
||||||
// the real systemd daemon.
|
// the real systemd daemon.
|
||||||
plugin := &SystemdUnits{
|
plugin := &SystemdUnits{
|
||||||
Pattern: tt.pattern,
|
Pattern: tt.pattern,
|
||||||
Timeout: config.Duration(time.Second),
|
CollectDisabled: true,
|
||||||
Log: testutil.Logger{},
|
Timeout: config.Duration(time.Second),
|
||||||
|
Log: testutil.Logger{},
|
||||||
}
|
}
|
||||||
require.NoError(t, plugin.Init())
|
require.NoError(t, plugin.Init())
|
||||||
|
|
||||||
|
|
@ -810,6 +811,41 @@ func TestMultiInstance(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func BenchmarkAllUnitsIntegration(b *testing.B) {
|
||||||
|
plugin := &SystemdUnits{
|
||||||
|
CollectDisabled: true,
|
||||||
|
Timeout: config.Duration(3 * time.Second),
|
||||||
|
}
|
||||||
|
require.NoError(b, plugin.Init())
|
||||||
|
|
||||||
|
acc := &testutil.Accumulator{Discard: true}
|
||||||
|
require.NoError(b, plugin.Start(acc))
|
||||||
|
require.NoError(b, acc.GatherError(plugin.Gather))
|
||||||
|
require.NotZero(b, acc.NMetrics())
|
||||||
|
b.Logf("produced %d metrics", acc.NMetrics())
|
||||||
|
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
_ = plugin.Gather(acc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkAllLoadedUnitsIntegration(b *testing.B) {
|
||||||
|
plugin := &SystemdUnits{
|
||||||
|
Timeout: config.Duration(3 * time.Second),
|
||||||
|
}
|
||||||
|
require.NoError(b, plugin.Init())
|
||||||
|
|
||||||
|
acc := &testutil.Accumulator{Discard: true}
|
||||||
|
require.NoError(b, plugin.Start(acc))
|
||||||
|
require.NoError(b, acc.GatherError(plugin.Gather))
|
||||||
|
require.NotZero(b, acc.NMetrics())
|
||||||
|
b.Logf("produced %d metrics", acc.NMetrics())
|
||||||
|
|
||||||
|
for n := 0; n < b.N; n++ {
|
||||||
|
_ = plugin.Gather(acc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Fake client implementation
|
// Fake client implementation
|
||||||
type fakeClient struct {
|
type fakeClient struct {
|
||||||
units map[string]properties
|
units map[string]properties
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue