feat(outputs.syslog): Implement startup error behavior options (#15787)

This commit is contained in:
Sven Rebhan 2024-08-29 17:06:35 +02:00 committed by GitHub
parent 88663992e4
commit 536a914890
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 200 additions and 1 deletions

View File

@ -23,6 +23,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
## Startup error behavior options <!-- @/docs/includes/startup_error_behavior.md -->
In addition to the plugin-specific and global configuration settings the plugin
supports options for specifying the behavior when experiencing startup errors
using the `startup_error_behavior` setting. Available values are:
- `error`: Telegraf will stop and exit in case of startup errors. This is the
default behavior.
- `ignore`: Telegraf will ignore startup errors for this plugin and disable it
but continue processing for all other plugins.
- `retry`: Telegraf will try to start up the plugin in every gather or write
cycle in case of startup errors. The plugin is disabled until
the startup succeeds.
## Configuration
```toml @sample.conf

View File

@ -16,6 +16,7 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
tlsint "github.com/influxdata/telegraf/plugins/common/tls"
"github.com/influxdata/telegraf/plugins/outputs"
)
@ -75,7 +76,7 @@ func (s *Syslog) Connect() error {
c, err = tls.Dial(spl[0], spl[1], tlsCfg)
}
if err != nil {
return err
return &internal.StartupError{Err: err, Retry: true}
}
if err := s.setKeepAlive(c); err != nil {

View File

@ -9,7 +9,9 @@ import (
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/models"
"github.com/influxdata/telegraf/testutil"
"github.com/leodido/go-syslog/v4/nontransparent"
)
@ -244,3 +246,185 @@ func TestSyslogWriteReconnect(t *testing.T) {
require.NoError(t, err)
require.Equal(t, string(messageBytesWithFraming), string(buf[:n]))
}
// TestStartupErrorBehaviorDefault checks that, without an explicit
// startup-error-behavior setting, a failing connection attempt is returned
// from Connect as a startup error.
func TestStartupErrorBehaviorDefault(t *testing.T) {
	// Open a listener to obtain a free local address and close it right away,
	// so nothing is listening on that address when the plugin connects
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// Wrap the plugin into a running-output model; no startup error behavior
	// is configured here so the model's default applies
	cfg := &models.OutputConfig{
		Name: "syslog",
	}
	output := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	ro := models.NewRunningOutput(output, cfg, 10, 100)
	require.NoError(t, ro.Init())

	// Connecting has to fail as the listener was closed, and the returned
	// error must be identifiable as a startup error
	connectErr := ro.Connect()
	require.Error(t, connectErr, "connection should be refused")

	var startupErr *internal.StartupError
	require.ErrorAs(t, connectErr, &startupErr)
}
// TestStartupErrorBehaviorError checks that with the startup error behavior
// set to "error" a failing connection attempt is returned from Connect as a
// startup error.
func TestStartupErrorBehaviorError(t *testing.T) {
	// Open a listener to obtain a free local address and close it right away,
	// so nothing is listening on that address when the plugin connects
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// Wrap the plugin into a running-output model configured with the
	// "error" startup behavior
	cfg := &models.OutputConfig{
		Name:                 "syslog",
		StartupErrorBehavior: "error",
	}
	output := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	ro := models.NewRunningOutput(output, cfg, 10, 100)
	require.NoError(t, ro.Init())

	// Connecting has to fail as the listener was closed, and the returned
	// error must be identifiable as a startup error
	connectErr := ro.Connect()
	require.Error(t, connectErr, "connection should be refused")

	var startupErr *internal.StartupError
	require.ErrorAs(t, connectErr, &startupErr)
}
// TestStartupErrorBehaviorIgnore checks that with the startup error behavior
// set to "ignore" a failing connection attempt is returned from Connect as a
// fatal error.
func TestStartupErrorBehaviorIgnore(t *testing.T) {
	// Open a listener to obtain a free local address and close it right away,
	// so nothing is listening on that address when the plugin connects
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	addr := ln.Addr().String()
	ln.Close()

	// Wrap the plugin into a running-output model configured with the
	// "ignore" startup behavior
	cfg := &models.OutputConfig{
		Name:                 "syslog",
		StartupErrorBehavior: "ignore",
	}
	output := &Syslog{
		Address:             "tcp://" + addr,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	ro := models.NewRunningOutput(output, cfg, 10, 100)
	require.NoError(t, ro.Init())

	// Connecting has to fail as the listener was closed. The model is
	// expected to turn the failure into a fatal error so the agent can
	// remove the plugin.
	connectErr := ro.Connect()
	require.Error(t, connectErr, "connection should be refused")

	var fatal *internal.FatalError
	require.ErrorAs(t, connectErr, &fatal)
}
// TestStartupErrorBehaviorRetry checks the "retry" startup error behavior:
// Connect must not report an error although the connection attempt fails,
// writing must fail with internal.ErrNotConnected while the plugin is not
// connected, and a later write must succeed once a listener is reachable.
func TestStartupErrorBehaviorRetry(t *testing.T) {
	// Open a listener to obtain a free local address and close it right away,
	// so nothing is listening on that address when the plugin connects
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	address := listener.Addr().String()
	listener.Close()

	// Setup the plugin and the model to be able to use the startup retry strategy
	plugin := &Syslog{
		Address:             "tcp://" + address,
		Trailer:             nontransparent.LF,
		Separator:           "_",
		DefaultSeverityCode: uint8(5), // notice
		DefaultFacilityCode: uint8(1), // user-level
		DefaultAppname:      "Telegraf",
	}
	model := models.NewRunningOutput(
		plugin,
		&models.OutputConfig{
			Name:                 "syslog",
			StartupErrorBehavior: "retry",
		},
		10, 100,
	)
	require.NoError(t, model.Init())

	// Starting the plugin will return no error because the plugin will
	// retry to connect in every write cycle.
	require.NoError(t, model.Connect())
	defer model.Close()

	// Writing metrics in this state should fail because we are not fully
	// started up
	metrics := testutil.MockMetrics()
	for _, m := range metrics {
		model.AddMetric(m)
	}
	require.ErrorIs(t, model.WriteBatch(), internal.ErrNotConnected)

	// Startup an actually working listener we can connect and write to
	listener, err = net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	defer listener.Close()

	var wg sync.WaitGroup
	buf := make([]byte, 256)
	// Number of bytes actually received; written by the goroutine below and
	// only read after wg.Wait() returned.
	var n int

	wg.Add(1)
	go func() {
		defer wg.Done()

		conn, err := listener.Accept()
		if err != nil {
			t.Errorf("accepting connection failed: %v", err)
			return
		}
		// Release the accepted connection once the read is done
		defer conn.Close()

		if err := conn.SetReadDeadline(time.Now().Add(3 * time.Second)); err != nil {
			t.Errorf("setting read deadline failed: %v", err)
			return
		}

		received, err := conn.Read(buf)
		n = received
		if err != nil {
			t.Errorf("reading failed: %v", err)
		}
	}()

	// Update the plugin's address and write again. This time the write should
	// succeed.
	plugin.Address = "tcp://" + listener.Addr().String()
	require.NoError(t, model.WriteBatch())
	wg.Wait()

	// Only look at the bytes actually received. The buffer is pre-allocated
	// with a fixed length, so checking the whole buffer would always pass.
	require.NotEmpty(t, buf[:n])
}