From 536a91489001708422c37d428614d4ef7bbec486 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:06:35 +0200 Subject: [PATCH] feat(outputs.syslog): Implement startup error behavior options (#15787) --- plugins/outputs/syslog/README.md | 14 ++ plugins/outputs/syslog/syslog.go | 3 +- plugins/outputs/syslog/syslog_test.go | 184 ++++++++++++++++++++++++++ 3 files changed, 200 insertions(+), 1 deletion(-) diff --git a/plugins/outputs/syslog/README.md b/plugins/outputs/syslog/README.md index de4b9fb16..ebc823b1d 100644 --- a/plugins/outputs/syslog/README.md +++ b/plugins/outputs/syslog/README.md @@ -23,6 +23,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. [CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins +## Startup error behavior options + +In addition to the plugin-specific and global configuration settings the plugin +supports options for specifying the behavior when experiencing startup errors +using the `startup_error_behavior` setting. Available values are: + +- `error`: Telegraf will stop and exit in case of startup errors. This is the + default behavior. +- `ignore`: Telegraf will ignore startup errors for this plugin and disable it + but continue processing for all other plugins. +- `retry`: Telegraf will try to start up the plugin in every gather or write + cycle in case of startup errors. The plugin is disabled until + the startup succeeds. 
+ ## Configuration ```toml @sample.conf diff --git a/plugins/outputs/syslog/syslog.go b/plugins/outputs/syslog/syslog.go index 688d7f1f7..681d771f3 100644 --- a/plugins/outputs/syslog/syslog.go +++ b/plugins/outputs/syslog/syslog.go @@ -16,6 +16,7 @@ import ( "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/config" + "github.com/influxdata/telegraf/internal" tlsint "github.com/influxdata/telegraf/plugins/common/tls" "github.com/influxdata/telegraf/plugins/outputs" ) @@ -75,7 +76,7 @@ func (s *Syslog) Connect() error { c, err = tls.Dial(spl[0], spl[1], tlsCfg) } if err != nil { - return err + return &internal.StartupError{Err: err, Retry: true} } if err := s.setKeepAlive(c); err != nil { diff --git a/plugins/outputs/syslog/syslog_test.go b/plugins/outputs/syslog/syslog_test.go index 4aa0e8694..6e98372dc 100644 --- a/plugins/outputs/syslog/syslog_test.go +++ b/plugins/outputs/syslog/syslog_test.go @@ -9,7 +9,9 @@ import ( "github.com/stretchr/testify/require" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/metric" + "github.com/influxdata/telegraf/models" "github.com/influxdata/telegraf/testutil" "github.com/leodido/go-syslog/v4/nontransparent" ) @@ -244,3 +246,185 @@ func TestSyslogWriteReconnect(t *testing.T) { require.NoError(t, err) require.Equal(t, string(messageBytesWithFraming), string(buf[:n])) } + +func TestStartupErrorBehaviorDefault(t *testing.T) { + // Reserve a free local port and close the listener so connections are refused + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + address := listener.Addr().String() + listener.Close() + + // Set up the plugin and the model which applies the default startup error behavior + plugin := &Syslog{ + Address: "tcp://" + address, + Trailer: nontransparent.LF, + Separator: "_", + DefaultSeverityCode: uint8(5), // notice + DefaultFacilityCode: uint8(1), // user-level + DefaultAppname: "Telegraf", + } + + model := models.NewRunningOutput( 
+ plugin, + &models.OutputConfig{ + Name: "syslog", + }, + 10, 100, + ) + require.NoError(t, model.Init()) + + // Starting the plugin must fail with a startup error because nothing listens there + err = model.Connect() + require.Error(t, err, "connection should be refused") + var serr *internal.StartupError + require.ErrorAs(t, err, &serr) +} + +func TestStartupErrorBehaviorError(t *testing.T) { + // Reserve a free local port and close the listener so connections are refused + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + address := listener.Addr().String() + listener.Close() + + // Set up the plugin and the model with the 'error' startup error behavior + plugin := &Syslog{ + Address: "tcp://" + address, + Trailer: nontransparent.LF, + Separator: "_", + DefaultSeverityCode: uint8(5), // notice + DefaultFacilityCode: uint8(1), // user-level + DefaultAppname: "Telegraf", + } + + model := models.NewRunningOutput( + plugin, + &models.OutputConfig{ + Name: "syslog", + StartupErrorBehavior: "error", + }, + 10, 100, + ) + require.NoError(t, model.Init()) + + // Starting the plugin must fail with a startup error because nothing listens there + err = model.Connect() + require.Error(t, err, "connection should be refused") + var serr *internal.StartupError + require.ErrorAs(t, err, &serr) +} + +func TestStartupErrorBehaviorIgnore(t *testing.T) { + // Reserve a free local port and close the listener so connections are refused + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + address := listener.Addr().String() + listener.Close() + + // Set up the plugin and the model with the 'ignore' startup error behavior + plugin := &Syslog{ + Address: "tcp://" + address, + Trailer: nontransparent.LF, + Separator: "_", + DefaultSeverityCode: uint8(5), // notice + DefaultFacilityCode: uint8(1), // user-level + DefaultAppname: "Telegraf", + } + + model := models.NewRunningOutput( + plugin, + &models.OutputConfig{ + Name: "syslog", + StartupErrorBehavior: "ignore", + }, + 10, 
100, + ) + require.NoError(t, model.Init()) + + // Starting the plugin will fail because nothing listens on the address anymore. + // The model code should convert it to a fatal error for the agent to remove + // the plugin. + err = model.Connect() + require.Error(t, err, "connection should be refused") + var fatalErr *internal.FatalError + require.ErrorAs(t, err, &fatalErr) +} + +func TestStartupErrorBehaviorRetry(t *testing.T) { + // Reserve a free local port and close the listener so connections are refused + listener, err := net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + address := listener.Addr().String() + listener.Close() + + // Set up the plugin and the model with the 'retry' startup error behavior + plugin := &Syslog{ + Address: "tcp://" + address, + Trailer: nontransparent.LF, + Separator: "_", + DefaultSeverityCode: uint8(5), // notice + DefaultFacilityCode: uint8(1), // user-level + DefaultAppname: "Telegraf", + } + + model := models.NewRunningOutput( + plugin, + &models.OutputConfig{ + Name: "syslog", + StartupErrorBehavior: "retry", + }, + 10, 100, + ) + require.NoError(t, model.Init()) + + // Starting the plugin will return no error because the plugin will + // retry to connect in every write cycle. 
+ require.NoError(t, model.Connect()) + defer model.Close() + + // Writing metrics in this state should fail because we are not fully + // started up + metrics := testutil.MockMetrics() + for _, m := range metrics { + model.AddMetric(m) + } + require.ErrorIs(t, model.WriteBatch(), internal.ErrNotConnected) + + // Start an actually working listener we can connect and write to + listener, err = net.Listen("tcp", "127.0.0.1:0") + require.NoError(t, err) + defer listener.Close() + + var wg sync.WaitGroup + buf := make([]byte, 256) + + wg.Add(1) + go func() { + defer wg.Done() + + conn, err := listener.Accept() + if err != nil { + t.Logf("accepting connection failed: %v", err) + t.Fail() + return + } + + if err := conn.SetReadDeadline(time.Now().Add(3 * time.Second)); err != nil { + t.Logf("setting read deadline failed: %v", err) + t.Fail() + return + } + + if _, err := conn.Read(buf); err != nil { + t.Logf("reading failed: %v", err) + t.Fail() + } + }() + + // Point the plugin to the working listener and write again. This time the + // write should succeed and the listener must have received actual data. + plugin.Address = "tcp://" + listener.Addr().String() + require.NoError(t, model.WriteBatch()) + wg.Wait() + require.NotEqual(t, make([]byte, len(buf)), buf, "no data received") +}