feat(outputs.syslog): Implement startup error behavior options (#15787)
This commit is contained in:
parent
88663992e4
commit
536a914890
|
|
@ -23,6 +23,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||||
|
|
||||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||||
|
|
||||||
|
## Startup error behavior options <!-- @/docs/includes/startup_error_behavior.md -->
|
||||||
|
|
||||||
|
In addition to the plugin-specific and global configuration settings, the plugin
|
||||||
|
supports options for specifying the behavior when experiencing startup errors
|
||||||
|
using the `startup_error_behavior` setting. Available values are:
|
||||||
|
|
||||||
|
- `error`: Telegraf will stop and exit in case of startup errors. This is the
|
||||||
|
default behavior.
|
||||||
|
- `ignore`: Telegraf will ignore startup errors for this plugin and disable it
|
||||||
|
but continues processing for all other plugins.
|
||||||
|
- `retry`: Telegraf will try to start up the plugin in every gather or write
|
||||||
|
cycle in case of startup errors. The plugin is disabled until
|
||||||
|
the startup succeeds.
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
```toml @sample.conf
|
```toml @sample.conf
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import (
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/config"
|
"github.com/influxdata/telegraf/config"
|
||||||
|
"github.com/influxdata/telegraf/internal"
|
||||||
tlsint "github.com/influxdata/telegraf/plugins/common/tls"
|
tlsint "github.com/influxdata/telegraf/plugins/common/tls"
|
||||||
"github.com/influxdata/telegraf/plugins/outputs"
|
"github.com/influxdata/telegraf/plugins/outputs"
|
||||||
)
|
)
|
||||||
|
|
@ -75,7 +76,7 @@ func (s *Syslog) Connect() error {
|
||||||
c, err = tls.Dial(spl[0], spl[1], tlsCfg)
|
c, err = tls.Dial(spl[0], spl[1], tlsCfg)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return &internal.StartupError{Err: err, Retry: true}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := s.setKeepAlive(c); err != nil {
|
if err := s.setKeepAlive(c); err != nil {
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,9 @@ import (
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/internal"
|
||||||
"github.com/influxdata/telegraf/metric"
|
"github.com/influxdata/telegraf/metric"
|
||||||
|
"github.com/influxdata/telegraf/models"
|
||||||
"github.com/influxdata/telegraf/testutil"
|
"github.com/influxdata/telegraf/testutil"
|
||||||
"github.com/leodido/go-syslog/v4/nontransparent"
|
"github.com/leodido/go-syslog/v4/nontransparent"
|
||||||
)
|
)
|
||||||
|
|
@ -244,3 +246,185 @@ func TestSyslogWriteReconnect(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, string(messageBytesWithFraming), string(buf[:n]))
|
require.Equal(t, string(messageBytesWithFraming), string(buf[:n]))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestStartupErrorBehaviorDefault(t *testing.T) {
|
||||||
|
// Setup a dummy listener but do not accept connections
|
||||||
|
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
address := listener.Addr().String()
|
||||||
|
listener.Close()
|
||||||
|
|
||||||
|
// Setup the plugin and the model to be able to use the startup retry strategy
|
||||||
|
plugin := &Syslog{
|
||||||
|
Address: "tcp://" + address,
|
||||||
|
Trailer: nontransparent.LF,
|
||||||
|
Separator: "_",
|
||||||
|
DefaultSeverityCode: uint8(5), // notice
|
||||||
|
DefaultFacilityCode: uint8(1), // user-level
|
||||||
|
DefaultAppname: "Telegraf",
|
||||||
|
}
|
||||||
|
|
||||||
|
model := models.NewRunningOutput(
|
||||||
|
plugin,
|
||||||
|
&models.OutputConfig{
|
||||||
|
Name: "syslog",
|
||||||
|
},
|
||||||
|
10, 100,
|
||||||
|
)
|
||||||
|
require.NoError(t, model.Init())
|
||||||
|
|
||||||
|
// Starting the plugin will fail with an error because the server does not listen
|
||||||
|
err = model.Connect()
|
||||||
|
require.Error(t, err, "connection should be refused")
|
||||||
|
var serr *internal.StartupError
|
||||||
|
require.ErrorAs(t, err, &serr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStartupErrorBehaviorError(t *testing.T) {
|
||||||
|
// Setup a dummy listener but do not accept connections
|
||||||
|
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
address := listener.Addr().String()
|
||||||
|
listener.Close()
|
||||||
|
|
||||||
|
// Setup the plugin and the model to be able to use the startup retry strategy
|
||||||
|
plugin := &Syslog{
|
||||||
|
Address: "tcp://" + address,
|
||||||
|
Trailer: nontransparent.LF,
|
||||||
|
Separator: "_",
|
||||||
|
DefaultSeverityCode: uint8(5), // notice
|
||||||
|
DefaultFacilityCode: uint8(1), // user-level
|
||||||
|
DefaultAppname: "Telegraf",
|
||||||
|
}
|
||||||
|
|
||||||
|
model := models.NewRunningOutput(
|
||||||
|
plugin,
|
||||||
|
&models.OutputConfig{
|
||||||
|
Name: "syslog",
|
||||||
|
StartupErrorBehavior: "error",
|
||||||
|
},
|
||||||
|
10, 100,
|
||||||
|
)
|
||||||
|
require.NoError(t, model.Init())
|
||||||
|
|
||||||
|
// Starting the plugin will fail with an error because the server does not listen
|
||||||
|
err = model.Connect()
|
||||||
|
require.Error(t, err, "connection should be refused")
|
||||||
|
var serr *internal.StartupError
|
||||||
|
require.ErrorAs(t, err, &serr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStartupErrorBehaviorIgnore(t *testing.T) {
|
||||||
|
// Setup a dummy listener but do not accept connections
|
||||||
|
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
address := listener.Addr().String()
|
||||||
|
listener.Close()
|
||||||
|
|
||||||
|
// Setup the plugin and the model to be able to use the startup retry strategy
|
||||||
|
plugin := &Syslog{
|
||||||
|
Address: "tcp://" + address,
|
||||||
|
Trailer: nontransparent.LF,
|
||||||
|
Separator: "_",
|
||||||
|
DefaultSeverityCode: uint8(5), // notice
|
||||||
|
DefaultFacilityCode: uint8(1), // user-level
|
||||||
|
DefaultAppname: "Telegraf",
|
||||||
|
}
|
||||||
|
|
||||||
|
model := models.NewRunningOutput(
|
||||||
|
plugin,
|
||||||
|
&models.OutputConfig{
|
||||||
|
Name: "syslog",
|
||||||
|
StartupErrorBehavior: "ignore",
|
||||||
|
},
|
||||||
|
10, 100,
|
||||||
|
)
|
||||||
|
require.NoError(t, model.Init())
|
||||||
|
|
||||||
|
// Starting the plugin will fail because the server does not accept connections.
|
||||||
|
// The model code should convert it to a fatal error for the agent to remove
|
||||||
|
// the plugin.
|
||||||
|
err = model.Connect()
|
||||||
|
require.Error(t, err, "connection should be refused")
|
||||||
|
var fatalErr *internal.FatalError
|
||||||
|
require.ErrorAs(t, err, &fatalErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStartupErrorBehaviorRetry(t *testing.T) {
|
||||||
|
// Setup a dummy listener but do not accept connections
|
||||||
|
listener, err := net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
address := listener.Addr().String()
|
||||||
|
listener.Close()
|
||||||
|
|
||||||
|
// Setup the plugin and the model to be able to use the startup retry strategy
|
||||||
|
plugin := &Syslog{
|
||||||
|
Address: "tcp://" + address,
|
||||||
|
Trailer: nontransparent.LF,
|
||||||
|
Separator: "_",
|
||||||
|
DefaultSeverityCode: uint8(5), // notice
|
||||||
|
DefaultFacilityCode: uint8(1), // user-level
|
||||||
|
DefaultAppname: "Telegraf",
|
||||||
|
}
|
||||||
|
|
||||||
|
model := models.NewRunningOutput(
|
||||||
|
plugin,
|
||||||
|
&models.OutputConfig{
|
||||||
|
Name: "syslog",
|
||||||
|
StartupErrorBehavior: "retry",
|
||||||
|
},
|
||||||
|
10, 100,
|
||||||
|
)
|
||||||
|
require.NoError(t, model.Init())
|
||||||
|
|
||||||
|
// Starting the plugin will return no error because the plugin will
|
||||||
|
// retry to connect in every write cycle.
|
||||||
|
require.NoError(t, model.Connect())
|
||||||
|
defer model.Close()
|
||||||
|
|
||||||
|
// Writing metrics in this state should fail because we are not fully
|
||||||
|
// started up
|
||||||
|
metrics := testutil.MockMetrics()
|
||||||
|
for _, m := range metrics {
|
||||||
|
model.AddMetric(m)
|
||||||
|
}
|
||||||
|
require.ErrorIs(t, model.WriteBatch(), internal.ErrNotConnected)
|
||||||
|
|
||||||
|
// Startup an actually working listener we can connect and write to
|
||||||
|
listener, err = net.Listen("tcp", "127.0.0.1:0")
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer listener.Close()
|
||||||
|
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
buf := make([]byte, 256)
|
||||||
|
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
|
||||||
|
conn, err := listener.Accept()
|
||||||
|
if err != nil {
|
||||||
|
t.Logf("accepting connection failed: %v", err)
|
||||||
|
t.Fail()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := conn.SetReadDeadline(time.Now().Add(3 * time.Second)); err != nil {
|
||||||
|
t.Logf("setting read deadline failed: %v", err)
|
||||||
|
t.Fail()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := conn.Read(buf); err != nil {
|
||||||
|
t.Logf("reading failed: %v", err)
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Update the plugin's address and write again. This time the write should
|
||||||
|
// succeed.
|
||||||
|
plugin.Address = "tcp://" + listener.Addr().String()
|
||||||
|
require.NoError(t, model.WriteBatch())
|
||||||
|
wg.Wait()
|
||||||
|
require.NotEmpty(t, string(buf))
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue