feat(inputs.vsphere): Flag for more lenient behavior when connect fails on startup (#12828)
This commit is contained in:
parent
2006086262
commit
7daf7bb38f
|
|
@ -234,6 +234,12 @@ to use them.
|
|||
## The Historical Interval value must match EXACTLY the interval in the daily
|
||||
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
||||
# historical_interval = "5m"
|
||||
|
||||
## Specifies plugin behavior regarding disconnected servers
|
||||
## Available choices :
|
||||
## - error: telegraf will return an error on startup if one of the servers is unreachable
|
||||
## - ignore: telegraf will ignore unreachable servers on both startup and gather
|
||||
# disconnected_servers_behavior = "error"
|
||||
```
|
||||
|
||||
NOTE: To disable collection of a specific resource type, simply exclude all
|
||||
|
|
|
|||
|
|
@ -304,7 +304,17 @@ func (e *Endpoint) initalDiscovery(ctx context.Context) {
|
|||
func (e *Endpoint) init(ctx context.Context) error {
|
||||
client, err := e.clientFactory.GetClient(ctx)
|
||||
if err != nil {
|
||||
switch e.Parent.DisconnectedServersBehavior {
|
||||
case "error":
|
||||
return err
|
||||
case "ignore":
|
||||
// Ignore the error and postpone the init until next collection cycle
|
||||
e.log.Warnf("Error connecting to vCenter on init: %s", err)
|
||||
return nil
|
||||
default:
|
||||
return fmt.Errorf("%q is not a valid value for disconnected_servers_behavior",
|
||||
e.Parent.DisconnectedServersBehavior)
|
||||
}
|
||||
}
|
||||
|
||||
// Initial load of custom field metadata
|
||||
|
|
@ -889,6 +899,15 @@ func (e *Endpoint) Close() {
|
|||
|
||||
// Collect runs a round of data collections as specified in the configuration.
|
||||
func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error {
|
||||
// Connection could have failed on init, so we need to check for a deferred
|
||||
// init request.
|
||||
if !e.initialized {
|
||||
e.log.Debug("Performing deferred init")
|
||||
err := e.init(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// If we never managed to do a discovery, collection will be a no-op. Therefore,
|
||||
// we need to check that a connection is available, or the collection will
|
||||
// silently fail.
|
||||
|
|
|
|||
|
|
@ -193,3 +193,9 @@
|
|||
## The Historical Interval value must match EXACTLY the interval in the daily
|
||||
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
||||
# historical_interval = "5m"
|
||||
|
||||
## Specifies plugin behavior regarding disconnected servers
|
||||
## Available choices :
|
||||
## - error: telegraf will return an error on startup if one of the servers is unreachable
|
||||
## - ignore: telegraf will ignore unreachable servers on both startup and gather
|
||||
# disconnected_servers_behavior = "error"
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ type VSphere struct {
|
|||
UseIntSamples bool
|
||||
IPAddresses []string
|
||||
MetricLookback int
|
||||
DisconnectedServersBehavior string
|
||||
|
||||
MaxQueryObjects int
|
||||
MaxQueryMetrics int
|
||||
|
|
@ -195,6 +196,7 @@ func init() {
|
|||
ObjectDiscoveryInterval: config.Duration(time.Second * 300),
|
||||
Timeout: config.Duration(time.Second * 60),
|
||||
HistoricalInterval: config.Duration(time.Second * 300),
|
||||
DisconnectedServersBehavior: "error",
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"crypto/tls"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
|
@ -141,6 +142,7 @@ func defaultVSphere() *VSphere {
|
|||
CollectConcurrency: 1,
|
||||
Separator: ".",
|
||||
HistoricalInterval: config.Duration(time.Second * 300),
|
||||
DisconnectedServersBehavior: "error",
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -449,6 +451,22 @@ func TestCollectionNoClusterMetrics(t *testing.T) {
|
|||
testCollection(t, true)
|
||||
}
|
||||
|
||||
func TestDisconnectedServerBehavior(t *testing.T) {
|
||||
u, err := url.Parse("https://definitely.not.a.valid.host")
|
||||
require.NoError(t, err)
|
||||
v := defaultVSphere()
|
||||
v.DisconnectedServersBehavior = "error"
|
||||
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||
require.Error(t, err)
|
||||
v.DisconnectedServersBehavior = "ignore"
|
||||
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||
require.NoError(t, err)
|
||||
v.DisconnectedServersBehavior = "something else"
|
||||
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||
require.Error(t, err)
|
||||
require.Equal(t, err.Error(), `"something else" is not a valid value for disconnected_servers_behavior`)
|
||||
}
|
||||
|
||||
func testCollection(t *testing.T, excludeClusters bool) {
|
||||
mustHaveMetrics := map[string]struct{}{
|
||||
"vsphere.vm.cpu": {},
|
||||
|
|
|
|||
Loading…
Reference in New Issue