feat(inputs.vsphere): Flag for more lenient behavior when connect fails on startup (#12828)
This commit is contained in:
parent
2006086262
commit
7daf7bb38f
|
|
@ -234,6 +234,12 @@ to use them.
|
||||||
## The Historical Interval value must match EXACTLY the interval in the daily
|
## The Historical Interval value must match EXACTLY the interval in the daily
|
||||||
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
||||||
# historical_interval = "5m"
|
# historical_interval = "5m"
|
||||||
|
|
||||||
|
## Specifies plugin behavior regarding disconnected servers
|
||||||
|
## Available choices :
|
||||||
|
## - error: telegraf will return an error on startup if one of the servers is unreachable
|
||||||
|
## - ignore: telegraf will ignore unreachable servers on both startup and gather
|
||||||
|
# disconnected_servers_behavior = "error"
|
||||||
```
|
```
|
||||||
|
|
||||||
NOTE: To disable collection of a specific resource type, simply exclude all
|
NOTE: To disable collection of a specific resource type, simply exclude all
|
||||||
|
|
|
||||||
|
|
@ -304,7 +304,17 @@ func (e *Endpoint) initalDiscovery(ctx context.Context) {
|
||||||
func (e *Endpoint) init(ctx context.Context) error {
|
func (e *Endpoint) init(ctx context.Context) error {
|
||||||
client, err := e.clientFactory.GetClient(ctx)
|
client, err := e.clientFactory.GetClient(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
switch e.Parent.DisconnectedServersBehavior {
|
||||||
|
case "error":
|
||||||
|
return err
|
||||||
|
case "ignore":
|
||||||
|
// Ignore the error and postpone the init until next collection cycle
|
||||||
|
e.log.Warnf("Error connecting to vCenter on init: %s", err)
|
||||||
|
return nil
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("%q is not a valid value for disconnected_servers_behavior",
|
||||||
|
e.Parent.DisconnectedServersBehavior)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initial load of custom field metadata
|
// Initial load of custom field metadata
|
||||||
|
|
@ -889,6 +899,15 @@ func (e *Endpoint) Close() {
|
||||||
|
|
||||||
// Collect runs a round of data collections as specified in the configuration.
|
// Collect runs a round of data collections as specified in the configuration.
|
||||||
func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error {
|
func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error {
|
||||||
|
// Connection could have failed on init, so we need to check for a deferred
|
||||||
|
// init request.
|
||||||
|
if !e.initialized {
|
||||||
|
e.log.Debug("Performing deferred init")
|
||||||
|
err := e.init(ctx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
// If we never managed to do a discovery, collection will be a no-op. Therefore,
|
// If we never managed to do a discovery, collection will be a no-op. Therefore,
|
||||||
// we need to check that a connection is available, or the collection will
|
// we need to check that a connection is available, or the collection will
|
||||||
// silently fail.
|
// silently fail.
|
||||||
|
|
|
||||||
|
|
@ -193,3 +193,9 @@
|
||||||
## The Historical Interval value must match EXACTLY the interval in the daily
|
## The Historical Interval value must match EXACTLY the interval in the daily
|
||||||
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
# "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals
|
||||||
# historical_interval = "5m"
|
# historical_interval = "5m"
|
||||||
|
|
||||||
|
## Specifies plugin behavior regarding disconnected servers
|
||||||
|
## Available choices :
|
||||||
|
## - error: telegraf will return an error on startup if one of the servers is unreachable
|
||||||
|
## - ignore: telegraf will ignore unreachable servers on both startup and gather
|
||||||
|
# disconnected_servers_behavior = "error"
|
||||||
|
|
|
||||||
|
|
@ -22,45 +22,46 @@ var sampleConfig string
|
||||||
// VSphere is the top level type for the vSphere input plugin. It contains all the configuration
|
// VSphere is the top level type for the vSphere input plugin. It contains all the configuration
|
||||||
// and a list of connected vSphere endpoints
|
// and a list of connected vSphere endpoints
|
||||||
type VSphere struct {
|
type VSphere struct {
|
||||||
Vcenters []string
|
Vcenters []string
|
||||||
Username config.Secret `toml:"username"`
|
Username config.Secret `toml:"username"`
|
||||||
Password config.Secret `toml:"password"`
|
Password config.Secret `toml:"password"`
|
||||||
DatacenterInstances bool
|
DatacenterInstances bool
|
||||||
DatacenterMetricInclude []string
|
DatacenterMetricInclude []string
|
||||||
DatacenterMetricExclude []string
|
DatacenterMetricExclude []string
|
||||||
DatacenterInclude []string
|
DatacenterInclude []string
|
||||||
DatacenterExclude []string
|
DatacenterExclude []string
|
||||||
ClusterInstances bool
|
ClusterInstances bool
|
||||||
ClusterMetricInclude []string
|
ClusterMetricInclude []string
|
||||||
ClusterMetricExclude []string
|
ClusterMetricExclude []string
|
||||||
ClusterInclude []string
|
ClusterInclude []string
|
||||||
ClusterExclude []string
|
ClusterExclude []string
|
||||||
ResourcePoolInstances bool
|
ResourcePoolInstances bool
|
||||||
ResourcePoolMetricInclude []string
|
ResourcePoolMetricInclude []string
|
||||||
ResourcePoolMetricExclude []string
|
ResourcePoolMetricExclude []string
|
||||||
ResourcePoolInclude []string
|
ResourcePoolInclude []string
|
||||||
ResourcePoolExclude []string
|
ResourcePoolExclude []string
|
||||||
HostInstances bool
|
HostInstances bool
|
||||||
HostMetricInclude []string
|
HostMetricInclude []string
|
||||||
HostMetricExclude []string
|
HostMetricExclude []string
|
||||||
HostInclude []string
|
HostInclude []string
|
||||||
HostExclude []string
|
HostExclude []string
|
||||||
VMInstances bool `toml:"vm_instances"`
|
VMInstances bool `toml:"vm_instances"`
|
||||||
VMMetricInclude []string `toml:"vm_metric_include"`
|
VMMetricInclude []string `toml:"vm_metric_include"`
|
||||||
VMMetricExclude []string `toml:"vm_metric_exclude"`
|
VMMetricExclude []string `toml:"vm_metric_exclude"`
|
||||||
VMInclude []string `toml:"vm_include"`
|
VMInclude []string `toml:"vm_include"`
|
||||||
VMExclude []string `toml:"vm_exclude"`
|
VMExclude []string `toml:"vm_exclude"`
|
||||||
DatastoreInstances bool
|
DatastoreInstances bool
|
||||||
DatastoreMetricInclude []string
|
DatastoreMetricInclude []string
|
||||||
DatastoreMetricExclude []string
|
DatastoreMetricExclude []string
|
||||||
DatastoreInclude []string
|
DatastoreInclude []string
|
||||||
DatastoreExclude []string
|
DatastoreExclude []string
|
||||||
Separator string
|
Separator string
|
||||||
CustomAttributeInclude []string
|
CustomAttributeInclude []string
|
||||||
CustomAttributeExclude []string
|
CustomAttributeExclude []string
|
||||||
UseIntSamples bool
|
UseIntSamples bool
|
||||||
IPAddresses []string
|
IPAddresses []string
|
||||||
MetricLookback int
|
MetricLookback int
|
||||||
|
DisconnectedServersBehavior string
|
||||||
|
|
||||||
MaxQueryObjects int
|
MaxQueryObjects int
|
||||||
MaxQueryMetrics int
|
MaxQueryMetrics int
|
||||||
|
|
@ -186,15 +187,16 @@ func init() {
|
||||||
UseIntSamples: true,
|
UseIntSamples: true,
|
||||||
IPAddresses: []string{},
|
IPAddresses: []string{},
|
||||||
|
|
||||||
MaxQueryObjects: 256,
|
MaxQueryObjects: 256,
|
||||||
MaxQueryMetrics: 256,
|
MaxQueryMetrics: 256,
|
||||||
CollectConcurrency: 1,
|
CollectConcurrency: 1,
|
||||||
DiscoverConcurrency: 1,
|
DiscoverConcurrency: 1,
|
||||||
MetricLookback: 3,
|
MetricLookback: 3,
|
||||||
ForceDiscoverOnInit: true,
|
ForceDiscoverOnInit: true,
|
||||||
ObjectDiscoveryInterval: config.Duration(time.Second * 300),
|
ObjectDiscoveryInterval: config.Duration(time.Second * 300),
|
||||||
Timeout: config.Duration(time.Second * 60),
|
Timeout: config.Duration(time.Second * 60),
|
||||||
HistoricalInterval: config.Duration(time.Second * 300),
|
HistoricalInterval: config.Duration(time.Second * 300),
|
||||||
|
DisconnectedServersBehavior: "error",
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"net/url"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
@ -132,15 +133,16 @@ func defaultVSphere() *VSphere {
|
||||||
DatacenterInclude: []string{"/**"},
|
DatacenterInclude: []string{"/**"},
|
||||||
ClientConfig: itls.ClientConfig{InsecureSkipVerify: true},
|
ClientConfig: itls.ClientConfig{InsecureSkipVerify: true},
|
||||||
|
|
||||||
MaxQueryObjects: 256,
|
MaxQueryObjects: 256,
|
||||||
MaxQueryMetrics: 256,
|
MaxQueryMetrics: 256,
|
||||||
ObjectDiscoveryInterval: config.Duration(time.Second * 300),
|
ObjectDiscoveryInterval: config.Duration(time.Second * 300),
|
||||||
Timeout: config.Duration(time.Second * 20),
|
Timeout: config.Duration(time.Second * 20),
|
||||||
ForceDiscoverOnInit: true,
|
ForceDiscoverOnInit: true,
|
||||||
DiscoverConcurrency: 1,
|
DiscoverConcurrency: 1,
|
||||||
CollectConcurrency: 1,
|
CollectConcurrency: 1,
|
||||||
Separator: ".",
|
Separator: ".",
|
||||||
HistoricalInterval: config.Duration(time.Second * 300),
|
HistoricalInterval: config.Duration(time.Second * 300),
|
||||||
|
DisconnectedServersBehavior: "error",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -449,6 +451,22 @@ func TestCollectionNoClusterMetrics(t *testing.T) {
|
||||||
testCollection(t, true)
|
testCollection(t, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDisconnectedServerBehavior(t *testing.T) {
|
||||||
|
u, err := url.Parse("https://definitely.not.a.valid.host")
|
||||||
|
require.NoError(t, err)
|
||||||
|
v := defaultVSphere()
|
||||||
|
v.DisconnectedServersBehavior = "error"
|
||||||
|
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||||
|
require.Error(t, err)
|
||||||
|
v.DisconnectedServersBehavior = "ignore"
|
||||||
|
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||||
|
require.NoError(t, err)
|
||||||
|
v.DisconnectedServersBehavior = "something else"
|
||||||
|
_, err = NewEndpoint(context.Background(), v, u, v.Log)
|
||||||
|
require.Error(t, err)
|
||||||
|
require.Equal(t, err.Error(), `"something else" is not a valid value for disconnected_servers_behavior`)
|
||||||
|
}
|
||||||
|
|
||||||
func testCollection(t *testing.T, excludeClusters bool) {
|
func testCollection(t *testing.T, excludeClusters bool) {
|
||||||
mustHaveMetrics := map[string]struct{}{
|
mustHaveMetrics := map[string]struct{}{
|
||||||
"vsphere.vm.cpu": {},
|
"vsphere.vm.cpu": {},
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue