diff --git a/plugins/inputs/vsphere/README.md b/plugins/inputs/vsphere/README.md index 1d19fb645..e4fdb2e71 100644 --- a/plugins/inputs/vsphere/README.md +++ b/plugins/inputs/vsphere/README.md @@ -234,6 +234,12 @@ to use them. ## The Historical Interval value must match EXACTLY the interval in the daily # "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals # historical_interval = "5m" + + ## Specifies plugin behavior regarding disconnected servers + ## Available choices : + ## - error: telegraf will return an error on startup if one of the servers is unreachable + ## - ignore: telegraf will ignore unreachable servers on both startup and gather + # disconnected_servers_behavior = "error" ``` NOTE: To disable collection of a specific resource type, simply exclude all diff --git a/plugins/inputs/vsphere/endpoint.go b/plugins/inputs/vsphere/endpoint.go index e7755f041..e834258d2 100644 --- a/plugins/inputs/vsphere/endpoint.go +++ b/plugins/inputs/vsphere/endpoint.go @@ -304,7 +304,17 @@ func (e *Endpoint) initalDiscovery(ctx context.Context) { func (e *Endpoint) init(ctx context.Context) error { client, err := e.clientFactory.GetClient(ctx) if err != nil { - return err + switch e.Parent.DisconnectedServersBehavior { + case "error": + return err + case "ignore": + // Ignore the error and postpone the init until next collection cycle + e.log.Warnf("Error connecting to vCenter on init: %s", err) + return nil + default: + return fmt.Errorf("%q is not a valid value for disconnected_servers_behavior", + e.Parent.DisconnectedServersBehavior) + } } // Initial load of custom field metadata @@ -889,6 +899,15 @@ func (e *Endpoint) Close() { // Collect runs a round of data collections as specified in the configuration. func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error { + // Connection could have failed on init, so we need to check for a deferred + // init request. 
+ if !e.initialized { + e.log.Debug("Performing deferred init") + err := e.init(ctx) + if err != nil { + return err + } + } // If we never managed to do a discovery, collection will be a no-op. Therefore, // we need to check that a connection is available, or the collection will // silently fail. diff --git a/plugins/inputs/vsphere/sample.conf b/plugins/inputs/vsphere/sample.conf index a7d235539..82ab4c5a1 100644 --- a/plugins/inputs/vsphere/sample.conf +++ b/plugins/inputs/vsphere/sample.conf @@ -193,3 +193,9 @@ ## The Historical Interval value must match EXACTLY the interval in the daily # "Interval Duration" found on the VCenter server under Configure > General > Statistics > Statistic intervals # historical_interval = "5m" + + ## Specifies plugin behavior regarding disconnected servers + ## Available choices : + ## - error: telegraf will return an error on startup if one of the servers is unreachable + ## - ignore: telegraf will ignore unreachable servers on both startup and gather + # disconnected_servers_behavior = "error" diff --git a/plugins/inputs/vsphere/vsphere.go b/plugins/inputs/vsphere/vsphere.go index ed9f55887..6946ad40a 100644 --- a/plugins/inputs/vsphere/vsphere.go +++ b/plugins/inputs/vsphere/vsphere.go @@ -22,45 +22,46 @@ var sampleConfig string // VSphere is the top level type for the vSphere input plugin. 
It contains all the configuration // and a list of connected vSphere endpoints type VSphere struct { - Vcenters []string - Username config.Secret `toml:"username"` - Password config.Secret `toml:"password"` - DatacenterInstances bool - DatacenterMetricInclude []string - DatacenterMetricExclude []string - DatacenterInclude []string - DatacenterExclude []string - ClusterInstances bool - ClusterMetricInclude []string - ClusterMetricExclude []string - ClusterInclude []string - ClusterExclude []string - ResourcePoolInstances bool - ResourcePoolMetricInclude []string - ResourcePoolMetricExclude []string - ResourcePoolInclude []string - ResourcePoolExclude []string - HostInstances bool - HostMetricInclude []string - HostMetricExclude []string - HostInclude []string - HostExclude []string - VMInstances bool `toml:"vm_instances"` - VMMetricInclude []string `toml:"vm_metric_include"` - VMMetricExclude []string `toml:"vm_metric_exclude"` - VMInclude []string `toml:"vm_include"` - VMExclude []string `toml:"vm_exclude"` - DatastoreInstances bool - DatastoreMetricInclude []string - DatastoreMetricExclude []string - DatastoreInclude []string - DatastoreExclude []string - Separator string - CustomAttributeInclude []string - CustomAttributeExclude []string - UseIntSamples bool - IPAddresses []string - MetricLookback int + Vcenters []string + Username config.Secret `toml:"username"` + Password config.Secret `toml:"password"` + DatacenterInstances bool + DatacenterMetricInclude []string + DatacenterMetricExclude []string + DatacenterInclude []string + DatacenterExclude []string + ClusterInstances bool + ClusterMetricInclude []string + ClusterMetricExclude []string + ClusterInclude []string + ClusterExclude []string + ResourcePoolInstances bool + ResourcePoolMetricInclude []string + ResourcePoolMetricExclude []string + ResourcePoolInclude []string + ResourcePoolExclude []string + HostInstances bool + HostMetricInclude []string + HostMetricExclude []string + HostInclude []string + 
HostExclude []string + VMInstances bool `toml:"vm_instances"` + VMMetricInclude []string `toml:"vm_metric_include"` + VMMetricExclude []string `toml:"vm_metric_exclude"` + VMInclude []string `toml:"vm_include"` + VMExclude []string `toml:"vm_exclude"` + DatastoreInstances bool + DatastoreMetricInclude []string + DatastoreMetricExclude []string + DatastoreInclude []string + DatastoreExclude []string + Separator string + CustomAttributeInclude []string + CustomAttributeExclude []string + UseIntSamples bool + IPAddresses []string + MetricLookback int + DisconnectedServersBehavior string MaxQueryObjects int MaxQueryMetrics int @@ -186,15 +187,16 @@ func init() { UseIntSamples: true, IPAddresses: []string{}, - MaxQueryObjects: 256, - MaxQueryMetrics: 256, - CollectConcurrency: 1, - DiscoverConcurrency: 1, - MetricLookback: 3, - ForceDiscoverOnInit: true, - ObjectDiscoveryInterval: config.Duration(time.Second * 300), - Timeout: config.Duration(time.Second * 60), - HistoricalInterval: config.Duration(time.Second * 300), + MaxQueryObjects: 256, + MaxQueryMetrics: 256, + CollectConcurrency: 1, + DiscoverConcurrency: 1, + MetricLookback: 3, + ForceDiscoverOnInit: true, + ObjectDiscoveryInterval: config.Duration(time.Second * 300), + Timeout: config.Duration(time.Second * 60), + HistoricalInterval: config.Duration(time.Second * 300), + DisconnectedServersBehavior: "error", } }) } diff --git a/plugins/inputs/vsphere/vsphere_test.go b/plugins/inputs/vsphere/vsphere_test.go index 6f41ac0a2..608e2a599 100644 --- a/plugins/inputs/vsphere/vsphere_test.go +++ b/plugins/inputs/vsphere/vsphere_test.go @@ -4,6 +4,7 @@ import ( "context" "crypto/tls" "fmt" + "net/url" "os" "strings" "testing" @@ -132,15 +133,16 @@ func defaultVSphere() *VSphere { DatacenterInclude: []string{"/**"}, ClientConfig: itls.ClientConfig{InsecureSkipVerify: true}, - MaxQueryObjects: 256, - MaxQueryMetrics: 256, - ObjectDiscoveryInterval: config.Duration(time.Second * 300), - Timeout: config.Duration(time.Second 
* 20), - ForceDiscoverOnInit: true, - DiscoverConcurrency: 1, - CollectConcurrency: 1, - Separator: ".", - HistoricalInterval: config.Duration(time.Second * 300), + MaxQueryObjects: 256, + MaxQueryMetrics: 256, + ObjectDiscoveryInterval: config.Duration(time.Second * 300), + Timeout: config.Duration(time.Second * 20), + ForceDiscoverOnInit: true, + DiscoverConcurrency: 1, + CollectConcurrency: 1, + Separator: ".", + HistoricalInterval: config.Duration(time.Second * 300), + DisconnectedServersBehavior: "error", } } @@ -449,6 +451,22 @@ func TestCollectionNoClusterMetrics(t *testing.T) { testCollection(t, true) } +func TestDisconnectedServerBehavior(t *testing.T) { + u, err := url.Parse("https://definitely.not.a.valid.host") + require.NoError(t, err) + v := defaultVSphere() + v.DisconnectedServersBehavior = "error" + _, err = NewEndpoint(context.Background(), v, u, v.Log) + require.Error(t, err) + v.DisconnectedServersBehavior = "ignore" + _, err = NewEndpoint(context.Background(), v, u, v.Log) + require.NoError(t, err) + v.DisconnectedServersBehavior = "something else" + _, err = NewEndpoint(context.Background(), v, u, v.Log) + require.Error(t, err) + require.Equal(t, err.Error(), `"something else" is not a valid value for disconnected_servers_behavior`) +} + func testCollection(t *testing.T, excludeClusters bool) { mustHaveMetrics := map[string]struct{}{ "vsphere.vm.cpu": {},