feat(inputs.vsphere): Adding vSAN extension to vSphere plugin (#11955)

parent e211bd3f28
commit b323d1ce3c

@@ -8,6 +8,7 @@ vCenter servers.

* Resource Pools
* VMs
* Datastores
* vSAN

## Supported versions of vSphere

@@ -177,6 +178,12 @@ to use them.

datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
# datacenter_instances = false ## false by default

## VSAN
# vsan_metric_include = [] ## if omitted or empty, all metrics are collected
# vsan_metric_exclude = [ "*" ] ## vSAN metrics are not collected by default.
## Whether to skip verifying vSAN metrics against the ones from the GetSupportedEntityTypes API.
# vsan_metric_skip_verify = false ## false by default.

## Plugin Settings
## separator character to use for measurement and field names (default: "_")
# separator = "_"

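As a minimal illustration (a sketch, not part of the shipped sample configuration), vSAN collection can be switched on by clearing the exclude list while leaving the include list empty:

```toml
## collect all supported vSAN metrics (vSAN is excluded by default)
vsan_metric_include = []
vsan_metric_exclude = []
```
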
@@ -243,7 +250,7 @@ to use them.

```

NOTE: To disable collection of a specific resource type, simply exclude all
metrics using the XX_metric_exclude. For example, to disable collection of VMs,
add this:

```toml

@@ -251,32 +258,34 @@ vm_metric_exclude = [ "*" ]

```

NOTE: To disable collection of a specific resource type, simply exclude all
metrics using the XX_metric_exclude.
For example, to disable collection of VMs, add this:

### Objects and Metrics per Query

By default, in the vCenter configuration a limit is set to the number of
entities that are included in a performance chart query. Default settings for
vCenter 6.5 and later is 256. Earlier versions of vCenter have this set to 64.
A vCenter administrator can change this setting.
See this [VMware KB article](https://kb.vmware.com/s/article/2107096) for more
information.

Any modification should be reflected in this plugin by modifying the parameter
`max_query_objects`:

```toml
## number of objects to retrieve per query for realtime resources (VMs and hosts)
## set to 64 for vCenter 5.5 and 6.0 (default: 256)
# max_query_objects = 256
```

### Collection and Discovery Concurrency

In large vCenter setups it may be prudent to have multiple concurrent go
routines collect performance metrics in order to avoid potential errors for
time elapsed during a collection cycle. This should never be greater than 8,
though the default of 1 (no concurrency) should be sufficient for most
configurations.

For setting up concurrency, modify the `collect_concurrency` and
`discover_concurrency` parameters.

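A minimal sketch of what that looks like (the vCenter address and credentials are placeholders, not values from this commit):

```toml
[[inputs.vsphere]]
vcenters = [ "https://vcenter.example.com/sdk" ]
username = "user@vsphere.local"
password = "secret"

## one concurrent goroutine per ~1500 monitored VMs is a reasonable starting point
collect_concurrency = 2
discover_concurrency = 2
```
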
@@ -289,8 +298,8 @@ For setting up concurrency, modify `collect_concurrency` and

### Inventory Paths

Resources to be monitored can be selected using Inventory Paths. This treats
the vSphere inventory as a tree structure similar to a file system. A vSphere
inventory has a structure similar to this:

```bash

@@ -330,15 +339,15 @@ Often, we want to select a group of resources, such as all the VMs in a
folder. We could use the path `/DC0/vm/Folder1/*` for that.

Another possibility is to select objects using a partial name, such as
`/DC0/vm/Folder1/hadoop*` yielding all VMs in Folder1 with a name starting
with "hadoop".

Finally, due to the arbitrary nesting of the folder structure, we need a
"recursive wildcard" for traversing multiple folders. We use the "**" symbol
for that. If we want to look for a VM with a name starting with "hadoop" in
any folder, we could use the following path: `/DC0/vm/**/hadoop*`

#### Multiple Paths to VMs

As we can see from the example tree above, VMs appear both in their own folder
under the datacenter, as well as under the hosts. This is useful when you'd like

@@ -368,7 +377,7 @@ be traversed.

## Performance Considerations

### Realtime vs. Historical Metrics

vCenter keeps two different kinds of metrics, known as realtime and historical
metrics.

@@ -377,15 +386,15 @@ metrics.

* Historical metrics: Available at a (default) 5 minute, 30 minutes, 2 hours and 24 hours rollup levels. The vSphere Telegraf plugin only uses the most granular rollup which defaults to 5 minutes but can be changed in vCenter to other interval durations. These metrics are stored in the vCenter database and can be expensive and slow to query. Historical metrics are the only type of metrics available for **clusters**, **datastores**, **resource pools** and **datacenters**.

This distinction has an impact on how Telegraf collects metrics. A single
instance of an input plugin can have one and only one collection interval,
which means that you typically set the collection interval based on the most
frequently collected metric. Let's assume you set the collection interval to 1
minute. All realtime metrics will be collected every minute. Since the
historical metrics are only available on a 5 minute interval, the vSphere
Telegraf plugin automatically skips four out of five collection cycles for
these metrics. This works fine in many cases. Problems arise when the
collection of historical metrics takes longer than the collection interval.
This will cause error messages similar to this to appear in the Telegraf logs:

```text
2019-01-16T13:41:10Z W! [agent] input "inputs.vsphere" did not complete within its interval

@@ -394,8 +403,8 @@ error messages similar to this to appear in the Telegraf logs:

This will disrupt the metric collection and can result in missed samples. The
best practice workaround is to specify two instances of the vSphere plugin, one
for the realtime metrics with a short collection interval and one for the
historical metrics with a longer interval. You can use the `*_metric_exclude`
to turn off the resources you don't want to collect metrics for in each
instance. For example:

```toml

@@ -414,6 +423,7 @@ instance. For example:

cluster_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
resourcepool_metric_exclude = ["*"]
vsan_metric_exclude = ["*"]

collect_concurrency = 5
discover_concurrency = 5

@@ -436,14 +446,14 @@ instance. For example:

collect_concurrency = 3
```

### Configuring max_query_metrics Setting

The `max_query_metrics` determines the maximum number of metrics to attempt to
retrieve in one call to vCenter. Generally speaking, a higher number means
faster and more efficient queries. However, the number of allowed metrics in a
query is typically limited in vCenter by the `config.vpxd.stats.maxQueryMetrics`
setting in vCenter. The value defaults to 64 on vSphere 5.5 and earlier and to
256 on more recent versions. The vSphere plugin always checks this setting and
will automatically reduce the number if the limit configured in vCenter is lower
than max_query_metrics in the plugin. This will result in a log message similar
to this:

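For reference, a sketch of the corresponding plugin-side setting; the value mirrors the vCenter default discussed above:

```toml
## number of metrics to retrieve per query for non-realtime resources (clusters and datastores)
## set to 64 for vCenter 5.5 and 6.0 (default: 256)
# max_query_metrics = 256
```
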
@@ -455,15 +465,15 @@ to this:

You may ask a vCenter administrator to increase this limit to help boost
performance.

### Cluster Metrics and the max_query_metrics Setting

Cluster metrics are handled a bit differently by vCenter. They are aggregated
from ESXi and virtual machine metrics and may not be available when you query
their most recent values. When this happens, vCenter will attempt to perform
that aggregation on the fly. Unfortunately, all the subqueries needed
internally in vCenter to perform this aggregation will count towards
`config.vpxd.stats.maxQueryMetrics`. This means that even a very small query
may result in an error message similar to this:

```text
2018-11-02T13:37:11Z E! Error in plugin [inputs.vsphere]: ServerFaultCode: This operation is restricted by the administrator - 'vpxd.stats.maxQueryMetrics'. Contact your system administrator

@@ -474,22 +484,22 @@ There are two ways of addressing this:

* Ask your vCenter administrator to set `config.vpxd.stats.maxQueryMetrics` to a number that's higher than the total number of virtual machines managed by a vCenter instance.
* Exclude the cluster metrics and use either the basicstats aggregator to calculate sums and averages per cluster or use queries in the visualization tool to obtain the same result.

### Concurrency Settings

The vSphere plugin allows you to specify two concurrency settings:

* `collect_concurrency`: The maximum number of simultaneous queries for performance metrics allowed per resource type.
* `discover_concurrency`: The maximum number of simultaneous queries for resource discovery allowed.

While a higher level of concurrency typically has a positive impact on
performance, increasing these numbers too much can cause performance issues at
the vCenter server. A rule of thumb is to set these parameters to the number of
virtual machines divided by 1500, rounded up to the nearest integer. For
example, monitoring 4000 virtual machines gives ceil(4000 / 1500) = 3.

### Configuring historical_interval Setting

When the vSphere plugin queries vCenter for historical statistics it queries for
statistics that exist at a specific interval. The default historical interval
duration is 5 minutes but if this interval has been changed then you must
override the default query interval in the vSphere plugin, as sketched below.

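A sketch of that override (the 30-minute value is illustrative and must match the most granular rollup interval actually configured in vCenter):

```toml
## the plugin queries the most granular historical rollup;
## override if vCenter was changed from the 5-minute default
# historical_interval = "30m"
```
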
@@ -569,6 +579,230 @@ For a detailed list of commonly available metrics, please refer to

* virtualDisk stats for VM
  * disk (name of virtual disk)

## Add a vSAN extension

A vSAN resource is a special type of resource that can be collected by the
plugin. The configuration of a vSAN resource differs slightly from the
configuration of hosts, VMs, and other resources.

### Prerequisites for vSAN

* vSphere 6.5 and later
* Clusters with vSAN enabled
* [Turn on the Virtual SAN performance service](https://docs.vmware.com/en/VMware-vSphere/6.5/com.vmware.vsphere.virtualsan.doc/GUID-02F67DC3-3D5A-48A4-A445-D2BD6AF2862C.html): When you create a vSAN cluster, the performance service is disabled. To monitor the performance metrics, you must turn on the vSAN performance service.

### vSAN Configuration

```toml
[[inputs.vsphere]]
interval = "300s"
vcenters = ["https://<vcenter-ip>/sdk", "https://<vcenter2-ip>/sdk"]
username = "<user>"
password = "<pwd>"

# Exclude all other metrics
vm_metric_exclude = ["*"]
datastore_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
host_metric_exclude = ["*"]
cluster_metric_exclude = ["*"]

# By default all supported entities will be included
vsan_metric_include = [
  "summary.disk-usage",
  "summary.health",
  "summary.resync",
  "performance.cluster-domclient",
  "performance.cluster-domcompmgr",
  "performance.host-domclient",
  "performance.host-domcompmgr",
  "performance.cache-disk",
  "performance.disk-group",
  "performance.capacity-disk",
  "performance.virtual-machine",
  "performance.vscsi",
  "performance.virtual-disk",
  "performance.vsan-host-net",
  "performance.vsan-vnic-net",
  "performance.vsan-pnic-net",
  "performance.vsan-iscsi-host",
  "performance.vsan-iscsi-target",
  "performance.vsan-iscsi-lun",
  "performance.lsom-world-cpu",
  "performance.nic-world-cpu",
  "performance.dom-world-cpu",
  "performance.cmmds-world-cpu",
  "performance.host-cpu",
  "performance.host-domowner",
  "performance.host-memory-slab",
  "performance.host-memory-heap",
  "performance.system-mem",
]
# by default vsan_metric_skip_verify = false
vsan_metric_skip_verify = true
vsan_metric_exclude = [ ]
# vsan_cluster_include = [ "/*/host/**" ] # Inventory path to clusters to collect (by default all are collected)

collect_concurrency = 5
discover_concurrency = 5

## Optional SSL Config
# ssl_ca = "/path/to/cafile"
# ssl_cert = "/path/to/certfile"
# ssl_key = "/path/to/keyfile"
## Use SSL but skip chain & host verification
# insecure_skip_verify = false
```

* Use `vsan_metric_include = [...]` to define the vSAN metrics that you want to collect.
  For example, `vsan_metric_include = ["summary.*", "performance.host-domclient", "performance.cache-disk", "performance.disk-group", "performance.capacity-disk"]`.
  To include all supported vSAN metrics, use `vsan_metric_include = [ "*" ]`.
  To disable all vSAN metrics, use `vsan_metric_exclude = [ "*" ]`.

* `vsan_metric_skip_verify` defines whether to skip verifying vSAN metrics against the ones from the [GetSupportedEntityTypes API](https://code.vmware.com/apis/48/vsan#/doc/vim.cluster.VsanPerformanceManager.html#getSupportedEntityTypes).
  This option exists because some performance entities are not returned by the API, but we want to offer the flexibility in case you really need those stats.
  When set to false, anything not in the supported entity list is filtered out.
  When set to true, the queried metrics are exactly those in `vsan_metric_include`, and the exclude array is ignored. By default the value is false.

* `vsan_cluster_include` defines a list of inventory paths that select which vSAN clusters to collect. vSAN metrics are collected only at the cluster level, so these paths work the same way as the inventory paths for [vSphere clusters](README.md#inventory-paths); see the example after this list.

* Many vCenter environments use self-signed certificates. Update the bottom portion of the above configuration and provide proper values for all applicable SSL Config settings that apply in your vSphere environment. In some environments, setting `insecure_skip_verify = true` will be necessary when the SSL certificates are not available.

* To ensure consistent collection in larger vSphere environments, you must increase concurrency for the plugin. Use the `collect_concurrency` setting to control concurrency. Set `collect_concurrency` to the number of virtual machines divided by 1500, rounded up to the nearest integer. For example, for 1200 VMs use 1, and for 2300 VMs use 2.

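For illustration, a sketch of selecting a single cluster by inventory path (the datacenter and cluster names are placeholders):

```toml
## collect vSAN metrics only from Cluster1 in datacenter DC0
vsan_cluster_include = [ "/DC0/host/Cluster1" ]
```
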
### Measurements & Fields

**NOTE**: Depending on the vSAN version, the vSAN performance measurements
and fields may vary.

* vSAN Summary
  * overall_health
  * total_capacity_bytes, free_capacity_bytes
  * total_bytes_to_sync, total_objects_to_sync, total_recovery_eta

* vSAN Performance
  * cluster-domclient
    * iops_read, throughput_read, latency_avg_read, iops_write, throughput_write, latency_avg_write, congestion, oio
  * cluster-domcompmgr
    * iops_read, throughput_read, latency_avg_read, iops_write, throughput_write, latency_avg_write, iops_rec_write, throughput_rec_write, latency_avg_rec_write, congestion, oio, iops_resync_read, tput_resync_read, lat_avg_resync_read
  * host-domclient
    * iops_read, throughput_read, latency_avg_read, read_count, iops_write, throughput_write, latency_avg_write, write_count, congestion, oio, client_cache_hits, client_cache_hit_rate
  * host-domcompmgr
    * iops_read, throughput_read, latency_avg_read, read_count, iops_write, throughput_write, latency_avg_write, write_count, iops_rec_write, throughput_rec_write, latency_avg_rec_write, rec_write_count, congestion, oio, iops_resync_read, tput_resync_read, lat_avg_resync_read
  * cache-disk
    * iops_dev_read, throughput_dev_read, latency_dev_read, io_count_dev_read, iops_dev_write, throughput_dev_write, latency_dev_write, io_count_dev_write, latency_dev_d_avg, latency_dev_g_avg
  * capacity-disk
    * iops_dev_read, throughput_dev_read, latency_dev_read, io_count_dev_read, iops_dev_write, throughput_dev_write, latency_dev_write, io_count_dev_write, latency_dev_d_avg, latency_dev_g_avg, iops_read, latency_read, io_count_read, iops_write, latency_write, io_count_write
  * disk-group
    * iops_sched, latency_sched, outstanding_bytes_sched, iops_sched_queue_rec, throughput_sched_queue_rec, latency_sched_queue_rec, iops_sched_queue_vm, throughput_sched_queue_vm, latency_sched_queue_vm, iops_sched_queue_meta, throughput_sched_queue_meta, latency_sched_queue_meta, iops_delay_pct_sched, latency_delay_sched, rc_hit_rate, wb_free_pct, war_evictions, quota_evictions, iops_rc_read, latency_rc_read, io_count_rc_read, iops_wb_read, latency_wb_read, io_count_wb_read, iops_rc_write, latency_rc_write, io_count_rc_write, iops_wb_write, latency_wb_write, io_count_wb_write, ssd_bytes_drained, zero_bytes_drained, mem_congestion, slab_congestion, ssd_congestion, iops_congestion, log_congestion, comp_congestion, iops_direct_sched, iops_read, throughput_read, latency_avg_read, read_count, iops_write, throughput_write, latency_avg_write, write_count, oio_write, oio_rec_write, oio_write_size, oio_rec_write_size, rc_size, wb_size, capacity, capacity_used, capacity_reserved, throughput_sched, iops_resync_read_policy, iops_resync_read_decom, iops_resync_read_rebalance, iops_resync_read_fix_comp, iops_resync_write_policy, iops_resync_write_decom, iops_resync_write_rebalance, iops_resync_write_fix_comp, tput_resync_read_policy, tput_resync_read_decom, tput_resync_read_rebalance, tput_resync_read_fix_comp, tput_resync_write_policy, tput_resync_write_decom, tput_resync_write_rebalance, tput_resync_write_fix_comp, lat_resync_read_policy, lat_resync_read_decom, lat_resync_read_rebalance, lat_resync_read_fix_comp, lat_resync_write_policy, lat_resync_write_decom, lat_resync_write_rebalance, lat_resync_write_fix_comp
  * virtual-machine
    * iops_read, throughput_read, latency_read_avg, latency_read_stddev, read_count, iops_write, throughput_write, latency_write_avg, latency_write_stddev, write_count
  * vscsi
    * iops_read, throughput_read, latency_read, read_count, iops_write, throughput_write, latency_write, write_count
  * virtual-disk
    * iops_limit, niops, niops_delayed
  * vsan-host-net
    * rx_throughput, rx_packets, rx_packets_loss_rate, tx_throughput, tx_packets, tx_packets_loss_rate
  * vsan-vnic-net
    * rx_throughput, rx_packets, rx_packets_loss_rate, tx_throughput, tx_packets, tx_packets_loss_rate
  * vsan-pnic-net
    * rx_throughput, rx_packets, rx_packets_loss_rate, tx_throughput, tx_packets, tx_packets_loss_rate
  * vsan-iscsi-host
    * iops_read, iops_write, iops_total, bandwidth_read, bandwidth_write, bandwidth_total, latency_read, latency_write, latency_total, queue_depth
  * vsan-iscsi-target
    * iops_read, iops_write, iops_total, bandwidth_read, bandwidth_write, bandwidth_total, latency_read, latency_write, latency_total, queue_depth
  * vsan-iscsi-lun
    * iops_read, iops_write, iops_total, bandwidth_read, bandwidth_write, bandwidth_total, latency_read, latency_write, latency_total, queue_depth

### vSAN Tags

* all vSAN metrics
  * vcenter
  * dcname
  * clustername
  * moid (the cluster's managed object id)
* host-domclient, host-domcompmgr
  * hostname
* disk-group, cache-disk, capacity-disk
  * hostname
  * devicename
  * ssduuid (if SSD)
* vsan-host-net
  * hostname
* vsan-pnic-net
  * pnic
* vsan-vnic-net
  * vnic
  * stackName

### Realtime vs. Historical Metrics in vSAN

vSAN also keeps two different kinds of metrics: realtime and historical
metrics.

* Realtime metrics are metrics with the prefix 'summary'. These metrics are available in realtime.
* Historical metrics are metrics with the prefix 'performance'. These are metrics queried from the vSAN performance API, which is available at a 5-minute rollup level.

For performance reasons, it is better to specify two instances of the plugin:
one for the realtime metrics with a short collection interval, and a second
one for the historical metrics with a longer interval. For example:

```toml
## Realtime instance
[[inputs.vsphere]]
interval = "30s"
vcenters = [ "https://someaddress/sdk" ]
username = "someuser@vsphere.local"
password = "secret"

insecure_skip_verify = true
force_discover_on_init = true

# Exclude all other metrics
vm_metric_exclude = ["*"]
datastore_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
host_metric_exclude = ["*"]
cluster_metric_exclude = ["*"]

vsan_metric_include = [ "summary.*" ]
vsan_metric_exclude = [ ]
vsan_metric_skip_verify = false

collect_concurrency = 5
discover_concurrency = 5

## Historical instance
[[inputs.vsphere]]
interval = "300s"
vcenters = [ "https://someaddress/sdk" ]
username = "someuser@vsphere.local"
password = "secret"

insecure_skip_verify = true
force_discover_on_init = true

# Exclude all other metrics
vm_metric_exclude = ["*"]
datastore_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
host_metric_exclude = ["*"]
cluster_metric_exclude = ["*"]

vsan_metric_include = [ "performance.*" ]
vsan_metric_exclude = [ ]
vsan_metric_skip_verify = false

collect_concurrency = 5
discover_concurrency = 5
```

## Example Output

```text

@@ -677,3 +911,14 @@ vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,
vsphere_host_mem,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 usage_average=116.21 1535660339000000000
vsphere_host_net,clustername=DC0_C0,esxhostname=DC0_C0_H0,host=host.example.com,moid=host-30,os=Mac,source=DC0_C0_H0,vcenter=localhost:8989 bytesRx_average=726i,bytesTx_average=643i,usage_average=1504i 1535660339000000000
```

## vSAN Sample Output

```text
vsphere_vsan_performance_hostdomclient,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,hostname=DC0_C0_H0,moid=domain-c8,source=Example-VSAN,vcenter=localhost:8898 iops_read=7,write_congestion=0,unmap_congestion=0,read_count=2199,iops=8,latency_max_write=8964,latency_avg_unmap=0,latency_avg_write=1883,write_count=364,num_oio=12623,throughput=564127,client_cache_hits=0,latency_max_read=17821,latency_max_unmap=0,read_congestion=0,latency_avg=1154,congestion=0,throughput_read=554721,latency_avg_read=1033,throughput_write=9406,client_cache_hit_rate=0,iops_unmap=0,throughput_unmap=0,latency_stddev=1315,io_count=2563,oio=4,iops_write=1,unmap_count=0 1578955200000000000
vsphere_vsan_performance_clusterdomcompmgr,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,moid=domain-c7,source=Example-VSAN,uuid=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX,vcenter=localhost:8898 latency_avg_rec_write=0,latency_avg_write=9886,congestion=0,iops_resync_read=0,lat_avg_resync_read=0,iops_read=289,latency_avg_read=1184,throughput_write=50137368,iops_rec_write=0,throughput_rec_write=0,tput_resync_read=0,throughput_read=9043654,iops_write=1272,oio=97 1578954900000000000
vsphere_vsan_performance_clusterdomclient,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,moid=domain-c7,source=Example-VSAN,uuid=XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXX,vcenter=localhost:8898 latency_avg_write=1011,congestion=0,oio=26,iops_read=6,throughput_read=489093,latency_avg_read=1085,iops_write=43,throughput_write=435142 1578955200000000000
vsphere_vsan_summary,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,moid=domain-c7,source=Example-VSAN,vcenter=localhost:8898 total_bytes_to_sync=0i,total_objects_to_sync=0i,total_recovery_eta=0i 1578955489000000000
vsphere_vsan_summary,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,moid=domain-c7,source=Example-VSAN,vcenter=localhost:8898 overall_health=1i 1578955489000000000
vsphere_vsan_summary,clustername=Example-VSAN,dcname=Example-DC,host=host.example.com,moid=domain-c7,source=Example-VSAN,vcenter=localhost:8898 free_capacity_byte=11022535578757i,total_capacity_byte=14102625779712i 1578955488000000000
```

@@ -60,6 +60,7 @@ type Endpoint struct {
    metricNameLookup map[int32]string
    metricNameMux    sync.RWMutex
    log              telegraf.Logger
    apiVersion       string
}

type resourceKind struct {

@@ -237,6 +238,23 @@ func NewEndpoint(ctx context.Context, parent *VSphere, address *url.URL, log tel
            getObjects: getDatastores,
            parent:     "",
        },
        "vsan": {
            name:             "vsan",
            vcName:           "ClusterComputeResource",
            pKey:             "clustername",
            parentTag:        "dcname",
            enabled:          anythingEnabled(parent.VSANMetricExclude),
            realTime:         false,
            sampling:         300,
            objects:          make(objectMap),
            filters:          newFilterOrPanic(parent.VSANMetricInclude, parent.VSANMetricExclude),
            paths:            parent.VSANClusterInclude,
            simple:           parent.VSANMetricSkipVerify,
            include:          parent.VSANMetricInclude,
            collectInstances: false,
            getObjects:       getClusters,
            parent:           "datacenter",
        },
    }

    // Start discover and other goodness

@@ -445,7 +463,10 @@ func (e *Endpoint) discover(ctx context.Context) error {
        return err
    }

    // get the vSphere API version
    e.apiVersion = client.Client.ServiceContent.About.ApiVersion

    e.Parent.Log.Debugf("Discover new objects for %s", e.URL.Host)
    dcNameCache := make(map[string]string)

    numRes := int64(0)

@@ -455,7 +476,7 @@ func (e *Endpoint) discover(ctx context.Context) error {
    for k, res := range e.resourceKinds {
        e.log.Debugf("Discovering resources for %s", res.name)
        // Need to do this for all resource types even if they are not enabled
        if res.enabled || (k != "vm" && k != "vsan") {
            rf := ResourceFilter{
                finder:  &Finder{client},
                resType: res.vcName,

@@ -480,7 +501,8 @@ func (e *Endpoint) discover(ctx context.Context) error {
        }

        // No need to collect metric metadata if resource type is not enabled.
        // vSAN is also skipped since vSAN metadata follows its own format.
        if res.enabled && k != "vsan" {
            if res.simple {
                e.simpleMetadataSelect(ctx, client, res)
            } else {

@@ -935,7 +957,12 @@ func (e *Endpoint) Collect(ctx context.Context, acc telegraf.Accumulator) error
        wg.Add(1)
        go func(k string) {
            defer wg.Done()
            var err error
            if k == "vsan" {
                err = e.collectVsan(ctx, acc)
            } else {
                err = e.collectResource(ctx, k, acc)
            }
            if err != nil {
                acc.AddError(err)
            }

@@ -136,6 +136,12 @@
datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
# datacenter_instances = false ## false by default

## VSAN
# vsan_metric_include = [] ## if omitted or empty, all metrics are collected
# vsan_metric_exclude = [ "*" ] ## vSAN metrics are not collected by default.
## Whether to skip verifying vSAN metrics against the ones from the GetSupportedEntityTypes API.
# vsan_metric_skip_verify = false ## false by default.

## Plugin Settings
## separator character to use for measurement and field names (default: "_")
# separator = "_"

@@ -0,0 +1,520 @@
package vsphere

import (
    "context"
    "encoding/json"
    "fmt"
    "strconv"
    "strings"
    "time"

    "github.com/coreos/go-semver/semver"
    "github.com/vmware/govmomi/object"
    "github.com/vmware/govmomi/vim25"
    "github.com/vmware/govmomi/vim25/methods"
    "github.com/vmware/govmomi/vim25/soap"
    "github.com/vmware/govmomi/vim25/types"
    vsanmethods "github.com/vmware/govmomi/vsan/methods"
    vsantypes "github.com/vmware/govmomi/vsan/types"

    "github.com/influxdata/telegraf"
    "github.com/influxdata/telegraf/internal"
)

const (
    vsanNamespace    = "vsan"
    vsanPath         = "/vsanHealth"
    hwMarksKeyPrefix = "vsan-perf-"
    perfPrefix       = "performance."
)

var (
    vsanPerfMetricsName    string
    vsanSummaryMetricsName string
    perfManagerRef         = types.ManagedObjectReference{
        Type:  "VsanPerformanceManager",
        Value: "vsan-performance-manager",
    }
    hyphenReplacer = strings.NewReplacer("-", "")
)

// collectVsan is the entry point for vsan metrics collection
func (e *Endpoint) collectVsan(ctx context.Context, acc telegraf.Accumulator) error {
    if versionLowerThan(e.apiVersion, "5.5") {
        return fmt.Errorf("a minimum API version of 5.5 is required for vSAN. Found: %s. Skipping vCenter: %s", e.apiVersion, e.URL.Host)
    }
    vsanPerfMetricsName = strings.Join([]string{"vsphere", "vsan", "performance"}, e.Parent.Separator)
    vsanSummaryMetricsName = strings.Join([]string{"vsphere", "vsan", "summary"}, e.Parent.Separator)
    res := e.resourceKinds["vsan"]
    client, err := e.clientFactory.GetClient(ctx)
    if err != nil {
        return fmt.Errorf("fail to get client when collecting vsan: %w", err)
    }
    // Create the vSAN client, which talks to the dedicated /vsanHealth endpoint
    vimClient := client.Client.Client
    vsanClient := vimClient.NewServiceClient(vsanPath, vsanNamespace)
    // vSAN metrics to collect
    metrics := e.getVsanMetadata(ctx, vsanClient, res)
    // Iterate over all clusters, run a goroutine for each cluster
    te := NewThrottledExecutor(e.Parent.CollectConcurrency)
    for _, obj := range res.objects {
        obj := obj // capture the loop variable for the goroutine below
        te.Run(ctx, func() {
            e.collectVsanPerCluster(ctx, obj, vimClient, vsanClient, metrics, acc)
        })
    }
    te.Wait()
    return nil
}

// collectVsanPerCluster is called by goroutines in the collectVsan function.
func (e *Endpoint) collectVsanPerCluster(ctx context.Context, clusterRef *objectRef, vimClient *vim25.Client, vsanClient *soap.Client,
    metrics map[string]string, acc telegraf.Accumulator) {
    cluster := object.NewClusterComputeResource(vimClient, clusterRef.ref)
    if !e.vsanEnabled(ctx, cluster) {
        acc.AddError(fmt.Errorf("[vSAN] Fail to identify vSAN for cluster %s. Skipping", clusterRef.name))
        return
    }
    // Do collection
    if _, ok := metrics["summary.disk-usage"]; ok {
        if err := e.queryDiskUsage(ctx, vsanClient, clusterRef, acc); err != nil {
            acc.AddError(fmt.Errorf("error querying disk usage for cluster %s: %w", clusterRef.name, err))
        }
    }
    if _, ok := metrics["summary.health"]; ok {
        if err := e.queryHealthSummary(ctx, vsanClient, clusterRef, acc); err != nil {
            acc.AddError(fmt.Errorf("error querying vsan health summary for cluster %s: %w", clusterRef.name, err))
        }
    }
    if _, ok := metrics["summary.resync"]; ok {
        if err := e.queryResyncSummary(ctx, vsanClient, cluster, clusterRef, acc); err != nil {
            acc.AddError(fmt.Errorf("error querying vsan resync summary for cluster %s: %w", clusterRef.name, err))
        }
    }
    // Construct a map for cmmds; performance entities are tagged with data from it
    cmmds, err := getCmmdsMap(ctx, vimClient, cluster)
    if err != nil {
        e.Parent.Log.Errorf("[vSAN] Error while querying cmmds data. Error: %s. Skipping", err)
        cmmds = make(map[string]CmmdsEntity)
    }
    if err := e.queryPerformance(ctx, vsanClient, clusterRef, metrics, cmmds, acc); err != nil {
        acc.AddError(fmt.Errorf("error querying performance metrics for cluster %s: %w", clusterRef.name, err))
    }
}

// vsanEnabled returns true if vSAN is enabled on the cluster, otherwise false
func (e *Endpoint) vsanEnabled(ctx context.Context, clusterObj *object.ClusterComputeResource) bool {
    config, err := clusterObj.Configuration(ctx)
    if err != nil {
        return false
    }
    enabled := config.VsanConfigInfo.Enabled
    return enabled != nil && *enabled
}

// getVsanMetadata returns a string list of the entity types that will be queried,
// e.g. ["summary.health", "summary.disk-usage", "summary.resync", "performance.cluster-domclient", "performance.host-domclient"]
func (e *Endpoint) getVsanMetadata(ctx context.Context, vsanClient *soap.Client, res *resourceKind) map[string]string {
    metrics := make(map[string]string)
    if res.simple { // Skip getting supported entity types from vCenter. Use the user-defined metrics without verifying.
        for _, entity := range res.include {
            if strings.Contains(entity, "*") {
                e.Parent.Log.Infof("[vSAN] Won't use wildcard match \"*\" when vsan_metric_skip_verify = true. Skipping")
                continue
            }
            metrics[entity] = ""
        }
        return metrics
    }
    // Use the include & exclude configuration to filter all summary metrics
    for _, entity := range []string{"summary.health", "summary.disk-usage", "summary.resync"} {
        if res.filters.Match(entity) {
            metrics[entity] = ""
        }
    }
    resp, err := vsanmethods.VsanPerfGetSupportedEntityTypes(ctx, vsanClient,
        &vsantypes.VsanPerfGetSupportedEntityTypes{
            This: perfManagerRef,
        })
    if err != nil {
        e.Parent.Log.Errorf("[vSAN] Fail to get supported entities: %v. Skipping vsan performance data.", err)
        return metrics
    }
    // Use the include & exclude configuration to filter all supported performance metrics
    for _, entity := range resp.Returnval {
        if res.filters.Match(perfPrefix + entity.Name) {
            metrics[perfPrefix+entity.Name] = ""
        }
    }
    return metrics
}

// getCmmdsMap returns a map which maps a uuid to a CmmdsEntity
func getCmmdsMap(ctx context.Context, client *vim25.Client, clusterObj *object.ClusterComputeResource) (map[string]CmmdsEntity, error) {
    hosts, err := clusterObj.Hosts(ctx)
    if err != nil {
        return nil, fmt.Errorf("fail to get host: %w", err)
    }

    if len(hosts) == 0 {
        return make(map[string]CmmdsEntity), nil
    }

    queries := []types.HostVsanInternalSystemCmmdsQuery{
        {Type: "HOSTNAME"},
        {Type: "DISK"},
    }

    // Some ESXi hosts can be down or in maintenance mode, and the cmmds query might fail on
    // such hosts, so we iterate until we get a proper API response.
    var resp *types.QueryCmmdsResponse
    for _, host := range hosts {
        vis, err := host.ConfigManager().VsanInternalSystem(ctx)
        if err != nil {
            continue
        }
        request := types.QueryCmmds{
            This:    vis.Reference(),
            Queries: queries,
        }
        resp, err = methods.QueryCmmds(ctx, client.RoundTripper, &request)
        if err == nil {
            break
        }
    }
    if resp == nil {
        return nil, fmt.Errorf("all hosts fail to query cmmds")
    }
    var clusterCmmds Cmmds
    if err := json.Unmarshal([]byte(resp.Returnval), &clusterCmmds); err != nil {
        return nil, fmt.Errorf("fail to convert cmmds to json: %w", err)
    }

    cmmdsMap := make(map[string]CmmdsEntity)
    for _, entity := range clusterCmmds.Res {
        cmmdsMap[entity.UUID] = entity
    }
    return cmmdsMap, nil
}

// queryPerformance adds performance metrics to the telegraf accumulator
func (e *Endpoint) queryPerformance(ctx context.Context, vsanClient *soap.Client, clusterRef *objectRef, metrics map[string]string,
    cmmds map[string]CmmdsEntity, acc telegraf.Accumulator) error {
    end := time.Now().UTC()

    // We're using a fake metric key, since we only store one highwater mark per resource
    start, ok := e.hwMarks.Get(hwMarksKeyPrefix+clusterRef.ref.Value, "generic")
    if !ok {
        // Look back 3 sampling periods by default
        start = end.Add(time.Duration(e.Parent.MetricLookback) * time.Duration(-e.resourceKinds["vsan"].sampling) * time.Second)
    }
    e.Parent.Log.Debugf("[vSAN] Query vsan performance for time interval: %s ~ %s", start, end)
    latest := start

    var commonError error
    for entityRefID := range metrics {
        if !strings.HasPrefix(entityRefID, perfPrefix) {
            continue
        }
        entityRefID = strings.TrimPrefix(entityRefID, perfPrefix)
        var perfSpecs []vsantypes.VsanPerfQuerySpec

        perfSpec := vsantypes.VsanPerfQuerySpec{
            EntityRefId: entityRefID + ":*",
            StartTime:   &start,
            EndTime:     &end,
        }
        perfSpecs = append(perfSpecs, perfSpec)

        perfRequest := vsantypes.VsanPerfQueryPerf{
            This:       perfManagerRef,
            QuerySpecs: perfSpecs,
            Cluster:    &clusterRef.ref,
        }
        resp, err := vsanmethods.VsanPerfQueryPerf(ctx, vsanClient, &perfRequest)
        if err != nil {
            if err.Error() == "ServerFaultCode: NotFound" {
                e.Parent.Log.Errorf("[vSAN] Is vSAN performance service enabled for %s? Skipping ...", clusterRef.name)
                commonError = err
                break
            }
            e.Parent.Log.Errorf("[vSAN] Error querying performance data for %s: %s: %s.", clusterRef.name, entityRefID, err)
            continue
        }
        tags := populateClusterTags(make(map[string]string), clusterRef, e.URL.Host)

        count := 0
        for _, em := range resp.Returnval {
            vals := strings.Split(em.EntityRefId, ":")
            var entityName, uuid string
            if len(vals) == 1 {
                entityName, uuid = vals[0], ""
            } else {
                entityName, uuid = vals[0], vals[1]
            }

            buckets := make(map[string]metricEntry)
            tags := populateCMMDSTags(tags, entityName, uuid, cmmds)
            var timeStamps []time.Time
            // 1. Construct a timestamp list from sample info
            formattedEntityName := hyphenReplacer.Replace(entityName)
            for _, t := range strings.Split(em.SampleInfo, ",") {
                // Parse the input string to a time.Time object
                utcTimeStamp, err := time.Parse("2006-01-02 15:04:05", t)
                if err != nil {
                    e.Parent.Log.Errorf("[vSAN] Failed to parse a timestamp: %s. Skipping", t)
                    timeStamps = append(timeStamps, time.Time{})
                    continue
                }
                timeStamps = append(timeStamps, utcTimeStamp)
            }
            // 2. Iterate on each measurement
            for _, counter := range em.Value {
                metricLabel := internal.SnakeCase(counter.MetricId.Label)
                // 3. Iterate on each data point.
                for i, values := range strings.Split(counter.Values, ",") {
                    ts := timeStamps[i]
                    if ts.IsZero() {
                        continue
                    }
                    // Organize the metrics into a bucket per measurement.
                    bKey := em.EntityRefId + " " + strconv.FormatInt(ts.UnixNano(), 10)
                    bucket, found := buckets[bKey]
                    if !found {
                        mn := vsanPerfMetricsName + e.Parent.Separator + formattedEntityName
                        bucket = metricEntry{name: mn, ts: ts, fields: make(map[string]interface{}), tags: tags}
                        buckets[bKey] = bucket
                    }
                    if v, err := strconv.ParseFloat(values, 32); err == nil {
                        bucket.fields[metricLabel] = v
                    }
                }
            }
            if len(timeStamps) > 0 {
                lastSample := timeStamps[len(timeStamps)-1]
                if lastSample != (time.Time{}) && lastSample.After(latest) {
                    latest = lastSample
                }
            }
            // We've iterated through all the metrics and collected buckets for each measurement name. Now emit them!
            for _, bucket := range buckets {
                acc.AddFields(bucket.name, bucket.fields, bucket.tags, bucket.ts)
            }
            count += len(buckets)
        }
    }
    e.hwMarks.Put(hwMarksKeyPrefix+clusterRef.ref.Value, "generic", latest)
    return commonError
}

// queryDiskUsage adds 'FreeCapacityB' and 'TotalCapacityB' metrics to the telegraf accumulator
func (e *Endpoint) queryDiskUsage(ctx context.Context, vsanClient *soap.Client, clusterRef *objectRef, acc telegraf.Accumulator) error {
    spaceManagerRef := types.ManagedObjectReference{
        Type:  "VsanSpaceReportSystem",
        Value: "vsan-cluster-space-report-system",
    }
    resp, err := vsanmethods.VsanQuerySpaceUsage(ctx, vsanClient,
        &vsantypes.VsanQuerySpaceUsage{
            This:    spaceManagerRef,
            Cluster: clusterRef.ref,
        })
    if err != nil {
        return err
    }
    fields := map[string]interface{}{
        "free_capacity_byte":  resp.Returnval.FreeCapacityB,
        "total_capacity_byte": resp.Returnval.TotalCapacityB,
    }
    tags := populateClusterTags(make(map[string]string), clusterRef, e.URL.Host)
    acc.AddFields(vsanSummaryMetricsName, fields, tags)
    return nil
}

// queryHealthSummary adds the 'OverallHealth' metric to the telegraf accumulator
func (e *Endpoint) queryHealthSummary(ctx context.Context, vsanClient *soap.Client, clusterRef *objectRef, acc telegraf.Accumulator) error {
    healthSystemRef := types.ManagedObjectReference{
        Type:  "VsanVcClusterHealthSystem",
        Value: "vsan-cluster-health-system",
    }
    fetchFromCache := true
    resp, err := vsanmethods.VsanQueryVcClusterHealthSummary(ctx, vsanClient,
        &vsantypes.VsanQueryVcClusterHealthSummary{
            This:           healthSystemRef,
            Cluster:        &clusterRef.ref,
            Fields:         []string{"overallHealth", "overallHealthDescription"},
            FetchFromCache: &fetchFromCache,
        })
    if err != nil {
        return err
    }
    healthStr := resp.Returnval.OverallHealth
    healthMap := map[string]int{"red": 2, "yellow": 1, "green": 0}
    fields := make(map[string]interface{})
    if val, ok := healthMap[healthStr]; ok {
        fields["overall_health"] = val
    }
    tags := populateClusterTags(make(map[string]string), clusterRef, e.URL.Host)
    acc.AddFields(vsanSummaryMetricsName, fields, tags)
    return nil
}

// queryResyncSummary adds resync information to accumulator
func (e *Endpoint) queryResyncSummary(ctx context.Context, vsanClient *soap.Client, clusterObj *object.ClusterComputeResource,
	clusterRef *objectRef, acc telegraf.Accumulator) error {
	if lower := versionLowerThan(e.apiVersion, "6.7"); lower {
		e.Parent.Log.Infof("[vSAN] Minimum API version 6.7 required for resync summary. Found: %s. Skipping vCenter: %s",
			e.apiVersion, e.URL.Host)
		return nil
	}
	hosts, err := clusterObj.Hosts(ctx)
	if err != nil {
		return err
	}
	if len(hosts) == 0 {
		return nil
	}
	hostRefValue := hosts[0].Reference().Value
	hostRefValueParts := strings.Split(hostRefValue, "-")
	if len(hostRefValueParts) != 2 {
		e.Parent.Log.Errorf("[vSAN] Host reference value does not match expected pattern: host-<num>. Actual value: %s", hostRefValue)
		return fmt.Errorf("unexpected host reference value %q", hostRefValue)
	}
	vsanSystemEx := types.ManagedObjectReference{
		Type:  "VsanSystemEx",
		Value: fmt.Sprintf("vsanSystemEx-%s", hostRefValueParts[1]),
	}

	includeSummary := true
	request := vsantypes.VsanQuerySyncingVsanObjects{
		This:           vsanSystemEx,
		Uuids:          []string{}, // We only need summary information.
		Start:          0,
		IncludeSummary: &includeSummary,
	}

	resp, err := vsanmethods.VsanQuerySyncingVsanObjects(ctx, vsanClient, &request)
	if err != nil {
		return err
	}
	fields := make(map[string]interface{})
	fields["total_bytes_to_sync"] = resp.Returnval.TotalBytesToSync
	fields["total_objects_to_sync"] = resp.Returnval.TotalObjectsToSync
	fields["total_recovery_eta"] = resp.Returnval.TotalRecoveryETA
	tags := populateClusterTags(make(map[string]string), clusterRef, e.URL.Host)
	acc.AddFields(vsanSummaryMetricsName, fields, tags)
	return nil
}

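The `VsanSystemEx` managed object is addressed per host by splicing the numeric suffix of the host's moref into a `vsanSystemEx-<num>` value. A standalone sketch of that derivation (the sample moref is made up):

```go
package main

import (
	"fmt"
	"strings"
)

// vsanSystemExRef derives the per-host VsanSystemEx moref value from a host
// moref such as "host-4021" -> "vsanSystemEx-4021".
func vsanSystemExRef(hostRefValue string) (string, error) {
	parts := strings.Split(hostRefValue, "-")
	if len(parts) != 2 {
		return "", fmt.Errorf("unexpected host reference value %q", hostRefValue)
	}
	return "vsanSystemEx-" + parts[1], nil
}

func main() {
	ref, err := vsanSystemExRef("host-4021") // hypothetical moref
	fmt.Println(ref, err)                    // vsanSystemEx-4021 <nil>
}
```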
// populateClusterTags takes in a tag map, makes a copy, populates cluster related tags and returns the copy.
func populateClusterTags(tags map[string]string, clusterRef *objectRef, vcenter string) map[string]string {
	newTags := make(map[string]string)
	// deep copy
	for k, v := range tags {
		newTags[k] = v
	}
	newTags["vcenter"] = vcenter
	newTags["dcname"] = clusterRef.dcname
	newTags["clustername"] = clusterRef.name
	newTags["moid"] = clusterRef.ref.Value
	newTags["source"] = clusterRef.name
	return newTags
}

// populateCMMDSTags takes in a tag map, makes a copy, adds more tags using a cmmds map and returns the copy.
func populateCMMDSTags(tags map[string]string, entityName string, uuid string, cmmds map[string]CmmdsEntity) map[string]string {
	newTags := make(map[string]string)
	// deep copy
	for k, v := range tags {
		newTags[k] = v
	}
	// There are cases when the uuid is missing (usually when the performance service has just been enabled or disabled).
	// This check avoids an index-out-of-range error below.
	if uuid == "*" || uuid == "" {
		return newTags
	}
	// Add additional tags based on CMMDS data
	switch {
	case strings.Contains(entityName, "-disk") || strings.Contains(entityName, "disk-"):
		if e, ok := cmmds[uuid]; ok {
			if host, ok := cmmds[e.Owner]; ok {
				newTags["hostname"] = host.Content.Hostname
			}
			newTags["devicename"] = e.Content.DevName
			if int(e.Content.IsSsd) == 0 {
				newTags["ssduuid"] = e.Content.SsdUUID
			}
		}
	case strings.Contains(entityName, "host-memory-"):
		memInfo := strings.Split(uuid, "|")
		if strings.Contains(entityName, "-slab") && len(memInfo) > 1 {
			newTags["slabname"] = memInfo[1]
		}
		if strings.Contains(entityName, "-heap") && len(memInfo) > 1 {
			newTags["heapname"] = memInfo[1]
		}
		if e, ok := cmmds[memInfo[0]]; ok {
			newTags["hostname"] = e.Content.Hostname
		}
	case strings.Contains(entityName, "host-") || strings.Contains(entityName, "system-mem"):
		if e, ok := cmmds[uuid]; ok {
			newTags["hostname"] = e.Content.Hostname
		}
	case strings.Contains(entityName, "vnic-net"):
		nicInfo := strings.Split(uuid, "|")
		if len(nicInfo) > 2 {
			newTags["stackname"] = nicInfo[1]
			newTags["vnic"] = nicInfo[2]
		}
		if e, ok := cmmds[nicInfo[0]]; ok {
			newTags["hostname"] = e.Content.Hostname
		}
	case strings.Contains(entityName, "pnic-net"):
		nicInfo := strings.Split(uuid, "|")
		if len(nicInfo) > 1 {
			newTags["pnic"] = nicInfo[1]
		}
		if e, ok := cmmds[nicInfo[0]]; ok {
			newTags["hostname"] = e.Content.Hostname
		}
	case strings.Contains(entityName, "world-cpu"):
		cpuInfo := strings.Split(uuid, "|")
		if len(cpuInfo) > 1 {
			newTags["worldname"] = cpuInfo[1]
		}
		if e, ok := cmmds[cpuInfo[0]]; ok {
			newTags["hostname"] = e.Content.Hostname
		}
	default:
		// If no tags were added above, fall back to tagging with the raw uuid.
		if len(newTags) == len(tags) {
			newTags["uuid"] = uuid
		}
	}
	return newTags
}

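For the network and CPU entity types, the `uuid` argument is actually a compound key with `|`-separated segments (host uuid first, then stack/nic/world names). A usage sketch, in the same package as `populateCMMDSTags` and with invented uuids and hostnames, showing how a `vnic-net` entity picks up `stackname` and `vnic` tags:

```go
// In-package sketch (assumes "fmt" is imported); all identifiers below are made up.
func ExamplePopulateCMMDSTagsVnic() {
	host := "11111111-2222-3333-4444-555555555555"
	cmmds := map[string]CmmdsEntity{
		host: {UUID: host, Type: "HOSTNAME", Content: CmmdsContent{Hostname: "esx-01.example.org"}},
	}
	// Compound uuid layout for vnic-net entities: "<host uuid>|<tcpip stack>|<vmkernel nic>"
	tags := populateCMMDSTags(make(map[string]string), "vnic-net", host+"|defaultTcpipStack|vmk0", cmmds)
	fmt.Println(tags["hostname"], tags["stackname"], tags["vnic"])
	// Output: esx-01.example.org defaultTcpipStack vmk0
}
```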
// versionLowerThan returns true if the current version < the base version
func versionLowerThan(current string, base string) bool {
	v1 := semver.New(current)
	v2 := semver.New(base)
	return v1.LessThan(*v2)
}

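A quick illustration of the comparison, assuming the coreos/go-semver package; note that semver parsers generally expect full major.minor.patch strings, so two-segment API versions such as "6.7" may need normalization before being passed in:

```go
package main

import (
	"fmt"

	"github.com/coreos/go-semver/semver"
)

func versionLowerThan(current, base string) bool {
	v1 := semver.New(current)
	v2 := semver.New(base)
	return v1.LessThan(*v2)
}

func main() {
	// Three-segment strings parse cleanly; "6.7" alone may not, depending on
	// the semver library, so pad API versions before comparing.
	fmt.Println(versionLowerThan("6.5.0", "6.7.0")) // true  -> skip resync summary
	fmt.Println(versionLowerThan("7.0.3", "6.7.0")) // false -> collect it
}
```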
type CmmdsEntity struct {
	UUID    string       `json:"uuid"`
	Owner   string       `json:"owner"` // ESXi UUID
	Type    string       `json:"type"`
	Content CmmdsContent `json:"content"`
}

type Cmmds struct {
	Res []CmmdsEntity `json:"result"`
}

type CmmdsContent struct {
	Hostname string  `json:"hostname"`
	IsSsd    float64 `json:"isSsd"`
	SsdUUID  string  `json:"ssdUuid"`
	DevName  string  `json:"devName"`
}

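These structs mirror the JSON shape of the CMMDS query result. A standalone sketch decoding a fabricated payload (the uuids and device name are invented):

```go
package main

import (
	"encoding/json"
	"fmt"
)

type CmmdsContent struct {
	Hostname string  `json:"hostname"`
	IsSsd    float64 `json:"isSsd"`
	SsdUUID  string  `json:"ssdUuid"`
	DevName  string  `json:"devName"`
}

type CmmdsEntity struct {
	UUID    string       `json:"uuid"`
	Owner   string       `json:"owner"` // ESXi UUID
	Type    string       `json:"type"`
	Content CmmdsContent `json:"content"`
}

type Cmmds struct {
	Res []CmmdsEntity `json:"result"`
}

func main() {
	// Fabricated CMMDS response for illustration only.
	payload := `{"result": [
		{"uuid": "52ee3be1-0000-0000-0000-000000000001",
		 "owner": "5b860329-0000-0000-0000-000000000002",
		 "type": "DISK",
		 "content": {"isSsd": 1, "devName": "naa.5000000000000000:2"}}
	]}`

	var c Cmmds
	if err := json.Unmarshal([]byte(payload), &c); err != nil {
		panic(err)
	}
	fmt.Printf("%s owned by %s (%s)\n", c.Res[0].UUID, c.Res[0].Owner, c.Res[0].Content.DevName)
}
```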
@ -55,6 +55,10 @@ type VSphere struct {
	DatastoreMetricExclude []string
	DatastoreInclude       []string
	DatastoreExclude       []string
	VSANMetricInclude      []string `toml:"vsan_metric_include"`
	VSANMetricExclude      []string `toml:"vsan_metric_exclude"`
	VSANMetricSkipVerify   bool     `toml:"vsan_metric_skip_verify"`
	VSANClusterInclude     []string `toml:"vsan_cluster_include"`
	Separator              string
	CustomAttributeInclude []string
	CustomAttributeExclude []string

@ -62,15 +66,14 @@ type VSphere struct {
	IPAddresses                 []string
	MetricLookback              int
	DisconnectedServersBehavior string
	MaxQueryObjects             int
	MaxQueryMetrics             int
	CollectConcurrency          int
	DiscoverConcurrency         int
	ForceDiscoverOnInit         bool `toml:"force_discover_on_init" deprecated:"1.14.0;option is ignored"`
	ObjectDiscoveryInterval     config.Duration
	Timeout                     config.Duration
	HistoricalInterval          config.Duration

	endpoints []*Endpoint
	cancel    context.CancelFunc

@ -155,38 +158,40 @@ func (v *VSphere) Gather(acc telegraf.Accumulator) error {
func init() {
	inputs.Add("vsphere", func() telegraf.Input {
		return &VSphere{
			Vcenters:                  []string{},
			DatacenterInstances:       false,
			DatacenterMetricInclude:   nil,
			DatacenterMetricExclude:   nil,
			DatacenterInclude:         []string{"/*"},
			ClusterInstances:          false,
			ClusterMetricInclude:      nil,
			ClusterMetricExclude:      nil,
			ClusterInclude:            []string{"/*/host/**"},
			HostInstances:             true,
			HostMetricInclude:         nil,
			HostMetricExclude:         nil,
			HostInclude:               []string{"/*/host/**"},
			ResourcePoolInstances:     false,
			ResourcePoolMetricInclude: nil,
			ResourcePoolMetricExclude: nil,
			ResourcePoolInclude:       []string{"/*/host/**"},
			VMInstances:               true,
			VMMetricInclude:           nil,
			VMMetricExclude:           nil,
			VMInclude:                 []string{"/*/vm/**"},
			DatastoreInstances:        false,
			DatastoreMetricInclude:    nil,
			DatastoreMetricExclude:    nil,
			DatastoreInclude:          []string{"/*/datastore/**"},
			VSANMetricInclude:         nil,
			VSANMetricExclude:         []string{"*"},
			VSANMetricSkipVerify:      false,
			VSANClusterInclude:        []string{"/*/host/**"},
			Separator:                 "_",
			CustomAttributeInclude:    []string{},
			CustomAttributeExclude:    []string{"*"},
			UseIntSamples:             true,
			IPAddresses:               []string{},
			MaxQueryObjects:           256,
			MaxQueryMetrics:           256,
			CollectConcurrency:        1,

@ -132,16 +132,15 @@ func defaultVSphere() *VSphere {
		DatacenterInclude: []string{"/**"},
		ClientConfig:      itls.ClientConfig{InsecureSkipVerify: true},

		MaxQueryObjects:         256,
		MaxQueryMetrics:         256,
		ObjectDiscoveryInterval: config.Duration(time.Second * 300),
		Timeout:                 config.Duration(time.Second * 20),
		ForceDiscoverOnInit:     true,
		DiscoverConcurrency:     1,
		CollectConcurrency:      1,
		Separator:               ".",
		HistoricalInterval:      config.Duration(time.Second * 300),
	}
}

@ -414,12 +413,46 @@ func TestFolders(t *testing.T) {
	testLookupVM(ctx, t, &f, "/F0/DC1/vm/**/F*/**", 4, "")
}

func TestVsanCmmds(t *testing.T) {
	m, s, err := createSim(0)
	require.NoError(t, err)
	defer m.Remove()
	defer s.Close()

	v := defaultVSphere()
	ctx := context.Background()

	c, err := NewClient(ctx, s.URL, v)
	require.NoError(t, err)

	f := Finder{c}
	var clusters []mo.ClusterComputeResource
	err = f.FindAll(ctx, "ClusterComputeResource", []string{"/**"}, []string{}, &clusters)
	require.NoError(t, err)

	clusterObj := object.NewClusterComputeResource(c.Client.Client, clusters[0].Reference())
	_, err = getCmmdsMap(ctx, c.Client.Client, clusterObj)
	require.Error(t, err)
}

func TestVsanTags(t *testing.T) {
	host := "5b860329-3bc4-a76c-48b6-246e963cfcc0"
	disk := "52ee3be1-47cc-b50d-ecab-01af0f706381"
	ssdDisk := "52f26fc8-0b9b-56d8-3a32-a9c3bfbc6148"
	ssd := "52173131-3384-bb63-4ef8-c00b0ce7e3e7"
	hostname := "sc2-hs1-b2801.eng.vmware.com"
	devName := "naa.55cd2e414d82c815:2"
	var cmmds = map[string]CmmdsEntity{
		disk:    {UUID: disk, Type: "DISK", Owner: host, Content: CmmdsContent{DevName: devName, IsSsd: 1.}},
		ssdDisk: {UUID: ssdDisk, Type: "DISK", Owner: host, Content: CmmdsContent{DevName: devName, IsSsd: 0., SsdUUID: ssd}},
		host:    {UUID: host, Type: "HOSTNAME", Owner: host, Content: CmmdsContent{Hostname: hostname}},
	}
	tags := populateCMMDSTags(make(map[string]string), "capacity-disk", disk, cmmds)
	require.Equal(t, 2, len(tags))
	tags = populateCMMDSTags(make(map[string]string), "cache-disk", ssdDisk, cmmds)
	require.Equal(t, 3, len(tags))
	tags = populateCMMDSTags(make(map[string]string), "host-domclient", host, cmmds)
	require.Equal(t, 1, len(tags))
}

func TestCollectionNoClusterMetrics(t *testing.T) {