feat(inputs.vsphere): Collect resource pools metrics and add resource pool tag in VM metrics (#10574)

* Collect Resource Pool metrics; Add rpname tag on VM metrics

* Update vSphere readme file

* Update vSphere readme file

* Correct typo in vSphere Readme

* Correct Markdown of metrics.md

* Fix metrics file

* Fix code in endpoint (filter); add some tests

* Update plugins/inputs/vsphere/endpoint.go

That's true I commit this suggestion

Co-authored-by: Sebastian Spaink <3441183+sspaink@users.noreply.github.com>

* Removed Context and Endpoint from getResourcePoolName func

Co-authored-by: Simon LAMBERT <silambert@cirilgroup.com>
Co-authored-by: Sebastian Spaink <3441183+sspaink@users.noreply.github.com>
This commit is contained in:
6monlambert 2022-05-12 22:36:56 +02:00 committed by GitHub
parent b36953d10b
commit fa723355f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 233 additions and 71 deletions

View File

@ -193,6 +193,59 @@ vmop.numSVMotion.latest
vmop.numXVMotion.latest
```
## Resource Pool Metrics
```metrics
cpu.usagemhz.average
cpu.cpuentitlement.latest
cpu.usagemhz.minimum
cpu.usagemhz.maximum
cpu.capacity.entitlement.average
cpu.capacity.usage.average
cpu.capacity.demand.average
cpu.capacity.contention.average
cpu.corecount.provisioned.average
cpu.corecount.contention.average
disk.throughput.usage.average
disk.throughput.contention.average
mem.capacity.contention.average
mem.overhead.average
mem.consumed.average
mem.granted.average
mem.active.average
mem.shared.average
mem.zero.average
mem.swapped.average
mem.vmmemctl.average
mem.capacity.provisioned.average
mem.capacity.entitlement.average
mem.capacity.usage.average
mem.mementitlement.latest
mem.compressed.average
mem.compressionRate.average
mem.decompressionRate.average
mem.overhead.minimum
mem.consumed.minimum
mem.granted.minimum
mem.active.minimum
mem.shared.minimum
mem.zero.minimum
mem.swapped.minimum
mem.vmmemctl.maximum
mem.overhead.maximum
mem.consumed.maximum
mem.granted.maximum
mem.active.maximum
mem.shared.maximum
mem.zero.maximum
mem.swapped.maximum
mem.vmmemctl.minimum
net.throughput.usage.average
net.throughput.contention.summation
power.power.average
power.energy.summation
```
## Cluster Metrics
```metrics

View File

@ -4,6 +4,7 @@ The VMware vSphere plugin uses the vSphere API to gather metrics from multiple v
* Clusters
* Hosts
* Resource Pools
* VMs
* Datastores
@ -140,7 +141,14 @@ vm_metric_exclude = [ "*" ]
# cluster_metric_exclude = [] ## Nothing excluded by default
# cluster_instances = false ## false by default
## Datastores
## Resource Pools
# resourcepool_include = [ "/*/host/**"] # Inventory path to resource pools to collect (by default all are collected)
# resourcepool_exclude = [] # Inventory paths to exclude
# resourcepool_metric_include = [] ## if omitted or empty, all metrics are collected
# resourcepool_metric_exclude = [] ## Nothing excluded by default
# resourcepool_instances = false ## false by default
## Datastores
# datastore_include = [ "/*/datastore/**"] # Inventory path to datastores to collect (by default all are collected)
# datastore_exclude = [] # Inventory paths to exclude
# datastore_metric_include = [] ## if omitted or empty, all metrics are collected
@ -252,10 +260,13 @@ to a file system. A vSphere inventory has a structure similar to this:
| | | +-VM1
| | | +-VM2
| | | +-hadoop1
| +-Host2 # Dummy cluster created for non-clustered host
| | +-Host2
| | +-ResourcePool1
| | | +-VM3
| | | +-VM4
| +-Host2 # Dummy cluster created for non-clustered host
| | +-Host2
| | | +-VM5
| | | +-VM6
+-vm # VM folder (created by system)
| +-VM1
| +-VM2
@ -289,7 +300,7 @@ We can extend this to looking at a cluster level: ```/DC0/host/Cluster1/*/hadoop
vCenter keeps two different kinds of metrics, known as realtime and historical metrics.
* Realtime metrics: Available at a 20 second granularity. These metrics are stored in memory and are very fast and cheap to query. Our tests have shown that a complete set of realtime metrics for 7000 virtual machines can be obtained in less than 20 seconds. Realtime metrics are only available on **ESXi hosts** and **virtual machine** resources. Realtime metrics are only stored for 1 hour in vCenter.
* Historical metrics: Available at a (default) 5 minute, 30 minutes, 2 hours and 24 hours rollup levels. The vSphere Telegraf plugin only uses the most granular rollup which defaults to 5 minutes but can be changed in vCenter to other interval durations. These metrics are stored in the vCenter database and can be expensive and slow to query. Historical metrics are the only type of metrics available for **clusters**, **datastores** and **datacenters**.
* Historical metrics: Available at a (default) 5 minute, 30 minutes, 2 hours and 24 hours rollup levels. The vSphere Telegraf plugin only uses the most granular rollup which defaults to 5 minutes but can be changed in vCenter to other interval durations. These metrics are stored in the vCenter database and can be expensive and slow to query. Historical metrics are the only type of metrics available for **clusters**, **datastores**, **resource pools** and **datacenters**.
For more information, refer to the vSphere documentation here: <https://pubs.vmware.com/vsphere-50/index.jsp?topic=%2Fcom.vmware.wssdk.pg.doc_50%2FPG_Ch16_Performance.18.2.html>
@ -314,6 +325,7 @@ This will disrupt the metric collection and can result in missed samples. The be
datastore_metric_exclude = ["*"]
cluster_metric_exclude = ["*"]
datacenter_metric_exclude = ["*"]
resourcepool_metric_exclude = ["*"]
collect_concurrency = 5
discover_concurrency = 5
@ -400,6 +412,12 @@ When the vSphere plugin queries vCenter for historical statistics it queries for
* Res CPU: active, max, running
* System: operating system uptime, uptime
* Virtual Disk: seeks, # reads/writes, latency, load
* Resource Pools stats:
* Memory: total, usage, active, latency, swap, shared, vmmemctl
* CPU: capacity, usage, corecount
* Disk: throughput
* Network: throughput
* Power: energy, usage
* Datastore stats:
* Disk: Capacity, provisioned, used
@ -415,6 +433,7 @@ For a detailed list of commonly available metrics, please refer to [METRICS.md](
* cluster (vcenter cluster)
* esxhost (name of ESXi host)
* guest (guest operating system id)
* rpname (name of resource pool)
* cpu stats for Host and VM
* cpu (cpu core - not all CPU fields will have this tag)
* datastore stats for Host and VM

View File

@ -100,6 +100,7 @@ type objectRef struct {
parentRef *types.ManagedObjectReference //Pointer because it must be nillable
guest string
dcname string
rpname string
customValues map[string]string
lookup map[string]string
}
@ -165,6 +166,24 @@ func NewEndpoint(ctx context.Context, parent *VSphere, address *url.URL, log tel
getObjects: getClusters,
parent: "datacenter",
},
"resourcepool": {
name: "resourcepool",
vcName: "ResourcePool",
pKey: "rpname",
parentTag: "clustername",
enabled: anythingEnabled(parent.ResourcePoolMetricExclude),
realTime: false,
sampling: int32(time.Duration(parent.HistoricalInterval).Seconds()),
objects: make(objectMap),
filters: newFilterOrPanic(parent.ResourcePoolMetricInclude, parent.ResourcePoolMetricExclude),
paths: parent.ResourcePoolInclude,
excludePaths: parent.ResourcePoolExclude,
simple: isSimple(parent.ResourcePoolMetricInclude, parent.ResourcePoolMetricExclude),
include: parent.ResourcePoolMetricInclude,
collectInstances: parent.ResourcePoolInstances,
getObjects: getResourcePools,
parent: "cluster",
},
"host": {
name: "host",
vcName: "HostSystem",
@ -653,6 +672,35 @@ func getClusters(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilte
return m, nil
}
// getResourcePools looks up every resource pool matched by resourceFilter and
// returns them as an objectMap keyed by the value of each pool's managed
// object reference. Every entry carries the pool name, its reference, its
// parent reference and the custom attributes loaded through the endpoint.
// If the lookup fails, the error is returned as-is together with a nil map.
//
//noinspection GoUnusedParameter
func getResourcePools(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (objectMap, error) {
	var pools []mo.ResourcePool
	if err := resourceFilter.FindAll(ctx, &pools); err != nil {
		return nil, err
	}

	found := make(objectMap)
	for _, pool := range pools {
		// The reference is used both as the map key (its Value) and as the
		// stored ref, so resolve it once per pool.
		moRef := pool.ExtensibleManagedObject.Reference()
		found[moRef.Value] = &objectRef{
			name:         pool.Name,
			ref:          moRef,
			parentRef:    pool.Parent,
			customValues: e.loadCustomAttributes(&pool.ManagedEntity),
		}
	}
	return found, nil
}
// getResourcePoolName returns the name of the resource pool referenced by rp,
// or "Resources" when rp is not present in rps.
//
// rps is expected to be keyed by the managed object reference value, which is
// how getResourcePools builds it. That allows a direct map lookup instead of
// scanning every pool for every VM. The complete reference is still compared
// so that an entry sharing the same value but differing in another field of
// the reference is not treated as a match.
func getResourcePoolName(rp types.ManagedObjectReference, rps objectMap) string {
	if pool, ok := rps[rp.Value]; ok && pool.ref == rp {
		return pool.name
	}
	return "Resources" // Default value
}
//noinspection GoUnusedParameter
func getHosts(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (objectMap, error) {
var resources []mo.HostSystem
@ -681,6 +729,20 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o
return nil, err
}
m := make(objectMap)
client, err := e.clientFactory.GetClient(ctx)
if err != nil {
return nil, err
}
//Create a ResourcePool Filter and get the list of Resource Pools
rprf := ResourceFilter{
finder: &Finder{client},
resType: "ResourcePool",
paths: []string{"/*/host/**"},
excludePaths: nil}
resourcePools, err := getResourcePools(ctx, e, &rprf)
if err != nil {
return nil, err
}
for _, r := range resources {
if r.Runtime.PowerState != "poweredOn" {
continue
@ -688,6 +750,8 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o
guest := "unknown"
uuid := ""
lookup := make(map[string]string)
// Get the name of the VM resource pool
rpname := getResourcePoolName(*r.ResourcePool, resourcePools)
// Extract host name
if r.Guest != nil && r.Guest.HostName != "" {
@ -755,6 +819,7 @@ func getVMs(ctx context.Context, e *Endpoint, resourceFilter *ResourceFilter) (o
parentRef: r.Runtime.Host,
guest: guest,
altID: uuid,
rpname: rpname,
customValues: e.loadCustomAttributes(&r.ManagedEntity),
lookup: lookup,
}
@ -1191,6 +1256,9 @@ func (e *Endpoint) populateTags(objectRef *objectRef, resourceType string, resou
if resourceType == "vm" && objectRef.altID != "" {
t["uuid"] = objectRef.altID
}
if resourceType == "vm" && objectRef.rpname != "" {
t["rpname"] = objectRef.rpname
}
// Map parent reference
parent, found := e.getParent(objectRef, resource)

View File

@ -246,6 +246,7 @@ func matchName(f property.Filter, props []types.DynamicProperty) bool {
func init() {
childTypes = map[string][]string{
"HostSystem": {"VirtualMachine"},
"ResourcePool": {"VirtualMachine"},
"ComputeResource": {"HostSystem", "ResourcePool", "VirtualApp"},
"ClusterComputeResource": {"HostSystem", "ResourcePool", "VirtualApp"},
"Datacenter": {"Folder"},
@ -260,9 +261,10 @@ func init() {
}
addFields = map[string][]string{
"HostSystem": {"parent", "summary.customValue", "customValue"},
"HostSystem": {"parent", "summary.customValue", "customValue"},
"ResourcePool": {"parent", "customValue"},
"VirtualMachine": {"runtime.host", "config.guestId", "config.uuid", "runtime.powerState",
"summary.customValue", "guest.net", "guest.hostName", "customValue"},
"summary.customValue", "guest.net", "guest.hostName", "resourcePool", "customValue"},
"Datastore": {"parent", "info", "customValue"},
"ClusterComputeResource": {"parent", "customValue"},
"Datacenter": {"parent", "customValue"},

View File

@ -15,40 +15,45 @@ import (
// VSphere is the top level type for the vSphere input plugin. It contains all the configuration
// and a list of connected vSphere endpoints
type VSphere struct {
Vcenters []string
Username string
Password string
DatacenterInstances bool
DatacenterMetricInclude []string
DatacenterMetricExclude []string
DatacenterInclude []string
DatacenterExclude []string
ClusterInstances bool
ClusterMetricInclude []string
ClusterMetricExclude []string
ClusterInclude []string
ClusterExclude []string
HostInstances bool
HostMetricInclude []string
HostMetricExclude []string
HostInclude []string
HostExclude []string
VMInstances bool `toml:"vm_instances"`
VMMetricInclude []string `toml:"vm_metric_include"`
VMMetricExclude []string `toml:"vm_metric_exclude"`
VMInclude []string `toml:"vm_include"`
VMExclude []string `toml:"vm_exclude"`
DatastoreInstances bool
DatastoreMetricInclude []string
DatastoreMetricExclude []string
DatastoreInclude []string
DatastoreExclude []string
Separator string
CustomAttributeInclude []string
CustomAttributeExclude []string
UseIntSamples bool
IPAddresses []string
MetricLookback int
Vcenters []string
Username string
Password string
DatacenterInstances bool
DatacenterMetricInclude []string
DatacenterMetricExclude []string
DatacenterInclude []string
DatacenterExclude []string
ClusterInstances bool
ClusterMetricInclude []string
ClusterMetricExclude []string
ClusterInclude []string
ClusterExclude []string
ResourcePoolInstances bool
ResourcePoolMetricInclude []string
ResourcePoolMetricExclude []string
ResourcePoolInclude []string
ResourcePoolExclude []string
HostInstances bool
HostMetricInclude []string
HostMetricExclude []string
HostInclude []string
HostExclude []string
VMInstances bool `toml:"vm_instances"`
VMMetricInclude []string `toml:"vm_metric_include"`
VMMetricExclude []string `toml:"vm_metric_exclude"`
VMInclude []string `toml:"vm_include"`
VMExclude []string `toml:"vm_exclude"`
DatastoreInstances bool
DatastoreMetricInclude []string
DatastoreMetricExclude []string
DatastoreInclude []string
DatastoreExclude []string
Separator string
CustomAttributeInclude []string
CustomAttributeExclude []string
UseIntSamples bool
IPAddresses []string
MetricLookback int
MaxQueryObjects int
MaxQueryMetrics int
@ -140,31 +145,35 @@ func init() {
return &VSphere{
Vcenters: []string{},
DatacenterInstances: false,
DatacenterMetricInclude: nil,
DatacenterMetricExclude: nil,
DatacenterInclude: []string{"/*"},
ClusterInstances: false,
ClusterMetricInclude: nil,
ClusterMetricExclude: nil,
ClusterInclude: []string{"/*/host/**"},
HostInstances: true,
HostMetricInclude: nil,
HostMetricExclude: nil,
HostInclude: []string{"/*/host/**"},
VMInstances: true,
VMMetricInclude: nil,
VMMetricExclude: nil,
VMInclude: []string{"/*/vm/**"},
DatastoreInstances: false,
DatastoreMetricInclude: nil,
DatastoreMetricExclude: nil,
DatastoreInclude: []string{"/*/datastore/**"},
Separator: "_",
CustomAttributeInclude: []string{},
CustomAttributeExclude: []string{"*"},
UseIntSamples: true,
IPAddresses: []string{},
DatacenterInstances: false,
DatacenterMetricInclude: nil,
DatacenterMetricExclude: nil,
DatacenterInclude: []string{"/*"},
ClusterInstances: false,
ClusterMetricInclude: nil,
ClusterMetricExclude: nil,
ClusterInclude: []string{"/*/host/**"},
HostInstances: true,
HostMetricInclude: nil,
HostMetricExclude: nil,
HostInclude: []string{"/*/host/**"},
ResourcePoolInstances: false,
ResourcePoolMetricInclude: nil,
ResourcePoolMetricExclude: nil,
ResourcePoolInclude: []string{"/*/host/**"},
VMInstances: true,
VMMetricInclude: nil,
VMMetricExclude: nil,
VMInclude: []string{"/*/vm/**"},
DatastoreInstances: false,
DatastoreMetricInclude: nil,
DatastoreMetricExclude: nil,
DatastoreInclude: []string{"/*/datastore/**"},
Separator: "_",
CustomAttributeInclude: []string{},
CustomAttributeExclude: []string{"*"},
UseIntSamples: true,
IPAddresses: []string{},
MaxQueryObjects: 256,
MaxQueryMetrics: 256,

View File

@ -137,12 +137,17 @@ func defaultVSphere() *VSphere {
DatastoreMetricInclude: []string{
"disk.used.*",
"disk.provisioned.*"},
DatastoreMetricExclude: nil,
DatastoreInclude: []string{"/**"},
DatacenterMetricInclude: nil,
DatacenterMetricExclude: nil,
DatacenterInclude: []string{"/**"},
ClientConfig: itls.ClientConfig{InsecureSkipVerify: true},
DatastoreMetricExclude: nil,
DatastoreInclude: []string{"/**"},
ResourcePoolMetricInclude: []string{
"cpu.capacity.*",
"mem.capacity.*"},
ResourcePoolMetricExclude: nil,
ResourcePoolInclude: []string{"/**"},
DatacenterMetricInclude: nil,
DatacenterMetricExclude: nil,
DatacenterInclude: []string{"/**"},
ClientConfig: itls.ClientConfig{InsecureSkipVerify: true},
MaxQueryObjects: 256,
MaxQueryMetrics: 256,
@ -331,6 +336,12 @@ func TestFinder(t *testing.T) {
require.Equal(t, 1, len(host))
require.Equal(t, "DC0_C0_H0", host[0].Name)
var resourcepool = []mo.ResourcePool{}
err = f.Find(ctx, "ResourcePool", "/DC0/host/DC0_C0/Resources/DC0_C0_RP0", &resourcepool)
require.NoError(t, err)
require.Equal(t, 1, len(host))
require.Equal(t, "DC0_C0_H0", host[0].Name)
host = []mo.HostSystem{}
err = f.Find(ctx, "HostSystem", "/DC0/host/DC0_C0/*", &host)
require.NoError(t, err)