fix(inputs.cloudwatch): Option to produce dense metrics (#15317)

This commit is contained in:
Joshua Powers 2024-05-15 10:11:55 -06:00 committed by GitHub
parent c3c6189a1b
commit 1a00a48d54
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 132 additions and 17 deletions

View File

@ -50,12 +50,12 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# role_session_name = ""
# profile = ""
# shared_credential_file = ""
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## and collect metrics from the linked source accounts
# include_linked_accounts = false
## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
@ -102,6 +102,13 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## Metric Statistic Namespaces (required)
namespaces = ["AWS/ELB"]
## Metric Format
## This determines the format of the produced metrics. 'sparse', the default,
## will produce a unique field for each statistic of each metric. 'dense' will
## report each statistic in a field named after the statistic and add a
## metric_name tag holding the AWS metric name. See the plugin README for
## examples.
# metric_format = "sparse"
## Maximum requests per second. Note that the global default AWS rate limit
## is 50 reqs/sec, so if you define multiple namespaces, these should add up
## to a maximum of 50.
@ -212,15 +219,53 @@ but will output five metrics timestamped one minute apart.
## Metrics
Each CloudWatch Namespace monitored records a measurement with fields for each
available Metric Statistic. Namespace and Metrics are represented in [snake
available Metric Statistic. Namespace and Metrics are represented in [snake
case](https://en.wikipedia.org/wiki/Snake_case)
### Sparse Metrics
By default, metrics generated by this plugin are sparse. Use the `metric_format`
option to override this setting.
Sparse metrics produce a set of fields for every AWS Metric.
- cloudwatch_{namespace}
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)
- Fields
- {metric}_sum (metric Sum value)
- {metric}_average (metric Average value)
- {metric}_minimum (metric Minimum value)
- {metric}_maximum (metric Maximum value)
- {metric}_sample_count (metric SampleCount value)
For example:
```text
cloudwatch_aws_usage,class=None,resource=GetSecretValue,service=Secrets\ Manager,type=API call_count_maximum=1,call_count_minimum=1,call_count_sum=8,call_count_sample_count=8,call_count_average=1 1715097720000000000
```
### Dense Metrics
Dense metrics are generated when `metric_format` is set to `dense`.
Dense metrics reuse the same set of fields for every AWS Metric and
differentiate between AWS Metrics using a tag called `metric_name`, which holds
the AWS Metric name:
- cloudwatch_{namespace}
- Tags
- metric_name (AWS Metric name)
- Fields
- sum (metric Sum value)
- average (metric Average value)
- minimum (metric Minimum value)
- maximum (metric Maximum value)
- sample_count (metric SampleCount value)
For example:
```text
cloudwatch_aws_usage,class=None,resource=GetSecretValue,service=Secrets\ Manager,metric_name=call_count,type=API sum=6,sample_count=6,average=1,maximum=1,minimum=1 1715097840000000000
```
### Tags
@ -274,6 +319,8 @@ aws cloudwatch get-metric-data \
## Example Output
See the discussion above about sparse vs dense metrics for more details.
```text
cloudwatch_aws_elb,load_balancer_name=p-example,region=us-east-1 latency_average=0.004810798017284538,latency_maximum=0.1100282669067383,latency_minimum=0.0006084442138671875,latency_sample_count=4029,latency_sum=19.382705211639404 1459542420000000000
```

View File

@ -7,6 +7,7 @@ import (
"fmt"
"net"
"net/http"
"regexp"
"strconv"
"strings"
"sync"
@ -48,8 +49,8 @@ type CloudWatch struct {
RecentlyActive string `toml:"recently_active"`
BatchSize int `toml:"batch_size"`
IncludeLinkedAccounts bool `toml:"include_linked_accounts"`
Log telegraf.Logger `toml:"-"`
MetricFormat string `toml:"metric_format"`
Log telegraf.Logger `toml:"-"`
client cloudwatchClient
statFilter filter.Filter
@ -98,6 +99,14 @@ func (c *CloudWatch) Init() error {
c.Namespaces = append(c.Namespaces, c.Namespace)
}
switch c.MetricFormat {
case "":
c.MetricFormat = "sparse"
case "dense", "sparse":
default:
return fmt.Errorf("invalid metric_format: %s", c.MetricFormat)
}
err := c.initializeCloudWatch()
if err != nil {
return err
@ -462,7 +471,21 @@ func (c *CloudWatch) aggregateMetrics(
tags["region"] = c.Region
for i := range result.Values {
grouper.Add(namespace, tags, result.Timestamps[i], *result.Label, result.Values[i])
if c.MetricFormat == "dense" {
// Remove the IDs from the result ID to get the statistic type
// e.g. "average" from "average_0_0"
re := regexp.MustCompile(`_\d+_\d+$`)
statisticType := re.ReplaceAllString(*result.Id, "")
// Remove the statistic type from the label to get the AWS Metric name
// e.g. "CPUUtilization" from "CPUUtilization_average"
re = regexp.MustCompile(`_?` + regexp.QuoteMeta(statisticType) + `$`)
tags["metric_name"] = re.ReplaceAllString(*result.Label, "")
grouper.Add(namespace, tags, result.Timestamps[i], statisticType, result.Values[i])
} else {
grouper.Add(namespace, tags, result.Timestamps[i], *result.Label, result.Values[i])
}
}
}
}

View File

@ -198,6 +198,44 @@ func TestGather(t *testing.T) {
acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", fields, tags)
}
// TestGatherDenseMetric gathers from the mocked CloudWatch client with
// MetricFormat set to "dense" and checks that the resulting measurement uses
// one field per statistic (minimum, maximum, average, sum, sample_count) and
// carries the metric name in the metric_name tag.
func TestGatherDenseMetric(t *testing.T) {
	// Delay and Period both use a one-minute window.
	oneMinute := config.Duration(time.Minute)

	plugin := &CloudWatch{
		CredentialConfig: internalaws.CredentialConfig{
			Region: "us-east-1",
		},
		Namespace:    "AWS/ELB",
		Delay:        oneMinute,
		Period:       oneMinute,
		RateLimit:    200,
		BatchSize:    500,
		MetricFormat: "dense",
		Log:          testutil.Logger{},
	}

	var acc testutil.Accumulator

	require.NoError(t, plugin.Init())
	// The client is swapped in only after Init has returned, so Gather talks
	// to the mock instead of whatever client Init configured.
	plugin.client = &mockGatherCloudWatchClient{}
	require.NoError(t, acc.GatherError(plugin.Gather))

	// NOTE(review): these values presumably mirror the canned data returned by
	// mockGatherCloudWatchClient, which is defined elsewhere in the test file.
	expectedFields := map[string]interface{}{
		"minimum":      0.1,
		"maximum":      0.3,
		"average":      0.2,
		"sum":          123.0,
		"sample_count": 100.0,
	}
	expectedTags := map[string]string{
		"region":             "us-east-1",
		"load_balancer_name": "p-example1",
		"metric_name":        "latency",
	}

	require.True(t, acc.HasMeasurement("cloudwatch_aws_elb"))
	acc.AssertContainsTaggedFields(t, "cloudwatch_aws_elb", expectedFields, expectedTags)
}
func TestMultiAccountGather(t *testing.T) {
duration, _ := time.ParseDuration("1m")
internalDuration := config.Duration(duration)

View File

@ -21,12 +21,12 @@
# role_session_name = ""
# profile = ""
# shared_credential_file = ""
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## If you are using CloudWatch cross-account observability, you can
## set IncludeLinkedAccounts to true in a monitoring account
## and collect metrics from the linked source accounts
# include_linked_accounts = false
## Endpoint to make request against, the correct endpoint is automatically
## determined and this option should only be set if you wish to override the
## default.
@ -73,6 +73,13 @@
## Metric Statistic Namespaces (required)
namespaces = ["AWS/ELB"]
## Metric Format
## This determines the format of the produced metrics. 'sparse', the default,
## will produce a unique field for each statistic of each metric. 'dense' will
## report each statistic in a field named after the statistic and add a
## metric_name tag holding the AWS metric name. See the plugin README for
## examples.
# metric_format = "sparse"
## Maximum requests per second. Note that the global default AWS rate limit
## is 50 reqs/sec, so if you define multiple namespaces, these should add up
## to a maximum of 50.