diff --git a/plugins/outputs/datadog/README.md b/plugins/outputs/datadog/README.md index f16940c6f..d83e4373f 100644 --- a/plugins/outputs/datadog/README.md +++ b/plugins/outputs/datadog/README.md @@ -36,6 +36,13 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## Override the default (none) compression used to send data. ## Supports: "zlib", "none" # compression = "none" + + ## When non-zero, converts count metrics submitted by inputs.statsd + ## into rate, while dividing the metric value by this number. + ## Note that in order for metrics to be submitted simultaneously alongside + ## a Datadog agent, rate_interval has to match the interval used by the + ## agent - which defaults to 10s + # rate_interval = 0s ``` ## Metrics @@ -46,11 +53,13 @@ field key with a `.` character. Field values are converted to floating point numbers. Strings and floats that cannot be sent over JSON, namely NaN and Inf, are ignored. -We do not send `Rate` types. Counts are sent as `count`, with an -interval hard-coded to 1. Note that this behavior does *not* play -super-well if running simultaneously with current Datadog agents; they -will attempt to change to `Rate` with `interval=10`. We prefer this -method, however, as it reflects the raw data more accurately. +Setting `rate_interval` to non-zero will convert `count` metrics to `rate` +and divide their values by this interval before submitting to Datadog. +This allows Telegraf to submit metrics alongside Datadog agents when their rate +intervals are the same (Datadog defaults to `10s`). +Note that this only supports metrics ingested via `inputs.statsd` given +the dependency on the `metric_type` tag it creates. There is only support for +`counter` metrics, and `count` values from `timing` and `histogram` metrics. 
[metrics]: https://docs.datadoghq.com/api/v1/metrics/#submit-metrics [apikey]: https://app.datadoghq.com/account/settings#api diff --git a/plugins/outputs/datadog/datadog.go b/plugins/outputs/datadog/datadog.go index 7450e38fd..848ebb3d3 100644 --- a/plugins/outputs/datadog/datadog.go +++ b/plugins/outputs/datadog/datadog.go @@ -25,11 +25,12 @@ import ( var sampleConfig string type Datadog struct { - Apikey string `toml:"apikey"` - Timeout config.Duration `toml:"timeout"` - URL string `toml:"url"` - Compression string `toml:"compression"` - Log telegraf.Logger `toml:"-"` + Apikey string `toml:"apikey"` + Timeout config.Duration `toml:"timeout"` + URL string `toml:"url"` + Compression string `toml:"compression"` + RateInterval config.Duration `toml:"rate_interval"` + Log telegraf.Logger `toml:"-"` client *http.Client proxy.HTTPProxy @@ -75,15 +76,15 @@ func (d *Datadog) Connect() error { return nil } -func (d *Datadog) Write(metrics []telegraf.Metric) error { - ts := TimeSeries{} +func (d *Datadog) convertToDatadogMetric(metrics []telegraf.Metric) []*Metric { tempSeries := []*Metric{} - metricCounter := 0 for _, m := range metrics { if dogMs, err := buildMetrics(m); err == nil { metricTags := buildTags(m.TagList()) host, _ := m.GetTag("host") + // Retrieve the metric_type tag created by inputs.statsd + statsDMetricType, _ := m.GetTag("metric_type") if len(dogMs) == 0 { continue @@ -99,9 +100,21 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { dname = m.Name() + "." 
+ fieldName } var tname string + var interval int64 + interval = 1 switch m.Type() { - case telegraf.Counter: - tname = "count" + case telegraf.Counter, telegraf.Untyped: + if d.RateInterval > 0 && isRateable(statsDMetricType, fieldName) { + // interval is expected to be in seconds + rateIntervalSeconds := time.Duration(d.RateInterval).Seconds() + interval = int64(rateIntervalSeconds) + dogM[1] = dogM[1] / rateIntervalSeconds + tname = "rate" + } else if m.Type() == telegraf.Counter { + tname = "count" + } else { + tname = "" + } case telegraf.Gauge: tname = "gauge" default: @@ -112,23 +125,28 @@ func (d *Datadog) Write(metrics []telegraf.Metric) error { Tags: metricTags, Host: host, Type: tname, - Interval: 1, + Interval: interval, } metric.Points[0] = dogM tempSeries = append(tempSeries, metric) - metricCounter++ } } else { d.Log.Infof("Unable to build Metric for %s due to error '%v', skipping", m.Name(), err) } } + return tempSeries +} + +func (d *Datadog) Write(metrics []telegraf.Metric) error { + ts := TimeSeries{} + tempSeries := d.convertToDatadogMetric(metrics) if len(tempSeries) == 0 { return nil } redactedAPIKey := "****************" - ts.Series = make([]*Metric, metricCounter) + ts.Series = make([]*Metric, len(tempSeries)) copy(ts.Series, tempSeries[0:]) tsBytes, err := json.Marshal(ts) if err != nil { @@ -220,6 +238,20 @@ func verifyValue(v interface{}) bool { return true } +func isRateable(statsDMetricType string, fieldName string) bool { + switch statsDMetricType { + case + "counter": + return true + case + "timing", + "histogram": + return fieldName == "count" + default: + return false + } +} + func (p *Point) setValue(v interface{}) error { switch d := v.(type) { case int64: diff --git a/plugins/outputs/datadog/datadog_test.go b/plugins/outputs/datadog/datadog_test.go index 2fba36253..2915783f0 100644 --- a/plugins/outputs/datadog/datadog_test.go +++ b/plugins/outputs/datadog/datadog_test.go @@ -13,6 +13,7 @@ import ( 
"github.com/stretchr/testify/require" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/testutil" ) @@ -305,3 +306,596 @@ func TestInfIsSkipped(t *testing.T) { }) require.NoError(t, err) } + +func TestNonZeroRateIntervalConvertsRatesToCount(t *testing.T) { + d := &Datadog{ + Apikey: "123456", + RateInterval: config.Duration(10 * time.Second), + } + + var tests = []struct { + name string + metricsIn []telegraf.Metric + metricsOut []*Metric + }{ + { + "convert counter metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "count_metric", + map[string]string{ + "metric_type": "counter", + }, + map[string]interface{}{ + "value": 100, + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Counter, + ), + }, + []*Metric{ + { + Metric: "count_metric", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 10, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:counter", + }, + Interval: 10, + }, + }, + }, + { + "convert count value in timing metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "timing_metric", + map[string]string{ + "metric_type": "timing", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "timing_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 0.1, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:timing", + }, + Interval: 10, + }, + { + Metric: "timing_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + 
{ + Metric: "timing_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + }, + }, + { + "convert count value in histogram metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "histogram_metric", + map[string]string{ + "metric_type": "histogram", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "histogram_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 0.1, + }, + }, + Type: "rate", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 10, + }, + { + Metric: "histogram_metric.lower", + Points: [1]Point{ + { + 
float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actualMetricsOut := d.convertToDatadogMetric(tt.metricsIn) + require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) + }) + } +} + +func TestZeroRateIntervalConvertsRatesToCount(t *testing.T) { + d := &Datadog{ + Apikey: "123456", + } + + var tests = []struct { + name string + metricsIn []telegraf.Metric + metricsOut []*Metric + }{ + { + "does not convert counter metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "count_metric", + map[string]string{ + "metric_type": 
"counter", + }, + map[string]interface{}{ + "value": 100, + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Counter, + ), + }, + []*Metric{ + { + Metric: "count_metric", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 100, + }, + }, + Type: "count", + Tags: []string{ + "metric_type:counter", + }, + Interval: 1, + }, + }, + }, + { + "does not convert count value in timing metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "timing_metric", + map[string]string{ + "metric_type": "timing", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "timing_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 1, + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + 
}, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + { + Metric: "timing_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:timing", + }, + Interval: 1, + }, + }, + }, + { + "does not convert count value in histogram metrics to rate", + []telegraf.Metric{ + testutil.MustMetric( + "histogram_metric", + map[string]string{ + "metric_type": "histogram", + }, + map[string]interface{}{ + "count": 1, + "lower": float64(10), + "mean": float64(10), + "median": float64(10), + "stddev": float64(0), + "sum": float64(10), + "upper": float64(10), + }, + time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC), + telegraf.Untyped, + ), + }, + []*Metric{ + { + Metric: "histogram_metric.count", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + 1, + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.lower", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.mean", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.median", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + 
"metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.stddev", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(0), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.sum", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + { + Metric: "histogram_metric.upper", + Points: [1]Point{ + { + float64(time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC).Unix()), + float64(10), + }, + }, + Type: "", + Tags: []string{ + "metric_type:histogram", + }, + Interval: 1, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actualMetricsOut := d.convertToDatadogMetric(tt.metricsIn) + require.ElementsMatch(t, tt.metricsOut, actualMetricsOut) + }) + } +} diff --git a/plugins/outputs/datadog/sample.conf b/plugins/outputs/datadog/sample.conf index bd933be30..5775b84a4 100644 --- a/plugins/outputs/datadog/sample.conf +++ b/plugins/outputs/datadog/sample.conf @@ -18,3 +18,10 @@ ## Override the default (none) compression used to send data. ## Supports: "zlib", "none" # compression = "none" + + ## When non-zero, converts count metrics submitted by inputs.statsd + ## into rate, while dividing the metric value by this number. + ## Note that in order for metrics to be submitted simultaneously alongside + ## a Datadog agent, rate_interval has to match the interval used by the + ## agent - which defaults to 10s + # rate_interval = 0s