Add support for datadog distributions metric (#8179)
* Add support for datadog distributions in statsd
* Parse metric distribution correctly
* Add tests to check distributions are parsed correctly
* Update Statsd plugin Readme with details about Distributions metric
* Refactor metric distribution initialization code
* Update distribution metric interface to replace fields with value
* Refactor statsd distribution metric test code
* Fix go formatting errors
* Add tests to parse only when DataDog Distributions config is enabled
* Add config to enable parsing DataDog Statsd Distributions
* Document use of datadog_distributions config in Readme
parent f09e551cbd
commit f888136333
@@ -7328,6 +7328,9 @@
 # ## Parses datadog extensions to the statsd format
 # datadog_extensions = false
 #
+# ## Parses distributions metric from datadog's extension to the statsd format
+# datadog_distributions = false
+#
 # ## Statsd data translation templates, more info can be read here:
 # ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
 # # templates = [

@@ -50,6 +50,10 @@
   ## http://docs.datadoghq.com/guides/dogstatsd/
   datadog_extensions = false
 
+  ## Parses distributions metric as specified in the datadog statsd format
+  ## https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+  datadog_distributions = false
+
   ## Statsd data translation templates, more info can be read here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
   # templates = [

@@ -98,6 +102,10 @@ implementation. In short, the telegraf statsd listener will accept:
   - `load.time:320|ms`
   - `load.time.nanoseconds:1|h`
   - `load.time:200|ms|@0.1` <- sampled 1/10 of the time
+- Distributions
+  - `load.time:320|d`
+  - `load.time.nanoseconds:1|d`
+  - `load.time:200|d|@0.1` <- sampled 1/10 of the time
 
 It is possible to omit repetitive names and merge individual stats into a
 single line by separating them with additional colons:

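A minimal sketch, not part of this commit, of a client that pushes the distribution lines shown in the hunk above to the statsd listener over UDP. The 127.0.0.1:8125 address assumes the plugin's default service_address; adjust it to your deployment.

package main

import (
	"fmt"
	"net"
)

func main() {
	// Assumes a Telegraf statsd listener on the default UDP port 8125.
	conn, err := net.Dial("udp", "127.0.0.1:8125")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// The same example lines as the README hunk above.
	lines := []string{
		"load.time:320|d",
		"load.time.nanoseconds:1|d",
		"load.time:200|d|@0.1", // sampled 1/10 of the time
	}
	for _, line := range lines {
		// dogstatsd is line-oriented; one metric per datagram keeps it simple
		fmt.Fprintf(conn, "%s\n", line)
	}
}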
@@ -172,6 +180,9 @@ metric type:
   that `P%` of all the values statsd saw for that stat during that time
   period are below x. The most common value that people use for `P` is the
   `90`, this is a great number to try to optimize.
+- Distributions
+  - The Distribution metric represents the global statistical distribution of a set of values calculated across your entire distributed infrastructure in one time interval. A Distribution can be used to instrument logical objects, like services, independently from the underlying hosts.
+  - Unlike the Histogram metric type, which aggregates on the Agent during a given time interval, a Distribution metric sends all the raw data during a time interval.
 
 ### Plugin arguments
 
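A hedged sketch, written against the test helpers this commit touches (NewTestStatsd, testutil.Accumulator) and not part of the commit itself, illustrating the raw-data behaviour described above: three samples for the same hypothetical bucket come out as three separate points per gather rather than one aggregated value.

func TestDistributionsAreNotAggregated(t *testing.T) {
	s := NewTestStatsd()
	s.DataDogExtensions = true
	s.DataDogDistributions = true
	acc := &testutil.Accumulator{}

	// Hypothetical bucket name; three raw samples in one interval.
	for _, line := range []string{"page.views:1|d", "page.views:1|d", "page.views:1|d"} {
		if err := s.parseStatsdLine(line); err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}
	s.Gather(acc)

	// Each raw sample becomes its own point, unlike histograms/timings.
	if len(acc.Metrics) != 3 {
		t.Errorf("expected 3 distribution points, got %d", len(acc.Metrics))
	}
}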
@@ -195,6 +206,7 @@ the accuracy of percentiles but also increases the memory usage and cpu time.
   measurements and tags.
 - **parse_data_dog_tags** boolean: Enable parsing of tags in DataDog's dogstatsd format (http://docs.datadoghq.com/guides/dogstatsd/)
 - **datadog_extensions** boolean: Enable parsing of DataDog's extensions to dogstatsd format (http://docs.datadoghq.com/guides/dogstatsd/)
+- **datadog_distributions** boolean: Enable parsing of the Distribution metric in DataDog's dogstatsd format (https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition)
 - **max_ttl** config.Duration: Max duration (TTL) for each metric to stay cached/reported without being updated.
 
 ### Statsd bucket -> InfluxDB line-protocol Templates

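A similar hedged sketch, again not from the commit, of what the two flags enable together: the |d value lands in the default "value" field, dogstatsd tags are parsed, and a metric_type=distribution tag is added. The bucket name and tag below are hypothetical.

func TestDistributionFieldAndTags(t *testing.T) {
	s := NewTestStatsd()
	s.DataDogExtensions = true    // datadog_extensions = true
	s.DataDogDistributions = true // datadog_distributions = true
	acc := &testutil.Accumulator{}

	// Hypothetical line in dogstatsd distribution format with one tag.
	if err := s.parseStatsdLine("users.online:29|d|#country:china"); err != nil {
		t.Errorf("Parsing line should not have resulted in an error: %v", err)
	}
	s.Gather(acc)

	// "." is joined with the metric separator, the value goes into the
	// default field, and the tags carry the metric type plus the parsed tag.
	acc.AssertContainsTaggedFields(t, "users_online",
		map[string]interface{}{"value": float64(29)},
		map[string]string{"metric_type": "distribution", "country": "china"},
	)
}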
@@ -70,6 +70,11 @@ type Statsd struct {
 	// http://docs.datadoghq.com/guides/dogstatsd/
 	DataDogExtensions bool `toml:"datadog_extensions"`
 
+	// Parses distribution metrics in the datadog statsd format.
+	// Requires the DataDogExtension flag to be enabled.
+	// https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+	DataDogDistributions bool `toml:"datadog_distributions"`
+
 	// UDPPacketSize is deprecated, it's only here for legacy support
 	// we now always create 1 max size buffer and then copy only what we need
 	// into the in channel

@@ -98,10 +103,12 @@ type Statsd struct {
 	// Cache gauges, counters & sets so they can be aggregated as they arrive
 	// gauges and counters map measurement/tags hash -> field name -> metrics
 	// sets and timings map measurement/tags hash -> metrics
+	// distributions aggregate measurement/tags and are published directly
 	gauges        map[string]cachedgauge
 	counters      map[string]cachedcounter
 	sets          map[string]cachedset
 	timings       map[string]cachedtimings
+	distributions []cacheddistributions
 
 	// bucket -> influx templates
 	Templates []string

@@ -190,6 +197,12 @@ type cachedtimings struct {
 	expiresAt time.Time
 }
 
+type cacheddistributions struct {
+	name  string
+	value float64
+	tags  map[string]string
+}
+
 func (_ *Statsd) Description() string {
 	return "Statsd UDP/TCP Server"
 }

@@ -237,6 +250,10 @@ const sampleConfig = `
   ## Parses datadog extensions to the statsd format
   datadog_extensions = false
 
+  ## Parses distributions metric as specified in the datadog statsd format
+  ## https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+  datadog_distributions = false
+
   ## Statsd data translation templates, more info can be read here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
   # templates = [

@@ -265,6 +282,14 @@ func (s *Statsd) Gather(acc telegraf.Accumulator) error {
 	defer s.Unlock()
 	now := time.Now()
 
+	for _, m := range s.distributions {
+		fields := map[string]interface{}{
+			defaultFieldName: m.value,
+		}
+		acc.AddFields(m.name, fields, m.tags, now)
+	}
+	s.distributions = make([]cacheddistributions, 0)
+
 	for _, m := range s.timings {
 		// Defining a template to parse field names for timers allows us to split
 		// out multiple fields per timer. In this case we prefix each stat with the

@@ -336,6 +361,7 @@ func (s *Statsd) Start(ac telegraf.Accumulator) error {
 	s.counters = make(map[string]cachedcounter)
 	s.sets = make(map[string]cachedset)
 	s.timings = make(map[string]cachedtimings)
+	s.distributions = make([]cacheddistributions, 0)
 
 	s.Lock()
 	defer s.Unlock()

@@ -601,7 +627,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
 
 		// Validate metric type
 		switch pipesplit[1] {
-		case "g", "c", "s", "ms", "h":
+		case "g", "c", "s", "ms", "h", "d":
 			m.mtype = pipesplit[1]
 		default:
 			s.Log.Errorf("Metric type %q unsupported", pipesplit[1])

@@ -618,7 +644,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
 		}
 
 		switch m.mtype {
-		case "g", "ms", "h":
+		case "g", "ms", "h", "d":
 			v, err := strconv.ParseFloat(pipesplit[0], 64)
 			if err != nil {
 				s.Log.Errorf("Parsing value to float64, unable to parse metric: %s", line)

@@ -658,6 +684,8 @@ func (s *Statsd) parseStatsdLine(line string) error {
 		m.tags["metric_type"] = "timing"
 	case "h":
 		m.tags["metric_type"] = "histogram"
+	case "d":
+		m.tags["metric_type"] = "distribution"
 	}
 	if len(lineTags) > 0 {
 		for k, v := range lineTags {

@@ -749,6 +777,15 @@ func (s *Statsd) aggregate(m metric) {
 	defer s.Unlock()
 
 	switch m.mtype {
+	case "d":
+		if s.DataDogExtensions && s.DataDogDistributions {
+			cached := cacheddistributions{
+				name:  m.name,
+				value: m.floatvalue,
+				tags:  m.tags,
+			}
+			s.distributions = append(s.distributions, cached)
+		}
 	case "ms", "h":
 		// Check if the measurement exists
 		cached, ok := s.timings[m.hash]

@@ -31,6 +31,7 @@ func NewTestStatsd() *Statsd {
 	s.counters = make(map[string]cachedcounter)
 	s.sets = make(map[string]cachedset)
 	s.timings = make(map[string]cachedtimings)
+	s.distributions = make([]cacheddistributions, 0)
 
 	s.MetricSeparator = "_"
 
@@ -430,7 +431,7 @@ func TestParse_Timings(t *testing.T) {
 	s.Percentiles = []internal.Number{{Value: 90.0}}
 	acc := &testutil.Accumulator{}
 
-	// Test that counters work
+	// Test that timings work
 	validLines := []string{
 		"test.timing:1|ms",
 		"test.timing:11|ms",

@@ -461,6 +462,63 @@ func TestParse_Timings(t *testing.T) {
 	acc.AssertContainsFields(t, "test_timing", valid)
 }
 
+// Tests low-level functionality of distributions
+func TestParse_Distributions(t *testing.T) {
+	s := NewTestStatsd()
+	acc := &testutil.Accumulator{}
+
+	parseMetrics := func() {
+		// Test that distributions work
+		validLines := []string{
+			"test.distribution:1|d",
+			"test.distribution2:2|d",
+			"test.distribution3:3|d",
+			"test.distribution4:1|d",
+			"test.distribution5:1|d",
+		}
+
+		for _, line := range validLines {
+			err := s.parseStatsdLine(line)
+			if err != nil {
+				t.Errorf("Parsing line %s should not have resulted in an error\n", line)
+			}
+		}
+
+		s.Gather(acc)
+	}
+
+	validMeasurementMap := map[string]float64{
+		"test_distribution":  1,
+		"test_distribution2": 2,
+		"test_distribution3": 3,
+		"test_distribution4": 1,
+		"test_distribution5": 1,
+	}
+
+	// Test parsing when DataDogExtensions and DataDogDistributions aren't enabled
+	parseMetrics()
+	for key := range validMeasurementMap {
+		acc.AssertDoesNotContainMeasurement(t, key)
+	}
+
+	// Test parsing when DataDogDistributions is enabled but not DataDogExtensions
+	s.DataDogDistributions = true
+	parseMetrics()
+	for key := range validMeasurementMap {
+		acc.AssertDoesNotContainMeasurement(t, key)
+	}
+
+	// Test parsing when DataDogExtensions and DataDogDistributions are enabled
+	s.DataDogExtensions = true
+	parseMetrics()
+	for key, value := range validMeasurementMap {
+		field := map[string]interface{}{
+			"value": float64(value),
+		}
+		acc.AssertContainsFields(t, key, field)
+	}
+}
+
 func TestParseScientificNotation(t *testing.T) {
 	s := NewTestStatsd()
 	sciNotationLines := []string{