Add support for datadog distributions metric (#8179)
* Add support for datadog distributions in statsd
* Parse metric distribution correctly
* Add tests to check distributions are parsed correctly
* Update Statsd plugin Readme with details about Distributions metric
* Refactor metric distribution initialization code
* Update distribution metric interface to replace fields with value
* Refactor statsd distribution metric test code
* Fix go formatting errors
* Add tests to parse only when DataDog Distributions config is enabled
* Add config to enable parsing DataDog Statsd Distributions
* Document use of datadog_distributions config in Readme
parent f09e551cbd
commit f888136333
@@ -7328,6 +7328,9 @@
 # ## Parses datadog extensions to the statsd format
 # datadog_extensions = false
 #
+# ## Parses distributions metric from datadog's extension to the statsd format
+# datadog_distributions = false
+#
 # ## Statsd data translation templates, more info can be read here:
 # ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
 # # templates = [

@@ -50,6 +50,10 @@
   ## http://docs.datadoghq.com/guides/dogstatsd/
   datadog_extensions = false
 
+  ## Parses distributions metric as specified in the datadog statsd format
+  ## https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+  datadog_distributions = false
+
   ## Statsd data translation templates, more info can be read here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
   # templates = [

@@ -98,6 +102,10 @@ implementation. In short, the telegraf statsd listener will accept:
   - `load.time:320|ms`
   - `load.time.nanoseconds:1|h`
   - `load.time:200|ms|@0.1` <- sampled 1/10 of the time
+- Distributions
+  - `load.time:320|d`
+  - `load.time.nanoseconds:1|d`
+  - `load.time:200|d|@0.1` <- sampled 1/10 of the time
 
 It is possible to omit repetitive names and merge individual stats into a
 single line by separating them with additional colons:

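A minimal sketch, not part of this commit, of a client that pushes the distribution lines shown in the hunk above to the statsd listener over UDP. The 127.0.0.1:8125 address assumes the plugin's default service_address; adjust it to your deployment.

package main

import (
	"fmt"
	"net"
)

func main() {
	// Assumes a Telegraf statsd listener on the default UDP port 8125.
	conn, err := net.Dial("udp", "127.0.0.1:8125")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// The same example lines as the README hunk above.
	lines := []string{
		"load.time:320|d",
		"load.time.nanoseconds:1|d",
		"load.time:200|d|@0.1", // sampled 1/10 of the time
	}
	for _, line := range lines {
		// dogstatsd is line-oriented; one metric per datagram keeps it simple
		fmt.Fprintf(conn, "%s\n", line)
	}
}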
@@ -172,6 +180,9 @@ metric type:
   that `P%` of all the values statsd saw for that stat during that time
   period are below x. The most common value that people use for `P` is the
   `90`, this is a great number to try to optimize.
+- Distributions
+  - The Distribution metric represents the global statistical distribution of a set of values calculated across your entire distributed infrastructure in one time interval. A Distribution can be used to instrument logical objects, like services, independently from the underlying hosts.
+  - Unlike the Histogram metric type, which aggregates on the Agent during a given time interval, a Distribution metric sends all the raw data during a time interval.
 
 ### Plugin arguments
 
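A hedged sketch, written against the test helpers this commit touches (NewTestStatsd, testutil.Accumulator) and not part of the commit itself, illustrating the raw-data behaviour described above: three samples for the same hypothetical bucket come out as three separate points per gather rather than one aggregated value.

func TestDistributionsAreNotAggregated(t *testing.T) {
	s := NewTestStatsd()
	s.DataDogExtensions = true
	s.DataDogDistributions = true
	acc := &testutil.Accumulator{}

	// Hypothetical bucket name; three raw samples in one interval.
	for _, line := range []string{"page.views:1|d", "page.views:1|d", "page.views:1|d"} {
		if err := s.parseStatsdLine(line); err != nil {
			t.Errorf("Parsing line %s should not have resulted in an error\n", line)
		}
	}
	s.Gather(acc)

	// Each raw sample becomes its own point, unlike histograms/timings.
	if len(acc.Metrics) != 3 {
		t.Errorf("expected 3 distribution points, got %d", len(acc.Metrics))
	}
}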
@@ -195,6 +206,7 @@ the accuracy of percentiles but also increases the memory usage and cpu time.
   measurements and tags.
 - **parse_data_dog_tags** boolean: Enable parsing of tags in DataDog's dogstatsd format (http://docs.datadoghq.com/guides/dogstatsd/)
 - **datadog_extensions** boolean: Enable parsing of DataDog's extensions to dogstatsd format (http://docs.datadoghq.com/guides/dogstatsd/)
+- **datadog_distributions** boolean: Enable parsing of the Distribution metric in DataDog's dogstatsd format (https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition)
 - **max_ttl** config.Duration: Max duration (TTL) for each metric to stay cached/reported without being updated.
 
 ### Statsd bucket -> InfluxDB line-protocol Templates

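A similar hedged sketch, again not from the commit, of what the two flags enable together: the |d value lands in the default "value" field, dogstatsd tags are parsed, and a metric_type=distribution tag is added. The bucket name and tag below are hypothetical.

func TestDistributionFieldAndTags(t *testing.T) {
	s := NewTestStatsd()
	s.DataDogExtensions = true    // datadog_extensions = true
	s.DataDogDistributions = true // datadog_distributions = true
	acc := &testutil.Accumulator{}

	// Hypothetical line in dogstatsd distribution format with one tag.
	if err := s.parseStatsdLine("users.online:29|d|#country:china"); err != nil {
		t.Errorf("Parsing line should not have resulted in an error: %v", err)
	}
	s.Gather(acc)

	// "." is joined with the metric separator, the value goes into the
	// default field, and the tags carry the metric type plus the parsed tag.
	acc.AssertContainsTaggedFields(t, "users_online",
		map[string]interface{}{"value": float64(29)},
		map[string]string{"metric_type": "distribution", "country": "china"},
	)
}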
@@ -70,6 +70,11 @@ type Statsd struct {
 	// http://docs.datadoghq.com/guides/dogstatsd/
 	DataDogExtensions bool `toml:"datadog_extensions"`
 
+	// Parses distribution metrics in the datadog statsd format.
+	// Requires the DataDogExtension flag to be enabled.
+	// https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+	DataDogDistributions bool `toml:"datadog_distributions"`
+
 	// UDPPacketSize is deprecated, it's only here for legacy support
 	// we now always create 1 max size buffer and then copy only what we need
 	// into the in channel

@@ -98,10 +103,12 @@ type Statsd struct {
 	// Cache gauges, counters & sets so they can be aggregated as they arrive
 	// gauges and counters map measurement/tags hash -> field name -> metrics
 	// sets and timings map measurement/tags hash -> metrics
+	// distributions aggregate measurement/tags and are published directly
 	gauges        map[string]cachedgauge
 	counters      map[string]cachedcounter
 	sets          map[string]cachedset
 	timings       map[string]cachedtimings
+	distributions []cacheddistributions
 
 	// bucket -> influx templates
 	Templates []string

@@ -190,6 +197,12 @@ type cachedtimings struct {
 	expiresAt time.Time
 }
 
+type cacheddistributions struct {
+	name  string
+	value float64
+	tags  map[string]string
+}
+
 func (_ *Statsd) Description() string {
 	return "Statsd UDP/TCP Server"
 }

@@ -237,6 +250,10 @@ const sampleConfig = `
   ## Parses datadog extensions to the statsd format
   datadog_extensions = false
 
+  ## Parses distributions metric as specified in the datadog statsd format
+  ## https://docs.datadoghq.com/developers/metrics/types/?tab=distribution#definition
+  datadog_distributions = false
+
   ## Statsd data translation templates, more info can be read here:
   ## https://github.com/influxdata/telegraf/blob/master/docs/TEMPLATE_PATTERN.md
   # templates = [

@@ -265,6 +282,14 @@ func (s *Statsd) Gather(acc telegraf.Accumulator) error {
 	defer s.Unlock()
 	now := time.Now()
 
+	for _, m := range s.distributions {
+		fields := map[string]interface{}{
+			defaultFieldName: m.value,
+		}
+		acc.AddFields(m.name, fields, m.tags, now)
+	}
+	s.distributions = make([]cacheddistributions, 0)
+
 	for _, m := range s.timings {
 		// Defining a template to parse field names for timers allows us to split
 		// out multiple fields per timer. In this case we prefix each stat with the

@@ -336,6 +361,7 @@ func (s *Statsd) Start(ac telegraf.Accumulator) error {
 	s.counters = make(map[string]cachedcounter)
 	s.sets = make(map[string]cachedset)
 	s.timings = make(map[string]cachedtimings)
+	s.distributions = make([]cacheddistributions, 0)
 
 	s.Lock()
 	defer s.Unlock()

@@ -601,7 +627,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
 
 		// Validate metric type
 		switch pipesplit[1] {
-		case "g", "c", "s", "ms", "h":
+		case "g", "c", "s", "ms", "h", "d":
 			m.mtype = pipesplit[1]
 		default:
 			s.Log.Errorf("Metric type %q unsupported", pipesplit[1])

@@ -618,7 +644,7 @@ func (s *Statsd) parseStatsdLine(line string) error {
 		}
 
 		switch m.mtype {
-		case "g", "ms", "h":
+		case "g", "ms", "h", "d":
 			v, err := strconv.ParseFloat(pipesplit[0], 64)
 			if err != nil {
 				s.Log.Errorf("Parsing value to float64, unable to parse metric: %s", line)

@@ -658,6 +684,8 @@ func (s *Statsd) parseStatsdLine(line string) error {
 		m.tags["metric_type"] = "timing"
 	case "h":
 		m.tags["metric_type"] = "histogram"
+	case "d":
+		m.tags["metric_type"] = "distribution"
 	}
 	if len(lineTags) > 0 {
 		for k, v := range lineTags {

@@ -749,6 +777,15 @@ func (s *Statsd) aggregate(m metric) {
 	defer s.Unlock()
 
 	switch m.mtype {
+	case "d":
+		if s.DataDogExtensions && s.DataDogDistributions {
+			cached := cacheddistributions{
+				name:  m.name,
+				value: m.floatvalue,
+				tags:  m.tags,
+			}
+			s.distributions = append(s.distributions, cached)
+		}
 	case "ms", "h":
 		// Check if the measurement exists
 		cached, ok := s.timings[m.hash]

@@ -31,6 +31,7 @@ func NewTestStatsd() *Statsd {
 	s.counters = make(map[string]cachedcounter)
 	s.sets = make(map[string]cachedset)
 	s.timings = make(map[string]cachedtimings)
+	s.distributions = make([]cacheddistributions, 0)
 
 	s.MetricSeparator = "_"
 
@@ -430,7 +431,7 @@ func TestParse_Timings(t *testing.T) {
 	s.Percentiles = []internal.Number{{Value: 90.0}}
 	acc := &testutil.Accumulator{}
 
-	// Test that counters work
+	// Test that timings work
 	validLines := []string{
 		"test.timing:1|ms",
 		"test.timing:11|ms",

@@ -461,6 +462,63 @@ func TestParse_Timings(t *testing.T) {
 	acc.AssertContainsFields(t, "test_timing", valid)
 }
 
+// Tests low-level functionality of distributions
+func TestParse_Distributions(t *testing.T) {
+	s := NewTestStatsd()
+	acc := &testutil.Accumulator{}
+
+	parseMetrics := func() {
+		// Test that distributions work
+		validLines := []string{
+			"test.distribution:1|d",
+			"test.distribution2:2|d",
+			"test.distribution3:3|d",
+			"test.distribution4:1|d",
+			"test.distribution5:1|d",
+		}
+
+		for _, line := range validLines {
+			err := s.parseStatsdLine(line)
+			if err != nil {
+				t.Errorf("Parsing line %s should not have resulted in an error\n", line)
+			}
+		}
+
+		s.Gather(acc)
+	}
+
+	validMeasurementMap := map[string]float64{
+		"test_distribution":  1,
+		"test_distribution2": 2,
+		"test_distribution3": 3,
+		"test_distribution4": 1,
+		"test_distribution5": 1,
+	}
+
+	// Test parsing when DataDogExtensions and DataDogDistributions aren't enabled
+	parseMetrics()
+	for key := range validMeasurementMap {
+		acc.AssertDoesNotContainMeasurement(t, key)
+	}
+
+	// Test parsing when DataDogDistributions is enabled but not DataDogExtensions
+	s.DataDogDistributions = true
+	parseMetrics()
+	for key := range validMeasurementMap {
+		acc.AssertDoesNotContainMeasurement(t, key)
+	}
+
+	// Test parsing when DataDogExtensions and DataDogDistributions are enabled
+	s.DataDogExtensions = true
+	parseMetrics()
+	for key, value := range validMeasurementMap {
+		field := map[string]interface{}{
+			"value": float64(value),
+		}
+		acc.AssertContainsFields(t, key, field)
+	}
+}
+
 func TestParseScientificNotation(t *testing.T) {
 	s := NewTestStatsd()
 	sciNotationLines := []string{