From df47b41668f530cf2bc40593faaf6746ae46fee9 Mon Sep 17 00:00:00 2001 From: Greg Linton Date: Tue, 18 May 2021 12:29:30 -0400 Subject: [PATCH] Allow more characters in graphite tags (#9249) --- config/config.go | 12 +- plugins/outputs/graphite/graphite.go | 12 +- plugins/outputs/instrumental/instrumental.go | 2 +- plugins/serializers/graphite/README.md | 11 ++ plugins/serializers/graphite/graphite.go | 42 ++++-- plugins/serializers/graphite/graphite_test.go | 121 ++++++++++++++++++ plugins/serializers/registry.go | 22 +++- 7 files changed, 193 insertions(+), 29 deletions(-) diff --git a/config/config.go b/config/config.go index c1bf9235f..02586ff9b 100644 --- a/config/config.go +++ b/config/config.go @@ -1400,6 +1400,8 @@ func (c *Config) buildSerializer(tbl *ast.Table) (serializers.Serializer, error) c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields) c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport) c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport) + c.getFieldString(tbl, "graphite_tag_sanitize_mode", &sc.GraphiteTagSanitizeMode) + c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator) c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits) @@ -1464,11 +1466,11 @@ func (c *Config) missingTomlField(_ reflect.Type, key string) error { "data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path", "dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path", "fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys", - "grace", "graphite_separator", "graphite_tag_support", "grok_custom_pattern_files", - "grok_custom_patterns", "grok_named_patterns", "grok_patterns", "grok_timezone", - "grok_unique_timestamp", "influx_max_line_bytes", "influx_sort_fields", "influx_uint_support", - "interval", "json_name_key", "json_query", "json_strict", "json_string_fields", - "json_time_format", "json_time_key", 
"json_timestamp_units", "json_timezone", + "grace", "graphite_separator", "graphite_tag_sanitize_mode", "graphite_tag_support", + "grok_custom_pattern_files", "grok_custom_patterns", "grok_named_patterns", "grok_patterns", + "grok_timezone", "grok_unique_timestamp", "influx_max_line_bytes", "influx_sort_fields", + "influx_uint_support", "interval", "json_name_key", "json_query", "json_strict", + "json_string_fields", "json_time_format", "json_time_key", "json_timestamp_units", "json_timezone", "metric_batch_size", "metric_buffer_limit", "name_override", "name_prefix", "name_suffix", "namedrop", "namepass", "order", "pass", "period", "precision", "prefix", "prometheus_export_timestamp", "prometheus_sort_metrics", "prometheus_string_as_label", diff --git a/plugins/outputs/graphite/graphite.go b/plugins/outputs/graphite/graphite.go index 455c7c785..bd35a4203 100644 --- a/plugins/outputs/graphite/graphite.go +++ b/plugins/outputs/graphite/graphite.go @@ -15,8 +15,9 @@ import ( ) type Graphite struct { - GraphiteTagSupport bool `toml:"graphite_tag_support"` - GraphiteSeparator string `toml:"graphite_separator"` + GraphiteTagSupport bool `toml:"graphite_tag_support"` + GraphiteTagSanitizeMode string `toml:"graphite_tag_sanitize_mode"` + GraphiteSeparator string `toml:"graphite_separator"` // URL is only for backwards compatibility Servers []string `toml:"servers"` Prefix string `toml:"prefix"` @@ -43,6 +44,11 @@ var sampleConfig = ` ## Enable Graphite tags support # graphite_tag_support = false + ## Define how metric names and tags are sanitized; options are "strict", or "compatible" + ## strict - Default method, and backwards compatible with previous versions of Telegraf + ## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec + # graphite_tag_sanitize_mode = "strict" + ## Character for separating metric name and field for Graphite tags # graphite_separator = "." 
@@ -150,7 +156,7 @@ func (g *Graphite) checkEOF(conn net.Conn) { func (g *Graphite) Write(metrics []telegraf.Metric) error { // Prepare data var batch []byte - s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteSeparator, g.Templates) + s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates) if err != nil { return err } diff --git a/plugins/outputs/instrumental/instrumental.go b/plugins/outputs/instrumental/instrumental.go index 87148ed08..f7158f16f 100644 --- a/plugins/outputs/instrumental/instrumental.go +++ b/plugins/outputs/instrumental/instrumental.go @@ -88,7 +88,7 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error { } } - s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, ".", i.Templates) + s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, "strict", ".", i.Templates) if err != nil { return err } diff --git a/plugins/serializers/graphite/README.md b/plugins/serializers/graphite/README.md index f6fd0c2cc..f68765c54 100644 --- a/plugins/serializers/graphite/README.md +++ b/plugins/serializers/graphite/README.md @@ -35,6 +35,8 @@ method is used, otherwise the [Template Pattern](templates) is used. ## Support Graphite tags, recommended to enable when using Graphite 1.1 or later. # graphite_tag_support = false + ## Define how tag names and values are sanitized; options are "strict" or "compatible" + # graphite_tag_sanitize_mode = "strict" ## Character for separating metric name and field for Graphite tags # graphite_separator = "." ``` @@ -64,4 +66,13 @@ cpu_usage_user;cpu=cpu-total;dc=us-east-1;host=tars 0.89 1455320690 cpu_usage_idle;cpu=cpu-total;dc=us-east-1;host=tars 98.09 1455320690 ``` +The `graphite_tag_sanitize_mode` option defines how we should sanitize the tag names and values. Possible values are `strict` or `compatible`, with the default being `strict`. 
+ +When in `strict` mode Telegraf uses the same rules as metrics when not using tags. +When in `compatible` mode Telegraf allows more characters through, and is based on the Graphite specification: +>Tag names must have a length >= 1 and may contain any ascii characters except `;!^=`. Tag values must also have a length >= 1, they may contain any ascii characters except `;` and the first character must not be `~`. UTF-8 characters may work for names and values, but they are not well tested and it is not recommended to use non-ascii characters in metric names or tags. Metric names get indexed under the special tag name, if a metric name starts with one or multiple ~ they simply get removed from the derived tag value because the ~ character is not allowed to be in the first position of the tag value. If a metric name consists of no other characters than ~, then it is considered invalid and may get dropped. + + + + [templates]: /docs/TEMPLATE_PATTERN.md diff --git a/plugins/serializers/graphite/graphite.go b/plugins/serializers/graphite/graphite.go index f71e97fa4..c6130c7b7 100644 --- a/plugins/serializers/graphite/graphite.go +++ b/plugins/serializers/graphite/graphite.go @@ -16,8 +16,11 @@ import ( const DefaultTemplate = "host.tags.measurement.field" var ( - allowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`) - hyphenChars = strings.NewReplacer( + strictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`) + compatibleAllowedCharsName = regexp.MustCompile(`[^ "-:\<>-\]_a-~\p{L}]`) + compatibleAllowedCharsValue = regexp.MustCompile(`[^ -:<-~\p{L}]`) + compatibleLeadingTildeDrop = regexp.MustCompile(`^[~]*(.*)`) + hyphenChars = strings.NewReplacer( "/", "-", "@", "-", "*", "-", @@ -36,11 +39,12 @@ type GraphiteTemplate struct { } type GraphiteSerializer struct { - Prefix string - Template string - TagSupport bool - Separator string - Templates []*GraphiteTemplate + Prefix string + Template string + TagSupport bool + TagSanitizeMode string + Separator 
string + Templates []*GraphiteTemplate } func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) { @@ -56,7 +60,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) { if fieldValue == "" { continue } - bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName) + bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode) metricString := fmt.Sprintf("%s %s %d\n", // insert "field" section of template bucket, @@ -87,7 +91,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) { } metricString := fmt.Sprintf("%s %s %d\n", // insert "field" section of template - sanitize(InsertField(bucket, fieldName)), + strictSanitize(InsertField(bucket, fieldName)), fieldValue, timestamp) point := []byte(metricString) @@ -248,6 +252,7 @@ func SerializeBucketNameWithTags( prefix string, separator string, field string, + tagSanitizeMode string, ) string { var out string var tagsCopy []string @@ -255,7 +260,11 @@ func SerializeBucketNameWithTags( if k == "name" { k = "_name" } - tagsCopy = append(tagsCopy, sanitize(k+"="+v)) + if tagSanitizeMode == "compatible" { + tagsCopy = append(tagsCopy, compatibleSanitize(k, v)) + } else { + tagsCopy = append(tagsCopy, strictSanitize(k+"="+v)) + } } sort.Strings(tagsCopy) @@ -269,7 +278,7 @@ func SerializeBucketNameWithTags( out += separator + field } - out = sanitize(out) + out = strictSanitize(out) if len(tagsCopy) > 0 { out += ";" + strings.Join(tagsCopy, ";") @@ -308,11 +317,18 @@ func buildTags(tags map[string]string) string { return tagStr } -func sanitize(value string) string { +func strictSanitize(value string) string { // Apply special hyphenation rules to preserve backwards compatibility value = hyphenChars.Replace(value) // Apply rule to drop some chars to preserve backwards compatibility value = dropChars.Replace(value) // Replace any remaining 
illegal chars - return allowedChars.ReplaceAllLiteralString(value, "_") + return strictAllowedChars.ReplaceAllLiteralString(value, "_") +} + +func compatibleSanitize(name string, value string) string { + name = compatibleAllowedCharsName.ReplaceAllLiteralString(name, "_") + value = compatibleAllowedCharsValue.ReplaceAllLiteralString(value, "_") + value = compatibleLeadingTildeDrop.FindStringSubmatch(value)[1] + return name + "=" + value } diff --git a/plugins/serializers/graphite/graphite_test.go b/plugins/serializers/graphite/graphite_test.go index 0a2e0bd7b..f2fd3b7f1 100644 --- a/plugins/serializers/graphite/graphite_test.go +++ b/plugins/serializers/graphite/graphite_test.go @@ -543,6 +543,32 @@ func TestSerializeTagWithSpacesWithTagSupport(t *testing.T) { assert.Equal(t, expS, mS) } +func TestSerializeTagWithSpacesWithTagSupportCompatibleSanitize(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "host": "localhost", + "cpu": `cpu\ 0`, + "datacenter": "us-west-2", + } + fields := map[string]interface{}{ + `field_with_spaces`: float64(91.5), + } + m := metric.New("cpu", tags, fields, now) + + s := GraphiteSerializer{ + TagSupport: true, + TagSanitizeMode: "compatible", + Separator: ".", + } + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + + expS := []string{ + fmt.Sprintf("cpu.field_with_spaces;cpu=cpu\\ 0;datacenter=us-west-2;host=localhost 91.5 %d", now.Unix()), + } + assert.Equal(t, expS, mS) +} + // test that a field named "value" gets ignored at beginning of template. 
func TestSerializeValueField3(t *testing.T) { now := time.Now() @@ -942,6 +968,101 @@ func TestCleanWithTagsSupport(t *testing.T) { } } +func TestCleanWithTagsSupportCompatibleSanitize(t *testing.T) { + now := time.Unix(1234567890, 0) + tests := []struct { + name string + metricName string + tags map[string]string + fields map[string]interface{} + expected string + }{ + { + "Base metric", + "cpu", + map[string]string{"host": "localhost"}, + map[string]interface{}{"usage_busy": float64(8.5)}, + "cpu.usage_busy;host=localhost 8.5 1234567890\n", + }, + { + "Dot and whitespace in tags", + "cpu", + map[string]string{"host": "localhost", "label.dot and space": "value with.dot"}, + map[string]interface{}{"usage_busy": float64(8.5)}, + "cpu.usage_busy;host=localhost;label.dot and space=value with.dot 8.5 1234567890\n", + }, + { + "Field with space", + "system", + map[string]string{"host": "localhost"}, + map[string]interface{}{"uptime_format": "20 days, 23:26"}, + "", // yes nothing. graphite don't serialize string fields + }, + { + "Allowed punct", + "cpu", + map[string]string{"host": "localhost", "tag": "-_:=!^~"}, + map[string]interface{}{"usage_busy": float64(10)}, + "cpu.usage_busy;host=localhost;tag=-_:=!^~ 10 1234567890\n", + }, + { + "Special characters preserved", + "cpu", + map[string]string{"host": "localhost", "tag": "/@*"}, + map[string]interface{}{"usage_busy": float64(10)}, + "cpu.usage_busy;host=localhost;tag=/@* 10 1234567890\n", + }, + { + "Special characters preserved 2", + "cpu", + map[string]string{"host": "localhost", "tag": `\no change to slash`}, + map[string]interface{}{"usage_busy": float64(10)}, + "cpu.usage_busy;host=localhost;tag=\\no change to slash 10 1234567890\n", + }, + { + "Empty tag & value field", + "cpu", + map[string]string{"host": "localhost"}, + map[string]interface{}{"value": float64(10)}, + "cpu;host=localhost 10 1234567890\n", + }, + { + "Unicode Letters allowed", + "cpu", + map[string]string{"host": "localhost", "tag": 
"μnicodε_letters"}, + map[string]interface{}{"value": float64(10)}, + "cpu;host=localhost;tag=μnicodε_letters 10 1234567890\n", + }, + { + "Other Unicode not allowed", + "cpu", + map[string]string{"host": "localhost", "tag": "“☢”"}, + map[string]interface{}{"value": float64(10)}, + "cpu;host=localhost;tag=___ 10 1234567890\n", + }, + { + "Newline in tags", + "cpu", + map[string]string{"host": "localhost", "label": "some\nthing\nwith\nnewline"}, + map[string]interface{}{"usage_busy": float64(8.5)}, + "cpu.usage_busy;host=localhost;label=some_thing_with_newline 8.5 1234567890\n", + }, + } + + s := GraphiteSerializer{ + TagSupport: true, + TagSanitizeMode: "compatible", + Separator: ".", + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := metric.New(tt.metricName, tt.tags, tt.fields, now) + actual, _ := s.Serialize(m) + require.Equal(t, tt.expected, string(actual)) + }) + } +} + func TestSerializeBatch(t *testing.T) { now := time.Unix(1234567890, 0) tests := []struct { diff --git a/plugins/serializers/registry.go b/plugins/serializers/registry.go index 247324d4a..e67a9594d 100644 --- a/plugins/serializers/registry.go +++ b/plugins/serializers/registry.go @@ -59,6 +59,9 @@ type Config struct { // Support tags in graphite protocol GraphiteTagSupport bool `toml:"graphite_tag_support"` + // Support tags which follow the spec + GraphiteTagSanitizeMode string `toml:"graphite_tag_sanitize_mode"` + // Character for separating metric name and field for Graphite tags GraphiteSeparator string `toml:"graphite_separator"` @@ -118,7 +121,7 @@ func NewSerializer(config *Config) (Serializer, error) { case "influx": serializer, err = NewInfluxSerializerConfig(config) case "graphite": - serializer, err = NewGraphiteSerializer(config.Prefix, config.Template, config.GraphiteTagSupport, config.GraphiteSeparator, config.Templates) + serializer, err = NewGraphiteSerializer(config.Prefix, config.Template, config.GraphiteTagSupport, config.GraphiteTagSanitizeMode, 
config.GraphiteSeparator, config.Templates) case "json": serializer, err = NewJSONSerializer(config.TimestampUnits) case "splunkmetric": @@ -223,7 +226,7 @@ func NewInfluxSerializer() (Serializer, error) { return influx.NewSerializer(), nil } -func NewGraphiteSerializer(prefix, template string, tagSupport bool, separator string, templates []string) (Serializer, error) { +func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitizeMode string, separator string, templates []string) (Serializer, error) { graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates) if err != nil { @@ -234,16 +237,21 @@ func NewGraphiteSerializer(prefix, template string, tagSupport bool, separator s template = defaultTemplate } + if tagSanitizeMode == "" { + tagSanitizeMode = "strict" + } + if separator == "" { separator = "." } return &graphite.GraphiteSerializer{ - Prefix: prefix, - Template: template, - TagSupport: tagSupport, - Separator: separator, - Templates: graphiteTemplates, + Prefix: prefix, + Template: template, + TagSupport: tagSupport, + TagSanitizeMode: tagSanitizeMode, + Separator: separator, + Templates: graphiteTemplates, }, nil }