Allow more characters in graphite tags (#9249)

This commit is contained in:
Greg Linton 2021-05-18 12:29:30 -04:00 committed by GitHub
parent ae7d31996b
commit df47b41668
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 193 additions and 29 deletions

View File

@ -1400,6 +1400,8 @@ func (c *Config) buildSerializer(tbl *ast.Table) (serializers.Serializer, error)
c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields) c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields)
c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport) c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport)
c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport) c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport)
c.getFieldString(tbl, "graphite_tag_sanitize_mode", &sc.GraphiteTagSanitizeMode)
c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator) c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator)
c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits) c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits)
@ -1464,11 +1466,11 @@ func (c *Config) missingTomlField(_ reflect.Type, key string) error {
"data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path", "data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path",
"dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path", "dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path",
"fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys", "fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys",
"grace", "graphite_separator", "graphite_tag_support", "grok_custom_pattern_files", "grace", "graphite_separator", "graphite_tag_sanitize_mode", "graphite_tag_support",
"grok_custom_patterns", "grok_named_patterns", "grok_patterns", "grok_timezone", "grok_custom_pattern_files", "grok_custom_patterns", "grok_named_patterns", "grok_patterns",
"grok_unique_timestamp", "influx_max_line_bytes", "influx_sort_fields", "influx_uint_support", "grok_timezone", "grok_unique_timestamp", "influx_max_line_bytes", "influx_sort_fields",
"interval", "json_name_key", "json_query", "json_strict", "json_string_fields", "influx_uint_support", "interval", "json_name_key", "json_query", "json_strict",
"json_time_format", "json_time_key", "json_timestamp_units", "json_timezone", "json_string_fields", "json_time_format", "json_time_key", "json_timestamp_units", "json_timezone",
"metric_batch_size", "metric_buffer_limit", "name_override", "name_prefix", "metric_batch_size", "metric_buffer_limit", "name_override", "name_prefix",
"name_suffix", "namedrop", "namepass", "order", "pass", "period", "precision", "name_suffix", "namedrop", "namepass", "order", "pass", "period", "precision",
"prefix", "prometheus_export_timestamp", "prometheus_sort_metrics", "prometheus_string_as_label", "prefix", "prometheus_export_timestamp", "prometheus_sort_metrics", "prometheus_string_as_label",

View File

@ -15,8 +15,9 @@ import (
) )
type Graphite struct { type Graphite struct {
GraphiteTagSupport bool `toml:"graphite_tag_support"` GraphiteTagSupport bool `toml:"graphite_tag_support"`
GraphiteSeparator string `toml:"graphite_separator"` GraphiteTagSanitizeMode string `toml:"graphite_tag_sanitize_mode"`
GraphiteSeparator string `toml:"graphite_separator"`
// URL is only for backwards compatibility // URL is only for backwards compatibility
Servers []string `toml:"servers"` Servers []string `toml:"servers"`
Prefix string `toml:"prefix"` Prefix string `toml:"prefix"`
@ -43,6 +44,11 @@ var sampleConfig = `
## Enable Graphite tags support ## Enable Graphite tags support
# graphite_tag_support = false # graphite_tag_support = false
## Define how metric names and tags are sanitized; options are "strict", or "compatible"
## strict - Default method, and backwards compatible with previous versions of Telegraf
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec
# graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags ## Character for separating metric name and field for Graphite tags
# graphite_separator = "." # graphite_separator = "."
@ -150,7 +156,7 @@ func (g *Graphite) checkEOF(conn net.Conn) {
func (g *Graphite) Write(metrics []telegraf.Metric) error { func (g *Graphite) Write(metrics []telegraf.Metric) error {
// Prepare data // Prepare data
var batch []byte var batch []byte
s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteSeparator, g.Templates) s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates)
if err != nil { if err != nil {
return err return err
} }

View File

@ -88,7 +88,7 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error {
} }
} }
s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, ".", i.Templates) s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, "strict", ".", i.Templates)
if err != nil { if err != nil {
return err return err
} }

View File

@ -35,6 +35,8 @@ method is used, otherwise the [Template Pattern](templates) is used.
## Support Graphite tags, recommended to enable when using Graphite 1.1 or later. ## Support Graphite tags, recommended to enable when using Graphite 1.1 or later.
# graphite_tag_support = false # graphite_tag_support = false
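## Define how metric names and tags are sanitized; options are "strict" or "compatible"
## strict - Default method, and backwards compatible with previous versions of Telegraf
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec
# graphite_tag_sanitize_mode = "strict"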
## Character for separating metric name and field for Graphite tags ## Character for separating metric name and field for Graphite tags
# graphite_separator = "." # graphite_separator = "."
``` ```
@ -64,4 +66,13 @@ cpu_usage_user;cpu=cpu-total;dc=us-east-1;host=tars 0.89 1455320690
cpu_usage_idle;cpu=cpu-total;dc=us-east-1;host=tars 98.09 1455320690 cpu_usage_idle;cpu=cpu-total;dc=us-east-1;host=tars 98.09 1455320690
``` ```
The `graphite_tag_sanitize_mode` option defines how tag names and values are sanitized. Possible values are `strict` and `compatible`; the default is `strict`.
In `strict` mode, Telegraf sanitizes tag names and values with the same rules it applies to metric names when tags are not used.
In `compatible` mode, Telegraf allows more characters through, following the Graphite specification:
>Tag names must have a length >= 1 and may contain any ascii characters except `;!^=`. Tag values must also have a length >= 1, they may contain any ascii characters except `;` and the first character must not be `~`. UTF-8 characters may work for names and values, but they are not well tested and it is not recommended to use non-ascii characters in metric names or tags. Metric names get indexed under the special tag name, if a metric name starts with one or multiple ~ they simply get removed from the derived tag value because the ~ character is not allowed to be in the first position of the tag value. If a metric name consists of no other characters than ~, then it is considered invalid and may get dropped.
[templates]: /docs/TEMPLATE_PATTERN.md [templates]: /docs/TEMPLATE_PATTERN.md

View File

@ -16,8 +16,11 @@ import (
const DefaultTemplate = "host.tags.measurement.field" const DefaultTemplate = "host.tags.measurement.field"
var ( var (
allowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`) strictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
hyphenChars = strings.NewReplacer( compatibleAllowedCharsName = regexp.MustCompile(`[^ "-:\<>-\]_a-~\p{L}]`)
compatibleAllowedCharsValue = regexp.MustCompile(`[^ -:<-~\p{L}]`)
compatibleLeadingTildeDrop = regexp.MustCompile(`^[~]*(.*)`)
hyphenChars = strings.NewReplacer(
"/", "-", "/", "-",
"@", "-", "@", "-",
"*", "-", "*", "-",
@ -36,11 +39,12 @@ type GraphiteTemplate struct {
} }
type GraphiteSerializer struct { type GraphiteSerializer struct {
Prefix string Prefix string
Template string Template string
TagSupport bool TagSupport bool
Separator string TagSanitizeMode string
Templates []*GraphiteTemplate Separator string
Templates []*GraphiteTemplate
} }
func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) { func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
@ -56,7 +60,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
if fieldValue == "" { if fieldValue == "" {
continue continue
} }
bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName) bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode)
metricString := fmt.Sprintf("%s %s %d\n", metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template // insert "field" section of template
bucket, bucket,
@ -87,7 +91,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
} }
metricString := fmt.Sprintf("%s %s %d\n", metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template // insert "field" section of template
sanitize(InsertField(bucket, fieldName)), strictSanitize(InsertField(bucket, fieldName)),
fieldValue, fieldValue,
timestamp) timestamp)
point := []byte(metricString) point := []byte(metricString)
@ -248,6 +252,7 @@ func SerializeBucketNameWithTags(
prefix string, prefix string,
separator string, separator string,
field string, field string,
tagSanitizeMode string,
) string { ) string {
var out string var out string
var tagsCopy []string var tagsCopy []string
@ -255,7 +260,11 @@ func SerializeBucketNameWithTags(
if k == "name" { if k == "name" {
k = "_name" k = "_name"
} }
tagsCopy = append(tagsCopy, sanitize(k+"="+v)) if tagSanitizeMode == "compatible" {
tagsCopy = append(tagsCopy, compatibleSanitize(k, v))
} else {
tagsCopy = append(tagsCopy, strictSanitize(k+"="+v))
}
} }
sort.Strings(tagsCopy) sort.Strings(tagsCopy)
@ -269,7 +278,7 @@ func SerializeBucketNameWithTags(
out += separator + field out += separator + field
} }
out = sanitize(out) out = strictSanitize(out)
if len(tagsCopy) > 0 { if len(tagsCopy) > 0 {
out += ";" + strings.Join(tagsCopy, ";") out += ";" + strings.Join(tagsCopy, ";")
@ -308,11 +317,18 @@ func buildTags(tags map[string]string) string {
return tagStr return tagStr
} }
func sanitize(value string) string { func strictSanitize(value string) string {
// Apply special hyphenation rules to preserve backwards compatibility // Apply special hyphenation rules to preserve backwards compatibility
value = hyphenChars.Replace(value) value = hyphenChars.Replace(value)
// Apply rule to drop some chars to preserve backwards compatibility // Apply rule to drop some chars to preserve backwards compatibility
value = dropChars.Replace(value) value = dropChars.Replace(value)
// Replace any remaining illegal chars // Replace any remaining illegal chars
return allowedChars.ReplaceAllLiteralString(value, "_") return strictAllowedChars.ReplaceAllLiteralString(value, "_")
}
func compatibleSanitize(name string, value string) string {
name = compatibleAllowedCharsName.ReplaceAllLiteralString(name, "_")
value = compatibleAllowedCharsValue.ReplaceAllLiteralString(value, "_")
value = compatibleLeadingTildeDrop.FindStringSubmatch(value)[1]
return name + "=" + value
} }

View File

@ -543,6 +543,32 @@ func TestSerializeTagWithSpacesWithTagSupport(t *testing.T) {
assert.Equal(t, expS, mS) assert.Equal(t, expS, mS)
} }
// TestSerializeTagWithSpacesWithTagSupportCompatibleSanitize verifies that, with
// tag support enabled and TagSanitizeMode set to "compatible", a tag value
// containing a backslash and a space (`cpu\ 0`) is emitted unchanged.
func TestSerializeTagWithSpacesWithTagSupportCompatibleSanitize(t *testing.T) {
	now := time.Now()
	tags := map[string]string{
		"host":       "localhost",
		"cpu":        `cpu\ 0`,
		"datacenter": "us-west-2",
	}
	fields := map[string]interface{}{
		`field_with_spaces`: float64(91.5),
	}
	m := metric.New("cpu", tags, fields, now)

	s := GraphiteSerializer{
		TagSupport:      true,
		TagSanitizeMode: "compatible",
		Separator:       ".",
	}

	// Fail fast on a serialization error rather than comparing a partial buffer.
	buf, err := s.Serialize(m)
	require.NoError(t, err)
	mS := strings.Split(strings.TrimSpace(string(buf)), "\n")

	// Tags are expected in sorted order after the bucket name.
	expS := []string{
		fmt.Sprintf("cpu.field_with_spaces;cpu=cpu\\ 0;datacenter=us-west-2;host=localhost 91.5 %d", now.Unix()),
	}
	assert.Equal(t, expS, mS)
}
// test that a field named "value" gets ignored at beginning of template. // test that a field named "value" gets ignored at beginning of template.
func TestSerializeValueField3(t *testing.T) { func TestSerializeValueField3(t *testing.T) {
now := time.Now() now := time.Now()
@ -942,6 +968,101 @@ func TestCleanWithTagsSupport(t *testing.T) {
} }
} }
// TestCleanWithTagsSupportCompatibleSanitize is a table-driven test of the
// serializer output when tag support is enabled and TagSanitizeMode is
// "compatible". Each case builds one metric at a fixed timestamp and compares
// the serialized bytes against the exact expected line.
func TestCleanWithTagsSupportCompatibleSanitize(t *testing.T) {
// Fixed timestamp so the expected lines can be written as literals.
now := time.Unix(1234567890, 0)
tests := []struct {
name string
metricName string
tags map[string]string
fields map[string]interface{}
expected string
}{
{
"Base metric",
"cpu",
map[string]string{"host": "localhost"},
map[string]interface{}{"usage_busy": float64(8.5)},
"cpu.usage_busy;host=localhost 8.5 1234567890\n",
},
{
// Dots and spaces in tag names and values are expected to pass through unchanged.
"Dot and whitespace in tags",
"cpu",
map[string]string{"host": "localhost", "label.dot and space": "value with.dot"},
map[string]interface{}{"usage_busy": float64(8.5)},
"cpu.usage_busy;host=localhost;label.dot and space=value with.dot 8.5 1234567890\n",
},
{
"Field with space",
"system",
map[string]string{"host": "localhost"},
map[string]interface{}{"uptime_format": "20 days, 23:26"},
"", // Intentionally empty: Graphite does not serialize string fields.
},
{
// These punctuation characters are expected to survive in a tag value.
"Allowed punct",
"cpu",
map[string]string{"host": "localhost", "tag": "-_:=!^~"},
map[string]interface{}{"usage_busy": float64(10)},
"cpu.usage_busy;host=localhost;tag=-_:=!^~ 10 1234567890\n",
},
{
"Special characters preserved",
"cpu",
map[string]string{"host": "localhost", "tag": "/@*"},
map[string]interface{}{"usage_busy": float64(10)},
"cpu.usage_busy;host=localhost;tag=/@* 10 1234567890\n",
},
{
"Special characters preserved 2",
"cpu",
map[string]string{"host": "localhost", "tag": `\no change to slash`},
map[string]interface{}{"usage_busy": float64(10)},
"cpu.usage_busy;host=localhost;tag=\\no change to slash 10 1234567890\n",
},
{
// A field named "value" is expected to add no field suffix to the bucket name.
"Empty tag & value field",
"cpu",
map[string]string{"host": "localhost"},
map[string]interface{}{"value": float64(10)},
"cpu;host=localhost 10 1234567890\n",
},
{
"Unicode Letters allowed",
"cpu",
map[string]string{"host": "localhost", "tag": "μnicodε_letters"},
map[string]interface{}{"value": float64(10)},
"cpu;host=localhost;tag=μnicodε_letters 10 1234567890\n",
},
{
// Non-letter Unicode characters are each expected to become "_".
"Other Unicode not allowed",
"cpu",
map[string]string{"host": "localhost", "tag": "“☢”"},
map[string]interface{}{"value": float64(10)},
"cpu;host=localhost;tag=___ 10 1234567890\n",
},
{
// Newlines inside a tag value are each expected to become "_".
"Newline in tags",
"cpu",
map[string]string{"host": "localhost", "label": "some\nthing\nwith\nnewline"},
map[string]interface{}{"usage_busy": float64(8.5)},
"cpu.usage_busy;host=localhost;label=some_thing_with_newline 8.5 1234567890\n",
},
}
// One serializer is shared by all cases.
s := GraphiteSerializer{
TagSupport: true,
TagSanitizeMode: "compatible",
Separator: ".",
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m := metric.New(tt.metricName, tt.tags, tt.fields, now)
// The error from Serialize is discarded; only the output bytes are compared.
actual, _ := s.Serialize(m)
require.Equal(t, tt.expected, string(actual))
})
}
}
func TestSerializeBatch(t *testing.T) { func TestSerializeBatch(t *testing.T) {
now := time.Unix(1234567890, 0) now := time.Unix(1234567890, 0)
tests := []struct { tests := []struct {

View File

@ -59,6 +59,9 @@ type Config struct {
// Support tags in graphite protocol // Support tags in graphite protocol
GraphiteTagSupport bool `toml:"graphite_tag_support"` GraphiteTagSupport bool `toml:"graphite_tag_support"`
// Sanitize mode for Graphite tags: "strict" (default) or "compatible" (relaxed, follows the Graphite tag spec)
GraphiteTagSanitizeMode string `toml:"graphite_tag_sanitize_mode"`
// Character for separating metric name and field for Graphite tags // Character for separating metric name and field for Graphite tags
GraphiteSeparator string `toml:"graphite_separator"` GraphiteSeparator string `toml:"graphite_separator"`
@ -118,7 +121,7 @@ func NewSerializer(config *Config) (Serializer, error) {
case "influx": case "influx":
serializer, err = NewInfluxSerializerConfig(config) serializer, err = NewInfluxSerializerConfig(config)
case "graphite": case "graphite":
serializer, err = NewGraphiteSerializer(config.Prefix, config.Template, config.GraphiteTagSupport, config.GraphiteSeparator, config.Templates) serializer, err = NewGraphiteSerializer(config.Prefix, config.Template, config.GraphiteTagSupport, config.GraphiteTagSanitizeMode, config.GraphiteSeparator, config.Templates)
case "json": case "json":
serializer, err = NewJSONSerializer(config.TimestampUnits) serializer, err = NewJSONSerializer(config.TimestampUnits)
case "splunkmetric": case "splunkmetric":
@ -223,7 +226,7 @@ func NewInfluxSerializer() (Serializer, error) {
return influx.NewSerializer(), nil return influx.NewSerializer(), nil
} }
func NewGraphiteSerializer(prefix, template string, tagSupport bool, separator string, templates []string) (Serializer, error) { func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitizeMode string, separator string, templates []string) (Serializer, error) {
graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates) graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates)
if err != nil { if err != nil {
@ -234,16 +237,21 @@ func NewGraphiteSerializer(prefix, template string, tagSupport bool, separator s
template = defaultTemplate template = defaultTemplate
} }
if tagSanitizeMode == "" {
tagSanitizeMode = "strict"
}
if separator == "" { if separator == "" {
separator = "." separator = "."
} }
return &graphite.GraphiteSerializer{ return &graphite.GraphiteSerializer{
Prefix: prefix, Prefix: prefix,
Template: template, Template: template,
TagSupport: tagSupport, TagSupport: tagSupport,
Separator: separator, TagSanitizeMode: tagSanitizeMode,
Templates: graphiteTemplates, Separator: separator,
Templates: graphiteTemplates,
}, nil }, nil
} }