fix(serializers.graphite): Allow for specifying regex to sanitize (#12835)

This commit is contained in:
Joshua Powers 2023-03-14 04:46:44 -06:00 committed by GitHub
parent a7d4a59b6a
commit 7284c126ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 115 additions and 33 deletions

View File

@ -1393,9 +1393,10 @@ func (c *Config) buildSerializer(tbl *ast.Table) (serializers.Serializer, error)
c.getFieldInt(tbl, "influx_max_line_bytes", &sc.InfluxMaxLineBytes) c.getFieldInt(tbl, "influx_max_line_bytes", &sc.InfluxMaxLineBytes)
c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields) c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields)
c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport) c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport)
c.getFieldString(tbl, "graphite_strict_sanitize_regex", &sc.GraphiteStrictRegex)
c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport) c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport)
c.getFieldString(tbl, "graphite_tag_sanitize_mode", &sc.GraphiteTagSanitizeMode) c.getFieldString(tbl, "graphite_tag_sanitize_mode", &sc.GraphiteTagSanitizeMode)
c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator) c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator)
c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits) c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits)
@ -1485,6 +1486,7 @@ func (c *Config) missingTomlField(_ reflect.Type, key string) error {
case "prefix", "template", "templates", case "prefix", "template", "templates",
"carbon2_format", "carbon2_sanitize_replace_char", "carbon2_format", "carbon2_sanitize_replace_char",
"csv_column_prefix", "csv_header", "csv_separator", "csv_timestamp_format", "csv_column_prefix", "csv_header", "csv_separator", "csv_timestamp_format",
"graphite_strict_sanitize_regex",
"graphite_tag_sanitize_mode", "graphite_tag_support", "graphite_separator", "graphite_tag_sanitize_mode", "graphite_tag_support", "graphite_separator",
"influx_max_line_bytes", "influx_sort_fields", "influx_uint_support", "influx_max_line_bytes", "influx_sort_fields", "influx_uint_support",
"json_timestamp_format", "json_timestamp_units", "json_transformation", "json_timestamp_format", "json_timestamp_units", "json_transformation",

View File

@ -32,12 +32,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
template = "host.tags.measurement.field" template = "host.tags.measurement.field"
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to the
## following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Enable Graphite tags support ## Enable Graphite tags support
# graphite_tag_support = false # graphite_tag_support = false
## Define how metric names and tags are sanitized; options are "strict", or "compatible" ## Applied sanitization mode when graphite tag support is enabled.
## strict - Default method, and backwards compatible with previous versionf of Telegraf ## * strict - uses the regex specified above
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec ## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict" # graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags ## Character for separating metric name and field for Graphite tags

View File

@ -165,7 +165,7 @@ func (g *Graphite) checkEOF(conn net.Conn) error {
func (g *Graphite) Write(metrics []telegraf.Metric) error { func (g *Graphite) Write(metrics []telegraf.Metric) error {
// Prepare data // Prepare data
var batch []byte var batch []byte
s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates) s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, "", g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates)
if err != nil { if err != nil {
return err return err
} }

View File

@ -10,12 +10,20 @@
## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md ## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
template = "host.tags.measurement.field" template = "host.tags.measurement.field"
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to the
## following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Enable Graphite tags support ## Enable Graphite tags support
# graphite_tag_support = false # graphite_tag_support = false
## Define how metric names and tags are sanitized; options are "strict", or "compatible" ## Applied sanitization mode when graphite tag support is enabled.
## strict - Default method, and backwards compatible with previous versionf of Telegraf ## * strict - uses the regex specified above
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec ## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict" # graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags ## Character for separating metric name and field for Graphite tags

View File

@ -84,7 +84,7 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error {
} }
} }
s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, "strict", ".", i.Templates) s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, "", false, "strict", ".", i.Templates)
if err != nil { if err != nil {
return err return err
} }

View File

@ -35,10 +35,22 @@ method is used, otherwise the [Template Pattern][templates] is used.
# "host.measurement.tags.field" # "host.measurement.tags.field"
#] #]
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to the
## following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Support Graphite tags, recommended to enable when using Graphite 1.1 or later. ## Support Graphite tags, recommended to enable when using Graphite 1.1 or later.
# graphite_tag_support = false # graphite_tag_support = false
## Enable Graphite tags to support the full list of allowed characters
# graphite_tag_new_sanitize = false ## Applied sanitization mode when graphite tag support is enabled.
## * strict - uses the regex specified above
## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags ## Character for separating metric name and field for Graphite tags
# graphite_separator = "." # graphite_separator = "."
``` ```

View File

@ -16,7 +16,6 @@ import (
const DefaultTemplate = "host.tags.measurement.field" const DefaultTemplate = "host.tags.measurement.field"
var ( var (
strictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
compatibleAllowedCharsName = regexp.MustCompile(`[^ "-:\<>-\]_a-~\p{L}]`) compatibleAllowedCharsName = regexp.MustCompile(`[^ "-:\<>-\]_a-~\p{L}]`)
compatibleAllowedCharsValue = regexp.MustCompile(`[^ -:<-~\p{L}]`) compatibleAllowedCharsValue = regexp.MustCompile(`[^ -:<-~\p{L}]`)
compatibleLeadingTildeDrop = regexp.MustCompile(`^[~]*(.*)`) compatibleLeadingTildeDrop = regexp.MustCompile(`^[~]*(.*)`)
@ -39,15 +38,20 @@ type GraphiteTemplate struct {
} }
type GraphiteSerializer struct { type GraphiteSerializer struct {
Prefix string Prefix string `json:"prefix"`
Template string Template string `json:"template"`
TagSupport bool StrictAllowedChars *regexp.Regexp `json:"graphite_strict_sanitize_regex"`
TagSanitizeMode string TagSupport bool `json:"graphite_tag_support"`
Separator string TagSanitizeMode string `json:"graphite_tag_sanitize_mode"`
Templates []*GraphiteTemplate Separator string `json:"graphite_separator"`
Templates []*GraphiteTemplate `json:"templates"`
} }
func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) { func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
if s.StrictAllowedChars == nil {
s.StrictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
}
out := []byte{} out := []byte{}
// Convert UnixNano to Unix timestamps // Convert UnixNano to Unix timestamps
@ -60,7 +64,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
if fieldValue == "" { if fieldValue == "" {
continue continue
} }
bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode) bucket := s.SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode)
metricString := fmt.Sprintf("%s %s %d\n", metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template // insert "field" section of template
bucket, bucket,
@ -91,7 +95,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
} }
metricString := fmt.Sprintf("%s %s %d\n", metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template // insert "field" section of template
strictSanitize(InsertField(bucket, fieldName)), s.strictSanitize(InsertField(bucket, fieldName)),
fieldValue, fieldValue,
timestamp) timestamp)
point := []byte(metricString) point := []byte(metricString)
@ -245,7 +249,7 @@ func InitGraphiteTemplates(templates []string) ([]*GraphiteTemplate, string, err
// SerializeBucketNameWithTags will take the given measurement name and tags and // SerializeBucketNameWithTags will take the given measurement name and tags and
// produce a graphite bucket. It will use the Graphite11Serializer. // produce a graphite bucket. It will use the Graphite11Serializer.
// http://graphite.readthedocs.io/en/latest/tags.html // http://graphite.readthedocs.io/en/latest/tags.html
func SerializeBucketNameWithTags( func (s *GraphiteSerializer) SerializeBucketNameWithTags(
measurement string, measurement string,
tags map[string]string, tags map[string]string,
prefix string, prefix string,
@ -262,7 +266,7 @@ func SerializeBucketNameWithTags(
if tagSanitizeMode == "compatible" { if tagSanitizeMode == "compatible" {
tagsCopy = append(tagsCopy, compatibleSanitize(k, v)) tagsCopy = append(tagsCopy, compatibleSanitize(k, v))
} else { } else {
tagsCopy = append(tagsCopy, strictSanitize(k+"="+v)) tagsCopy = append(tagsCopy, s.strictSanitize(k+"="+v))
} }
} }
sort.Strings(tagsCopy) sort.Strings(tagsCopy)
@ -277,7 +281,7 @@ func SerializeBucketNameWithTags(
out += separator + field out += separator + field
} }
out = strictSanitize(out) out = s.strictSanitize(out)
if len(tagsCopy) > 0 { if len(tagsCopy) > 0 {
out += ";" + strings.Join(tagsCopy, ";") out += ";" + strings.Join(tagsCopy, ";")
@ -316,13 +320,13 @@ func buildTags(tags map[string]string) string {
return tagStr return tagStr
} }
func strictSanitize(value string) string { func (s *GraphiteSerializer) strictSanitize(value string) string {
// Apply special hyphenation rules to preserve backwards compatibility // Apply special hyphenation rules to preserve backwards compatibility
value = hyphenChars.Replace(value) value = hyphenChars.Replace(value)
// Apply rule to drop some chars to preserve backwards compatibility // Apply rule to drop some chars to preserve backwards compatibility
value = dropChars.Replace(value) value = dropChars.Replace(value)
// Replace any remaining illegal chars // Replace any remaining illegal chars
return strictAllowedChars.ReplaceAllLiteralString(value, "_") return s.StrictAllowedChars.ReplaceAllLiteralString(value, "_")
} }
func compatibleSanitize(name string, value string) string { func compatibleSanitize(name string, value string) string {

View File

@ -2,6 +2,7 @@ package graphite
import ( import (
"fmt" "fmt"
"regexp"
"sort" "sort"
"strings" "strings"
"testing" "testing"
@ -674,6 +675,31 @@ func TestSerializeMetricPrefixWithTagSupport(t *testing.T) {
require.Equal(t, expS, mS) require.Equal(t, expS, mS)
} }
// TestSerializeCustomRegex verifies that a caller-supplied strict sanitization
// regex is honored. The pattern below adds '|' to the allowed character class,
// so the '|' characters in the datacenter tag must survive serialization
// instead of being replaced with '_'.
func TestSerializeCustomRegex(t *testing.T) {
	now := time.Now()
	tags := map[string]string{
		"host":       "localhost",
		"cpu":        "cpu0",
		"datacenter": "|us-west-2|",
	}
	fields := map[string]interface{}{
		"value": float64(91.5),
	}
	m := metric.New("cpu", tags, fields, now)

	s := GraphiteSerializer{
		StrictAllowedChars: regexp.MustCompile(`[^a-zA-Z0-9-:._=|\p{L}]`),
	}

	// Surface a serialization failure directly rather than letting it show up
	// as a confusing mismatch against an empty buffer below.
	buf, err := s.Serialize(m)
	require.NoError(t, err)

	mS := strings.Split(strings.TrimSpace(string(buf)), "\n")
	expS := []string{
		fmt.Sprintf("localhost.cpu0.|us-west-2|.cpu 91.5 %d", now.Unix()),
	}
	require.Equal(t, expS, mS)
}
func TestSerializeBucketNameNoHost(t *testing.T) { func TestSerializeBucketNameNoHost(t *testing.T) {
now := time.Now() now := time.Now()
tags := map[string]string{ tags := map[string]string{

View File

@ -2,6 +2,7 @@ package serializers
import ( import (
"fmt" "fmt"
"regexp"
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
@ -75,6 +76,9 @@ type Config struct {
// Character for separating metric name and field for Graphite tags // Character for separating metric name and field for Graphite tags
GraphiteSeparator string `toml:"graphite_separator"` GraphiteSeparator string `toml:"graphite_separator"`
// Regex matching the characters to replace during Graphite strict sanitization
GraphiteStrictRegex string `toml:"graphite_strict_sanitize_regex"`
// Maximum line length in bytes; influx format only // Maximum line length in bytes; influx format only
InfluxMaxLineBytes int `toml:"influx_max_line_bytes"` InfluxMaxLineBytes int `toml:"influx_max_line_bytes"`
@ -155,6 +159,7 @@ func NewSerializer(config *Config) (Serializer, error) {
serializer, err = NewGraphiteSerializer( serializer, err = NewGraphiteSerializer(
config.Prefix, config.Prefix,
config.Template, config.Template,
config.GraphiteStrictRegex,
config.GraphiteTagSupport, config.GraphiteTagSupport,
config.GraphiteTagSanitizeMode, config.GraphiteTagSanitizeMode,
config.GraphiteSeparator, config.GraphiteSeparator,
@ -280,9 +285,17 @@ func NewInfluxSerializer() Serializer {
return influx.NewSerializer() return influx.NewSerializer()
} }
func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitizeMode string, separator string, templates []string) (Serializer, error) { //nolint:revive //argument-limit conditionally more arguments allowed
func NewGraphiteSerializer(
prefix,
template string,
strictRegex string,
tagSupport bool,
tagSanitizeMode string,
separator string,
templates []string,
) (Serializer, error) {
graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates) graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -299,13 +312,22 @@ func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitize
separator = "." separator = "."
} }
strictAllowedChars := regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
if strictRegex != "" {
strictAllowedChars, err = regexp.Compile(strictRegex)
if err != nil {
return nil, fmt.Errorf("invalid regex provided %q: %w", strictRegex, err)
}
}
return &graphite.GraphiteSerializer{ return &graphite.GraphiteSerializer{
Prefix: prefix, Prefix: prefix,
Template: template, Template: template,
TagSupport: tagSupport, StrictAllowedChars: strictAllowedChars,
TagSanitizeMode: tagSanitizeMode, TagSupport: tagSupport,
Separator: separator, TagSanitizeMode: tagSanitizeMode,
Templates: graphiteTemplates, Separator: separator,
Templates: graphiteTemplates,
}, nil }, nil
} }