fix(serializers.graphite): Allow for specifying regex to sanitize (#12835)

This commit is contained in:
Joshua Powers 2023-03-14 04:46:44 -06:00 committed by GitHub
parent a7d4a59b6a
commit 7284c126ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 115 additions and 33 deletions

View File

@ -1393,9 +1393,10 @@ func (c *Config) buildSerializer(tbl *ast.Table) (serializers.Serializer, error)
c.getFieldInt(tbl, "influx_max_line_bytes", &sc.InfluxMaxLineBytes)
c.getFieldBool(tbl, "influx_sort_fields", &sc.InfluxSortFields)
c.getFieldBool(tbl, "influx_uint_support", &sc.InfluxUintSupport)
c.getFieldString(tbl, "graphite_strict_sanitize_regex", &sc.GraphiteStrictRegex)
c.getFieldBool(tbl, "graphite_tag_support", &sc.GraphiteTagSupport)
c.getFieldString(tbl, "graphite_tag_sanitize_mode", &sc.GraphiteTagSanitizeMode)
c.getFieldString(tbl, "graphite_separator", &sc.GraphiteSeparator)
c.getFieldDuration(tbl, "json_timestamp_units", &sc.TimestampUnits)
@ -1485,6 +1486,7 @@ func (c *Config) missingTomlField(_ reflect.Type, key string) error {
case "prefix", "template", "templates",
"carbon2_format", "carbon2_sanitize_replace_char",
"csv_column_prefix", "csv_header", "csv_separator", "csv_timestamp_format",
"graphite_strict_sanitize_regex",
"graphite_tag_sanitize_mode", "graphite_tag_support", "graphite_separator",
"influx_max_line_bytes", "influx_sort_fields", "influx_uint_support",
"json_timestamp_format", "json_timestamp_units", "json_transformation",

View File

@ -32,12 +32,20 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
template = "host.tags.measurement.field"
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to
## the following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Enable Graphite tags support
# graphite_tag_support = false
## Define how metric names and tags are sanitized; options are "strict", or "compatible"
## strict - Default method, and backwards compatible with previous versions of Telegraf
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec
## Applied sanitization mode when graphite tag support is enabled.
## * strict - uses the regex specified above
## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags

View File

@ -165,7 +165,7 @@ func (g *Graphite) checkEOF(conn net.Conn) error {
func (g *Graphite) Write(metrics []telegraf.Metric) error {
// Prepare data
var batch []byte
s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates)
s, err := serializers.NewGraphiteSerializer(g.Prefix, g.Template, "", g.GraphiteTagSupport, g.GraphiteTagSanitizeMode, g.GraphiteSeparator, g.Templates)
if err != nil {
return err
}

View File

@ -10,12 +10,20 @@
## see https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md
template = "host.tags.measurement.field"
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to
## the following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Enable Graphite tags support
# graphite_tag_support = false
## Define how metric names and tags are sanitized; options are "strict", or "compatible"
## strict - Default method, and backwards compatible with previous versions of Telegraf
## compatible - More relaxed sanitizing when using tags, and compatible with the graphite spec
## Applied sanitization mode when graphite tag support is enabled.
## * strict - uses the regex specified above
## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags

View File

@ -84,7 +84,7 @@ func (i *Instrumental) Write(metrics []telegraf.Metric) error {
}
}
s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, false, "strict", ".", i.Templates)
s, err := serializers.NewGraphiteSerializer(i.Prefix, i.Template, "", false, "strict", ".", i.Templates)
if err != nil {
return err
}

View File

@ -35,10 +35,22 @@ method is used, otherwise the [Template Pattern][templates] is used.
# "host.measurement.tags.field"
#]
## Strict sanitization regex
## This is the default sanitization regex that is used on data passed to the
## graphite serializer. Users can add additional characters here if required.
## Be aware that the characters '/', '@', and '*' are always replaced with '_',
## '..' is replaced with '.', and '\' is removed, even if they are added to
## the following regex.
# graphite_strict_sanitize_regex = '[^a-zA-Z0-9-:._=\p{L}]'
## Support Graphite tags, recommended to enable when using Graphite 1.1 or later.
# graphite_tag_support = false
## Enable Graphite tags to support the full list of allowed characters
# graphite_tag_new_sanitize = false
## Applied sanitization mode when graphite tag support is enabled.
## * strict - uses the regex specified above
## * compatible - allows for a greater number of characters
# graphite_tag_sanitize_mode = "strict"
## Character for separating metric name and field for Graphite tags
# graphite_separator = "."
```

View File

@ -16,7 +16,6 @@ import (
const DefaultTemplate = "host.tags.measurement.field"
var (
strictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
compatibleAllowedCharsName = regexp.MustCompile(`[^ "-:\<>-\]_a-~\p{L}]`)
compatibleAllowedCharsValue = regexp.MustCompile(`[^ -:<-~\p{L}]`)
compatibleLeadingTildeDrop = regexp.MustCompile(`^[~]*(.*)`)
@ -39,15 +38,20 @@ type GraphiteTemplate struct {
}
type GraphiteSerializer struct {
Prefix string
Template string
TagSupport bool
TagSanitizeMode string
Separator string
Templates []*GraphiteTemplate
Prefix string `json:"prefix"`
Template string `json:"template"`
StrictAllowedChars *regexp.Regexp `json:"graphite_strict_sanitize_regex"`
TagSupport bool `json:"graphite_tag_support"`
TagSanitizeMode string `json:"graphite_tag_sanitize_mode"`
Separator string `json:"graphite_separator"`
Templates []*GraphiteTemplate `json:"templates"`
}
func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
if s.StrictAllowedChars == nil {
s.StrictAllowedChars = regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
}
out := []byte{}
// Convert UnixNano to Unix timestamps
@ -60,7 +64,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
if fieldValue == "" {
continue
}
bucket := SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode)
bucket := s.SerializeBucketNameWithTags(metric.Name(), metric.Tags(), s.Prefix, s.Separator, fieldName, s.TagSanitizeMode)
metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template
bucket,
@ -91,7 +95,7 @@ func (s *GraphiteSerializer) Serialize(metric telegraf.Metric) ([]byte, error) {
}
metricString := fmt.Sprintf("%s %s %d\n",
// insert "field" section of template
strictSanitize(InsertField(bucket, fieldName)),
s.strictSanitize(InsertField(bucket, fieldName)),
fieldValue,
timestamp)
point := []byte(metricString)
@ -245,7 +249,7 @@ func InitGraphiteTemplates(templates []string) ([]*GraphiteTemplate, string, err
// SerializeBucketNameWithTags will take the given measurement name and tags and
// produce a graphite bucket. It will use the Graphite11Serializer.
// http://graphite.readthedocs.io/en/latest/tags.html
func SerializeBucketNameWithTags(
func (s *GraphiteSerializer) SerializeBucketNameWithTags(
measurement string,
tags map[string]string,
prefix string,
@ -262,7 +266,7 @@ func SerializeBucketNameWithTags(
if tagSanitizeMode == "compatible" {
tagsCopy = append(tagsCopy, compatibleSanitize(k, v))
} else {
tagsCopy = append(tagsCopy, strictSanitize(k+"="+v))
tagsCopy = append(tagsCopy, s.strictSanitize(k+"="+v))
}
}
sort.Strings(tagsCopy)
@ -277,7 +281,7 @@ func SerializeBucketNameWithTags(
out += separator + field
}
out = strictSanitize(out)
out = s.strictSanitize(out)
if len(tagsCopy) > 0 {
out += ";" + strings.Join(tagsCopy, ";")
@ -316,13 +320,13 @@ func buildTags(tags map[string]string) string {
return tagStr
}
func strictSanitize(value string) string {
func (s *GraphiteSerializer) strictSanitize(value string) string {
// Apply special hyphenation rules to preserve backwards compatibility
value = hyphenChars.Replace(value)
// Apply rule to drop some chars to preserve backwards compatibility
value = dropChars.Replace(value)
// Replace any remaining illegal chars
return strictAllowedChars.ReplaceAllLiteralString(value, "_")
return s.StrictAllowedChars.ReplaceAllLiteralString(value, "_")
}
func compatibleSanitize(name string, value string) string {

View File

@ -2,6 +2,7 @@ package graphite
import (
"fmt"
"regexp"
"sort"
"strings"
"testing"
@ -674,6 +675,31 @@ func TestSerializeMetricPrefixWithTagSupport(t *testing.T) {
require.Equal(t, expS, mS)
}
// TestSerializeCustomRegex verifies that a custom StrictAllowedChars regex
// which permits `|` leaves that character intact in the serialized output.
func TestSerializeCustomRegex(t *testing.T) {
	now := time.Now()
	tags := map[string]string{
		"host":       "localhost",
		"cpu":        "cpu0",
		"datacenter": "|us-west-2|",
	}
	fields := map[string]interface{}{
		"value": float64(91.5),
	}
	m := metric.New("cpu", tags, fields, now)

	// Same character class as the serializer's fallback regex, with `|` added
	// to the set of characters that are NOT replaced.
	s := GraphiteSerializer{
		StrictAllowedChars: regexp.MustCompile(`[^a-zA-Z0-9-:._=|\p{L}]`),
	}

	// Fail loudly on a serialization error instead of discarding it and then
	// comparing the expectation against a possibly empty buffer.
	buf, err := s.Serialize(m)
	require.NoError(t, err)

	mS := strings.Split(strings.TrimSpace(string(buf)), "\n")
	expS := []string{
		fmt.Sprintf("localhost.cpu0.|us-west-2|.cpu 91.5 %d", now.Unix()),
	}
	require.Equal(t, expS, mS)
}
func TestSerializeBucketNameNoHost(t *testing.T) {
now := time.Now()
tags := map[string]string{

View File

@ -2,6 +2,7 @@ package serializers
import (
"fmt"
"regexp"
"time"
"github.com/influxdata/telegraf"
@ -75,6 +76,9 @@ type Config struct {
// Character for separating metric name and field for Graphite tags
GraphiteSeparator string `toml:"graphite_separator"`
// Regex matching the characters that strict sanitization replaces; graphite format only
GraphiteStrictRegex string `toml:"graphite_strict_sanitize_regex"`
// Maximum line length in bytes; influx format only
InfluxMaxLineBytes int `toml:"influx_max_line_bytes"`
@ -155,6 +159,7 @@ func NewSerializer(config *Config) (Serializer, error) {
serializer, err = NewGraphiteSerializer(
config.Prefix,
config.Template,
config.GraphiteStrictRegex,
config.GraphiteTagSupport,
config.GraphiteTagSanitizeMode,
config.GraphiteSeparator,
@ -280,9 +285,17 @@ func NewInfluxSerializer() Serializer {
return influx.NewSerializer()
}
func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitizeMode string, separator string, templates []string) (Serializer, error) {
//nolint:revive //argument-limit conditionally more arguments allowed
func NewGraphiteSerializer(
prefix,
template string,
strictRegex string,
tagSupport bool,
tagSanitizeMode string,
separator string,
templates []string,
) (Serializer, error) {
graphiteTemplates, defaultTemplate, err := graphite.InitGraphiteTemplates(templates)
if err != nil {
return nil, err
}
@ -299,13 +312,22 @@ func NewGraphiteSerializer(prefix, template string, tagSupport bool, tagSanitize
separator = "."
}
strictAllowedChars := regexp.MustCompile(`[^a-zA-Z0-9-:._=\p{L}]`)
if strictRegex != "" {
strictAllowedChars, err = regexp.Compile(strictRegex)
if err != nil {
return nil, fmt.Errorf("invalid regex provided %q: %w", strictRegex, err)
}
}
return &graphite.GraphiteSerializer{
Prefix: prefix,
Template: template,
TagSupport: tagSupport,
TagSanitizeMode: tagSanitizeMode,
Separator: separator,
Templates: graphiteTemplates,
Prefix: prefix,
Template: template,
StrictAllowedChars: strictAllowedChars,
TagSupport: tagSupport,
TagSanitizeMode: tagSanitizeMode,
Separator: separator,
Templates: graphiteTemplates,
}, nil
}