diff --git a/plugins/processors/strings/README.md b/plugins/processors/strings/README.md index a7aa0e2a5..e0fcec910 100644 --- a/plugins/processors/strings/README.md +++ b/plugins/processors/strings/README.md @@ -14,6 +14,7 @@ Implemented functions are: - replace - left - base64decode +- valid_utf8 Please note that in this implementation these are processed in the order that they appear above. @@ -78,6 +79,12 @@ If you'd like to apply multiple processings to the same `tag_key` or `field_key` ## Decode a base64 encoded utf-8 string # [[processors.strings.base64decode]] # field = "message" + + ## Sanitize a string to ensure it is a valid utf-8 string + ## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty + # [[processors.strings.valid_utf8]] + # field = "message" + # replacement = "" ``` #### Trim, TrimLeft, TrimRight diff --git a/plugins/processors/strings/strings.go b/plugins/processors/strings/strings.go index 92ce56098..7b2d3251e 100644 --- a/plugins/processors/strings/strings.go +++ b/plugins/processors/strings/strings.go @@ -22,6 +22,7 @@ type Strings struct { Replace []converter `toml:"replace"` Left []converter `toml:"left"` Base64Decode []converter `toml:"base64decode"` + ValidUTF8 []converter `toml:"valid_utf8"` converters []converter init bool @@ -42,6 +43,7 @@ type converter struct { Old string New string Width int + Replacement string fn ConvertFunc } @@ -98,6 +100,12 @@ const sampleConfig = ` ## Decode a base64 encoded utf-8 string # [[processors.strings.base64decode]] # field = "message" + + ## Sanitize a string to ensure it is a valid utf-8 string + ## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty + # [[processors.strings.valid_utf8]] + # field = "message" + # replacement = "" ` func (s *Strings) SampleConfig() string { @@ -318,6 +326,11 @@ func (s *Strings) initOnce() { } s.converters = append(s.converters, c) } + for _, c := range s.ValidUTF8 { + c := c + c.fn = func(s string) string { return strings.ToValidUTF8(s, c.Replacement) } + s.converters = append(s.converters, c) + } s.init = true } diff --git a/plugins/processors/strings/strings_test.go b/plugins/processors/strings/strings_test.go index 40d798a6d..c42011884 100644 --- a/plugins/processors/strings/strings_test.go +++ b/plugins/processors/strings/strings_test.go @@ -1047,3 +1047,113 @@ func TestBase64Decode(t *testing.T) { }) } } + +func TestValidUTF8(t *testing.T) { + tests := []struct { + name string + plugin *Strings + metric []telegraf.Metric + expected []telegraf.Metric + }{ + { + name: "valid utf-8 keeps original string", + plugin: &Strings{ + ValidUTF8: []converter{ + { + Field: "message", + Replacement: "r", + }, + }, + }, + metric: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "howdy", + }, + time.Unix(0, 0), + ), + }, + expected: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "howdy", + }, + time.Unix(0, 0), + ), + }, + }, + { + name: "non-valid utf-8 modifies original string", + plugin: &Strings{ + ValidUTF8: []converter{ + { + Field: "message", + Replacement: "r", + }, + }, + }, + metric: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "ho" + string([]byte{0xff}) + "wdy", + }, + time.Unix(0, 0), + ), + }, + expected: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "horwdy", + }, + time.Unix(0, 0), + ), + }, + }, + { + name: "non-valid utf-8 and empty replacement removes invalid characters", + plugin: &Strings{ + ValidUTF8: []converter{ + { + Field: "message", + Replacement: "", + }, + }, + }, + metric: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "ho" + string([]byte{0xff}) + "wdy", + }, + time.Unix(0, 0), + ), + }, + expected: []telegraf.Metric{ + testutil.MustMetric( + "cpu", + map[string]string{}, + map[string]interface{}{ + "message": "howdy", + }, + time.Unix(0, 0), + ), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := tt.plugin.Apply(tt.metric...) + testutil.RequireMetricsEqual(t, tt.expected, actual) + }) + } +}