Add UTF-8 sanitizer to Strings plugin (#9118)
Adds a new option for the Strings processer to sanitize strings so that they conform to utf-8
This commit is contained in:
parent
4fc849d73f
commit
370836d436
|
|
@ -14,6 +14,7 @@ Implemented functions are:
|
|||
- replace
|
||||
- left
|
||||
- base64decode
|
||||
- valid_utf8
|
||||
|
||||
Please note that in this implementation these are processed in the order that they appear above.
|
||||
|
||||
|
|
@ -78,6 +79,12 @@ If you'd like to apply multiple processings to the same `tag_key` or `field_key`
|
|||
## Decode a base64 encoded utf-8 string
|
||||
# [[processors.strings.base64decode]]
|
||||
# field = "message"
|
||||
|
||||
## Sanitize a string to ensure it is a valid utf-8 string
|
||||
## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
|
||||
# [[processors.strings.valid_utf8]]
|
||||
# field = "message"
|
||||
# replacement = ""
|
||||
```
|
||||
|
||||
#### Trim, TrimLeft, TrimRight
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ type Strings struct {
|
|||
Replace []converter `toml:"replace"`
|
||||
Left []converter `toml:"left"`
|
||||
Base64Decode []converter `toml:"base64decode"`
|
||||
ValidUTF8 []converter `toml:"valid_utf8"`
|
||||
|
||||
converters []converter
|
||||
init bool
|
||||
|
|
@ -42,6 +43,7 @@ type converter struct {
|
|||
Old string
|
||||
New string
|
||||
Width int
|
||||
Replacement string
|
||||
|
||||
fn ConvertFunc
|
||||
}
|
||||
|
|
@ -98,6 +100,12 @@ const sampleConfig = `
|
|||
## Decode a base64 encoded utf-8 string
|
||||
# [[processors.strings.base64decode]]
|
||||
# field = "message"
|
||||
|
||||
## Sanitize a string to ensure it is a valid utf-8 string
|
||||
## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
|
||||
# [[processors.strings.valid_utf8]]
|
||||
# field = "message"
|
||||
# replacement = ""
|
||||
`
|
||||
|
||||
func (s *Strings) SampleConfig() string {
|
||||
|
|
@ -318,6 +326,11 @@ func (s *Strings) initOnce() {
|
|||
}
|
||||
s.converters = append(s.converters, c)
|
||||
}
|
||||
for _, c := range s.ValidUTF8 {
|
||||
c := c
|
||||
c.fn = func(s string) string { return strings.ToValidUTF8(s, c.Replacement) }
|
||||
s.converters = append(s.converters, c)
|
||||
}
|
||||
|
||||
s.init = true
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1047,3 +1047,113 @@ func TestBase64Decode(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidUTF8(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
plugin *Strings
|
||||
metric []telegraf.Metric
|
||||
expected []telegraf.Metric
|
||||
}{
|
||||
{
|
||||
name: "valid utf-8 keeps original string",
|
||||
plugin: &Strings{
|
||||
ValidUTF8: []converter{
|
||||
{
|
||||
Field: "message",
|
||||
Replacement: "r",
|
||||
},
|
||||
},
|
||||
},
|
||||
metric: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "howdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "howdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "non-valid utf-8 modifies original string",
|
||||
plugin: &Strings{
|
||||
ValidUTF8: []converter{
|
||||
{
|
||||
Field: "message",
|
||||
Replacement: "r",
|
||||
},
|
||||
},
|
||||
},
|
||||
metric: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "ho" + string([]byte{0xff}) + "wdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "horwdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "non-valid utf-8 and empty replacement removes invalid characters",
|
||||
plugin: &Strings{
|
||||
ValidUTF8: []converter{
|
||||
{
|
||||
Field: "message",
|
||||
Replacement: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
metric: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "ho" + string([]byte{0xff}) + "wdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"cpu",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"message": "howdy",
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
actual := tt.plugin.Apply(tt.metric...)
|
||||
testutil.RequireMetricsEqual(t, tt.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue