Add UTF-8 sanitizer to Strings plugin (#9118)
Adds a new option to the Strings processor that sanitizes strings so that they are valid UTF-8.
This commit is contained in:
parent
4fc849d73f
commit
370836d436
|
|
@ -14,6 +14,7 @@ Implemented functions are:
|
||||||
- replace
|
- replace
|
||||||
- left
|
- left
|
||||||
- base64decode
|
- base64decode
|
||||||
|
- valid_utf8
|
||||||
|
|
||||||
Please note that in this implementation these are processed in the order that they appear above.
|
Please note that in this implementation these are processed in the order that they appear above.
|
||||||
|
|
||||||
|
|
@ -78,6 +79,12 @@ If you'd like to apply multiple processings to the same `tag_key` or `field_key`
|
||||||
## Decode a base64 encoded utf-8 string
|
## Decode a base64 encoded utf-8 string
|
||||||
# [[processors.strings.base64decode]]
|
# [[processors.strings.base64decode]]
|
||||||
# field = "message"
|
# field = "message"
|
||||||
|
|
||||||
|
## Sanitize a string to ensure it is a valid utf-8 string
|
||||||
|
## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
|
||||||
|
# [[processors.strings.valid_utf8]]
|
||||||
|
# field = "message"
|
||||||
|
# replacement = ""
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Trim, TrimLeft, TrimRight
|
#### Trim, TrimLeft, TrimRight
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ type Strings struct {
|
||||||
Replace []converter `toml:"replace"`
|
Replace []converter `toml:"replace"`
|
||||||
Left []converter `toml:"left"`
|
Left []converter `toml:"left"`
|
||||||
Base64Decode []converter `toml:"base64decode"`
|
Base64Decode []converter `toml:"base64decode"`
|
||||||
|
ValidUTF8 []converter `toml:"valid_utf8"`
|
||||||
|
|
||||||
converters []converter
|
converters []converter
|
||||||
init bool
|
init bool
|
||||||
|
|
@ -42,6 +43,7 @@ type converter struct {
|
||||||
Old string
|
Old string
|
||||||
New string
|
New string
|
||||||
Width int
|
Width int
|
||||||
|
Replacement string
|
||||||
|
|
||||||
fn ConvertFunc
|
fn ConvertFunc
|
||||||
}
|
}
|
||||||
|
|
@ -98,6 +100,12 @@ const sampleConfig = `
|
||||||
## Decode a base64 encoded utf-8 string
|
## Decode a base64 encoded utf-8 string
|
||||||
# [[processors.strings.base64decode]]
|
# [[processors.strings.base64decode]]
|
||||||
# field = "message"
|
# field = "message"
|
||||||
|
|
||||||
|
## Sanitize a string to ensure it is a valid utf-8 string
|
||||||
|
## Each run of invalid UTF-8 byte sequences is replaced by the replacement string, which may be empty
|
||||||
|
# [[processors.strings.valid_utf8]]
|
||||||
|
# field = "message"
|
||||||
|
# replacement = ""
|
||||||
`
|
`
|
||||||
|
|
||||||
func (s *Strings) SampleConfig() string {
|
func (s *Strings) SampleConfig() string {
|
||||||
|
|
@ -318,6 +326,11 @@ func (s *Strings) initOnce() {
|
||||||
}
|
}
|
||||||
s.converters = append(s.converters, c)
|
s.converters = append(s.converters, c)
|
||||||
}
|
}
|
||||||
|
for _, c := range s.ValidUTF8 {
|
||||||
|
c := c
|
||||||
|
c.fn = func(s string) string { return strings.ToValidUTF8(s, c.Replacement) }
|
||||||
|
s.converters = append(s.converters, c)
|
||||||
|
}
|
||||||
|
|
||||||
s.init = true
|
s.init = true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1047,3 +1047,113 @@ func TestBase64Decode(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestValidUTF8(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
plugin *Strings
|
||||||
|
metric []telegraf.Metric
|
||||||
|
expected []telegraf.Metric
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid utf-8 keeps original string",
|
||||||
|
plugin: &Strings{
|
||||||
|
ValidUTF8: []converter{
|
||||||
|
{
|
||||||
|
Field: "message",
|
||||||
|
Replacement: "r",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
metric: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "howdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
expected: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "howdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-valid utf-8 modifies original string",
|
||||||
|
plugin: &Strings{
|
||||||
|
ValidUTF8: []converter{
|
||||||
|
{
|
||||||
|
Field: "message",
|
||||||
|
Replacement: "r",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
metric: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "ho" + string([]byte{0xff}) + "wdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
expected: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "horwdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "non-valid utf-8 and empty replacement removes invalid characters",
|
||||||
|
plugin: &Strings{
|
||||||
|
ValidUTF8: []converter{
|
||||||
|
{
|
||||||
|
Field: "message",
|
||||||
|
Replacement: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
metric: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "ho" + string([]byte{0xff}) + "wdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
expected: []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"message": "howdy",
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
actual := tt.plugin.Apply(tt.metric...)
|
||||||
|
testutil.RequireMetricsEqual(t, tt.expected, actual)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue