fix(outputs.iotdb): Handle paths that contain illegal characters (#14519)

Co-authored-by: SeanGaluzzi <SeanGaluzzi@users.noreply.github.com>
Co-authored-by: SeanGaluzzi <sean.galuzzi@argo.consulting>
This commit is contained in:
giovanni-bellini-argo 2024-01-23 16:24:39 +01:00 committed by GitHub
parent 439df813ec
commit 4c1d8e3dd4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 146 additions and 2 deletions

View File

@ -141,4 +141,19 @@ to use them.
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
# convert_tags_to = "device_id"
## Handling of unsupported characters
## Some characters in different versions of IoTDB are not supported in path names
## A guide with suggestions on valid paths can be found here:
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
##
## Available values are:
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` the word having forbidden characters
## such as @ $ # : [ ] { } ( ) space
## - "0.13" -- enclose in `` the word having forbidden characters
## such as space
##
## Keep this section commented if you don't want to sanitize the path
# sanitize_tag = "1.3"
```

View File

@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"math"
"regexp"
"strconv"
"strings"
"time"
@ -21,6 +22,12 @@ import (
//go:embed sample.conf
var sampleConfig string
// Patterns used to decide whether the backticks found in a tag value are
// acceptable. A backtick is only accepted as the very first and the very last
// character of a value, i.e. when the value is already enclosed in backticks.
var (
	// forbiddenBacktick matches any word that has a non valid backtick
	// `word` <- doesn't match
	// ``word , `wo`rd` , `word , word` , ` <- match
	//
	// The last alternative covers a value made of one single backtick: it is
	// too short for the other alternatives and for allowedBacktick, and would
	// otherwise be passed on as an opening backtick without a closing one.
	forbiddenBacktick = regexp.MustCompile(
		"^[^\x60].*?[\x60]+.*?[^\x60]$|^[\x60].*[\x60]+.*[\x60]$|^[\x60]+.*[^\x60]$|^[^\x60].*[\x60]+$|^[\x60]$",
	)

	// allowedBacktick matches a word that starts and ends with a backtick,
	// e.g. `word`. It must be consulted after forbiddenBacktick, since on its
	// own it also matches words with additional backticks in the middle.
	allowedBacktick = regexp.MustCompile("^[\x60].*[\x60]$")
)
type IoTDB struct {
Host string `toml:"host"`
Port string `toml:"port"`
@ -30,9 +37,11 @@ type IoTDB struct {
ConvertUint64To string `toml:"uint64_conversion"`
TimeStampUnit string `toml:"timestamp_precision"`
TreatTagsAs string `toml:"convert_tags_to"`
SanitizeTags string `toml:"sanitize_tag"`
Log telegraf.Logger `toml:"-"`
session *client.Session
sanityRegex []*regexp.Regexp
session *client.Session
}
type recordsWithTags struct {
@ -74,6 +83,22 @@ func (s *IoTDB) Init() error {
s.Password = config.NewSecret([]byte("root"))
}
switch s.SanitizeTags {
case "0.13":
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_:@#${}\x60]")
regex := []*regexp.Regexp{matchUnsupportedCharacter}
s.sanityRegex = append(s.sanityRegex, regex...)
// from version 1.x.x IoTDB changed the allowed keys in nodes
case "1.0", "1.1", "1.2", "1.3":
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_\x60]")
matchNumericString := regexp.MustCompile(`^\d+$`)
regex := []*regexp.Regexp{matchUnsupportedCharacter, matchNumericString}
s.sanityRegex = append(s.sanityRegex, regex...)
}
s.Log.Info("Initialization completed.")
return nil
}
@ -229,6 +254,28 @@ func (s *IoTDB) convertMetricsToRecordsWithTags(metrics []telegraf.Metric) (*rec
return rwt, nil
}
// validateTag prepares a tag value for use as a node of an IoTDB path.
//
// An error is returned when the tag is exactly "root" or when
// forbiddenBacktick matches it. A tag matched by allowedBacktick is returned
// unchanged. Any other tag is returned enclosed in backticks if at least one
// pattern of s.sanityRegex matches it, and unchanged if none does.
func (s *IoTDB) validateTag(tag string) (string, error) {
	switch {
	case tag == "root":
		// IoTDB uses "root" as a keyword and can be called only at the start of the path
		return "", errors.New("cannot use 'root' as tag")
	case forbiddenBacktick.MatchString(tag):
		// the backticks are used in an inappropriate way
		return "", errors.New("cannot use ` in tag names")
	case allowedBacktick.MatchString(tag):
		// the tag is already enclosed in backticks, hand it back untouched
		return tag, nil
	}

	// the first pattern that matches is enough to require quoting
	for _, pattern := range s.sanityRegex {
		if pattern.MatchString(tag) {
			return "`" + tag + "`", nil
		}
	}
	return tag, nil
}
// modify recordsWithTags according to 'TreatTagsAs' Configuration
func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
switch s.TreatTagsAs {
@ -251,7 +298,11 @@ func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
for index, tags := range rwt.TagsList { // for each record
topic := []string{rwt.DeviceIDList[index]}
for _, tag := range tags { // for each tag, append it's Value
topic = append(topic, tag.Value)
tagValue, err := s.validateTag(tag.Value) // validates tag
if err != nil {
return err
}
topic = append(topic, tagValue)
}
rwt.DeviceIDList[index] = strings.Join(topic, ".")
}

View File

@ -271,6 +271,69 @@ func TestMetricConversionToRecordsWithTags(t *testing.T) {
}
}
// Test tag sanitize
//
// Every case feeds the inputs one by one to validateTag on a plugin that was
// initialized with the given `sanitize_tag` version and compares the returned
// values with the expected ones. Cases flagged with wantErr additionally
// require an error for every input, all other cases require none, so a
// rejected tag cannot be confused with a tag that was silently emptied.
func TestTagSanitization(t *testing.T) {
	tests := []struct {
		name     string
		plugin   *IoTDB
		expected []string
		input    []string
		wantErr  bool
	}{
		{
			// don't sanitize tags without unsupported characters on IoTDB V1.3
			name:     "Don't Sanitize Tags (1.3)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"word", "`word`", "word_"},
			input:    []string{"word", "`word`", "word_"},
		},
		{
			// sanitize tags containing unsupported characters on IoTDB V1.3 enclosing them in backticks
			name:     "Sanitize Tags (1.3)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"`wo rd`", "`@`", "`$`", "`#`", "`:`", "`{`", "`}`", "`1`", "`1234`"},
			input:    []string{"wo rd", "@", "$", "#", ":", "{", "}", "1", "1234"},
		},
		{
			// test on forbidden word and forbidden syntax on IoTDB V1.3
			name:     "Errors (1.3)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
			expected: []string{"", ""},
			input:    []string{"root", "wo`rd"},
			wantErr:  true,
		},
		{
			// don't sanitize tags without unsupported characters on IoTDB V0.13
			name:     "Don't Sanitize Tags (0.13)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
			input:    []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
		},
		{
			// sanitize tags containing unsupported characters on IoTDB V0.13 enclosing them in backticks
			name:     "Sanitize Tags (0.13)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"`wo rd`", "`\\`"},
			input:    []string{"wo rd", "\\"},
		},
		{
			// test on forbidden word and forbidden syntax on IoTDB V0.13
			name:     "Errors (0.13)",
			plugin:   func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
			expected: []string{"", ""},
			input:    []string{"root", "wo`rd"},
			wantErr:  true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			tt.plugin.Log = &testutil.Logger{}
			require.NoError(t, tt.plugin.Init())

			actuals := make([]string, 0, len(tt.input))
			for _, input := range tt.input {
				actual, err := tt.plugin.validateTag(input)
				if tt.wantErr {
					require.Errorf(t, err, "input %q", input)
				} else {
					require.NoErrorf(t, err, "input %q", input)
				}
				actuals = append(actuals, actual)
			}
			require.EqualValues(t, tt.expected, actuals)
		})
	}
}
// Test tags handling, which means testing function `modifyRecordsWithTags`
func TestTagsHandling(t *testing.T) {
var testTimestamp = time.Date(2022, time.July, 20, 12, 25, 33, 44, time.UTC)

View File

@ -43,3 +43,18 @@
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
# convert_tags_to = "device_id"
## Handling of unsupported characters
## Some characters in different versions of IoTDB are not supported in path names
## A guide with suggestions on valid paths can be found here:
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
##
## Available values are:
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` the word having forbidden characters
## such as @ $ # : [ ] { } ( ) space
## - "0.13" -- enclose in `` the word having forbidden characters
## such as space
##
## Keep this section commented if you don't want to sanitize the path
# sanitize_tag = "1.3"