fix(outputs.iotdb): Handle paths that contain illegal characters (#14519)
Co-authored-by: SeanGaluzzi <SeanGaluzzi@users.noreply.github.com> Co-authored-by: SeanGaluzzi <sean.galuzzi@argo.consulting>
This commit is contained in:
parent
439df813ec
commit
4c1d8e3dd4
|
|
@ -141,4 +141,19 @@ to use them.
|
|||
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
|
||||
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
|
||||
# convert_tags_to = "device_id"
|
||||
|
||||
## Handling of unsupported characters
|
||||
## Some characters are not supported in path names, depending on the IoTDB version
|
||||
## A guide with suggestions on valid paths can be found here:
|
||||
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
|
||||
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
|
||||
##
|
||||
## Available values are:
|
||||
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` any word containing a forbidden character
|
||||
## such as @ $ # : [ ] { } ( ) space
|
||||
## - "0.13" -- enclose in `` any word containing a forbidden character
|
||||
## such as space
|
||||
##
|
||||
## Keep this section commented if you don't want to sanitize the path
|
||||
# sanitize_tag = "1.3"
|
||||
```
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
|
@ -21,6 +22,12 @@ import (
|
|||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
// forbiddenBacktick matches any word that contains a misplaced backtick (\x60 in the pattern is the backtick character)
|
||||
// `word` <- doesn't match
|
||||
// “word , `wo`rd` , `word , word` <- match
|
||||
var forbiddenBacktick = regexp.MustCompile("^[^\x60].*?[\x60]+.*?[^\x60]$|^[\x60].*[\x60]+.*[\x60]$|^[\x60]+.*[^\x60]$|^[^\x60].*[\x60]+$")
|
||||
var allowedBacktick = regexp.MustCompile("^[\x60].*[\x60]$") // matches a word that both starts and ends with a backtick
|
||||
|
||||
type IoTDB struct {
|
||||
Host string `toml:"host"`
|
||||
Port string `toml:"port"`
|
||||
|
|
@ -30,9 +37,11 @@ type IoTDB struct {
|
|||
ConvertUint64To string `toml:"uint64_conversion"`
|
||||
TimeStampUnit string `toml:"timestamp_precision"`
|
||||
TreatTagsAs string `toml:"convert_tags_to"`
|
||||
SanitizeTags string `toml:"sanitize_tag"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
session *client.Session
|
||||
sanityRegex []*regexp.Regexp
|
||||
session *client.Session
|
||||
}
|
||||
|
||||
type recordsWithTags struct {
|
||||
|
|
@ -74,6 +83,22 @@ func (s *IoTDB) Init() error {
|
|||
s.Password = config.NewSecret([]byte("root"))
|
||||
}
|
||||
|
||||
switch s.SanitizeTags {
|
||||
case "0.13":
|
||||
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_:@#${}\x60]")
|
||||
|
||||
regex := []*regexp.Regexp{matchUnsupportedCharacter}
|
||||
s.sanityRegex = append(s.sanityRegex, regex...)
|
||||
|
||||
// from version 1.x.x IoTDB changed the allowed keys in nodes
|
||||
case "1.0", "1.1", "1.2", "1.3":
|
||||
matchUnsupportedCharacter := regexp.MustCompile("[^0-9a-zA-Z_\x60]")
|
||||
matchNumericString := regexp.MustCompile(`^\d+$`)
|
||||
|
||||
regex := []*regexp.Regexp{matchUnsupportedCharacter, matchNumericString}
|
||||
s.sanityRegex = append(s.sanityRegex, regex...)
|
||||
}
|
||||
|
||||
s.Log.Info("Initialization completed.")
|
||||
return nil
|
||||
}
|
||||
|
|
@ -229,6 +254,28 @@ func (s *IoTDB) convertMetricsToRecordsWithTags(metrics []telegraf.Metric) (*rec
|
|||
return rwt, nil
|
||||
}
|
||||
|
||||
// validateTag checks whether the tag can be used as a path node: it returns an error for the word "root" and for misplaced backticks, returns an already backtick-enclosed tag unchanged, and wraps the tag in backticks when any pattern in s.sanityRegex matches it
|
||||
func (s *IoTDB) validateTag(tag string) (string, error) {
|
||||
// IoTDB uses "root" as a keyword that may only appear at the start of a path
|
||||
if tag == "root" {
|
||||
return "", errors.New("cannot use 'root' as tag")
|
||||
} else if forbiddenBacktick.MatchString(tag) { // returns an error if the backticks are used in an inappropriate way
|
||||
return "", errors.New("cannot use ` in tag names")
|
||||
} else if allowedBacktick.MatchString(tag) { // if the tag is already enclosed in backticks, return it unchanged
|
||||
return tag, nil
|
||||
}
|
||||
|
||||
// loop through all the configured regex patterns; as soon as one
|
||||
// pattern matches, return the tag enclosed in backticks
|
||||
for _, regex := range s.sanityRegex {
|
||||
if regex.MatchString(tag) {
|
||||
return "`" + tag + "`", nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return tag, nil // no pattern matched (or none configured): the tag is returned as-is
|
||||
}
|
||||
|
||||
// modify recordsWithTags according to 'TreatTagsAs' Configuration
|
||||
func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
|
||||
switch s.TreatTagsAs {
|
||||
|
|
@ -251,7 +298,11 @@ func (s *IoTDB) modifyRecordsWithTags(rwt *recordsWithTags) error {
|
|||
for index, tags := range rwt.TagsList { // for each record
|
||||
topic := []string{rwt.DeviceIDList[index]}
|
||||
for _, tag := range tags { // for each tag, append it's Value
|
||||
topic = append(topic, tag.Value)
|
||||
tagValue, err := s.validateTag(tag.Value) // validates tag
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
topic = append(topic, tagValue)
|
||||
}
|
||||
rwt.DeviceIDList[index] = strings.Join(topic, ".")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -271,6 +271,69 @@ func TestMetricConversionToRecordsWithTags(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// TestTagSanitization is a table-driven test of validateTag for the "1.3" and "0.13" values of SanitizeTags
|
||||
func TestTagSanitization(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
plugin *IoTDB
|
||||
expected []string
|
||||
input []string
|
||||
}{
|
||||
{ // tags that need no sanitization on IoTDB V1.3 (plain words or already backtick-enclosed) are returned unchanged
|
||||
name: "Don't Sanitize Tags",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
|
||||
expected: []string{"word", "`word`", "word_"},
|
||||
input: []string{"word", "`word`", "word_"},
|
||||
},
|
||||
{ // tags containing unsupported characters (or purely numeric tags) on IoTDB V1.3 are enclosed in backticks
|
||||
name: "Sanitize Tags",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
|
||||
expected: []string{"`wo rd`", "`@`", "`$`", "`#`", "`:`", "`{`", "`}`", "`1`", "`1234`"},
|
||||
input: []string{"wo rd", "@", "$", "#", ":", "{", "}", "1", "1234"},
|
||||
},
|
||||
{ // forbidden word and forbidden backtick syntax on IoTDB V1.3; the expected result is an empty string
|
||||
name: "Errors",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "1.3"; return s }(),
|
||||
expected: []string{"", ""},
|
||||
input: []string{"root", "wo`rd"},
|
||||
},
|
||||
{ // tags that need no sanitization on IoTDB V0.13 are returned unchanged
|
||||
name: "Don't Sanitize Tags",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
|
||||
expected: []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
|
||||
input: []string{"word", "`word`", "word_", "@", "$", "#", ":", "{", "}"},
|
||||
},
|
||||
{ // tags containing unsupported characters on IoTDB V0.13 are enclosed in backticks
|
||||
name: "Sanitize Tags",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
|
||||
expected: []string{"`wo rd`", "`\\`"},
|
||||
input: []string{"wo rd", "\\"},
|
||||
},
|
||||
{ // forbidden word and forbidden backtick syntax on IoTDB V0.13; the expected result is an empty string
|
||||
name: "Errors",
|
||||
plugin: func() *IoTDB { s := newIoTDB(); s.SanitizeTags = "0.13"; return s }(),
|
||||
expected: []string{"", ""},
|
||||
input: []string{"root", "wo`rd"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tt.plugin.Log = &testutil.Logger{}
|
||||
actuals := []string{}
|
||||
|
||||
require.NoError(t, tt.plugin.Init())
|
||||
|
||||
for _, input := range tt.input {
|
||||
actual, _ := tt.plugin.validateTag(input) // NOTE(review): the error is discarded; the "Errors" cases only check for the empty string result
|
||||
actuals = append(actuals, actual)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tt.expected, actuals)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Test tags handling, which means testing function `modifyRecordsWithTags`
|
||||
func TestTagsHandling(t *testing.T) {
|
||||
var testTimestamp = time.Date(2022, time.July, 20, 12, 25, 33, 44, time.UTC)
|
||||
|
|
|
|||
|
|
@ -43,3 +43,18 @@
|
|||
## - "fields" -- root.sg.device, s1=100, s2="hello", tag1="private", tag2="working"
|
||||
## - "device_id" -- root.sg.device.private.working, s1=100, s2="hello"
|
||||
# convert_tags_to = "device_id"
|
||||
|
||||
## Handling of unsupported characters
|
||||
## Some characters are not supported in path names, depending on the IoTDB version
|
||||
## A guide with suggestions on valid paths can be found here:
|
||||
## for iotdb 0.13.x -> https://iotdb.apache.org/UserGuide/V0.13.x/Reference/Syntax-Conventions.html#identifiers
|
||||
## for iotdb 1.x.x and above -> https://iotdb.apache.org/UserGuide/V1.3.x/User-Manual/Syntax-Rule.html#identifier
|
||||
##
|
||||
## Available values are:
|
||||
## - "1.0", "1.1", "1.2", "1.3" -- enclose in `` any word containing a forbidden character
|
||||
## such as @ $ # : [ ] { } ( ) space
|
||||
## - "0.13" -- enclose in `` any word containing a forbidden character
|
||||
## such as space
|
||||
##
|
||||
## Keep this section commented if you don't want to sanitize the path
|
||||
# sanitize_tag = "1.3"
|
||||
Loading…
Reference in New Issue