feat(parsers.xpath): Add Concise Binary Object Representation parser (#13480)

This commit is contained in:
Sven Rebhan 2023-06-28 23:26:50 +02:00 committed by GitHub
parent 2fd589eea3
commit c35cabda9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 221 additions and 15 deletions

View File

@ -123,6 +123,7 @@ following works:
- github.com/emicklei/go-restful [MIT License](https://github.com/emicklei/go-restful/blob/v3/LICENSE)
- github.com/fatih/color [MIT License](https://github.com/fatih/color/blob/master/LICENSE.md)
- github.com/form3tech-oss/jwt-go [MIT License](https://github.com/form3tech-oss/jwt-go/blob/master/LICENSE)
- github.com/fxamacker/cbor [MIT License](https://github.com/fxamacker/cbor/blob/master/LICENSE)
- github.com/gabriel-vasile/mimetype [MIT License](https://github.com/gabriel-vasile/mimetype/blob/master/LICENSE)
- github.com/go-asn1-ber/asn1-ber [MIT License](https://github.com/go-asn1-ber/asn1-ber/blob/v1.3/LICENSE)
- github.com/go-ldap/ldap [MIT License](https://github.com/go-ldap/ldap/blob/v3.4.1/LICENSE)
@ -308,6 +309,7 @@ following works:
- github.com/sleepinggenius2/gosmi [MIT License](https://github.com/sleepinggenius2/gosmi/blob/master/LICENSE)
- github.com/snowflakedb/gosnowflake [Apache License 2.0](https://github.com/snowflakedb/gosnowflake/blob/master/LICENSE)
- github.com/spf13/pflag [BSD 3-Clause "New" or "Revised" License](https://github.com/spf13/pflag/blob/master/LICENSE)
- github.com/srebhan/cborquery [MIT License](https://github.com/srebhan/cborquery/blob/main/LICENSE)
- github.com/stoewer/go-strcase [MIT License](https://github.com/stoewer/go-strcase/blob/master/LICENSE)
- github.com/stretchr/objx [MIT License](https://github.com/stretchr/objx/blob/master/LICENSE)
- github.com/stretchr/testify [MIT License](https://github.com/stretchr/testify/blob/master/LICENSE)

2
go.mod
View File

@ -164,6 +164,7 @@ require (
github.com/sirupsen/logrus v1.9.0
github.com/sleepinggenius2/gosmi v0.4.4
github.com/snowflakedb/gosnowflake v1.6.13
github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316
github.com/stretchr/testify v1.8.4
github.com/tbrandon/mbserver v0.0.0-20170611213546-993e1772cc62
github.com/testcontainers/testcontainers-go v0.18.0
@ -289,6 +290,7 @@ require (
github.com/emicklei/go-restful/v3 v3.10.1 // indirect
github.com/flosch/pongo2 v0.0.0-20200913210552-0d938eb266f3 // indirect
github.com/form3tech-oss/jwt-go v3.2.5+incompatible // indirect
github.com/fxamacker/cbor v1.5.1 // indirect
github.com/gabriel-vasile/mimetype v1.4.0 // indirect
github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect
github.com/go-logr/logr v1.2.4 // indirect

4
go.sum
View File

@ -545,6 +545,8 @@ github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fxamacker/cbor v1.5.1 h1:XjQWBgdmQyqimslUh5r4tUGmoqzHmBFQOImkWGi2awg=
github.com/fxamacker/cbor v1.5.1/go.mod h1:3aPGItF174ni7dDzd6JZ206H8cmr4GDNBGpPa971zsU=
github.com/gabriel-vasile/mimetype v1.4.0 h1:Cn9dkdYsMIu56tGho+fqzh7XmvY2YyGU0FnbhiOsEro=
github.com/gabriel-vasile/mimetype v1.4.0/go.mod h1:fA8fi6KUiG7MgQQ+mEWotXoEOvmxRtOJlERCzSmRvr8=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@ -1428,6 +1430,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.7.0/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg=
github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg=
github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316 h1:HVv8JjpX24FuI59aET1uInn0ItuEiyj8CZMuR9Uw+lE=
github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316/go.mod h1:9vX3Dhehey14KFYwWo4K/4JOJRve6jvQf6R9Y8PymLI=
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=

View File

@ -11,12 +11,13 @@ lib]. The only exception are _integer_ fields that need to be specified in a
## Supported data formats
| name | `data_format` setting | comment |
| --------------------------------------- | --------------------- | ------- |
| [Extensible Markup Language (XML)][xml] | `"xml"` | |
| [JSON][json] | `"xpath_json"` | |
| [MessagePack][msgpack] | `"xpath_msgpack"` | |
| [Protocol-buffers][protobuf] | `"xpath_protobuf"` | [see additional parameters](#protocol-buffers-additional-settings)|
| name | `data_format` setting | comment |
| -------------------------------------------- | --------------------- | ------- |
| [Extensible Markup Language (XML)][xml] | `"xml"` | |
| [Concise Binary Object Representation][cbor] | `"xpath_cbor"` | [see additional notes](#concise-binary-object-representation-notes)|
| [JSON][json] | `"xpath_json"` | |
| [MessagePack][msgpack] | `"xpath_msgpack"` | |
| [Protocol-buffers][protobuf] | `"xpath_protobuf"` | [see additional parameters](#protocol-buffers-additional-settings)|
### Protocol-buffers additional settings
@ -90,6 +91,15 @@ This is a list of known headers and the corresponding values for
[GRPC]: https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md
[PDNS]: https://docs.powerdns.com/recursor/lua-config/protobuf.html
### Concise Binary Object Representation notes
Concise Binary Object Representation supports numeric keys in the data. However,
XML (and this parser) expects node names to be strings starting with a letter.
To be compatible with these requirements, numeric nodes will be prefixed with
a lowercase `n` and converted to strings. This means that if you, for example,
have a node with the key `123` in CBOR, you will need to query `n123` in your
XPath expressions.
## Configuration
```toml
@ -122,7 +132,7 @@ This is a list of known headers and the corresponding values for
# xpath_allow_empty_selection = false
## Get native data-types for all data-formats that contain type information.
## Currently, protobuf, msgpack and JSON support native data-types
## Currently, CBOR, protobuf, msgpack and JSON support native data-types.
# xpath_native_types = false
## Multiple parsing sections are allowed
@ -607,13 +617,14 @@ respectively. The `field_name` derives the name of the first attribute of the
node, while `field_value` derives the value of the first attribute and converts
the result to a number.
[xpath lib]: https://github.com/antchfx/xpath
[cbor]: https://cbor.io/
[json]: https://www.json.org/
[msgpack]: https://msgpack.org/
[protobuf]: https://developers.google.com/protocol-buffers
[xml]: https://www.w3.org/XML/
[xpath]: https://www.w3.org/TR/xpath/
[xpather]: http://xpather.com/
[xpath tester]: https://codebeautify.org/Xpath-Tester
[time const]: https://golang.org/pkg/time/#pkg-constants
[time parse]: https://golang.org/pkg/time/#Parse
[xml]: https://www.w3.org/XML/
[xpath]: https://www.w3.org/TR/xpath/
[xpath lib]: https://github.com/antchfx/xpath
[xpath tester]: https://codebeautify.org/Xpath-Tester
[xpather]: http://xpather.com/

View File

@ -0,0 +1,89 @@
package xpath
import (
"reflect"
"strconv"
"strings"
path "github.com/antchfx/xpath"
"github.com/srebhan/cborquery"
)
// cborDocument provides the document operations used by the XPath parser for
// CBOR data. It carries no state; every method delegates to the cborquery
// library.
type cborDocument struct{}
// Parse hands the raw bytes in buf to cborquery for decoding and returns
// the node and error exactly as produced by cborquery.Parse.
func (d *cborDocument) Parse(buf []byte) (dataNode, error) {
	reader := strings.NewReader(string(buf))
	return cborquery.Parse(reader)
}
// QueryAll evaluates the XPath expression expr against node and returns all
// matching nodes as generic data nodes. Errors reported by cborquery are
// passed through unchanged together with a nil result.
func (d *cborDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) {
	// A failing type assertion is a programming error: the document type
	// must not change while processing.
	root := node.(*cborquery.Node)

	matches, err := cborquery.QueryAll(root, expr)
	if err != nil {
		return nil, err
	}

	// Repackage the concrete cborquery nodes behind the dataNode interface
	result := make([]dataNode, len(matches))
	for i := range matches {
		result[i] = matches[i]
	}
	return result, nil
}
// CreateXPathNavigator returns the cborquery navigator for the given node.
func (d *cborDocument) CreateXPathNavigator(node dataNode) path.NodeNavigator {
	// A failing type assertion is a programming error: the document type
	// must not change while processing.
	native := node.(*cborquery.Node)
	return cborquery.CreateXPathNavigator(native)
}
// GetNodePath builds the path of the ancestors of node, starting at the
// parent of node and climbing up until relativeTo (exclusive) or the top of
// the tree is reached. The collected elements are returned outermost first,
// joined by sep. Ancestors whose own parent holds a slice or array value
// contribute their zero-based sibling position instead of their name.
// An empty string is returned if there is no ancestor to report.
//
// The separator may be of any length (including empty); exactly one sep is
// placed between two path elements and none at the end.
func (d *cborDocument) GetNodePath(node, relativeTo dataNode, sep string) string {
	// If these panic it's a programming error as we changed the document type while processing
	nativeNode := node.(*cborquery.Node)
	nativeRelativeTo := relativeTo.(*cborquery.Node)

	// Climb up the tree and collect the node names, innermost ancestor first
	names := make([]string, 0)
	for n := nativeNode.Parent; n != nil && n != nativeRelativeTo; n = n.Parent {
		kind := reflect.Invalid
		if n.Parent != nil && n.Parent.Value() != nil {
			kind = reflect.TypeOf(n.Parent.Value()).Kind()
		}

		switch kind {
		case reflect.Slice, reflect.Array:
			// Determine the index for array elements
			names = append(names, d.index(n))
		default:
			// Use the name if not an array
			names = append(names, n.Name)
		}
	}

	if len(names) < 1 {
		return ""
	}

	// The names were collected bottom-up, so flip them to get the path in
	// top-down order before joining.
	for i, j := 0, len(names)-1; i < j; i, j = i+1, j-1 {
		names[i], names[j] = names[j], names[i]
	}

	// Joining avoids trimming a trailing separator by a fixed byte count,
	// which is only correct for single-byte separators.
	return strings.Join(names, sep)
}
// OutputXML returns the XML rendering of node as produced by cborquery.
func (d *cborDocument) OutputXML(node dataNode) string {
	// A failing type assertion is a programming error: the document type
	// must not change while processing.
	return node.(*cborquery.Node).OutputXML()
}
// index returns the zero-based position of node among its siblings as a
// decimal string, determined by counting the preceding siblings.
func (d *cborDocument) index(node *cborquery.Node) string {
	position := 0
	cursor := node
	for cursor.PrevSibling != nil {
		position++
		cursor = cursor.PrevSibling
	}
	return strconv.Itoa(position)
}

View File

@ -11,6 +11,7 @@ import (
"github.com/antchfx/jsonquery"
path "github.com/antchfx/xpath"
"github.com/doclambda/protobufquery"
"github.com/srebhan/cborquery"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter"
@ -92,6 +93,8 @@ func (p *Parser) Init() error {
Notice: "use 'xpath' instead",
})
}
case "xpath_cbor":
p.document = &cborDocument{}
case "xpath_json":
p.document = &jsonDocument{}
@ -491,6 +494,8 @@ func (p *Parser) executeQuery(doc, selected dataNode, query string) (r interface
// enabled, we should return the native type of the data
if p.NativeTypes {
switch nn := current.(type) {
case *cborquery.NodeNavigator:
return nn.GetValue(), nil
case *jsonquery.NodeNavigator:
return nn.GetValue(), nil
case *protobufquery.NodeNavigator:
@ -595,6 +600,14 @@ func init() {
}
},
)
parsers.Add("xpath_cbor",
func(defaultMetricName string) telegraf.Parser {
return &Parser{
Format: "xpath_cbor",
DefaultMetricName: defaultMetricName,
}
},
)
parsers.Add("xpath_json",
func(defaultMetricName string) telegraf.Parser {
return &Parser{

View File

@ -9,15 +9,15 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/influxdata/toml"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/file"
"github.com/influxdata/telegraf/plugins/parsers/influx"
"github.com/influxdata/telegraf/testutil"
"github.com/influxdata/toml"
"github.com/stretchr/testify/require"
)
const invalidXML = `
@ -1367,6 +1367,7 @@ func TestTestCases(t *testing.T) {
if len(expectedErrors) == 0 {
require.NoError(t, err)
}
// If no timestamp is given we cannot test it. So use the one of the output
if cfg.Timestamp == "" {
testutil.RequireMetricsEqual(t, expectedOutputs, outputs, testutil.IgnoreTime())

View File

@ -0,0 +1 @@
¢fpeople…¤dnamehJohn Doebideeemailpjohn@example.comcage*£dnamehJane Doebidfcage(¥dnamehJack DoebidÉeemailpjack@example.comcage fphones<65>¢fnumberl555-555-5555dtype¥dnameiJack Buckbid-eemailpbuck@example.comcagefphonesƒ¢fnumberl555-555-0000dtype¡fnumberl555-555-0001¢fnumberl555-555-0002dtype¥dnameiJanet Doebidéeemailqjanet@example.comcagefphones¡fnumberl555-777-0000¢fnumberl555-777-0001dtypedtagsƒdhomegprivategfriends

View File

@ -0,0 +1,5 @@
addresses age="42",email="john@example.com",id="101",name="John Doe"
addresses age="40",id="102",name="Jane Doe"
addresses age="12",email="jack@example.com",id="201",name="Jack Doe",phones_number="555-555-5555",phones_type="2"
addresses age="19",email="buck@example.com",id="301",name="Jack Buck",phones_number="555-555-0000",phones_number_1="555-555-0001",phones_number_2="555-555-0002",phones_type="1",phones_type_1="2"
addresses age="16",email="janet@example.com",id="1001",name="Janet Doe",phones_number="555-777-0000",phones_number_1="555-777-0001",phones_type="1"

View File

@ -0,0 +1,9 @@
[[inputs.file]]
files = ["./testcases/cbor/addressbook.bin"]
data_format = "xpath_cbor"
[[inputs.file.xpath]]
metric_name = "'addresses'"
metric_selection = "//people"
field_selection = "descendant::*[not(*)]"
field_name_expansion = true

View File

@ -0,0 +1 @@
data str_a="this is a test",str_b="foobar",bytes_a="0001020304050607",bytes_b="666f6f626172",timestamp=1687852514u 1687852514000000000

View File

@ -0,0 +1,24 @@
# Example data:
# [
# {
# "str_a": bytearray("this is a test"),
# "str_b": bytearray("foobar"),
# "bytes_a": bytearray([0, 1, 2, 3, 4, 5, 6, 7]),
# "bytes_b": bytearray("foobar"),
# "timestamp": 1687852514
# }
# ]
[[inputs.file]]
files = ["./testcases/cbor_hex_encoding/data.bin"]
data_format = "xpath_cbor"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'data'"
metric_selection = "child::*"
timestamp = "timestamp"
timestamp_format = "unix"
field_selection = "child::*"
fields_bytes_as_hex = ["bytes_*"]

View File

@ -0,0 +1 @@
data n258="002-2.1.x",n259="14ca85ed9",n260=1687787189711304960u,n261=true,n263=3u,n264=23.76,n265=68.934,n266=false 1687787189711304960

View File

@ -0,0 +1,26 @@
# Example data:
# [
# {
# 258: "002-2.1.x",
# 259: "14ca85ed9",
# 260: 1687787189711304960,
# 261: true,
# 263: 3,
# 264: 23.760,
# 265: 68.934,
# 266: false
# }
# ]
[[inputs.file]]
files = ["./testcases/cbor_numeric_keys/data.bin"]
data_format = "xpath_cbor"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'data'"
metric_selection = "child::*"
timestamp = "n260"
timestamp_format = "unix_ns"
field_selection = "child::*"

View File

@ -0,0 +1 @@
¢fpeople…¤dnamehJohn Doebideeemailpjohn@example.comcage*£dnamehJane Doebidfcage(¥dnamehJack DoebidÉeemailpjack@example.comcage fphones<65>¢fnumberl555-555-5555dtype¥dnameiJack Buckbid-eemailpbuck@example.comcagefphonesƒ¢fnumberl555-555-0000dtype¡fnumberl555-555-0001¢fnumberl555-555-0002dtype¥dnameiJanet Doebidéeemailqjanet@example.comcagefphones¡fnumberl555-777-0000¢fnumberl555-777-0001dtypedtagsƒdhomegprivategfriends

View File

@ -0,0 +1,5 @@
addresses age=42u,email="john@example.com",id=101u,name="John Doe"
addresses age=40u,id=102u,name="Jane Doe"
addresses age=12u,email="jack@example.com",id=201u,name="Jack Doe",phones_number="555-555-5555",phones_type=2u
addresses age=19u,email="buck@example.com",id=301u,name="Jack Buck",phones_number="555-555-0000",phones_number_1="555-555-0001",phones_number_2="555-555-0002",phones_type=1u,phones_type_1=2u
addresses age=16u,email="janet@example.com",id=1001u,name="Janet Doe",phones_number="555-777-0000",phones_number_1="555-777-0001",phones_type=1u

View File

@ -0,0 +1,11 @@
[[inputs.file]]
files = ["./testcases/cbor/addressbook.bin"]
data_format = "xpath_cbor"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'addresses'"
metric_selection = "//people"
field_selection = "descendant::*[not(*)]"
field_name_expansion = true