From c35cabda9e656599f8806271a75f62e979224e93 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Wed, 28 Jun 2023 23:26:50 +0200 Subject: [PATCH] feat(parsers.xpath): Add Concise Binary Object Representation parser (#13480) --- docs/LICENSE_OF_DEPENDENCIES.md | 2 + go.mod | 2 + go.sum | 4 + plugins/parsers/xpath/README.md | 35 ++++--- plugins/parsers/xpath/cbor_document.go | 89 ++++++++++++++++++ plugins/parsers/xpath/parser.go | 13 +++ plugins/parsers/xpath/parser_test.go | 7 +- .../xpath/testcases/cbor/addressbook.bin | 1 + .../parsers/xpath/testcases/cbor/expected.out | 5 + .../xpath/testcases/cbor/telegraf.conf | 9 ++ .../testcases/cbor_hex_encoding/data.bin | Bin 0 -> 83 bytes .../testcases/cbor_hex_encoding/expected.out | 1 + .../testcases/cbor_hex_encoding/telegraf.conf | 24 +++++ .../testcases/cbor_numeric_keys/data.bin | Bin 0 -> 76 bytes .../testcases/cbor_numeric_keys/expected.out | 1 + .../testcases/cbor_numeric_keys/telegraf.conf | 26 +++++ .../native_types_cbor/addressbook.bin | 1 + .../testcases/native_types_cbor/expected.out | 5 + .../testcases/native_types_cbor/telegraf.conf | 11 +++ 19 files changed, 221 insertions(+), 15 deletions(-) create mode 100644 plugins/parsers/xpath/cbor_document.go create mode 100644 plugins/parsers/xpath/testcases/cbor/addressbook.bin create mode 100644 plugins/parsers/xpath/testcases/cbor/expected.out create mode 100644 plugins/parsers/xpath/testcases/cbor/telegraf.conf create mode 100644 plugins/parsers/xpath/testcases/cbor_hex_encoding/data.bin create mode 100644 plugins/parsers/xpath/testcases/cbor_hex_encoding/expected.out create mode 100644 plugins/parsers/xpath/testcases/cbor_hex_encoding/telegraf.conf create mode 100644 plugins/parsers/xpath/testcases/cbor_numeric_keys/data.bin create mode 100644 plugins/parsers/xpath/testcases/cbor_numeric_keys/expected.out create mode 100644 plugins/parsers/xpath/testcases/cbor_numeric_keys/telegraf.conf create mode 100644 
plugins/parsers/xpath/testcases/native_types_cbor/addressbook.bin create mode 100644 plugins/parsers/xpath/testcases/native_types_cbor/expected.out create mode 100644 plugins/parsers/xpath/testcases/native_types_cbor/telegraf.conf diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md index ca9ef70ed..913186352 100644 --- a/docs/LICENSE_OF_DEPENDENCIES.md +++ b/docs/LICENSE_OF_DEPENDENCIES.md @@ -123,6 +123,7 @@ following works: - github.com/emicklei/go-restful [MIT License](https://github.com/emicklei/go-restful/blob/v3/LICENSE) - github.com/fatih/color [MIT License](https://github.com/fatih/color/blob/master/LICENSE.md) - github.com/form3tech-oss/jwt-go [MIT License](https://github.com/form3tech-oss/jwt-go/blob/master/LICENSE) +- github.com/fxamacker/cbor [MIT License](https://github.com/fxamacker/cbor/blob/master/LICENSE) - github.com/gabriel-vasile/mimetype [MIT License](https://github.com/gabriel-vasile/mimetype/blob/master/LICENSE) - github.com/go-asn1-ber/asn1-ber [MIT License](https://github.com/go-asn1-ber/asn1-ber/blob/v1.3/LICENSE) - github.com/go-ldap/ldap [MIT License](https://github.com/go-ldap/ldap/blob/v3.4.1/LICENSE) @@ -308,6 +309,7 @@ following works: - github.com/sleepinggenius2/gosmi [MIT License](https://github.com/sleepinggenius2/gosmi/blob/master/LICENSE) - github.com/snowflakedb/gosnowflake [Apache License 2.0](https://github.com/snowflakedb/gosnowflake/blob/master/LICENSE) - github.com/spf13/pflag [BSD 3-Clause "New" or "Revised" License](https://github.com/spf13/pflag/blob/master/LICENSE) +- github.com/srebhan/cborquery [MIT License](https://github.com/srebhan/cborquery/blob/main/LICENSE) - github.com/stoewer/go-strcase [MIT License](https://github.com/stoewer/go-strcase/blob/master/LICENSE) - github.com/stretchr/objx [MIT License](https://github.com/stretchr/objx/blob/master/LICENSE) - github.com/stretchr/testify [MIT License](https://github.com/stretchr/testify/blob/master/LICENSE) diff --git a/go.mod b/go.mod 
index 7d1363f00..c0be7af5e 100644 --- a/go.mod +++ b/go.mod @@ -164,6 +164,7 @@ require ( github.com/sirupsen/logrus v1.9.0 github.com/sleepinggenius2/gosmi v0.4.4 github.com/snowflakedb/gosnowflake v1.6.13 + github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316 github.com/stretchr/testify v1.8.4 github.com/tbrandon/mbserver v0.0.0-20170611213546-993e1772cc62 github.com/testcontainers/testcontainers-go v0.18.0 @@ -289,6 +290,7 @@ require ( github.com/emicklei/go-restful/v3 v3.10.1 // indirect github.com/flosch/pongo2 v0.0.0-20200913210552-0d938eb266f3 // indirect github.com/form3tech-oss/jwt-go v3.2.5+incompatible // indirect + github.com/fxamacker/cbor v1.5.1 // indirect github.com/gabriel-vasile/mimetype v1.4.0 // indirect github.com/go-asn1-ber/asn1-ber v1.5.4 // indirect github.com/go-logr/logr v1.2.4 // indirect diff --git a/go.sum b/go.sum index ca98ea2fb..38a8acfb4 100644 --- a/go.sum +++ b/go.sum @@ -545,6 +545,8 @@ github.com/frankban/quicktest v1.13.0/go.mod h1:qLE0fzW0VuyUAJgPU19zByoIr0HtCHN/ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fxamacker/cbor v1.5.1 h1:XjQWBgdmQyqimslUh5r4tUGmoqzHmBFQOImkWGi2awg= +github.com/fxamacker/cbor v1.5.1/go.mod h1:3aPGItF174ni7dDzd6JZ206H8cmr4GDNBGpPa971zsU= github.com/gabriel-vasile/mimetype v1.4.0 h1:Cn9dkdYsMIu56tGho+fqzh7XmvY2YyGU0FnbhiOsEro= github.com/gabriel-vasile/mimetype v1.4.0/go.mod h1:fA8fi6KUiG7MgQQ+mEWotXoEOvmxRtOJlERCzSmRvr8= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= @@ -1428,6 +1430,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.7.0/go.mod 
h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= github.com/spf13/viper v1.7.1/go.mod h1:8WkrPz2fc9jxqZNCJI/76HCieCp4Q8HaLFoCha5qpdg= +github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316 h1:HVv8JjpX24FuI59aET1uInn0ItuEiyj8CZMuR9Uw+lE= +github.com/srebhan/cborquery v0.0.0-20230626165538-38be85b82316/go.mod h1:9vX3Dhehey14KFYwWo4K/4JOJRve6jvQf6R9Y8PymLI= github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU= github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= diff --git a/plugins/parsers/xpath/README.md b/plugins/parsers/xpath/README.md index 663c2e57f..65a43e162 100644 --- a/plugins/parsers/xpath/README.md +++ b/plugins/parsers/xpath/README.md @@ -11,12 +11,13 @@ lib]. The only exception are _integer_ fields that need to be specified in a ## Supported data formats -| name | `data_format` setting | comment | -| --------------------------------------- | --------------------- | ------- | -| [Extensible Markup Language (XML)][xml] | `"xml"` | | -| [JSON][json] | `"xpath_json"` | | -| [MessagePack][msgpack] | `"xpath_msgpack"` | | -| [Protocol-buffers][protobuf] | `"xpath_protobuf"` | [see additional parameters](#protocol-buffers-additional-settings)| +| name | `data_format` setting | comment | +| -------------------------------------------- | --------------------- | ------- | +| [Extensible Markup Language (XML)][xml] | `"xml"` | | +| [Concise Binary Object Representation][cbor] | `"xpath_cbor"` | [see additional notes](#concise-binary-object-representation-notes)| +| [JSON][json] | `"xpath_json"` | | +| [MessagePack][msgpack] | `"xpath_msgpack"` | | +| [Protocol-buffers][protobuf] | `"xpath_protobuf"` | [see additional parameters](#protocol-buffers-additional-settings)| ### Protocol-buffers additional settings @@ -90,6 +91,15 @@ This is a list of known headers and the corresponding values for [GRPC]: 
https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md [PDNS]: https://docs.powerdns.com/recursor/lua-config/protobuf.html +### Concise Binary Object Representation notes + +Concise Binary Object Representation supports numeric keys in the data. However, +XML (and this parser) expects node names to be strings starting with a letter. +To be compatible with these requirements, numeric nodes will be prefixed with +a lower case `n` and converted to strings. This means that if you for example +have a node with the key `123` in CBOR you will need to query `n123` in your +XPath expressions. + ## Configuration ```toml @@ -122,7 +132,7 @@ This is a list of known headers and the corresponding values for # xpath_allow_empty_selection = false ## Get native data-types for all data-format that contain type information. - ## Currently, protobuf, msgpack and JSON support native data-types + ## Currently, CBOR, protobuf, msgpack and JSON support native data-types. # xpath_native_types = false ## Multiple parsing sections are allowed @@ -607,13 +617,14 @@ respectively. The `field_name` derives the name of the first attribute of the node, while `field_value` derives the value of the first attribute and converts the result to a number. 
-[xpath lib]: https://github.com/antchfx/xpath
+[cbor]: https://cbor.io/
 [json]: https://www.json.org/
 [msgpack]: https://msgpack.org/
 [protobuf]: https://developers.google.com/protocol-buffers
-[xml]: https://www.w3.org/XML/
-[xpath]: https://www.w3.org/TR/xpath/
-[xpather]: http://xpather.com/
-[xpath tester]: https://codebeautify.org/Xpath-Tester
 [time const]: https://golang.org/pkg/time/#pkg-constants
 [time parse]: https://golang.org/pkg/time/#Parse
+[xml]: https://www.w3.org/XML/
+[xpath]: https://www.w3.org/TR/xpath/
+[xpath lib]: https://github.com/antchfx/xpath
+[xpath tester]: https://codebeautify.org/Xpath-Tester
+[xpather]: http://xpather.com/
diff --git a/plugins/parsers/xpath/cbor_document.go b/plugins/parsers/xpath/cbor_document.go
new file mode 100644
index 000000000..65a0f8644
--- /dev/null
+++ b/plugins/parsers/xpath/cbor_document.go
@@ -0,0 +1,101 @@
+package xpath
+
+import (
+	"bytes"
+	"reflect"
+	"strconv"
+	"strings"
+
+	path "github.com/antchfx/xpath"
+	"github.com/srebhan/cborquery"
+)
+
+// cborDocument implements the document operations of the xpath parser for
+// CBOR data by delegating to the cborquery package.
+type cborDocument struct{}
+
+// Parse decodes the CBOR data in buf and returns the resulting node.
+func (d *cborDocument) Parse(buf []byte) (dataNode, error) {
+	// Read from the byte slice directly to avoid copying it into a string
+	return cborquery.Parse(bytes.NewReader(buf))
+}
+
+// QueryAll evaluates the XPath expression expr on node and returns all
+// matching nodes.
+func (d *cborDocument) QueryAll(node dataNode, expr string) ([]dataNode, error) {
+	// If this panics it's a programming error as we changed the document type while processing
+	native, err := cborquery.QueryAll(node.(*cborquery.Node), expr)
+	if err != nil {
+		return nil, err
+	}
+
+	nodes := make([]dataNode, 0, len(native))
+	for _, n := range native {
+		nodes = append(nodes, n)
+	}
+	return nodes, nil
+}
+
+// CreateXPathNavigator returns the cborquery XPath navigator for node.
+func (d *cborDocument) CreateXPathNavigator(node dataNode) path.NodeNavigator {
+	// If this panics it's a programming error as we changed the document type while processing
+	return cborquery.CreateXPathNavigator(node.(*cborquery.Node))
+}
+
+// GetNodePath returns the names of the ancestors of node joined by sep,
+// ordered from the outermost ancestor down to the direct parent. Climbing
+// stops at relativeTo (which is not included) or at the top of the tree.
+// Ancestors whose parent holds a slice or array value are represented by
+// their sibling index instead of their name. The name of node itself is not
+// part of the path and an empty string is returned if no ancestor qualifies.
+func (d *cborDocument) GetNodePath(node, relativeTo dataNode, sep string) string {
+	// If these panic it's a programming error as we changed the document type while processing
+	nativeNode := node.(*cborquery.Node)
+	nativeRelativeTo := relativeTo.(*cborquery.Node)
+
+	// Climb up the tree and collect the node names, innermost first
+	var names []string
+	for n := nativeNode.Parent; n != nil && n != nativeRelativeTo; n = n.Parent {
+		kind := reflect.Invalid
+		if n.Parent != nil && n.Parent.Value() != nil {
+			kind = reflect.TypeOf(n.Parent.Value()).Kind()
+		}
+
+		switch kind {
+		case reflect.Slice, reflect.Array:
+			// Determine the index for array elements
+			names = append(names, d.index(n))
+		default:
+			// Use the name if not an array
+			names = append(names, n.Name)
+		}
+	}
+
+	// Reverse the names to get the outermost ancestor first
+	for i, j := 0, len(names)-1; i < j; i, j = i+1, j-1 {
+		names[i], names[j] = names[j], names[i]
+	}
+
+	// Joining never leaves a trailing separator behind, so the result is
+	// correct for separators of any length
+	return strings.Join(names, sep)
+}
+
+// OutputXML returns the XML representation of node as produced by cborquery.
+func (d *cborDocument) OutputXML(node dataNode) string {
+	// If this panics it's a programming error as we changed the document type while processing
+	native := node.(*cborquery.Node)
+	return native.OutputXML()
+}
+
+// index returns the zero-based position of node among its siblings as a
+// decimal string, determined by counting the preceding siblings.
+func (d *cborDocument) index(node *cborquery.Node) string {
+	idx := 0
+
+	for n := node; n.PrevSibling != nil; n = n.PrevSibling {
+		idx++
+	}
+
+	return strconv.Itoa(idx)
+}
diff --git a/plugins/parsers/xpath/parser.go b/plugins/parsers/xpath/parser.go
index a291f5b33..0ccf76653 100644
--- a/plugins/parsers/xpath/parser.go
+++ b/plugins/parsers/xpath/parser.go
@@ -11,6 +11,7 @@ import (
 	"github.com/antchfx/jsonquery"
 	path "github.com/antchfx/xpath"
 	"github.com/doclambda/protobufquery"
+	"github.com/srebhan/cborquery"
 
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/filter"
@@ -92,6 +93,8 @@ func (p *Parser) Init() error {
 				Notice: "use 'xpath' instead",
 			})
 		}
+	case "xpath_cbor":
+		p.document = &cborDocument{}
 	case "xpath_json":
 		p.document = &jsonDocument{}
 
@@ -491,6 +494,8 @@ func (p *Parser) executeQuery(doc, selected dataNode, query string) (r interface
 	// enabled, we should return the native type of the data
 	if p.NativeTypes {
 		switch nn := current.(type) {
+		case *cborquery.NodeNavigator:
+			return nn.GetValue(), nil
 		case
*jsonquery.NodeNavigator: return nn.GetValue(), nil case *protobufquery.NodeNavigator: @@ -595,6 +600,14 @@ func init() { } }, ) + parsers.Add("xpath_cbor", + func(defaultMetricName string) telegraf.Parser { + return &Parser{ + Format: "xpath_cbor", + DefaultMetricName: defaultMetricName, + } + }, + ) parsers.Add("xpath_json", func(defaultMetricName string) telegraf.Parser { return &Parser{ diff --git a/plugins/parsers/xpath/parser_test.go b/plugins/parsers/xpath/parser_test.go index c96e16d52..752276c6f 100644 --- a/plugins/parsers/xpath/parser_test.go +++ b/plugins/parsers/xpath/parser_test.go @@ -9,15 +9,15 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/influxdata/toml" + "github.com/stretchr/testify/require" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/config" "github.com/influxdata/telegraf/plugins/inputs" "github.com/influxdata/telegraf/plugins/inputs/file" "github.com/influxdata/telegraf/plugins/parsers/influx" "github.com/influxdata/telegraf/testutil" - "github.com/influxdata/toml" - - "github.com/stretchr/testify/require" ) const invalidXML = ` @@ -1367,6 +1367,7 @@ func TestTestCases(t *testing.T) { if len(expectedErrors) == 0 { require.NoError(t, err) } + // If no timestamp is given we cannot test it. 
So use the one of the output if cfg.Timestamp == "" { testutil.RequireMetricsEqual(t, expectedOutputs, outputs, testutil.IgnoreTime()) diff --git a/plugins/parsers/xpath/testcases/cbor/addressbook.bin b/plugins/parsers/xpath/testcases/cbor/addressbook.bin new file mode 100644 index 000000000..2476f9c56 --- /dev/null +++ b/plugins/parsers/xpath/testcases/cbor/addressbook.bin @@ -0,0 +1 @@ +¢fpeople…€dnamehJohn Doebideeemailpjohn@example.comcage*£dnamehJane Doebidfcage(¥dnamehJack DoebidÉeemailpjack@example.comcage fphones¢fnumberl555-555-5555dtype¥dnameiJack Buckbid-eemailpbuck@example.comcagefphonesƒ¢fnumberl555-555-0000dtype¡fnumberl555-555-0001¢fnumberl555-555-0002dtype¥dnameiJanet Doebidéeemailqjanet@example.comcagefphones‚¡fnumberl555-777-0000¢fnumberl555-777-0001dtypedtagsƒdhomegprivategfriends \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/cbor/expected.out b/plugins/parsers/xpath/testcases/cbor/expected.out new file mode 100644 index 000000000..b6361ff22 --- /dev/null +++ b/plugins/parsers/xpath/testcases/cbor/expected.out @@ -0,0 +1,5 @@ +addresses age="42",email="john@example.com",id="101",name="John Doe" +addresses age="40",id="102",name="Jane Doe" +addresses age="12",email="jack@example.com",id="201",name="Jack Doe",phones_number="555-555-5555",phones_type="2" +addresses age="19",email="buck@example.com",id="301",name="Jack Buck",phones_number="555-555-0000",phones_number_1="555-555-0001",phones_number_2="555-555-0002",phones_type="1",phones_type_1="2" +addresses age="16",email="janet@example.com",id="1001",name="Janet Doe",phones_number="555-777-0000",phones_number_1="555-777-0001",phones_type="1" diff --git a/plugins/parsers/xpath/testcases/cbor/telegraf.conf b/plugins/parsers/xpath/testcases/cbor/telegraf.conf new file mode 100644 index 000000000..3ce396b0f --- /dev/null +++ b/plugins/parsers/xpath/testcases/cbor/telegraf.conf @@ -0,0 +1,9 @@ +[[inputs.file]] + files = ["./testcases/cbor/addressbook.bin"] + data_format = 
"xpath_cbor" + + [[inputs.file.xpath]] + metric_name = "'addresses'" + metric_selection = "//people" + field_selection = "descendant::*[not(*)]" + field_name_expansion = true diff --git a/plugins/parsers/xpath/testcases/cbor_hex_encoding/data.bin b/plugins/parsers/xpath/testcases/cbor_hex_encoding/data.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad1b39ad4be2083f98ea3dce761aff8662c0cada GIT binary patch literal 83 zcmZo%np#{^6rboA(P#`|hgMpEWnT3^&9V!5m T&n(Hz1!_plEs#o?HT4ky2+$tV literal 0 HcmV?d00001 diff --git a/plugins/parsers/xpath/testcases/cbor_hex_encoding/expected.out b/plugins/parsers/xpath/testcases/cbor_hex_encoding/expected.out new file mode 100644 index 000000000..be46f8dd4 --- /dev/null +++ b/plugins/parsers/xpath/testcases/cbor_hex_encoding/expected.out @@ -0,0 +1 @@ +data str_a="this is a test",str_b="foobar",bytes_a="0001020304050607",bytes_b="666f6f626172",timestamp=1687852514u 1687852514000000000 \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/cbor_hex_encoding/telegraf.conf b/plugins/parsers/xpath/testcases/cbor_hex_encoding/telegraf.conf new file mode 100644 index 000000000..d46dc92ad --- /dev/null +++ b/plugins/parsers/xpath/testcases/cbor_hex_encoding/telegraf.conf @@ -0,0 +1,24 @@ +# Example data: +# [ +# { +# "str_a": bytearray("this is a test"), +# "str_b": bytearray("foobar"), +# "bytes_a": bytearray([0, 1, 2, 3, 4, 5, 6, 7]), +# "bytes_b": bytearray("foobar"), +# "timestamp": 1687852514 +# } +# ] + +[[inputs.file]] + files = ["./testcases/cbor_hex_encoding/data.bin"] + data_format = "xpath_cbor" + + xpath_native_types = true + + [[inputs.file.xpath]] + metric_name = "'data'" + metric_selection = "child::*" + timestamp = "timestamp" + timestamp_format = "unix" + field_selection = "child::*" + fields_bytes_as_hex = ["bytes_*"] diff --git a/plugins/parsers/xpath/testcases/cbor_numeric_keys/data.bin b/plugins/parsers/xpath/testcases/cbor_numeric_keys/data.bin new file mode 100644 index 
0000000000000000000000000000000000000000..9d8cbd39319d4b87b9d2798fcc504d094cc0a1da GIT binary patch literal 76 zcmZo%A<4-4Rg#f8)6gV2(ZVz}#Zr=yDbv8fNY_ZuP_IIgk>j_6`Jw(8jjx9#899GD g1X>?k{(kNzNk(>NNk*