From da28cfdb4381761e4faa6a36e187b1e3c8679dcc Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Wed, 26 Jul 2023 17:44:22 +0200 Subject: [PATCH] fix(parsers.xpath): Ensure precedence of explicitly defined tags and fields (#13662) --- plugins/parsers/xpath/README.md | 3 +- plugins/parsers/xpath/parser.go | 135 ++++++++++-------- .../json_explicit_precedence/expected.out | 1 + .../json_explicit_precedence/telegraf.conf | 15 ++ .../json_explicit_precedence/test.json | 13 ++ 5 files changed, 106 insertions(+), 61 deletions(-) create mode 100644 plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out create mode 100644 plugins/parsers/xpath/testcases/json_explicit_precedence/telegraf.conf create mode 100644 plugins/parsers/xpath/testcases/json_explicit_precedence/test.json diff --git a/plugins/parsers/xpath/README.md b/plugins/parsers/xpath/README.md index 65a43e162..98d21a3c7 100644 --- a/plugins/parsers/xpath/README.md +++ b/plugins/parsers/xpath/README.md @@ -280,7 +280,8 @@ in the metric. __Please note__: The resulting fields are _always_ of type string! It is also possible to specify a mixture of the two alternative ways of -specifying fields. +specifying fields. In this case _explicitly_ defined tags and fields take +_precedence_ over the batch instances if both use the same tag/field name. 
### metric_selection (optional) diff --git a/plugins/parsers/xpath/parser.go b/plugins/parsers/xpath/parser.go index cf5dea616..f3f3a180e 100644 --- a/plugins/parsers/xpath/parser.go +++ b/plugins/parsers/xpath/parser.go @@ -279,25 +279,6 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config // Query tags and add default ones tags := make(map[string]string) - for name, query := range config.Tags { - // Execute the query and cast the returned values into strings - v, err := p.executeQuery(doc, selected, query) - if err != nil { - return nil, fmt.Errorf("failed to query tag %q: %w", name, err) - } - switch v := v.(type) { - case string: - tags[name] = v - case bool: - tags[name] = strconv.FormatBool(v) - case float64: - tags[name] = strconv.FormatFloat(v, 'G', -1, 64) - case nil: - continue - default: - return nil, fmt.Errorf("unknown format '%T' for tag %q", v, name) - } - } // Handle the tag batch definitions if any. if len(config.TagSelection) > 0 { @@ -356,53 +337,34 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config } } + // Handle explicitly defined tags + for name, query := range config.Tags { + // Execute the query and cast the returned values into strings + v, err := p.executeQuery(doc, selected, query) + if err != nil { + return nil, fmt.Errorf("failed to query tag %q: %w", name, err) + } + switch v := v.(type) { + case string: + tags[name] = v + case bool: + tags[name] = strconv.FormatBool(v) + case float64: + tags[name] = strconv.FormatFloat(v, 'G', -1, 64) + case nil: + continue + default: + return nil, fmt.Errorf("unknown format '%T' for tag %q", v, name) + } + } + + // Add default tags for name, v := range p.DefaultTags { tags[name] = v } // Query fields fields := make(map[string]interface{}) - for name, query := range config.FieldsInt { - // Execute the query and cast the returned values into integers - v, err := p.executeQuery(doc, selected, query) - if err != nil { - return nil, 
fmt.Errorf("failed to query field (int) %q: %w", name, err) - } - switch v := v.(type) { - case string: - fields[name], err = strconv.ParseInt(v, 10, 54) - if err != nil { - return nil, fmt.Errorf("failed to parse field (int) %q: %w", name, err) - } - case bool: - fields[name] = int64(0) - if v { - fields[name] = int64(1) - } - case float64: - fields[name] = int64(v) - case nil: - continue - default: - return nil, fmt.Errorf("unknown format '%T' for field (int) %q", v, name) - } - } - - for name, query := range config.Fields { - // Execute the query and store the result in fields - v, err := p.executeQuery(doc, selected, query) - if err != nil { - return nil, fmt.Errorf("failed to query field %q: %w", name, err) - } - - if config.FieldsHexFilter != nil && config.FieldsHexFilter.Match(name) { - if b, ok := v.([]byte); ok { - v = hex.EncodeToString(b) - } - } - - fields[name] = v - } // Handle the field batch definitions if any. if len(config.FieldSelection) > 0 { @@ -471,6 +433,59 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config } } + // Handle explicitly defined fields + for name, query := range config.FieldsInt { + // Execute the query and cast the returned values into integers + v, err := p.executeQuery(doc, selected, query) + if err != nil { + return nil, fmt.Errorf("failed to query field (int) %q: %w", name, err) + } + switch v := v.(type) { + case string: + fields[name], err = strconv.ParseInt(v, 10, 54) + if err != nil { + return nil, fmt.Errorf("failed to parse field (int) %q: %w", name, err) + } + case bool: + fields[name] = int64(0) + if v { + fields[name] = int64(1) + } + case float64: + fields[name] = int64(v) + case nil: + continue + default: + return nil, fmt.Errorf("unknown format '%T' for field (int) %q", v, name) + } + } + + for name, query := range config.Fields { + // Execute the query and store the result in fields + v, err := p.executeQuery(doc, selected, query) + if err != nil { + return nil, fmt.Errorf("failed 
to query field %q: %w", name, err) + } + + // Handle complex types which would be dropped otherwise for + // native type handling + fmt.Printf("explicit field %q: %v (%T)\n", name, v, v) + if v != nil { + switch reflect.TypeOf(v).Kind() { + case reflect.Array, reflect.Slice, reflect.Map: + if b, ok := v.([]byte); ok { + if config.FieldsHexFilter != nil && config.FieldsHexFilter.Match(name) { + v = hex.EncodeToString(b) + } + } else { + v = fmt.Sprintf("%v", v) + } + } + } + + fields[name] = v + } + return metric.New(metricname, tags, fields, timestamp), nil } diff --git a/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out b/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out new file mode 100644 index 000000000..18b1d7010 --- /dev/null +++ b/plugins/parsers/xpath/testcases/json_explicit_precedence/expected.out @@ -0,0 +1 @@ +foo a="a string",b=3.1415,c=true,d="{\"d1\":1,\"d2\":\"foo\",\"d3\":true,\"d4\":null}",e="[\"master\",42,true]",timestamp=1690193829 1690193829000000000 \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/json_explicit_precedence/telegraf.conf b/plugins/parsers/xpath/testcases/json_explicit_precedence/telegraf.conf new file mode 100644 index 000000000..0bf7fe92d --- /dev/null +++ b/plugins/parsers/xpath/testcases/json_explicit_precedence/telegraf.conf @@ -0,0 +1,15 @@ +[[inputs.file]] + files = ["./testcases/json_explicit_precedence/test.json"] + data_format = "xpath_json" + + xpath_native_types = true + + [[inputs.file.xpath]] + metric_name = "'foo'" + field_selection = "*" + timestamp = "timestamp" + timestamp_format = "unix" + + [inputs.file.xpath.fields] + d = "string(d)" + e = "string(e)" \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/json_explicit_precedence/test.json b/plugins/parsers/xpath/testcases/json_explicit_precedence/test.json new file mode 100644 index 000000000..976ee9492 --- /dev/null +++ 
b/plugins/parsers/xpath/testcases/json_explicit_precedence/test.json @@ -0,0 +1,13 @@ +{ + "a": "a string", + "b": 3.1415, + "c": true, + "d": { + "d1": 1, + "d2": "foo", + "d3": true, + "d4": null + }, + "e": ["master", 42, true], + "timestamp": 1690193829 + } \ No newline at end of file