feat(parsers.binary): handle hex-encoded inputs (#12232)

This commit is contained in:
Sven Rebhan 2022-11-16 21:43:13 +01:00 committed by GitHub
parent 58d7dfc43f
commit d2268f04d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 9 deletions

View File

@ -18,11 +18,14 @@ user-specified configurations.
## Do not error-out if none of the filter expressions below matches. ## Do not error-out if none of the filter expressions below matches.
# allow_no_match = false # allow_no_match = false
## Specify the endianess of the data. ## Specify the endianness of the data.
## Available values are "be" (big-endian), "le" (little-endian) and "host", ## Available values are "be" (big-endian), "le" (little-endian) and "host",
## where "host" means the same endianess as the machine running Telegraf. ## where "host" means the same endianness as the machine running Telegraf.
# endianess = "host" # endianess = "host"
## Interpret input as string containing hex-encoded data.
# hex_encoding = false
## Multiple parsing sections are allowed ## Multiple parsing sections are allowed
[[inputs.file.binary]] [[inputs.file.binary]]
## Optional: Metric (measurement) name to use if not extracted from the data. ## Optional: Metric (measurement) name to use if not extracted from the data.
@ -30,7 +33,7 @@ user-specified configurations.
## Definition of the message format and the extracted data. ## Definition of the message format and the extracted data.
## Please note that you need to define all elements of the data in the ## Please note that you need to define all elements of the data in the
## correct order with the correct length as the data is parsed in the order ## correct order with the correct length as the data is parsed in the order
## given. ## given.
## An entry can have the following properties: ## An entry can have the following properties:
## name -- Name of the element (e.g. field or tag). Can be omitted ## name -- Name of the element (e.g. field or tag). Can be omitted
@ -59,7 +62,7 @@ user-specified configurations.
{ name = "address", type = "uint16", assignment = "tag" }, { name = "address", type = "uint16", assignment = "tag" },
{ name = "value", type = "float64" }, { name = "value", type = "float64" },
{ type = "unix", assignment = "time" }, { type = "unix", assignment = "time" },
] ]
## Optional: Filter evaluated before applying the configuration. ## Optional: Filter evaluated before applying the configuration.
## This option can be used to manage multiple configuration specific for ## This option can be used to manage multiple configuration specific for
@ -101,14 +104,20 @@ By specifying `allow_no_match` you allow the parser to silently ignore data
that does not match _any_ given configuration filter. This can be useful if that does not match _any_ given configuration filter. This can be useful if
you only want to collect a subset of the available messages. you only want to collect a subset of the available messages.
#### `endianess` (optional) #### `endianess` (optional)
This specifies the endianess of the data. If not specified, the parser will This specifies the endianness of the data. If not specified, the parser will
fallback to the "host" endianess, assuming that the message and Telegraf fallback to the "host" endianness, assuming that the message and Telegraf
machine share the same endianess. machine share the same endianness.
Alternatively, you can explicitly specify big-endian format (`"be"`) or Alternatively, you can explicitly specify big-endian format (`"be"`) or
little-endian format (`"le"`). little-endian format (`"le"`).
#### `hex_encoding` (optional)
If `true`, the input data is interpreted as a string containing hex-encoded
data like `C0 C7 21 A9`. The value is _case insensitive_, and any spaces or
tabs are ignored; however, prefixes like `0x` or `x` are _not_ allowed.
### Non-byte aligned value extraction ### Non-byte aligned value extraction
In both, `filter` and `entries` definitions, values can be extracted at non-byte In both, `filter` and `entries` definitions, values can be extracted at non-byte

View File

@ -2,8 +2,10 @@ package binary
import ( import (
"encoding/binary" "encoding/binary"
"encoding/hex"
"errors" "errors"
"fmt" "fmt"
"strings"
"time" "time"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
@ -14,6 +16,7 @@ type Parser struct {
AllowNoMatch bool `toml:"allow_no_match"` AllowNoMatch bool `toml:"allow_no_match"`
Endianess string `toml:"endianess"` Endianess string `toml:"endianess"`
Configs []Config `toml:"binary"` Configs []Config `toml:"binary"`
HexEncoding bool `toml:"hex_encoding"`
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
metricName string metricName string
@ -47,9 +50,21 @@ func (p *Parser) Init() error {
return nil return nil
} }
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { func (p *Parser) Parse(data []byte) ([]telegraf.Metric, error) {
t := time.Now() t := time.Now()
// If the data is encoded in HEX, we need to decode it first
buf := data
if p.HexEncoding {
s := strings.ReplaceAll(string(data), " ", "")
s = strings.ReplaceAll(s, "\t", "")
var err error
buf, err = hex.DecodeString(s)
if err != nil {
return nil, fmt.Errorf("decoding hex failed: %w", err)
}
}
matches := 0 matches := 0
metrics := make([]telegraf.Metric, 0) metrics := make([]telegraf.Metric, 0)
for i, cfg := range p.Configs { for i, cfg := range p.Configs {

View File

@ -3,6 +3,7 @@ package binary
import ( import (
"bytes" "bytes"
"encoding/binary" "encoding/binary"
"encoding/hex"
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
@ -1465,3 +1466,35 @@ func TestCases(t *testing.T) {
}) })
} }
} }
// TestHexEncoding verifies that a parser with HexEncoding enabled decodes
// hex-encoded input before parsing it. Besides plain lower-case hex it covers
// the other accepted input forms (upper-case digits, space- and tab-separated
// bytes) and checks that input carrying a "0x" prefix is rejected.
func TestHexEncoding(t *testing.T) {
	testdata := []interface{}{
		uint64(0x01020304050607),
		uint64(0x08090A0B0C0D0E),
		uint64(0x0F101213141516),
		uint64(0x1718191A1B1C1D),
		uint64(0x1E1F2021222324),
	}

	parser := &Parser{
		Endianess:   "be",
		HexEncoding: true,
		Configs: []Config{
			{
				Entries: []Entry{dummyEntry},
			},
		},
		Log:        testutil.Logger{Name: "parsers.binary"},
		metricName: "binary",
	}
	require.NoError(t, parser.Init())

	// Generate the binary data every hex variant below has to decode to
	data, err := generateBinary(testdata, binary.BigEndian)
	require.NoError(t, err)

	// "% X" renders the bytes as upper-case hex separated by single spaces
	spaced := fmt.Sprintf("% X", data)

	accepted := []struct {
		name  string
		input []byte
	}{
		{name: "lower-case", input: []byte(hex.EncodeToString(data))},
		{name: "upper-case", input: []byte(fmt.Sprintf("%X", data))},
		{name: "space separated", input: []byte(spaced)},
		{name: "tab separated", input: bytes.ReplaceAll([]byte(spaced), []byte(" "), []byte("\t"))},
	}
	for _, tt := range accepted {
		t.Run(tt.name, func(t *testing.T) {
			metrics, err := parser.Parse(tt.input)
			require.NoError(t, err)
			require.NotEmpty(t, metrics)
		})
	}

	// Prefixes are not valid hex digits, so decoding has to fail
	t.Run("prefix rejected", func(t *testing.T) {
		_, err := parser.Parse([]byte("0x" + hex.EncodeToString(data)))
		require.Error(t, err)
	})
}