feat(parsers.binary): Allow base64-encoded input data (#14961)

This commit is contained in:
Sven Rebhan 2024-03-13 17:42:42 +01:00 committed by GitHub
parent f674099fad
commit 03700b5983
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 105 additions and 60 deletions

View File

@ -23,8 +23,9 @@ user-specified configurations.
## where "host" means the same endianness as the machine running Telegraf.
# endianness = "host"
## Interpret input as string containing hex-encoded data.
# hex_encoding = false
## Interpret input using the specified encoding
## Available values are "none" (raw bytes), "hex" and "base64"
# binary_encoding = "none"
## Multiple parsing sections are allowed
[[inputs.file.binary]]
@ -112,11 +113,17 @@ machine share the same endianness.
Alternatively, you can explicitly specify big-endian format (`"be"`) or
little-endian format (`"le"`).
#### `hex_encoding` (optional)
#### `binary_encoding` (optional)
If `true`, the input data is interpreted as a string containing hex-encoded
data like `C0 C7 21 A9`. The value is _case insensitive_ and can handle spaces,
however prefixes like `0x` or `x` are _not_ allowed.
If this option is not specified or set to `none`, the input data contains the
binary data as raw bytes. This is the default.
If set to `hex`, the input data is interpreted as a string containing
hex-encoded data like `C0 C7 21 A9`. The value is _case insensitive_ and can
handle spaces and prefixes like `0x` or `x`.
If set to `base64`, the input data is interpreted as a string containing
padded base64 data like `RDLAAA==`.
### Non-byte aligned value extraction

View File

@ -1,6 +1,7 @@
package binary
import (
"encoding/base64"
"encoding/binary"
"encoding/hex"
"errors"
@ -18,7 +19,8 @@ type Parser struct {
Endianess string `toml:"endianess" deprecated:"1.27.4;use 'endianness' instead"`
Endianness string `toml:"endianness"`
Configs []Config `toml:"binary"`
HexEncoding bool `toml:"hex_encoding"`
HexEncoding bool `toml:"hex_encoding" deprecated:"1.30.0;use 'binary_encoding' instead"`
Encoding string `toml:"binary_encoding"`
Log telegraf.Logger `toml:"-"`
metricName string
@ -27,9 +29,16 @@ type Parser struct {
}
func (p *Parser) Init() error {
// Keep backward compatibility
if p.Endianess != "" && p.Endianness == "" {
p.Endianness = p.Endianess
}
if p.HexEncoding {
if p.Encoding != "" && p.Encoding != "hex" {
return errors.New("conflicting settings between 'hex_encoding' and 'binary_encoding'")
}
p.Encoding = "hex"
}
switch p.Endianness {
case "le":
@ -42,6 +51,12 @@ func (p *Parser) Init() error {
return fmt.Errorf("unknown endianness %q", p.Endianness)
}
switch p.Encoding {
case "", "none", "hex", "base64":
default:
return fmt.Errorf("unknown encoding %q", p.Encoding)
}
// Pre-process the configurations
if len(p.Configs) == 0 {
return errors.New("no configuration given")
@ -61,14 +76,25 @@ func (p *Parser) Parse(data []byte) ([]telegraf.Metric, error) {
// If the data is encoded (hex or base64), we need to decode it first
buf := data
if p.HexEncoding {
s := strings.ReplaceAll(string(data), " ", "")
switch p.Encoding {
case "hex":
s := strings.TrimPrefix(string(data), "0x")
s = strings.TrimPrefix(s, "x")
s = strings.TrimSpace(s)
s = strings.ReplaceAll(s, " ", "")
s = strings.ReplaceAll(s, "\t", "")
var err error
buf, err = hex.DecodeString(s)
if err != nil {
return nil, fmt.Errorf("decoding hex failed: %w", err)
}
case "base64":
decoder := base64.StdEncoding.WithPadding(base64.StdPadding)
var err error
buf, err = decoder.DecodeString(strings.TrimSpace(string(data)))
if err != nil {
return nil, fmt.Errorf("decoding base64 failed: %w", err)
}
}
matches := 0

View File

@ -11,16 +11,15 @@ import (
"time"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/config"
"github.com/influxdata/telegraf/internal"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/file"
"github.com/influxdata/telegraf/plugins/parsers/influx"
"github.com/influxdata/telegraf/testutil"
test "github.com/influxdata/telegraf/testutil/plugin_input"
)
var dummyEntry = Entry{
@ -1410,60 +1409,45 @@ func TestCases(t *testing.T) {
require.NoError(t, err)
require.NotEmpty(t, folders)
// Register the plugin
inputs.Add("file", func() telegraf.Input {
return &file.File{}
})
// Prepare the influx parser for expectations
parser := &influx.Parser{}
require.NoError(t, parser.Init())
for _, f := range folders {
testcasePath := filepath.Join("testcases", f.Name())
configFilename := filepath.Join(testcasePath, "telegraf.conf")
expectedFilename := filepath.Join(testcasePath, "expected.out")
expectedErrorFilename := filepath.Join(testcasePath, "expected.err")
t.Run(f.Name(), func(t *testing.T) {
// Read the expected output if any
var expected []telegraf.Metric
if _, err := os.Stat(expectedFilename); err == nil {
var err error
expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser)
require.NoError(t, err)
}
// Read the expected errors if any
var expectedErrors []string
if _, err := os.Stat(expectedErrorFilename); err == nil {
var err error
expectedErrors, err = testutil.ParseLinesFromFile(expectedErrorFilename)
require.NoError(t, err)
require.NotEmpty(t, expectedErrors)
}
// Configure the plugin
cfg := config.NewConfig()
require.NoError(t, cfg.LoadConfig(configFilename))
require.NoError(t, err)
require.Len(t, cfg.Inputs, 1)
// Gather the metrics from the input file configure
// Tune the test-plugin
plugin := cfg.Inputs[0].Input.(*test.Plugin)
plugin.Path = testcasePath
require.NoError(t, plugin.Init())
// Gather the metrics and check for potential errors
var acc testutil.Accumulator
var actualErrors []string
for _, input := range cfg.Inputs {
require.NoError(t, input.Init())
if err := input.Gather(&acc); err != nil {
actualErrors = append(actualErrors, err.Error())
}
err := plugin.Gather(&acc)
switch len(plugin.ExpectedErrors) {
case 0:
require.NoError(t, err)
case 1:
require.ErrorContains(t, err, plugin.ExpectedErrors[0])
default:
require.Contains(t, plugin.ExpectedErrors, err.Error())
}
// Check for potential errors
require.ElementsMatch(t, actualErrors, expectedErrors)
// Determine checking options
options := []cmp.Option{
cmpopts.EquateApprox(0, 1e-6),
}
if plugin.ShouldIgnoreTimestamp {
options = append(options, testutil.IgnoreTime())
}
// Process expected metrics and compare with resulting metrics
actual := acc.GetTelegrafMetrics()
testutil.RequireMetricsEqual(t, expected, actual)
testutil.RequireMetricsEqual(t, plugin.Expected, actual, options...)
})
}
}
@ -1479,7 +1463,7 @@ func TestHexEncoding(t *testing.T) {
parser := &Parser{
Endianness: "be",
HexEncoding: true,
Encoding: "hex",
Configs: []Config{
{
Entries: []Entry{dummyEntry},

View File

@ -0,0 +1,3 @@
test value=715
test value=208.5
test value=0.471

View File

@ -0,0 +1 @@
RDLAAA==

View File

@ -0,0 +1 @@
Q1CAAA==

View File

@ -0,0 +1 @@
PvEm6Q==

View File

@ -0,0 +1,8 @@
# Test case: binary parser fed with base64-encoded input.
[[inputs.test]]
# Message files to parse.
files = ["messageA.bin", "messageB.bin", "messageC.bin"]
data_format = "binary"
# Interpret the decoded bytes as big-endian.
endianness = "be"
# Input is base64 text instead of raw bytes.
binary_encoding = "base64"
[[inputs.test.binary]]
# Extract a single float32 field named "value".
entries = [{ name = "value", type = "float32" }]

View File

@ -0,0 +1,3 @@
test value=715
test value=208.5
test value=0.471

View File

@ -0,0 +1 @@
0x4432c000

View File

@ -0,0 +1 @@
0x43508000

View File

@ -0,0 +1 @@
0x3ef126e9

View File

@ -0,0 +1,8 @@
# Test case: binary parser fed with hex-encoded input.
[[inputs.test]]
# Message files to parse.
files = ["messageA.bin", "messageB.bin", "messageC.bin"]
data_format = "binary"
# Interpret the decoded bytes as big-endian.
endianness = "be"
# Input is hex text instead of raw bytes.
binary_encoding = "hex"
[[inputs.test.binary]]
# Extract a single float32 field named "value".
entries = [{ name = "value", type = "float32" }]

View File

@ -1,9 +1,9 @@
[[inputs.file]]
files = ["./testcases/multiple_messages/messageA.bin", "./testcases/multiple_messages/messageB.bin", "./testcases/multiple_messages/messageC.bin"]
[[inputs.test]]
files = ["messageA.bin", "messageB.bin", "messageC.bin"]
data_format = "binary"
endianness = "le"
[[inputs.file.binary]]
[[inputs.test.binary]]
metric_name = "metricA"
entries = [
@ -15,12 +15,12 @@
{ type = "unix", assignment = "time" },
]
[inputs.file.binary.filter]
[inputs.test.binary.filter]
selection = [
{ offset = 16, bits = 8, match = "0x0A" },
]
[[inputs.file.binary]]
[[inputs.test.binary]]
metric_name = "metricB"
entries = [
@ -29,10 +29,10 @@
{ type = "unix", assignment = "time" },
]
[inputs.file.binary.filter]
[inputs.test.binary.filter]
selection = [{ offset = 16, bits = 8, match = "0x0B" }]
[[inputs.file.binary]]
[[inputs.test.binary]]
metric_name = "metricC"
entries = [
@ -42,5 +42,5 @@
{ type = "unix", assignment = "time" },
]
[inputs.file.binary.filter]
[inputs.test.binary.filter]
selection = [{ offset = 16, bits = 8, match = "0x0C" }]