feat(parsers.xpath): Add timezone handling (#12820)

This commit is contained in:
Sven Rebhan 2023-03-13 12:17:14 +01:00 committed by GitHub
parent f104c1a708
commit bea5414384
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 201 additions and 41 deletions

View File

@ -253,7 +253,7 @@ func CompressWithGzip(data io.Reader) io.ReadCloser {
// The location is the *time.Location used to interpret timestamps that carry
// no zone information; a nil location is treated as UTC. Unix times do not use
// the location, a unix time is always returned in the UTC location.
func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) {
func ParseTimestamp(format string, timestamp interface{}, location *time.Location, separator ...string) (time.Time, error) {
switch format {
case "unix", "unix_ms", "unix_us", "unix_ns":
sep := []string{",", "."}
@ -359,10 +359,10 @@ func sanitizeTimestamp(timestamp string, decimalSeparator []string) string {
}
// parseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp string, location string) (time.Time, error) {
loc, err := time.LoadLocation(location)
if err != nil {
return time.Unix(0, 0), err
func parseTime(format string, timestamp string, location *time.Location) (time.Time, error) {
loc := location
if loc == nil {
loc = time.UTC
}
switch strings.ToLower(format) {

View File

@ -670,7 +670,13 @@ func TestParseTimestamp(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...)
var loc *time.Location
if tt.location != "" {
var err error
loc, err = time.LoadLocation(tt.location)
require.NoError(t, err)
}
tm, err := ParseTimestamp(tt.format, tt.timestamp, loc, tt.separator...)
require.NoError(t, err)
require.Equal(t, tt.expected, tm)
})
@ -682,7 +688,6 @@ func TestParseTimestampInvalid(t *testing.T) {
name string
format string
timestamp interface{}
location string
expected string
}{
{
@ -691,13 +696,6 @@ func TestParseTimestampInvalid(t *testing.T) {
timestamp: "2019-02-20 21:50",
expected: "cannot parse \"\" as \":\"",
},
{
name: "invalid timezone",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50:34",
location: "InvalidTimeZone",
expected: "unknown time zone InvalidTimeZone",
},
{
name: "invalid layout",
format: "rfc3399",
@ -737,7 +735,7 @@ func TestParseTimestampInvalid(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := ParseTimestamp(tt.format, tt.timestamp, tt.location)
_, err := ParseTimestamp(tt.format, tt.timestamp, nil)
require.ErrorContains(t, err, tt.expected)
})
}

View File

@ -109,7 +109,7 @@ func (q *Query) parse(acc telegraf.Accumulator, rows *dbsql.Rows, t time.Time) (
return 0, fmt.Errorf("time column %q of type \"%T\" unsupported", name, columnData[i])
}
if !skipParsing {
if timestamp, err = internal.ParseTimestamp(q.TimeFormat, fieldvalue, ""); err != nil {
if timestamp, err = internal.ParseTimestamp(q.TimeFormat, fieldvalue, nil); err != nil {
return 0, fmt.Errorf("parsing time failed: %w", err)
}
}

View File

@ -237,7 +237,7 @@ func (p *Parser) createMetric(data map[string]interface{}, schema string) (teleg
if p.Timestamp != "" {
rawTime := fmt.Sprintf("%v", fields[p.Timestamp])
var err error
timestamp, err = internal.ParseTimestamp(p.TimestampFormat, rawTime, "")
timestamp, err = internal.ParseTimestamp(p.TimestampFormat, rawTime, nil)
if err != nil {
return nil, fmt.Errorf("could not parse '%s' to '%s'", rawTime, p.TimestampFormat)
}

View File

@ -52,6 +52,7 @@ type Parser struct {
Log telegraf.Logger `toml:"-"`
metadataSeparatorList metadataPattern
location *time.Location
gotColumnNames bool
@ -170,6 +171,14 @@ func (p *Parser) Init() error {
p.TimeFunc = time.Now
}
if p.Timezone != "" {
loc, err := time.LoadLocation(p.Timezone)
if err != nil {
return fmt.Errorf("invalid timezone: %w", err)
}
p.location = loc
}
if p.ResetMode == "" {
p.ResetMode = "none"
}
@ -446,7 +455,7 @@ outer:
}
}
metricTime, err := parseTimestamp(p.TimeFunc, recordFields, p.TimestampColumn, p.TimestampFormat, p.Timezone)
metricTime, err := parseTimestamp(p.TimeFunc, recordFields, p.TimestampColumn, p.TimestampFormat, p.location)
if err != nil {
return nil, err
}
@ -464,7 +473,7 @@ outer:
// will be the current timestamp, else it will try to parse the time according
// to the format.
func parseTimestamp(timeFunc func() time.Time, recordFields map[string]interface{},
timestampColumn, timestampFormat string, timezone string,
timestampColumn, timestampFormat string, timezone *time.Location,
) (time.Time, error) {
if timestampColumn != "" {
if recordFields[timestampColumn] == nil {

View File

@ -36,6 +36,7 @@ type Parser struct {
DefaultTags map[string]string `toml:"-"`
Log telegraf.Logger `toml:"-"`
location *time.Location
tagFilter filter.Filter
stringFilter filter.Filter
}
@ -95,7 +96,7 @@ func (p *Parser) parseObject(data map[string]interface{}, timestamp time.Time) (
return nil, err
}
timestamp, err = internal.ParseTimestamp(p.TimeFormat, f.Fields[p.TimeKey], p.Timezone)
timestamp, err = internal.ParseTimestamp(p.TimeFormat, f.Fields[p.TimeKey], p.location)
if err != nil {
return nil, err
}
@ -169,6 +170,14 @@ func (p *Parser) Init() error {
return fmt.Errorf("compiling tag-key filter failed: %w", err)
}
if p.Timezone != "" {
loc, err := time.LoadLocation(p.Timezone)
if err != nil {
return fmt.Errorf("invalid timezone: %w", err)
}
p.location = loc
}
return nil
}

View File

@ -66,6 +66,13 @@ func (p *Parser) Init() error {
if cfg.MeasurementName == "" {
p.Configs[i].MeasurementName = p.DefaultMetricName
}
if cfg.TimestampTimezone != "" {
loc, err := time.LoadLocation(cfg.TimestampTimezone)
if err != nil {
return fmt.Errorf("invalid timezone in config %d: %w", i+1, err)
}
p.Configs[i].Location = loc
}
}
return nil
}
@ -111,7 +118,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) {
}
var err error
timestamp, err = internal.ParseTimestamp(c.TimestampFormat, result.String(), c.TimestampTimezone)
timestamp, err = internal.ParseTimestamp(c.TimestampFormat, result.String(), c.Location)
if err != nil {
return nil, err
@ -321,7 +328,15 @@ func (p *Parser) expandArray(result MetricNode, timestamp time.Time) ([]telegraf
err := fmt.Errorf("use of 'timestamp_query' requires 'timestamp_format'")
return nil, err
}
timestamp, err := internal.ParseTimestamp(p.objectConfig.TimestampFormat, result.String(), p.objectConfig.TimestampTimezone)
var loc *time.Location
if p.objectConfig.TimestampTimezone != "" {
var err error
loc, err = time.LoadLocation(p.objectConfig.TimestampTimezone)
if err != nil {
return nil, fmt.Errorf("invalid timezone: %w", err)
}
}
timestamp, err := internal.ParseTimestamp(p.objectConfig.TimestampFormat, result.String(), loc)
if err != nil {
return nil, err
}

View File

@ -1,5 +1,7 @@
package json_v2
import "time"
// Config definition for backward compatibility ONLY.
// We need this here to avoid cyclic dependencies. However, we need
// to move this to plugins/parsers/json_v2 once we deprecate parser
@ -14,6 +16,8 @@ type Config struct {
Fields []DataSet `toml:"field"`
Tags []DataSet `toml:"tag"`
JSONObjects []Object `toml:"object"`
Location *time.Location
}
type DataSet struct {

View File

@ -1,6 +1,10 @@
package xpath
import "github.com/influxdata/telegraf/filter"
import (
"time"
"github.com/influxdata/telegraf/filter"
)
// Config definition for backward compatibility ONLY.
// We need this here to avoid cyclic dependencies. However, we need
@ -11,6 +15,7 @@ type Config struct {
Selection string `toml:"metric_selection"`
Timestamp string `toml:"timestamp"`
TimestampFmt string `toml:"timestamp_format"`
Timezone string `toml:"timezone"`
Tags map[string]string `toml:"tags"`
Fields map[string]string `toml:"fields"`
FieldsInt map[string]string `toml:"fields_int"`
@ -27,4 +32,5 @@ type Config struct {
TagNameExpand bool `toml:"tag_name_expansion"`
FieldsHexFilter filter.Filter
Location *time.Location
}

View File

@ -140,6 +140,11 @@ This is a list of known headers and the corresponding values for
## This can be any of "unix", "unix_ms", "unix_us", "unix_ns" or a valid Golang
## time format. If not specified, a "unix" timestamp (in seconds) is expected.
# timestamp_format = "2006-01-02T15:04:05Z"
## Optional: Timezone of the parsed time
## This will locate the parsed time in the given timezone. Please note that
## for times with timezone-offsets (e.g. RFC3339) the timestamp is unchanged.
## This is ignored for all (unix) timestamp formats.
# timezone = "UTC"
## Optional: List of fields to convert to hex-strings if they are
## containing byte-arrays. This might be the case for e.g. protocol-buffer
@ -286,7 +291,7 @@ By specifying `metric_name` you can override the metric/measurement name with
the result of the given [XPath][xpath] query. If not specified, the default
metric name is used.
### timestamp, timestamp_format (optional)
### timestamp, timestamp_format, timezone (optional)
By default the current time will be used for all created metrics. To set the
time from values in the XML document you can specify a [XPath][xpath] query in
@ -298,6 +303,16 @@ package for details and additional examples on how to set the time format. If
`timestamp_format` is omitted `unix` format is assumed as result of the
`timestamp` query.
The `timezone` setting will be used to locate the parsed time in the given
timezone. This is helpful for cases where the time does not contain timezone
information, e.g. `2023-03-09 14:04:40` and is not located in _UTC_, which is
the default setting. It is also possible to set the `timezone` to `Local` which
uses the configured host timezone.
For time formats with timezone information, e.g. RFC3339, the resulting
timestamp is unchanged. The `timezone` setting is ignored for all `unix`
timestamp formats.
### tags sub-section
[XPath][xpath] queries in the `tag name = query` format to add tags to the

View File

@ -131,6 +131,15 @@ func (p *Parser) Init() error {
if config.TimestampFmt == "" {
config.TimestampFmt = "unix"
}
if config.Timezone == "" {
config.Location = time.UTC
} else {
loc, err := time.LoadLocation(config.Timezone)
if err != nil {
return fmt.Errorf("invalid location in config %d: %w", i+1, err)
}
config.Location = loc
}
f, err := filter.Compile(config.FieldsHex)
if err != nil {
return fmt.Errorf("creating hex-fields filter failed: %w", err)
@ -232,7 +241,7 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config
return nil, fmt.Errorf("failed to query timestamp: %w", err)
}
if v != nil {
timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "")
timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, config.Location)
if err != nil {
return nil, fmt.Errorf("failed to parse timestamp: %w", err)
}

View File

@ -1405,16 +1405,6 @@ func TestMultipleConfigs(t *testing.T) {
return &file.File{}
})
// Prepare the influx parser for expectations
parser := &influx.Parser{}
require.NoError(t, parser.Init())
// Compare options
options := []cmp.Option{
testutil.IgnoreTime(),
testutil.SortMetrics(),
}
for _, f := range folders {
// Only handle folders
if !f.IsDir() || f.Name() == "protos" {
@ -1426,6 +1416,14 @@ func TestMultipleConfigs(t *testing.T) {
expectedErrorFilename := filepath.Join(testcasePath, "expected.err")
t.Run(f.Name(), func(t *testing.T) {
// Prepare the influx parser for expectations
parser := &influx.Parser{}
require.NoError(t, parser.Init())
parser.SetTimeFunc(func() time.Time { return time.Time{} })
// Compare options
options := []cmp.Option{testutil.SortMetrics()}
// Read the expected output if any
var expected []telegraf.Metric
if _, err := os.Stat(expectedFilename); err == nil {
@ -1433,6 +1431,9 @@ func TestMultipleConfigs(t *testing.T) {
expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser)
require.NoError(t, err)
}
if len(expected) > 0 && expected[0].Time().IsZero() {
options = append(options, testutil.IgnoreTime())
}
// Read the expected errors if any
var expectedErrors []string

View File

@ -1,3 +1,3 @@
devices ok=true,phases_0_load=34,phases_0_voltage=231,phases_1_load=35,phases_1_voltage=231,phases_2_load=36,phases_2_voltage=231,rpm=423,type="Motor" 1662730607000000000
devices flow=3.1414,hours=8762,ok=true,type="Pump" 1662730607000000000
devices ok=true,phases_0_load=341,phases_0_voltage=231,phases_1_load=352,phases_1_voltage=231,phases_2_load=363,phases_2_voltage=231,throughput=1026,type="Machine" 1662730607000000000
devices ok=true,phases_0_load=34,phases_0_voltage=231,phases_1_load=35,phases_1_voltage=231,phases_2_load=36,phases_2_voltage=231,rpm=423,type="Motor"
devices flow=3.1414,hours=8762,ok=true,type="Pump"
devices ok=true,phases_0_load=341,phases_0_voltage=231,phases_1_load=352,phases_1_voltage=231,phases_2_load=363,phases_2_voltage=231,throughput=1026,type="Machine"

View File

@ -1 +1 @@
time_float_exponential truth=42.0 1663830962276000
time_float_exponential truth=42.0 1663830962276000000

View File

@ -0,0 +1 @@
time truth=42.0 1678370680000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_Berlin/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "Europe/Berlin"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-03-09 15:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_CEST/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "CET"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09 16:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_MST/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "MST"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09 07:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1678370680000000000

View File

@ -0,0 +1,12 @@
[[inputs.file]]
files = ["./testcases/time_timezone_utc/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-03-09 14:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_with_offset/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02T15:04:05Z07:00"
timezone = "CET"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09T07:04:40-07:00"
}

View File

@ -208,7 +208,7 @@ func (p *Converter) convertTags(metric telegraf.Metric) {
}
if p.tagConversions.Timestamp != nil && p.tagConversions.Timestamp.Match(key) {
time, err := internal.ParseTimestamp(p.Tags.TimestampFormat, value, "")
time, err := internal.ParseTimestamp(p.Tags.TimestampFormat, value, nil)
if err != nil {
p.Log.Errorf("error converting to timestamp [%T]: %v", value, value)
continue
@ -320,7 +320,7 @@ func (p *Converter) convertFields(metric telegraf.Metric) {
}
if p.fieldConversions.Timestamp != nil && p.fieldConversions.Timestamp.Match(key) {
time, err := internal.ParseTimestamp(p.Fields.TimestampFormat, value, "")
time, err := internal.ParseTimestamp(p.Fields.TimestampFormat, value, nil)
if err != nil {
p.Log.Errorf("error converting to timestamp [%T]: %v", value, value)
continue