feat(parsers.xpath): Add timezone handling (#12820)

This commit is contained in:
Sven Rebhan 2023-03-13 12:17:14 +01:00 committed by GitHub
parent f104c1a708
commit bea5414384
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 201 additions and 41 deletions

View File

@ -253,7 +253,7 @@ func CompressWithGzip(data io.Reader) io.ReadCloser {
// The location is a location string suitable for time.LoadLocation. Unix // The location is a location string suitable for time.LoadLocation. Unix
// times do not use the location string, a unix time is always returned in the // times do not use the location string, a unix time is always returned in the
// UTC location. // UTC location.
func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) { func ParseTimestamp(format string, timestamp interface{}, location *time.Location, separator ...string) (time.Time, error) {
switch format { switch format {
case "unix", "unix_ms", "unix_us", "unix_ns": case "unix", "unix_ms", "unix_us", "unix_ns":
sep := []string{",", "."} sep := []string{",", "."}
@ -359,10 +359,10 @@ func sanitizeTimestamp(timestamp string, decimalSeparator []string) string {
} }
// parseTime parses a string timestamp according to the format string. // parseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp string, location string) (time.Time, error) { func parseTime(format string, timestamp string, location *time.Location) (time.Time, error) {
loc, err := time.LoadLocation(location) loc := location
if err != nil { if loc == nil {
return time.Unix(0, 0), err loc = time.UTC
} }
switch strings.ToLower(format) { switch strings.ToLower(format) {

View File

@ -670,7 +670,13 @@ func TestParseTimestamp(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...) var loc *time.Location
if tt.location != "" {
var err error
loc, err = time.LoadLocation(tt.location)
require.NoError(t, err)
}
tm, err := ParseTimestamp(tt.format, tt.timestamp, loc, tt.separator...)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, tt.expected, tm) require.Equal(t, tt.expected, tm)
}) })
@ -682,7 +688,6 @@ func TestParseTimestampInvalid(t *testing.T) {
name string name string
format string format string
timestamp interface{} timestamp interface{}
location string
expected string expected string
}{ }{
{ {
@ -691,13 +696,6 @@ func TestParseTimestampInvalid(t *testing.T) {
timestamp: "2019-02-20 21:50", timestamp: "2019-02-20 21:50",
expected: "cannot parse \"\" as \":\"", expected: "cannot parse \"\" as \":\"",
}, },
{
name: "invalid timezone",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50:34",
location: "InvalidTimeZone",
expected: "unknown time zone InvalidTimeZone",
},
{ {
name: "invalid layout", name: "invalid layout",
format: "rfc3399", format: "rfc3399",
@ -737,7 +735,7 @@ func TestParseTimestampInvalid(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
_, err := ParseTimestamp(tt.format, tt.timestamp, tt.location) _, err := ParseTimestamp(tt.format, tt.timestamp, nil)
require.ErrorContains(t, err, tt.expected) require.ErrorContains(t, err, tt.expected)
}) })
} }

View File

@ -109,7 +109,7 @@ func (q *Query) parse(acc telegraf.Accumulator, rows *dbsql.Rows, t time.Time) (
return 0, fmt.Errorf("time column %q of type \"%T\" unsupported", name, columnData[i]) return 0, fmt.Errorf("time column %q of type \"%T\" unsupported", name, columnData[i])
} }
if !skipParsing { if !skipParsing {
if timestamp, err = internal.ParseTimestamp(q.TimeFormat, fieldvalue, ""); err != nil { if timestamp, err = internal.ParseTimestamp(q.TimeFormat, fieldvalue, nil); err != nil {
return 0, fmt.Errorf("parsing time failed: %w", err) return 0, fmt.Errorf("parsing time failed: %w", err)
} }
} }

View File

@ -237,7 +237,7 @@ func (p *Parser) createMetric(data map[string]interface{}, schema string) (teleg
if p.Timestamp != "" { if p.Timestamp != "" {
rawTime := fmt.Sprintf("%v", fields[p.Timestamp]) rawTime := fmt.Sprintf("%v", fields[p.Timestamp])
var err error var err error
timestamp, err = internal.ParseTimestamp(p.TimestampFormat, rawTime, "") timestamp, err = internal.ParseTimestamp(p.TimestampFormat, rawTime, nil)
if err != nil { if err != nil {
return nil, fmt.Errorf("could not parse '%s' to '%s'", rawTime, p.TimestampFormat) return nil, fmt.Errorf("could not parse '%s' to '%s'", rawTime, p.TimestampFormat)
} }

View File

@ -52,6 +52,7 @@ type Parser struct {
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
metadataSeparatorList metadataPattern metadataSeparatorList metadataPattern
location *time.Location
gotColumnNames bool gotColumnNames bool
@ -170,6 +171,14 @@ func (p *Parser) Init() error {
p.TimeFunc = time.Now p.TimeFunc = time.Now
} }
if p.Timezone != "" {
loc, err := time.LoadLocation(p.Timezone)
if err != nil {
return fmt.Errorf("invalid timezone: %w", err)
}
p.location = loc
}
if p.ResetMode == "" { if p.ResetMode == "" {
p.ResetMode = "none" p.ResetMode = "none"
} }
@ -446,7 +455,7 @@ outer:
} }
} }
metricTime, err := parseTimestamp(p.TimeFunc, recordFields, p.TimestampColumn, p.TimestampFormat, p.Timezone) metricTime, err := parseTimestamp(p.TimeFunc, recordFields, p.TimestampColumn, p.TimestampFormat, p.location)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -464,7 +473,7 @@ outer:
// will be the current timestamp, else it will try to parse the time according // will be the current timestamp, else it will try to parse the time according
// to the format. // to the format.
func parseTimestamp(timeFunc func() time.Time, recordFields map[string]interface{}, func parseTimestamp(timeFunc func() time.Time, recordFields map[string]interface{},
timestampColumn, timestampFormat string, timezone string, timestampColumn, timestampFormat string, timezone *time.Location,
) (time.Time, error) { ) (time.Time, error) {
if timestampColumn != "" { if timestampColumn != "" {
if recordFields[timestampColumn] == nil { if recordFields[timestampColumn] == nil {

View File

@ -36,6 +36,7 @@ type Parser struct {
DefaultTags map[string]string `toml:"-"` DefaultTags map[string]string `toml:"-"`
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
location *time.Location
tagFilter filter.Filter tagFilter filter.Filter
stringFilter filter.Filter stringFilter filter.Filter
} }
@ -95,7 +96,7 @@ func (p *Parser) parseObject(data map[string]interface{}, timestamp time.Time) (
return nil, err return nil, err
} }
timestamp, err = internal.ParseTimestamp(p.TimeFormat, f.Fields[p.TimeKey], p.Timezone) timestamp, err = internal.ParseTimestamp(p.TimeFormat, f.Fields[p.TimeKey], p.location)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -169,6 +170,14 @@ func (p *Parser) Init() error {
return fmt.Errorf("compiling tag-key filter failed: %w", err) return fmt.Errorf("compiling tag-key filter failed: %w", err)
} }
if p.Timezone != "" {
loc, err := time.LoadLocation(p.Timezone)
if err != nil {
return fmt.Errorf("invalid timezone: %w", err)
}
p.location = loc
}
return nil return nil
} }

View File

@ -66,6 +66,13 @@ func (p *Parser) Init() error {
if cfg.MeasurementName == "" { if cfg.MeasurementName == "" {
p.Configs[i].MeasurementName = p.DefaultMetricName p.Configs[i].MeasurementName = p.DefaultMetricName
} }
if cfg.TimestampTimezone != "" {
loc, err := time.LoadLocation(cfg.TimestampTimezone)
if err != nil {
return fmt.Errorf("invalid timezone in config %d: %w", i+1, err)
}
p.Configs[i].Location = loc
}
} }
return nil return nil
} }
@ -111,7 +118,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) {
} }
var err error var err error
timestamp, err = internal.ParseTimestamp(c.TimestampFormat, result.String(), c.TimestampTimezone) timestamp, err = internal.ParseTimestamp(c.TimestampFormat, result.String(), c.Location)
if err != nil { if err != nil {
return nil, err return nil, err
@ -321,7 +328,15 @@ func (p *Parser) expandArray(result MetricNode, timestamp time.Time) ([]telegraf
err := fmt.Errorf("use of 'timestamp_query' requires 'timestamp_format'") err := fmt.Errorf("use of 'timestamp_query' requires 'timestamp_format'")
return nil, err return nil, err
} }
timestamp, err := internal.ParseTimestamp(p.objectConfig.TimestampFormat, result.String(), p.objectConfig.TimestampTimezone) var loc *time.Location
if p.objectConfig.TimestampTimezone != "" {
var err error
loc, err = time.LoadLocation(p.objectConfig.TimestampTimezone)
if err != nil {
return nil, fmt.Errorf("invalid timezone: %w", err)
}
}
timestamp, err := internal.ParseTimestamp(p.objectConfig.TimestampFormat, result.String(), loc)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -1,5 +1,7 @@
package json_v2 package json_v2
import "time"
// Config definition for backward compatibility ONLY. // Config definition for backward compatibility ONLY.
// We need this here to avoid cyclic dependencies. However, we need // We need this here to avoid cyclic dependencies. However, we need
// to move this to plugins/parsers/json_v2 once we deprecate parser // to move this to plugins/parsers/json_v2 once we deprecate parser
@ -14,6 +16,8 @@ type Config struct {
Fields []DataSet `toml:"field"` Fields []DataSet `toml:"field"`
Tags []DataSet `toml:"tag"` Tags []DataSet `toml:"tag"`
JSONObjects []Object `toml:"object"` JSONObjects []Object `toml:"object"`
Location *time.Location
} }
type DataSet struct { type DataSet struct {

View File

@ -1,6 +1,10 @@
package xpath package xpath
import "github.com/influxdata/telegraf/filter" import (
"time"
"github.com/influxdata/telegraf/filter"
)
// Config definition for backward compatibility ONLY. // Config definition for backward compatibility ONLY.
// We need this here to avoid cyclic dependencies. However, we need // We need this here to avoid cyclic dependencies. However, we need
@ -11,6 +15,7 @@ type Config struct {
Selection string `toml:"metric_selection"` Selection string `toml:"metric_selection"`
Timestamp string `toml:"timestamp"` Timestamp string `toml:"timestamp"`
TimestampFmt string `toml:"timestamp_format"` TimestampFmt string `toml:"timestamp_format"`
Timezone string `toml:"timezone"`
Tags map[string]string `toml:"tags"` Tags map[string]string `toml:"tags"`
Fields map[string]string `toml:"fields"` Fields map[string]string `toml:"fields"`
FieldsInt map[string]string `toml:"fields_int"` FieldsInt map[string]string `toml:"fields_int"`
@ -27,4 +32,5 @@ type Config struct {
TagNameExpand bool `toml:"tag_name_expansion"` TagNameExpand bool `toml:"tag_name_expansion"`
FieldsHexFilter filter.Filter FieldsHexFilter filter.Filter
Location *time.Location
} }

View File

@ -140,6 +140,11 @@ This is a list of known headers and the corresponding values for
## This can be any of "unix", "unix_ms", "unix_us", "unix_ns" or a valid Golang ## This can be any of "unix", "unix_ms", "unix_us", "unix_ns" or a valid Golang
## time format. If not specified, a "unix" timestamp (in seconds) is expected. ## time format. If not specified, a "unix" timestamp (in seconds) is expected.
# timestamp_format = "2006-01-02T15:04:05Z" # timestamp_format = "2006-01-02T15:04:05Z"
## Optional: Timezone of the parsed time
## This will locate the parsed time to the given timezone. Please note that
## for times with timezone-offsets (e.g. RFC3339) the timestamp is unchanged.
## This is ignored for all (unix) timestamp formats.
# timezone = "UTC"
## Optional: List of fields to convert to hex-strings if they are ## Optional: List of fields to convert to hex-strings if they are
## containing byte-arrays. This might be the case for e.g. protocol-buffer ## containing byte-arrays. This might be the case for e.g. protocol-buffer
@ -286,7 +291,7 @@ By specifying `metric_name` you can override the metric/measurement name with
the result of the given [XPath][xpath] query. If not specified, the default the result of the given [XPath][xpath] query. If not specified, the default
metric name is used. metric name is used.
### timestamp, timestamp_format (optional) ### timestamp, timestamp_format, timezone (optional)
By default the current time will be used for all created metrics. To set the By default the current time will be used for all created metrics. To set the
time from values in the XML document you can specify a [XPath][xpath] query in time from values in the XML document you can specify a [XPath][xpath] query in
@ -298,6 +303,16 @@ package for details and additional examples on how to set the time format. If
`timestamp_format` is omitted `unix` format is assumed as result of the `timestamp_format` is omitted `unix` format is assumed as result of the
`timestamp` query. `timestamp` query.
The `timezone` setting will be used to locate the parsed time in the given
timezone. This is helpful for cases where the time does not contain timezone
information, e.g. `2023-03-09 14:04:40`, and is not located in _UTC_, which is
the default setting. It is also possible to set the `timezone` to `Local`, which
uses the configured host timezone.
For time formats with timezone information, e.g. RFC3339, the resulting
timestamp is unchanged. The `timezone` setting is ignored for all `unix`
timestamp formats.
### tags sub-section ### tags sub-section
[XPath][xpath] queries in the `tag name = query` format to add tags to the [XPath][xpath] queries in the `tag name = query` format to add tags to the

View File

@ -131,6 +131,15 @@ func (p *Parser) Init() error {
if config.TimestampFmt == "" { if config.TimestampFmt == "" {
config.TimestampFmt = "unix" config.TimestampFmt = "unix"
} }
if config.Timezone == "" {
config.Location = time.UTC
} else {
loc, err := time.LoadLocation(config.Timezone)
if err != nil {
return fmt.Errorf("invalid location in config %d: %w", i+1, err)
}
config.Location = loc
}
f, err := filter.Compile(config.FieldsHex) f, err := filter.Compile(config.FieldsHex)
if err != nil { if err != nil {
return fmt.Errorf("creating hex-fields filter failed: %w", err) return fmt.Errorf("creating hex-fields filter failed: %w", err)
@ -232,7 +241,7 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config
return nil, fmt.Errorf("failed to query timestamp: %w", err) return nil, fmt.Errorf("failed to query timestamp: %w", err)
} }
if v != nil { if v != nil {
timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "") timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, config.Location)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse timestamp: %w", err) return nil, fmt.Errorf("failed to parse timestamp: %w", err)
} }

View File

@ -1405,16 +1405,6 @@ func TestMultipleConfigs(t *testing.T) {
return &file.File{} return &file.File{}
}) })
// Prepare the influx parser for expectations
parser := &influx.Parser{}
require.NoError(t, parser.Init())
// Compare options
options := []cmp.Option{
testutil.IgnoreTime(),
testutil.SortMetrics(),
}
for _, f := range folders { for _, f := range folders {
// Only handle folders // Only handle folders
if !f.IsDir() || f.Name() == "protos" { if !f.IsDir() || f.Name() == "protos" {
@ -1426,6 +1416,14 @@ func TestMultipleConfigs(t *testing.T) {
expectedErrorFilename := filepath.Join(testcasePath, "expected.err") expectedErrorFilename := filepath.Join(testcasePath, "expected.err")
t.Run(f.Name(), func(t *testing.T) { t.Run(f.Name(), func(t *testing.T) {
// Prepare the influx parser for expectations
parser := &influx.Parser{}
require.NoError(t, parser.Init())
parser.SetTimeFunc(func() time.Time { return time.Time{} })
// Compare options
options := []cmp.Option{testutil.SortMetrics()}
// Read the expected output if any // Read the expected output if any
var expected []telegraf.Metric var expected []telegraf.Metric
if _, err := os.Stat(expectedFilename); err == nil { if _, err := os.Stat(expectedFilename); err == nil {
@ -1433,6 +1431,9 @@ func TestMultipleConfigs(t *testing.T) {
expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser) expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser)
require.NoError(t, err) require.NoError(t, err)
} }
if len(expected) > 0 && expected[0].Time().IsZero() {
options = append(options, testutil.IgnoreTime())
}
// Read the expected errors if any // Read the expected errors if any
var expectedErrors []string var expectedErrors []string

View File

@ -1,3 +1,3 @@
devices ok=true,phases_0_load=34,phases_0_voltage=231,phases_1_load=35,phases_1_voltage=231,phases_2_load=36,phases_2_voltage=231,rpm=423,type="Motor" 1662730607000000000 devices ok=true,phases_0_load=34,phases_0_voltage=231,phases_1_load=35,phases_1_voltage=231,phases_2_load=36,phases_2_voltage=231,rpm=423,type="Motor"
devices flow=3.1414,hours=8762,ok=true,type="Pump" 1662730607000000000 devices flow=3.1414,hours=8762,ok=true,type="Pump"
devices ok=true,phases_0_load=341,phases_0_voltage=231,phases_1_load=352,phases_1_voltage=231,phases_2_load=363,phases_2_voltage=231,throughput=1026,type="Machine" 1662730607000000000 devices ok=true,phases_0_load=341,phases_0_voltage=231,phases_1_load=352,phases_1_voltage=231,phases_2_load=363,phases_2_voltage=231,throughput=1026,type="Machine"

View File

@ -1 +1 @@
time_float_exponential truth=42.0 1663830962276000 time_float_exponential truth=42.0 1663830962276000000

View File

@ -0,0 +1 @@
time truth=42.0 1678370680000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_Berlin/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "Europe/Berlin"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-03-09 15:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_CEST/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "CET"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09 16:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_MST/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
timezone = "MST"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09 07:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1678370680000000000

View File

@ -0,0 +1,12 @@
[[inputs.file]]
files = ["./testcases/time_timezone_utc/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02 15:04:05"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-03-09 14:04:40"
}

View File

@ -0,0 +1 @@
time truth=42.0 1683641080000000000

View File

@ -0,0 +1,13 @@
[[inputs.file]]
files = ["./testcases/time_timezone_with_offset/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time'"
timestamp = "t"
timestamp_format = "2006-01-02T15:04:05Z07:00"
timezone = "CET"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": "2023-05-09T07:04:40-07:00"
}

View File

@ -208,7 +208,7 @@ func (p *Converter) convertTags(metric telegraf.Metric) {
} }
if p.tagConversions.Timestamp != nil && p.tagConversions.Timestamp.Match(key) { if p.tagConversions.Timestamp != nil && p.tagConversions.Timestamp.Match(key) {
time, err := internal.ParseTimestamp(p.Tags.TimestampFormat, value, "") time, err := internal.ParseTimestamp(p.Tags.TimestampFormat, value, nil)
if err != nil { if err != nil {
p.Log.Errorf("error converting to timestamp [%T]: %v", value, value) p.Log.Errorf("error converting to timestamp [%T]: %v", value, value)
continue continue
@ -320,7 +320,7 @@ func (p *Converter) convertFields(metric telegraf.Metric) {
} }
if p.fieldConversions.Timestamp != nil && p.fieldConversions.Timestamp.Match(key) { if p.fieldConversions.Timestamp != nil && p.fieldConversions.Timestamp.Match(key) {
time, err := internal.ParseTimestamp(p.Fields.TimestampFormat, value, "") time, err := internal.ParseTimestamp(p.Fields.TimestampFormat, value, nil)
if err != nil { if err != nil {
p.Log.Errorf("error converting to timestamp [%T]: %v", value, value) p.Log.Errorf("error converting to timestamp [%T]: %v", value, value)
continue continue