fix(parser.xpath): Handle floating-point times correctly (#11875)

This commit is contained in:
Sven Rebhan 2022-10-03 16:32:52 +02:00 committed by GitHub
parent af53478e4f
commit 758f2cba7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 293 additions and 186 deletions

View File

@ -7,17 +7,18 @@ import (
"errors"
"fmt"
"io"
"math"
"math/big"
"math/rand"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
"unicode"
"github.com/influxdata/telegraf/internal/choice"
)
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
@ -257,113 +258,118 @@ func CompressWithGzip(data io.Reader) (io.ReadCloser, error) {
// The location is a location string suitable for time.LoadLocation. Unix
// times do not use the location string; a unix time is always returned in
// the UTC location.
func ParseTimestamp(format string, timestamp interface{}, location string) (time.Time, error) {
func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) {
switch format {
case "unix", "unix_ms", "unix_us", "unix_ns":
return parseUnix(format, timestamp)
default:
if location == "" {
location = "UTC"
}
return parseTime(format, timestamp, location)
}
}
func parseUnix(format string, timestamp interface{}) (time.Time, error) {
integer, fractional, err := parseComponents(timestamp)
if err != nil {
return time.Unix(0, 0), err
}
switch strings.ToLower(format) {
case "unix":
return time.Unix(integer, fractional).UTC(), nil
case "unix_ms":
return time.Unix(0, integer*1e6).UTC(), nil
case "unix_us":
return time.Unix(0, integer*1e3).UTC(), nil
case "unix_ns":
return time.Unix(0, integer).UTC(), nil
sep := []string{",", "."}
if len(separator) > 0 {
sep = separator
}
return parseUnix(format, timestamp, sep)
default:
v, ok := timestamp.(string)
if !ok {
return time.Unix(0, 0), errors.New("unsupported type")
}
return parseTime(format, v, location)
}
}
// Returns the integers before and after an optional decimal point. Both '.'
// and ',' are supported for the decimal point. The timestamp can be an int64,
// float64, or string.
//
// ex: "42.5" -> (42, 5, nil)
func parseComponents(timestamp interface{}) (int64, int64, error) {
switch ts := timestamp.(type) {
// parseUnix parses a timestamp in unix format with different resolutions
func parseUnix(format string, timestamp interface{}, separator []string) (time.Time, error) {
// Extract the scaling factor to nanoseconds from "format"
var factor int64
switch format {
case "unix":
factor = int64(time.Second)
case "unix_ms":
factor = int64(time.Millisecond)
case "unix_us":
factor = int64(time.Microsecond)
case "unix_ns":
factor = int64(time.Nanosecond)
}
zero := time.Unix(0, 0)
// Convert the representation to time
switch v := timestamp.(type) {
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
t, err := ToInt64(v)
if err != nil {
return zero, err
}
return time.Unix(0, t*factor).UTC(), nil
case float32, float64:
ts, err := ToFloat64(v)
if err != nil {
return zero, err
}
// Parse the float as a precise fraction to avoid precision loss
f := big.Rat{}
if f.SetFloat64(ts) == nil {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
case string:
parts := strings.SplitN(ts, ".", 2)
if len(parts) == 2 {
return parseUnixTimeComponents(parts[0], parts[1])
// Sanitize the string to have no thousand separators and dot
// as decimal separator to ease later parsing
v = sanitizeTimestamp(v, separator)
// Parse the string as a precise fraction to avoid precision loss
f := big.Rat{}
if _, ok := f.SetString(v); !ok {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
}
parts = strings.SplitN(ts, ",", 2)
if len(parts) == 2 {
return parseUnixTimeComponents(parts[0], parts[1])
return zero, errors.New("unsupported type")
}
integer, err := strconv.ParseInt(ts, 10, 64)
if err != nil {
return 0, 0, err
// timeFromFraction converts an exact rational timestamp into a UTC time.
//
// f is the timestamp expressed in the unit selected by factor, where factor
// is the number of nanoseconds per unit (e.g. int64(time.Second) for plain
// unix seconds). The scaled value is reduced to whole nanoseconds using
// big.Int.Div (Euclidean division), so any sub-nanosecond remainder is
// discarded.
//
// f is left unmodified. If the scaled value does not fit into an int64 the
// result is undefined, see big.Int.Int64.
func timeFromFraction(f *big.Rat, factor int64) time.Time {
	// Scale the numerator into a fresh big.Int. f.Num() returns a reference
	// to the numerator stored inside f, so multiplying it in place would
	// silently change the caller's value.
	nanos := new(big.Int).Mul(f.Num(), big.NewInt(factor))

	// Keep only the integer (non-fractional) number of nanoseconds.
	nanos.Div(nanos, f.Denom())

	return time.Unix(0, nanos.Int64()).UTC()
}
return integer, 0, nil
case int8:
return int64(ts), 0, nil
case int16:
return int64(ts), 0, nil
case int32:
return int64(ts), 0, nil
case int64:
return ts, 0, nil
case uint8:
return int64(ts), 0, nil
case uint16:
return int64(ts), 0, nil
case uint32:
return int64(ts), 0, nil
case uint64:
return int64(ts), 0, nil
case float32:
integer, fractional := math.Modf(float64(ts))
return int64(integer), int64(fractional * 1e9), nil
case float64:
integer, fractional := math.Modf(ts)
return int64(integer), int64(fractional * 1e9), nil
default:
return 0, 0, errors.New("unsupported type")
// sanitizeTimestamp removes thousand separators and uses dot as
// decimal separator. It returns the sanitized string.
func sanitizeTimestamp(timestamp string, decimalSeparartor []string) string {
// Remove thousand-separators that are not used for decimal separation
sanitized := timestamp
for _, s := range []string{" ", ",", "."} {
if !choice.Contains(s, decimalSeparartor) {
sanitized = strings.ReplaceAll(sanitized, s, "")
}
}
func parseUnixTimeComponents(first, second string) (int64, int64, error) {
integer, err := strconv.ParseInt(first, 10, 64)
if err != nil {
return 0, 0, err
// Replace decimal separators by dot to have a standard, parsable format
for _, s := range decimalSeparartor {
// Make sure we replace only the first occurrence of any separator.
if strings.Contains(sanitized, s) {
return strings.Replace(sanitized, s, ".", 1)
}
}
return sanitized
}
// Convert to nanoseconds, dropping any greater precision.
buf := []byte("000000000")
copy(buf, second)
fractional, err := strconv.ParseInt(string(buf), 10, 64)
if err != nil {
return 0, 0, err
}
return integer, fractional, nil
}
// ParseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp interface{}, location string) (time.Time, error) {
switch ts := timestamp.(type) {
case string:
// parseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp string, location string) (time.Time, error) {
loc, err := time.LoadLocation(location)
if err != nil {
return time.Unix(0, 0), err
}
switch strings.ToLower(format) {
case "ansic":
format = time.ANSIC
@ -394,8 +400,5 @@ func parseTime(format string, timestamp interface{}, location string) (time.Time
case "stampnano":
format = time.StampNano
}
return time.ParseInLocation(format, ts, loc)
default:
return time.Unix(0, 0), errors.New("unsupported type")
}
return time.ParseInLocation(format, timestamp, loc)
}

View File

@ -394,8 +394,8 @@ func TestParseTimestamp(t *testing.T) {
format string
timestamp interface{}
location string
separator []string
expected time.Time
err bool
}{
{
name: "parse layout string in utc",
@ -404,13 +404,6 @@ func TestParseTimestamp(t *testing.T) {
location: "UTC",
expected: rfc3339("2019-02-20T21:50:34Z"),
},
{
name: "parse layout string with invalid timezone",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50:34",
location: "InvalidTimeZone",
err: true,
},
{
name: "layout regression 6386",
format: "02.01.2006 15:04:05",
@ -447,6 +440,48 @@ func TestParseTimestamp(t *testing.T) {
timestamp: "1568338208.00000050042",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
},
{
name: "unix seconds with thousand separator only (dot)",
format: "unix",
timestamp: "1.568.338.208",
separator: []string{","},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (comma)",
format: "unix",
timestamp: "1,568,338,208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (space)",
format: "unix",
timestamp: "1 568 338 208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (underscore)",
format: "unix",
timestamp: "1_568_338_208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand and decimal separator (US)",
format: "unix",
timestamp: "1,568,338,208.500",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix seconds with thousand and decimal separator (EU)",
format: "unix",
timestamp: "1.568.338.208,500",
separator: []string{","},
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix seconds integer",
format: "unix",
@ -459,6 +494,12 @@ func TestParseTimestamp(t *testing.T) {
timestamp: float64(1568338208.500),
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix seconds float exponential",
format: "unix",
timestamp: float64(1.5683382085e+9),
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix milliseconds",
format: "unix_ms",
@ -466,10 +507,10 @@ func TestParseTimestamp(t *testing.T) {
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix milliseconds with fractional is ignored",
name: "unix milliseconds with fractional",
format: "unix_ms",
timestamp: "1568338208500.42",
expected: rfc3339("2019-09-13T01:30:08.500Z"),
expected: rfc3339("2019-09-13T01:30:08.50042Z"),
},
{
name: "unix microseconds",
@ -483,6 +524,12 @@ func TestParseTimestamp(t *testing.T) {
timestamp: "1568338208000000500",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
},
{
name: "unix nanoseconds exponential",
format: "unix_ns",
timestamp: "1.5683382080000005e+18",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
},
{
name: "rfc339 test",
format: "RFC3339",
@ -591,13 +638,75 @@ func TestParseTimestamp(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location)
if tt.err {
require.Error(t, err)
} else {
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...)
require.NoError(t, err)
require.Equal(t, tt.expected, tm)
})
}
}
// TestParseTimestampInvalid feeds malformed or unsupported input to
// ParseTimestamp and verifies that the returned error contains the
// expected message.
func TestParseTimestampInvalid(t *testing.T) {
	// testcase describes one invalid input and the error text it must produce.
	type testcase struct {
		name      string
		format    string
		timestamp interface{}
		location  string
		expected  string
	}

	cases := []testcase{
		{
			name:      "too few digits",
			format:    "2006-01-02 15:04:05",
			timestamp: "2019-02-20 21:50",
			expected:  "cannot parse \"\" as \":\"",
		},
		{
			name:      "invalid timezone",
			format:    "2006-01-02 15:04:05",
			timestamp: "2019-02-20 21:50:34",
			location:  "InvalidTimeZone",
			expected:  "unknown time zone InvalidTimeZone",
		},
		{
			name:      "invalid layout",
			format:    "rfc3399",
			timestamp: "09.07.2019 00:11:00",
			expected:  "cannot parse \"09.07.2019 00:11:00\" as \"rfc\"",
		},
		{
			name:      "layout not matching time",
			format:    "rfc3339",
			timestamp: "09.07.2019 00:11:00",
			expected:  "cannot parse \"7.2019 00:11:00\" as \"2006\"",
		},
		{
			name:      "unix wrong type",
			format:    "unix",
			timestamp: true,
			expected:  "unsupported type",
		},
		{
			name:      "unix multiple separators (dot)",
			format:    "unix",
			timestamp: "1568338.208.500",
			expected:  "invalid number",
		},
		{
			name:      "unix multiple separators (comma)",
			format:    "unix",
			timestamp: "1568338,208,500",
			expected:  "invalid number",
		},
		{
			name:      "unix multiple separators (mixed)",
			format:    "unix",
			timestamp: "1,568,338,208.500",
			expected:  "invalid number",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Only the error matters here; the parsed time is discarded.
			_, err := ParseTimestamp(tc.format, tc.timestamp, tc.location)
			require.ErrorContains(t, err, tc.expected)
		})
	}
}

View File

@ -119,6 +119,17 @@ func (p *Parser) Init() error {
return errors.New("missing default metric name")
}
// Update the configs with default values
for i, config := range p.Configs {
if config.Selection == "" {
config.Selection = "/"
}
if config.TimestampFmt == "" {
config.TimestampFmt = "unix"
}
p.Configs[i] = config
}
return nil
}
@ -138,9 +149,6 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0)
p.Log.Debugf("Number of configs: %d", len(p.Configs))
for _, config := range p.Configs {
if len(config.Selection) == 0 {
config.Selection = "/"
}
selectedNodes, err := p.document.QueryAll(doc, config.Selection)
if err != nil {
return nil, err
@ -213,42 +221,11 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config
if err != nil {
return nil, fmt.Errorf("failed to query timestamp: %v", err)
}
switch v := v.(type) {
case string:
// Parse the string with the given format or assume the string to contain
// a unix timestamp in seconds if no format is given.
if len(config.TimestampFmt) < 1 || strings.HasPrefix(config.TimestampFmt, "unix") {
var nanoseconds int64
t, err := strconv.ParseFloat(v, 64)
if v != nil {
timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "")
if err != nil {
return nil, fmt.Errorf("failed to parse unix timestamp: %v", err)
return nil, fmt.Errorf("failed to parse timestamp: %w", err)
}
switch config.TimestampFmt {
case "unix_ns":
nanoseconds = int64(t)
case "unix_us":
nanoseconds = int64(t * 1e3)
case "unix_ms":
nanoseconds = int64(t * 1e6)
default:
nanoseconds = int64(t * 1e9)
}
timestamp = time.Unix(0, nanoseconds)
} else {
timestamp, err = time.Parse(config.TimestampFmt, v)
if err != nil {
return nil, fmt.Errorf("failed to query timestamp format: %v", err)
}
}
case float64:
// Assume the value to contain a timestamp in seconds and fractions thereof.
timestamp = time.Unix(0, int64(v*1e9))
case nil:
// No timestamp found. Just ignore the time and use "starttime"
default:
return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp)
}
}

View File

@ -0,0 +1 @@
time_float_exponential truth=42.0 1663830962276000

View File

@ -0,0 +1,12 @@
[[inputs.file]]
files = ["./testcases/time_float_exponential/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time_float_exponential'"
timestamp = "t"
timestamp_format = "unix_ms"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": 1.663830962276e+12
}