fix(parser.xpath): Handle floating-point times correctly (#11875)

This commit is contained in:
Sven Rebhan 2022-10-03 16:32:52 +02:00 committed by GitHub
parent af53478e4f
commit 758f2cba7a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 293 additions and 186 deletions

View File

@ -7,17 +7,18 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"math" "math/big"
"math/rand" "math/rand"
"os" "os"
"os/exec" "os/exec"
"runtime" "runtime"
"strconv"
"strings" "strings"
"sync" "sync"
"syscall" "syscall"
"time" "time"
"unicode" "unicode"
"github.com/influxdata/telegraf/internal/choice"
) )
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
@ -257,145 +258,147 @@ func CompressWithGzip(data io.Reader) (io.ReadCloser, error) {
// The location is a location string suitable for time.LoadLocation. Unix // The location is a location string suitable for time.LoadLocation. Unix
// times do not use the location string, a unix time is always returned in the // times do not use the location string, a unix time is always returned in the
// UTC location. // UTC location.
func ParseTimestamp(format string, timestamp interface{}, location string) (time.Time, error) { func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) {
switch format { switch format {
case "unix", "unix_ms", "unix_us", "unix_ns": case "unix", "unix_ms", "unix_us", "unix_ns":
return parseUnix(format, timestamp) sep := []string{",", "."}
default: if len(separator) > 0 {
if location == "" { sep = separator
location = "UTC"
} }
return parseTime(format, timestamp, location) return parseUnix(format, timestamp, sep)
default:
v, ok := timestamp.(string)
if !ok {
return time.Unix(0, 0), errors.New("unsupported type")
}
return parseTime(format, v, location)
} }
} }
func parseUnix(format string, timestamp interface{}) (time.Time, error) { // parseUnix parses a timestamp in unix format with different resolutions
integer, fractional, err := parseComponents(timestamp) func parseUnix(format string, timestamp interface{}, separator []string) (time.Time, error) {
// Extract the scaling factor to nanoseconds from "format"
var factor int64
switch format {
case "unix":
factor = int64(time.Second)
case "unix_ms":
factor = int64(time.Millisecond)
case "unix_us":
factor = int64(time.Microsecond)
case "unix_ns":
factor = int64(time.Nanosecond)
}
zero := time.Unix(0, 0)
// Convert the representation to time
switch v := timestamp.(type) {
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
t, err := ToInt64(v)
if err != nil {
return zero, err
}
return time.Unix(0, t*factor).UTC(), nil
case float32, float64:
ts, err := ToFloat64(v)
if err != nil {
return zero, err
}
// Parse the float as a precise fraction to avoid precision loss
f := big.Rat{}
if f.SetFloat64(ts) == nil {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
case string:
// Sanitize the string to have no thousand separators and dot
// as decimal separator to ease later parsing
v = sanitizeTimestamp(v, separator)
// Parse the string as a precise fraction to avoid precision loss
f := big.Rat{}
if _, ok := f.SetString(v); !ok {
return zero, errors.New("invalid number")
}
return timeFromFraction(&f, factor), nil
}
return zero, errors.New("unsupported type")
}
// timeFromFraction converts the rational number f, expressed in units of
// "factor" nanoseconds (e.g. int64(time.Second) for seconds), into a time in
// the UTC location. Any remainder below one nanosecond is dropped by the
// integer division.
//
// The value pointed to by f is not modified.
//
// NOTE(review): the result of big.Int.Int64 is documented as undefined when
// the value does not fit into an int64, so timestamps beyond the nanosecond
// range of int64 are not detected here.
func timeFromFraction(f *big.Rat, factor int64) time.Time {
	// Scale the numerator to nanoseconds. Rat.Num returns a reference to the
	// numerator of f, so the product is stored in a fresh big.Int instead of
	// multiplying in place, which would silently change the caller's value.
	scaled := new(big.Int).Mul(f.Num(), big.NewInt(factor))

	// Get the integer (non-fractional) part of the nanosecond timestamp and
	// convert it into time.
	nanos := new(big.Int).Div(scaled, f.Denom())
	return time.Unix(0, nanos.Int64()).UTC()
}
// sanitizeTimestamp rewrites a textual number so that it contains no grouping
// characters and uses a dot as decimal separator.
//
// The characters " ", "," and "." are removed from the string unless they are
// listed in decimalSeparator. Afterwards the entries of decimalSeparator are
// checked in order and, for the first one that is present in the string, only
// its first occurrence is replaced by a dot. Other occurrences of decimal
// separators are left in place; presumably this lets the subsequent number
// parsing reject inputs with more than one decimal separator.
//
// The sanitized string is returned; no validation is performed here.
func sanitizeTimestamp(timestamp string, decimalSeparator []string) string {
	// Remove thousand-separators that are not used for decimal separation
	sanitized := timestamp
	for _, s := range []string{" ", ",", "."} {
		if !choice.Contains(s, decimalSeparator) {
			sanitized = strings.ReplaceAll(sanitized, s, "")
		}
	}

	// Replace the decimal separator by a dot to have a standard, parsable
	// format
	for _, s := range decimalSeparator {
		// Make sure we replace only the first occurrence of any separator.
		if strings.Contains(sanitized, s) {
			return strings.Replace(sanitized, s, ".", 1)
		}
	}
	return sanitized
}
// parseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp string, location string) (time.Time, error) {
loc, err := time.LoadLocation(location)
if err != nil { if err != nil {
return time.Unix(0, 0), err return time.Unix(0, 0), err
} }
switch strings.ToLower(format) { switch strings.ToLower(format) {
case "unix": case "ansic":
return time.Unix(integer, fractional).UTC(), nil format = time.ANSIC
case "unix_ms": case "unixdate":
return time.Unix(0, integer*1e6).UTC(), nil format = time.UnixDate
case "unix_us": case "rubydate":
return time.Unix(0, integer*1e3).UTC(), nil format = time.RubyDate
case "unix_ns": case "rfc822":
return time.Unix(0, integer).UTC(), nil format = time.RFC822
default: case "rfc822z":
return time.Unix(0, 0), errors.New("unsupported type") format = time.RFC822Z
} case "rfc850":
} format = time.RFC850
case "rfc1123":
// Returns the integers before and after an optional decimal point. Both '.' format = time.RFC1123
// and ',' are supported for the decimal point. The timestamp can be an int64, case "rfc1123z":
// float64, or string. format = time.RFC1123Z
// case "rfc3339":
// ex: "42.5" -> (42, 5, nil) format = time.RFC3339
func parseComponents(timestamp interface{}) (int64, int64, error) { case "rfc3339nano":
switch ts := timestamp.(type) { format = time.RFC3339Nano
case string: case "stamp":
parts := strings.SplitN(ts, ".", 2) format = time.Stamp
if len(parts) == 2 { case "stampmilli":
return parseUnixTimeComponents(parts[0], parts[1]) format = time.StampMilli
} case "stampmicro":
format = time.StampMicro
parts = strings.SplitN(ts, ",", 2) case "stampnano":
if len(parts) == 2 { format = time.StampNano
return parseUnixTimeComponents(parts[0], parts[1])
}
integer, err := strconv.ParseInt(ts, 10, 64)
if err != nil {
return 0, 0, err
}
return integer, 0, nil
case int8:
return int64(ts), 0, nil
case int16:
return int64(ts), 0, nil
case int32:
return int64(ts), 0, nil
case int64:
return ts, 0, nil
case uint8:
return int64(ts), 0, nil
case uint16:
return int64(ts), 0, nil
case uint32:
return int64(ts), 0, nil
case uint64:
return int64(ts), 0, nil
case float32:
integer, fractional := math.Modf(float64(ts))
return int64(integer), int64(fractional * 1e9), nil
case float64:
integer, fractional := math.Modf(ts)
return int64(integer), int64(fractional * 1e9), nil
default:
return 0, 0, errors.New("unsupported type")
}
}
func parseUnixTimeComponents(first, second string) (int64, int64, error) {
integer, err := strconv.ParseInt(first, 10, 64)
if err != nil {
return 0, 0, err
}
// Convert to nanoseconds, dropping any greater precision.
buf := []byte("000000000")
copy(buf, second)
fractional, err := strconv.ParseInt(string(buf), 10, 64)
if err != nil {
return 0, 0, err
}
return integer, fractional, nil
}
// ParseTime parses a string timestamp according to the format string.
func parseTime(format string, timestamp interface{}, location string) (time.Time, error) {
switch ts := timestamp.(type) {
case string:
loc, err := time.LoadLocation(location)
if err != nil {
return time.Unix(0, 0), err
}
switch strings.ToLower(format) {
case "ansic":
format = time.ANSIC
case "unixdate":
format = time.UnixDate
case "rubydate":
format = time.RubyDate
case "rfc822":
format = time.RFC822
case "rfc822z":
format = time.RFC822Z
case "rfc850":
format = time.RFC850
case "rfc1123":
format = time.RFC1123
case "rfc1123z":
format = time.RFC1123Z
case "rfc3339":
format = time.RFC3339
case "rfc3339nano":
format = time.RFC3339Nano
case "stamp":
format = time.Stamp
case "stampmilli":
format = time.StampMilli
case "stampmicro":
format = time.StampMicro
case "stampnano":
format = time.StampNano
}
return time.ParseInLocation(format, ts, loc)
default:
return time.Unix(0, 0), errors.New("unsupported type")
} }
return time.ParseInLocation(format, timestamp, loc)
} }

View File

@ -394,8 +394,8 @@ func TestParseTimestamp(t *testing.T) {
format string format string
timestamp interface{} timestamp interface{}
location string location string
separator []string
expected time.Time expected time.Time
err bool
}{ }{
{ {
name: "parse layout string in utc", name: "parse layout string in utc",
@ -404,13 +404,6 @@ func TestParseTimestamp(t *testing.T) {
location: "UTC", location: "UTC",
expected: rfc3339("2019-02-20T21:50:34Z"), expected: rfc3339("2019-02-20T21:50:34Z"),
}, },
{
name: "parse layout string with invalid timezone",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50:34",
location: "InvalidTimeZone",
err: true,
},
{ {
name: "layout regression 6386", name: "layout regression 6386",
format: "02.01.2006 15:04:05", format: "02.01.2006 15:04:05",
@ -447,6 +440,48 @@ func TestParseTimestamp(t *testing.T) {
timestamp: "1568338208.00000050042", timestamp: "1568338208.00000050042",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"), expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
}, },
{
name: "unix seconds with thousand separator only (dot)",
format: "unix",
timestamp: "1.568.338.208",
separator: []string{","},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (comma)",
format: "unix",
timestamp: "1,568,338,208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (space)",
format: "unix",
timestamp: "1 568 338 208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand separator only (underscore)",
format: "unix",
timestamp: "1_568_338_208",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08Z"),
},
{
name: "unix seconds with thousand and decimal separator (US)",
format: "unix",
timestamp: "1,568,338,208.500",
separator: []string{"."},
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{
name: "unix seconds with thousand and decimal separator (EU)",
format: "unix",
timestamp: "1.568.338.208,500",
separator: []string{","},
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{ {
name: "unix seconds integer", name: "unix seconds integer",
format: "unix", format: "unix",
@ -459,6 +494,12 @@ func TestParseTimestamp(t *testing.T) {
timestamp: float64(1568338208.500), timestamp: float64(1568338208.500),
expected: rfc3339("2019-09-13T01:30:08.500Z"), expected: rfc3339("2019-09-13T01:30:08.500Z"),
}, },
{
name: "unix seconds float exponential",
format: "unix",
timestamp: float64(1.5683382085e+9),
expected: rfc3339("2019-09-13T01:30:08.500Z"),
},
{ {
name: "unix milliseconds", name: "unix milliseconds",
format: "unix_ms", format: "unix_ms",
@ -466,10 +507,10 @@ func TestParseTimestamp(t *testing.T) {
expected: rfc3339("2019-09-13T01:30:08.500Z"), expected: rfc3339("2019-09-13T01:30:08.500Z"),
}, },
{ {
name: "unix milliseconds with fractional is ignored", name: "unix milliseconds with fractional",
format: "unix_ms", format: "unix_ms",
timestamp: "1568338208500.42", timestamp: "1568338208500.42",
expected: rfc3339("2019-09-13T01:30:08.500Z"), expected: rfc3339("2019-09-13T01:30:08.50042Z"),
}, },
{ {
name: "unix microseconds", name: "unix microseconds",
@ -483,6 +524,12 @@ func TestParseTimestamp(t *testing.T) {
timestamp: "1568338208000000500", timestamp: "1568338208000000500",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"), expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
}, },
{
name: "unix nanoseconds exponential",
format: "unix_ns",
timestamp: "1.5683382080000005e+18",
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
},
{ {
name: "rfc339 test", name: "rfc339 test",
format: "RFC3339", format: "RFC3339",
@ -591,13 +638,75 @@ func TestParseTimestamp(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location) tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...)
if tt.err { require.NoError(t, err)
require.Error(t, err) require.Equal(t, tt.expected, tm)
} else { })
require.NoError(t, err) }
require.Equal(t, tt.expected, tm) }
}
func TestParseTimestampInvalid(t *testing.T) {
tests := []struct {
name string
format string
timestamp interface{}
location string
expected string
}{
{
name: "too few digits",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50",
expected: "cannot parse \"\" as \":\"",
},
{
name: "invalid timezone",
format: "2006-01-02 15:04:05",
timestamp: "2019-02-20 21:50:34",
location: "InvalidTimeZone",
expected: "unknown time zone InvalidTimeZone",
},
{
name: "invalid layout",
format: "rfc3399",
timestamp: "09.07.2019 00:11:00",
expected: "cannot parse \"09.07.2019 00:11:00\" as \"rfc\"",
},
{
name: "layout not matching time",
format: "rfc3339",
timestamp: "09.07.2019 00:11:00",
expected: "cannot parse \"7.2019 00:11:00\" as \"2006\"",
},
{
name: "unix wrong type",
format: "unix",
timestamp: true,
expected: "unsupported type",
},
{
name: "unix multiple separators (dot)",
format: "unix",
timestamp: "1568338.208.500",
expected: "invalid number",
},
{
name: "unix multiple separators (comma)",
format: "unix",
timestamp: "1568338,208,500",
expected: "invalid number",
},
{
name: "unix multiple separators (mixed)",
format: "unix",
timestamp: "1,568,338,208.500",
expected: "invalid number",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := ParseTimestamp(tt.format, tt.timestamp, tt.location)
require.ErrorContains(t, err, tt.expected)
}) })
} }
} }

View File

@ -119,6 +119,17 @@ func (p *Parser) Init() error {
return errors.New("missing default metric name") return errors.New("missing default metric name")
} }
// Update the configs with default values
for i, config := range p.Configs {
if config.Selection == "" {
config.Selection = "/"
}
if config.TimestampFmt == "" {
config.TimestampFmt = "unix"
}
p.Configs[i] = config
}
return nil return nil
} }
@ -138,9 +149,6 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0) metrics := make([]telegraf.Metric, 0)
p.Log.Debugf("Number of configs: %d", len(p.Configs)) p.Log.Debugf("Number of configs: %d", len(p.Configs))
for _, config := range p.Configs { for _, config := range p.Configs {
if len(config.Selection) == 0 {
config.Selection = "/"
}
selectedNodes, err := p.document.QueryAll(doc, config.Selection) selectedNodes, err := p.document.QueryAll(doc, config.Selection)
if err != nil { if err != nil {
return nil, err return nil, err
@ -213,42 +221,11 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to query timestamp: %v", err) return nil, fmt.Errorf("failed to query timestamp: %v", err)
} }
switch v := v.(type) { if v != nil {
case string: timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "")
// Parse the string with the given format or assume the string to contain if err != nil {
// a unix timestamp in seconds if no format is given. return nil, fmt.Errorf("failed to parse timestamp: %w", err)
if len(config.TimestampFmt) < 1 || strings.HasPrefix(config.TimestampFmt, "unix") {
var nanoseconds int64
t, err := strconv.ParseFloat(v, 64)
if err != nil {
return nil, fmt.Errorf("failed to parse unix timestamp: %v", err)
}
switch config.TimestampFmt {
case "unix_ns":
nanoseconds = int64(t)
case "unix_us":
nanoseconds = int64(t * 1e3)
case "unix_ms":
nanoseconds = int64(t * 1e6)
default:
nanoseconds = int64(t * 1e9)
}
timestamp = time.Unix(0, nanoseconds)
} else {
timestamp, err = time.Parse(config.TimestampFmt, v)
if err != nil {
return nil, fmt.Errorf("failed to query timestamp format: %v", err)
}
} }
case float64:
// Assume the value to contain a timestamp in seconds and fractions thereof.
timestamp = time.Unix(0, int64(v*1e9))
case nil:
// No timestamp found. Just ignore the time and use "starttime"
default:
return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp)
} }
} }

View File

@ -0,0 +1 @@
time_float_exponential truth=42.0 1663830962276000

View File

@ -0,0 +1,12 @@
[[inputs.file]]
files = ["./testcases/time_float_exponential/test.json"]
data_format = "xpath_json"
xpath_native_types = true
[[inputs.file.xpath]]
metric_name = "'time_float_exponential'"
timestamp = "t"
timestamp_format = "unix_ms"
field_selection = "."
field_name = "id"
field_value = "v"

View File

@ -0,0 +1,5 @@
{
"id": "truth",
"v": 42,
"t": 1.663830962276e+12
}