fix(parser.xpath): Handle floating-point times correctly (#11875)
This commit is contained in:
parent
af53478e4f
commit
758f2cba7a
|
|
@ -7,17 +7,18 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"math/big"
|
||||
"math/rand"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"syscall"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"github.com/influxdata/telegraf/internal/choice"
|
||||
)
|
||||
|
||||
const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
|
||||
|
|
@ -257,113 +258,118 @@ func CompressWithGzip(data io.Reader) (io.ReadCloser, error) {
|
|||
// The location is a location string suitable for time.LoadLocation. Unix
|
||||
// times do not use the location string, a unix time is always returned in the
|
||||
// UTC location.
|
||||
func ParseTimestamp(format string, timestamp interface{}, location string) (time.Time, error) {
|
||||
func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) {
|
||||
switch format {
|
||||
case "unix", "unix_ms", "unix_us", "unix_ns":
|
||||
return parseUnix(format, timestamp)
|
||||
default:
|
||||
if location == "" {
|
||||
location = "UTC"
|
||||
}
|
||||
return parseTime(format, timestamp, location)
|
||||
}
|
||||
}
|
||||
|
||||
func parseUnix(format string, timestamp interface{}) (time.Time, error) {
|
||||
integer, fractional, err := parseComponents(timestamp)
|
||||
if err != nil {
|
||||
return time.Unix(0, 0), err
|
||||
}
|
||||
|
||||
switch strings.ToLower(format) {
|
||||
case "unix":
|
||||
return time.Unix(integer, fractional).UTC(), nil
|
||||
case "unix_ms":
|
||||
return time.Unix(0, integer*1e6).UTC(), nil
|
||||
case "unix_us":
|
||||
return time.Unix(0, integer*1e3).UTC(), nil
|
||||
case "unix_ns":
|
||||
return time.Unix(0, integer).UTC(), nil
|
||||
sep := []string{",", "."}
|
||||
if len(separator) > 0 {
|
||||
sep = separator
|
||||
}
|
||||
return parseUnix(format, timestamp, sep)
|
||||
default:
|
||||
v, ok := timestamp.(string)
|
||||
if !ok {
|
||||
return time.Unix(0, 0), errors.New("unsupported type")
|
||||
}
|
||||
return parseTime(format, v, location)
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the integers before and after an optional decimal point. Both '.'
|
||||
// and ',' are supported for the decimal point. The timestamp can be an int64,
|
||||
// float64, or string.
|
||||
//
|
||||
// ex: "42.5" -> (42, 5, nil)
|
||||
func parseComponents(timestamp interface{}) (int64, int64, error) {
|
||||
switch ts := timestamp.(type) {
|
||||
// parseUnix parses a timestamp in unix format with different resolutions
|
||||
func parseUnix(format string, timestamp interface{}, separator []string) (time.Time, error) {
|
||||
// Extract the scaling factor to nanoseconds from "format"
|
||||
var factor int64
|
||||
switch format {
|
||||
case "unix":
|
||||
factor = int64(time.Second)
|
||||
case "unix_ms":
|
||||
factor = int64(time.Millisecond)
|
||||
case "unix_us":
|
||||
factor = int64(time.Microsecond)
|
||||
case "unix_ns":
|
||||
factor = int64(time.Nanosecond)
|
||||
}
|
||||
|
||||
zero := time.Unix(0, 0)
|
||||
|
||||
// Convert the representation to time
|
||||
switch v := timestamp.(type) {
|
||||
case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64:
|
||||
t, err := ToInt64(v)
|
||||
if err != nil {
|
||||
return zero, err
|
||||
}
|
||||
return time.Unix(0, t*factor).UTC(), nil
|
||||
case float32, float64:
|
||||
ts, err := ToFloat64(v)
|
||||
if err != nil {
|
||||
return zero, err
|
||||
}
|
||||
|
||||
// Parse the float as a precise fraction to avoid precision loss
|
||||
f := big.Rat{}
|
||||
if f.SetFloat64(ts) == nil {
|
||||
return zero, errors.New("invalid number")
|
||||
}
|
||||
return timeFromFraction(&f, factor), nil
|
||||
case string:
|
||||
parts := strings.SplitN(ts, ".", 2)
|
||||
if len(parts) == 2 {
|
||||
return parseUnixTimeComponents(parts[0], parts[1])
|
||||
// Sanitize the string to have no thousand separators and dot
|
||||
// as decimal separator to ease later parsing
|
||||
v = sanitizeTimestamp(v, separator)
|
||||
|
||||
// Parse the string as a precise fraction to avoid precision loss
|
||||
f := big.Rat{}
|
||||
if _, ok := f.SetString(v); !ok {
|
||||
return zero, errors.New("invalid number")
|
||||
}
|
||||
return timeFromFraction(&f, factor), nil
|
||||
}
|
||||
|
||||
parts = strings.SplitN(ts, ",", 2)
|
||||
if len(parts) == 2 {
|
||||
return parseUnixTimeComponents(parts[0], parts[1])
|
||||
return zero, errors.New("unsupported type")
|
||||
}
|
||||
|
||||
integer, err := strconv.ParseInt(ts, 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
// timeFromFraction converts the exact rational timestamp f into a time.Time
// in UTC. The value of f is expressed in units of factor nanoseconds, e.g.
// factor == int64(time.Millisecond) means f counts milliseconds since the
// Unix epoch. Any precision finer than one nanosecond is dropped.
//
// f is only read; it is never modified.
func timeFromFraction(f *big.Rat, factor int64) time.Time {
	// Scale the numerator to nanoseconds. Rat.Num returns a reference to the
	// numerator stored inside f, so the product must be written to a fresh
	// big.Int; multiplying in place would silently rescale the caller's value
	// and leave the fraction un-normalized.
	scaled := new(big.Int).Mul(f.Num(), big.NewInt(factor))

	// Take the integer (non-fractional) number of nanoseconds. The
	// denominator of a big.Rat is always positive, so the Euclidean division
	// performed by Int.Div rounds towards negative infinity here.
	nanos := new(big.Int).Div(scaled, f.Denom())

	// NOTE(review): Int64 is undefined for values outside the int64 range;
	// timestamps that far from the epoch are not detected here.
	return time.Unix(0, nanos.Int64()).UTC()
}
|
||||
return integer, 0, nil
|
||||
case int8:
|
||||
return int64(ts), 0, nil
|
||||
case int16:
|
||||
return int64(ts), 0, nil
|
||||
case int32:
|
||||
return int64(ts), 0, nil
|
||||
case int64:
|
||||
return ts, 0, nil
|
||||
case uint8:
|
||||
return int64(ts), 0, nil
|
||||
case uint16:
|
||||
return int64(ts), 0, nil
|
||||
case uint32:
|
||||
return int64(ts), 0, nil
|
||||
case uint64:
|
||||
return int64(ts), 0, nil
|
||||
case float32:
|
||||
integer, fractional := math.Modf(float64(ts))
|
||||
return int64(integer), int64(fractional * 1e9), nil
|
||||
case float64:
|
||||
integer, fractional := math.Modf(ts)
|
||||
return int64(integer), int64(fractional * 1e9), nil
|
||||
default:
|
||||
return 0, 0, errors.New("unsupported type")
|
||||
|
||||
// sanitizeTimestamp removes thousand separators and uses dot as
|
||||
// decimal separator. It returns the sanitized string.
|
||||
func sanitizeTimestamp(timestamp string, decimalSeparartor []string) string {
|
||||
// Remove thousand-separators that are not used for decimal separation
|
||||
sanitized := timestamp
|
||||
for _, s := range []string{" ", ",", "."} {
|
||||
if !choice.Contains(s, decimalSeparartor) {
|
||||
sanitized = strings.ReplaceAll(sanitized, s, "")
|
||||
}
|
||||
}
|
||||
|
||||
func parseUnixTimeComponents(first, second string) (int64, int64, error) {
|
||||
integer, err := strconv.ParseInt(first, 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
// Replace decimal separators by dot to have a standard, parsable format
|
||||
for _, s := range decimalSeparartor {
|
||||
// Make sure we replace only the first occurrence of any separator.
|
||||
if strings.Contains(sanitized, s) {
|
||||
return strings.Replace(sanitized, s, ".", 1)
|
||||
}
|
||||
}
|
||||
return sanitized
|
||||
}
|
||||
|
||||
// Convert to nanoseconds, dropping any greater precision.
|
||||
buf := []byte("000000000")
|
||||
copy(buf, second)
|
||||
|
||||
fractional, err := strconv.ParseInt(string(buf), 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return integer, fractional, nil
|
||||
}
|
||||
|
||||
// ParseTime parses a string timestamp according to the format string.
|
||||
func parseTime(format string, timestamp interface{}, location string) (time.Time, error) {
|
||||
switch ts := timestamp.(type) {
|
||||
case string:
|
||||
// parseTime parses a string timestamp according to the format string.
|
||||
func parseTime(format string, timestamp string, location string) (time.Time, error) {
|
||||
loc, err := time.LoadLocation(location)
|
||||
if err != nil {
|
||||
return time.Unix(0, 0), err
|
||||
}
|
||||
|
||||
switch strings.ToLower(format) {
|
||||
case "ansic":
|
||||
format = time.ANSIC
|
||||
|
|
@ -394,8 +400,5 @@ func parseTime(format string, timestamp interface{}, location string) (time.Time
|
|||
case "stampnano":
|
||||
format = time.StampNano
|
||||
}
|
||||
return time.ParseInLocation(format, ts, loc)
|
||||
default:
|
||||
return time.Unix(0, 0), errors.New("unsupported type")
|
||||
}
|
||||
return time.ParseInLocation(format, timestamp, loc)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -394,8 +394,8 @@ func TestParseTimestamp(t *testing.T) {
|
|||
format string
|
||||
timestamp interface{}
|
||||
location string
|
||||
separator []string
|
||||
expected time.Time
|
||||
err bool
|
||||
}{
|
||||
{
|
||||
name: "parse layout string in utc",
|
||||
|
|
@ -404,13 +404,6 @@ func TestParseTimestamp(t *testing.T) {
|
|||
location: "UTC",
|
||||
expected: rfc3339("2019-02-20T21:50:34Z"),
|
||||
},
|
||||
{
|
||||
name: "parse layout string with invalid timezone",
|
||||
format: "2006-01-02 15:04:05",
|
||||
timestamp: "2019-02-20 21:50:34",
|
||||
location: "InvalidTimeZone",
|
||||
err: true,
|
||||
},
|
||||
{
|
||||
name: "layout regression 6386",
|
||||
format: "02.01.2006 15:04:05",
|
||||
|
|
@ -447,6 +440,48 @@ func TestParseTimestamp(t *testing.T) {
|
|||
timestamp: "1568338208.00000050042",
|
||||
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand separator only (dot)",
|
||||
format: "unix",
|
||||
timestamp: "1.568.338.208",
|
||||
separator: []string{","},
|
||||
expected: rfc3339("2019-09-13T01:30:08Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand separator only (comma)",
|
||||
format: "unix",
|
||||
timestamp: "1,568,338,208",
|
||||
separator: []string{"."},
|
||||
expected: rfc3339("2019-09-13T01:30:08Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand separator only (space)",
|
||||
format: "unix",
|
||||
timestamp: "1 568 338 208",
|
||||
separator: []string{"."},
|
||||
expected: rfc3339("2019-09-13T01:30:08Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand separator only (underscore)",
|
||||
format: "unix",
|
||||
timestamp: "1_568_338_208",
|
||||
separator: []string{"."},
|
||||
expected: rfc3339("2019-09-13T01:30:08Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand and decimal separator (US)",
|
||||
format: "unix",
|
||||
timestamp: "1,568,338,208.500",
|
||||
separator: []string{"."},
|
||||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds with thousand and decimal separator (EU)",
|
||||
format: "unix",
|
||||
timestamp: "1.568.338.208,500",
|
||||
separator: []string{","},
|
||||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds integer",
|
||||
format: "unix",
|
||||
|
|
@ -459,6 +494,12 @@ func TestParseTimestamp(t *testing.T) {
|
|||
timestamp: float64(1568338208.500),
|
||||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix seconds float exponential",
|
||||
format: "unix",
|
||||
timestamp: float64(1.5683382085e+9),
|
||||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix milliseconds",
|
||||
format: "unix_ms",
|
||||
|
|
@ -466,10 +507,10 @@ func TestParseTimestamp(t *testing.T) {
|
|||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix milliseconds with fractional is ignored",
|
||||
name: "unix milliseconds with fractional",
|
||||
format: "unix_ms",
|
||||
timestamp: "1568338208500.42",
|
||||
expected: rfc3339("2019-09-13T01:30:08.500Z"),
|
||||
expected: rfc3339("2019-09-13T01:30:08.50042Z"),
|
||||
},
|
||||
{
|
||||
name: "unix microseconds",
|
||||
|
|
@ -483,6 +524,12 @@ func TestParseTimestamp(t *testing.T) {
|
|||
timestamp: "1568338208000000500",
|
||||
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
|
||||
},
|
||||
{
|
||||
name: "unix nanoseconds exponential",
|
||||
format: "unix_ns",
|
||||
timestamp: "1.5683382080000005e+18",
|
||||
expected: rfc3339("2019-09-13T01:30:08.000000500Z"),
|
||||
},
|
||||
{
|
||||
name: "rfc339 test",
|
||||
format: "RFC3339",
|
||||
|
|
@ -591,13 +638,75 @@ func TestParseTimestamp(t *testing.T) {
|
|||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location)
|
||||
if tt.err {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tt.expected, tm)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseTimestampInvalid(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
format string
|
||||
timestamp interface{}
|
||||
location string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "too few digits",
|
||||
format: "2006-01-02 15:04:05",
|
||||
timestamp: "2019-02-20 21:50",
|
||||
expected: "cannot parse \"\" as \":\"",
|
||||
},
|
||||
{
|
||||
name: "invalid timezone",
|
||||
format: "2006-01-02 15:04:05",
|
||||
timestamp: "2019-02-20 21:50:34",
|
||||
location: "InvalidTimeZone",
|
||||
expected: "unknown time zone InvalidTimeZone",
|
||||
},
|
||||
{
|
||||
name: "invalid layout",
|
||||
format: "rfc3399",
|
||||
timestamp: "09.07.2019 00:11:00",
|
||||
expected: "cannot parse \"09.07.2019 00:11:00\" as \"rfc\"",
|
||||
},
|
||||
{
|
||||
name: "layout not matching time",
|
||||
format: "rfc3339",
|
||||
timestamp: "09.07.2019 00:11:00",
|
||||
expected: "cannot parse \"7.2019 00:11:00\" as \"2006\"",
|
||||
},
|
||||
{
|
||||
name: "unix wrong type",
|
||||
format: "unix",
|
||||
timestamp: true,
|
||||
expected: "unsupported type",
|
||||
},
|
||||
{
|
||||
name: "unix multiple separators (dot)",
|
||||
format: "unix",
|
||||
timestamp: "1568338.208.500",
|
||||
expected: "invalid number",
|
||||
},
|
||||
{
|
||||
name: "unix multiple separators (comma)",
|
||||
format: "unix",
|
||||
timestamp: "1568338,208,500",
|
||||
expected: "invalid number",
|
||||
},
|
||||
{
|
||||
name: "unix multiple separators (mixed)",
|
||||
format: "unix",
|
||||
timestamp: "1,568,338,208.500",
|
||||
expected: "invalid number",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, err := ParseTimestamp(tt.format, tt.timestamp, tt.location)
|
||||
require.ErrorContains(t, err, tt.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -119,6 +119,17 @@ func (p *Parser) Init() error {
|
|||
return errors.New("missing default metric name")
|
||||
}
|
||||
|
||||
// Update the configs with default values
|
||||
for i, config := range p.Configs {
|
||||
if config.Selection == "" {
|
||||
config.Selection = "/"
|
||||
}
|
||||
if config.TimestampFmt == "" {
|
||||
config.TimestampFmt = "unix"
|
||||
}
|
||||
p.Configs[i] = config
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -138,9 +149,6 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
|
|||
metrics := make([]telegraf.Metric, 0)
|
||||
p.Log.Debugf("Number of configs: %d", len(p.Configs))
|
||||
for _, config := range p.Configs {
|
||||
if len(config.Selection) == 0 {
|
||||
config.Selection = "/"
|
||||
}
|
||||
selectedNodes, err := p.document.QueryAll(doc, config.Selection)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -213,42 +221,11 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config
|
|||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query timestamp: %v", err)
|
||||
}
|
||||
switch v := v.(type) {
|
||||
case string:
|
||||
// Parse the string with the given format or assume the string to contain
|
||||
// a unix timestamp in seconds if no format is given.
|
||||
if len(config.TimestampFmt) < 1 || strings.HasPrefix(config.TimestampFmt, "unix") {
|
||||
var nanoseconds int64
|
||||
|
||||
t, err := strconv.ParseFloat(v, 64)
|
||||
if v != nil {
|
||||
timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse unix timestamp: %v", err)
|
||||
return nil, fmt.Errorf("failed to parse timestamp: %w", err)
|
||||
}
|
||||
|
||||
switch config.TimestampFmt {
|
||||
case "unix_ns":
|
||||
nanoseconds = int64(t)
|
||||
case "unix_us":
|
||||
nanoseconds = int64(t * 1e3)
|
||||
case "unix_ms":
|
||||
nanoseconds = int64(t * 1e6)
|
||||
default:
|
||||
nanoseconds = int64(t * 1e9)
|
||||
}
|
||||
timestamp = time.Unix(0, nanoseconds)
|
||||
} else {
|
||||
timestamp, err = time.Parse(config.TimestampFmt, v)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query timestamp format: %v", err)
|
||||
}
|
||||
}
|
||||
case float64:
|
||||
// Assume the value to contain a timestamp in seconds and fractions thereof.
|
||||
timestamp = time.Unix(0, int64(v*1e9))
|
||||
case nil:
|
||||
// No timestamp found. Just ignore the time and use "starttime"
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
time_float_exponential truth=42.0 1663830962276000
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
[[inputs.file]]
|
||||
files = ["./testcases/time_float_exponential/test.json"]
|
||||
data_format = "xpath_json"
|
||||
xpath_native_types = true
|
||||
|
||||
[[inputs.file.xpath]]
|
||||
metric_name = "'time_float_exponential'"
|
||||
timestamp = "t"
|
||||
timestamp_format = "unix_ms"
|
||||
field_selection = "."
|
||||
field_name = "id"
|
||||
field_value = "v"
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"id": "truth",
|
||||
"v": 42,
|
||||
"t": 1.663830962276e+12
|
||||
}
|
||||
Loading…
Reference in New Issue