diff --git a/internal/internal.go b/internal/internal.go index c6e1391c5..d595d1e18 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -7,17 +7,18 @@ import ( "errors" "fmt" "io" - "math" + "math/big" "math/rand" "os" "os/exec" "runtime" - "strconv" "strings" "sync" "syscall" "time" "unicode" + + "github.com/influxdata/telegraf/internal/choice" ) const alphanum string = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" @@ -257,145 +258,147 @@ func CompressWithGzip(data io.Reader) (io.ReadCloser, error) { // The location is a location string suitable for time.LoadLocation. Unix // times do not use the location string, a unix time is always return in the // UTC location. -func ParseTimestamp(format string, timestamp interface{}, location string) (time.Time, error) { +func ParseTimestamp(format string, timestamp interface{}, location string, separator ...string) (time.Time, error) { switch format { case "unix", "unix_ms", "unix_us", "unix_ns": - return parseUnix(format, timestamp) - default: - if location == "" { - location = "UTC" + sep := []string{",", "."} + if len(separator) > 0 { + sep = separator } - return parseTime(format, timestamp, location) + return parseUnix(format, timestamp, sep) + default: + v, ok := timestamp.(string) + if !ok { + return time.Unix(0, 0), errors.New("unsupported type") + } + return parseTime(format, v, location) } } -func parseUnix(format string, timestamp interface{}) (time.Time, error) { - integer, fractional, err := parseComponents(timestamp) +// parseUnix parses a timestamp in unix format with different resolutions +func parseUnix(format string, timestamp interface{}, separator []string) (time.Time, error) { + // Extract the scaling factor to nanoseconds from "format" + var factor int64 + switch format { + case "unix": + factor = int64(time.Second) + case "unix_ms": + factor = int64(time.Millisecond) + case "unix_us": + factor = int64(time.Microsecond) + case "unix_ns": + factor = int64(time.Nanosecond) + 
} + + zero := time.Unix(0, 0) + + // Convert the representation to time + switch v := timestamp.(type) { + case int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64: + t, err := ToInt64(v) + if err != nil { + return zero, err + } + return time.Unix(0, t*factor).UTC(), nil + case float32, float64: + ts, err := ToFloat64(v) + if err != nil { + return zero, err + } + + // Parse the float as a precise fraction to avoid precision loss + f := big.Rat{} + if f.SetFloat64(ts) == nil { + return zero, errors.New("invalid number") + } + return timeFromFraction(&f, factor), nil + case string: + // Sanitize the string to have no thousand separators and dot + // as decimal separator to ease later parsing + v = sanitizeTimestamp(v, separator) + + // Parse the string as a precise fraction to avoid precision loss + f := big.Rat{} + if _, ok := f.SetString(v); !ok { + return zero, errors.New("invalid number") + } + return timeFromFraction(&f, factor), nil + } + + return zero, errors.New("unsupported type") +} + +func timeFromFraction(f *big.Rat, factor int64) time.Time { + // Extract the numerator and denominator and scale to nanoseconds + num := f.Num() + denom := f.Denom() + num.Mul(num, big.NewInt(factor)) + + // Get the integer (non-fractional part) of the timestamp and convert + // it into time + t := big.Int{} + t.Div(num, denom) + + return time.Unix(0, t.Int64()).UTC() +} + +// sanitizeTimestamp removes thousand separators and uses dot as +// decimal separator. Returns the sanitized string. 
+func sanitizeTimestamp(timestamp string, decimalSeparartor []string) string { + // Remove thousand-separators that are not used for decimal separation + sanitized := timestamp + for _, s := range []string{" ", ",", "."} { + if !choice.Contains(s, decimalSeparartor) { + sanitized = strings.ReplaceAll(sanitized, s, "") + } + } + + // Replace decimal separators by dot to have a standard, parsable format + for _, s := range decimalSeparartor { + // Make sure we replace only the first occurrence of any separator. + if strings.Contains(sanitized, s) { + return strings.Replace(sanitized, s, ".", 1) + } + } + return sanitized +} + +// parseTime parses a string timestamp according to the format string. +func parseTime(format string, timestamp string, location string) (time.Time, error) { + loc, err := time.LoadLocation(location) if err != nil { return time.Unix(0, 0), err } switch strings.ToLower(format) { - case "unix": - return time.Unix(integer, fractional).UTC(), nil - case "unix_ms": - return time.Unix(0, integer*1e6).UTC(), nil - case "unix_us": - return time.Unix(0, integer*1e3).UTC(), nil - case "unix_ns": - return time.Unix(0, integer).UTC(), nil - default: - return time.Unix(0, 0), errors.New("unsupported type") - } -} - -// Returns the integers before and after an optional decimal point. Both '.' -// and ',' are supported for the decimal point. The timestamp can be an int64, -// float64, or string. 
-// -// ex: "42.5" -> (42, 5, nil) -func parseComponents(timestamp interface{}) (int64, int64, error) { - switch ts := timestamp.(type) { - case string: - parts := strings.SplitN(ts, ".", 2) - if len(parts) == 2 { - return parseUnixTimeComponents(parts[0], parts[1]) - } - - parts = strings.SplitN(ts, ",", 2) - if len(parts) == 2 { - return parseUnixTimeComponents(parts[0], parts[1]) - } - - integer, err := strconv.ParseInt(ts, 10, 64) - if err != nil { - return 0, 0, err - } - return integer, 0, nil - case int8: - return int64(ts), 0, nil - case int16: - return int64(ts), 0, nil - case int32: - return int64(ts), 0, nil - case int64: - return ts, 0, nil - case uint8: - return int64(ts), 0, nil - case uint16: - return int64(ts), 0, nil - case uint32: - return int64(ts), 0, nil - case uint64: - return int64(ts), 0, nil - case float32: - integer, fractional := math.Modf(float64(ts)) - return int64(integer), int64(fractional * 1e9), nil - case float64: - integer, fractional := math.Modf(ts) - return int64(integer), int64(fractional * 1e9), nil - default: - return 0, 0, errors.New("unsupported type") - } -} - -func parseUnixTimeComponents(first, second string) (int64, int64, error) { - integer, err := strconv.ParseInt(first, 10, 64) - if err != nil { - return 0, 0, err - } - - // Convert to nanoseconds, dropping any greater precision. - buf := []byte("000000000") - copy(buf, second) - - fractional, err := strconv.ParseInt(string(buf), 10, 64) - if err != nil { - return 0, 0, err - } - return integer, fractional, nil -} - -// ParseTime parses a string timestamp according to the format string. 
-func parseTime(format string, timestamp interface{}, location string) (time.Time, error) { - switch ts := timestamp.(type) { - case string: - loc, err := time.LoadLocation(location) - if err != nil { - return time.Unix(0, 0), err - } - switch strings.ToLower(format) { - case "ansic": - format = time.ANSIC - case "unixdate": - format = time.UnixDate - case "rubydate": - format = time.RubyDate - case "rfc822": - format = time.RFC822 - case "rfc822z": - format = time.RFC822Z - case "rfc850": - format = time.RFC850 - case "rfc1123": - format = time.RFC1123 - case "rfc1123z": - format = time.RFC1123Z - case "rfc3339": - format = time.RFC3339 - case "rfc3339nano": - format = time.RFC3339Nano - case "stamp": - format = time.Stamp - case "stampmilli": - format = time.StampMilli - case "stampmicro": - format = time.StampMicro - case "stampnano": - format = time.StampNano - } - return time.ParseInLocation(format, ts, loc) - default: - return time.Unix(0, 0), errors.New("unsupported type") + case "ansic": + format = time.ANSIC + case "unixdate": + format = time.UnixDate + case "rubydate": + format = time.RubyDate + case "rfc822": + format = time.RFC822 + case "rfc822z": + format = time.RFC822Z + case "rfc850": + format = time.RFC850 + case "rfc1123": + format = time.RFC1123 + case "rfc1123z": + format = time.RFC1123Z + case "rfc3339": + format = time.RFC3339 + case "rfc3339nano": + format = time.RFC3339Nano + case "stamp": + format = time.Stamp + case "stampmilli": + format = time.StampMilli + case "stampmicro": + format = time.StampMicro + case "stampnano": + format = time.StampNano } + return time.ParseInLocation(format, timestamp, loc) } diff --git a/internal/internal_test.go b/internal/internal_test.go index 5c02e4098..22f5a3bad 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -394,8 +394,8 @@ func TestParseTimestamp(t *testing.T) { format string timestamp interface{} location string + separator []string expected time.Time - err bool }{ { name: 
"parse layout string in utc", @@ -404,13 +404,6 @@ func TestParseTimestamp(t *testing.T) { location: "UTC", expected: rfc3339("2019-02-20T21:50:34Z"), }, - { - name: "parse layout string with invalid timezone", - format: "2006-01-02 15:04:05", - timestamp: "2019-02-20 21:50:34", - location: "InvalidTimeZone", - err: true, - }, { name: "layout regression 6386", format: "02.01.2006 15:04:05", @@ -447,6 +440,48 @@ func TestParseTimestamp(t *testing.T) { timestamp: "1568338208.00000050042", expected: rfc3339("2019-09-13T01:30:08.000000500Z"), }, + { + name: "unix seconds with thousand separator only (dot)", + format: "unix", + timestamp: "1.568.338.208", + separator: []string{","}, + expected: rfc3339("2019-09-13T01:30:08Z"), + }, + { + name: "unix seconds with thousand separator only (comma)", + format: "unix", + timestamp: "1,568,338,208", + separator: []string{"."}, + expected: rfc3339("2019-09-13T01:30:08Z"), + }, + { + name: "unix seconds with thousand separator only (space)", + format: "unix", + timestamp: "1 568 338 208", + separator: []string{"."}, + expected: rfc3339("2019-09-13T01:30:08Z"), + }, + { + name: "unix seconds with thousand separator only (underscore)", + format: "unix", + timestamp: "1_568_338_208", + separator: []string{"."}, + expected: rfc3339("2019-09-13T01:30:08Z"), + }, + { + name: "unix seconds with thousand and decimal separator (US)", + format: "unix", + timestamp: "1,568,338,208.500", + separator: []string{"."}, + expected: rfc3339("2019-09-13T01:30:08.500Z"), + }, + { + name: "unix seconds with thousand and decimal separator (EU)", + format: "unix", + timestamp: "1.568.338.208,500", + separator: []string{","}, + expected: rfc3339("2019-09-13T01:30:08.500Z"), + }, { name: "unix seconds integer", format: "unix", @@ -459,6 +494,12 @@ func TestParseTimestamp(t *testing.T) { timestamp: float64(1568338208.500), expected: rfc3339("2019-09-13T01:30:08.500Z"), }, + { + name: "unix seconds float exponential", + format: "unix", + timestamp: 
float64(1.5683382085e+9), + expected: rfc3339("2019-09-13T01:30:08.500Z"), + }, { name: "unix milliseconds", format: "unix_ms", @@ -466,10 +507,10 @@ func TestParseTimestamp(t *testing.T) { expected: rfc3339("2019-09-13T01:30:08.500Z"), }, { - name: "unix milliseconds with fractional is ignored", + name: "unix milliseconds with fractional", format: "unix_ms", timestamp: "1568338208500.42", - expected: rfc3339("2019-09-13T01:30:08.500Z"), + expected: rfc3339("2019-09-13T01:30:08.50042Z"), }, { name: "unix microseconds", @@ -483,6 +524,12 @@ func TestParseTimestamp(t *testing.T) { timestamp: "1568338208000000500", expected: rfc3339("2019-09-13T01:30:08.000000500Z"), }, + { + name: "unix nanoseconds exponential", + format: "unix_ns", + timestamp: "1.5683382080000005e+18", + expected: rfc3339("2019-09-13T01:30:08.000000500Z"), + }, { name: "rfc339 test", format: "RFC3339", @@ -591,13 +638,75 @@ func TestParseTimestamp(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location) - if tt.err { - require.Error(t, err) - } else { - require.NoError(t, err) - require.Equal(t, tt.expected, tm) - } + tm, err := ParseTimestamp(tt.format, tt.timestamp, tt.location, tt.separator...) 
+ require.NoError(t, err) + require.Equal(t, tt.expected, tm) + }) + } +} + +func TestParseTimestampInvalid(t *testing.T) { + tests := []struct { + name string + format string + timestamp interface{} + location string + expected string + }{ + { + name: "too few digits", + format: "2006-01-02 15:04:05", + timestamp: "2019-02-20 21:50", + expected: "cannot parse \"\" as \":\"", + }, + { + name: "invalid timezone", + format: "2006-01-02 15:04:05", + timestamp: "2019-02-20 21:50:34", + location: "InvalidTimeZone", + expected: "unknown time zone InvalidTimeZone", + }, + { + name: "invalid layout", + format: "rfc3399", + timestamp: "09.07.2019 00:11:00", + expected: "cannot parse \"09.07.2019 00:11:00\" as \"rfc\"", + }, + { + name: "layout not matching time", + format: "rfc3339", + timestamp: "09.07.2019 00:11:00", + expected: "cannot parse \"7.2019 00:11:00\" as \"2006\"", + }, + { + name: "unix wrong type", + format: "unix", + timestamp: true, + expected: "unsupported type", + }, + { + name: "unix multiple separators (dot)", + format: "unix", + timestamp: "1568338.208.500", + expected: "invalid number", + }, + { + name: "unix multiple separators (comma)", + format: "unix", + timestamp: "1568338,208,500", + expected: "invalid number", + }, + { + name: "unix multiple separators (mixed)", + format: "unix", + timestamp: "1,568,338,208.500", + expected: "invalid number", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := ParseTimestamp(tt.format, tt.timestamp, tt.location) + require.ErrorContains(t, err, tt.expected) }) } } diff --git a/plugins/parsers/xpath/parser.go b/plugins/parsers/xpath/parser.go index 7d3e5e930..3fd3bdf15 100644 --- a/plugins/parsers/xpath/parser.go +++ b/plugins/parsers/xpath/parser.go @@ -119,6 +119,17 @@ func (p *Parser) Init() error { return errors.New("missing default metric name") } + // Update the configs with default values + for i, config := range p.Configs { + if config.Selection == "" { + 
config.Selection = "/" + } + if config.TimestampFmt == "" { + config.TimestampFmt = "unix" + } + p.Configs[i] = config + } + return nil } @@ -138,9 +149,6 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { metrics := make([]telegraf.Metric, 0) p.Log.Debugf("Number of configs: %d", len(p.Configs)) for _, config := range p.Configs { - if len(config.Selection) == 0 { - config.Selection = "/" - } selectedNodes, err := p.document.QueryAll(doc, config.Selection) if err != nil { return nil, err @@ -213,42 +221,11 @@ func (p *Parser) parseQuery(starttime time.Time, doc, selected dataNode, config if err != nil { return nil, fmt.Errorf("failed to query timestamp: %v", err) } - switch v := v.(type) { - case string: - // Parse the string with the given format or assume the string to contain - // a unix timestamp in seconds if no format is given. - if len(config.TimestampFmt) < 1 || strings.HasPrefix(config.TimestampFmt, "unix") { - var nanoseconds int64 - - t, err := strconv.ParseFloat(v, 64) - if err != nil { - return nil, fmt.Errorf("failed to parse unix timestamp: %v", err) - } - - switch config.TimestampFmt { - case "unix_ns": - nanoseconds = int64(t) - case "unix_us": - nanoseconds = int64(t * 1e3) - case "unix_ms": - nanoseconds = int64(t * 1e6) - default: - nanoseconds = int64(t * 1e9) - } - timestamp = time.Unix(0, nanoseconds) - } else { - timestamp, err = time.Parse(config.TimestampFmt, v) - if err != nil { - return nil, fmt.Errorf("failed to query timestamp format: %v", err) - } + if v != nil { + timestamp, err = internal.ParseTimestamp(config.TimestampFmt, v, "") + if err != nil { + return nil, fmt.Errorf("failed to parse timestamp: %w", err) } - case float64: - // Assume the value to contain a timestamp in seconds and fractions thereof. - timestamp = time.Unix(0, int64(v*1e9)) - case nil: - // No timestamp found. 
Just ignore the time and use "starttime" - default: - return nil, fmt.Errorf("unknown format '%T' for timestamp query '%v'", v, config.Timestamp) } } diff --git a/plugins/parsers/xpath/testcases/time_float_exponential/expected.out b/plugins/parsers/xpath/testcases/time_float_exponential/expected.out new file mode 100644 index 000000000..d158287be --- /dev/null +++ b/plugins/parsers/xpath/testcases/time_float_exponential/expected.out @@ -0,0 +1 @@ +time_float_exponential truth=42.0 1663830962276000 diff --git a/plugins/parsers/xpath/testcases/time_float_exponential/telegraf.conf b/plugins/parsers/xpath/testcases/time_float_exponential/telegraf.conf new file mode 100644 index 000000000..2617a4352 --- /dev/null +++ b/plugins/parsers/xpath/testcases/time_float_exponential/telegraf.conf @@ -0,0 +1,12 @@ +[[inputs.file]] + files = ["./testcases/time_float_exponential/test.json"] + data_format = "xpath_json" + xpath_native_types = true + + [[inputs.file.xpath]] + metric_name = "'time_float_exponential'" + timestamp = "t" + timestamp_format = "unix_ms" + field_selection = "." + field_name = "id" + field_value = "v" \ No newline at end of file diff --git a/plugins/parsers/xpath/testcases/time_float_exponential/test.json b/plugins/parsers/xpath/testcases/time_float_exponential/test.json new file mode 100644 index 000000000..da6cd05cc --- /dev/null +++ b/plugins/parsers/xpath/testcases/time_float_exponential/test.json @@ -0,0 +1,5 @@ +{ + "id": "truth", + "v": 42, + "t": 1.663830962276e+12 +}