Support utf-16 in file and tail inputs (#7792)
This commit is contained in:
parent
876fc5b4bc
commit
be77fbab31
6
go.mod
6
go.mod
|
|
@ -1,6 +1,6 @@
|
|||
module github.com/influxdata/telegraf
|
||||
|
||||
go 1.12
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.53.0
|
||||
|
|
@ -38,6 +38,7 @@ require (
|
|||
github.com/couchbase/goutils v0.0.0-20180530154633-e865a1461c8a // indirect
|
||||
github.com/denisenkom/go-mssqldb v0.0.0-20190707035753-2be1aa521ff4
|
||||
github.com/dgrijalva/jwt-go v3.2.0+incompatible
|
||||
github.com/dimchansky/utfbom v1.1.0
|
||||
github.com/docker/distribution v2.6.0-rc.1.0.20170726174610-edc3ab29cdff+incompatible // indirect
|
||||
github.com/docker/docker v1.4.2-0.20180327123150-ed7b6428c133
|
||||
github.com/docker/go-connections v0.3.0 // indirect
|
||||
|
|
@ -71,7 +72,7 @@ require (
|
|||
github.com/hashicorp/memberlist v0.1.5 // indirect
|
||||
github.com/hashicorp/serf v0.8.1 // indirect
|
||||
github.com/influxdata/go-syslog/v2 v2.0.1
|
||||
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41
|
||||
github.com/influxdata/tail v1.0.1-0.20200707181643-03a791b270e4
|
||||
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65
|
||||
github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8
|
||||
github.com/jackc/fake v0.0.0-20150926172116-812a484cc733 // indirect
|
||||
|
|
@ -133,6 +134,7 @@ require (
|
|||
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d
|
||||
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
|
||||
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4
|
||||
golang.org/x/text v0.3.3
|
||||
golang.org/x/tools v0.0.0-20200317043434-63da46f3035e // indirect
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20200205215550-e35592f146e4
|
||||
gonum.org/v1/gonum v0.6.2 // indirect
|
||||
|
|
|
|||
8
go.sum
8
go.sum
|
|
@ -335,8 +335,8 @@ github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpO
|
|||
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
|
||||
github.com/influxdata/go-syslog/v2 v2.0.1 h1:l44S4l4Q8MhGQcoOxJpbo+QQYxJqp0vdgIVHh4+DO0s=
|
||||
github.com/influxdata/go-syslog/v2 v2.0.1/go.mod h1:hjvie1UTaD5E1fTnDmxaCw8RRDrT4Ve+XHr5O2dKSCo=
|
||||
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41 h1:HxQo1NpNXQDpvEBzthbQLmePvTLFTa5GzSFUjL03aEs=
|
||||
github.com/influxdata/tail v1.0.1-0.20180327235535-c43482518d41/go.mod h1:xTFF2SILpIYc5N+Srb0d5qpx7d+f733nBrbasb13DtQ=
|
||||
github.com/influxdata/tail v1.0.1-0.20200707181643-03a791b270e4 h1:K3A5vHPs/p8OjI4SL3l1+hs/98mhxTVDcV1Ap0c265E=
|
||||
github.com/influxdata/tail v1.0.1-0.20200707181643-03a791b270e4/go.mod h1:VeiWgI3qaGdJWust2fP27a6J+koITo/1c/UhxeOxgaM=
|
||||
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65 h1:vvyMtD5LTJc1W9sQKjDkAWdcg0478CszSdzlHtiAXCY=
|
||||
github.com/influxdata/toml v0.0.0-20190415235208-270119a8ce65/go.mod h1:zApaNFpP/bTpQItGZNNUMISDMDAnTXu9UqJ4yT3ocz8=
|
||||
github.com/influxdata/wlog v0.0.0-20160411224016-7c63b0a71ef8 h1:W2IgzRCb0L9VzMujq/QuTaZUKcH8096jWwP519mHN6Q=
|
||||
|
|
@ -720,6 +720,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
|||
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c h1:fqgJT0MGcGpPgpWU7VRdRjuArfcOvC4AoJmILihzhDg=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
|
@ -838,6 +840,7 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8
|
|||
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
|
||||
gopkg.in/fatih/pool.v2 v2.0.0 h1:xIFeWtxifuQJGk/IEPKsTduEKcKvPmhoiVDGpC40nKg=
|
||||
gopkg.in/fatih/pool.v2 v2.0.0/go.mod h1:8xVGeu1/2jr2wm5V9SPuMht2H5AEmf5aFMGSQixtjTY=
|
||||
gopkg.in/fsnotify.v1 v1.2.1/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=
|
||||
gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys=
|
||||
gopkg.in/gorethink/gorethink.v3 v3.0.5 h1:e2Uc/Xe+hpcVQFsj6MuHlYog3r0JYpnTzwDj/y2O4MU=
|
||||
|
|
@ -861,6 +864,7 @@ gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY
|
|||
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
|
||||
gopkg.in/olivere/elastic.v5 v5.0.70 h1:DqFG2Odzs74JCz6SssgJjd6qpGnsOAzNc7+l5EnvsnE=
|
||||
gopkg.in/olivere/elastic.v5 v5.0.70/go.mod h1:FylZT6jQWtfHsicejzOm3jIMVPOAksa80i3o+6qtQRk=
|
||||
gopkg.in/tomb.v1 v1.0.0-20140529071818-c131134a1947/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
|
||||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
package encoding
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
type Decoder = encoding.Decoder
|
||||
|
||||
// NewDecoder returns a x/text Decoder for the specified text encoding. The
|
||||
// Decoder converts a character encoding into utf-8 bytes. If a BOM is found
|
||||
// it will be converted into a utf-8 BOM, you can use
|
||||
// github.com/dimchansky/utfbom to strip the BOM.
|
||||
//
|
||||
// The "none" or "" encoding will pass through bytes unchecked. Use the utf-8
|
||||
// encoding if you want invalid bytes replaced using the the unicode
|
||||
// replacement character.
|
||||
//
|
||||
// Detection of utf-16 endianness using the BOM is not currently provided due
|
||||
// to the tail input plugins requirement to be able to start at the middle or
|
||||
// end of the file.
|
||||
func NewDecoder(enc string) (*Decoder, error) {
|
||||
switch enc {
|
||||
case "utf-8":
|
||||
return unicode.UTF8.NewDecoder(), nil
|
||||
case "utf-16le":
|
||||
return unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM).NewDecoder(), nil
|
||||
case "utf-16be":
|
||||
return unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewDecoder(), nil
|
||||
case "none", "":
|
||||
return encoding.Nop.NewDecoder(), nil
|
||||
}
|
||||
return nil, errors.New("unknown character encoding")
|
||||
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
package encoding
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDecoder(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
encoding string
|
||||
input []byte
|
||||
expected []byte
|
||||
expectedErr bool
|
||||
}{
|
||||
{
|
||||
name: "no decoder utf-8",
|
||||
encoding: "",
|
||||
input: []byte("howdy"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-8 decoder",
|
||||
encoding: "utf-8",
|
||||
input: []byte("howdy"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-8 decoder invalid bytes replaced with replacement char",
|
||||
encoding: "utf-8",
|
||||
input: []byte("\xff\xfe"),
|
||||
expected: []byte("\uFFFD\uFFFD"),
|
||||
},
|
||||
{
|
||||
name: "utf-16le decoder no BOM",
|
||||
encoding: "utf-16le",
|
||||
input: []byte("h\x00o\x00w\x00d\x00y\x00"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16le decoder with BOM",
|
||||
encoding: "utf-16le",
|
||||
input: []byte("\xff\xfeh\x00o\x00w\x00d\x00y\x00"),
|
||||
expected: []byte("\xef\xbb\xbfhowdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16be decoder no BOM",
|
||||
encoding: "utf-16be",
|
||||
input: []byte("\x00h\x00o\x00w\x00d\x00y"),
|
||||
expected: []byte("howdy"),
|
||||
},
|
||||
{
|
||||
name: "utf-16be decoder with BOM",
|
||||
encoding: "utf-16be",
|
||||
input: []byte("\xfe\xff\x00h\x00o\x00w\x00d\x00y"),
|
||||
expected: []byte("\xef\xbb\xbfhowdy"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
decoder, err := NewDecoder(tt.encoding)
|
||||
require.NoError(t, err)
|
||||
buf := bytes.NewBuffer(tt.input)
|
||||
r := decoder.Reader(buf)
|
||||
actual, err := ioutil.ReadAll(r)
|
||||
if tt.expectedErr {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, tt.expected, actual)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -3,10 +3,13 @@ package file
|
|||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/dimchansky/utfbom"
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal/globpath"
|
||||
"github.com/influxdata/telegraf/plugins/common/encoding"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
)
|
||||
|
|
@ -14,9 +17,11 @@ import (
|
|||
type File struct {
|
||||
Files []string `toml:"files"`
|
||||
FileTag string `toml:"file_tag"`
|
||||
CharacterEncoding string `toml:"character_encoding"`
|
||||
parser parsers.Parser
|
||||
|
||||
filenames []string
|
||||
decoder *encoding.Decoder
|
||||
}
|
||||
|
||||
const sampleConfig = `
|
||||
|
|
@ -24,15 +29,24 @@ const sampleConfig = `
|
|||
## as well as ** to match recursive files and directories.
|
||||
files = ["/tmp/metrics.out"]
|
||||
|
||||
## Name a tag containing the name of the file the data was parsed from. Leave empty
|
||||
## to disable.
|
||||
# file_tag = ""
|
||||
|
||||
## Character encoding to use when interpreting the file contents. Invalid
|
||||
## characters are replaced using the unicode replacement character. When set
|
||||
## to the empty string the data is not decoded to text.
|
||||
## ex: character_encoding = "utf-8"
|
||||
## character_encoding = "utf-16le"
|
||||
## character_encoding = "utf-16be"
|
||||
## character_encoding = ""
|
||||
# character_encoding = ""
|
||||
|
||||
## The dataformat to be read from files
|
||||
## Each data format has its own unique set of configuration options, read
|
||||
## more about them here:
|
||||
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
|
||||
data_format = "influx"
|
||||
|
||||
## Name a tag containing the name of the file the data was parsed from. Leave empty
|
||||
## to disable.
|
||||
# file_tag = ""
|
||||
`
|
||||
|
||||
// SampleConfig returns the default configuration of the Input
|
||||
|
|
@ -44,6 +58,12 @@ func (f *File) Description() string {
|
|||
return "Parse a complete file each interval"
|
||||
}
|
||||
|
||||
func (f *File) Init() error {
|
||||
var err error
|
||||
f.decoder, err = encoding.NewDecoder(f.CharacterEncoding)
|
||||
return err
|
||||
}
|
||||
|
||||
func (f *File) Gather(acc telegraf.Accumulator) error {
|
||||
err := f.refreshFilePaths()
|
||||
if err != nil {
|
||||
|
|
@ -59,7 +79,7 @@ func (f *File) Gather(acc telegraf.Accumulator) error {
|
|||
if f.FileTag != "" {
|
||||
m.AddTag(f.FileTag, filepath.Base(k))
|
||||
}
|
||||
acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
|
||||
acc.AddMetric(m)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
|
@ -88,12 +108,18 @@ func (f *File) refreshFilePaths() error {
|
|||
}
|
||||
|
||||
func (f *File) readMetric(filename string) ([]telegraf.Metric, error) {
|
||||
fileContents, err := ioutil.ReadFile(filename)
|
||||
file, err := os.Open(filename)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
r, _ := utfbom.Skip(f.decoder.Reader(file))
|
||||
fileContents, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("E! Error file: %v could not be read, %s", filename, err)
|
||||
}
|
||||
return f.parser.Parse(fileContents)
|
||||
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
|
|
|||
|
|
@ -4,8 +4,11 @@ import (
|
|||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/csv"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
|
@ -16,6 +19,8 @@ func TestRefreshFilePaths(t *testing.T) {
|
|||
r := File{
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/**.log")},
|
||||
}
|
||||
err = r.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
err = r.refreshFilePaths()
|
||||
require.NoError(t, err)
|
||||
|
|
@ -30,6 +35,8 @@ func TestFileTag(t *testing.T) {
|
|||
Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
|
||||
FileTag: "filename",
|
||||
}
|
||||
err = r.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
parserConfig := parsers.Config{
|
||||
DataFormat: "json",
|
||||
|
|
@ -55,6 +62,8 @@ func TestJSONParserCompile(t *testing.T) {
|
|||
r := File{
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/json_a.log")},
|
||||
}
|
||||
err := r.Init()
|
||||
require.NoError(t, err)
|
||||
parserConfig := parsers.Config{
|
||||
DataFormat: "json",
|
||||
TagKeys: []string{"parent_ignored_child"},
|
||||
|
|
@ -74,6 +83,8 @@ func TestGrokParser(t *testing.T) {
|
|||
r := File{
|
||||
Files: []string{filepath.Join(wd, "dev/testfiles/grok_a.log")},
|
||||
}
|
||||
err := r.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
parserConfig := parsers.Config{
|
||||
DataFormat: "grok",
|
||||
|
|
@ -87,3 +98,152 @@ func TestGrokParser(t *testing.T) {
|
|||
err = r.Gather(&acc)
|
||||
assert.Equal(t, len(acc.Metrics), 2)
|
||||
}
|
||||
|
||||
func TestCharacterEncoding(t *testing.T) {
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric("file",
|
||||
map[string]string{
|
||||
"dest": "example.org",
|
||||
"hop": "1",
|
||||
"ip": "12.122.114.5",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"avg": 21.55,
|
||||
"best": 19.34,
|
||||
"loss": 0.0,
|
||||
"snt": 10,
|
||||
"status": "OK",
|
||||
"stdev": 2.05,
|
||||
"worst": 26.83,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("file",
|
||||
map[string]string{
|
||||
"dest": "example.org",
|
||||
"hop": "2",
|
||||
"ip": "192.205.32.238",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"avg": 25.11,
|
||||
"best": 20.8,
|
||||
"loss": 0.0,
|
||||
"snt": 10,
|
||||
"status": "OK",
|
||||
"stdev": 6.03,
|
||||
"worst": 38.85,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("file",
|
||||
map[string]string{
|
||||
"dest": "example.org",
|
||||
"hop": "3",
|
||||
"ip": "152.195.85.133",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"avg": 20.18,
|
||||
"best": 19.75,
|
||||
"loss": 0.0,
|
||||
"snt": 10,
|
||||
"status": "OK",
|
||||
"stdev": 0.0,
|
||||
"worst": 20.78,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("file",
|
||||
map[string]string{
|
||||
"dest": "example.org",
|
||||
"hop": "4",
|
||||
"ip": "93.184.216.34",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"avg": 24.02,
|
||||
"best": 19.75,
|
||||
"loss": 0.0,
|
||||
"snt": 10,
|
||||
"status": "OK",
|
||||
"stdev": 4.67,
|
||||
"worst": 32.41,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
plugin *File
|
||||
csv *csv.Config
|
||||
file string
|
||||
}{
|
||||
{
|
||||
name: "empty character_encoding with utf-8",
|
||||
plugin: &File{
|
||||
Files: []string{"testdata/mtr-utf-8.csv"},
|
||||
CharacterEncoding: "",
|
||||
},
|
||||
csv: &csv.Config{
|
||||
MetricName: "file",
|
||||
SkipRows: 1,
|
||||
ColumnNames: []string{"", "", "status", "dest", "hop", "ip", "loss", "snt", "", "", "avg", "best", "worst", "stdev"},
|
||||
TagColumns: []string{"dest", "hop", "ip"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "utf-8 character_encoding with utf-8",
|
||||
plugin: &File{
|
||||
Files: []string{"testdata/mtr-utf-8.csv"},
|
||||
CharacterEncoding: "utf-8",
|
||||
},
|
||||
csv: &csv.Config{
|
||||
MetricName: "file",
|
||||
SkipRows: 1,
|
||||
ColumnNames: []string{"", "", "status", "dest", "hop", "ip", "loss", "snt", "", "", "avg", "best", "worst", "stdev"},
|
||||
TagColumns: []string{"dest", "hop", "ip"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "utf-16le character_encoding with utf-16le",
|
||||
plugin: &File{
|
||||
Files: []string{"testdata/mtr-utf-16le.csv"},
|
||||
CharacterEncoding: "utf-16le",
|
||||
},
|
||||
csv: &csv.Config{
|
||||
MetricName: "file",
|
||||
SkipRows: 1,
|
||||
ColumnNames: []string{"", "", "status", "dest", "hop", "ip", "loss", "snt", "", "", "avg", "best", "worst", "stdev"},
|
||||
TagColumns: []string{"dest", "hop", "ip"},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "utf-16be character_encoding with utf-16be",
|
||||
plugin: &File{
|
||||
Files: []string{"testdata/mtr-utf-16be.csv"},
|
||||
CharacterEncoding: "utf-16be",
|
||||
},
|
||||
csv: &csv.Config{
|
||||
MetricName: "file",
|
||||
SkipRows: 1,
|
||||
ColumnNames: []string{"", "", "status", "dest", "hop", "ip", "loss", "snt", "", "", "avg", "best", "worst", "stdev"},
|
||||
TagColumns: []string{"dest", "hop", "ip"},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := tt.plugin.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
parser, err := csv.NewParser(tt.csv)
|
||||
require.NoError(t, err)
|
||||
tt.plugin.SetParser(parser)
|
||||
|
||||
var acc testutil.Accumulator
|
||||
err = tt.plugin.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
|
Binary file not shown.
|
|
|
@ -0,0 +1,5 @@
|
|||
Mtr_Version,Start_Time,Status,Host,Hop,Ip,Loss%,Snt, ,Last,Avg,Best,Wrst,StDev,
|
||||
MTR.0.87,1593667013,OK,example.org,1,12.122.114.5,0.00,10,0,21.86,21.55,19.34,26.83,2.05
|
||||
MTR.0.87,1593667013,OK,example.org,2,192.205.32.238,0.00,10,0,32.83,25.11,20.80,38.85,6.03
|
||||
MTR.0.87,1593667013,OK,example.org,3,152.195.85.133,0.00,10,0,19.75,20.18,19.75,20.78,0.00
|
||||
MTR.0.87,1593667013,OK,example.org,4,93.184.216.34,0.00,10,0,19.75,24.02,19.75,32.41,4.67
|
||||
|
|
|
@ -48,6 +48,15 @@ The plugin expects messages in one of the
|
|||
## line and the size of the output's metric_batch_size.
|
||||
# max_undelivered_lines = 1000
|
||||
|
||||
## Character encoding to use when interpreting the file contents. Invalid
|
||||
## characters are replaced using the unicode replacement character. When set
|
||||
## to the empty string the data is not decoded to text.
|
||||
## ex: character_encoding = "utf-8"
|
||||
## character_encoding = "utf-16le"
|
||||
## character_encoding = "utf-16be"
|
||||
## character_encoding = ""
|
||||
# character_encoding = ""
|
||||
|
||||
## Data format to consume.
|
||||
## Each data format has its own unique set of configuration options, read
|
||||
## more about them here:
|
||||
|
|
|
|||
|
|
@ -5,12 +5,15 @@ package tail
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/dimchansky/utfbom"
|
||||
"github.com/influxdata/tail"
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/internal/globpath"
|
||||
"github.com/influxdata/telegraf/plugins/common/encoding"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/csv"
|
||||
|
|
@ -35,6 +38,7 @@ type Tail struct {
|
|||
Pipe bool `toml:"pipe"`
|
||||
WatchMethod string `toml:"watch_method"`
|
||||
MaxUndeliveredLines int `toml:"max_undelivered_lines"`
|
||||
CharacterEncoding string `toml:"character_encoding"`
|
||||
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
tailers map[string]*tail.Tail
|
||||
|
|
@ -45,6 +49,7 @@ type Tail struct {
|
|||
cancel context.CancelFunc
|
||||
acc telegraf.TrackingAccumulator
|
||||
sem semaphore
|
||||
decoder *encoding.Decoder
|
||||
}
|
||||
|
||||
func NewTail() *Tail {
|
||||
|
|
@ -88,6 +93,15 @@ const sampleConfig = `
|
|||
## line and the size of the output's metric_batch_size.
|
||||
# max_undelivered_lines = 1000
|
||||
|
||||
## Character encoding to use when interpreting the file contents. Invalid
|
||||
## characters are replaced using the unicode replacement character. When set
|
||||
## to the empty string the data is not decoded to text.
|
||||
## ex: character_encoding = "utf-8"
|
||||
## character_encoding = "utf-16le"
|
||||
## character_encoding = "utf-16be"
|
||||
## character_encoding = ""
|
||||
# character_encoding = ""
|
||||
|
||||
## Data format to consume.
|
||||
## Each data format has its own unique set of configuration options, read
|
||||
## more about them here:
|
||||
|
|
@ -108,7 +122,10 @@ func (t *Tail) Init() error {
|
|||
return errors.New("max_undelivered_lines must be positive")
|
||||
}
|
||||
t.sem = make(semaphore, t.MaxUndeliveredLines)
|
||||
return nil
|
||||
|
||||
var err error
|
||||
t.decoder, err = encoding.NewDecoder(t.CharacterEncoding)
|
||||
return err
|
||||
}
|
||||
|
||||
func (t *Tail) Gather(acc telegraf.Accumulator) error {
|
||||
|
|
@ -190,6 +207,10 @@ func (t *Tail) tailNewFiles(fromBeginning bool) error {
|
|||
Poll: poll,
|
||||
Pipe: t.Pipe,
|
||||
Logger: tail.DiscardingLogger,
|
||||
OpenReaderFunc: func(rd io.Reader) io.Reader {
|
||||
r, _ := utfbom.Skip(t.decoder.Reader(rd))
|
||||
return r
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
t.Log.Debugf("Failed to open file (%s): %v", file, err)
|
||||
|
|
@ -201,6 +222,7 @@ func (t *Tail) tailNewFiles(fromBeginning bool) error {
|
|||
parser, err := t.parserFunc()
|
||||
if err != nil {
|
||||
t.Log.Errorf("Creating parser: %s", err.Error())
|
||||
continue
|
||||
}
|
||||
|
||||
// create a goroutine for each "tailer"
|
||||
|
|
|
|||
|
|
@ -5,108 +5,32 @@ import (
|
|||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"runtime"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/parsers"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/csv"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/influx"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/json"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestTailFromBeginning(t *testing.T) {
|
||||
if os.Getenv("CIRCLE_PROJECT_REPONAME") != "" {
|
||||
t.Skip("Skipping CI testing due to race conditions")
|
||||
}
|
||||
|
||||
tmpfile, err := ioutil.TempFile("", "")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tmpfile.Name())
|
||||
defer tmpfile.Close()
|
||||
_, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n")
|
||||
require.NoError(t, err)
|
||||
|
||||
tt := NewTail()
|
||||
tt.Log = testutil.Logger{}
|
||||
tt.FromBeginning = true
|
||||
tt.Files = []string{tmpfile.Name()}
|
||||
tt.SetParserFunc(parsers.NewInfluxParser)
|
||||
|
||||
err = tt.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
require.NoError(t, tt.Start(&acc))
|
||||
defer tt.Stop()
|
||||
require.NoError(t, acc.GatherError(tt.Gather))
|
||||
|
||||
acc.Wait(1)
|
||||
acc.AssertContainsTaggedFields(t, "cpu",
|
||||
map[string]interface{}{
|
||||
"usage_idle": float64(100),
|
||||
},
|
||||
map[string]string{
|
||||
"mytag": "foo",
|
||||
"path": tmpfile.Name(),
|
||||
})
|
||||
}
|
||||
|
||||
func TestTailFromEnd(t *testing.T) {
|
||||
if os.Getenv("CIRCLE_PROJECT_REPONAME") != "" {
|
||||
t.Skip("Skipping CI testing due to race conditions")
|
||||
}
|
||||
|
||||
tmpfile, err := ioutil.TempFile("", "")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tmpfile.Name())
|
||||
defer tmpfile.Close()
|
||||
_, err = tmpfile.WriteString("cpu,mytag=foo usage_idle=100\n")
|
||||
require.NoError(t, err)
|
||||
|
||||
tt := NewTail()
|
||||
tt.Log = testutil.Logger{}
|
||||
tt.Files = []string{tmpfile.Name()}
|
||||
tt.SetParserFunc(parsers.NewInfluxParser)
|
||||
|
||||
err = tt.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
acc := testutil.Accumulator{}
|
||||
require.NoError(t, tt.Start(&acc))
|
||||
defer tt.Stop()
|
||||
for _, tailer := range tt.tailers {
|
||||
for n, err := tailer.Tell(); err == nil && n == 0; n, err = tailer.Tell() {
|
||||
// wait for tailer to jump to end
|
||||
runtime.Gosched()
|
||||
}
|
||||
}
|
||||
|
||||
_, err = tmpfile.WriteString("cpu,othertag=foo usage_idle=100\n")
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, acc.GatherError(tt.Gather))
|
||||
|
||||
acc.Wait(1)
|
||||
acc.AssertContainsTaggedFields(t, "cpu",
|
||||
map[string]interface{}{
|
||||
"usage_idle": float64(100),
|
||||
},
|
||||
map[string]string{
|
||||
"othertag": "foo",
|
||||
"path": tmpfile.Name(),
|
||||
})
|
||||
assert.Len(t, acc.Metrics, 1)
|
||||
}
|
||||
|
||||
func TestTailBadLine(t *testing.T) {
|
||||
tmpfile, err := ioutil.TempFile("", "")
|
||||
require.NoError(t, err)
|
||||
defer os.Remove(tmpfile.Name())
|
||||
defer tmpfile.Close()
|
||||
|
||||
_, err = tmpfile.WriteString("cpu mytag= foo usage_idle= 100\n")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Write good metric so we can detect when processing is complete
|
||||
_, err = tmpfile.WriteString("cpu usage_idle=100\n")
|
||||
require.NoError(t, err)
|
||||
|
||||
tt := NewTail()
|
||||
tt.Log = testutil.Logger{}
|
||||
tt.FromBeginning = true
|
||||
|
|
@ -124,10 +48,8 @@ func TestTailBadLine(t *testing.T) {
|
|||
|
||||
require.NoError(t, acc.GatherError(tt.Gather))
|
||||
|
||||
_, err = tmpfile.WriteString("cpu mytag= foo usage_idle= 100\n")
|
||||
require.NoError(t, err)
|
||||
acc.Wait(1)
|
||||
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
tt.Stop()
|
||||
assert.Contains(t, buf.String(), "Malformed log line")
|
||||
}
|
||||
|
|
@ -186,11 +108,11 @@ cpu,42
|
|||
plugin.FromBeginning = true
|
||||
plugin.Files = []string{tmpfile.Name()}
|
||||
plugin.SetParserFunc(func() (parsers.Parser, error) {
|
||||
return &csv.Parser{
|
||||
return csv.NewParser(&csv.Config{
|
||||
MeasurementColumn: "measurement",
|
||||
HeaderRowCount: 1,
|
||||
TimeFunc: func() time.Time { return time.Unix(0, 0) },
|
||||
}, nil
|
||||
})
|
||||
})
|
||||
|
||||
err = plugin.Init()
|
||||
|
|
@ -284,3 +206,146 @@ func TestMultipleMetricsOnFirstLine(t *testing.T) {
|
|||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(),
|
||||
testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestCharacterEncoding(t *testing.T) {
|
||||
full := []telegraf.Metric{
|
||||
testutil.MustMetric("cpu",
|
||||
map[string]string{
|
||||
"cpu": "cpu0",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"usage_active": 11.9,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("cpu",
|
||||
map[string]string{
|
||||
"cpu": "cpu1",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"usage_active": 26.0,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("cpu",
|
||||
map[string]string{
|
||||
"cpu": "cpu2",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"usage_active": 14.0,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("cpu",
|
||||
map[string]string{
|
||||
"cpu": "cpu3",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"usage_active": 20.4,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("cpu",
|
||||
map[string]string{
|
||||
"cpu": "cpu-total",
|
||||
},
|
||||
map[string]interface{}{
|
||||
"usage_active": 18.4,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
plugin *Tail
|
||||
offset int64
|
||||
expected []telegraf.Metric
|
||||
}{
|
||||
{
|
||||
name: "utf-8",
|
||||
plugin: &Tail{
|
||||
Files: []string{"testdata/cpu-utf-8.influx"},
|
||||
FromBeginning: true,
|
||||
MaxUndeliveredLines: 1000,
|
||||
Log: testutil.Logger{},
|
||||
CharacterEncoding: "utf-8",
|
||||
},
|
||||
expected: full,
|
||||
},
|
||||
{
|
||||
name: "utf-8 seek",
|
||||
plugin: &Tail{
|
||||
Files: []string{"testdata/cpu-utf-8.influx"},
|
||||
MaxUndeliveredLines: 1000,
|
||||
Log: testutil.Logger{},
|
||||
CharacterEncoding: "utf-8",
|
||||
},
|
||||
offset: 0x33,
|
||||
expected: full[1:],
|
||||
},
|
||||
{
|
||||
name: "utf-16le",
|
||||
plugin: &Tail{
|
||||
Files: []string{"testdata/cpu-utf-16le.influx"},
|
||||
FromBeginning: true,
|
||||
MaxUndeliveredLines: 1000,
|
||||
Log: testutil.Logger{},
|
||||
CharacterEncoding: "utf-16le",
|
||||
},
|
||||
expected: full,
|
||||
},
|
||||
{
|
||||
name: "utf-16le seek",
|
||||
plugin: &Tail{
|
||||
Files: []string{"testdata/cpu-utf-16le.influx"},
|
||||
MaxUndeliveredLines: 1000,
|
||||
Log: testutil.Logger{},
|
||||
CharacterEncoding: "utf-16le",
|
||||
},
|
||||
offset: 0x68,
|
||||
expected: full[1:],
|
||||
},
|
||||
{
|
||||
name: "utf-16be",
|
||||
plugin: &Tail{
|
||||
Files: []string{"testdata/cpu-utf-16be.influx"},
|
||||
FromBeginning: true,
|
||||
MaxUndeliveredLines: 1000,
|
||||
Log: testutil.Logger{},
|
||||
CharacterEncoding: "utf-16be",
|
||||
},
|
||||
expected: full,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
tt.plugin.SetParserFunc(func() (parsers.Parser, error) {
|
||||
handler := influx.NewMetricHandler()
|
||||
return influx.NewParser(handler), nil
|
||||
})
|
||||
|
||||
if tt.offset != 0 {
|
||||
tt.plugin.offsets = map[string]int64{
|
||||
tt.plugin.Files[0]: tt.offset,
|
||||
}
|
||||
}
|
||||
|
||||
err := tt.plugin.Init()
|
||||
require.NoError(t, err)
|
||||
|
||||
var acc testutil.Accumulator
|
||||
err = tt.plugin.Start(&acc)
|
||||
require.NoError(t, err)
|
||||
acc.Wait(len(tt.expected))
|
||||
tt.plugin.Stop()
|
||||
|
||||
actual := acc.GetTelegrafMetrics()
|
||||
for _, m := range actual {
|
||||
m.RemoveTag("path")
|
||||
}
|
||||
|
||||
testutil.RequireMetricsEqual(t, tt.expected, actual, testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,5 @@
|
|||
cpu,cpu=cpu0 usage_active=11.9 1594084375000000000
|
||||
cpu,cpu=cpu1 usage_active=26.0 1594084375000000000
|
||||
cpu,cpu=cpu2 usage_active=14.0 1594084375000000000
|
||||
cpu,cpu=cpu3 usage_active=20.4 1594084375000000000
|
||||
cpu,cpu=cpu-total usage_active=18.4 1594084375000000000
|
||||
|
|
@ -4,6 +4,7 @@ import (
|
|||
"bytes"
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
|
@ -15,30 +16,66 @@ import (
|
|||
|
||||
type TimeFunc func() time.Time
|
||||
|
||||
type Parser struct {
|
||||
MetricName string
|
||||
HeaderRowCount int
|
||||
SkipRows int
|
||||
SkipColumns int
|
||||
Delimiter string
|
||||
Comment string
|
||||
TrimSpace bool
|
||||
ColumnNames []string
|
||||
ColumnTypes []string
|
||||
TagColumns []string
|
||||
MeasurementColumn string
|
||||
TimestampColumn string
|
||||
TimestampFormat string
|
||||
DefaultTags map[string]string
|
||||
type Config struct {
|
||||
ColumnNames []string `toml:"csv_column_names"`
|
||||
ColumnTypes []string `toml:"csv_column_types"`
|
||||
Comment string `toml:"csv_comment"`
|
||||
Delimiter string `toml:"csv_delimiter"`
|
||||
HeaderRowCount int `toml:"csv_header_row_count"`
|
||||
MeasurementColumn string `toml:"csv_measurement_column"`
|
||||
MetricName string `toml:"metric_name"`
|
||||
SkipColumns int `toml:"csv_skip_columns"`
|
||||
SkipRows int `toml:"csv_skip_rows"`
|
||||
TagColumns []string `toml:"csv_tag_columns"`
|
||||
TimestampColumn string `toml:"csv_timestamp_column"`
|
||||
TimestampFormat string `toml:"csv_timestamp_format"`
|
||||
Timezone string `toml:"csv_timezone"`
|
||||
TrimSpace bool `toml:"csv_trim_space"`
|
||||
|
||||
TimeFunc func() time.Time
|
||||
Timezone string
|
||||
DefaultTags map[string]string
|
||||
}
|
||||
|
||||
// Parser is a CSV parser, you should use NewParser to create a new instance.
|
||||
type Parser struct {
|
||||
*Config
|
||||
}
|
||||
|
||||
func NewParser(c *Config) (*Parser, error) {
|
||||
if c.HeaderRowCount == 0 && len(c.ColumnNames) == 0 {
|
||||
return nil, fmt.Errorf("`csv_header_row_count` must be defined if `csv_column_names` is not specified")
|
||||
}
|
||||
|
||||
if c.Delimiter != "" {
|
||||
runeStr := []rune(c.Delimiter)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", c.Delimiter)
|
||||
}
|
||||
}
|
||||
|
||||
if c.Comment != "" {
|
||||
runeStr := []rune(c.Comment)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", c.Comment)
|
||||
}
|
||||
}
|
||||
|
||||
if len(c.ColumnNames) > 0 && len(c.ColumnTypes) > 0 && len(c.ColumnNames) != len(c.ColumnTypes) {
|
||||
return nil, fmt.Errorf("csv_column_names field count doesn't match with csv_column_types")
|
||||
}
|
||||
|
||||
if c.TimeFunc == nil {
|
||||
c.TimeFunc = time.Now
|
||||
}
|
||||
|
||||
return &Parser{Config: c}, nil
|
||||
}
|
||||
|
||||
func (p *Parser) SetTimeFunc(fn TimeFunc) {
|
||||
p.TimeFunc = fn
|
||||
}
|
||||
|
||||
func (p *Parser) compile(r *bytes.Reader) (*csv.Reader, error) {
|
||||
func (p *Parser) compile(r io.Reader) (*csv.Reader, error) {
|
||||
csvReader := csv.NewReader(r)
|
||||
// ensures that the reader reads records of different lengths without an error
|
||||
csvReader.FieldsPerRecord = -1
|
||||
|
|
@ -60,7 +97,10 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
|
|||
}
|
||||
// skip first rows
|
||||
for i := 0; i < p.SkipRows; i++ {
|
||||
csvReader.Read()
|
||||
_, err := csvReader.Read()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
// if there is a header and nothing in DataColumns
|
||||
// set DataColumns to names extracted from the header
|
||||
|
|
@ -88,7 +128,10 @@ func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
|
|||
} else {
|
||||
// if columns are named, just skip header rows
|
||||
for i := 0; i < p.HeaderRowCount; i++ {
|
||||
csvReader.Read()
|
||||
_, err := csvReader.Read()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -208,9 +251,11 @@ outer:
|
|||
|
||||
// will default to plugin name
|
||||
measurementName := p.MetricName
|
||||
if p.MeasurementColumn != "" {
|
||||
if recordFields[p.MeasurementColumn] != nil && recordFields[p.MeasurementColumn] != "" {
|
||||
measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn])
|
||||
}
|
||||
}
|
||||
|
||||
metricTime, err := parseTimestamp(p.TimeFunc, recordFields, p.TimestampColumn, p.TimestampFormat, p.Timezone)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -16,22 +16,28 @@ var DefaultTime = func() time.Time {
|
|||
}
|
||||
|
||||
func TestBasicCSV(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
TagColumns: []string{"third"},
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err := p.ParseLine("1.4,true,hi")
|
||||
_, err = p.ParseLine("1.4,true,hi")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestHeaderConcatenationCSV(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 2,
|
||||
MeasurementColumn: "3",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `first,second
|
||||
1,2,3
|
||||
3.4,70,test_name`
|
||||
|
|
@ -42,12 +48,15 @@ func TestHeaderConcatenationCSV(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestHeaderOverride(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `line1,line2,line3
|
||||
3.4,70,test_name`
|
||||
metrics, err := p.Parse([]byte(testCSV))
|
||||
|
|
@ -56,14 +65,16 @@ func TestHeaderOverride(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTimestamp(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimestampFormat: "02/01/06 03:04:05 PM",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
testCSV := `line1,line2,line3
|
||||
23/05/09 04:05:06 PM,70,test_name
|
||||
07/11/09 04:05:06 PM,80,test_name2`
|
||||
|
|
@ -75,29 +86,35 @@ func TestTimestamp(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTimestampError(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `line1,line2,line3
|
||||
23/05/09 04:05:06 PM,70,test_name
|
||||
07/11/09 04:05:06 PM,80,test_name2`
|
||||
_, err := p.Parse([]byte(testCSV))
|
||||
_, err = p.Parse([]byte(testCSV))
|
||||
require.Equal(t, fmt.Errorf("timestamp format must be specified"), err)
|
||||
}
|
||||
|
||||
func TestTimestampUnixFormat(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimestampFormat: "unix",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `line1,line2,line3
|
||||
1243094706,70,test_name
|
||||
1257609906,80,test_name2`
|
||||
|
|
@ -108,14 +125,17 @@ func TestTimestampUnixFormat(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTimestampUnixMSFormat(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimestampColumn: "first",
|
||||
TimestampFormat: "unix_ms",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `line1,line2,line3
|
||||
1243094706123,70,test_name
|
||||
1257609906123,80,test_name2`
|
||||
|
|
@ -126,12 +146,15 @@ func TestTimestampUnixMSFormat(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestQuotedCharacter(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
testCSV := `line1,line2,line3
|
||||
"3,4",70,test_name`
|
||||
|
|
@ -141,13 +164,16 @@ func TestQuotedCharacter(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestDelimiter(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
Delimiter: "%",
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
testCSV := `line1%line2%line3
|
||||
3,4%70%test_name`
|
||||
|
|
@ -157,13 +183,16 @@ func TestDelimiter(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestValueConversion(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 0,
|
||||
Delimiter: ",",
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `3.3,4,true,hello`
|
||||
|
||||
expectedTags := make(map[string]string)
|
||||
|
|
@ -199,13 +228,16 @@ func TestValueConversion(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSkipComment(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 0,
|
||||
Comment: "#",
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `#3.3,4,true,hello
|
||||
4,9.9,true,name_this`
|
||||
|
||||
|
|
@ -222,13 +254,16 @@ func TestSkipComment(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTrimSpace(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 0,
|
||||
TrimSpace: true,
|
||||
ColumnNames: []string{"first", "second", "third", "fourth"},
|
||||
MetricName: "test_value",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := ` 3.3, 4, true,hello`
|
||||
|
||||
expectedFields := map[string]interface{}{
|
||||
|
|
@ -244,12 +279,15 @@ func TestTrimSpace(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTrimSpaceDelimitedBySpace(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
Delimiter: " ",
|
||||
HeaderRowCount: 1,
|
||||
TrimSpace: true,
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := ` first second third fourth
|
||||
abcdefgh 0 2 false
|
||||
abcdef 3.3 4 true
|
||||
|
|
@ -268,13 +306,15 @@ abcdefgh 0 2 false
|
|||
}
|
||||
|
||||
func TestSkipRows(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
SkipRows: 1,
|
||||
TagColumns: []string{"line1"},
|
||||
MeasurementColumn: "line3",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
testCSV := `garbage nonsense
|
||||
line1,line2,line3
|
||||
hello,80,test_name2`
|
||||
|
|
@ -289,11 +329,14 @@ hello,80,test_name2`
|
|||
}
|
||||
|
||||
func TestSkipColumns(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
SkipColumns: 1,
|
||||
ColumnNames: []string{"line1", "line2"},
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `hello,80,test_name`
|
||||
|
||||
expectedFields := map[string]interface{}{
|
||||
|
|
@ -306,11 +349,14 @@ func TestSkipColumns(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSkipColumnsWithHeader(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
SkipColumns: 1,
|
||||
HeaderRowCount: 2,
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `col,col,col
|
||||
1,2,3
|
||||
trash,80,test_name`
|
||||
|
|
@ -322,11 +368,14 @@ func TestSkipColumnsWithHeader(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestParseStream(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
csvHeader := "a,b,c"
|
||||
csvBody := "1,2,3"
|
||||
|
|
@ -349,13 +398,16 @@ func TestParseStream(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTimestampUnixFloatPrecision(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
ColumnNames: []string{"time", "value"},
|
||||
TimestampColumn: "time",
|
||||
TimestampFormat: "unix",
|
||||
TimeFunc: DefaultTime,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
data := `1551129661.95456123352050781250,42`
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
|
|
@ -375,14 +427,17 @@ func TestTimestampUnixFloatPrecision(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSkipMeasurementColumn(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
TimestampColumn: "timestamp",
|
||||
TimestampFormat: "unix",
|
||||
TimeFunc: DefaultTime,
|
||||
TrimSpace: true,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
data := `id,value,timestamp
|
||||
1,5,1551129661.954561233`
|
||||
|
||||
|
|
@ -404,14 +459,17 @@ func TestSkipMeasurementColumn(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestSkipTimestampColumn(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
TimestampColumn: "timestamp",
|
||||
TimestampFormat: "unix",
|
||||
TimeFunc: DefaultTime,
|
||||
TrimSpace: true,
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
data := `id,value,timestamp
|
||||
1,5,1551129661.954561233`
|
||||
|
||||
|
|
@ -433,7 +491,8 @@ func TestSkipTimestampColumn(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestTimestampTimezone(t *testing.T) {
|
||||
p := Parser{
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"first", "second", "third"},
|
||||
MeasurementColumn: "third",
|
||||
|
|
@ -441,7 +500,9 @@ func TestTimestampTimezone(t *testing.T) {
|
|||
TimestampFormat: "02/01/06 03:04:05 PM",
|
||||
TimeFunc: DefaultTime,
|
||||
Timezone: "Asia/Jakarta",
|
||||
}
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `line1,line2,line3
|
||||
23/05/09 11:05:06 PM,70,test_name
|
||||
07/11/09 11:05:06 PM,80,test_name2`
|
||||
|
|
@ -451,3 +512,84 @@ func TestTimestampTimezone(t *testing.T) {
|
|||
require.Equal(t, metrics[0].Time().UnixNano(), int64(1243094706000000000))
|
||||
require.Equal(t, metrics[1].Time().UnixNano(), int64(1257609906000000000))
|
||||
}
|
||||
|
||||
func TestEmptyMeasurementName(t *testing.T) {
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"", "b"},
|
||||
MeasurementColumn: "",
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `,b
|
||||
1,2`
|
||||
metrics, err := p.Parse([]byte(testCSV))
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric("csv",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"b": 2,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestNumericMeasurementName(t *testing.T) {
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"a", "b"},
|
||||
MeasurementColumn: "a",
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `a,b
|
||||
1,2`
|
||||
metrics, err := p.Parse([]byte(testCSV))
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric("1",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"b": 2,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestStaticMeasurementName(t *testing.T) {
|
||||
p, err := NewParser(
|
||||
&Config{
|
||||
MetricName: "csv",
|
||||
HeaderRowCount: 1,
|
||||
ColumnNames: []string{"a", "b"},
|
||||
},
|
||||
)
|
||||
require.NoError(t, err)
|
||||
testCSV := `a,b
|
||||
1,2`
|
||||
metrics, err := p.Parse([]byte(testCSV))
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric("csv",
|
||||
map[string]string{},
|
||||
map[string]interface{}{
|
||||
"a": 1,
|
||||
"b": 2,
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package parsers
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/collectd"
|
||||
|
|
@ -206,21 +205,25 @@ func NewParser(config *Config) (Parser, error) {
|
|||
config.GrokTimezone,
|
||||
config.GrokUniqueTimestamp)
|
||||
case "csv":
|
||||
parser, err = newCSVParser(config.MetricName,
|
||||
config.CSVHeaderRowCount,
|
||||
config.CSVSkipRows,
|
||||
config.CSVSkipColumns,
|
||||
config.CSVDelimiter,
|
||||
config.CSVComment,
|
||||
config.CSVTrimSpace,
|
||||
config.CSVColumnNames,
|
||||
config.CSVColumnTypes,
|
||||
config.CSVTagColumns,
|
||||
config.CSVMeasurementColumn,
|
||||
config.CSVTimestampColumn,
|
||||
config.CSVTimestampFormat,
|
||||
config.CSVTimezone,
|
||||
config.DefaultTags)
|
||||
config := &csv.Config{
|
||||
MetricName: config.MetricName,
|
||||
HeaderRowCount: config.CSVHeaderRowCount,
|
||||
SkipRows: config.CSVSkipRows,
|
||||
SkipColumns: config.CSVSkipColumns,
|
||||
Delimiter: config.CSVDelimiter,
|
||||
Comment: config.CSVComment,
|
||||
TrimSpace: config.CSVTrimSpace,
|
||||
ColumnNames: config.CSVColumnNames,
|
||||
ColumnTypes: config.CSVColumnTypes,
|
||||
TagColumns: config.CSVTagColumns,
|
||||
MeasurementColumn: config.CSVMeasurementColumn,
|
||||
TimestampColumn: config.CSVTimestampColumn,
|
||||
TimestampFormat: config.CSVTimestampFormat,
|
||||
Timezone: config.CSVTimezone,
|
||||
DefaultTags: config.DefaultTags,
|
||||
}
|
||||
|
||||
return csv.NewParser(config)
|
||||
case "logfmt":
|
||||
parser, err = NewLogFmtParser(config.MetricName, config.DefaultTags)
|
||||
case "form_urlencoded":
|
||||
|
|
@ -235,66 +238,6 @@ func NewParser(config *Config) (Parser, error) {
|
|||
return parser, err
|
||||
}
|
||||
|
||||
func newCSVParser(metricName string,
|
||||
headerRowCount int,
|
||||
skipRows int,
|
||||
skipColumns int,
|
||||
delimiter string,
|
||||
comment string,
|
||||
trimSpace bool,
|
||||
columnNames []string,
|
||||
columnTypes []string,
|
||||
tagColumns []string,
|
||||
nameColumn string,
|
||||
timestampColumn string,
|
||||
timestampFormat string,
|
||||
timezone string,
|
||||
defaultTags map[string]string) (Parser, error) {
|
||||
|
||||
if headerRowCount == 0 && len(columnNames) == 0 {
|
||||
return nil, fmt.Errorf("`csv_header_row_count` must be defined if `csv_column_names` is not specified")
|
||||
}
|
||||
|
||||
if delimiter != "" {
|
||||
runeStr := []rune(delimiter)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", delimiter)
|
||||
}
|
||||
}
|
||||
|
||||
if comment != "" {
|
||||
runeStr := []rune(comment)
|
||||
if len(runeStr) > 1 {
|
||||
return nil, fmt.Errorf("csv_delimiter must be a single character, got: %s", comment)
|
||||
}
|
||||
}
|
||||
|
||||
if len(columnNames) > 0 && len(columnTypes) > 0 && len(columnNames) != len(columnTypes) {
|
||||
return nil, fmt.Errorf("csv_column_names field count doesn't match with csv_column_types")
|
||||
}
|
||||
|
||||
parser := &csv.Parser{
|
||||
MetricName: metricName,
|
||||
HeaderRowCount: headerRowCount,
|
||||
SkipRows: skipRows,
|
||||
SkipColumns: skipColumns,
|
||||
Delimiter: delimiter,
|
||||
Comment: comment,
|
||||
TrimSpace: trimSpace,
|
||||
ColumnNames: columnNames,
|
||||
ColumnTypes: columnTypes,
|
||||
TagColumns: tagColumns,
|
||||
MeasurementColumn: nameColumn,
|
||||
TimestampColumn: timestampColumn,
|
||||
TimestampFormat: timestampFormat,
|
||||
Timezone: timezone,
|
||||
DefaultTags: defaultTags,
|
||||
TimeFunc: time.Now,
|
||||
}
|
||||
|
||||
return parser, nil
|
||||
}
|
||||
|
||||
func newGrokParser(metricName string,
|
||||
patterns []string, nPatterns []string,
|
||||
cPatterns string, cPatternFiles []string,
|
||||
|
|
|
|||
Loading…
Reference in New Issue