Added ability to define skip values in csv parser (#8627)
This commit is contained in:
parent
3531e9ddc6
commit
3b87438dea
|
|
@ -1320,6 +1320,7 @@ func (c *Config) getParserConfig(name string, tbl *ast.Table) (*parsers.Config,
|
||||||
c.getFieldInt(tbl, "csv_skip_rows", &pc.CSVSkipRows)
|
c.getFieldInt(tbl, "csv_skip_rows", &pc.CSVSkipRows)
|
||||||
c.getFieldInt(tbl, "csv_skip_columns", &pc.CSVSkipColumns)
|
c.getFieldInt(tbl, "csv_skip_columns", &pc.CSVSkipColumns)
|
||||||
c.getFieldBool(tbl, "csv_trim_space", &pc.CSVTrimSpace)
|
c.getFieldBool(tbl, "csv_trim_space", &pc.CSVTrimSpace)
|
||||||
|
c.getFieldStringSlice(tbl, "csv_skip_values", &pc.CSVSkipValues)
|
||||||
|
|
||||||
c.getFieldStringSlice(tbl, "form_urlencoded_tag_keys", &pc.FormUrlencodedTagKeys)
|
c.getFieldStringSlice(tbl, "form_urlencoded_tag_keys", &pc.FormUrlencodedTagKeys)
|
||||||
|
|
||||||
|
|
@ -1413,7 +1414,7 @@ func (c *Config) missingTomlField(typ reflect.Type, key string) error {
|
||||||
"collectd_security_level", "collectd_typesdb", "collection_jitter", "csv_column_names",
|
"collectd_security_level", "collectd_typesdb", "collection_jitter", "csv_column_names",
|
||||||
"csv_column_types", "csv_comment", "csv_delimiter", "csv_header_row_count",
|
"csv_column_types", "csv_comment", "csv_delimiter", "csv_header_row_count",
|
||||||
"csv_measurement_column", "csv_skip_columns", "csv_skip_rows", "csv_tag_columns",
|
"csv_measurement_column", "csv_skip_columns", "csv_skip_rows", "csv_tag_columns",
|
||||||
"csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space",
|
"csv_timestamp_column", "csv_timestamp_format", "csv_timezone", "csv_trim_space", "csv_skip_values",
|
||||||
"data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path",
|
"data_format", "data_type", "delay", "drop", "drop_original", "dropwizard_metric_registry_path",
|
||||||
"dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path",
|
"dropwizard_tag_paths", "dropwizard_tags_path", "dropwizard_time_format", "dropwizard_time_path",
|
||||||
"fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys",
|
"fielddrop", "fieldpass", "flush_interval", "flush_jitter", "form_urlencoded_tag_keys",
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,10 @@ values.
|
||||||
## in case there is no timezone information.
|
## in case there is no timezone information.
|
||||||
## It follows the IANA Time Zone database.
|
## It follows the IANA Time Zone database.
|
||||||
csv_timezone = ""
|
csv_timezone = ""
|
||||||
|
|
||||||
|
## Indicates values to skip, such as an empty string value "".
|
||||||
|
## A field is skipped entirely when its value matches any of the values listed here.
|
||||||
|
csv_skip_values = []
|
||||||
```
|
```
|
||||||
#### csv_timestamp_column, csv_timestamp_format
|
#### csv_timestamp_column, csv_timestamp_format
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ type Config struct {
|
||||||
TimestampFormat string `toml:"csv_timestamp_format"`
|
TimestampFormat string `toml:"csv_timestamp_format"`
|
||||||
Timezone string `toml:"csv_timezone"`
|
Timezone string `toml:"csv_timezone"`
|
||||||
TrimSpace bool `toml:"csv_trim_space"`
|
TrimSpace bool `toml:"csv_trim_space"`
|
||||||
|
SkipValues []string `toml:"csv_skip_values"`
|
||||||
|
|
||||||
gotColumnNames bool
|
gotColumnNames bool
|
||||||
|
|
||||||
|
|
@ -197,6 +198,13 @@ outer:
|
||||||
value = strings.Trim(value, " ")
|
value = strings.Trim(value, " ")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// don't record fields where the value matches a skip value
|
||||||
|
for _, s := range p.SkipValues {
|
||||||
|
if value == s {
|
||||||
|
continue outer
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for _, tagName := range p.TagColumns {
|
for _, tagName := range p.TagColumns {
|
||||||
if tagName == fieldName {
|
if tagName == fieldName {
|
||||||
tags[tagName] = value
|
tags[tagName] = value
|
||||||
|
|
|
||||||
|
|
@ -613,3 +613,57 @@ func TestStaticMeasurementName(t *testing.T) {
|
||||||
}
|
}
|
||||||
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSkipEmptyStringValue(t *testing.T) {
|
||||||
|
p, err := NewParser(
|
||||||
|
&Config{
|
||||||
|
MetricName: "csv",
|
||||||
|
HeaderRowCount: 1,
|
||||||
|
ColumnNames: []string{"a", "b"},
|
||||||
|
SkipValues: []string{""},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
testCSV := `a,b
|
||||||
|
1,""`
|
||||||
|
metrics, err := p.Parse([]byte(testCSV))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
expected := []telegraf.Metric{
|
||||||
|
testutil.MustMetric("csv",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"a": 1,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSkipSpecifiedStringValue(t *testing.T) {
|
||||||
|
p, err := NewParser(
|
||||||
|
&Config{
|
||||||
|
MetricName: "csv",
|
||||||
|
HeaderRowCount: 1,
|
||||||
|
ColumnNames: []string{"a", "b"},
|
||||||
|
SkipValues: []string{"MM"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
testCSV := `a,b
|
||||||
|
1,MM`
|
||||||
|
metrics, err := p.Parse([]byte(testCSV))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
expected := []telegraf.Metric{
|
||||||
|
testutil.MustMetric("csv",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"a": 1,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
testutil.RequireMetricsEqual(t, expected, metrics, testutil.IgnoreTime())
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -146,6 +146,7 @@ type Config struct {
|
||||||
CSVTimestampFormat string `toml:"csv_timestamp_format"`
|
CSVTimestampFormat string `toml:"csv_timestamp_format"`
|
||||||
CSVTimezone string `toml:"csv_timezone"`
|
CSVTimezone string `toml:"csv_timezone"`
|
||||||
CSVTrimSpace bool `toml:"csv_trim_space"`
|
CSVTrimSpace bool `toml:"csv_trim_space"`
|
||||||
|
CSVSkipValues []string `toml:"csv_skip_values"`
|
||||||
|
|
||||||
// FormData configuration
|
// FormData configuration
|
||||||
FormUrlencodedTagKeys []string `toml:"form_urlencoded_tag_keys"`
|
FormUrlencodedTagKeys []string `toml:"form_urlencoded_tag_keys"`
|
||||||
|
|
@ -222,6 +223,7 @@ func NewParser(config *Config) (Parser, error) {
|
||||||
TimestampFormat: config.CSVTimestampFormat,
|
TimestampFormat: config.CSVTimestampFormat,
|
||||||
Timezone: config.CSVTimezone,
|
Timezone: config.CSVTimezone,
|
||||||
DefaultTags: config.DefaultTags,
|
DefaultTags: config.DefaultTags,
|
||||||
|
SkipValues: config.CSVSkipValues,
|
||||||
}
|
}
|
||||||
|
|
||||||
return csv.NewParser(config)
|
return csv.NewParser(config)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue