From 6b08068d6d6397a7e1681836399289478a20e2c3 Mon Sep 17 00:00:00 2001 From: Sven Rebhan <36194019+srebhan@users.noreply.github.com> Date: Wed, 7 Dec 2022 21:05:02 +0100 Subject: [PATCH] feat(parsers.grok): add option to allow multiline messages (#12320) --- plugins/parsers/grok/README.md | 3 ++ plugins/parsers/grok/influx_patterns.go | 3 ++ plugins/parsers/grok/parser.go | 11 ++++++++ plugins/parsers/grok/parser_test.go | 28 +++++++++++++++++++ .../parsers/grok/testdata/test_multiline.log | 3 ++ plugins/parsers/registry.go | 1 + 6 files changed, 49 insertions(+) create mode 100644 plugins/parsers/grok/testdata/test_multiline.log diff --git a/plugins/parsers/grok/README.md b/plugins/parsers/grok/README.md index 73969a26b..b08f9059c 100644 --- a/plugins/parsers/grok/README.md +++ b/plugins/parsers/grok/README.md @@ -124,6 +124,9 @@ Debug](https://grokdebug.herokuapp.com) application quite useful! ## When set to "disable" timestamp will not incremented if there is a ## duplicate. # grok_unique_timestamp = "auto" + + ## Enable multiline messages to be processed. + # grok_multiline = false ``` ### Timestamp Examples diff --git a/plugins/parsers/grok/influx_patterns.go b/plugins/parsers/grok/influx_patterns.go index f2c6839ac..7857198df 100644 --- a/plugins/parsers/grok/influx_patterns.go +++ b/plugins/parsers/grok/influx_patterns.go @@ -37,4 +37,7 @@ COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} "%{DATA:referrer}" "%{DATA:agent}" HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message} HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG} + +# DATA spanning multiple lines +MULTILINEDATA (.|\n)* ` diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index bc44386fe..f3c4655ab 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -75,6 +75,7 @@ type Parser struct { NamedPatterns []string `toml:"grok_named_patterns"` CustomPatterns string `toml:"grok_custom_patterns"` CustomPatternFiles []string `toml:"grok_custom_pattern_files"` + Multiline bool `toml:"grok_multiline"` Measurement string `toml:"-"` DefaultTags map[string]string `toml:"-"` Log telegraf.Logger `toml:"-"` @@ -381,6 +382,15 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) { metrics := make([]telegraf.Metric, 0) + if p.Multiline { + m, err := p.ParseLine(string(buf)) + if err != nil { + return nil, err + } + metrics = append(metrics, m) + return metrics, nil + } + scanner := bufio.NewScanner(bytes.NewReader(buf)) for scanner.Scan() { line := scanner.Text() @@ -572,6 +582,7 @@ func (p *Parser) InitFromConfig(config *parsers.Config) error { p.Patterns = config.GrokPatterns p.Timezone = config.GrokTimezone p.UniqueTimestamp = config.GrokUniqueTimestamp + p.Multiline = config.GrokMultiline return p.Init() } diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index bdab23b86..06b98c874 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -2,11 +2,14 @@ package grok import ( "log" + "os" "testing" "time" "github.com/stretchr/testify/require" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" "github.com/influxdata/telegraf/testutil" ) @@ -996,6 +999,31 @@ func TestNewlineInPatterns(t *testing.T) { require.NotNil(t, m) } +func TestMultilinePatterns(t *testing.T) { + buf, err := os.ReadFile("./testdata/test_multiline.log") + require.NoError(t, err) + + expected := []telegraf.Metric{ + metric.New( + "multiline", + map[string]string{}, + map[string]interface{}{"text": "Error A long and\n multiline\n message"}, + time.Date(2022, time.December, 1, 12, 41, 45, 0, time.UTC), + ), + } + + p := &Parser{ + Measurement: "multiline", + Patterns: []string{`%{TIMESTAMP_ISO8601:timestamp:ts-rfc3339}\s%{MULTILINEDATA:text}`}, + Multiline: true, + Log: testutil.Logger{}, + } + require.NoError(t, p.Compile()) + actual, err := p.Parse(buf) + require.NoError(t, err) + testutil.RequireMetricsEqual(t, expected, actual) +} + func TestSyslogTimestamp(t *testing.T) { currentYear := time.Now().Year() tests := []struct { diff --git a/plugins/parsers/grok/testdata/test_multiline.log b/plugins/parsers/grok/testdata/test_multiline.log new file mode 100644 index 000000000..d299fb134 --- /dev/null +++ b/plugins/parsers/grok/testdata/test_multiline.log @@ -0,0 +1,3 @@ +2022-12-01T12:41:45Z Error A long and + multiline + message \ No newline at end of file diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index 6de33c055..b53b9ec7e 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -139,6 +139,7 @@ type Config struct { GrokCustomPatternFiles []string `toml:"grok_custom_pattern_files"` GrokTimezone string `toml:"grok_timezone"` GrokUniqueTimestamp string `toml:"grok_unique_timestamp"` + GrokMultiline bool `toml:"grok_multiline"` //csv configuration CSVColumnNames []string `toml:"csv_column_names"`