feat(parsers.grok): add option to allow multiline messages (#12320)

This commit is contained in:
Sven Rebhan 2022-12-07 21:05:02 +01:00 committed by GitHub
parent d9d2b01586
commit 6b08068d6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 49 additions and 0 deletions

View File

@ -124,6 +124,9 @@ Debug](https://grokdebug.herokuapp.com) application quite useful!
## When set to "disable", the timestamp will not be incremented if there is a
## duplicate.
# grok_unique_timestamp = "auto"
## Enable multiline messages to be processed.
# grok_multiline = false
```
### Timestamp Examples

View File

@ -37,4 +37,7 @@ COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} "%{DATA:referrer}" "%{DATA:agent}"
HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg}
HTTPD24_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{WORD:module}:%{LOGLEVEL:loglevel:tag}\] \[pid %{POSINT:pid:int}:tid %{NUMBER:tid:int}\]( \(%{POSINT:proxy_errorcode:int}\)%{DATA:proxy_errormessage}:)?( \[client %{IPORHOST:client}:%{POSINT:clientport}\])? %{DATA:errorcode}: %{GREEDYDATA:message}
HTTPD_ERRORLOG %{HTTPD20_ERRORLOG}|%{HTTPD24_ERRORLOG}
# DATA spanning multiple lines
MULTILINEDATA (.|\n)*
`

View File

@ -75,6 +75,7 @@ type Parser struct {
NamedPatterns []string `toml:"grok_named_patterns"`
CustomPatterns string `toml:"grok_custom_patterns"`
CustomPatternFiles []string `toml:"grok_custom_pattern_files"`
Multiline bool `toml:"grok_multiline"`
Measurement string `toml:"-"`
DefaultTags map[string]string `toml:"-"`
Log telegraf.Logger `toml:"-"`
@ -381,6 +382,15 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) {
func (p *Parser) Parse(buf []byte) ([]telegraf.Metric, error) {
metrics := make([]telegraf.Metric, 0)
if p.Multiline {
m, err := p.ParseLine(string(buf))
if err != nil {
return nil, err
}
metrics = append(metrics, m)
return metrics, nil
}
scanner := bufio.NewScanner(bytes.NewReader(buf))
for scanner.Scan() {
line := scanner.Text()
@ -572,6 +582,7 @@ func (p *Parser) InitFromConfig(config *parsers.Config) error {
p.Patterns = config.GrokPatterns
p.Timezone = config.GrokTimezone
p.UniqueTimestamp = config.GrokUniqueTimestamp
p.Multiline = config.GrokMultiline
return p.Init()
}

View File

@ -2,11 +2,14 @@ package grok
import (
"log"
"os"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/metric"
"github.com/influxdata/telegraf/testutil"
)
@ -996,6 +999,31 @@ func TestNewlineInPatterns(t *testing.T) {
require.NotNil(t, m)
}
// TestMultilinePatterns checks that a parser with Multiline enabled turns the
// whole content of the multiline test log into a single metric: the leading
// RFC3339 timestamp becomes the metric time and the remaining text, embedded
// newlines included, is stored in the "text" field.
func TestMultilinePatterns(t *testing.T) {
	content, err := os.ReadFile("./testdata/test_multiline.log")
	require.NoError(t, err)

	// The pattern captures the timestamp first and then everything that
	// follows it via MULTILINEDATA.
	parser := &Parser{
		Measurement: "multiline",
		Patterns:    []string{`%{TIMESTAMP_ISO8601:timestamp:ts-rfc3339}\s%{MULTILINEDATA:text}`},
		Multiline:   true,
		Log:         testutil.Logger{},
	}
	require.NoError(t, parser.Compile())

	got, err := parser.Parse(content)
	require.NoError(t, err)

	// Exactly one metric is expected for the entire buffer.
	timestamp := time.Date(2022, time.December, 1, 12, 41, 45, 0, time.UTC)
	fields := map[string]interface{}{"text": "Error A long and\n multiline\n message"}
	want := []telegraf.Metric{
		metric.New("multiline", map[string]string{}, fields, timestamp),
	}
	testutil.RequireMetricsEqual(t, want, got)
}
func TestSyslogTimestamp(t *testing.T) {
currentYear := time.Now().Year()
tests := []struct {

View File

@ -0,0 +1,3 @@
2022-12-01T12:41:45Z Error A long and
multiline
message

View File

@ -139,6 +139,7 @@ type Config struct {
GrokCustomPatternFiles []string `toml:"grok_custom_pattern_files"`
GrokTimezone string `toml:"grok_timezone"`
GrokUniqueTimestamp string `toml:"grok_unique_timestamp"`
GrokMultiline bool `toml:"grok_multiline"`
//csv configuration
CSVColumnNames []string `toml:"csv_column_names"`