feat(inputs.tail): add option to preserve newlines for multiline data (#12281)

This commit is contained in:
Sven Rebhan 2022-11-28 16:18:57 +01:00 committed by GitHub
parent 2e99a1258e
commit ede6cfb920
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 119 additions and 22 deletions

View File

@ -104,6 +104,11 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## When handling quotes, escaped quotes (e.g. \") are handled correctly. ## When handling quotes, escaped quotes (e.g. \") are handled correctly.
#quotation = "ignore" #quotation = "ignore"
## The preserve_newline option can be true or false (defaults to false).
## If true, the newline character is preserved for multiline elements,
## this is useful to preserve message-structure e.g. for logging outputs.
#preserve_newline = false
#After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s. #After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
#timeout = 5s #timeout = 5s
``` ```

View File

@ -23,11 +23,12 @@ type Multiline struct {
} }
type MultilineConfig struct { type MultilineConfig struct {
Pattern string `toml:"pattern"` Pattern string `toml:"pattern"`
MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"` MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"`
InvertMatch bool `toml:"invert_match"` InvertMatch bool `toml:"invert_match"`
Quotation string `toml:"quotation"` PreserveNewline bool `toml:"preserve_newline"`
Timeout *config.Duration `toml:"timeout"` Quotation string `toml:"quotation"`
Timeout *config.Duration `toml:"timeout"`
} }
const ( const (
@ -80,12 +81,11 @@ func (m *Multiline) IsEnabled() bool {
} }
func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string { func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string {
if m.matchQuotation(text) { if m.matchQuotation(text) || m.matchString(text) {
// Ignore the returned error as we cannot do anything about it anyway // Restore the newline removed by tail's scanner
_, _ = buffer.WriteString(text + "\n") if buffer.Len() > 0 && m.config.PreserveNewline {
return "" _, _ = buffer.WriteString("\n")
} }
if m.matchString(text) {
// Ignore the returned error as we cannot do anything about it anyway // Ignore the returned error as we cannot do anything about it anyway
_, _ = buffer.WriteString(text) _, _ = buffer.WriteString(text)
return "" return ""
@ -101,6 +101,9 @@ func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string {
} else { } else {
// Next // Next
if buffer.Len() > 0 { if buffer.Len() > 0 {
if m.config.PreserveNewline {
_, _ = buffer.WriteString("\n")
}
if _, err := buffer.WriteString(text); err != nil { if _, err := buffer.WriteString(text); err != nil {
return "" return ""
} }

View File

@ -239,7 +239,7 @@ func TestMultilineWhat(t *testing.T) {
require.Equal(t, MultilineMatchWhichLine(-1), w7) require.Equal(t, MultilineMatchWhichLine(-1), w7)
} }
func TestMultiLineQuoted(t *testing.T) { func TestMultilineQuoted(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
quotation string quotation string
@ -281,8 +281,9 @@ func TestMultiLineQuoted(t *testing.T) {
} }
c := &MultilineConfig{ c := &MultilineConfig{
MatchWhichLine: Next, MatchWhichLine: Next,
Quotation: tt.quotation, Quotation: tt.quotation,
PreserveNewline: true,
} }
m, err := c.NewMultiline() m, err := c.NewMultiline()
require.NoError(t, err) require.NoError(t, err)
@ -303,13 +304,16 @@ func TestMultiLineQuoted(t *testing.T) {
} }
result = append(result, text) result = append(result, text)
} }
if text := m.Flush(&buffer); text != "" {
result = append(result, text)
}
require.EqualValues(t, expected, result) require.EqualValues(t, expected, result)
}) })
} }
} }
func TestMultiLineQuotedError(t *testing.T) { func TestMultilineQuotedError(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
filename string filename string
@ -332,15 +336,16 @@ func TestMultiLineQuotedError(t *testing.T) {
filename: "multiline_quoted_missing_close.csv", filename: "multiline_quoted_missing_close.csv",
quotation: "single-quotes", quotation: "single-quotes",
quote: `'`, quote: `'`,
expected: nil, expected: []string{"1660819827411,2,'some text all quoted,B\n1660819827410,1,some text without quotes,A"},
}, },
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
c := &MultilineConfig{ c := &MultilineConfig{
MatchWhichLine: Next, MatchWhichLine: Next,
Quotation: tt.quotation, Quotation: tt.quotation,
PreserveNewline: true,
} }
m, err := c.NewMultiline() m, err := c.NewMultiline()
require.NoError(t, err) require.NoError(t, err)
@ -361,6 +366,84 @@ func TestMultiLineQuotedError(t *testing.T) {
} }
result = append(result, text) result = append(result, text)
} }
if text := m.Flush(&buffer); text != "" {
result = append(result, text)
}
require.EqualValues(t, tt.expected, result)
})
}
}
func TestMultilineNewline(t *testing.T) {
tests := []struct {
name string
filename string
cfg *MultilineConfig
expected []string
}{
{
name: "do not preserve newline",
cfg: &MultilineConfig{
Pattern: `\[[0-9]{2}/[A-Za-z]{3}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2} \+[0-9]{4}\]`,
InvertMatch: true,
},
filename: "test_multiline.log",
expected: []string{
`[04/Jun/2016:12:41:45 +0100] DEBUG HelloExample: This is debug`,
`[04/Jun/2016:12:41:48 +0100] INFO HelloExample: This is info`,
"[04/Jun/2016:12:41:46 +0100] ERROR HelloExample: Sorry, something wrong! " +
"java.lang.ArithmeticException: / by zero" +
"\tat com.foo.HelloExample2.divide(HelloExample2.java:24)" +
"\tat com.foo.HelloExample2.main(HelloExample2.java:14)",
`[04/Jun/2016:12:41:48 +0100] WARN HelloExample: This is warn`,
},
},
{
name: "preserve newline",
cfg: &MultilineConfig{
Pattern: `\[[0-9]{2}/[A-Za-z]{3}/[0-9]{4}:[0-9]{2}:[0-9]{2}:[0-9]{2} \+[0-9]{4}\]`,
InvertMatch: true,
PreserveNewline: true,
},
filename: "test_multiline.log",
expected: []string{
`[04/Jun/2016:12:41:45 +0100] DEBUG HelloExample: This is debug`,
`[04/Jun/2016:12:41:48 +0100] INFO HelloExample: This is info`,
`[04/Jun/2016:12:41:46 +0100] ERROR HelloExample: Sorry, something wrong!` + ` ` + `
java.lang.ArithmeticException: / by zero
at com.foo.HelloExample2.divide(HelloExample2.java:24)
at com.foo.HelloExample2.main(HelloExample2.java:14)`,
`[04/Jun/2016:12:41:48 +0100] WARN HelloExample: This is warn`,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
m, err := tt.cfg.NewMultiline()
require.NoError(t, err)
f, err := os.Open(filepath.Join("testdata", tt.filename))
require.NoError(t, err)
scanner := bufio.NewScanner(f)
var buffer bytes.Buffer
var result []string
for scanner.Scan() {
line := scanner.Text()
text := m.ProcessLine(line, &buffer)
if text == "" {
continue
}
result = append(result, text)
}
if text := m.Flush(&buffer); text != "" {
result = append(result, text)
}
require.EqualValues(t, tt.expected, result) require.EqualValues(t, tt.expected, result)
}) })
} }
@ -368,9 +451,10 @@ func TestMultiLineQuotedError(t *testing.T) {
func TestMultiLineQuotedAndPattern(t *testing.T) { func TestMultiLineQuotedAndPattern(t *testing.T) {
c := &MultilineConfig{ c := &MultilineConfig{
Pattern: "=>$", Pattern: "=>$",
MatchWhichLine: Next, MatchWhichLine: Next,
Quotation: "double-quotes", Quotation: "double-quotes",
PreserveNewline: true,
} }
m, err := c.NewMultiline() m, err := c.NewMultiline()
require.NoError(t, err, "Configuration was OK.") require.NoError(t, err, "Configuration was OK.")
@ -397,7 +481,7 @@ func TestMultiLineQuotedAndPattern(t *testing.T) {
require.NotZero(t, buffer.Len()) require.NotZero(t, buffer.Len())
text = m.ProcessLine("4", &buffer) text = m.ProcessLine("4", &buffer)
require.Equal(t, "1=>2=>\"a quoted\nmultiline string\"=>3=>4", text) require.Equal(t, "1=>\n2=>\n\"a quoted\nmultiline string\"=>\n3=>\n4", text)
require.Zero(t, buffer.Len()) require.Zero(t, buffer.Len())
text = m.ProcessLine("5", &buffer) text = m.ProcessLine("5", &buffer)

View File

@ -71,5 +71,10 @@
## When handling quotes, escaped quotes (e.g. \") are handled correctly. ## When handling quotes, escaped quotes (e.g. \") are handled correctly.
#quotation = "ignore" #quotation = "ignore"
## The preserve_newline option can be true or false (defaults to false).
## If true, the newline character is preserved for multiline elements,
## this is useful to preserve message-structure e.g. for logging outputs.
#preserve_newline = false
#After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s. #After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
#timeout = 5s #timeout = 5s