diff --git a/plugins/inputs/tail/README.md b/plugins/inputs/tail/README.md index 362d9e2b0..4163bc2bf 100644 --- a/plugins/inputs/tail/README.md +++ b/plugins/inputs/tail/README.md @@ -95,6 +95,15 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true) #invert_match = false + ## The handling method for quoted text (defaults to 'ignore'). + ## The following methods are available: + ## ignore -- do not consider quotation (default) + ## single-quotes -- consider text quoted by single quotes (') + ## double-quotes -- consider text quoted by double quotes (") + ## backticks -- consider text quoted by backticks (`) + ## When handling quotes, escaped quotes (e.g. \") are handled correctly. + #quotation = "ignore" + #After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s. #timeout = 5s ``` @@ -103,3 +112,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. Metrics are produced according to the `data_format` option. Additionally a tag labeled `path` is added to the metric containing the filename being tailed. + +## Example Output + +There is no predefined metric format, so output depends on plugin input. 
diff --git a/plugins/inputs/tail/multiline.go b/plugins/inputs/tail/multiline.go index c8d5d4f6d..2b03aa11a 100644 --- a/plugins/inputs/tail/multiline.go +++ b/plugins/inputs/tail/multiline.go @@ -2,6 +2,7 @@ package tail import ( "bytes" + "errors" "fmt" "regexp" "strings" @@ -17,13 +18,16 @@ type Multiline struct { config *MultilineConfig enabled bool patternRegexp *regexp.Regexp + quote byte + inQuote bool } type MultilineConfig struct { - Pattern string + Pattern string `toml:"pattern"` MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"` - InvertMatch bool - Timeout *config.Duration + InvertMatch bool `toml:"invert_match"` + Quotation string `toml:"quotation"` + Timeout *config.Duration `toml:"timeout"` } const ( @@ -34,25 +38,41 @@ const ( ) func (m *MultilineConfig) NewMultiline() (*Multiline, error) { - enabled := false var r *regexp.Regexp - var err error if m.Pattern != "" { - enabled = true + var err error if r, err = regexp.Compile(m.Pattern); err != nil { return nil, err } - if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) { - d := config.Duration(5 * time.Second) - m.Timeout = &d - } + } + + var quote byte + switch m.Quotation { + case "", "ignore": + m.Quotation = "ignore" + case "single-quotes": + quote = '\'' + case "double-quotes": + quote = '"' + case "backticks": + quote = '`' + default: + return nil, errors.New("invalid 'quotation' setting") + } + + enabled := m.Pattern != "" || quote != 0 + if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) { + d := config.Duration(5 * time.Second) + m.Timeout = &d } return &Multiline{ config: m, enabled: enabled, - patternRegexp: r}, nil + patternRegexp: r, + quote: quote, + }, nil } func (m *Multiline) IsEnabled() bool { @@ -60,10 +80,14 @@ func (m *Multiline) IsEnabled() bool { } func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string { + if m.matchQuotation(text) { + // Ignore the returned error as we cannot do anything about 
it anyway + _, _ = buffer.WriteString(text + "\n") + return "" + } if m.matchString(text) { // Ignore the returned error as we cannot do anything about it anyway - //nolint:errcheck,revive - buffer.WriteString(text) + _, _ = buffer.WriteString(text) return "" } @@ -97,8 +121,39 @@ func (m *Multiline) Flush(buffer *bytes.Buffer) string { return text } +func (m *Multiline) matchQuotation(text string) bool { + if m.config.Quotation == "ignore" { + return false + } + escaped := 0 + count := 0 + for i := 0; i < len(text); i++ { + if text[i] == '\\' { + escaped++ + continue + } + + // If we do encounter a backslash-quote combination, we interpret this + // as an escaped quote and do not count it. However, + // backslash-backslash combinations (or any even number of backslashes) + // are interpreted as literal backslashes that do not escape the quote. + if text[i] == m.quote && escaped%2 == 0 { + count++ + } + // Any character other than a backslash (including a quote) ends the + // backslash run, so reset the escape state. 
+ escaped = 0 + } + even := count%2 == 0 + m.inQuote = (m.inQuote && even) || (!m.inQuote && !even) + return m.inQuote +} + func (m *Multiline) matchString(text string) bool { - return m.patternRegexp.MatchString(text) != m.config.InvertMatch + if m.patternRegexp != nil { + return m.patternRegexp.MatchString(text) != m.config.InvertMatch + } + return false } func (w MultilineMatchWhichLine) String() string { diff --git a/plugins/inputs/tail/multiline_test.go b/plugins/inputs/tail/multiline_test.go index 70111f238..936b5df47 100644 --- a/plugins/inputs/tail/multiline_test.go +++ b/plugins/inputs/tail/multiline_test.go @@ -1,7 +1,11 @@ package tail import ( + "bufio" "bytes" + "fmt" + "os" + "path/filepath" "testing" "time" @@ -234,3 +238,169 @@ func TestMultilineWhat(t *testing.T) { require.Error(t, w7.UnmarshalTOML([]byte(`nope`))) require.Equal(t, MultilineMatchWhichLine(-1), w7) } + +func TestMultiLineQuoted(t *testing.T) { + tests := []struct { + name string + quotation string + quote string + filename string + }{ + { + name: "single-quotes", + quotation: "single-quotes", + quote: `'`, + filename: "multiline_quoted_single.csv", + }, + { + name: "double-quotes", + quotation: "double-quotes", + quote: `"`, + filename: "multiline_quoted_double.csv", + }, + { + name: "backticks", + quotation: "backticks", + quote: "`", + filename: "multiline_quoted_backticks.csv", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + expected := []string{ + `1660819827410,1,some text without quotes,A`, + fmt.Sprintf("1660819827411,1,%ssome text all quoted%s,A", tt.quote, tt.quote), + fmt.Sprintf("1660819827412,1,%ssome text all quoted\nbut wrapped%s,A", tt.quote, tt.quote), + fmt.Sprintf("1660819827420,2,some text with %squotes%s,B", tt.quote, tt.quote), + "1660819827430,3,some text with 'multiple \"quotes\" in `one` line',C", + fmt.Sprintf("1660819827440,4,some multiline text with %squotes\n", tt.quote) + + fmt.Sprintf("spanning \\%smultiple\\%s\n", 
tt.quote, tt.quote) + + fmt.Sprintf("lines%s but do not %send\ndirectly%s,D", tt.quote, tt.quote, tt.quote), + fmt.Sprintf("1660819827450,5,all of %sthis%s should %sbasically%s work...,E", tt.quote, tt.quote, tt.quote, tt.quote), + } + + c := &MultilineConfig{ + MatchWhichLine: Next, + Quotation: tt.quotation, + } + m, err := c.NewMultiline() + require.NoError(t, err) + + f, err := os.Open(filepath.Join("testdata", tt.filename)) + require.NoError(t, err) + + scanner := bufio.NewScanner(f) + + var buffer bytes.Buffer + var result []string + for scanner.Scan() { + line := scanner.Text() + + text := m.ProcessLine(line, &buffer) + if text == "" { + continue + } + result = append(result, text) + } + + require.EqualValues(t, expected, result) + }) + } +} + +func TestMultiLineQuotedError(t *testing.T) { + tests := []struct { + name string + filename string + quotation string + quote string + expected []string + }{ + { + name: "messed up quoting", + filename: "multiline_quoted_messed_up.csv", + quotation: "single-quotes", + quote: `'`, + expected: []string{ + "1660819827410,1,some text without quotes,A", + "1660819827411,1,'some text all quoted,A\n1660819827412,1,'some text all quoted", + "but wrapped,A"}, + }, + { + name: "missing closing quote", + filename: "multiline_quoted_missing_close.csv", + quotation: "single-quotes", + quote: `'`, + expected: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &MultilineConfig{ + MatchWhichLine: Next, + Quotation: tt.quotation, + } + m, err := c.NewMultiline() + require.NoError(t, err) + + f, err := os.Open(filepath.Join("testdata", tt.filename)) + require.NoError(t, err) + + scanner := bufio.NewScanner(f) + + var buffer bytes.Buffer + var result []string + for scanner.Scan() { + line := scanner.Text() + + text := m.ProcessLine(line, &buffer) + if text == "" { + continue + } + result = append(result, text) + } + require.EqualValues(t, tt.expected, result) + }) + } +} + +func 
TestMultiLineQuotedAndPattern(t *testing.T) { + c := &MultilineConfig{ + Pattern: "=>$", + MatchWhichLine: Next, + Quotation: "double-quotes", + } + m, err := c.NewMultiline() + require.NoError(t, err, "Configuration was OK.") + var buffer bytes.Buffer + + text := m.ProcessLine("1=>", &buffer) + require.Empty(t, text) + require.NotZero(t, buffer.Len()) + + text = m.ProcessLine("2=>", &buffer) + require.Empty(t, text) + require.NotZero(t, buffer.Len()) + + text = m.ProcessLine(`"a quoted`, &buffer) + require.Empty(t, text) + require.NotZero(t, buffer.Len()) + + text = m.ProcessLine(`multiline string"=>`, &buffer) + require.Empty(t, text) + require.NotZero(t, buffer.Len()) + + text = m.ProcessLine("3=>", &buffer) + require.Empty(t, text) + require.NotZero(t, buffer.Len()) + + text = m.ProcessLine("4", &buffer) + require.Equal(t, "1=>2=>\"a quoted\nmultiline string\"=>3=>4", text) + require.Zero(t, buffer.Len()) + + text = m.ProcessLine("5", &buffer) + require.Equal(t, "5", text) + require.Zero(t, buffer.Len()) +} diff --git a/plugins/inputs/tail/sample.conf b/plugins/inputs/tail/sample.conf index 07ed4a665..4a71cb846 100644 --- a/plugins/inputs/tail/sample.conf +++ b/plugins/inputs/tail/sample.conf @@ -62,5 +62,14 @@ ## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true) #invert_match = false + ## The handling method for quoted text (defaults to 'ignore'). + ## The following methods are available: + ## ignore -- do not consider quotation (default) + ## single-quotes -- consider text quoted by single quotes (') + ## double-quotes -- consider text quoted by double quotes (") + ## backticks -- consider text quoted by backticks (`) + ## When handling quotes, escaped quotes (e.g. \") are handled correctly. + #quotation = "ignore" + #After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s. 
#timeout = 5s diff --git a/plugins/inputs/tail/testdata/multiline_quoted_backticks.csv b/plugins/inputs/tail/testdata/multiline_quoted_backticks.csv new file mode 100644 index 000000000..6989d40ae --- /dev/null +++ b/plugins/inputs/tail/testdata/multiline_quoted_backticks.csv @@ -0,0 +1,12 @@ +1660819827410,1,some text without quotes,A +1660819827411,1,`some text all quoted`,A +1660819827412,1,`some text all quoted +but wrapped`,A +1660819827420,2,some text with `quotes`,B +1660819827430,3,some text with 'multiple "quotes" in `one` line',C +1660819827440,4,some multiline text with `quotes +spanning \`multiple\` +lines` but do not `end +directly`,D +1660819827450,5,all of `this` should `basically` work...,E + diff --git a/plugins/inputs/tail/testdata/multiline_quoted_double.csv b/plugins/inputs/tail/testdata/multiline_quoted_double.csv new file mode 100644 index 000000000..8a784dc4a --- /dev/null +++ b/plugins/inputs/tail/testdata/multiline_quoted_double.csv @@ -0,0 +1,12 @@ +1660819827410,1,some text without quotes,A +1660819827411,1,"some text all quoted",A +1660819827412,1,"some text all quoted +but wrapped",A +1660819827420,2,some text with "quotes",B +1660819827430,3,some text with 'multiple "quotes" in `one` line',C +1660819827440,4,some multiline text with "quotes +spanning \"multiple\" +lines" but do not "end +directly",D +1660819827450,5,all of "this" should "basically" work...,E + diff --git a/plugins/inputs/tail/testdata/multiline_quoted_messed_up.csv b/plugins/inputs/tail/testdata/multiline_quoted_messed_up.csv new file mode 100644 index 000000000..c3f3b8d94 --- /dev/null +++ b/plugins/inputs/tail/testdata/multiline_quoted_messed_up.csv @@ -0,0 +1,4 @@ +1660819827410,1,some text without quotes,A +1660819827411,1,'some text all quoted,A +1660819827412,1,'some text all quoted +but wrapped,A diff --git a/plugins/inputs/tail/testdata/multiline_quoted_missing_close.csv b/plugins/inputs/tail/testdata/multiline_quoted_missing_close.csv new file mode 100644 
index 000000000..ddf9cc757 --- /dev/null +++ b/plugins/inputs/tail/testdata/multiline_quoted_missing_close.csv @@ -0,0 +1,2 @@ +1660819827411,2,'some text all quoted,B +1660819827410,1,some text without quotes,A diff --git a/plugins/inputs/tail/testdata/multiline_quoted_single.csv b/plugins/inputs/tail/testdata/multiline_quoted_single.csv new file mode 100644 index 000000000..ade0ada36 --- /dev/null +++ b/plugins/inputs/tail/testdata/multiline_quoted_single.csv @@ -0,0 +1,12 @@ +1660819827410,1,some text without quotes,A +1660819827411,1,'some text all quoted',A +1660819827412,1,'some text all quoted +but wrapped',A +1660819827420,2,some text with 'quotes',B +1660819827430,3,some text with 'multiple "quotes" in `one` line',C +1660819827440,4,some multiline text with 'quotes +spanning \'multiple\' +lines' but do not 'end +directly',D +1660819827450,5,all of 'this' should 'basically' work...,E +