feat(inputs.tail): Allow handling of quoted strings spanning multiple lines (#11762)
This commit is contained in:
parent
cdc622e9db
commit
9acbf23ebb
|
|
@ -95,6 +95,15 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## If true, a message not matching the pattern will constitute a match of the multiline filter and the match_which_line setting will be applied (and vice versa).
|
||||
#invert_match = false
|
||||
|
||||
## The handling method for quoted text (defaults to 'ignore').
|
||||
## The following methods are available:
|
||||
## ignore -- do not consider quotation (default)
|
||||
## single-quotes -- consider text quoted by single quotes (')
|
||||
## double-quotes -- consider text quoted by double quotes (")
|
||||
## backticks -- consider text quoted by backticks (`)
|
||||
## When handling quotes, escaped quotes (e.g. \") are handled correctly.
|
||||
#quotation = "ignore"
|
||||
|
||||
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
||||
#timeout = 5s
|
||||
```
|
||||
|
|
@ -103,3 +112,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
|
||||
Metrics are produced according to the `data_format` option. Additionally a
|
||||
tag labeled `path` is added to the metric containing the filename being tailed.
|
||||
|
||||
## Example Output
|
||||
|
||||
There is no predefined metric format, so output depends on plugin input.
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package tail
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
|
@ -17,13 +18,16 @@ type Multiline struct {
|
|||
config *MultilineConfig
|
||||
enabled bool
|
||||
patternRegexp *regexp.Regexp
|
||||
quote byte
|
||||
inQuote bool
|
||||
}
|
||||
|
||||
type MultilineConfig struct {
|
||||
Pattern string
|
||||
Pattern string `toml:"pattern"`
|
||||
MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"`
|
||||
InvertMatch bool
|
||||
Timeout *config.Duration
|
||||
InvertMatch bool `toml:"invert_match"`
|
||||
Quotation string `toml:"quotation"`
|
||||
Timeout *config.Duration `toml:"timeout"`
|
||||
}
|
||||
|
||||
const (
|
||||
|
|
@ -34,25 +38,41 @@ const (
|
|||
)
|
||||
|
||||
func (m *MultilineConfig) NewMultiline() (*Multiline, error) {
|
||||
enabled := false
|
||||
var r *regexp.Regexp
|
||||
var err error
|
||||
|
||||
if m.Pattern != "" {
|
||||
enabled = true
|
||||
var err error
|
||||
if r, err = regexp.Compile(m.Pattern); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) {
|
||||
d := config.Duration(5 * time.Second)
|
||||
m.Timeout = &d
|
||||
}
|
||||
}
|
||||
|
||||
var quote byte
|
||||
switch m.Quotation {
|
||||
case "", "ignore":
|
||||
m.Quotation = "ignore"
|
||||
case "single-quotes":
|
||||
quote = '\''
|
||||
case "double-quotes":
|
||||
quote = '"'
|
||||
case "backticks":
|
||||
quote = '`'
|
||||
default:
|
||||
return nil, errors.New("invalid 'quotation' setting")
|
||||
}
|
||||
|
||||
enabled := m.Pattern != "" || quote != 0
|
||||
if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) {
|
||||
d := config.Duration(5 * time.Second)
|
||||
m.Timeout = &d
|
||||
}
|
||||
|
||||
return &Multiline{
|
||||
config: m,
|
||||
enabled: enabled,
|
||||
patternRegexp: r}, nil
|
||||
patternRegexp: r,
|
||||
quote: quote,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (m *Multiline) IsEnabled() bool {
|
||||
|
|
@ -60,10 +80,14 @@ func (m *Multiline) IsEnabled() bool {
|
|||
}
|
||||
|
||||
func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string {
|
||||
if m.matchQuotation(text) {
|
||||
// Ignore the returned error as we cannot do anything about it anyway
|
||||
_, _ = buffer.WriteString(text + "\n")
|
||||
return ""
|
||||
}
|
||||
if m.matchString(text) {
|
||||
// Ignore the returned error as we cannot do anything about it anyway
|
||||
//nolint:errcheck,revive
|
||||
buffer.WriteString(text)
|
||||
_, _ = buffer.WriteString(text)
|
||||
return ""
|
||||
}
|
||||
|
||||
|
|
@ -97,8 +121,39 @@ func (m *Multiline) Flush(buffer *bytes.Buffer) string {
|
|||
return text
|
||||
}
|
||||
|
||||
func (m *Multiline) matchQuotation(text string) bool {
|
||||
if m.config.Quotation == "ignore" {
|
||||
return false
|
||||
}
|
||||
escaped := 0
|
||||
count := 0
|
||||
for i := 0; i < len(text); i++ {
|
||||
if text[i] == '\\' {
|
||||
escaped++
|
||||
continue
|
||||
}
|
||||
|
||||
// If we do encounter a backslash-quote combination, we interpret this
|
||||
// as an escaped-quoted and should not count the quote. However,
|
||||
// backslash-backslash combinations (or any even number of backslashes)
|
||||
// are interpreted as a literal backslash not escaping the quote.
|
||||
if text[i] == m.quote && escaped%2 == 0 {
|
||||
count++
|
||||
}
|
||||
// If we encounter any non-quote, non-backslash character we can
|
||||
// safely reset the escape state.
|
||||
escaped = 0
|
||||
}
|
||||
even := count%2 == 0
|
||||
m.inQuote = (m.inQuote && even) || (!m.inQuote && !even)
|
||||
return m.inQuote
|
||||
}
|
||||
|
||||
func (m *Multiline) matchString(text string) bool {
|
||||
return m.patternRegexp.MatchString(text) != m.config.InvertMatch
|
||||
if m.patternRegexp != nil {
|
||||
return m.patternRegexp.MatchString(text) != m.config.InvertMatch
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (w MultilineMatchWhichLine) String() string {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
package tail
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
|
@ -234,3 +238,169 @@ func TestMultilineWhat(t *testing.T) {
|
|||
require.Error(t, w7.UnmarshalTOML([]byte(`nope`)))
|
||||
require.Equal(t, MultilineMatchWhichLine(-1), w7)
|
||||
}
|
||||
|
||||
func TestMultiLineQuoted(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
quotation string
|
||||
quote string
|
||||
filename string
|
||||
}{
|
||||
{
|
||||
name: "single-quotes",
|
||||
quotation: "single-quotes",
|
||||
quote: `'`,
|
||||
filename: "multiline_quoted_single.csv",
|
||||
},
|
||||
{
|
||||
name: "double-quotes",
|
||||
quotation: "double-quotes",
|
||||
quote: `"`,
|
||||
filename: "multiline_quoted_double.csv",
|
||||
},
|
||||
{
|
||||
name: "backticks",
|
||||
quotation: "backticks",
|
||||
quote: "`",
|
||||
filename: "multiline_quoted_backticks.csv",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
expected := []string{
|
||||
`1660819827410,1,some text without quotes,A`,
|
||||
fmt.Sprintf("1660819827411,1,%ssome text all quoted%s,A", tt.quote, tt.quote),
|
||||
fmt.Sprintf("1660819827412,1,%ssome text all quoted\nbut wrapped%s,A", tt.quote, tt.quote),
|
||||
fmt.Sprintf("1660819827420,2,some text with %squotes%s,B", tt.quote, tt.quote),
|
||||
"1660819827430,3,some text with 'multiple \"quotes\" in `one` line',C",
|
||||
fmt.Sprintf("1660819827440,4,some multiline text with %squotes\n", tt.quote) +
|
||||
fmt.Sprintf("spanning \\%smultiple\\%s\n", tt.quote, tt.quote) +
|
||||
fmt.Sprintf("lines%s but do not %send\ndirectly%s,D", tt.quote, tt.quote, tt.quote),
|
||||
fmt.Sprintf("1660819827450,5,all of %sthis%s should %sbasically%s work...,E", tt.quote, tt.quote, tt.quote, tt.quote),
|
||||
}
|
||||
|
||||
c := &MultilineConfig{
|
||||
MatchWhichLine: Next,
|
||||
Quotation: tt.quotation,
|
||||
}
|
||||
m, err := c.NewMultiline()
|
||||
require.NoError(t, err)
|
||||
|
||||
f, err := os.Open(filepath.Join("testdata", tt.filename))
|
||||
require.NoError(t, err)
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
|
||||
var buffer bytes.Buffer
|
||||
var result []string
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
text := m.ProcessLine(line, &buffer)
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
result = append(result, text)
|
||||
}
|
||||
|
||||
require.EqualValues(t, expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiLineQuotedError(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
filename string
|
||||
quotation string
|
||||
quote string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "messed up quoting",
|
||||
filename: "multiline_quoted_messed_up.csv",
|
||||
quotation: "single-quotes",
|
||||
quote: `'`,
|
||||
expected: []string{
|
||||
"1660819827410,1,some text without quotes,A",
|
||||
"1660819827411,1,'some text all quoted,A\n1660819827412,1,'some text all quoted",
|
||||
"but wrapped,A"},
|
||||
},
|
||||
{
|
||||
name: "missing closing quote",
|
||||
filename: "multiline_quoted_missing_close.csv",
|
||||
quotation: "single-quotes",
|
||||
quote: `'`,
|
||||
expected: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
c := &MultilineConfig{
|
||||
MatchWhichLine: Next,
|
||||
Quotation: tt.quotation,
|
||||
}
|
||||
m, err := c.NewMultiline()
|
||||
require.NoError(t, err)
|
||||
|
||||
f, err := os.Open(filepath.Join("testdata", tt.filename))
|
||||
require.NoError(t, err)
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
|
||||
var buffer bytes.Buffer
|
||||
var result []string
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
text := m.ProcessLine(line, &buffer)
|
||||
if text == "" {
|
||||
continue
|
||||
}
|
||||
result = append(result, text)
|
||||
}
|
||||
require.EqualValues(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMultiLineQuotedAndPattern(t *testing.T) {
|
||||
c := &MultilineConfig{
|
||||
Pattern: "=>$",
|
||||
MatchWhichLine: Next,
|
||||
Quotation: "double-quotes",
|
||||
}
|
||||
m, err := c.NewMultiline()
|
||||
require.NoError(t, err, "Configuration was OK.")
|
||||
var buffer bytes.Buffer
|
||||
|
||||
text := m.ProcessLine("1=>", &buffer)
|
||||
require.Empty(t, text)
|
||||
require.NotZero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine("2=>", &buffer)
|
||||
require.Empty(t, text)
|
||||
require.NotZero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine(`"a quoted`, &buffer)
|
||||
require.Empty(t, text)
|
||||
require.NotZero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine(`multiline string"=>`, &buffer)
|
||||
require.Empty(t, text)
|
||||
require.NotZero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine("3=>", &buffer)
|
||||
require.Empty(t, text)
|
||||
require.NotZero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine("4", &buffer)
|
||||
require.Equal(t, "1=>2=>\"a quoted\nmultiline string\"=>3=>4", text)
|
||||
require.Zero(t, buffer.Len())
|
||||
|
||||
text = m.ProcessLine("5", &buffer)
|
||||
require.Equal(t, "5", text)
|
||||
require.Zero(t, buffer.Len())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,5 +62,14 @@
|
|||
## If true, a message not matching the pattern will constitute a match of the multiline filter and the match_which_line setting will be applied (and vice versa).
|
||||
#invert_match = false
|
||||
|
||||
## The handling method for quoted text (defaults to 'ignore').
|
||||
## The following methods are available:
|
||||
## ignore -- do not consider quotation (default)
|
||||
## single-quotes -- consider text quoted by single quotes (')
|
||||
## double-quotes -- consider text quoted by double quotes (")
|
||||
## backticks -- consider text quoted by backticks (`)
|
||||
## When handling quotes, escaped quotes (e.g. \") are handled correctly.
|
||||
#quotation = "ignore"
|
||||
|
||||
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
||||
#timeout = 5s
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
1660819827410,1,some text without quotes,A
|
||||
1660819827411,1,`some text all quoted`,A
|
||||
1660819827412,1,`some text all quoted
|
||||
but wrapped`,A
|
||||
1660819827420,2,some text with `quotes`,B
|
||||
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||
1660819827440,4,some multiline text with `quotes
|
||||
spanning \`multiple\`
|
||||
lines` but do not `end
|
||||
directly`,D
|
||||
1660819827450,5,all of `this` should `basically` work...,E
|
||||
|
||||
|
Can't render this file because it contains an unexpected character in line 6 and column 42.
|
|
|
@ -0,0 +1,12 @@
|
|||
1660819827410,1,some text without quotes,A
|
||||
1660819827411,1,"some text all quoted",A
|
||||
1660819827412,1,"some text all quoted
|
||||
but wrapped",A
|
||||
1660819827420,2,some text with "quotes",B
|
||||
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||
1660819827440,4,some multiline text with "quotes
|
||||
spanning \"multiple\"
|
||||
lines" but do not "end
|
||||
directly",D
|
||||
1660819827450,5,all of "this" should "basically" work...,E
|
||||
|
||||
|
Can't render this file because it contains an unexpected character in line 5 and column 32.
|
|
|
@ -0,0 +1,4 @@
|
|||
1660819827410,1,some text without quotes,A
|
||||
1660819827411,1,'some text all quoted,A
|
||||
1660819827412,1,'some text all quoted
|
||||
but wrapped,A
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
1660819827411,2,'some text all quoted,B
|
||||
1660819827410,1,some text without quotes,A
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
1660819827410,1,some text without quotes,A
|
||||
1660819827411,1,'some text all quoted',A
|
||||
1660819827412,1,'some text all quoted
|
||||
but wrapped',A
|
||||
1660819827420,2,some text with 'quotes',B
|
||||
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||
1660819827440,4,some multiline text with 'quotes
|
||||
spanning \'multiple\'
|
||||
lines' but do not 'end
|
||||
directly',D
|
||||
1660819827450,5,all of 'this' should 'basically' work...,E
|
||||
|
||||
|
Can't render this file because it contains an unexpected character in line 6 and column 42.
|
Loading…
Reference in New Issue