feat(inputs.tail): Allow handling of quoted strings spanning multiple lines (#11762)
This commit is contained in:
parent
cdc622e9db
commit
9acbf23ebb
|
|
@ -95,6 +95,15 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||||
## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true)
|
## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true)
|
||||||
#invert_match = false
|
#invert_match = false
|
||||||
|
|
||||||
|
## The handling method for quoted text (defaults to 'ignore').
|
||||||
|
## The following methods are available:
|
||||||
|
## ignore -- do not consider quotation (default)
|
||||||
|
## single-quotes -- consider text quoted by single quotes (')
|
||||||
|
## double-quotes -- consider text quoted by double quotes (")
|
||||||
|
## backticks -- consider text quoted by backticks (`)
|
||||||
|
## When handling quotes, escaped quotes (e.g. \") are handled correctly.
|
||||||
|
#quotation = "ignore"
|
||||||
|
|
||||||
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
||||||
#timeout = 5s
|
#timeout = 5s
|
||||||
```
|
```
|
||||||
|
|
@ -103,3 +112,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||||
|
|
||||||
Metrics are produced according to the `data_format` option. Additionally a
|
Metrics are produced according to the `data_format` option. Additionally a
|
||||||
tag labeled `path` is added to the metric containing the filename being tailed.
|
tag labeled `path` is added to the metric containing the filename being tailed.
|
||||||
|
|
||||||
|
## Example Output
|
||||||
|
|
||||||
|
There is no predefined metric format, so output depends on plugin input.
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package tail
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
@ -17,13 +18,16 @@ type Multiline struct {
|
||||||
config *MultilineConfig
|
config *MultilineConfig
|
||||||
enabled bool
|
enabled bool
|
||||||
patternRegexp *regexp.Regexp
|
patternRegexp *regexp.Regexp
|
||||||
|
quote byte
|
||||||
|
inQuote bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type MultilineConfig struct {
|
type MultilineConfig struct {
|
||||||
Pattern string
|
Pattern string `toml:"pattern"`
|
||||||
MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"`
|
MatchWhichLine MultilineMatchWhichLine `toml:"match_which_line"`
|
||||||
InvertMatch bool
|
InvertMatch bool `toml:"invert_match"`
|
||||||
Timeout *config.Duration
|
Quotation string `toml:"quotation"`
|
||||||
|
Timeout *config.Duration `toml:"timeout"`
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -34,25 +38,41 @@ const (
|
||||||
)
|
)
|
||||||
|
|
||||||
func (m *MultilineConfig) NewMultiline() (*Multiline, error) {
|
func (m *MultilineConfig) NewMultiline() (*Multiline, error) {
|
||||||
enabled := false
|
|
||||||
var r *regexp.Regexp
|
var r *regexp.Regexp
|
||||||
var err error
|
|
||||||
|
|
||||||
if m.Pattern != "" {
|
if m.Pattern != "" {
|
||||||
enabled = true
|
var err error
|
||||||
if r, err = regexp.Compile(m.Pattern); err != nil {
|
if r, err = regexp.Compile(m.Pattern); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) {
|
}
|
||||||
d := config.Duration(5 * time.Second)
|
|
||||||
m.Timeout = &d
|
var quote byte
|
||||||
}
|
switch m.Quotation {
|
||||||
|
case "", "ignore":
|
||||||
|
m.Quotation = "ignore"
|
||||||
|
case "single-quotes":
|
||||||
|
quote = '\''
|
||||||
|
case "double-quotes":
|
||||||
|
quote = '"'
|
||||||
|
case "backticks":
|
||||||
|
quote = '`'
|
||||||
|
default:
|
||||||
|
return nil, errors.New("invalid 'quotation' setting")
|
||||||
|
}
|
||||||
|
|
||||||
|
enabled := m.Pattern != "" || quote != 0
|
||||||
|
if m.Timeout == nil || time.Duration(*m.Timeout).Nanoseconds() == int64(0) {
|
||||||
|
d := config.Duration(5 * time.Second)
|
||||||
|
m.Timeout = &d
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Multiline{
|
return &Multiline{
|
||||||
config: m,
|
config: m,
|
||||||
enabled: enabled,
|
enabled: enabled,
|
||||||
patternRegexp: r}, nil
|
patternRegexp: r,
|
||||||
|
quote: quote,
|
||||||
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Multiline) IsEnabled() bool {
|
func (m *Multiline) IsEnabled() bool {
|
||||||
|
|
@ -60,10 +80,14 @@ func (m *Multiline) IsEnabled() bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string {
|
func (m *Multiline) ProcessLine(text string, buffer *bytes.Buffer) string {
|
||||||
|
if m.matchQuotation(text) {
|
||||||
|
// Ignore the returned error as we cannot do anything about it anyway
|
||||||
|
_, _ = buffer.WriteString(text + "\n")
|
||||||
|
return ""
|
||||||
|
}
|
||||||
if m.matchString(text) {
|
if m.matchString(text) {
|
||||||
// Ignore the returned error as we cannot do anything about it anyway
|
// Ignore the returned error as we cannot do anything about it anyway
|
||||||
//nolint:errcheck,revive
|
_, _ = buffer.WriteString(text)
|
||||||
buffer.WriteString(text)
|
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -97,8 +121,39 @@ func (m *Multiline) Flush(buffer *bytes.Buffer) string {
|
||||||
return text
|
return text
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *Multiline) matchQuotation(text string) bool {
|
||||||
|
if m.config.Quotation == "ignore" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
escaped := 0
|
||||||
|
count := 0
|
||||||
|
for i := 0; i < len(text); i++ {
|
||||||
|
if text[i] == '\\' {
|
||||||
|
escaped++
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we do encounter a backslash-quote combination, we interpret this
|
||||||
|
// as an escaped quote and should not count the quote. However,
|
||||||
|
// backslash-backslash combinations (or any even number of backslashes)
|
||||||
|
// are interpreted as a literal backslash not escaping the quote.
|
||||||
|
if text[i] == m.quote && escaped%2 == 0 {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
// After any non-backslash character (a quote included) we can
|
||||||
|
// safely reset the escape state.
|
||||||
|
escaped = 0
|
||||||
|
}
|
||||||
|
even := count%2 == 0
|
||||||
|
m.inQuote = (m.inQuote && even) || (!m.inQuote && !even)
|
||||||
|
return m.inQuote
|
||||||
|
}
|
||||||
|
|
||||||
func (m *Multiline) matchString(text string) bool {
|
func (m *Multiline) matchString(text string) bool {
|
||||||
return m.patternRegexp.MatchString(text) != m.config.InvertMatch
|
if m.patternRegexp != nil {
|
||||||
|
return m.patternRegexp.MatchString(text) != m.config.InvertMatch
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w MultilineMatchWhichLine) String() string {
|
func (w MultilineMatchWhichLine) String() string {
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,11 @@
|
||||||
package tail
|
package tail
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -234,3 +238,169 @@ func TestMultilineWhat(t *testing.T) {
|
||||||
require.Error(t, w7.UnmarshalTOML([]byte(`nope`)))
|
require.Error(t, w7.UnmarshalTOML([]byte(`nope`)))
|
||||||
require.Equal(t, MultilineMatchWhichLine(-1), w7)
|
require.Equal(t, MultilineMatchWhichLine(-1), w7)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestMultiLineQuoted(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
quotation string
|
||||||
|
quote string
|
||||||
|
filename string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "single-quotes",
|
||||||
|
quotation: "single-quotes",
|
||||||
|
quote: `'`,
|
||||||
|
filename: "multiline_quoted_single.csv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "double-quotes",
|
||||||
|
quotation: "double-quotes",
|
||||||
|
quote: `"`,
|
||||||
|
filename: "multiline_quoted_double.csv",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "backticks",
|
||||||
|
quotation: "backticks",
|
||||||
|
quote: "`",
|
||||||
|
filename: "multiline_quoted_backticks.csv",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
expected := []string{
|
||||||
|
`1660819827410,1,some text without quotes,A`,
|
||||||
|
fmt.Sprintf("1660819827411,1,%ssome text all quoted%s,A", tt.quote, tt.quote),
|
||||||
|
fmt.Sprintf("1660819827412,1,%ssome text all quoted\nbut wrapped%s,A", tt.quote, tt.quote),
|
||||||
|
fmt.Sprintf("1660819827420,2,some text with %squotes%s,B", tt.quote, tt.quote),
|
||||||
|
"1660819827430,3,some text with 'multiple \"quotes\" in `one` line',C",
|
||||||
|
fmt.Sprintf("1660819827440,4,some multiline text with %squotes\n", tt.quote) +
|
||||||
|
fmt.Sprintf("spanning \\%smultiple\\%s\n", tt.quote, tt.quote) +
|
||||||
|
fmt.Sprintf("lines%s but do not %send\ndirectly%s,D", tt.quote, tt.quote, tt.quote),
|
||||||
|
fmt.Sprintf("1660819827450,5,all of %sthis%s should %sbasically%s work...,E", tt.quote, tt.quote, tt.quote, tt.quote),
|
||||||
|
}
|
||||||
|
|
||||||
|
c := &MultilineConfig{
|
||||||
|
MatchWhichLine: Next,
|
||||||
|
Quotation: tt.quotation,
|
||||||
|
}
|
||||||
|
m, err := c.NewMultiline()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
f, err := os.Open(filepath.Join("testdata", tt.filename))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
|
||||||
|
var buffer bytes.Buffer
|
||||||
|
var result []string
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
|
||||||
|
text := m.ProcessLine(line, &buffer)
|
||||||
|
if text == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result = append(result, text)
|
||||||
|
}
|
||||||
|
|
||||||
|
require.EqualValues(t, expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMultiLineQuotedError(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
filename string
|
||||||
|
quotation string
|
||||||
|
quote string
|
||||||
|
expected []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "messed up quoting",
|
||||||
|
filename: "multiline_quoted_messed_up.csv",
|
||||||
|
quotation: "single-quotes",
|
||||||
|
quote: `'`,
|
||||||
|
expected: []string{
|
||||||
|
"1660819827410,1,some text without quotes,A",
|
||||||
|
"1660819827411,1,'some text all quoted,A\n1660819827412,1,'some text all quoted",
|
||||||
|
"but wrapped,A"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missing closing quote",
|
||||||
|
filename: "multiline_quoted_missing_close.csv",
|
||||||
|
quotation: "single-quotes",
|
||||||
|
quote: `'`,
|
||||||
|
expected: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
c := &MultilineConfig{
|
||||||
|
MatchWhichLine: Next,
|
||||||
|
Quotation: tt.quotation,
|
||||||
|
}
|
||||||
|
m, err := c.NewMultiline()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
f, err := os.Open(filepath.Join("testdata", tt.filename))
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(f)
|
||||||
|
|
||||||
|
var buffer bytes.Buffer
|
||||||
|
var result []string
|
||||||
|
for scanner.Scan() {
|
||||||
|
line := scanner.Text()
|
||||||
|
|
||||||
|
text := m.ProcessLine(line, &buffer)
|
||||||
|
if text == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
result = append(result, text)
|
||||||
|
}
|
||||||
|
require.EqualValues(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMultiLineQuotedAndPattern(t *testing.T) {
|
||||||
|
c := &MultilineConfig{
|
||||||
|
Pattern: "=>$",
|
||||||
|
MatchWhichLine: Next,
|
||||||
|
Quotation: "double-quotes",
|
||||||
|
}
|
||||||
|
m, err := c.NewMultiline()
|
||||||
|
require.NoError(t, err, "Configuration was OK.")
|
||||||
|
var buffer bytes.Buffer
|
||||||
|
|
||||||
|
text := m.ProcessLine("1=>", &buffer)
|
||||||
|
require.Empty(t, text)
|
||||||
|
require.NotZero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine("2=>", &buffer)
|
||||||
|
require.Empty(t, text)
|
||||||
|
require.NotZero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine(`"a quoted`, &buffer)
|
||||||
|
require.Empty(t, text)
|
||||||
|
require.NotZero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine(`multiline string"=>`, &buffer)
|
||||||
|
require.Empty(t, text)
|
||||||
|
require.NotZero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine("3=>", &buffer)
|
||||||
|
require.Empty(t, text)
|
||||||
|
require.NotZero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine("4", &buffer)
|
||||||
|
require.Equal(t, "1=>2=>\"a quoted\nmultiline string\"=>3=>4", text)
|
||||||
|
require.Zero(t, buffer.Len())
|
||||||
|
|
||||||
|
text = m.ProcessLine("5", &buffer)
|
||||||
|
require.Equal(t, "5", text)
|
||||||
|
require.Zero(t, buffer.Len())
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -62,5 +62,14 @@
|
||||||
## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true)
|
## If true, a message not matching the pattern will constitute a match of the multiline filter and the what will be applied. (vice-versa is also true)
|
||||||
#invert_match = false
|
#invert_match = false
|
||||||
|
|
||||||
|
## The handling method for quoted text (defaults to 'ignore').
|
||||||
|
## The following methods are available:
|
||||||
|
## ignore -- do not consider quotation (default)
|
||||||
|
## single-quotes -- consider text quoted by single quotes (')
|
||||||
|
## double-quotes -- consider text quoted by double quotes (")
|
||||||
|
## backticks -- consider text quoted by backticks (`)
|
||||||
|
## When handling quotes, escaped quotes (e.g. \") are handled correctly.
|
||||||
|
#quotation = "ignore"
|
||||||
|
|
||||||
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
## After the specified timeout, this plugin sends the multiline event even if no new pattern is found to start a new event. The default is 5s.
|
||||||
#timeout = 5s
|
#timeout = 5s
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
1660819827410,1,some text without quotes,A
|
||||||
|
1660819827411,1,`some text all quoted`,A
|
||||||
|
1660819827412,1,`some text all quoted
|
||||||
|
but wrapped`,A
|
||||||
|
1660819827420,2,some text with `quotes`,B
|
||||||
|
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||||
|
1660819827440,4,some multiline text with `quotes
|
||||||
|
spanning \`multiple\`
|
||||||
|
lines` but do not `end
|
||||||
|
directly`,D
|
||||||
|
1660819827450,5,all of `this` should `basically` work...,E
|
||||||
|
|
||||||
|
Can't render this file because it contains an unexpected character in line 6 and column 42.
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
1660819827410,1,some text without quotes,A
|
||||||
|
1660819827411,1,"some text all quoted",A
|
||||||
|
1660819827412,1,"some text all quoted
|
||||||
|
but wrapped",A
|
||||||
|
1660819827420,2,some text with "quotes",B
|
||||||
|
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||||
|
1660819827440,4,some multiline text with "quotes
|
||||||
|
spanning \"multiple\"
|
||||||
|
lines" but do not "end
|
||||||
|
directly",D
|
||||||
|
1660819827450,5,all of "this" should "basically" work...,E
|
||||||
|
|
||||||
|
Can't render this file because it contains an unexpected character in line 5 and column 32.
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
1660819827410,1,some text without quotes,A
|
||||||
|
1660819827411,1,'some text all quoted,A
|
||||||
|
1660819827412,1,'some text all quoted
|
||||||
|
but wrapped,A
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
1660819827411,2,'some text all quoted,B
|
||||||
|
1660819827410,1,some text without quotes,A
|
||||||
|
|
|
@ -0,0 +1,12 @@
|
||||||
|
1660819827410,1,some text without quotes,A
|
||||||
|
1660819827411,1,'some text all quoted',A
|
||||||
|
1660819827412,1,'some text all quoted
|
||||||
|
but wrapped',A
|
||||||
|
1660819827420,2,some text with 'quotes',B
|
||||||
|
1660819827430,3,some text with 'multiple "quotes" in `one` line',C
|
||||||
|
1660819827440,4,some multiline text with 'quotes
|
||||||
|
spanning \'multiple\'
|
||||||
|
lines' but do not 'end
|
||||||
|
directly',D
|
||||||
|
1660819827450,5,all of 'this' should 'basically' work...,E
|
||||||
|
|
||||||
|
Can't render this file because it contains an unexpected character in line 6 and column 42.
|
Loading…
Reference in New Issue