fix(config): Fix comment removal in TOML files (#14240)

This commit is contained in:
Sven Rebhan 2023-11-06 14:37:33 +01:00 committed by GitHub
parent ac171a07be
commit 1887d2983c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 474 additions and 99 deletions

View File

@ -20,8 +20,6 @@ import (
"sync" "sync"
"time" "time"
"github.com/compose-spec/compose-go/template"
"github.com/compose-spec/compose-go/utils"
"github.com/coreos/go-semver/semver" "github.com/coreos/go-semver/semver"
"github.com/influxdata/toml" "github.com/influxdata/toml"
"github.com/influxdata/toml/ast" "github.com/influxdata/toml/ast"
@ -790,103 +788,6 @@ func parseConfig(contents []byte) (*ast.Table, error) {
return toml.Parse(outputBytes) return toml.Parse(outputBytes)
} }
// removeComments returns a copy of contents with all TOML comments removed.
//
// A comment starts at a '#' outside of any string and extends up to, but not
// including, the next line break; the line break itself is preserved so line
// numbers stay stable. The content of basic ("..."), literal ('...'),
// multi-line basic ("""...""") and multi-line literal ('''...''') strings is
// copied verbatim, so a '#' inside a string never starts a comment. A
// backslash outside of a string protects the following byte, i.e. `\#` does
// not start a comment.
//
// The returned error is always nil; it is kept for interface compatibility.
func removeComments(contents []byte) ([]byte, error) {
	output := make([]byte, 0, len(contents))

	for pos := 0; pos < len(contents); {
		switch c := contents[pos]; c {
		case '\\':
			// Copy the backslash together with the byte it escapes
			end := pos + 2
			if end > len(contents) {
				end = len(contents)
			}
			output = append(output, contents[pos:end]...)
			pos = end
		case '"', '\'':
			// Copy the complete string including its delimiters
			end := tomlStringEnd(contents, pos)
			output = append(output, contents[pos:end]...)
			pos = end
		case '#':
			// Drop everything up to the line break; the line break itself is
			// copied by the next iteration.
			eol := bytes.IndexByte(contents[pos:], '\n')
			if eol < 0 {
				// The comment runs until the end of the input
				return output, nil
			}
			pos += eol
		default:
			output = append(output, c)
			pos++
		}
	}

	return output, nil
}

// tomlStringEnd returns the index just past the string that starts with the
// quote character at contents[start]. Triple quotes open a multi-line string
// that only ends at the matching triple quote; a single quote opens a string
// that ends at the matching quote or at the end of the line. Backslash escapes
// are only honored in double-quoted strings, literal (single-quoted) strings
// have no escape sequences. An unterminated string extends to the end of the
// input.
func tomlStringEnd(contents []byte, start int) int {
	quote := contents[start]

	// Determine the delimiter: one quote or three quotes
	delim := []byte{quote}
	if triple := []byte{quote, quote, quote}; bytes.HasPrefix(contents[start:], triple) {
		delim = triple
	}
	multiline := len(delim) == 3

	pos := start + len(delim)
	for pos < len(contents) {
		switch c := contents[pos]; {
		case c == '\\' && quote == '"':
			// Skip the backslash and the escaped byte
			pos += 2
		case c == '\n' && !multiline:
			// Single-line strings cannot span lines
			return pos + 1
		case bytes.HasPrefix(contents[pos:], delim):
			return pos + len(delim)
		default:
			pos++
		}
	}

	// Unterminated string (or escape at the very end of the input)
	return len(contents)
}
// substituteEnvironment runs the compose-go template substitution over
// contents, resolving variables from the process environment, and returns the
// substituted document. With oldReplacementBehavior set, the oldVarRe pattern
// is used to detect variables instead of the library default.
func substituteEnvironment(contents []byte, oldReplacementBehavior bool) ([]byte, error) {
	// Replacement hook wrapping the library default
	replace := func(pattern string, mapping template.Mapping, cfg *template.Config) (string, error) {
		result, applied, err := template.DefaultReplacementAppliedFunc(pattern, mapping, cfg)
		switch {
		case err == nil && !applied:
			// Keep undeclared environment-variable patterns to reproduce
			// pre-v1.27 behavior
			return pattern, nil
		case err != nil && strings.HasPrefix(err.Error(), "Invalid template:"):
			// Keep invalid template patterns to ignore regexp substitutions
			// like ${1}
			return pattern, nil
		default:
			return result, err
		}
	}

	options := []template.Option{
		template.WithReplacementFunction(replace),
		template.WithoutLogging,
	}
	if oldReplacementBehavior {
		options = append(options, template.WithPattern(oldVarRe))
	}

	// Snapshot the environment once and resolve variables from that snapshot
	envMap := utils.GetAsEqualsMap(os.Environ())
	lookup := func(key string) (string, bool) {
		value, found := envMap[key]
		if !found {
			return "", false
		}
		return value, true
	}

	substituted, err := template.SubstituteWithOptions(string(contents), lookup, options...)
	return []byte(substituted), err
}
func (c *Config) addAggregator(name string, table *ast.Table) error { func (c *Config) addAggregator(name string, table *ast.Table) error {
creator, ok := aggregators.Aggregators[name] creator, ok := aggregators.Aggregators[name]
if !ok { if !ok {

253
config/envvar.go Normal file
View File

@ -0,0 +1,253 @@
package config
import (
"bytes"
"errors"
"io"
"os"
"strings"
"github.com/compose-spec/compose-go/template"
"github.com/compose-spec/compose-go/utils"
)
// trimmer copies a TOML document from input to output while dropping all
// comments. Quoted strings are copied verbatim so that a '#' inside a string
// is never mistaken for the start of a comment.
type trimmer struct {
	input  *bytes.Reader
	output bytes.Buffer
}

// removeComments returns a copy of buf with all TOML comments removed.
//
// A comment starts at a '#' outside of any string and extends up to, but not
// including, the next line break; the line break itself is kept. Basic
// ("..."), literal ('...'), multi-line basic ("""...""") and multi-line
// literal ('''...''') strings are preserved byte for byte. On error no
// partial output is returned.
func removeComments(buf []byte) ([]byte, error) {
	t := &trimmer{input: bytes.NewReader(buf)}
	// The output can never be larger than the input
	t.output.Grow(len(buf))

	if err := t.process(); err != nil {
		return nil, err
	}
	return t.output.Bytes(), nil
}

// process is the top-level state of the scanner. It copies plain bytes and
// hands over to the specialized handlers whenever a backslash, a quote or a
// comment marker is encountered. Reaching the end of the input in any state
// ends processing without error.
func (t *trimmer) process() error {
	for {
		// Read the next byte until EOF
		c, err := t.input.ReadByte()
		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil
			}
			return err
		}

		// Switch states if we need to. Handlers for backslash and quotes
		// expect to read their starting byte themselves, so it is pushed
		// back first; UnreadByte cannot fail directly after a successful
		// ReadByte.
		switch c {
		case '\\':
			_ = t.input.UnreadByte()
			err = t.escape()
		case '\'':
			_ = t.input.UnreadByte()
			if t.hasNQuotes(c, 3) {
				err = t.tripleSingleQuote()
			} else {
				err = t.singleQuote()
			}
		case '"':
			_ = t.input.UnreadByte()
			if t.hasNQuotes(c, 3) {
				err = t.tripleDoubleQuote()
			} else {
				err = t.doubleQuote()
			}
		case '#':
			err = t.comment()
		default:
			err = t.output.WriteByte(c)
		}
		if err != nil {
			if errors.Is(err, io.EOF) {
				return nil
			}
			return err
		}
	}
}

// hasNQuotes reports whether the next limit bytes of the input all equal ref.
// The read position is left unchanged.
func (t *trimmer) hasNQuotes(ref byte, limit int64) bool {
	// consumed counts every byte actually taken from the input, matched
	// counts only those equal to ref. They differ by the one non-matching
	// byte that stopped the look-ahead; at EOF nothing is consumed, so
	// nothing extra must be rewound.
	var consumed, matched int64
	for matched < limit {
		c, err := t.input.ReadByte()
		if err != nil {
			break
		}
		consumed++
		if c != ref {
			break
		}
		matched++
	}

	// Rewind exactly what was read. This cannot fail as the target position
	// is one we have already been at.
	_, _ = t.input.Seek(-consumed, io.SeekCurrent)

	return matched >= limit
}

// readWriteByte copies a single byte from the input to the output and returns
// the copied byte.
func (t *trimmer) readWriteByte() (byte, error) {
	c, err := t.input.ReadByte()
	if err != nil {
		return 0, err
	}
	return c, t.output.WriteByte(c)
}

// copyBytes copies the next n bytes from the input to the output, stopping at
// the first error such as the end of the input.
func (t *trimmer) copyBytes(n int) error {
	for i := 0; i < n; i++ {
		if _, err := t.readWriteByte(); err != nil {
			return err
		}
	}
	return nil
}

// escape copies a backslash and the byte it escapes.
func (t *trimmer) escape() error {
	return t.copyBytes(2)
}

// singleQuote copies a literal string ('...'). The string ends at the next
// single quote, at the end of the line or at the end of the input. Backslashes
// have no special meaning in literal strings.
func (t *trimmer) singleQuote() error {
	return t.quoted('\'', false)
}

// tripleSingleQuote copies a multi-line literal string ('''...''') which ends
// at the next triple single quote or at the end of the input.
func (t *trimmer) tripleSingleQuote() error {
	return t.tripleQuoted('\'', false)
}

// doubleQuote copies a basic string ("..."). The string ends at the next
// unescaped double quote, at the end of the line or at the end of the input.
func (t *trimmer) doubleQuote() error {
	return t.quoted('"', true)
}

// tripleDoubleQuote copies a multi-line basic string ("""...""") which ends at
// the next unescaped triple double quote or at the end of the input.
func (t *trimmer) tripleDoubleQuote() error {
	return t.tripleQuoted('"', true)
}

// quoted copies a single-line string delimited by quote. If escapes is set, a
// backslash protects the byte following it from being seen as terminator.
func (t *trimmer) quoted(quote byte, escapes bool) error {
	// Consume the known starting quote
	if err := t.copyBytes(1); err != nil {
		return err
	}

	// Read bytes until EOF, line end or the closing quote
	for {
		c, err := t.input.ReadByte()
		if err != nil {
			return err
		}
		if escapes && c == '\\' {
			// Found escaped character
			_ = t.input.UnreadByte()
			if err := t.escape(); err != nil {
				return err
			}
			continue
		}
		if err := t.output.WriteByte(c); err != nil {
			return err
		}
		if c == quote || c == '\n' {
			// Found terminator
			return nil
		}
	}
}

// tripleQuoted copies a multi-line string delimited by three quote
// characters. If escapes is set, a backslash protects the byte following it
// from being seen as part of the terminator.
func (t *trimmer) tripleQuoted(quote byte, escapes bool) error {
	// Consume the known starting quotes
	if err := t.copyBytes(3); err != nil {
		return err
	}

	// Read bytes until EOF or the closing set of triple quotes
	for {
		c, err := t.input.ReadByte()
		if err != nil {
			return err
		}
		if escapes && c == '\\' {
			// Found escaped character
			_ = t.input.UnreadByte()
			if err := t.escape(); err != nil {
				return err
			}
			continue
		}
		if err := t.output.WriteByte(c); err != nil {
			return err
		}
		if c == quote && t.hasNQuotes(quote, 2) {
			// Consume the two additional ending quotes
			return t.copyBytes(2)
		}
	}
}

// comment drops all bytes up to the next line break and copies only the line
// break itself. The '#' starting the comment was already consumed by the
// caller.
func (t *trimmer) comment() error {
	// Read bytes until EOF or a line break
	for {
		c, err := t.input.ReadByte()
		if err != nil {
			return err
		}
		if c == '\n' {
			return t.output.WriteByte(c)
		}
	}
}
// substituteEnvironment replaces environment-variable references in contents
// via the compose-go template engine, taking the values from the process
// environment. When oldReplacementBehavior is true, variables are detected
// with the oldVarRe pattern rather than the library's default pattern.
func substituteEnvironment(contents []byte, oldReplacementBehavior bool) ([]byte, error) {
	// Wrap the default replacement so that some patterns stay untouched
	keepUnresolved := func(pattern string, mapping template.Mapping, cfg *template.Config) (string, error) {
		result, applied, err := template.DefaultReplacementAppliedFunc(pattern, mapping, cfg)
		if err != nil {
			if strings.HasPrefix(err.Error(), "Invalid template:") {
				// Keep invalid template patterns to ignore regexp
				// substitutions like ${1}
				return pattern, nil
			}
			return result, err
		}
		if !applied {
			// Keep undeclared environment-variable patterns to reproduce
			// pre-v1.27 behavior
			return pattern, nil
		}
		return result, nil
	}

	options := []template.Option{
		template.WithReplacementFunction(keepUnresolved),
		template.WithoutLogging,
	}
	if oldReplacementBehavior {
		options = append(options, template.WithPattern(oldVarRe))
	}

	// Resolve variables from a one-time snapshot of the environment
	envMap := utils.GetAsEqualsMap(os.Environ())
	fromEnv := func(name string) (string, bool) {
		value, found := envMap[name]
		if !found {
			return "", false
		}
		return value, true
	}

	replaced, err := template.SubstituteWithOptions(string(contents), fromEnv, options...)
	return []byte(replaced), err
}

View File

@ -1,9 +1,12 @@
package config package config
import ( import (
"bytes"
"fmt" "fmt"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os"
"path/filepath"
"testing" "testing"
"time" "time"
@ -351,6 +354,26 @@ func TestParseConfig(t *testing.T) {
} }
} }
// TestRemoveComments runs removeComments on the commented fixture file and
// compares the result against the expected fixture. Trailing spaces and tabs
// are trimmed from every line of the actual output before comparing.
func TestRemoveComments(t *testing.T) {
	// Load the expected result
	want, err := os.ReadFile(filepath.Join("testdata", "envvar_comments_expected.toml"))
	require.NoError(t, err)

	// Load the commented input and strip its comments
	input, err := os.ReadFile(filepath.Join("testdata", "envvar_comments.toml"))
	require.NoError(t, err)
	stripped, err := removeComments(input)
	require.NoError(t, err)

	// Normalize the line endings of the result; presumably this discards the
	// whitespace that preceded a removed trailing comment.
	newline := []byte("\n")
	rows := bytes.Split(stripped, newline)
	for i := range rows {
		rows[i] = bytes.TrimRight(rows[i], " \t")
	}
	got := string(bytes.Join(rows, newline))

	// Do the comparison
	require.Equal(t, string(want), got)
}
func TestURLRetries3Fails(t *testing.T) { func TestURLRetries3Fails(t *testing.T) {
httpLoadConfigRetryInterval = 0 * time.Second httpLoadConfigRetryInterval = 0 * time.Second
responseCounter := 0 responseCounter := 0

99
config/testdata/envvar_comments.toml vendored Normal file
View File

@ -0,0 +1,99 @@
# Telegraf Configuration
#
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared inputs, and sent to the declared outputs.
#
# Plugins must be declared in here to be active.
# To deactivate a plugin, comment out the name and any variables.
#
# Use 'telegraf -config telegraf.conf -test' to see what metrics a config
# file would generate.
#
# Environment variables can be used anywhere in this config file, simply surround
# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"),
# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR})
[global_tags]
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = '10s'
flush_jitter = "0s"
precision = ""
hostname = ''
omit_hostname = false
[[outputs.influxdb]]
setting1 = '#'#test
setting2 = '''#'''#test
setting3 = "#"#test
setting4 = """#"""#test
wicked1 = "\""#test
wicked2 = """\""""#test
[[inputs.cpu]]
percpu = true
#totalcpu = true
# collect_cpu_time = false
## report_active = false
[[a.plugin]]
mylist = [
"value 1", # a good value
"value 2", # a better value
"value 3", "value 4",
'value5', """tagwith#value""",
] # Should work
[[some.stuff]]
a = 'not a #comment'
b = '''not a #comment'''
c = "not a #comment"
d = """not a #comment"""
e = '''not a #comment containing "quotes"'''
f = '''not a #comment containing 'quotes'?'''
g = """not a #comment containing "quotes"?"""
# Issue #14237
[[inputs.myplugin]]
value = '''This isn't a #comment.'''
[[processors.starlark]]
script = """
# Drop fields if they contain a string.
#
# Example Input:
# measurement,host=hostname a=1,b="somestring" 1597255410000000000
#
# Example Output:
# measurement,host=hostname a=1 1597255410000000000
def apply(metric):
for k, v in metric.fields.items():
if type(v) == "string":
metric.fields.pop(k)
return metric
"""
[[processors.starlark]]
script = '''
# Drop fields if they contain a string.
#
# Example Input:
# measurement,host=hostname a=1,b="somestring" 1597255410000000000
#
# Example Output:
# measurement,host=hostname a=1 1597255410000000000
def apply(metric):
for k, v in metric.fields.items():
if type(v) == "string":
metric.fields.pop(k)
return metric
'''

View File

@ -0,0 +1,99 @@
[global_tags]
[agent]
interval = "10s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = '10s'
flush_jitter = "0s"
precision = ""
hostname = ''
omit_hostname = false
[[outputs.influxdb]]
setting1 = '#'
setting2 = '''#'''
setting3 = "#"
setting4 = """#"""
wicked1 = "\""
wicked2 = """\""""
[[inputs.cpu]]
percpu = true
[[a.plugin]]
mylist = [
"value 1",
"value 2",
"value 3", "value 4",
'value5', """tagwith#value""",
]
[[some.stuff]]
a = 'not a #comment'
b = '''not a #comment'''
c = "not a #comment"
d = """not a #comment"""
e = '''not a #comment containing "quotes"'''
f = '''not a #comment containing 'quotes'?'''
g = """not a #comment containing "quotes"?"""
[[inputs.myplugin]]
value = '''This isn't a #comment.'''
[[processors.starlark]]
script = """
# Drop fields if they contain a string.
#
# Example Input:
# measurement,host=hostname a=1,b="somestring" 1597255410000000000
#
# Example Output:
# measurement,host=hostname a=1 1597255410000000000
def apply(metric):
for k, v in metric.fields.items():
if type(v) == "string":
metric.fields.pop(k)
return metric
"""
[[processors.starlark]]
script = '''
# Drop fields if they contain a string.
#
# Example Input:
# measurement,host=hostname a=1,b="somestring" 1597255410000000000
#
# Example Output:
# measurement,host=hostname a=1 1597255410000000000
def apply(metric):
for k, v in metric.fields.items():
if type(v) == "string":
metric.fields.pop(k)
return metric
'''