feat(common.shim): Add batch to shim (#16148)

Co-authored-by: Thomas Casteleyn <thomas.casteleyn@me.com>
Co-authored-by: Sven Rebhan <srebhan@influxdata.com>
This commit is contained in:
Evgenii Kuznetsov 2025-04-23 16:47:39 +03:00 committed by GitHub
parent bcea4c278e
commit b715237606
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 209 additions and 21 deletions

View File

@ -39,6 +39,9 @@ type Shim struct {
Processor telegraf.StreamingProcessor
Output telegraf.Output
BatchSize int
BatchTimeout time.Duration
log telegraf.Logger
// streams
@ -56,11 +59,13 @@ type Shim struct {
// New creates a new shim interface with default settings: a metric channel
// of capacity one, the process' standard streams, and batching disabled in
// practice (batch size of one metric, flushed at the latest after ten
// seconds).
func New() *Shim {
	return &Shim{
		BatchSize:    1,
		BatchTimeout: 10 * time.Second,
		metricCh:     make(chan telegraf.Metric, 1),
		stdin:        os.Stdin,
		stdout:       os.Stdout,
		stderr:       os.Stderr,
		log:          logger.New("", "", ""),
	}
}

View File

@ -3,6 +3,9 @@ package shim
import (
"bufio"
"fmt"
"os"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/models"
@ -24,31 +27,97 @@ func (s *Shim) AddOutput(output telegraf.Output) error {
}
func (s *Shim) RunOutput() error {
// Create a parser for receiving the metrics in line-protocol format
parser := influx.Parser{}
err := parser.Init()
if err != nil {
if err := parser.Init(); err != nil {
return fmt.Errorf("failed to create new parser: %w", err)
}
err = s.Output.Connect()
if err != nil {
// Connect the output
if err := s.Output.Connect(); err != nil {
return fmt.Errorf("failed to start processor: %w", err)
}
defer s.Output.Close()
var m telegraf.Metric
// Collect the metrics from stdin. Note, we need to flush the metrics
// when the batch is full or after the configured time, whatever comes
// first. We need to lock the batch as we run into race conditions
// otherwise.
var mu sync.Mutex
metrics := make([]telegraf.Metric, 0, s.BatchSize)
// Prepare the flush timer...
flush := func(whole bool) {
mu.Lock()
defer mu.Unlock()
// Exit early if there is nothing to do
if len(metrics) == 0 {
return
}
// Determine the threshold on when to stop flushing depending on the
// given flag.
var threshold int
if whole {
threshold = s.BatchSize
}
// Flush out the metrics in batches of the configured size until we
// got all of them out or if there is less than a whole batch left.
for len(metrics) > 0 && len(metrics) >= threshold {
// Write the metrics and remove the batch
batch := metrics[:min(len(metrics), s.BatchSize)]
if err := s.Output.Write(batch); err != nil {
fmt.Fprintf(os.Stderr, "Failed to write metrics: %s\n", err)
}
metrics = metrics[len(batch):]
}
}
// Setup the time-based flush
var timer *time.Timer
if s.BatchTimeout > 0 {
timer = time.AfterFunc(s.BatchTimeout, func() { flush(false) })
defer func() {
if timer != nil {
timer.Stop()
}
}()
}
// Start the processing loop
scanner := bufio.NewScanner(s.stdin)
for scanner.Scan() {
m, err = parser.ParseLine(scanner.Text())
// Read metrics from stdin
m, err := parser.ParseLine(scanner.Text())
if err != nil {
fmt.Fprintf(s.stderr, "Failed to parse metric: %s\n", err)
continue
}
if err = s.Output.Write([]telegraf.Metric{m}); err != nil {
fmt.Fprintf(s.stderr, "Failed to write metric: %s\n", err)
mu.Lock()
metrics = append(metrics, m)
shouldFlush := len(metrics) >= s.BatchSize
mu.Unlock()
// If we got more enough metrics to fill the batch flush it out and
// reset the time-based guard.
if shouldFlush {
if timer != nil {
timer.Stop()
}
flush(true)
if s.BatchTimeout > 0 {
timer = time.AfterFunc(s.BatchTimeout, func() { flush(false) })
}
}
}
// Output all remaining metrics
if timer != nil {
timer.Stop()
}
flush(false)
return nil
}

View File

@ -3,6 +3,7 @@ package shim
import (
"io"
"sync"
"sync/atomic"
"testing"
"time"
@ -21,11 +22,9 @@ func TestOutputShim(t *testing.T) {
s := New()
s.stdin = stdinReader
err := s.AddOutput(o)
require.NoError(t, err)
wg := sync.WaitGroup{}
require.NoError(t, s.AddOutput(o))
var wg sync.WaitGroup
wg.Add(1)
go func() {
if err := s.RunOutput(); err != nil {
@ -50,19 +49,133 @@ func TestOutputShim(t *testing.T) {
require.NoError(t, err)
_, err = stdinWriter.Write(b)
require.NoError(t, err)
err = stdinWriter.Close()
require.NoError(t, err)
require.NoError(t, stdinWriter.Close())
wg.Wait()
require.Len(t, o.MetricsWritten, 1)
mOut := o.MetricsWritten[0]
testutil.RequireMetricEqual(t, m, o.MetricsWritten[0])
}
testutil.RequireMetricEqual(t, m, mOut)
// TestOutputShimWithBatchSize verifies the batch-size based flushing of the
// output shim with time-based flushing disabled: exactly one full batch must
// be written while the input is open, and the partial remainder only after
// the input stream is closed.
func TestOutputShimWithBatchSize(t *testing.T) {
	o := &testOutput{}
	stdinReader, stdinWriter := io.Pipe()

	// Setup a shim with a batch size but no timeout
	s := New()
	s.stdin = stdinReader
	s.BatchSize = 5
	s.BatchTimeout = 0
	require.NoError(t, s.AddOutput(o))

	// Start the output processing
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		if err := s.RunOutput(); err != nil {
			t.Error(err)
		}
		wg.Done()
	}()

	// Serialize the test metric
	serializer := &influx.Serializer{}
	require.NoError(t, serializer.Init())

	m := metric.New("thing",
		map[string]string{
			"a": "b",
		},
		map[string]interface{}{
			"v": 1,
		},
		time.Now(),
	)
	payload, err := serializer.Serialize(m)
	require.NoError(t, err)

	// Write a few more metrics than the batch-size and check that we only get
	// a full batch before closing the input stream.
	expected := make([]telegraf.Metric, 0, s.BatchSize+3)
	for range cap(expected) {
		_, err := stdinWriter.Write(payload)
		require.NoError(t, err)
		expected = append(expected, m)
	}

	// Wait for the metrics to arrive; the atomic Count is updated after the
	// slice was appended to, so reading MetricsWritten afterwards is safe.
	require.Eventually(t, func() bool {
		return o.Count.Load() >= uint32(s.BatchSize)
	}, 3*time.Second, 100*time.Millisecond)
	testutil.RequireMetricsEqual(t, expected[:s.BatchSize], o.MetricsWritten)

	// Closing the input should force the remaining metrics to be written
	require.NoError(t, stdinWriter.Close())
	wg.Wait()

	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)
}
// TestOutputShimWithFlushTimeout verifies the time-based flushing of the
// output shim: a partial batch (fewer metrics than BatchSize) must be
// written once the flush timeout expires, without closing the input.
func TestOutputShimWithFlushTimeout(t *testing.T) {
	o := &testOutput{}
	stdinReader, stdinWriter := io.Pipe()

	// Setup a shim with a batch size and a short timeout
	s := New()
	s.stdin = stdinReader
	s.BatchSize = 5
	s.BatchTimeout = 500 * time.Millisecond
	require.NoError(t, s.AddOutput(o))

	// Start the output processing
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		if err := s.RunOutput(); err != nil {
			t.Error(err)
		}
		wg.Done()
	}()

	// Serialize the test metric
	serializer := &influx.Serializer{}
	require.NoError(t, serializer.Init())

	m := metric.New("thing",
		map[string]string{
			"a": "b",
		},
		map[string]interface{}{
			"v": 1,
		},
		time.Now(),
	)
	payload, err := serializer.Serialize(m)
	require.NoError(t, err)

	// Write fewer metrics than the batch-size and check that the flush
	// timeout triggers.
	expected := make([]telegraf.Metric, 0, s.BatchSize-1)
	for range cap(expected) {
		_, err := stdinWriter.Write(payload)
		require.NoError(t, err)
		expected = append(expected, m)
	}

	// Wait for the batch to be flushed by the timer
	require.Eventually(t, func() bool {
		return o.Count.Load() >= uint32(len(expected))
	}, 3*time.Second, 100*time.Millisecond)
	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)

	// Closing the input should not change anything
	require.NoError(t, stdinWriter.Close())
	wg.Wait()

	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)
}
// testOutput is a mock output capturing every metric written to it.
type testOutput struct {
	// MetricsWritten accumulates all metrics received via Write.
	MetricsWritten []telegraf.Metric
	// Count mirrors len(MetricsWritten) and is stored atomically after each
	// Write, so tests can poll progress from another goroutine.
	Count atomic.Uint32
}
func (*testOutput) Connect() error {
@ -73,6 +186,7 @@ func (*testOutput) Close() error {
}
// Write records the given metrics and atomically publishes the new total so
// that concurrent test code can safely poll for progress.
func (o *testOutput) Write(metrics []telegraf.Metric) error {
	o.MetricsWritten = append(o.MetricsWritten, metrics...)
	total := uint32(len(o.MetricsWritten))
	o.Count.Store(total)
	return nil
}