feat(common.shim): Add batch to shim (#16148)

Co-authored-by: Thomas Casteleyn <thomas.casteleyn@me.com> Co-authored-by: Sven Rebhan <srebhan@influxdata.com>
2025-04-23 16:47:39 +03:00 · 2025-04-23 16:47:39 +03:00 · b715237606
parent bcea4c278e
commit b715237606
3 changed files with 209 additions and 21 deletions
--- a/plugins/common/shim/goshim.go
+++ b/plugins/common/shim/goshim.go
@ -39,6 +39,9 @@ type Shim struct {
 	Processor telegraf.StreamingProcessor
 	Output    telegraf.Output
 	BatchSize    int
 	BatchTimeout time.Duration
 	log telegraf.Logger
 	// streams
@ -56,11 +59,13 @@ type Shim struct {
 // New creates a new shim populated with its defaults: a batch size of one
 // metric, a batch timeout of ten seconds, a metric channel buffered for a
 // single metric, the standard streams of the process for stdin, stdout and
 // stderr, and a logger created with empty arguments.
 func New() *Shim {
 	return &Shim{
-		metricCh: make(chan telegraf.Metric, 1),
+		BatchSize:    1,
-		stdin:    os.Stdin,
+		BatchTimeout: 10 * time.Second,
-		stdout:   os.Stdout,
+		metricCh:     make(chan telegraf.Metric, 1),
-		stderr:   os.Stderr,
+		stdin:        os.Stdin,
-		log:      logger.New("", "", ""),
+		stdout:       os.Stdout,
 		stderr:       os.Stderr,
 		log:          logger.New("", "", ""),
 	}
 }
--- a/plugins/common/shim/output.go
+++ b/plugins/common/shim/output.go
@ -3,6 +3,9 @@ package shim
 import (
 	"bufio"
 	"fmt"
 	"os"
 	"sync"
 	"time"
 	"github.com/influxdata/telegraf"
 	"github.com/influxdata/telegraf/models"
@ -24,31 +27,97 @@ func (s *Shim) AddOutput(output telegraf.Output) error {
 }
 // RunOutput reads metrics in line-protocol format from the shim's stdin,
 // buffers them and hands them to the configured output in batches of at most
 // BatchSize metrics. A batch is written when it is full, when the
 // BatchTimeout timer fires, or when the input is exhausted.
 //
 // An error is returned only if the parser cannot be initialized or the output
 // cannot be connected. Lines that fail to parse and batches that fail to
 // write are reported on a stderr stream and processing continues.
 func (s *Shim) RunOutput() error {
 	// Create a parser for receiving the metrics in line-protocol format
 	parser := influx.Parser{}
-	err := parser.Init()
+	if err := parser.Init(); err != nil {
 	if err != nil {
 		return fmt.Errorf("failed to create new parser: %w", err)
 	}
-	err = s.Output.Connect()
+	// Connect the output
-	if err != nil {
+	if err := s.Output.Connect(); err != nil {
 		// NOTE(review): the message says "processor" although this call
 		// connects the output -- confirm the intended wording.
 		return fmt.Errorf("failed to start processor: %w", err)
 	}
 	defer s.Output.Close()
-	var m telegraf.Metric
+	// Collect the metrics from stdin. Note, we need to flush the metrics
 	// when the batch is full or after the configured time, whatever comes
 	// first. We need to lock the batch as we run into race conditions
 	// otherwise.
 	var mu sync.Mutex
 	metrics := make([]telegraf.Metric, 0, s.BatchSize)
 	// Prepare the flush function. With `whole` set only complete batches are
 	// written and a trailing partial batch stays buffered; otherwise
 	// everything buffered is written out.
 	flush := func(whole bool) {
 		mu.Lock()
 		defer mu.Unlock()
 		// Exit early if there is nothing to do
 		if len(metrics) == 0 {
 			return
 		}
 		// Determine the threshold on when to stop flushing depending on the
 		// given flag.
 		var threshold int
 		if whole {
 			threshold = s.BatchSize
 		}
 		// Flush out the metrics in batches of the configured size until we
 		// got all of them out or if there is less than a whole batch left.
 		// NOTE(review): with a BatchSize of zero the batch below would be
 		// empty and this loop would not make progress -- presumably callers
 		// keep BatchSize at one or more; confirm.
 		for len(metrics) > 0 && len(metrics) >= threshold {
 			// Write the metrics and remove the batch
 			batch := metrics[:min(len(metrics), s.BatchSize)]
 			if err := s.Output.Write(batch); err != nil {
 				// NOTE(review): this writes to os.Stderr while the parse
 				// error below uses s.stderr -- confirm which is intended.
 				fmt.Fprintf(os.Stderr, "Failed to write metrics: %s\n", err)
 			}
 			metrics = metrics[len(batch):]
 		}
 	}
 	// Setup the time-based flush
 	// NOTE(review): time.AfterFunc fires only once and the timer is only
 	// re-created after a size-triggered flush in the loop below, so a
 	// time-triggered flush does not appear to re-arm it -- confirm intended.
 	var timer *time.Timer
 	if s.BatchTimeout > 0 {
 		timer = time.AfterFunc(s.BatchTimeout, func() { flush(false) })
 		defer func() {
 			if timer != nil {
 				timer.Stop()
 			}
 		}()
 	}
 	// Start the processing loop
 	scanner := bufio.NewScanner(s.stdin)
 	for scanner.Scan() {
-		m, err = parser.ParseLine(scanner.Text())
+		// Read metrics from stdin
 		m, err := parser.ParseLine(scanner.Text())
 		if err != nil {
 			fmt.Fprintf(s.stderr, "Failed to parse metric: %s\n", err)
 			continue
 		}
-		if err = s.Output.Write([]telegraf.Metric{m}); err != nil {
+		mu.Lock()
-			fmt.Fprintf(s.stderr, "Failed to write metric: %s\n", err)
+		metrics = append(metrics, m)
 		shouldFlush := len(metrics) >= s.BatchSize
 		mu.Unlock()
 		// If we got enough metrics to fill the batch, flush it out and
 		// reset the time-based guard.
 		if shouldFlush {
 			if timer != nil {
 				timer.Stop()
 			}
 			flush(true)
 			if s.BatchTimeout > 0 {
 				timer = time.AfterFunc(s.BatchTimeout, func() { flush(false) })
 			}
 		}
 	}
 	// NOTE(review): scanner.Err() is not inspected after the loop, so a read
 	// error on stdin is treated like a regular end of input -- confirm.
 	// Output all remaining metrics
 	if timer != nil {
 		timer.Stop()
 	}
 	flush(false)
 	return nil
 }
--- a/plugins/common/shim/output_test.go
+++ b/plugins/common/shim/output_test.go
@ -3,6 +3,7 @@ package shim
 import (
 	"io"
 	"sync"
 	"sync/atomic"
 	"testing"
 	"time"
@ -21,11 +22,9 @@ func TestOutputShim(t *testing.T) {
 	s := New()
 	s.stdin = stdinReader
-	err := s.AddOutput(o)
+	require.NoError(t, s.AddOutput(o))
 	require.NoError(t, err)
 	wg := sync.WaitGroup{}
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
 		if err := s.RunOutput(); err != nil {
@ -50,19 +49,133 @@ func TestOutputShim(t *testing.T) {
 	require.NoError(t, err)
 	_, err = stdinWriter.Write(b)
 	require.NoError(t, err)
-	err = stdinWriter.Close()
+	require.NoError(t, stdinWriter.Close())
 	require.NoError(t, err)
 	wg.Wait()
 	require.Len(t, o.MetricsWritten, 1)
-	mOut := o.MetricsWritten[0]
+	testutil.RequireMetricEqual(t, m, o.MetricsWritten[0])
 }
-	testutil.RequireMetricEqual(t, m, mOut)
 // TestOutputShimWithBatchSize runs the output shim with a batch size of five
 // and the timeout disabled, feeds it three more metrics than one batch holds
 // and checks that exactly one full batch reaches the output while the input
 // is still open and that the remainder arrives once the input is closed.
+func TestOutputShimWithBatchSize(t *testing.T) {
 	o := &testOutput{}
 	stdinReader, stdinWriter := io.Pipe()
 	// Setup a shim with a batch size but no timeout
 	s := New()
 	s.stdin = stdinReader
 	s.BatchSize = 5
 	s.BatchTimeout = 0
 	require.NoError(t, s.AddOutput(o))
 	// Start the output processing
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
 		if err := s.RunOutput(); err != nil {
 			t.Error(err)
 		}
 		wg.Done()
 	}()
 	// Serialize the test metric
 	serializer := &influx.Serializer{}
 	require.NoError(t, serializer.Init())
 	m := metric.New("thing",
 		map[string]string{
 			"a": "b",
 		},
 		map[string]interface{}{
 			"v": 1,
 		},
 		time.Now(),
 	)
 	payload, err := serializer.Serialize(m)
 	require.NoError(t, err)
 	// Write a few more metrics than the batch-size and check that we only get
 	// a full batch before closing the input stream.
 	expected := make([]telegraf.Metric, 0, s.BatchSize+3)
 	for range cap(expected) {
 		_, err := stdinWriter.Write(payload)
 		require.NoError(t, err)
 		expected = append(expected, m)
 	}
 	// Wait for the metrics to arrive
 	require.Eventually(t, func() bool {
 		return o.Count.Load() >= uint32(s.BatchSize)
 	}, 3*time.Second, 100*time.Millisecond)
 	testutil.RequireMetricsEqual(t, expected[:s.BatchSize], o.MetricsWritten)
 	// Closing the input should force the remaining metrics to be written
 	require.NoError(t, stdinWriter.Close())
 	wg.Wait()
 	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)
 }
 // TestOutputShimWithFlushTimeout runs the output shim with a batch size of
 // five and a 500ms timeout, feeds it one metric less than a full batch and
 // checks that the partial batch reaches the output without closing the input
 // and that closing the input afterwards adds nothing.
 func TestOutputShimWithFlushTimeout(t *testing.T) {
 	o := &testOutput{}
 	stdinReader, stdinWriter := io.Pipe()
 	// Setup a shim with a batch size and a short timeout
 	s := New()
 	s.stdin = stdinReader
 	s.BatchSize = 5
 	s.BatchTimeout = 500 * time.Millisecond
 	require.NoError(t, s.AddOutput(o))
 	// Start the output processing
 	var wg sync.WaitGroup
 	wg.Add(1)
 	go func() {
 		if err := s.RunOutput(); err != nil {
 			t.Error(err)
 		}
 		wg.Done()
 	}()
 	// Serialize the test metric
 	serializer := &influx.Serializer{}
 	require.NoError(t, serializer.Init())
 	m := metric.New("thing",
 		map[string]string{
 			"a": "b",
 		},
 		map[string]interface{}{
 			"v": 1,
 		},
 		time.Now(),
 	)
 	payload, err := serializer.Serialize(m)
 	require.NoError(t, err)
 	// Write fewer metrics than the batch-size and check if the flush timeout
 	// triggers.
 	expected := make([]telegraf.Metric, 0, s.BatchSize-1)
 	for range cap(expected) {
 		_, err := stdinWriter.Write(payload)
 		require.NoError(t, err)
 		expected = append(expected, m)
 	}
 	// Wait for the batch to be flushed
 	require.Eventually(t, func() bool {
 		return o.Count.Load() >= uint32(len(expected))
 	}, 3*time.Second, 100*time.Millisecond)
 	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)
 	// Closing the input should not change anything
 	require.NoError(t, stdinWriter.Close())
 	wg.Wait()
 	testutil.RequireMetricsEqual(t, expected, o.MetricsWritten)
 }
 // testOutput is an output stub for the shim tests. Its Write method records
 // the metrics it receives and publishes the running total.
 type testOutput struct {
 	// MetricsWritten accumulates every metric passed to Write, in order.
 	MetricsWritten []telegraf.Metric
 	// Count holds the length of MetricsWritten as of the last Write; it is
 	// atomic so tests can poll it while the shim is still running.
 	Count          atomic.Uint32
 }
 func (*testOutput) Connect() error {
@ -73,6 +186,7 @@ func (*testOutput) Close() error {
 }
 // Write appends the given metrics to MetricsWritten, stores the new total in
 // Count and always returns nil.
 func (o *testOutput) Write(metrics []telegraf.Metric) error {
 	o.MetricsWritten = append(o.MetricsWritten, metrics...)
 	// Publish the total only after the append so a reader that observes the
 	// count finds at least that many metrics recorded.
 	o.Count.Store(uint32(len(o.MetricsWritten)))
 	return nil
 }