feat(outputs.influxdb_v2): Add rate limit implementation (#15742)
This commit is contained in:
parent
be2d5efed1
commit
11709858e3
|
|
@ -1,6 +1,7 @@
|
||||||
package ratelimiter
|
package ratelimiter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/influxdata/telegraf/config"
|
"github.com/influxdata/telegraf/config"
|
||||||
|
|
@ -11,9 +12,12 @@ type RateLimitConfig struct {
|
||||||
Period config.Duration `toml:"rate_limit_period"`
|
Period config.Duration `toml:"rate_limit_period"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cfg *RateLimitConfig) CreateRateLimiter() *RateLimiter {
|
func (cfg *RateLimitConfig) CreateRateLimiter() (*RateLimiter, error) {
|
||||||
|
if cfg.Limit > 0 && cfg.Period <= 0 {
|
||||||
|
return nil, errors.New("invalid period for rate-limit")
|
||||||
|
}
|
||||||
return &RateLimiter{
|
return &RateLimiter{
|
||||||
limit: int64(cfg.Limit),
|
limit: int64(cfg.Limit),
|
||||||
period: time.Duration(cfg.Period),
|
period: time.Duration(cfg.Period),
|
||||||
}
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,16 @@ import (
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestInvalidPeriod(t *testing.T) {
|
||||||
|
cfg := &RateLimitConfig{Limit: config.Size(1024)}
|
||||||
|
_, err := cfg.CreateRateLimiter()
|
||||||
|
require.ErrorContains(t, err, "invalid period for rate-limit")
|
||||||
|
}
|
||||||
|
|
||||||
func TestUnlimited(t *testing.T) {
|
func TestUnlimited(t *testing.T) {
|
||||||
cfg := &RateLimitConfig{}
|
cfg := &RateLimitConfig{}
|
||||||
limiter := cfg.CreateRateLimiter()
|
limiter, err := cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
end := start.Add(30 * time.Minute)
|
end := start.Add(30 * time.Minute)
|
||||||
|
|
@ -24,7 +31,8 @@ func TestUnlimitedWithPeriod(t *testing.T) {
|
||||||
cfg := &RateLimitConfig{
|
cfg := &RateLimitConfig{
|
||||||
Period: config.Duration(5 * time.Minute),
|
Period: config.Duration(5 * time.Minute),
|
||||||
}
|
}
|
||||||
limiter := cfg.CreateRateLimiter()
|
limiter, err := cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
end := start.Add(30 * time.Minute)
|
end := start.Add(30 * time.Minute)
|
||||||
|
|
@ -67,7 +75,8 @@ func TestLimited(t *testing.T) {
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name+" at period", func(t *testing.T) {
|
t.Run(tt.name+" at period", func(t *testing.T) {
|
||||||
// Setup the limiter
|
// Setup the limiter
|
||||||
limiter := tt.cfg.CreateRateLimiter()
|
limiter, err := tt.cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Compute the actual values
|
// Compute the actual values
|
||||||
start := time.Now().Truncate(tt.step)
|
start := time.Now().Truncate(tt.step)
|
||||||
|
|
@ -85,7 +94,8 @@ func TestLimited(t *testing.T) {
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
// Setup the limiter
|
// Setup the limiter
|
||||||
limiter := tt.cfg.CreateRateLimiter()
|
limiter, err := tt.cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Compute the actual values
|
// Compute the actual values
|
||||||
start := time.Now().Truncate(tt.step).Add(1 * time.Second)
|
start := time.Now().Truncate(tt.step).Add(1 * time.Second)
|
||||||
|
|
@ -134,7 +144,8 @@ func TestUndo(t *testing.T) {
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name+" at period", func(t *testing.T) {
|
t.Run(tt.name+" at period", func(t *testing.T) {
|
||||||
// Setup the limiter
|
// Setup the limiter
|
||||||
limiter := tt.cfg.CreateRateLimiter()
|
limiter, err := tt.cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Compute the actual values
|
// Compute the actual values
|
||||||
start := time.Now().Truncate(tt.step)
|
start := time.Now().Truncate(tt.step)
|
||||||
|
|
@ -156,7 +167,8 @@ func TestUndo(t *testing.T) {
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
// Setup the limiter
|
// Setup the limiter
|
||||||
limiter := tt.cfg.CreateRateLimiter()
|
limiter, err := tt.cfg.CreateRateLimiter()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Compute the actual values
|
// Compute the actual values
|
||||||
start := time.Now().Truncate(tt.step).Add(1 * time.Second)
|
start := time.Now().Truncate(tt.step).Add(1 * time.Second)
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,12 @@ to use them.
|
||||||
# tls_key = "/etc/telegraf/key.pem"
|
# tls_key = "/etc/telegraf/key.pem"
|
||||||
## Use TLS but skip chain & host verification
|
## Use TLS but skip chain & host verification
|
||||||
# insecure_skip_verify = false
|
# insecure_skip_verify = false
|
||||||
|
|
||||||
|
## Rate limits for sending data (disabled by default)
|
||||||
|
## Available, uncompressed payload size e.g. "5Mb"
|
||||||
|
# rate_limit = "unlimited"
|
||||||
|
## Fixed time-window for the available payload size e.g. "5m"
|
||||||
|
# rate_limit_period = "0s"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Metrics
|
## Metrics
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ import (
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/config"
|
"github.com/influxdata/telegraf/config"
|
||||||
"github.com/influxdata/telegraf/internal"
|
"github.com/influxdata/telegraf/internal"
|
||||||
"github.com/influxdata/telegraf/plugins/serializers/influx"
|
"github.com/influxdata/telegraf/plugins/common/ratelimiter"
|
||||||
)
|
)
|
||||||
|
|
||||||
type APIError struct {
|
type APIError struct {
|
||||||
|
|
@ -59,8 +59,9 @@ type httpClient struct {
|
||||||
pingTimeout config.Duration
|
pingTimeout config.Duration
|
||||||
readIdleTimeout config.Duration
|
readIdleTimeout config.Duration
|
||||||
tlsConfig *tls.Config
|
tlsConfig *tls.Config
|
||||||
serializer *influx.Serializer
|
|
||||||
encoder internal.ContentEncoder
|
encoder internal.ContentEncoder
|
||||||
|
serializer ratelimiter.Serializer
|
||||||
|
rateLimiter *ratelimiter.RateLimiter
|
||||||
client *http.Client
|
client *http.Client
|
||||||
params url.Values
|
params url.Values
|
||||||
retryTime time.Time
|
retryTime time.Time
|
||||||
|
|
@ -160,52 +161,69 @@ func (c *httpClient) Write(ctx context.Context, metrics []telegraf.Metric) error
|
||||||
}
|
}
|
||||||
|
|
||||||
batches := make(map[string][]telegraf.Metric)
|
batches := make(map[string][]telegraf.Metric)
|
||||||
|
batchIndices := make(map[string][]int)
|
||||||
if c.bucketTag == "" {
|
if c.bucketTag == "" {
|
||||||
err := c.writeBatch(ctx, c.bucket, metrics)
|
batches[c.bucket] = metrics
|
||||||
if err != nil {
|
batchIndices[c.bucket] = make([]int, len(metrics))
|
||||||
var apiErr *APIError
|
for i := range metrics {
|
||||||
if errors.As(err, &apiErr) {
|
batchIndices[c.bucket][i] = i
|
||||||
if apiErr.StatusCode == http.StatusRequestEntityTooLarge {
|
|
||||||
return c.splitAndWriteBatch(ctx, c.bucket, metrics)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for _, metric := range metrics {
|
for i, metric := range metrics {
|
||||||
bucket, ok := metric.GetTag(c.bucketTag)
|
bucket, ok := metric.GetTag(c.bucketTag)
|
||||||
if !ok {
|
if !ok {
|
||||||
bucket = c.bucket
|
bucket = c.bucket
|
||||||
}
|
} else if c.excludeBucketTag {
|
||||||
|
// Avoid modifying the metric if we do remove the tag
|
||||||
if _, ok := batches[bucket]; !ok {
|
|
||||||
batches[bucket] = make([]telegraf.Metric, 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
if c.excludeBucketTag {
|
|
||||||
// Avoid modifying the metric in case we need to retry the request.
|
|
||||||
metric = metric.Copy()
|
metric = metric.Copy()
|
||||||
metric.Accept()
|
metric.Accept()
|
||||||
metric.RemoveTag(c.bucketTag)
|
metric.RemoveTag(c.bucketTag)
|
||||||
}
|
}
|
||||||
|
|
||||||
batches[bucket] = append(batches[bucket], metric)
|
batches[bucket] = append(batches[bucket], metric)
|
||||||
|
batchIndices[c.bucket] = append(batchIndices[c.bucket], i)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var wErr internal.PartialWriteError
|
||||||
|
for bucket, batch := range batches {
|
||||||
|
err := c.writeBatch(ctx, bucket, batch)
|
||||||
|
if err == nil {
|
||||||
|
wErr.MetricsAccept = append(wErr.MetricsAccept, batchIndices[bucket]...)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
for bucket, batch := range batches {
|
// Check if the request was too large and split it
|
||||||
err := c.writeBatch(ctx, bucket, batch)
|
var apiErr *APIError
|
||||||
if err != nil {
|
if errors.As(err, &apiErr) {
|
||||||
var apiErr *APIError
|
if apiErr.StatusCode == http.StatusRequestEntityTooLarge {
|
||||||
if errors.As(err, &apiErr) {
|
return c.splitAndWriteBatch(ctx, c.bucket, metrics)
|
||||||
if apiErr.StatusCode == http.StatusRequestEntityTooLarge {
|
|
||||||
return c.splitAndWriteBatch(ctx, c.bucket, metrics)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
wErr.Err = err
|
||||||
|
wErr.MetricsReject = append(wErr.MetricsReject, batchIndices[bucket]...)
|
||||||
|
return &wErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if we got a write error and if so, translate the returned
|
||||||
|
// metric indices to return the original indices in case of bucketing
|
||||||
|
var writeErr *internal.PartialWriteError
|
||||||
|
if errors.As(err, &writeErr) {
|
||||||
|
wErr.Err = writeErr.Err
|
||||||
|
for _, idx := range writeErr.MetricsAccept {
|
||||||
|
wErr.MetricsAccept = append(wErr.MetricsAccept, batchIndices[bucket][idx])
|
||||||
|
}
|
||||||
|
for _, idx := range writeErr.MetricsReject {
|
||||||
|
wErr.MetricsReject = append(wErr.MetricsReject, batchIndices[bucket][idx])
|
||||||
|
}
|
||||||
|
if !errors.Is(writeErr.Err, internal.ErrSizeLimitReached) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return &wErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the error without special treatment
|
||||||
|
wErr.Err = err
|
||||||
|
return &wErr
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
@ -222,11 +240,16 @@ func (c *httpClient) splitAndWriteBatch(ctx context.Context, bucket string, metr
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []telegraf.Metric) error {
|
func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []telegraf.Metric) error {
|
||||||
// Serialize the metrics
|
// Get the current limit for the outbound data
|
||||||
body, err := c.serializer.SerializeBatch(metrics)
|
ratets := time.Now()
|
||||||
if err != nil {
|
limit := c.rateLimiter.Remaining(ratets)
|
||||||
return err
|
|
||||||
|
// Serialize the metrics with the remaining limit, exit early if nothing was serialized
|
||||||
|
body, werr := c.serializer.SerializeBatch(metrics, limit)
|
||||||
|
if werr != nil && !errors.Is(werr, internal.ErrSizeLimitReached) || len(body) == 0 {
|
||||||
|
return werr
|
||||||
}
|
}
|
||||||
|
used := int64(len(body))
|
||||||
|
|
||||||
// Encode the content if requested
|
// Encode the content if requested
|
||||||
if c.encoder != nil {
|
if c.encoder != nil {
|
||||||
|
|
@ -249,6 +272,7 @@ func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []te
|
||||||
c.addHeaders(req)
|
c.addHeaders(req)
|
||||||
|
|
||||||
// Execute the request
|
// Execute the request
|
||||||
|
c.rateLimiter.Accept(ratets, used)
|
||||||
resp, err := c.client.Do(req.WithContext(ctx))
|
resp, err := c.client.Do(req.WithContext(ctx))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
internal.OnClientError(c.client, err)
|
internal.OnClientError(c.client, err)
|
||||||
|
|
@ -269,7 +293,7 @@ func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []te
|
||||||
http.StatusMultiStatus,
|
http.StatusMultiStatus,
|
||||||
http.StatusAlreadyReported:
|
http.StatusAlreadyReported:
|
||||||
c.retryCount = 0
|
c.retryCount = 0
|
||||||
return nil
|
return werr
|
||||||
}
|
}
|
||||||
|
|
||||||
// We got an error and now try to decode further
|
// We got an error and now try to decode further
|
||||||
|
|
@ -294,11 +318,18 @@ func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []te
|
||||||
http.StatusBadRequest,
|
http.StatusBadRequest,
|
||||||
// request was received but server refused to process it due to a semantic problem with the request.
|
// request was received but server refused to process it due to a semantic problem with the request.
|
||||||
// for example, submitting metrics outside the retention period.
|
// for example, submitting metrics outside the retention period.
|
||||||
// Clients should *not* repeat the request and the metrics should be dropped.
|
|
||||||
http.StatusUnprocessableEntity,
|
http.StatusUnprocessableEntity,
|
||||||
http.StatusNotAcceptable:
|
http.StatusNotAcceptable:
|
||||||
c.log.Errorf("Failed to write metric to %s (will be dropped: %s): %s\n", bucket, resp.Status, desc)
|
|
||||||
return nil
|
// Clients should *not* repeat the request and the metrics should be dropped.
|
||||||
|
rejected := make([]int, 0, len(metrics))
|
||||||
|
for i := range len(metrics) {
|
||||||
|
rejected = append(rejected, i)
|
||||||
|
}
|
||||||
|
return &internal.PartialWriteError{
|
||||||
|
Err: fmt.Errorf("failed to write metric to %s (will be dropped: %s): %s", bucket, resp.Status, desc),
|
||||||
|
MetricsReject: rejected,
|
||||||
|
}
|
||||||
case http.StatusUnauthorized, http.StatusForbidden:
|
case http.StatusUnauthorized, http.StatusForbidden:
|
||||||
return fmt.Errorf("failed to write metric to %s (%s): %s", bucket, resp.Status, desc)
|
return fmt.Errorf("failed to write metric to %s (%s): %s", bucket, resp.Status, desc)
|
||||||
case http.StatusTooManyRequests,
|
case http.StatusTooManyRequests,
|
||||||
|
|
@ -316,8 +347,14 @@ func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []te
|
||||||
// if it's any other 4xx code, the client should not retry as it's the client's mistake.
|
// if it's any other 4xx code, the client should not retry as it's the client's mistake.
|
||||||
// retrying will not make the request magically work.
|
// retrying will not make the request magically work.
|
||||||
if len(resp.Status) > 0 && resp.Status[0] == '4' {
|
if len(resp.Status) > 0 && resp.Status[0] == '4' {
|
||||||
c.log.Errorf("Failed to write metric to %s (will be dropped: %s): %s\n", bucket, resp.Status, desc)
|
rejected := make([]int, 0, len(metrics))
|
||||||
return nil
|
for i := range len(metrics) {
|
||||||
|
rejected = append(rejected, i)
|
||||||
|
}
|
||||||
|
return &internal.PartialWriteError{
|
||||||
|
Err: fmt.Errorf("failed to write metric to %s (will be dropped: %s): %s", bucket, resp.Status, desc),
|
||||||
|
MetricsReject: rejected,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is only until platform spec is fully implemented. As of the
|
// This is only until platform spec is fully implemented. As of the
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@ import (
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/config"
|
"github.com/influxdata/telegraf/config"
|
||||||
"github.com/influxdata/telegraf/internal"
|
"github.com/influxdata/telegraf/internal"
|
||||||
|
"github.com/influxdata/telegraf/plugins/common/ratelimiter"
|
||||||
commontls "github.com/influxdata/telegraf/plugins/common/tls"
|
commontls "github.com/influxdata/telegraf/plugins/common/tls"
|
||||||
"github.com/influxdata/telegraf/plugins/outputs"
|
"github.com/influxdata/telegraf/plugins/outputs"
|
||||||
"github.com/influxdata/telegraf/plugins/serializers/influx"
|
"github.com/influxdata/telegraf/plugins/serializers/influx"
|
||||||
|
|
@ -44,10 +45,11 @@ type InfluxDB struct {
|
||||||
ReadIdleTimeout config.Duration `toml:"read_idle_timeout"`
|
ReadIdleTimeout config.Duration `toml:"read_idle_timeout"`
|
||||||
Log telegraf.Logger `toml:"-"`
|
Log telegraf.Logger `toml:"-"`
|
||||||
commontls.ClientConfig
|
commontls.ClientConfig
|
||||||
|
ratelimiter.RateLimitConfig
|
||||||
|
|
||||||
clients []*httpClient
|
clients []*httpClient
|
||||||
encoder internal.ContentEncoder
|
encoder internal.ContentEncoder
|
||||||
serializer *influx.Serializer
|
serializer ratelimiter.Serializer
|
||||||
tlsCfg *tls.Config
|
tlsCfg *tls.Config
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -65,7 +67,7 @@ func (i *InfluxDB) Init() error {
|
||||||
i.URLs = append(i.URLs, "http://localhost:8086")
|
i.URLs = append(i.URLs, "http://localhost:8086")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check options
|
// Init encoding if configured
|
||||||
switch i.ContentEncoding {
|
switch i.ContentEncoding {
|
||||||
case "", "gzip":
|
case "", "gzip":
|
||||||
i.ContentEncoding = "gzip"
|
i.ContentEncoding = "gzip"
|
||||||
|
|
@ -80,13 +82,14 @@ func (i *InfluxDB) Init() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup the limited serializer
|
// Setup the limited serializer
|
||||||
i.serializer = &influx.Serializer{
|
serializer := &influx.Serializer{
|
||||||
UintSupport: i.UintSupport,
|
UintSupport: i.UintSupport,
|
||||||
OmitTimestamp: i.OmitTimestamp,
|
OmitTimestamp: i.OmitTimestamp,
|
||||||
}
|
}
|
||||||
if err := i.serializer.Init(); err != nil {
|
if err := serializer.Init(); err != nil {
|
||||||
return fmt.Errorf("setting up serializer failed: %w", err)
|
return fmt.Errorf("setting up serializer failed: %w", err)
|
||||||
}
|
}
|
||||||
|
i.serializer = ratelimiter.NewIndividualSerializer(serializer)
|
||||||
|
|
||||||
// Setup the client config
|
// Setup the client config
|
||||||
tlsCfg, err := i.ClientConfig.TLSConfig()
|
tlsCfg, err := i.ClientConfig.TLSConfig()
|
||||||
|
|
@ -142,6 +145,10 @@ func (i *InfluxDB) Connect() error {
|
||||||
|
|
||||||
switch parts.Scheme {
|
switch parts.Scheme {
|
||||||
case "http", "https", "unix":
|
case "http", "https", "unix":
|
||||||
|
limiter, err := i.RateLimitConfig.CreateRateLimiter()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
c := &httpClient{
|
c := &httpClient{
|
||||||
url: parts,
|
url: parts,
|
||||||
localAddr: localAddr,
|
localAddr: localAddr,
|
||||||
|
|
@ -158,8 +165,9 @@ func (i *InfluxDB) Connect() error {
|
||||||
tlsConfig: i.tlsCfg,
|
tlsConfig: i.tlsCfg,
|
||||||
pingTimeout: i.PingTimeout,
|
pingTimeout: i.PingTimeout,
|
||||||
readIdleTimeout: i.ReadIdleTimeout,
|
readIdleTimeout: i.ReadIdleTimeout,
|
||||||
serializer: i.serializer,
|
|
||||||
encoder: i.encoder,
|
encoder: i.encoder,
|
||||||
|
rateLimiter: limiter,
|
||||||
|
serializer: i.serializer,
|
||||||
log: i.Log,
|
log: i.Log,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -191,6 +199,10 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error {
|
||||||
for _, n := range rand.Perm(len(i.clients)) {
|
for _, n := range rand.Perm(len(i.clients)) {
|
||||||
client := i.clients[n]
|
client := i.clients[n]
|
||||||
if err := client.Write(ctx, metrics); err != nil {
|
if err := client.Write(ctx, metrics); err != nil {
|
||||||
|
var werr *internal.PartialWriteError
|
||||||
|
if errors.As(err, &werr) || errors.Is(err, internal.ErrSizeLimitReached) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
i.Log.Errorf("When writing to [%s]: %v", client.url, err)
|
i.Log.Errorf("When writing to [%s]: %v", client.url, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -14,7 +15,9 @@ import (
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
"github.com/influxdata/telegraf/config"
|
"github.com/influxdata/telegraf/config"
|
||||||
|
"github.com/influxdata/telegraf/internal"
|
||||||
"github.com/influxdata/telegraf/metric"
|
"github.com/influxdata/telegraf/metric"
|
||||||
|
"github.com/influxdata/telegraf/plugins/common/ratelimiter"
|
||||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||||
"github.com/influxdata/telegraf/plugins/outputs"
|
"github.com/influxdata/telegraf/plugins/outputs"
|
||||||
influxdb "github.com/influxdata/telegraf/plugins/outputs/influxdb_v2"
|
influxdb "github.com/influxdata/telegraf/plugins/outputs/influxdb_v2"
|
||||||
|
|
@ -373,3 +376,113 @@ func TestTooLargeWriteRetry(t *testing.T) {
|
||||||
}
|
}
|
||||||
require.Error(t, plugin.Write(hugeMetrics))
|
require.Error(t, plugin.Write(hugeMetrics))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestRateLimit(t *testing.T) {
|
||||||
|
// Setup a test server
|
||||||
|
var received atomic.Uint64
|
||||||
|
ts := httptest.NewServer(
|
||||||
|
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/api/v2/write":
|
||||||
|
if err := r.ParseForm(); err != nil {
|
||||||
|
w.WriteHeader(http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := io.ReadAll(r.Body)
|
||||||
|
if err != nil {
|
||||||
|
w.WriteHeader(http.StatusUnprocessableEntity)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
received.Add(uint64(len(body)))
|
||||||
|
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
// Setup plugin and connect
|
||||||
|
plugin := &influxdb.InfluxDB{
|
||||||
|
URLs: []string{"http://" + ts.Listener.Addr().String()},
|
||||||
|
Bucket: "telegraf",
|
||||||
|
ContentEncoding: "identity",
|
||||||
|
RateLimitConfig: ratelimiter.RateLimitConfig{
|
||||||
|
Limit: 50,
|
||||||
|
Period: config.Duration(time.Second),
|
||||||
|
},
|
||||||
|
Log: &testutil.Logger{},
|
||||||
|
}
|
||||||
|
require.NoError(t, plugin.Init())
|
||||||
|
require.NoError(t, plugin.Connect())
|
||||||
|
defer plugin.Close()
|
||||||
|
|
||||||
|
// Together the metric batch size is too big, split up, we get success
|
||||||
|
metrics := []telegraf.Metric{
|
||||||
|
metric.New(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 42.0,
|
||||||
|
},
|
||||||
|
time.Unix(0, 1),
|
||||||
|
),
|
||||||
|
metric.New(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 99.0,
|
||||||
|
},
|
||||||
|
time.Unix(0, 2),
|
||||||
|
),
|
||||||
|
metric.New(
|
||||||
|
"operating_hours",
|
||||||
|
map[string]string{
|
||||||
|
"machine": "A",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 123.456,
|
||||||
|
},
|
||||||
|
time.Unix(0, 3),
|
||||||
|
),
|
||||||
|
metric.New(
|
||||||
|
"status",
|
||||||
|
map[string]string{
|
||||||
|
"machine": "B",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"temp": 48.235,
|
||||||
|
"remaining": 999.999,
|
||||||
|
},
|
||||||
|
time.Unix(0, 4),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Write the metrics the first time. Only the first two metrics should be
|
||||||
|
// received by the server due to the rate limit.
|
||||||
|
require.ErrorIs(t, plugin.Write(metrics), internal.ErrSizeLimitReached)
|
||||||
|
require.LessOrEqual(t, received.Load(), uint64(30))
|
||||||
|
|
||||||
|
// A direct follow-up write attempt with the remaining metrics should fail
|
||||||
|
// due to the rate limit being reached
|
||||||
|
require.ErrorIs(t, plugin.Write(metrics[2:]), internal.ErrSizeLimitReached)
|
||||||
|
require.LessOrEqual(t, received.Load(), uint64(30))
|
||||||
|
|
||||||
|
// Wait for at least the period (plus some safety margin) to write the third metric
|
||||||
|
time.Sleep(time.Duration(plugin.RateLimitConfig.Period) + 100*time.Millisecond)
|
||||||
|
require.ErrorIs(t, plugin.Write(metrics[2:]), internal.ErrSizeLimitReached)
|
||||||
|
require.Greater(t, received.Load(), uint64(30))
|
||||||
|
require.LessOrEqual(t, received.Load(), uint64(72))
|
||||||
|
|
||||||
|
// Wait again for the period for at least the period (plus some safety margin)
|
||||||
|
// to write the last metric. This should finally succeed as all metrics
|
||||||
|
// are written.
|
||||||
|
time.Sleep(time.Duration(plugin.RateLimitConfig.Period) + 100*time.Millisecond)
|
||||||
|
require.NoError(t, plugin.Write(metrics[3:]))
|
||||||
|
require.Equal(t, uint64(121), received.Load())
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -71,3 +71,9 @@
|
||||||
# tls_key = "/etc/telegraf/key.pem"
|
# tls_key = "/etc/telegraf/key.pem"
|
||||||
## Use TLS but skip chain & host verification
|
## Use TLS but skip chain & host verification
|
||||||
# insecure_skip_verify = false
|
# insecure_skip_verify = false
|
||||||
|
|
||||||
|
## Rate limits for sending data (disabled by default)
|
||||||
|
## Available, uncompressed payload size e.g. "5Mb"
|
||||||
|
# rate_limit = "unlimited"
|
||||||
|
## Fixed time-window for the available payload size e.g. "5m"
|
||||||
|
# rate_limit_period = "0s"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue