feat: add retry to 413 errors with InfluxDB output (#10130)
This commit is contained in:
parent
79e479c691
commit
de6c2f74d6
|
|
@ -177,6 +177,12 @@ func (c *httpClient) Write(ctx context.Context, metrics []telegraf.Metric) error
|
||||||
if c.BucketTag == "" {
|
if c.BucketTag == "" {
|
||||||
err := c.writeBatch(ctx, c.Bucket, metrics)
|
err := c.writeBatch(ctx, c.Bucket, metrics)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if err, ok := err.(*APIError); ok {
|
||||||
|
if err.StatusCode == http.StatusRequestEntityTooLarge {
|
||||||
|
return c.splitAndWriteBatch(ctx, c.Bucket, metrics)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -203,6 +209,12 @@ func (c *httpClient) Write(ctx context.Context, metrics []telegraf.Metric) error
|
||||||
for bucket, batch := range batches {
|
for bucket, batch := range batches {
|
||||||
err := c.writeBatch(ctx, bucket, batch)
|
err := c.writeBatch(ctx, bucket, batch)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if err, ok := err.(*APIError); ok {
|
||||||
|
if err.StatusCode == http.StatusRequestEntityTooLarge {
|
||||||
|
return c.splitAndWriteBatch(ctx, c.Bucket, metrics)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -210,6 +222,17 @@ func (c *httpClient) Write(ctx context.Context, metrics []telegraf.Metric) error
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *httpClient) splitAndWriteBatch(ctx context.Context, bucket string, metrics []telegraf.Metric) error {
|
||||||
|
log.Printf("W! [outputs.influxdb_v2] Retrying write after splitting metric payload in half to reduce batch size")
|
||||||
|
midpoint := len(metrics) / 2
|
||||||
|
|
||||||
|
if err := c.writeBatch(ctx, bucket, metrics[:midpoint]); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return c.writeBatch(ctx, bucket, metrics[midpoint:])
|
||||||
|
}
|
||||||
|
|
||||||
func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []telegraf.Metric) error {
|
func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []telegraf.Metric) error {
|
||||||
loc, err := makeWriteURL(*c.url, c.Organization, bucket)
|
loc, err := makeWriteURL(*c.url, c.Organization, bucket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -257,11 +280,17 @@ func (c *httpClient) writeBatch(ctx context.Context, bucket string, metrics []te
|
||||||
}
|
}
|
||||||
|
|
||||||
switch resp.StatusCode {
|
switch resp.StatusCode {
|
||||||
|
// request was too large, send back to try again
|
||||||
|
case http.StatusRequestEntityTooLarge:
|
||||||
|
log.Printf("E! [outputs.influxdb_v2] Failed to write metric, request was too large (413)")
|
||||||
|
return &APIError{
|
||||||
|
StatusCode: resp.StatusCode,
|
||||||
|
Title: resp.Status,
|
||||||
|
Description: desc,
|
||||||
|
}
|
||||||
case
|
case
|
||||||
// request was malformed:
|
// request was malformed:
|
||||||
http.StatusBadRequest,
|
http.StatusBadRequest,
|
||||||
// request was too large:
|
|
||||||
http.StatusRequestEntityTooLarge,
|
|
||||||
// request was received but server refused to process it due to a semantic problem with the request.
|
// request was received but server refused to process it due to a semantic problem with the request.
|
||||||
// for example, submitting metrics outside the retention period.
|
// for example, submitting metrics outside the retention period.
|
||||||
// Clients should *not* repeat the request and the metrics should be dropped.
|
// Clients should *not* repeat the request and the metrics should be dropped.
|
||||||
|
|
|
||||||
|
|
@ -111,3 +111,101 @@ func TestWriteBucketTagWorksOnRetry(t *testing.T) {
|
||||||
err = client.Write(ctx, metrics)
|
err = client.Write(ctx, metrics)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestTooLargeWriteRetry(t *testing.T) {
|
||||||
|
ts := httptest.NewServer(
|
||||||
|
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
switch r.URL.Path {
|
||||||
|
case "/api/v2/write":
|
||||||
|
err := r.ParseForm()
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
body, err := io.ReadAll(r.Body)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Ensure metric body size is small
|
||||||
|
if len(body) > 16 {
|
||||||
|
w.WriteHeader(http.StatusRequestEntityTooLarge)
|
||||||
|
} else {
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
)
|
||||||
|
defer ts.Close()
|
||||||
|
|
||||||
|
addr := &url.URL{
|
||||||
|
Scheme: "http",
|
||||||
|
Host: ts.Listener.Addr().String(),
|
||||||
|
}
|
||||||
|
|
||||||
|
config := &influxdb.HTTPConfig{
|
||||||
|
URL: addr,
|
||||||
|
Bucket: "telegraf",
|
||||||
|
BucketTag: "bucket",
|
||||||
|
ExcludeBucketTag: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
client, err := influxdb.NewHTTPClient(config)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Together the metric batch size is too big, split up, we get success
|
||||||
|
metrics := []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{
|
||||||
|
"bucket": "foo",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 42.0,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
testutil.MustMetric(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{
|
||||||
|
"bucket": "bar",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 99.0,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
err = client.Write(ctx, metrics)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// These metrics are too big, even after splitting in half, expect error
|
||||||
|
hugeMetrics := []telegraf.Metric{
|
||||||
|
testutil.MustMetric(
|
||||||
|
"reallyLargeMetric",
|
||||||
|
map[string]string{
|
||||||
|
"bucket": "foobar",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 123.456,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
testutil.MustMetric(
|
||||||
|
"evenBiggerMetric",
|
||||||
|
map[string]string{
|
||||||
|
"bucket": "fizzbuzzbang",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 999.999,
|
||||||
|
},
|
||||||
|
time.Unix(0, 0),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
err = client.Write(ctx, hugeMetrics)
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue