feat(agent): Add metric disk buffer (#15564)
This commit is contained in:
parent
a3a8a8c465
commit
7245ea96bd
|
|
@ -57,6 +57,14 @@ type Accumulator interface {
|
||||||
// TrackingID uniquely identifies a tracked metric group
|
// TrackingID uniquely identifies a tracked metric group
|
||||||
type TrackingID uint64
|
type TrackingID uint64
|
||||||
|
|
||||||
|
type TrackingData interface {
|
||||||
|
// ID is the TrackingID
|
||||||
|
ID() TrackingID
|
||||||
|
|
||||||
|
// RefCount is the number of tracking metrics still persistent and referencing this tracking ID
|
||||||
|
RefCount() int32
|
||||||
|
}
|
||||||
|
|
||||||
// DeliveryInfo provides the results of a delivered metric group.
|
// DeliveryInfo provides the results of a delivered metric group.
|
||||||
type DeliveryInfo interface {
|
type DeliveryInfo interface {
|
||||||
// ID is the TrackingID
|
// ID is the TrackingID
|
||||||
|
|
|
||||||
|
|
@ -278,6 +278,9 @@ type AgentConfig struct {
|
||||||
// Number of attempts to obtain a remote configuration via a URL during
|
// Number of attempts to obtain a remote configuration via a URL during
|
||||||
// startup. Set to -1 for unlimited attempts.
|
// startup. Set to -1 for unlimited attempts.
|
||||||
ConfigURLRetryAttempts int `toml:"config_url_retry_attempts"`
|
ConfigURLRetryAttempts int `toml:"config_url_retry_attempts"`
|
||||||
|
|
||||||
|
BufferStrategy string `toml:"buffer_strategy"`
|
||||||
|
BufferDirectory string `toml:"buffer_directory"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// InputNames returns a list of strings of the configured inputs.
|
// InputNames returns a list of strings of the configured inputs.
|
||||||
|
|
@ -1521,6 +1524,8 @@ func (c *Config) buildOutput(name string, tbl *ast.Table) (*models.OutputConfig,
|
||||||
c.getFieldString(tbl, "name_suffix", &oc.NameSuffix)
|
c.getFieldString(tbl, "name_suffix", &oc.NameSuffix)
|
||||||
c.getFieldString(tbl, "name_prefix", &oc.NamePrefix)
|
c.getFieldString(tbl, "name_prefix", &oc.NamePrefix)
|
||||||
c.getFieldString(tbl, "startup_error_behavior", &oc.StartupErrorBehavior)
|
c.getFieldString(tbl, "startup_error_behavior", &oc.StartupErrorBehavior)
|
||||||
|
c.getFieldString(tbl, "buffer_strategy", &oc.BufferStrategy)
|
||||||
|
c.getFieldString(tbl, "buffer_directory", &oc.BufferDirectory)
|
||||||
|
|
||||||
if c.hasErrs() {
|
if c.hasErrs() {
|
||||||
return nil, c.firstErr()
|
return nil, c.firstErr()
|
||||||
|
|
|
||||||
|
|
@ -361,6 +361,8 @@ following works:
|
||||||
- github.com/tidwall/gjson [MIT License](https://github.com/tidwall/gjson/blob/master/LICENSE)
|
- github.com/tidwall/gjson [MIT License](https://github.com/tidwall/gjson/blob/master/LICENSE)
|
||||||
- github.com/tidwall/match [MIT License](https://github.com/tidwall/match/blob/master/LICENSE)
|
- github.com/tidwall/match [MIT License](https://github.com/tidwall/match/blob/master/LICENSE)
|
||||||
- github.com/tidwall/pretty [MIT License](https://github.com/tidwall/pretty/blob/master/LICENSE)
|
- github.com/tidwall/pretty [MIT License](https://github.com/tidwall/pretty/blob/master/LICENSE)
|
||||||
|
- github.com/tidwall/tinylru [MIT License](https://github.com/tidwall/tinylru/blob/master/LICENSE)
|
||||||
|
- github.com/tidwall/wal [MIT License](https://github.com/tidwall/wal/blob/master/LICENSE)
|
||||||
- github.com/tinylib/msgp [MIT License](https://github.com/tinylib/msgp/blob/master/LICENSE)
|
- github.com/tinylib/msgp [MIT License](https://github.com/tinylib/msgp/blob/master/LICENSE)
|
||||||
- github.com/tklauser/go-sysconf [BSD 3-Clause "New" or "Revised" License](https://github.com/tklauser/go-sysconf/blob/master/LICENSE)
|
- github.com/tklauser/go-sysconf [BSD 3-Clause "New" or "Revised" License](https://github.com/tklauser/go-sysconf/blob/master/LICENSE)
|
||||||
- github.com/tklauser/numcpus [Apache License 2.0](https://github.com/tklauser/numcpus/blob/master/LICENSE)
|
- github.com/tklauser/numcpus [Apache License 2.0](https://github.com/tklauser/numcpus/blob/master/LICENSE)
|
||||||
|
|
|
||||||
2
go.mod
2
go.mod
|
|
@ -187,6 +187,7 @@ require (
|
||||||
github.com/testcontainers/testcontainers-go/modules/kafka v0.31.0
|
github.com/testcontainers/testcontainers-go/modules/kafka v0.31.0
|
||||||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0
|
github.com/thomasklein94/packer-plugin-libvirt v0.5.0
|
||||||
github.com/tidwall/gjson v1.17.0
|
github.com/tidwall/gjson v1.17.0
|
||||||
|
github.com/tidwall/wal v1.1.7
|
||||||
github.com/tinylib/msgp v1.2.0
|
github.com/tinylib/msgp v1.2.0
|
||||||
github.com/urfave/cli/v2 v2.27.2
|
github.com/urfave/cli/v2 v2.27.2
|
||||||
github.com/vapourismo/knx-go v0.0.0-20240217175130-922a0d50c241
|
github.com/vapourismo/knx-go v0.0.0-20240217175130-922a0d50c241
|
||||||
|
|
@ -461,6 +462,7 @@ require (
|
||||||
github.com/stretchr/objx v0.5.2 // indirect
|
github.com/stretchr/objx v0.5.2 // indirect
|
||||||
github.com/tidwall/match v1.1.1 // indirect
|
github.com/tidwall/match v1.1.1 // indirect
|
||||||
github.com/tidwall/pretty v1.2.0 // indirect
|
github.com/tidwall/pretty v1.2.0 // indirect
|
||||||
|
github.com/tidwall/tinylru v1.1.0 // indirect
|
||||||
github.com/tklauser/go-sysconf v0.3.13 // indirect
|
github.com/tklauser/go-sysconf v0.3.13 // indirect
|
||||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
github.com/tklauser/numcpus v0.7.0 // indirect
|
||||||
github.com/twmb/murmur3 v1.1.7 // indirect
|
github.com/twmb/murmur3 v1.1.7 // indirect
|
||||||
|
|
|
||||||
5
go.sum
5
go.sum
|
|
@ -2336,12 +2336,17 @@ github.com/testcontainers/testcontainers-go/modules/kafka v0.31.0 h1:8B1u+sDwYhT
|
||||||
github.com/testcontainers/testcontainers-go/modules/kafka v0.31.0/go.mod h1:W1+yLUfUl8VLTzvmApP2FBHgCk8I5SKKjDWjxWEc33U=
|
github.com/testcontainers/testcontainers-go/modules/kafka v0.31.0/go.mod h1:W1+yLUfUl8VLTzvmApP2FBHgCk8I5SKKjDWjxWEc33U=
|
||||||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0 h1:aj2HLHZZM/ClGLIwVp9rrgh+2TOU/w4EiaZHAwCpOgs=
|
github.com/thomasklein94/packer-plugin-libvirt v0.5.0 h1:aj2HLHZZM/ClGLIwVp9rrgh+2TOU/w4EiaZHAwCpOgs=
|
||||||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0/go.mod h1:GwN82FQ6KxCNKtS8LNUgLbwTZs90GGhBzCmTNkrTCrY=
|
github.com/thomasklein94/packer-plugin-libvirt v0.5.0/go.mod h1:GwN82FQ6KxCNKtS8LNUgLbwTZs90GGhBzCmTNkrTCrY=
|
||||||
|
github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||||
github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
|
github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
|
||||||
github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
||||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||||
|
github.com/tidwall/tinylru v1.1.0 h1:XY6IUfzVTU9rpwdhKUF6nQdChgCdGjkMfLzbWyiau6I=
|
||||||
|
github.com/tidwall/tinylru v1.1.0/go.mod h1:3+bX+TJ2baOLMWTnlyNWHh4QMnFyARg2TLTQ6OFbzw8=
|
||||||
|
github.com/tidwall/wal v1.1.7 h1:emc1TRjIVsdKKSnpwGBAcsAGg0767SvUk8+ygx7Bb+4=
|
||||||
|
github.com/tidwall/wal v1.1.7/go.mod h1:r6lR1j27W9EPalgHiB7zLJDYu3mzW5BQP5KrzBpYY/E=
|
||||||
github.com/tinylib/msgp v1.2.0 h1:0uKB/662twsVBpYUPbokj4sTSKhWFKB7LopO2kWK8lY=
|
github.com/tinylib/msgp v1.2.0 h1:0uKB/662twsVBpYUPbokj4sTSKhWFKB7LopO2kWK8lY=
|
||||||
github.com/tinylib/msgp v1.2.0/go.mod h1:2vIGs3lcUo8izAATNobrCHevYZC/LMsJtw4JPiYPHro=
|
github.com/tinylib/msgp v1.2.0/go.mod h1:2vIGs3lcUo8izAATNobrCHevYZC/LMsJtw4JPiYPHro=
|
||||||
github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0=
|
github.com/tj/assert v0.0.0-20171129193455-018094318fb0/go.mod h1:mZ9/Rh9oLWpLLDRpvE+3b7gP/C2YyLFYxNmcLnPTMe0=
|
||||||
|
|
|
||||||
|
|
@ -149,5 +149,6 @@ type UnwrappableMetric interface {
|
||||||
type TrackingMetric interface {
|
type TrackingMetric interface {
|
||||||
// TrackingID returns the ID used for tracking the metric
|
// TrackingID returns the ID used for tracking the metric
|
||||||
TrackingID() TrackingID
|
TrackingID() TrackingID
|
||||||
|
TrackingData() TrackingData
|
||||||
UnwrappableMetric
|
UnwrappableMetric
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,85 @@
|
||||||
|
package metric
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/gob"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
)
|
||||||
|
|
||||||
|
// storage for tracking data that can't be serialized to disk
|
||||||
|
var (
|
||||||
|
// grouped tracking metrics means that ID->Data association is not one to one,
|
||||||
|
// many metrics could be associated with one tracking ID so we cannot just
|
||||||
|
// clear this every time in FromBytes.
|
||||||
|
trackingStore = make(map[telegraf.TrackingID]telegraf.TrackingData)
|
||||||
|
mu = sync.Mutex{}
|
||||||
|
|
||||||
|
// ErrSkipTracking indicates that tracking information could not be found after
|
||||||
|
// deserializing a metric from bytes. In this case we should skip the metric
|
||||||
|
// and continue as if it does not exist.
|
||||||
|
ErrSkipTracking = errors.New("metric tracking data not found")
|
||||||
|
)
|
||||||
|
|
||||||
|
type serializedMetric struct {
|
||||||
|
M telegraf.Metric
|
||||||
|
TID telegraf.TrackingID
|
||||||
|
}
|
||||||
|
|
||||||
|
func ToBytes(m telegraf.Metric) ([]byte, error) {
|
||||||
|
var sm serializedMetric
|
||||||
|
if um, ok := m.(telegraf.UnwrappableMetric); ok {
|
||||||
|
sm.M = um.Unwrap()
|
||||||
|
} else {
|
||||||
|
sm.M = m
|
||||||
|
}
|
||||||
|
|
||||||
|
if tm, ok := m.(telegraf.TrackingMetric); ok {
|
||||||
|
sm.TID = tm.TrackingID()
|
||||||
|
|
||||||
|
mu.Lock()
|
||||||
|
trackingStore[sm.TID] = tm.TrackingData()
|
||||||
|
mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
encoder := gob.NewEncoder(&buf)
|
||||||
|
if err := encoder.Encode(&sm); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to encode metric to bytes: %w", err)
|
||||||
|
}
|
||||||
|
return buf.Bytes(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func FromBytes(b []byte) (telegraf.Metric, error) {
|
||||||
|
buf := bytes.NewBuffer(b)
|
||||||
|
decoder := gob.NewDecoder(buf)
|
||||||
|
|
||||||
|
var sm *serializedMetric
|
||||||
|
if err := decoder.Decode(&sm); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to decode metric from bytes: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
m := sm.M
|
||||||
|
if sm.TID != 0 {
|
||||||
|
mu.Lock()
|
||||||
|
td := trackingStore[sm.TID]
|
||||||
|
if td == nil {
|
||||||
|
mu.Unlock()
|
||||||
|
return nil, ErrSkipTracking
|
||||||
|
}
|
||||||
|
rc := td.RefCount()
|
||||||
|
if rc <= 1 {
|
||||||
|
// only 1 metric left referencing this tracking ID, we can remove here since no subsequent metrics
|
||||||
|
// read can use this ID. If another metric in a metric group with this ID gets added later, it will
|
||||||
|
// simply be added back into the tracking store again.
|
||||||
|
trackingStore[sm.TID] = nil
|
||||||
|
}
|
||||||
|
mu.Unlock()
|
||||||
|
|
||||||
|
m = rebuildTrackingMetric(m, td)
|
||||||
|
}
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
package metric
|
||||||
|
|
||||||
|
import "encoding/gob"
|
||||||
|
|
||||||
|
func Init() {
|
||||||
|
gob.RegisterName("metric.metric", &metric{})
|
||||||
|
}
|
||||||
|
|
@ -33,35 +33,40 @@ func newTrackingID() telegraf.TrackingID {
|
||||||
}
|
}
|
||||||
|
|
||||||
type trackingData struct {
|
type trackingData struct {
|
||||||
id telegraf.TrackingID
|
//nolint:revive // method is already named ID
|
||||||
rc int32
|
Id telegraf.TrackingID
|
||||||
acceptCount int32
|
Rc int32
|
||||||
rejectCount int32
|
AcceptCount int32
|
||||||
|
RejectCount int32
|
||||||
notifyFunc NotifyFunc
|
notifyFunc NotifyFunc
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *trackingData) incr() {
|
func (d *trackingData) incr() {
|
||||||
atomic.AddInt32(&d.rc, 1)
|
atomic.AddInt32(&d.Rc, 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (d *trackingData) RefCount() int32 {
|
||||||
|
return d.Rc
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *trackingData) decr() int32 {
|
func (d *trackingData) decr() int32 {
|
||||||
return atomic.AddInt32(&d.rc, -1)
|
return atomic.AddInt32(&d.Rc, -1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *trackingData) accept() {
|
func (d *trackingData) accept() {
|
||||||
atomic.AddInt32(&d.acceptCount, 1)
|
atomic.AddInt32(&d.AcceptCount, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *trackingData) reject() {
|
func (d *trackingData) reject() {
|
||||||
atomic.AddInt32(&d.rejectCount, 1)
|
atomic.AddInt32(&d.RejectCount, 1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *trackingData) notify() {
|
func (d *trackingData) notify() {
|
||||||
d.notifyFunc(
|
d.notifyFunc(
|
||||||
&deliveryInfo{
|
&deliveryInfo{
|
||||||
id: d.id,
|
id: d.Id,
|
||||||
accepted: int(d.acceptCount),
|
accepted: int(d.AcceptCount),
|
||||||
rejected: int(d.rejectCount),
|
rejected: int(d.RejectCount),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -75,10 +80,10 @@ func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric,
|
||||||
m := &trackingMetric{
|
m := &trackingMetric{
|
||||||
Metric: metric,
|
Metric: metric,
|
||||||
d: &trackingData{
|
d: &trackingData{
|
||||||
id: newTrackingID(),
|
Id: newTrackingID(),
|
||||||
rc: 1,
|
Rc: 1,
|
||||||
acceptCount: 0,
|
AcceptCount: 0,
|
||||||
rejectCount: 0,
|
RejectCount: 0,
|
||||||
notifyFunc: fn,
|
notifyFunc: fn,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
@ -86,15 +91,22 @@ func newTrackingMetric(metric telegraf.Metric, fn NotifyFunc) (telegraf.Metric,
|
||||||
if finalizer != nil {
|
if finalizer != nil {
|
||||||
runtime.SetFinalizer(m.d, finalizer)
|
runtime.SetFinalizer(m.d, finalizer)
|
||||||
}
|
}
|
||||||
return m, m.d.id
|
return m, m.d.Id
|
||||||
|
}
|
||||||
|
|
||||||
|
func rebuildTrackingMetric(metric telegraf.Metric, td telegraf.TrackingData) telegraf.Metric {
|
||||||
|
return &trackingMetric{
|
||||||
|
Metric: metric,
|
||||||
|
d: td.(*trackingData),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
|
func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.Metric, telegraf.TrackingID) {
|
||||||
d := &trackingData{
|
d := &trackingData{
|
||||||
id: newTrackingID(),
|
Id: newTrackingID(),
|
||||||
rc: 0,
|
Rc: 0,
|
||||||
acceptCount: 0,
|
AcceptCount: 0,
|
||||||
rejectCount: 0,
|
RejectCount: 0,
|
||||||
notifyFunc: fn,
|
notifyFunc: fn,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -114,7 +126,7 @@ func newTrackingMetricGroup(group []telegraf.Metric, fn NotifyFunc) ([]telegraf.
|
||||||
d.notify()
|
d.notify()
|
||||||
}
|
}
|
||||||
|
|
||||||
return group, d.id
|
return group, d.Id
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *trackingMetric) Copy() telegraf.Metric {
|
func (m *trackingMetric) Copy() telegraf.Metric {
|
||||||
|
|
@ -152,7 +164,11 @@ func (m *trackingMetric) decr() {
|
||||||
|
|
||||||
// Unwrap allows to access the underlying metric directly e.g. for go-templates
|
// Unwrap allows to access the underlying metric directly e.g. for go-templates
|
||||||
func (m *trackingMetric) TrackingID() telegraf.TrackingID {
|
func (m *trackingMetric) TrackingID() telegraf.TrackingID {
|
||||||
return m.d.id
|
return m.d.Id
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *trackingMetric) TrackingData() telegraf.TrackingData {
|
||||||
|
return m.d
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unwrap allows to access the underlying metric directly e.g. for go-templates
|
// Unwrap allows to access the underlying metric directly e.g. for go-templates
|
||||||
|
|
@ -173,3 +189,7 @@ func (r *deliveryInfo) ID() telegraf.TrackingID {
|
||||||
func (r *deliveryInfo) Delivered() bool {
|
func (r *deliveryInfo) Delivered() bool {
|
||||||
return r.rejected == 0
|
return r.rejected == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (d *trackingData) ID() telegraf.TrackingID {
|
||||||
|
return d.Id
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,18 @@ package models
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/influxdata/telegraf"
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/metric"
|
||||||
"github.com/influxdata/telegraf/selfstat"
|
"github.com/influxdata/telegraf/selfstat"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
|
AgentMetricsWritten = selfstat.Register("agent", "metrics_written", map[string]string{})
|
||||||
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
|
AgentMetricsDropped = selfstat.Register("agent", "metrics_dropped", map[string]string{})
|
||||||
|
|
||||||
|
registerGob = sync.OnceFunc(func() { metric.Init() })
|
||||||
)
|
)
|
||||||
|
|
||||||
type Buffer interface {
|
type Buffer interface {
|
||||||
|
|
@ -45,12 +49,16 @@ type BufferStats struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewBuffer returns a new empty Buffer with the given capacity.
|
// NewBuffer returns a new empty Buffer with the given capacity.
|
||||||
func NewBuffer(name string, alias string, capacity int, strategy string, _ string) (Buffer, error) {
|
func NewBuffer(name string, alias string, capacity int, strategy string, path string) (Buffer, error) {
|
||||||
|
registerGob()
|
||||||
|
|
||||||
bs := NewBufferStats(name, alias, capacity)
|
bs := NewBufferStats(name, alias, capacity)
|
||||||
|
|
||||||
switch strategy {
|
switch strategy {
|
||||||
case "", "memory":
|
case "", "memory":
|
||||||
return NewMemoryBuffer(capacity, bs)
|
return NewMemoryBuffer(capacity, bs)
|
||||||
|
case "disk":
|
||||||
|
return NewDiskBuffer(name, path, bs)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("invalid buffer strategy %q", strategy)
|
return nil, fmt.Errorf("invalid buffer strategy %q", strategy)
|
||||||
}
|
}
|
||||||
|
|
@ -97,14 +105,14 @@ func (b *BufferStats) metricAdded() {
|
||||||
b.MetricsAdded.Incr(1)
|
b.MetricsAdded.Incr(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *BufferStats) metricWritten(metric telegraf.Metric) {
|
func (b *BufferStats) metricWritten(m telegraf.Metric) {
|
||||||
AgentMetricsWritten.Incr(1)
|
AgentMetricsWritten.Incr(1)
|
||||||
b.MetricsWritten.Incr(1)
|
b.MetricsWritten.Incr(1)
|
||||||
metric.Accept()
|
m.Accept()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *BufferStats) metricDropped(metric telegraf.Metric) {
|
func (b *BufferStats) metricDropped(m telegraf.Metric) {
|
||||||
AgentMetricsDropped.Incr(1)
|
AgentMetricsDropped.Incr(1)
|
||||||
b.MetricsDropped.Incr(1)
|
b.MetricsDropped.Incr(1)
|
||||||
metric.Reject()
|
m.Reject()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,216 @@
|
||||||
|
package models
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/tidwall/wal"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
type DiskBuffer struct {
|
||||||
|
BufferStats
|
||||||
|
sync.Mutex
|
||||||
|
|
||||||
|
file *wal.Log
|
||||||
|
path string
|
||||||
|
|
||||||
|
batchFirst uint64 // Index of the first metric in the batch
|
||||||
|
batchSize uint64 // Number of metrics currently in the batch
|
||||||
|
|
||||||
|
// Ending point of metrics read from disk on telegraf launch.
|
||||||
|
// Used to know whether to discard tracking metrics.
|
||||||
|
originalEnd uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewDiskBuffer(name string, path string, stats BufferStats) (*DiskBuffer, error) {
|
||||||
|
filePath := filepath.Join(path, name)
|
||||||
|
walFile, err := wal.Open(filePath, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open wal file: %w", err)
|
||||||
|
}
|
||||||
|
buf := &DiskBuffer{
|
||||||
|
BufferStats: stats,
|
||||||
|
file: walFile,
|
||||||
|
path: filePath,
|
||||||
|
}
|
||||||
|
if buf.length() > 0 {
|
||||||
|
buf.originalEnd = buf.writeIndex()
|
||||||
|
}
|
||||||
|
return buf, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Len() int {
|
||||||
|
b.Lock()
|
||||||
|
defer b.Unlock()
|
||||||
|
return b.length()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) length() int {
|
||||||
|
// Special case for when the read index is zero, it must be empty (otherwise it would be >= 1)
|
||||||
|
if b.readIndex() == 0 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
return int(b.writeIndex() - b.readIndex())
|
||||||
|
}
|
||||||
|
|
||||||
|
// readIndex is the first index to start reading metrics from, or the head of the buffer
|
||||||
|
func (b *DiskBuffer) readIndex() uint64 {
|
||||||
|
index, err := b.file.FirstIndex()
|
||||||
|
if err != nil {
|
||||||
|
panic(err) // can only occur with a corrupt wal file
|
||||||
|
}
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeIndex is the first index to start writing metrics to, or the tail of the buffer
|
||||||
|
func (b *DiskBuffer) writeIndex() uint64 {
|
||||||
|
index, err := b.file.LastIndex()
|
||||||
|
if err != nil {
|
||||||
|
panic(err) // can only occur with a corrupt wal file
|
||||||
|
}
|
||||||
|
return index + 1
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Add(metrics ...telegraf.Metric) int {
|
||||||
|
b.Lock()
|
||||||
|
defer b.Unlock()
|
||||||
|
|
||||||
|
dropped := 0
|
||||||
|
for _, m := range metrics {
|
||||||
|
if !b.addSingleMetric(m) {
|
||||||
|
dropped++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b.BufferSize.Set(int64(b.length()))
|
||||||
|
return dropped
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) addSingleMetric(m telegraf.Metric) bool {
|
||||||
|
data, err := metric.ToBytes(m)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
err = b.file.Write(b.writeIndex(), data)
|
||||||
|
if err == nil {
|
||||||
|
b.metricAdded()
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Batch(batchSize int) []telegraf.Metric {
|
||||||
|
b.Lock()
|
||||||
|
defer b.Unlock()
|
||||||
|
|
||||||
|
if b.length() == 0 {
|
||||||
|
// no metrics in the wal file, so return an empty array
|
||||||
|
return []telegraf.Metric{}
|
||||||
|
}
|
||||||
|
b.batchFirst = b.readIndex()
|
||||||
|
var metrics []telegraf.Metric
|
||||||
|
|
||||||
|
b.batchSize = 0
|
||||||
|
readIndex := b.batchFirst
|
||||||
|
endIndex := b.writeIndex()
|
||||||
|
for batchSize > 0 && readIndex < endIndex {
|
||||||
|
data, err := b.file.Read(readIndex)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
readIndex++
|
||||||
|
|
||||||
|
m, err := metric.FromBytes(data)
|
||||||
|
|
||||||
|
// Validate that a tracking metric is from this instance of telegraf and skip ones from older instances.
|
||||||
|
// A tracking metric can be skipped here because metric.Accept() is only called once data is successfully
|
||||||
|
// written to an output, so any tracking metrics from older instances can be dropped and reacquired to
|
||||||
|
// have an accurate tracking information.
|
||||||
|
// There are two primary cases here:
|
||||||
|
// - ErrSkipTracking: means that the tracking information was unable to be found for a tracking ID.
|
||||||
|
// - Outside of range: means that the metric was guaranteed to be left over from the previous instance
|
||||||
|
// as it was here when we opened the wal file in this instance.
|
||||||
|
if errors.Is(err, metric.ErrSkipTracking) {
|
||||||
|
// could not look up tracking information for metric, skip
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
// non-recoverable error in deserialization, abort
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
if _, ok := m.(telegraf.TrackingMetric); ok && readIndex < b.originalEnd {
|
||||||
|
// tracking metric left over from previous instance, skip
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
metrics = append(metrics, m)
|
||||||
|
b.batchSize++
|
||||||
|
batchSize--
|
||||||
|
}
|
||||||
|
return metrics
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Accept(batch []telegraf.Metric) {
|
||||||
|
b.Lock()
|
||||||
|
defer b.Unlock()
|
||||||
|
|
||||||
|
if b.batchSize == 0 || len(batch) == 0 {
|
||||||
|
// nothing to accept
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, m := range batch {
|
||||||
|
b.metricWritten(m)
|
||||||
|
}
|
||||||
|
if b.length() == len(batch) {
|
||||||
|
b.resetWalFile()
|
||||||
|
} else {
|
||||||
|
err := b.file.TruncateFront(b.batchFirst + uint64(len(batch)))
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check if the original end index is still valid, clear if not
|
||||||
|
if b.originalEnd < b.readIndex() {
|
||||||
|
b.originalEnd = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
b.resetBatch()
|
||||||
|
b.BufferSize.Set(int64(b.length()))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Reject(_ []telegraf.Metric) {
|
||||||
|
// very little to do here as the disk buffer retains metrics in
|
||||||
|
// the wal file until a call to accept
|
||||||
|
b.Lock()
|
||||||
|
defer b.Unlock()
|
||||||
|
b.resetBatch()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) Stats() BufferStats {
|
||||||
|
return b.BufferStats
|
||||||
|
}
|
||||||
|
|
||||||
|
func (b *DiskBuffer) resetBatch() {
|
||||||
|
b.batchFirst = 0
|
||||||
|
b.batchSize = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is very messy and not ideal, but serves as the only way I can find currently
|
||||||
|
// to actually clear the walfile completely if needed, since Truncate() calls require
|
||||||
|
// that at least one entry remains in them otherwise they return an error.
|
||||||
|
// Related issue: https://github.com/tidwall/wal/issues/20
|
||||||
|
func (b *DiskBuffer) resetWalFile() {
|
||||||
|
b.file.Close()
|
||||||
|
os.Remove(b.path)
|
||||||
|
walFile, err := wal.Open(b.path, nil)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
b.file = walFile
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,110 @@
|
||||||
|
package models
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"github.com/tidwall/wal"
|
||||||
|
|
||||||
|
"github.com/influxdata/telegraf"
|
||||||
|
"github.com/influxdata/telegraf/metric"
|
||||||
|
"github.com/influxdata/telegraf/testutil"
|
||||||
|
)
|
||||||
|
|
||||||
|
func newTestDiskBuffer(t testing.TB) Buffer {
|
||||||
|
path, err := os.MkdirTemp("", "*-buffer-test")
|
||||||
|
require.NoError(t, err)
|
||||||
|
return newTestDiskBufferWithPath(t, "test", path)
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTestDiskBufferWithPath(t testing.TB, name string, path string) Buffer {
|
||||||
|
t.Helper()
|
||||||
|
buf, err := NewBuffer(name, "", 0, "disk", path)
|
||||||
|
require.NoError(t, err)
|
||||||
|
buf.Stats().MetricsAdded.Set(0)
|
||||||
|
buf.Stats().MetricsWritten.Set(0)
|
||||||
|
buf.Stats().MetricsDropped.Set(0)
|
||||||
|
return buf
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_RetainsTrackingInformation(t *testing.T) {
|
||||||
|
var delivered int
|
||||||
|
mm, _ := metric.WithTracking(Metric(), func(_ telegraf.DeliveryInfo) {
|
||||||
|
delivered++
|
||||||
|
})
|
||||||
|
b := newTestDiskBuffer(t)
|
||||||
|
b.Add(mm)
|
||||||
|
batch := b.Batch(1)
|
||||||
|
b.Accept(batch)
|
||||||
|
require.Equal(t, 1, delivered)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuffer_TrackingDroppedFromOldWal(t *testing.T) {
|
||||||
|
path, err := os.MkdirTemp("", "*-buffer-test")
|
||||||
|
require.NoError(t, err)
|
||||||
|
walfile, err := wal.Open(path, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tm, _ := metric.WithTracking(Metric(), func(_ telegraf.DeliveryInfo) {})
|
||||||
|
|
||||||
|
metrics := []telegraf.Metric{
|
||||||
|
// Basic metric with 1 field, 0 timestamp
|
||||||
|
Metric(),
|
||||||
|
// Basic metric with 1 field, different timestamp
|
||||||
|
metric.New(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 20.0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
// Metric with a field
|
||||||
|
metric.New(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{
|
||||||
|
"x": "y",
|
||||||
|
},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value": 18.0,
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
// Tracking metric
|
||||||
|
tm,
|
||||||
|
// Metric with lots of tag types
|
||||||
|
metric.New(
|
||||||
|
"cpu",
|
||||||
|
map[string]string{},
|
||||||
|
map[string]interface{}{
|
||||||
|
"value_f64": 20.0,
|
||||||
|
"value_uint64": uint64(10),
|
||||||
|
"value_int16": int16(5),
|
||||||
|
"value_string": "foo",
|
||||||
|
"value_boolean": true,
|
||||||
|
"value_byte_array": []byte{1, 2, 3, 4, 5},
|
||||||
|
},
|
||||||
|
time.Now(),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
// call manually so that we can properly use metric.ToBytes() without having initialized a buffer
|
||||||
|
registerGob()
|
||||||
|
|
||||||
|
for i, m := range metrics {
|
||||||
|
data, err := metric.ToBytes(m)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NoError(t, walfile.Write(uint64(i+1), data))
|
||||||
|
}
|
||||||
|
|
||||||
|
b := newTestDiskBufferWithPath(t, filepath.Base(path), filepath.Dir(path))
|
||||||
|
batch := b.Batch(4)
|
||||||
|
// expected skips the tracking metric
|
||||||
|
expected := []telegraf.Metric{
|
||||||
|
metrics[0], metrics[1], metrics[2], metrics[4],
|
||||||
|
}
|
||||||
|
testutil.RequireMetricsEqual(t, expected, batch)
|
||||||
|
}
|
||||||
|
|
@ -16,6 +16,21 @@ func newTestMemoryBuffer(t testing.TB, capacity int) Buffer {
|
||||||
return buf
|
return buf
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuffer_AcceptCallsMetricAccept(t *testing.T) {
|
||||||
|
var accept int
|
||||||
|
mm := &MockMetric{
|
||||||
|
Metric: Metric(),
|
||||||
|
AcceptF: func() {
|
||||||
|
accept++
|
||||||
|
},
|
||||||
|
}
|
||||||
|
b := newTestMemoryBuffer(t, 5)
|
||||||
|
b.Add(mm, mm, mm)
|
||||||
|
batch := b.Batch(2)
|
||||||
|
b.Accept(batch)
|
||||||
|
require.Equal(t, 2, accept)
|
||||||
|
}
|
||||||
|
|
||||||
func BenchmarkAddMetrics(b *testing.B) {
|
func BenchmarkAddMetrics(b *testing.B) {
|
||||||
buf := newTestMemoryBuffer(b, 10000)
|
buf := newTestMemoryBuffer(b, 10000)
|
||||||
m := Metric()
|
m := Metric()
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,11 @@ func (s *BufferSuiteTest) SetupTest() {
|
||||||
switch s.bufferType {
|
switch s.bufferType {
|
||||||
case "", "memory":
|
case "", "memory":
|
||||||
s.hasMaxCapacity = true
|
s.hasMaxCapacity = true
|
||||||
|
case "disk":
|
||||||
|
path, err := os.MkdirTemp("", "*-buffer-test")
|
||||||
|
s.Require().NoError(err)
|
||||||
|
s.bufferPath = path
|
||||||
|
s.hasMaxCapacity = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -57,6 +62,10 @@ func TestMemoryBufferSuite(t *testing.T) {
|
||||||
suite.Run(t, &BufferSuiteTest{bufferType: "memory"})
|
suite.Run(t, &BufferSuiteTest{bufferType: "memory"})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDiskBufferSuite(t *testing.T) {
|
||||||
|
suite.Run(t, &BufferSuiteTest{bufferType: "disk"})
|
||||||
|
}
|
||||||
|
|
||||||
func Metric() telegraf.Metric {
|
func Metric() telegraf.Metric {
|
||||||
return MetricTime(0)
|
return MetricTime(0)
|
||||||
}
|
}
|
||||||
|
|
@ -671,21 +680,6 @@ func (s *BufferSuiteTest) TestBuffer_BatchRejectAcceptNoop() {
|
||||||
s.Equal(5, b.Len())
|
s.Equal(5, b.Len())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *BufferSuiteTest) TestBuffer_AcceptCallsMetricAccept() {
|
|
||||||
var accept int
|
|
||||||
mm := &MockMetric{
|
|
||||||
Metric: Metric(),
|
|
||||||
AcceptF: func() {
|
|
||||||
accept++
|
|
||||||
},
|
|
||||||
}
|
|
||||||
b := s.newTestBuffer(5)
|
|
||||||
b.Add(mm, mm, mm)
|
|
||||||
batch := b.Batch(2)
|
|
||||||
b.Accept(batch)
|
|
||||||
s.Equal(2, accept)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *BufferSuiteTest) TestBuffer_AddCallsMetricRejectWhenNoBatch() {
|
func (s *BufferSuiteTest) TestBuffer_AddCallsMetricRejectWhenNoBatch() {
|
||||||
if !s.hasMaxCapacity {
|
if !s.hasMaxCapacity {
|
||||||
s.T().Skip("tested buffer does not have a maximum capacity")
|
s.T().Skip("tested buffer does not have a maximum capacity")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue