feat(inputs.conntrack): Parse conntrack stats (#8958)

Tomas Barton 2022-10-12 21:12:16 +02:00 committed by GitHub
parent 7c1d1755d8
commit 0087a5d245
6 changed files with 293 additions and 18 deletions

plugins/inputs/conntrack/README.md

@ -4,12 +4,18 @@ Collects stats from Netfilter's conntrack-tools.
The conntrack-tools provide a mechanism for tracking various aspects of
network connections as they are processed by netfilter. At runtime,
-conntrack exposes many of those connection statistics within /proc/sys/net.
-Depending on your kernel version, these files can be found in either
-/proc/sys/net/ipv4/netfilter or /proc/sys/net/netfilter and will be
-prefixed with either ip or nf. This plugin reads the files specified
-in its configuration and publishes each one as a field, with the prefix
-normalized to ip_.
+conntrack exposes many of those connection statistics within `/proc/sys/net`.
+Depending on your kernel version, these files can be found in either
+`/proc/sys/net/ipv4/netfilter` or `/proc/sys/net/netfilter` and will be
+prefixed with either `ip_` or `nf_`. This plugin reads the files specified
+in its configuration and publishes each one as a field, with the prefix
+normalized to `ip_`.
In order to simplify configuration in a heterogeneous environment, a superset
of directories and filenames can be specified. Any locations that don't exist
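The paragraph above is the whole mechanism in a nutshell: probe each candidate directory, read whichever conntrack sysctl files exist, parse the numeric contents, and normalize the `nf_` prefix to `ip_`. A minimal standalone sketch of that behaviour (illustrative only, not the plugin's actual code; `readConntrackValue` is a hypothetical helper):

```go
// Illustrative sketch of the file reading and prefix normalization described
// above; not the plugin's implementation.
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
	"strings"
)

// readConntrackValue (hypothetical helper) reads one conntrack sysctl file and
// returns the field name, normalized to the "ip_" prefix, plus its value.
func readConntrackValue(dir, file string) (string, float64, error) {
	contents, err := os.ReadFile(filepath.Join(dir, file))
	if err != nil {
		return "", 0, err
	}
	value, err := strconv.ParseFloat(strings.TrimSpace(string(contents)), 64)
	if err != nil {
		return "", 0, err
	}
	// The kernel-dependent "nf_" prefix becomes "ip_" so field names stay stable.
	return strings.Replace(file, "nf_", "ip_", 1), value, nil
}

func main() {
	dirs := []string{"/proc/sys/net/ipv4/netfilter", "/proc/sys/net/netfilter"}
	files := []string{"ip_conntrack_count", "nf_conntrack_count"}
	// A superset of locations is probed; missing ones are simply skipped.
	for _, dir := range dirs {
		for _, file := range files {
			if name, v, err := readConntrackValue(dir, file); err == nil {
				fmt.Printf("%s=%v\n", name, v)
			}
		}
	}
}
```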
@ -35,17 +41,46 @@ For more information on conntrack-tools, see the
## Directories to search within for the conntrack files above.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
## all - aggregated statistics
## percpu - include detailed statistics with cpu tag
collect = ["all", "percpu"]
```
## Metrics
A detailed explanation of each field can be found in the [kernel documentation](
https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt).
- conntrack
-- ip_conntrack_count (int, count): the number of entries in the conntrack table
-- ip_conntrack_max (int, size): the max capacity of the conntrack table
+- `ip_conntrack_count` `(int, count)`: The number of entries in the conntrack table
+- `ip_conntrack_max` `(int, size)`: The max capacity of the conntrack table
+- `ip_conntrack_buckets` `(int, size)`: The size of the hash table
With `collect = ["all"]`:
- `entries`: The number of entries in the conntrack table
- `searched`: The number of conntrack table lookups performed
- `found`: The number of searched entries which were successful
- `new`: The number of entries added which were not expected before
- `invalid`: The number of packets seen which can not be tracked
- `ignore`: The number of packets seen which are already connected to an entry
- `delete`: The number of entries which were removed
- `delete_list`: The number of entries which were put to dying list
- `insert`: The number of entries inserted into the list
- `insert_failed`: The number of insertions attempted but failed (same entry exists)
- `drop`: The number of packets dropped due to conntrack failure
- `early_drop`: The number of entries dropped to make room for new ones when the maximum table size is reached
- `icmp_error`: Subset of invalid. Packets that can't be tracked due to error
- `expect_new`: Entries added after an expectation was already present
- `expect_create`: Expectations added
- `expect_delete`: Expectations deleted
- `search_restart`: Conntrack table lookups restarted due to hashtable resizes
### Tags
This input does not use tags.
With `collect = ["percpu"]`, detailed statistics are reported per CPU thread and
tagged with the corresponding `cpu` value. Without `"percpu"`, the `cpu` tag is set to `all`.
## Example Output
@ -53,3 +88,11 @@ This input does not use tags.
$ ./telegraf --config telegraf.conf --input-filter conntrack --test
conntrack,host=myhost ip_conntrack_count=2,ip_conntrack_max=262144 1461620427667995735
```
With stats collection enabled:
```shell
$ telegraf --config /etc/telegraf/telegraf.conf --input-filter conntrack --test
> conntrack,cpu=all,host=localhost delete=0i,delete_list=0i,drop=2i,early_drop=0i,entries=5568i,expect_create=0i,expect_delete=0i,expect_new=0i,found=7i,icmp_error=1962i,ignore=2586413402i,insert=0i,insert_failed=2i,invalid=46853i,new=0i,search_restart=453336i,searched=0i 1615233542000000000
> conntrack,host=localhost ip_conntrack_count=464,ip_conntrack_max=262144 1615233542000000000
```

plugins/inputs/conntrack/conntrack.go

@ -13,15 +13,18 @@ import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"github.com/influxdata/telegraf/plugins/inputs/system"
)
//go:embed sample.conf
var sampleConfig string
type Conntrack struct {
ps system.PS
Path string
Dirs []string
Files []string
Collect []string
}
const (
@ -77,19 +80,73 @@ func (c *Conntrack) Gather(acc telegraf.Accumulator) error {
contents, err := os.ReadFile(fName)
if err != nil {
-acc.AddError(fmt.Errorf("E! failed to read file '%s': %v", fName, err))
+acc.AddError(fmt.Errorf("failed to read file '%s': %v", fName, err))
continue
}
v := strings.TrimSpace(string(contents))
fields[metricKey], err = strconv.ParseFloat(v, 64)
if err != nil {
-acc.AddError(fmt.Errorf("E! failed to parse metric, expected number but "+
+acc.AddError(fmt.Errorf("failed to parse metric, expected number but "+
" found '%s': %v", v, err))
}
}
}
// Determine which statistic groups were requested in the configuration.
var all bool
var perCPU bool
for _, collect := range c.Collect {
if collect == "all" {
all = true
}
if collect == "percpu" {
perCPU = true
}
}
if all || perCPU {
stats, err := c.ps.NetConntrack(perCPU)
if err != nil {
acc.AddError(fmt.Errorf("failed to retrieve conntrack statistics: %v", err))
}
if len(stats) == 0 {
acc.AddError(fmt.Errorf("conntrack input failed to collect stats"))
}
for i, sts := range stats {
cpuTag := "all"
if perCPU {
cpuTag = fmt.Sprintf("cpu%d", i)
}
tags := map[string]string{
"cpu": cpuTag,
}
statFields := map[string]interface{}{
"entries": sts.Entries, // entries in the conntrack table
"searched": sts.Searched, // conntrack table lookups performed
"found": sts.Found, // searched entries which were successful
"new": sts.New, // entries added which were not expected before
"invalid": sts.Invalid, // packets seen which can not be tracked
"ignore": sts.Ignore, // packets seen which are already connected to an entry
"delete": sts.Delete, // entries which were removed
"delete_list": sts.DeleteList, // entries which were put to dying list
"insert": sts.Insert, // entries inserted into the list
"insert_failed": sts.InsertFailed, // insertion attempted but failed (same entry exists)
"drop": sts.Drop, // packets dropped due to conntrack failure
"early_drop": sts.EarlyDrop, // dropped entries to make room for new ones, if maxsize reached
"icmp_error": sts.IcmpError, // subset of invalid: packets that can't be tracked due to error
"expect_new": sts.ExpectNew, // entries added after an expectation was already present
"expect_create": sts.ExpectCreate, // expectations added
"expect_delete": sts.ExpectDelete, // expectations deleted
"search_restart": sts.SearchRestart, // conntrack table lookups restarted due to hashtable resizes
}
acc.AddCounter(inputName, statFields, tags)
}
}
if len(fields) == 0 {
return fmt.Errorf("Conntrack input failed to collect metrics. " +
"Is the conntrack kernel module loaded?")

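For context on the new `collect` logic above: the statistics come from gopsutil's `net.ConntrackStats`, which returns a single aggregated entry when called with `false` and one entry per CPU when called with `true`. A small standalone sketch (not part of this commit) of the two modes that the `all`/`percpu` options map onto:

```go
package main

import (
	"fmt"
	"log"

	"github.com/shirou/gopsutil/v3/net"
)

func main() {
	// Aggregated statistics: what collect = ["all"] reports (cpu tag "all").
	agg, err := net.ConntrackStats(false)
	if err != nil {
		log.Fatal(err)
	}
	for _, s := range agg {
		fmt.Printf("cpu=all entries=%d insert_failed=%d drop=%d\n", s.Entries, s.InsertFailed, s.Drop)
	}

	// Per-CPU statistics: what collect = ["percpu"] reports (cpu tags cpu0, cpu1, ...).
	perCPU, err := net.ConntrackStats(true)
	if err != nil {
		log.Fatal(err)
	}
	for i, s := range perCPU {
		fmt.Printf("cpu=cpu%d entries=%d insert_failed=%d drop=%d\n", i, s.Entries, s.InsertFailed, s.Drop)
	}
}
```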
plugins/inputs/conntrack/conntrack_test.go

@ -9,8 +9,11 @@ import (
"strings"
"testing"
-"github.com/influxdata/telegraf/testutil"
+"github.com/shirou/gopsutil/v3/net"
"github.com/stretchr/testify/require"
+"github.com/influxdata/telegraf/plugins/inputs/system"
+"github.com/influxdata/telegraf/testutil"
)
func restoreDflts(savedFiles, savedDirs []string) {
@ -33,11 +36,13 @@ func TestNoFilesFound(t *testing.T) {
func TestDefaultsUsed(t *testing.T) {
defer restoreDflts(dfltFiles, dfltDirs)
-tmpdir, err := os.MkdirTemp("", "tmp1")
-require.NoError(t, err)
-defer os.Remove(tmpdir)
+tmpdir := t.TempDir()
tmpFile, err := os.CreateTemp(tmpdir, "ip_conntrack_count")
require.NoError(t, err)
-defer os.Remove(tmpFile.Name())
+t.Cleanup(func() { require.NoError(t, tmpFile.Close()) })
dfltDirs = []string{tmpdir}
fname := path.Base(tmpFile.Name())
@ -55,15 +60,16 @@ func TestDefaultsUsed(t *testing.T) {
func TestConfigsUsed(t *testing.T) {
defer restoreDflts(dfltFiles, dfltDirs)
-tmpdir, err := os.MkdirTemp("", "tmp1")
-require.NoError(t, err)
-defer os.Remove(tmpdir)
+tmpdir := t.TempDir()
cntFile, err := os.CreateTemp(tmpdir, "nf_conntrack_count")
require.NoError(t, err)
+t.Cleanup(func() { require.NoError(t, cntFile.Close()) })
maxFile, err := os.CreateTemp(tmpdir, "nf_conntrack_max")
require.NoError(t, err)
+t.Cleanup(func() { require.NoError(t, maxFile.Close()) })
-defer os.Remove(cntFile.Name())
-defer os.Remove(maxFile.Name())
dfltDirs = []string{tmpdir}
cntFname := path.Base(cntFile.Name())
@ -89,3 +95,155 @@ func TestConfigsUsed(t *testing.T) {
fix(maxFname): float64(max),
})
}
func TestCollectStats(t *testing.T) {
var mps system.MockPS
defer mps.AssertExpectations(t)
var acc testutil.Accumulator
sts := net.ConntrackStat{
Entries: 1234,
Searched: 10,
Found: 1,
New: 5,
Invalid: 43,
Ignore: 13,
Delete: 3,
DeleteList: 5,
Insert: 9,
InsertFailed: 20,
Drop: 49,
EarlyDrop: 7,
IcmpError: 21,
ExpectNew: 12,
ExpectCreate: 44,
ExpectDelete: 53,
SearchRestart: 31,
}
mps.On("NetConntrack", false).Return([]net.ConntrackStat{sts}, nil)
cs := &Conntrack{
ps: &mps,
}
cs.Collect = []string{"all"}
err := cs.Gather(&acc)
require.NoError(t, err)
expectedTags := map[string]string{
"cpu": "all",
}
expectedFields := map[string]interface{}{
"entries": uint32(1234),
"searched": uint32(10),
"found": uint32(1),
"new": uint32(5),
"invalid": uint32(43),
"ignore": uint32(13),
"delete": uint32(3),
"delete_list": uint32(5),
"insert": uint32(9),
"insert_failed": uint32(20),
"drop": uint32(49),
"early_drop": uint32(7),
"icmp_error": uint32(21),
"expect_new": uint32(12),
"expect_create": uint32(44),
"expect_delete": uint32(53),
"search_restart": uint32(31),
}
acc.AssertContainsFields(t, inputName, expectedFields)
acc.AssertContainsTaggedFields(t, inputName, expectedFields, expectedTags)
require.Equal(t, 19, acc.NFields())
}
func TestCollectStatsPerCpu(t *testing.T) {
var mps system.MockPS
defer mps.AssertExpectations(t)
var acc testutil.Accumulator
sts := []net.ConntrackStat{
{
Entries: 59,
Searched: 10,
Found: 1,
New: 5,
Invalid: 43,
Ignore: 13,
Delete: 3,
DeleteList: 5,
Insert: 9,
InsertFailed: 20,
Drop: 49,
EarlyDrop: 7,
IcmpError: 21,
ExpectNew: 12,
ExpectCreate: 44,
ExpectDelete: 53,
SearchRestart: 31,
},
{
Entries: 79,
Searched: 10,
Found: 1,
New: 5,
Invalid: 43,
Ignore: 13,
Delete: 3,
DeleteList: 5,
Insert: 9,
InsertFailed: 10,
Drop: 49,
EarlyDrop: 7,
IcmpError: 21,
ExpectNew: 12,
ExpectCreate: 44,
ExpectDelete: 53,
SearchRestart: 31,
},
}
mps.On("NetConntrack", true).Return(sts, nil)
cs := &Conntrack{
ps: &mps,
}
cs.Collect = []string{"all", "percpu"}
err := cs.Gather(&acc)
require.NoError(t, err)
//cpu0
expectedFields := map[string]interface{}{
"entries": uint32(59),
"searched": uint32(10),
"found": uint32(1),
"new": uint32(5),
"invalid": uint32(43),
"ignore": uint32(13),
"delete": uint32(3),
"delete_list": uint32(5),
"insert": uint32(9),
"insert_failed": uint32(20),
"drop": uint32(49),
"early_drop": uint32(7),
"icmp_error": uint32(21),
"expect_new": uint32(12),
"expect_create": uint32(44),
"expect_delete": uint32(53),
"search_restart": uint32(31),
}
acc.AssertContainsFields(t, inputName, expectedFields)
acc.AssertContainsTaggedFields(t, inputName, expectedFields,
map[string]string{
"cpu": "cpu0",
})
//TODO: check cpu1 fields
require.Equal(t, 36, acc.NFields())
}
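One way to resolve the `TODO` above, sketched here rather than asserted in the commit: check the cpu1 entry the same way as cpu0, using the values from the second `ConntrackStat` fixture (only `entries` and `insert_failed` differ). The snippet assumes it is placed inside `TestCollectStatsPerCpu` and reuses its `acc` accumulator and the package's `inputName`:

```go
// Sketch of a cpu1 assertion for TestCollectStatsPerCpu (not part of this commit).
expectedCPU1 := map[string]interface{}{
	"entries":        uint32(79),
	"searched":       uint32(10),
	"found":          uint32(1),
	"new":            uint32(5),
	"invalid":        uint32(43),
	"ignore":         uint32(13),
	"delete":         uint32(3),
	"delete_list":    uint32(5),
	"insert":         uint32(9),
	"insert_failed":  uint32(10),
	"drop":           uint32(49),
	"early_drop":     uint32(7),
	"icmp_error":     uint32(21),
	"expect_new":     uint32(12),
	"expect_create":  uint32(44),
	"expect_delete":  uint32(53),
	"search_restart": uint32(31),
}
acc.AssertContainsTaggedFields(t, inputName, expectedCPU1,
	map[string]string{"cpu": "cpu1"})
```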

plugins/inputs/conntrack/sample.conf

@ -12,3 +12,6 @@
## Directories to search within for the conntrack files above.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
## all - aggregated statistics
## percpu - include detailed statistics with cpu tag
collect = ["all", "percpu"]

plugins/inputs/system/mock_PS.go

@ -119,6 +119,15 @@ func (m *MockPS) NetConnections() ([]net.ConnectionStat, error) {
return r0, r1
}
func (m *MockPS) NetConntrack(perCPU bool) ([]net.ConntrackStat, error) {
ret := m.Called(perCPU)
r0 := ret.Get(0).([]net.ConntrackStat)
r1 := ret.Error(1)
return r0, r1
}
func (m *MockDiskUsage) Partitions(all bool) ([]disk.PartitionStat, error) {
ret := m.Called(all)

plugins/inputs/system/ps.go

@ -24,6 +24,7 @@ type PS interface {
VMStat() (*mem.VirtualMemoryStat, error)
SwapStat() (*mem.SwapMemoryStat, error)
NetConnections() ([]net.ConnectionStat, error)
NetConntrack(perCPU bool) ([]net.ConntrackStat, error)
Temperature() ([]host.TemperatureStat, error)
}
@ -191,6 +192,10 @@ func (s *SystemPS) NetConnections() ([]net.ConnectionStat, error) {
return net.Connections("all")
}
func (s *SystemPS) NetConntrack(perCPU bool) ([]net.ConntrackStat, error) {
return net.ConntrackStats(perCPU)
}
func (s *SystemPS) DiskIO(names []string) (map[string]disk.IOCountersStat, error) {
m, err := disk.IOCounters(names...)
if err == internal.ErrorNotImplemented {
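A note on the design above: adding `NetConntrack` to the `PS` interface (and mirroring it in `MockPS`) keeps the plugin decoupled from gopsutil, so tests can substitute canned data. A small self-contained sketch of that pattern; `conntrackSource`, `fakeConntrackPS`, and `printEntries` are hypothetical names used only for this example:

```go
package main

import (
	"fmt"
	"log"

	"github.com/shirou/gopsutil/v3/net"
)

// conntrackSource is the slice of the PS interface this example needs.
type conntrackSource interface {
	NetConntrack(perCPU bool) ([]net.ConntrackStat, error)
}

// fakeConntrackPS returns canned statistics, standing in for MockPS/SystemPS.
type fakeConntrackPS struct{}

func (fakeConntrackPS) NetConntrack(_ bool) ([]net.ConntrackStat, error) {
	return []net.ConntrackStat{{Entries: 42, Drop: 1}}, nil
}

// printEntries depends only on the interface, not on a concrete implementation.
func printEntries(ps conntrackSource) error {
	stats, err := ps.NetConntrack(false)
	if err != nil {
		return err
	}
	for _, s := range stats {
		fmt.Printf("entries=%d drop=%d\n", s.Entries, s.Drop)
	}
	return nil
}

func main() {
	if err := printEntries(fakeConntrackPS{}); err != nil {
		log.Fatal(err)
	}
}
```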