feat(inputs.conntrack): Parse conntrack stats (#8958)

Tomas Barton 2022-10-12 21:12:16 +02:00 committed by GitHub
parent 7c1d1755d8
commit 0087a5d245
6 changed files with 293 additions and 18 deletions

View File

@@ -4,12 +4,18 @@ Collects stats from Netfilter's conntrack-tools.

The conntrack-tools provide a mechanism for tracking various aspects of
network connections as they are processed by netfilter. At runtime,
conntrack exposes many of those connection statistics within `/proc/sys/net`.
Depending on your kernel version, these files can be found in either
`/proc/sys/net/ipv4/netfilter` or `/proc/sys/net/netfilter` and will be
prefixed with either `ip_` or `nf_`. This plugin reads the files specified
in its configuration and publishes each one as a field, with the prefix
normalized to `ip_`.
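A minimal sketch of that normalization rule, using a hypothetical `normalizeKey` helper (the plugin does the equivalent inline while gathering, so this is illustrative only):

```go
package main

import (
	"fmt"
	"strings"
)

// normalizeKey is a hypothetical helper showing the prefix rule
// described above: nf_-prefixed files are published as ip_ fields.
func normalizeKey(fileName string) string {
	if strings.HasPrefix(fileName, "nf_") {
		return "ip_" + strings.TrimPrefix(fileName, "nf_")
	}
	return fileName
}

func main() {
	fmt.Println(normalizeKey("nf_conntrack_count")) // ip_conntrack_count
	fmt.Println(normalizeKey("ip_conntrack_max"))   // already normalized
}
```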
In order to simplify configuration in a heterogeneous environment, a superset
of directory and filenames can be specified. Any locations that don't exist
@@ -35,17 +41,46 @@ For more information on conntrack-tools, see the

## Directories to search within for the conntrack files above.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
## all - aggregated statistics
## percpu - include detailed statistics with cpu tag
collect = ["all", "percpu"]
```
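The two `collect` values are independent switches. A minimal sketch of how they map to flags, mirroring the parsing in the plugin's `Gather` method shown further down in this commit (`collectFlags` is an illustrative name, not part of the plugin):

```go
package main

import "fmt"

// collectFlags interprets the `collect` option from the config above:
// "all" requests aggregated statistics, "percpu" per-CPU statistics.
func collectFlags(collect []string) (all, perCPU bool) {
	for _, c := range collect {
		switch c {
		case "all":
			all = true
		case "percpu":
			perCPU = true
		}
	}
	return all, perCPU
}

func main() {
	all, perCPU := collectFlags([]string{"all", "percpu"})
	fmt.Println(all, perCPU) // true true
}
```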
## Metrics

A detailed explanation of each field can be found in the [kernel documentation](
https://www.kernel.org/doc/Documentation/networking/nf_conntrack-sysctl.txt).

- conntrack
  - `ip_conntrack_count` `(int, count)`: The number of entries in the conntrack table
  - `ip_conntrack_max` `(int, size)`: The max capacity of the conntrack table
  - `ip_conntrack_buckets` `(int, size)`: The size of the hash table.
With `collect = ["all"]`:

- `entries`: The number of entries in the conntrack table
- `searched`: The number of conntrack table lookups performed
- `found`: The number of searched entries which were successful
- `new`: The number of entries added which were not expected before
- `invalid`: The number of packets seen which can not be tracked
- `ignore`: The number of packets seen which are already connected to an entry
- `delete`: The number of entries which were removed
- `delete_list`: The number of entries which were put to the dying list
- `insert`: The number of entries inserted into the list
- `insert_failed`: The number of insertions attempted but failed (same entry exists)
- `drop`: The number of packets dropped due to conntrack failure
- `early_drop`: The number of dropped entries to make room for new ones, if the maximum table size is reached
- `icmp_error`: Subset of invalid. Packets that can't be tracked due to error
- `expect_new`: Entries added after an expectation was already present
- `expect_create`: Expectations added
- `expect_delete`: Expectations deleted
- `search_restart`: Conntrack table lookups restarted due to hashtable resizes
### Tags

With `collect = ["percpu"]`, detailed statistics are reported per CPU thread,
tagged with the CPU number. Without `"percpu"`, the `cpu` tag has the value
`all`.
## Example Output

@@ -53,3 +88,11 @@ This input does not use tags.

$ ./telegraf --config telegraf.conf --input-filter conntrack --test
conntrack,host=myhost ip_conntrack_count=2,ip_conntrack_max=262144 1461620427667995735
```
with stats:
```shell
$ telegraf --config /etc/telegraf/telegraf.conf --input-filter conntrack --test
> conntrack,cpu=all,host=localhost delete=0i,delete_list=0i,drop=2i,early_drop=0i,entries=5568i,expect_create=0i,expect_delete=0i,expect_new=0i,found=7i,icmp_error=1962i,ignore=2586413402i,insert=0i,insert_failed=2i,invalid=46853i,new=0i,search_restart=453336i,searched=0i 1615233542000000000
> conntrack,host=localhost ip_conntrack_count=464,ip_conntrack_max=262144 1615233542000000000
```

View File

@@ -13,15 +13,18 @@ import (

	"github.com/influxdata/telegraf"
	"github.com/influxdata/telegraf/plugins/inputs"
	"github.com/influxdata/telegraf/plugins/inputs/system"
)

//go:embed sample.conf
var sampleConfig string

type Conntrack struct {
	ps      system.PS
	Path    string
	Dirs    []string
	Files   []string
	Collect []string
}

const (
@@ -77,19 +80,73 @@ func (c *Conntrack) Gather(acc telegraf.Accumulator) error {

			contents, err := os.ReadFile(fName)
			if err != nil {
				acc.AddError(fmt.Errorf("failed to read file '%s': %v", fName, err))
				continue
			}

			v := strings.TrimSpace(string(contents))
			fields[metricKey], err = strconv.ParseFloat(v, 64)
			if err != nil {
				acc.AddError(fmt.Errorf("failed to parse metric, expected number but "+
					" found '%s': %v", v, err))
			}
		}
	}
	var all bool
	var perCPU bool
	for _, collect := range c.Collect {
		if collect == "all" {
			all = true
		}
		if collect == "percpu" {
			perCPU = true
		}
	}

	if all || perCPU {
		stats, err := c.ps.NetConntrack(perCPU)
		if err != nil {
			acc.AddError(fmt.Errorf("failed to retrieve conntrack statistics: %v", err))
		}

		if len(stats) == 0 {
			acc.AddError(fmt.Errorf("conntrack input failed to collect stats"))
		}

		for i, sts := range stats {
			cpuTag := "all"
			if perCPU {
				cpuTag = fmt.Sprintf("cpu%d", i)
			}
			tags := map[string]string{
				"cpu": cpuTag,
			}

			statFields := map[string]interface{}{
				"entries":        sts.Entries,       // entries in the conntrack table
				"searched":       sts.Searched,      // conntrack table lookups performed
				"found":          sts.Found,         // searched entries which were successful
				"new":            sts.New,           // entries added which were not expected before
				"invalid":        sts.Invalid,       // packets seen which can not be tracked
				"ignore":         sts.Ignore,        // packets seen which are already connected to an entry
				"delete":         sts.Delete,        // entries which were removed
				"delete_list":    sts.DeleteList,    // entries which were put to the dying list
				"insert":         sts.Insert,        // entries inserted into the list
				"insert_failed":  sts.InsertFailed,  // insertion attempted but failed (same entry exists)
				"drop":           sts.Drop,          // packets dropped due to conntrack failure
				"early_drop":     sts.EarlyDrop,     // dropped entries to make room for new ones, if maxsize reached
				"icmp_error":     sts.IcmpError,     // subset of invalid: packets that can't be tracked due to error
				"expect_new":     sts.ExpectNew,     // entries added after an expectation was already present
				"expect_create":  sts.ExpectCreate,  // expectations added
				"expect_delete":  sts.ExpectDelete,  // expectations deleted
				"search_restart": sts.SearchRestart, // conntrack table lookups restarted due to hashtable resizes
			}

			acc.AddCounter(inputName, statFields, tags)
		}
	}
	if len(fields) == 0 {
		return fmt.Errorf("Conntrack input failed to collect metrics. " +
			"Is the conntrack kernel module loaded?")

View File

@@ -9,8 +9,11 @@ import (

	"strings"
	"testing"

	"github.com/shirou/gopsutil/v3/net"
	"github.com/stretchr/testify/require"

	"github.com/influxdata/telegraf/plugins/inputs/system"
	"github.com/influxdata/telegraf/testutil"
)

func restoreDflts(savedFiles, savedDirs []string) {
@@ -33,11 +36,13 @@ func TestNoFilesFound(t *testing.T) {

func TestDefaultsUsed(t *testing.T) {
	defer restoreDflts(dfltFiles, dfltDirs)
	tmpdir, err := os.MkdirTemp("", "tmp1")
	require.NoError(t, err)
	defer os.Remove(tmpdir)

	tmpFile, err := os.CreateTemp(tmpdir, "ip_conntrack_count")
	require.NoError(t, err)
	defer os.Remove(tmpFile.Name())

	dfltDirs = []string{tmpdir}
	fname := path.Base(tmpFile.Name())
@@ -55,15 +60,16 @@ func TestDefaultsUsed(t *testing.T) {

func TestConfigsUsed(t *testing.T) {
	defer restoreDflts(dfltFiles, dfltDirs)
	tmpdir, err := os.MkdirTemp("", "tmp1")
	require.NoError(t, err)
	defer os.Remove(tmpdir)

	cntFile, err := os.CreateTemp(tmpdir, "nf_conntrack_count")
	require.NoError(t, err)
	maxFile, err := os.CreateTemp(tmpdir, "nf_conntrack_max")
	require.NoError(t, err)
	defer os.Remove(cntFile.Name())
	defer os.Remove(maxFile.Name())

	dfltDirs = []string{tmpdir}
	cntFname := path.Base(cntFile.Name())
@@ -89,3 +95,155 @@ func TestConfigsUsed(t *testing.T) {
		fix(maxFname): float64(max),
	})
}
func TestCollectStats(t *testing.T) {
	var mps system.MockPS
	defer mps.AssertExpectations(t)
	var acc testutil.Accumulator

	sts := net.ConntrackStat{
		Entries:       1234,
		Searched:      10,
		Found:         1,
		New:           5,
		Invalid:       43,
		Ignore:        13,
		Delete:        3,
		DeleteList:    5,
		Insert:        9,
		InsertFailed:  20,
		Drop:          49,
		EarlyDrop:     7,
		IcmpError:     21,
		ExpectNew:     12,
		ExpectCreate:  44,
		ExpectDelete:  53,
		SearchRestart: 31,
	}

	mps.On("NetConntrack", false).Return([]net.ConntrackStat{sts}, nil)

	cs := &Conntrack{
		ps: &mps,
	}
	cs.Collect = []string{"all"}

	err := cs.Gather(&acc)
	require.NoError(t, err)

	expectedTags := map[string]string{
		"cpu": "all",
	}

	expectedFields := map[string]interface{}{
		"entries":        uint32(1234),
		"searched":       uint32(10),
		"found":          uint32(1),
		"new":            uint32(5),
		"invalid":        uint32(43),
		"ignore":         uint32(13),
		"delete":         uint32(3),
		"delete_list":    uint32(5),
		"insert":         uint32(9),
		"insert_failed":  uint32(20),
		"drop":           uint32(49),
		"early_drop":     uint32(7),
		"icmp_error":     uint32(21),
		"expect_new":     uint32(12),
		"expect_create":  uint32(44),
		"expect_delete":  uint32(53),
		"search_restart": uint32(31),
	}

	acc.AssertContainsFields(t, inputName, expectedFields)
	acc.AssertContainsTaggedFields(t, inputName, expectedFields, expectedTags)
	require.Equal(t, 19, acc.NFields())
}
func TestCollectStatsPerCpu(t *testing.T) {
	var mps system.MockPS
	defer mps.AssertExpectations(t)
	var acc testutil.Accumulator

	sts := []net.ConntrackStat{
		{
			Entries:       59,
			Searched:      10,
			Found:         1,
			New:           5,
			Invalid:       43,
			Ignore:        13,
			Delete:        3,
			DeleteList:    5,
			Insert:        9,
			InsertFailed:  20,
			Drop:          49,
			EarlyDrop:     7,
			IcmpError:     21,
			ExpectNew:     12,
			ExpectCreate:  44,
			ExpectDelete:  53,
			SearchRestart: 31,
		},
		{
			Entries:       79,
			Searched:      10,
			Found:         1,
			New:           5,
			Invalid:       43,
			Ignore:        13,
			Delete:        3,
			DeleteList:    5,
			Insert:        9,
			InsertFailed:  10,
			Drop:          49,
			EarlyDrop:     7,
			IcmpError:     21,
			ExpectNew:     12,
			ExpectCreate:  44,
			ExpectDelete:  53,
			SearchRestart: 31,
		},
	}

	mps.On("NetConntrack", true).Return(sts, nil)

	cs := &Conntrack{
		ps: &mps,
	}
	cs.Collect = []string{"all", "percpu"}

	err := cs.Gather(&acc)
	require.NoError(t, err)

	// cpu0
	expectedFields := map[string]interface{}{
		"entries":        uint32(59),
		"searched":       uint32(10),
		"found":          uint32(1),
		"new":            uint32(5),
		"invalid":        uint32(43),
		"ignore":         uint32(13),
		"delete":         uint32(3),
		"delete_list":    uint32(5),
		"insert":         uint32(9),
		"insert_failed":  uint32(20),
		"drop":           uint32(49),
		"early_drop":     uint32(7),
		"icmp_error":     uint32(21),
		"expect_new":     uint32(12),
		"expect_create":  uint32(44),
		"expect_delete":  uint32(53),
		"search_restart": uint32(31),
	}

	acc.AssertContainsFields(t, inputName, expectedFields)
	acc.AssertContainsTaggedFields(t, inputName, expectedFields,
		map[string]string{
			"cpu": "cpu0",
		})

	// TODO: check cpu1 fields
	require.Equal(t, 36, acc.NFields())
}

View File

@@ -12,3 +12,6 @@

## Directories to search within for the conntrack files above.
## Missing directories will be ignored.
dirs = ["/proc/sys/net/ipv4/netfilter","/proc/sys/net/netfilter"]
## all - aggregated statistics
## percpu - include detailed statistics with cpu tag
collect = ["all", "percpu"]

View File

@@ -119,6 +119,15 @@ func (m *MockPS) NetConnections() ([]net.ConnectionStat, error) {

	return r0, r1
}
func (m *MockPS) NetConntrack(perCPU bool) ([]net.ConntrackStat, error) {
	ret := m.Called(perCPU)

	r0 := ret.Get(0).([]net.ConntrackStat)
	r1 := ret.Error(1)

	return r0, r1
}
func (m *MockDiskUsage) Partitions(all bool) ([]disk.PartitionStat, error) {
	ret := m.Called(all)
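For reference, a minimal sketch of driving the `NetConntrack` mock directly with canned data; the real tests above exercise it through `Conntrack.Gather` instead:

```go
package conntrack

import (
	"testing"

	"github.com/shirou/gopsutil/v3/net"
	"github.com/stretchr/testify/require"

	"github.com/influxdata/telegraf/plugins/inputs/system"
)

// Illustrative only: the mock returns whatever slice was registered
// for the given perCPU argument.
func TestMockSketch(t *testing.T) {
	var mps system.MockPS
	mps.On("NetConntrack", true).Return([]net.ConntrackStat{{Entries: 42}}, nil)

	stats, err := mps.NetConntrack(true)
	require.NoError(t, err)
	require.Equal(t, uint32(42), stats[0].Entries)
	mps.AssertExpectations(t)
}
```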

View File

@@ -24,6 +24,7 @@ type PS interface {

	VMStat() (*mem.VirtualMemoryStat, error)
	SwapStat() (*mem.SwapMemoryStat, error)
	NetConnections() ([]net.ConnectionStat, error)
	NetConntrack(perCPU bool) ([]net.ConntrackStat, error)
	Temperature() ([]host.TemperatureStat, error)
}

@@ -191,6 +192,10 @@ func (s *SystemPS) NetConnections() ([]net.ConnectionStat, error) {

	return net.Connections("all")
}
func (s *SystemPS) NetConntrack(perCPU bool) ([]net.ConntrackStat, error) {
	return net.ConntrackStats(perCPU)
}

func (s *SystemPS) DiskIO(names []string) (map[string]disk.IOCountersStat, error) {
	m, err := disk.IOCounters(names...)
	if err == internal.ErrorNotImplemented {
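The wrapper above is a one-line delegation to gopsutil. A standalone sketch of the same call outside Telegraf; with `percpu=false`, gopsutil returns a single aggregated entry:

```go
package main

import (
	"fmt"
	"log"

	"github.com/shirou/gopsutil/v3/net"
)

func main() {
	// false = aggregated totals; true would return one entry per CPU.
	stats, err := net.ConntrackStats(false)
	if err != nil {
		log.Fatal(err)
	}
	for _, s := range stats {
		fmt.Printf("entries=%d searched=%d drop=%d\n", s.Entries, s.Searched, s.Drop)
	}
}
```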