feat(inputs.docker): Add disk usage (#13894)

This commit is contained in:
R290 2023-09-27 22:28:36 +02:00 committed by GitHub
parent 1f029cb127
commit 705176b8d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 303 additions and 12 deletions

2
go.mod
View File

@ -243,7 +243,7 @@ require (
github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect
github.com/Masterminds/goutils v1.1.1 // indirect
github.com/Masterminds/semver v1.5.0 // indirect
github.com/Masterminds/semver/v3 v3.2.0 // indirect
github.com/Masterminds/semver/v3 v3.2.0
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/alecthomas/participle v0.4.1 // indirect
github.com/andybalholm/brotli v1.0.5 // indirect

View File

@ -53,6 +53,11 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# container_state_include = []
# container_state_exclude = []
## Objects to include for disk usage query
## Allowed values are "container", "image", "volume"
## When empty disk usage is excluded
storage_objects = []
## Timeout for docker list, info, and stats commands
timeout = "5s"
@ -379,6 +384,23 @@ status if configured.
- tasks_desired
- tasks_running
- docker_disk_usage
- tags:
- engine_host
- server_version
- container_name
- container_image
- container_version
- image_id
- image_name
- image_version
- volume_name
- fields:
- layers_size
- size_rw
- size_root_fs
- size
- shared_size
## Example Output
```text
@ -392,4 +414,8 @@ docker_container_net,container_image=telegraf,container_name=zen_ritchie,contain
docker_container_blkio,container_image=telegraf,container_name=zen_ritchie,container_status=running,container_version=unknown,device=254:0,engine_host=debian-stretch-docker,server_version=17.09.0-ce container_id="adc4ba9593871bf2ab95f3ffde70d1b638b897bb225d21c2c9c84226a10a8cf4",io_service_bytes_recursive_async=27398144i,io_service_bytes_recursive_read=27398144i,io_service_bytes_recursive_sync=0i,io_service_bytes_recursive_total=27398144i,io_service_bytes_recursive_write=0i,io_serviced_recursive_async=529i,io_serviced_recursive_read=529i,io_serviced_recursive_sync=0i,io_serviced_recursive_total=529i,io_serviced_recursive_write=0i 1524002042000000000
docker_container_health,container_image=telegraf,container_name=zen_ritchie,container_status=running,container_version=unknown,engine_host=debian-stretch-docker,server_version=17.09.0-ce failing_streak=0i,health_status="healthy" 1524007529000000000
docker_swarm,service_id=xaup2o9krw36j2dy1mjx1arjw,service_mode=replicated,service_name=test tasks_desired=3,tasks_running=3 1508968160000000000
docker_disk_usage,engine_host=docker-desktop,server_version=24.0.5 layers_size=17654519107i 1695742041000000000
docker_disk_usage,container_image=influxdb,container_name=frosty_wright,container_version=1.8,engine_host=docker-desktop,server_version=24.0.5 size_root_fs=286593526i,size_rw=538i 1695742041000000000
docker_disk_usage,engine_host=docker-desktop,image_id=7f4a1cc74046,image_name=telegraf,image_version=latest,server_version=24.0.5 shared_size=0i,size=425484494i 1695742041000000000
docker_disk_usage,engine_host=docker-desktop,server_version=24.0.5,volume_name=docker_influxdb-data size=91989940i 1695742041000000000
```

View File

@ -11,7 +11,6 @@ import (
)
var (
version = "1.24" // https://docs.docker.com/engine/api/
defaultHeaders = map[string]string{"User-Agent": "engine-api-cli-1.0"}
)
@ -23,6 +22,8 @@ type Client interface {
ServiceList(ctx context.Context, options types.ServiceListOptions) ([]swarm.Service, error)
TaskList(ctx context.Context, options types.TaskListOptions) ([]swarm.Task, error)
NodeList(ctx context.Context, options types.NodeListOptions) ([]swarm.Node, error)
DiskUsage(ctx context.Context, options types.DiskUsageOptions) (types.DiskUsage, error)
ClientVersion() string
Close() error
}
@ -43,7 +44,7 @@ func NewClient(host string, tlsConfig *tls.Config) (Client, error) {
client, err := dockerClient.NewClientWithOpts(
dockerClient.WithHTTPHeaders(defaultHeaders),
dockerClient.WithHTTPClient(httpClient),
dockerClient.WithVersion(version),
dockerClient.WithAPIVersionNegotiation(),
dockerClient.WithHost(host))
if err != nil {
return nil, err
@ -77,6 +78,14 @@ func (c *SocketClient) TaskList(ctx context.Context, options types.TaskListOptio
// NodeList forwards the request to the wrapped docker client and returns its
// result and error unchanged.
func (c *SocketClient) NodeList(ctx context.Context, options types.NodeListOptions) ([]swarm.Node, error) {
return c.client.NodeList(ctx, options)
}
// DiskUsage forwards the disk usage request, including the caller's context
// and options, to the wrapped docker client and returns its result and error
// unchanged.
func (c *SocketClient) DiskUsage(ctx context.Context, options types.DiskUsageOptions) (types.DiskUsage, error) {
return c.client.DiskUsage(ctx, options)
}
// ClientVersion returns the version string reported by the wrapped docker
// client's ClientVersion method.
func (c *SocketClient) ClientVersion() string {
return c.client.ClientVersion()
}
// Close closes the wrapped docker client and returns the error it reports,
// if any.
func (c *SocketClient) Close() error {
return c.client.Close()
}

View File

@ -15,6 +15,7 @@ import (
"sync"
"time"
"github.com/Masterminds/semver/v3"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/filters"
"github.com/docker/docker/api/types/swarm"
@ -53,6 +54,8 @@ type Docker struct {
ContainerStateInclude []string `toml:"container_state_include"`
ContainerStateExclude []string `toml:"container_state_exclude"`
StorageObjects []string `toml:"storage_objects"`
IncludeSourceTag bool `toml:"source_tag"`
Log telegraf.Logger
@ -69,6 +72,7 @@ type Docker struct {
labelFilter filter.Filter
containerFilter filter.Filter
stateFilter filter.Filter
objectTypes []types.DiskUsageObject
}
// KB, MB, GB, TB, PB...human friendly
@ -87,6 +91,9 @@ var (
containerStates = []string{"created", "restarting", "running", "removing", "paused", "exited", "dead"}
containerMetricClasses = []string{"cpu", "network", "blkio"}
now = time.Now
minVersion = semver.MustParse("1.23")
minDiskUsageVersion = semver.MustParse("1.42")
)
func (*Docker) SampleConfig() string {
@ -123,6 +130,21 @@ func (d *Docker) Init() error {
}
}
d.objectTypes = make([]types.DiskUsageObject, 0, len(d.StorageObjects))
for _, object := range d.StorageObjects {
switch object {
case "container":
d.objectTypes = append(d.objectTypes, types.ContainerObject)
case "image":
d.objectTypes = append(d.objectTypes, types.ImageObject)
case "volume":
d.objectTypes = append(d.objectTypes, types.VolumeObject)
default:
d.Log.Warnf("Unrecognized storage object type: %s", object)
}
}
return nil
}
@ -134,6 +156,19 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
return err
}
d.client = c
version, err := semver.NewVersion(d.client.ClientVersion())
if err != nil {
return err
}
if version.LessThan(minVersion) {
d.Log.Warnf("Unsupported api version (%v.%v), upgrade to docker engine 1.12 or later (api version 1.24)",
version.Major(), version.Minor())
} else if version.LessThan(minDiskUsageVersion) && len(d.objectTypes) > 0 {
d.Log.Warnf("Unsupported api version for disk usage (%v.%v), upgrade to docker engine 23.0 or later (api version 1.42)",
version.Major(), version.Minor())
}
}
// Close any idle connections in the end of gathering
@ -209,6 +244,11 @@ func (d *Docker) Gather(acc telegraf.Accumulator) error {
}
wg.Wait()
// Get disk usage data
if len(d.objectTypes) > 0 {
d.gatherDiskUsage(acc, types.DiskUsageOptions{Types: d.objectTypes})
}
return nil
}
@ -412,21 +452,27 @@ func hostnameFromID(id string) string {
return id
}
// parseContainerName picks the name to report for a container.
//
// Each candidate has a single leading "/" stripped; the first candidate that
// then contains no further "/" is returned. The empty string is returned when
// containerNames is empty or every candidate still contains a "/".
func parseContainerName(containerNames []string) string {
	for i := range containerNames {
		candidate := strings.TrimPrefix(containerNames[i], "/")
		if strings.Contains(candidate, "/") {
			// Still has a path separator after trimming (presumably a
			// link alias such as "/parent/child" — TODO confirm); skip it.
			continue
		}
		return candidate
	}
	return ""
}
func (d *Docker) gatherContainer(
container types.Container,
acc telegraf.Accumulator,
) error {
var v *types.StatsJSON
// Parse container name
var cname string
for _, name := range container.Names {
trimmedName := strings.TrimPrefix(name, "/")
if !strings.Contains(trimmedName, "/") {
cname = trimmedName
break
}
}
cname := parseContainerName(container.Names)
if cname == "" {
return nil
@ -849,6 +895,93 @@ func (d *Docker) gatherBlockIOMetrics(
}
}
// gatherDiskUsage queries the docker client for disk usage and reports the
// result under the "docker_disk_usage" measurement.
//
// The request is bounded by d.Timeout. opts selects which object types the
// daemon is asked to report. Every metric carries the engine_host and
// server_version tags and shares one timestamp. Emitted metrics:
//   - one metric with the total layers_size,
//   - one per container with size_rw and size_root_fs,
//   - one per image with size and shared_size,
//   - one per volume with size.
//
// If the query fails, the error is added to acc and no metrics are emitted.
func (d *Docker) gatherDiskUsage(acc telegraf.Accumulator, opts types.DiskUsageOptions) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(d.Timeout))
	defer cancel()

	du, err := d.client.DiskUsage(ctx, opts)
	if err != nil {
		// The response is not meaningful on error; returning here avoids
		// reporting a bogus zero layers_size metric.
		acc.AddError(err)
		return
	}

	// A single timestamp keeps all disk usage metrics of one gather aligned.
	// Named so it does not shadow the package-level `now` variable.
	timestamp := time.Now()
	const duName = "docker_disk_usage"

	// Layers size
	acc.AddFields(
		duName,
		map[string]interface{}{
			"layers_size": du.LayersSize,
		},
		map[string]string{
			"engine_host":    d.engineHost,
			"server_version": d.serverVersion,
		},
		timestamp,
	)

	// Containers
	for _, container := range du.Containers {
		fields := map[string]interface{}{
			"size_rw":      container.SizeRw,
			"size_root_fs": container.SizeRootFs,
		}

		imageName, imageVersion := dockerint.ParseImage(container.Image)

		tags := map[string]string{
			"engine_host":       d.engineHost,
			"server_version":    d.serverVersion,
			"container_name":    parseContainerName(container.Names),
			"container_image":   imageName,
			"container_version": imageVersion,
		}

		if d.IncludeSourceTag {
			tags["source"] = hostnameFromID(container.ID)
		}

		acc.AddFields(duName, fields, tags, timestamp)
	}

	// Images
	for _, image := range du.Images {
		fields := map[string]interface{}{
			"size":        image.Size,
			"shared_size": image.SharedSize,
		}

		tags := map[string]string{
			"engine_host":    d.engineHost,
			"server_version": d.serverVersion,
			"image_id":       diskUsageImageID(image.ID),
		}

		// Only the first repo tag is used to name the image; untagged
		// images are reported with image_id alone.
		if len(image.RepoTags) > 0 {
			imageName, imageVersion := dockerint.ParseImage(image.RepoTags[0])
			tags["image_name"] = imageName
			tags["image_version"] = imageVersion
		}

		acc.AddFields(duName, fields, tags, timestamp)
	}

	// Volumes
	for _, volume := range du.Volumes {
		// UsageData is a pointer; skip volumes without usage information
		// instead of dereferencing nil.
		if volume.UsageData == nil {
			continue
		}

		fields := map[string]interface{}{
			"size": volume.UsageData.Size,
		}

		tags := map[string]string{
			"engine_host":    d.engineHost,
			"server_version": d.serverVersion,
			"volume_name":    volume.Name,
		}

		acc.AddFields(duName, fields, tags, timestamp)
	}
}

// diskUsageImageID removes a leading "sha256:" from id and keeps at most the
// first 12 characters of the remainder. Unlike a fixed [7:19] slice it never
// panics on short IDs and does not cut into IDs that lack the prefix.
func diskUsageImageID(id string) string {
	const shortIDLen = 12

	trimmed := strings.TrimPrefix(id, "sha256:")
	if len(trimmed) > shortIDLen {
		return trimmed[:shortIDLen]
	}
	return trimmed
}
func copyTags(in map[string]string) map[string]string {
out := make(map[string]string)
for k, v := range in {

View File

@ -27,6 +27,8 @@ type MockClient struct {
ServiceListF func(ctx context.Context, options types.ServiceListOptions) ([]swarm.Service, error)
TaskListF func(ctx context.Context, options types.TaskListOptions) ([]swarm.Task, error)
NodeListF func(ctx context.Context, options types.NodeListOptions) ([]swarm.Node, error)
DiskUsageF func(ctx context.Context, options types.DiskUsageOptions) (types.DiskUsage, error)
ClientVersionF func() string
CloseF func() error
}
@ -77,6 +79,17 @@ func (c *MockClient) NodeList(
return c.NodeListF(ctx, options)
}
// DiskUsage delegates to the DiskUsageF stub configured on the mock and
// returns whatever that stub returns. DiskUsageF must be set: calling a nil
// function value panics.
func (c *MockClient) DiskUsage(
ctx context.Context,
options types.DiskUsageOptions,
) (types.DiskUsage, error) {
return c.DiskUsageF(ctx, options)
}
// ClientVersion delegates to the ClientVersionF stub configured on the mock
// and returns its result. ClientVersionF must be set: calling a nil function
// value panics.
func (c *MockClient) ClientVersion() string {
return c.ClientVersionF()
}
// Close delegates to the CloseF stub configured on the mock and returns its
// result. CloseF must be set: calling a nil function value panics.
func (c *MockClient) Close() error {
return c.CloseF()
}
@ -103,6 +116,12 @@ var baseClient = MockClient{
NodeListF: func(context.Context, types.NodeListOptions) ([]swarm.Node, error) {
return NodeList, nil
},
DiskUsageF: func(context.Context, types.DiskUsageOptions) (types.DiskUsage, error) {
return diskUsage, nil
},
ClientVersionF: func() string {
return version
},
CloseF: func() error {
return nil
},
@ -445,6 +464,12 @@ func TestDocker_WindowsMemoryContainerStats(t *testing.T) {
NodeListF: func(context.Context, types.NodeListOptions) ([]swarm.Node, error) {
return NodeList, nil
},
DiskUsageF: func(context.Context, types.DiskUsageOptions) (types.DiskUsage, error) {
return diskUsage, nil
},
ClientVersionF: func() string {
return version
},
CloseF: func() error {
return nil
},
@ -1537,3 +1562,81 @@ func TestDocker_Init(t *testing.T) {
})
}
}
// TestDockerGatherDiskUsage runs a regular Gather against the mock base
// client, then calls gatherDiskUsage directly and checks that the accumulator
// holds the expected "docker_disk_usage" metrics: the layers total, one
// container, two images and one volume.
func TestDockerGatherDiskUsage(t *testing.T) {
	var acc testutil.Accumulator
	d := Docker{
		Log:       testutil.Logger{},
		newClient: func(string, *tls.Config) (Client, error) { return &baseClient, nil },
	}

	require.NoError(t, acc.GatherError(d.Gather))

	d.gatherDiskUsage(&acc, types.DiskUsageOptions{Types: []types.DiskUsageObject{}})

	// Expected metrics, checked in the order listed.
	expected := []struct {
		fields map[string]interface{}
		tags   map[string]string
	}{
		{
			// Layers total
			fields: map[string]interface{}{
				"layers_size": int64(1e10),
			},
			tags: map[string]string{
				"engine_host":    "absol",
				"server_version": "17.09.0-ce",
			},
		},
		{
			// Container
			fields: map[string]interface{}{
				"size_root_fs": int64(123456789),
				"size_rw":      int64(0),
			},
			tags: map[string]string{
				"container_image":   "some_image",
				"container_version": "1.0.0-alpine",
				"engine_host":       "absol",
				"server_version":    "17.09.0-ce",
				"container_name":    "some_container",
			},
		},
		{
			// First image
			fields: map[string]interface{}{
				"size":        int64(123456789),
				"shared_size": int64(0),
			},
			tags: map[string]string{
				"image_id":       "some_imageid",
				"image_name":     "some_image_tag",
				"image_version":  "1.0.0-alpine",
				"engine_host":    "absol",
				"server_version": "17.09.0-ce",
			},
		},
		{
			// Second image
			fields: map[string]interface{}{
				"size":        int64(425484494),
				"shared_size": int64(0),
			},
			tags: map[string]string{
				"image_id":       "7f4a1cc74046",
				"image_name":     "telegraf",
				"image_version":  "latest",
				"engine_host":    "absol",
				"server_version": "17.09.0-ce",
			},
		},
		{
			// Volume
			fields: map[string]interface{}{
				"size": int64(123456789),
			},
			tags: map[string]string{
				"volume_name":    "some_volume",
				"engine_host":    "absol",
				"server_version": "17.09.0-ce",
			},
		},
	}

	for _, want := range expected {
		acc.AssertContainsTaggedFields(t, "docker_disk_usage", want.fields, want.tags)
	}
}

View File

@ -10,6 +10,7 @@ import (
"github.com/docker/docker/api/types/container"
"github.com/docker/docker/api/types/registry"
"github.com/docker/docker/api/types/swarm"
"github.com/docker/docker/api/types/volume"
)
var info = types.Info{
@ -542,3 +543,17 @@ func containerInspect() types.ContainerJSON {
},
}
}
// diskUsage is the canned disk usage response handed out by the mock clients'
// DiskUsageF stubs. It holds a layers total, one container, two images and
// one volume.
var diskUsage = types.DiskUsage{
LayersSize: 1e10,
Containers: []*types.Container{
{Names: []string{"/some_container"}, Image: "some_image:1.0.0-alpine", SizeRw: 0, SizeRootFs: 123456789},
},
Images: []*types.ImageSummary{
// ID is "sha256:" followed by a 12-character identifier.
{ID: "sha256:some_imageid", RepoTags: []string{"some_image_tag:1.0.0-alpine"}, Size: 123456789, SharedSize: 0},
// ID is "sha256:" followed by a full-length hex digest.
{ID: "sha256:7f4a1cc74046ce48cd918693cd6bf4b2683f4ce0d7be3f7148a21df9f06f5b5f", RepoTags: []string{"telegraf:latest"}, Size: 425484494, SharedSize: 0},
},
Volumes: []*volume.Volume{{Name: "some_volume", UsageData: &volume.UsageData{Size: 123456789}}},
}
// version is the API version string returned by the mock clients'
// ClientVersionF stubs.
var version = "1.43"

View File

@ -29,6 +29,11 @@
# container_state_include = []
# container_state_exclude = []
## Objects to include for disk usage query
## Allowed values are "container", "image", "volume"
## When empty disk usage is excluded
storage_objects = []
## Timeout for docker list, info, and stats commands
timeout = "5s"