feat(inputs.diskio): Add field io await and util (#15950)

This commit is contained in:
jiangxianfu 2024-10-04 00:50:01 +08:00 committed by GitHub
parent 9e99116ef5
commit bcbecb03f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 112 additions and 8 deletions

View File

@ -73,6 +73,9 @@ docker run --privileged -v /:/hostfs:ro -v /run/udev:/run/udev:ro -e HOST_PROC=/
- iops_in_progress (integer, gauge) - iops_in_progress (integer, gauge)
- merged_reads (integer, counter) - merged_reads (integer, counter)
- merged_writes (integer, counter) - merged_writes (integer, counter)
- io_util (float64, gauge, percent)
- io_await (float64, gauge, milliseconds)
- io_svctm (float64, gauge, milliseconds)
On linux these values correspond to the values in [`/proc/diskstats`][1] and On linux these values correspond to the values in [`/proc/diskstats`][1] and
[`/sys/block/<dev>/stat`][2]. [`/sys/block/<dev>/stat`][2].
@ -123,6 +126,18 @@ efficiency. Thus two 4K reads may become one 8K read before it is
ultimately handed to the disk, and so it will be counted (and queued) ultimately handed to the disk, and so it will be counted (and queued)
as only one I/O. These fields let you know how often this was done. as only one I/O. These fields let you know how often this was done.
### `io_await`
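The average time per completed I/O operation (reads and writes combined) since the previous collection, in milliseconds. Only reported when at least one I/O completed in that interval.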
### `io_svctm`
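The average service time per completed I/O operation since the previous collection, excluding wait time, in milliseconds. Only reported when at least one I/O completed in that interval.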
### `io_util`
The percentage of time the disk was active during the interval since the previous collection (%)
## Sample Queries ## Sample Queries
### Calculate percent IO utilization per disk and host ### Calculate percent IO utilization per disk and host
@ -147,3 +162,9 @@ diskio,name=sda1 merged_reads=0i,reads=2353i,writes=10i,write_bytes=2117632i,wri
diskio,name=centos/var_log reads=1063077i,writes=591025i,read_bytes=139325491712i,write_bytes=144233131520i,read_time=650221i,write_time=24368817i,io_time=852490i,weighted_io_time=25037394i,iops_in_progress=1i,merged_reads=0i,merged_writes=0i 1578326400000000000 diskio,name=centos/var_log reads=1063077i,writes=591025i,read_bytes=139325491712i,write_bytes=144233131520i,read_time=650221i,write_time=24368817i,io_time=852490i,weighted_io_time=25037394i,iops_in_progress=1i,merged_reads=0i,merged_writes=0i 1578326400000000000
diskio,name=sda write_time=49i,io_time=1317i,weighted_io_time=1404i,reads=2495i,read_time=1357i,write_bytes=2117632i,iops_in_progress=0i,merged_reads=0i,merged_writes=0i,writes=10i,read_bytes=38956544i 1578326400000000000 diskio,name=sda write_time=49i,io_time=1317i,weighted_io_time=1404i,reads=2495i,read_time=1357i,write_bytes=2117632i,iops_in_progress=0i,merged_reads=0i,merged_writes=0i,writes=10i,read_bytes=38956544i 1578326400000000000
``` ```
```text
diskio,name=sda io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
diskio,name=sda1 io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
diskio,name=sda2 io_await=0.3317307692307692,io_svctm=0.07692307692307693,io_util=0.5329780146568954 1578326400000000000
```

View File

@ -6,6 +6,9 @@ import (
"fmt" "fmt"
"regexp" "regexp"
"strings" "strings"
"time"
"github.com/shirou/gopsutil/v3/disk"
"github.com/influxdata/telegraf" "github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/filter" "github.com/influxdata/telegraf/filter"
@ -32,11 +35,13 @@ type DiskIO struct {
SkipSerialNumber bool `toml:"skip_serial_number"` SkipSerialNumber bool `toml:"skip_serial_number"`
Log telegraf.Logger `toml:"-"` Log telegraf.Logger `toml:"-"`
ps system.PS ps system.PS
infoCache map[string]diskInfoCache infoCache map[string]diskInfoCache
deviceFilter filter.Filter deviceFilter filter.Filter
warnDiskName map[string]bool warnDiskName map[string]bool
warnDiskTags map[string]bool warnDiskTags map[string]bool
lastIOCounterStat map[string]disk.IOCountersStat
lastCollectTime time.Time
} }
func (*DiskIO) SampleConfig() string { func (*DiskIO) SampleConfig() string {
@ -57,6 +62,7 @@ func (d *DiskIO) Init() error {
d.infoCache = make(map[string]diskInfoCache) d.infoCache = make(map[string]diskInfoCache)
d.warnDiskName = make(map[string]bool) d.warnDiskName = make(map[string]bool)
d.warnDiskTags = make(map[string]bool) d.warnDiskTags = make(map[string]bool)
d.lastIOCounterStat = make(map[string]disk.IOCountersStat)
return nil return nil
} }
@ -73,8 +79,8 @@ func (d *DiskIO) Gather(acc telegraf.Accumulator) error {
if err != nil { if err != nil {
return fmt.Errorf("error getting disk io info: %w", err) return fmt.Errorf("error getting disk io info: %w", err)
} }
collectTime := time.Now()
for _, io := range diskio { for k, io := range diskio {
match := false match := false
if d.deviceFilter != nil && d.deviceFilter.Match(io.Name) { if d.deviceFilter != nil && d.deviceFilter.Match(io.Name) {
match = true match = true
@ -125,9 +131,23 @@ func (d *DiskIO) Gather(acc telegraf.Accumulator) error {
"merged_reads": io.MergedReadCount, "merged_reads": io.MergedReadCount,
"merged_writes": io.MergedWriteCount, "merged_writes": io.MergedWriteCount,
} }
if lastValue, exists := d.lastIOCounterStat[k]; exists {
deltaRWCount := float64(io.ReadCount + io.WriteCount - lastValue.ReadCount - lastValue.WriteCount)
deltaRWTime := float64(io.ReadTime + io.WriteTime - lastValue.ReadTime - lastValue.WriteTime)
deltaIOTime := float64(io.IoTime - lastValue.IoTime)
if deltaRWCount > 0 {
fields["io_await"] = deltaRWTime / deltaRWCount
fields["io_svctm"] = deltaIOTime / deltaRWCount
}
itv := float64(collectTime.Sub(d.lastCollectTime).Milliseconds())
if itv > 0 {
fields["io_util"] = 100 * deltaIOTime / itv
}
}
acc.AddCounter("diskio", fields, tags) acc.AddCounter("diskio", fields, tags)
} }
d.lastCollectTime = collectTime
d.lastIOCounterStat = diskio
return nil return nil
} }

View File

@ -2,6 +2,7 @@ package diskio
import ( import (
"testing" "testing"
"time"
"github.com/shirou/gopsutil/v3/disk" "github.com/shirou/gopsutil/v3/disk"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -127,3 +128,65 @@ func TestDiskIO(t *testing.T) {
}) })
} }
} }
// TestDiskIOUtil checks the derived io_await, io_svctm and io_util fields.
//
// The derived fields are computed from the difference between two consecutive
// gathers, so the test gathers once to establish a baseline and a second time
// with advanced counters. Instead of sleeping between the gathers, the
// plugin's recorded collection time is moved one second into the past; this
// guarantees a positive interval while keeping the test fast.
func TestDiskIOUtil(t *testing.T) {
	baseline := map[string]disk.IOCountersStat{
		"sda": {
			ReadCount:        888,
			WriteCount:       5341,
			ReadBytes:        100000,
			WriteBytes:       200000,
			ReadTime:         7123,
			WriteTime:        9087,
			MergedReadCount:  11,
			MergedWriteCount: 12,
			Name:             "sda",
			IoTime:           123552,
			SerialNumber:     "ab-123-ad",
		},
	}
	advanced := map[string]disk.IOCountersStat{
		"sda": {
			ReadCount:        1000,
			WriteCount:       6000,
			ReadBytes:        200000,
			WriteBytes:       300000,
			ReadTime:         8123,
			WriteTime:        9187,
			MergedReadCount:  16,
			MergedWriteCount: 30,
			Name:             "sda",
			IoTime:           163552,
			SerialNumber:     "ab-123-ad",
		},
	}
	derivedFields := []string{"io_util", "io_svctm", "io_await"}

	var acc testutil.Accumulator
	var mps system.MockPS
	mps.On("DiskIO").Return(baseline, nil)

	diskio := &DiskIO{
		Log:     testutil.Logger{},
		Devices: []string{"sd*"},
		ps:      &mps,
	}
	require.NoError(t, diskio.Init())

	// The first gather has nothing to compare against, so it must only record
	// the baseline and emit none of the derived fields.
	require.NoError(t, diskio.Gather(&acc))
	for _, field := range derivedFields {
		require.Falsef(t, acc.HasField("diskio", field), "unexpected %s on first gather", field)
	}

	// Pretend the baseline was collected one second ago rather than sleeping.
	diskio.lastCollectTime = diskio.lastCollectTime.Add(-time.Second)

	// The second gather sees the advanced counters.
	mps2 := system.MockPS{}
	mps2.On("DiskIO").Return(advanced, nil)
	diskio.ps = &mps2
	require.NoError(t, diskio.Gather(&acc))

	for _, field := range derivedFields {
		require.Truef(t, acc.HasFloatField("diskio", field), "%s not have value", field)
	}

	// Counter deltas between the two samples:
	//   operations: (1000 + 6000) - (888 + 5341)   = 771
	//   r/w time:   (8123 + 9187) - (7123 + 9087)   = 1100 ms
	//   io time:    163552 - 123552                 = 40000 ms
	await, ok := acc.FloatField("diskio", "io_await")
	require.True(t, ok, "miss io_await")
	require.InDelta(t, 1100.0/771.0, await, 1e-9)

	svctm, ok := acc.FloatField("diskio", "io_svctm")
	require.True(t, ok, "miss io_svctm")
	require.InDelta(t, 40000.0/771.0, svctm, 1e-9)

	// io_util depends on the wall-clock interval, which is at least the
	// 1000 ms the collection time was rewound by, so only bound it.
	util, ok := acc.FloatField("diskio", "io_util")
	require.True(t, ok, "miss io util")
	require.Positive(t, util)
	require.LessOrEqual(t, util, 100.0*40000.0/1000.0)
}