feat(inputs.lustre2): Add eviction_count field (#15044)
This commit is contained in:
parent
40b88b039e
commit
c7466b8835
|
|
@ -24,6 +24,9 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## An array of /proc globs to search for Lustre stats
|
||||
## If not specified, the default will work on Lustre 2.12.x
|
||||
##
|
||||
# mgs_procfiles = [
|
||||
# "/sys/fs/lustre/mgs/*/eviction_count",
|
||||
# ]
|
||||
# ost_procfiles = [
|
||||
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||
|
|
@ -31,6 +34,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
|
||||
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
|
||||
# "/sys/fs/lustre/obdfilter/*/eviction_count",
|
||||
# ]
|
||||
# mds_procfiles = [
|
||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||
|
|
@ -38,6 +42,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
|
||||
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
|
||||
# "/sys/fs/lustre/mdt/*/eviction_count",
|
||||
# ]
|
||||
```
|
||||
|
||||
|
|
@ -174,6 +179,14 @@ From `/proc/fs/lustre/mdt/*/job_stats`:
|
|||
- jobstats_sync
|
||||
- jobstats_unlink
|
||||
|
||||
From `/sys/fs/lustre/*/*/eviction_count`:
|
||||
|
||||
- lustre2
|
||||
- tags:
|
||||
- name
|
||||
- fields:
|
||||
- evictions
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
Check for the default or custom procfiles in the proc filesystem, and reference
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ type tags struct {
|
|||
// Lustre proc files can change between versions, so we want to future-proof
|
||||
// by letting people choose what to look at.
|
||||
type Lustre2 struct {
|
||||
MgsProcfiles []string `toml:"mgs_procfiles"`
|
||||
OstProcfiles []string `toml:"ost_procfiles"`
|
||||
MdsProcfiles []string `toml:"mds_procfiles"`
|
||||
|
||||
|
|
@ -600,6 +601,43 @@ func (l *Lustre2) getLustreProcBrwStats(fileglob string, wantedFields []*mapping
|
|||
return nil
|
||||
}
|
||||
|
||||
func (l *Lustre2) getLustreEvictionCount(fileglob string) error {
|
||||
files, err := filepath.Glob(filepath.Join(l.rootdir, fileglob))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to find files matching glob %s: %w", fileglob, err)
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
// Turn /sys/fs/lustre/*/<mgt/mdt/ost_name>/eviction_count into just the object store target name
|
||||
// This assumes that the target name is always second to last, which is true in Lustre 2.1->2.12
|
||||
path := strings.Split(file, "/")
|
||||
if len(path) < 2 {
|
||||
continue
|
||||
}
|
||||
name := path[len(path)-2]
|
||||
|
||||
contents, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read file %s: %w", file, err)
|
||||
}
|
||||
|
||||
value, err := strconv.ParseUint(strings.TrimSpace(string(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse file %s: %w", file, err)
|
||||
}
|
||||
|
||||
tag := tags{name, "", "", "", ""}
|
||||
fields, ok := l.allFields[tag]
|
||||
if !ok {
|
||||
fields = make(map[string]interface{})
|
||||
l.allFields[tag] = fields
|
||||
}
|
||||
|
||||
fields["evictions"] = value
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Gather reads stats from all lustre targets
|
||||
func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
||||
l.allFields = make(map[tags]map[string]interface{})
|
||||
|
|
@ -609,6 +647,13 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
return err
|
||||
}
|
||||
|
||||
if len(l.MgsProcfiles) == 0 {
|
||||
l.MgsProcfiles = []string{
|
||||
// eviction count
|
||||
"/sys/fs/lustre/mgs/*/eviction_count",
|
||||
}
|
||||
}
|
||||
|
||||
if len(l.OstProcfiles) == 0 {
|
||||
l.OstProcfiles = []string{
|
||||
// read/write bytes are in obdfilter/<ost_name>/stats
|
||||
|
|
@ -621,6 +666,8 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
"/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
|
||||
// bulk read/write statistics for zfs
|
||||
"/proc/fs/lustre/osd-zfs/*/brw_stats",
|
||||
// eviction count
|
||||
"/sys/fs/lustre/obdfilter/*/eviction_count",
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -630,9 +677,20 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
"/proc/fs/lustre/mdt/*/md_stats",
|
||||
// Metadata target job stats
|
||||
"/proc/fs/lustre/mdt/*/job_stats",
|
||||
// eviction count
|
||||
"/sys/fs/lustre/mdt/*/eviction_count",
|
||||
}
|
||||
}
|
||||
|
||||
for _, procfile := range l.MgsProcfiles {
|
||||
if !strings.HasSuffix(procfile, "eviction_count") {
|
||||
return fmt.Errorf("no handler found for mgs procfile pattern \"%s\"", procfile)
|
||||
}
|
||||
err := l.getLustreEvictionCount(procfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, procfile := range l.OstProcfiles {
|
||||
if strings.HasSuffix(procfile, "brw_stats") {
|
||||
err := l.getLustreProcBrwStats(procfile, wantedBrwstatsFields)
|
||||
|
|
@ -644,6 +702,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if strings.HasSuffix(procfile, "eviction_count") {
|
||||
err := l.getLustreEvictionCount(procfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
err := l.GetLustreProcStats(procfile, wantedOstFields)
|
||||
if err != nil {
|
||||
|
|
@ -662,6 +725,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else if strings.HasSuffix(procfile, "eviction_count") {
|
||||
err := l.getLustreEvictionCount(procfile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
err := l.GetLustreProcStats(procfile, wantedMdsFields)
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@
|
|||
package lustre2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/toml"
|
||||
|
|
@ -570,3 +572,46 @@ func TestLustre2GeneratesBrwstatsMetrics(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLustre2GeneratesEvictionMetrics(t *testing.T) {
|
||||
rootdir, err := os.MkdirTemp("", "telegraf-lustre-evictions")
|
||||
require.NoError(t, err)
|
||||
defer os.RemoveAll(rootdir)
|
||||
|
||||
// setup files in mock sysfs
|
||||
type fileEntry struct {
|
||||
targetType string
|
||||
targetName string
|
||||
value uint64
|
||||
}
|
||||
fileEntries := []fileEntry{
|
||||
{"mdt", "fs-MDT0000", 101},
|
||||
{"mgs", "MGS", 202},
|
||||
{"obdfilter", "fs-OST0001", 303},
|
||||
}
|
||||
for _, f := range fileEntries {
|
||||
d := filepath.Join(rootdir, "sys", "fs", "lustre", f.targetType, f.targetName)
|
||||
err := os.MkdirAll(d, 0750)
|
||||
require.NoError(t, err)
|
||||
err = os.WriteFile(filepath.Join(d, "eviction_count"), []byte(fmt.Sprintf("%d\n", f.value)), 0640)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// gather metrics
|
||||
m := &Lustre2{rootdir: rootdir}
|
||||
var acc testutil.Accumulator
|
||||
err = m.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
// compare with expectations
|
||||
for _, f := range fileEntries {
|
||||
acc.AssertContainsTaggedFields(
|
||||
t,
|
||||
"lustre2",
|
||||
map[string]interface{}{
|
||||
"evictions": f.value,
|
||||
},
|
||||
map[string]string{"name": f.targetName},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@
|
|||
## An array of /proc globs to search for Lustre stats
|
||||
## If not specified, the default will work on Lustre 2.12.x
|
||||
##
|
||||
# mgs_procfiles = [
|
||||
# "/sys/fs/lustre/mgs/*/eviction_count",
|
||||
# ]
|
||||
# ost_procfiles = [
|
||||
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||
|
|
@ -11,6 +14,7 @@
|
|||
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
|
||||
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
|
||||
# "/sys/fs/lustre/obdfilter/*/eviction_count",
|
||||
# ]
|
||||
# mds_procfiles = [
|
||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||
|
|
@ -18,4 +22,5 @@
|
|||
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
|
||||
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
|
||||
# "/sys/fs/lustre/mdt/*/eviction_count",
|
||||
# ]
|
||||
|
|
|
|||
Loading…
Reference in New Issue