feat(inputs.lustre2): Add eviction_count field (#15044)

This commit is contained in:
Luke Yeager 2024-03-25 07:33:33 -05:00 committed by GitHub
parent 40b88b039e
commit c7466b8835
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 131 additions and 0 deletions

View File

@ -24,6 +24,9 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## An array of /proc globs to search for Lustre stats
## If not specified, the default will work on Lustre 2.12.x
##
# mgs_procfiles = [
# "/sys/fs/lustre/mgs/*/eviction_count",
# ]
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
@ -31,6 +34,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/obdfilter/*/eviction_count",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
@ -38,6 +42,7 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/mdt/*/eviction_count",
# ]
```
@ -174,6 +179,14 @@ From `/proc/fs/lustre/mdt/*/job_stats`:
- jobstats_sync
- jobstats_unlink
From `/sys/fs/lustre/*/*/eviction_count`:
- lustre2
- tags:
- name
- fields:
- evictions
## Troubleshooting
Check for the default or custom procfiles in the proc filesystem, and reference

View File

@ -30,6 +30,7 @@ type tags struct {
// Lustre proc files can change between versions, so we want to future-proof
// by letting people choose what to look at.
type Lustre2 struct {
MgsProcfiles []string `toml:"mgs_procfiles"`
OstProcfiles []string `toml:"ost_procfiles"`
MdsProcfiles []string `toml:"mds_procfiles"`
@ -600,6 +601,43 @@ func (l *Lustre2) getLustreProcBrwStats(fileglob string, wantedFields []*mapping
return nil
}
// getLustreEvictionCount expands fileglob (relative to l.rootdir), reads each
// matching eviction_count file as an unsigned decimal integer and records it
// in l.allFields under the "evictions" field.
//
// The entry is keyed by a tags value whose first element is the target name
// taken from the file's parent directory; the remaining tag elements are left
// empty. An error is returned as soon as the glob is malformed or any matched
// file cannot be read or parsed; entries recorded before the failure are kept.
func (l *Lustre2) getLustreEvictionCount(fileglob string) error {
	matches, err := filepath.Glob(filepath.Join(l.rootdir, fileglob))
	if err != nil {
		return fmt.Errorf("failed to find files matching glob %s: %w", fileglob, err)
	}

	for _, match := range matches {
		// Reduce /sys/fs/lustre/*/<mgt/mdt/ost_name>/eviction_count to just
		// the target name. This assumes the target name is always the
		// second-to-last path component, which holds for Lustre 2.1 -> 2.12.
		segments := strings.Split(match, "/")
		if len(segments) < 2 {
			// No parent directory component to use as the name.
			continue
		}
		target := segments[len(segments)-2]

		raw, readErr := os.ReadFile(match)
		if readErr != nil {
			return fmt.Errorf("failed to read file %s: %w", match, readErr)
		}

		// The file content is trimmed of surrounding whitespace before parsing.
		text := strings.TrimSpace(string(raw))
		count, parseErr := strconv.ParseUint(text, 10, 64)
		if parseErr != nil {
			return fmt.Errorf("failed to parse file %s: %w", match, parseErr)
		}

		// Reuse the field set already collected for this target, if any, so
		// the eviction count is merged into it rather than replacing it.
		key := tags{target, "", "", "", ""}
		if _, exists := l.allFields[key]; !exists {
			l.allFields[key] = make(map[string]interface{})
		}
		l.allFields[key]["evictions"] = count
	}

	return nil
}
// Gather reads stats from all lustre targets
func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
l.allFields = make(map[tags]map[string]interface{})
@ -609,6 +647,13 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
return err
}
if len(l.MgsProcfiles) == 0 {
l.MgsProcfiles = []string{
// eviction count
"/sys/fs/lustre/mgs/*/eviction_count",
}
}
if len(l.OstProcfiles) == 0 {
l.OstProcfiles = []string{
// read/write bytes are in obdfilter/<ost_name>/stats
@ -621,6 +666,8 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
"/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
// bulk read/write statistics for zfs
"/proc/fs/lustre/osd-zfs/*/brw_stats",
// eviction count
"/sys/fs/lustre/obdfilter/*/eviction_count",
}
}
@ -630,9 +677,20 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
"/proc/fs/lustre/mdt/*/md_stats",
// Metadata target job stats
"/proc/fs/lustre/mdt/*/job_stats",
// eviction count
"/sys/fs/lustre/mdt/*/eviction_count",
}
}
for _, procfile := range l.MgsProcfiles {
if !strings.HasSuffix(procfile, "eviction_count") {
return fmt.Errorf("no handler found for mgs procfile pattern \"%s\"", procfile)
}
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
}
for _, procfile := range l.OstProcfiles {
if strings.HasSuffix(procfile, "brw_stats") {
err := l.getLustreProcBrwStats(procfile, wantedBrwstatsFields)
@ -644,6 +702,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
} else if strings.HasSuffix(procfile, "eviction_count") {
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
} else {
err := l.GetLustreProcStats(procfile, wantedOstFields)
if err != nil {
@ -662,6 +725,11 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if err != nil {
return err
}
} else if strings.HasSuffix(procfile, "eviction_count") {
err := l.getLustreEvictionCount(procfile)
if err != nil {
return err
}
} else {
err := l.GetLustreProcStats(procfile, wantedMdsFields)
if err != nil {

View File

@ -3,7 +3,9 @@
package lustre2
import (
"fmt"
"os"
"path/filepath"
"testing"
"github.com/influxdata/toml"
@ -570,3 +572,46 @@ func TestLustre2GeneratesBrwstatsMetrics(t *testing.T) {
}
}
}
// TestLustre2GeneratesEvictionMetrics builds a mock sysfs tree containing one
// eviction_count file for each target type (mdt, mgs, obdfilter) and checks
// that Gather reports every value as the "evictions" field of a "lustre2"
// metric tagged with the target's name.
//
// No procfile globs are configured on the plugin, so the test exercises the
// default glob lists that Gather fills in.
func TestLustre2GeneratesEvictionMetrics(t *testing.T) {
	// t.TempDir fails the test if the directory cannot be created and removes
	// it automatically when the test completes, so no manual cleanup is needed.
	rootdir := t.TempDir()

	// setup files in mock sysfs
	type fileEntry struct {
		targetType string
		targetName string
		value      uint64
	}
	fileEntries := []fileEntry{
		{"mdt", "fs-MDT0000", 101},
		{"mgs", "MGS", 202},
		{"obdfilter", "fs-OST0001", 303},
	}
	for _, f := range fileEntries {
		d := filepath.Join(rootdir, "sys", "fs", "lustre", f.targetType, f.targetName)
		require.NoError(t, os.MkdirAll(d, 0750))

		// The trailing newline checks that surrounding whitespace is tolerated.
		content := []byte(fmt.Sprintf("%d\n", f.value))
		require.NoError(t, os.WriteFile(filepath.Join(d, "eviction_count"), content, 0640))
	}

	// gather metrics
	m := &Lustre2{rootdir: rootdir}
	var acc testutil.Accumulator
	require.NoError(t, m.Gather(&acc))

	// compare with expectations
	for _, f := range fileEntries {
		acc.AssertContainsTaggedFields(
			t,
			"lustre2",
			map[string]interface{}{
				"evictions": f.value,
			},
			map[string]string{"name": f.targetName},
		)
	}
}

View File

@ -4,6 +4,9 @@
## An array of /proc globs to search for Lustre stats
## If not specified, the default will work on Lustre 2.12.x
##
# mgs_procfiles = [
# "/sys/fs/lustre/mgs/*/eviction_count",
# ]
# ost_procfiles = [
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
@ -11,6 +14,7 @@
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/obdfilter/*/eviction_count",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
@ -18,4 +22,5 @@
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/brw_stats",
# "/proc/fs/lustre/osd-zfs/*/brw_stats",
# "/sys/fs/lustre/mdt/*/eviction_count",
# ]