feat: In Lustre input plugin, support collecting per-client stats. (#10607)

This commit is contained in:
omgold 2022-03-29 23:13:29 +02:00 committed by GitHub
parent fba9769720
commit 32222d96ce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 127 additions and 8 deletions

View File

@ -17,10 +17,12 @@ This plugin monitors the Lustre file system using its entries in the proc filesy
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# ]
```
@ -40,6 +42,18 @@ From `/proc/fs/lustre/obdfilter/*/stats` and `/proc/fs/lustre/osd-ldiskfs/*/stat
- cache_miss
- cache_access
From `/proc/fs/lustre/obdfilter/*/exports/*/stats`:
- lustre2
- tags:
- name
- client
- fields:
- write_bytes
- write_calls
- read_bytes
- read_calls
From `/proc/fs/lustre/obdfilter/*/job_stats`:
- lustre2
@ -89,6 +103,30 @@ From `/proc/fs/lustre/mdt/*/md_stats`:
- samedir_rename
- crossdir_rename
From `/proc/fs/lustre/mdt/*/exports/*/stats`:
- lustre2
- tags:
- name
- client
- fields:
- open
- close
- mknod
- link
- unlink
- mkdir
- rmdir
- rename
- getattr
- setattr
- getxattr
- setxattr
- statfs
- sync
- samedir_rename
- crossdir_rename
From `/proc/fs/lustre/mdt/*/job_stats`:
- lustre2

View File

@ -19,7 +19,7 @@ import (
)
// tags is the key under which collected field values are grouped in
// Lustre2.allFields. name is the target name taken from the proc file path,
// job is the job ID for job_stats entries, and client is the client NID for
// per-client files under exports/; empty job and client values are not
// emitted as metric tags.
type tags struct {
name, job string
name, job, client string
}
// Lustre proc files can change between versions, so we want to future-proof
@ -40,10 +40,12 @@ var sampleConfig = `
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# ]
`
@ -365,13 +367,26 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
fieldSplitter := regexp.MustCompile(`[ :]+`)
for _, file := range files {
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
* into just the object store target name
* Assumption: the target name is always second to last,
* which is true in Lustre 2.1->2.8
/* From /proc/fs/lustre/obdfilter/<ost_name>/stats and similar,
* extract the object store target name. For per-client files such as
* /proc/fs/lustre/obdfilter/<ost_name>/exports/<client_nid>/stats,
* also extract the client NID.
* Assumption: the target name is the fourth-to-last path element
* for per-client files and the second-to-last otherwise, and the
* client NID is always the second-to-last element of a per-client path.
* This holds for Lustre 2.1 through 2.14.
*/
path := strings.Split(file, "/")
name := path[len(path)-2]
var name, client string
if strings.Contains(file, "/exports/") {
name = path[len(path)-4]
client = path[len(path)-2]
} else {
name = path[len(path)-2]
client = ""
}
//lines, err := internal.ReadLines(file)
wholeFile, err := os.ReadFile(file)
@ -401,10 +416,10 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
}
var fields map[string]interface{}
fields, ok := l.allFields[tags{name, jobid}]
fields, ok := l.allFields[tags{name, jobid, client}]
if !ok {
fields = make(map[string]interface{})
l.allFields[tags{name, jobid}] = fields
l.allFields[tags{name, jobid, client}] = fields
}
for _, wanted := range wantedFields {
@ -508,6 +523,9 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if len(tgs.job) > 0 {
tags["jobid"] = tgs.job
}
if len(tgs.client) > 0 {
tags["client"] = tgs.client
}
acc.AddFields("lustre2", fields, tags)
}

View File

@ -204,6 +204,69 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
require.NoError(t, err)
}
// TestLustre2GeneratesClientMetrics verifies that per-client stats files
// located at <target>/exports/<client_nid>/stats are gathered and that the
// resulting metric carries both the target name and the client NID as tags.
func TestLustre2GeneratesClientMetrics(t *testing.T) {
	// t.TempDir gives this test a private directory that the testing
	// framework removes even when an assertion aborts the test, so no
	// fixtures leak into later runs and no shared directory is deleted.
	tempdir := t.TempDir() + "/telegraf/proc/fs/lustre"
	ostName := "OST0001"
	clientName := "10.2.4.27@o2ib1"

	mdtdir := tempdir + "/mdt"
	mdtClientDir := mdtdir + "/" + ostName + "/exports/" + clientName
	err := os.MkdirAll(mdtClientDir, 0755)
	require.NoError(t, err)

	obddir := tempdir + "/obdfilter"
	obdClientDir := obddir + "/" + ostName + "/exports/" + clientName
	err = os.MkdirAll(obdClientDir, 0755)
	require.NoError(t, err)

	err = os.WriteFile(mdtClientDir+"/stats", []byte(mdtProcContents), 0644)
	require.NoError(t, err)

	err = os.WriteFile(obdClientDir+"/stats", []byte(obdfilterProcContents), 0644)
	require.NoError(t, err)

	// Only the per-client export stats are configured, so every gathered
	// field must come from the exports/<client_nid>/stats files.
	m := &Lustre2{
		OstProcfiles: []string{obddir + "/*/exports/*/stats"},
		MdsProcfiles: []string{mdtdir + "/*/exports/*/stats"},
	}

	var acc testutil.Accumulator

	err = m.Gather(&acc)
	require.NoError(t, err)

	tags := map[string]string{
		"name":   ostName,
		"client": clientName,
	}

	// OST and MDT fields are merged into one metric because both files
	// share the same target name and client NID.
	fields := map[string]interface{}{
		"close":           uint64(873243496),
		"crossdir_rename": uint64(369571),
		"getattr":         uint64(1503663097),
		"getxattr":        uint64(6145349681),
		"link":            uint64(445),
		"mkdir":           uint64(705499),
		"mknod":           uint64(349042),
		"open":            uint64(1024577037),
		"read_bytes":      uint64(78026117632000),
		"read_calls":      uint64(203238095),
		"rename":          uint64(629196),
		"rmdir":           uint64(227434),
		"samedir_rename":  uint64(259625),
		"setattr":         uint64(1898364),
		"setxattr":        uint64(83969),
		"statfs":          uint64(2916320),
		"sync":            uint64(434081),
		"unlink":          uint64(3549417),
		"write_bytes":     uint64(15201500833981),
		"write_calls":     uint64(71893382),
	}

	acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
}
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
ostName := "OST0001"