feat: In Lustre input plugin, support collecting per-client stats. (#10607)
This commit is contained in:
parent
fba9769720
commit
32222d96ce
|
|
@ -17,10 +17,12 @@ This plugin monitors the Lustre file system using its entries in the proc filesy
|
|||
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
||||
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||
# ]
|
||||
# mds_procfiles = [
|
||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||
# "/proc/fs/lustre/mdt/*/job_stats",
|
||||
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||
# ]
|
||||
```
|
||||
|
||||
|
|
@ -40,6 +42,18 @@ From `/proc/fs/lustre/obdfilter/*/stats` and `/proc/fs/lustre/osd-ldiskfs/*/stat
|
|||
- cache_miss
|
||||
- cache_access
|
||||
|
||||
From `/proc/fs/lustre/obdfilter/*/exports/*/stats`:
|
||||
|
||||
- lustre2
|
||||
- tags:
|
||||
- name
|
||||
- client
|
||||
- fields:
|
||||
- write_bytes
|
||||
- write_calls
|
||||
- read_bytes
|
||||
- read_calls
|
||||
|
||||
From `/proc/fs/lustre/obdfilter/*/job_stats`:
|
||||
|
||||
- lustre2
|
||||
|
|
@ -89,6 +103,30 @@ From `/proc/fs/lustre/mdt/*/md_stats`:
|
|||
- samedir_rename
|
||||
- crossdir_rename
|
||||
|
||||
From `/proc/fs/lustre/mdt/*/exports/*/stats`:
|
||||
|
||||
- lustre2
|
||||
- tags:
|
||||
- name
|
||||
- client
|
||||
- fields:
|
||||
- open
|
||||
- close
|
||||
- mknod
|
||||
- link
|
||||
- unlink
|
||||
- mkdir
|
||||
- rmdir
|
||||
- rename
|
||||
- getattr
|
||||
- setattr
|
||||
- getxattr
|
||||
- setxattr
|
||||
- statfs
|
||||
- sync
|
||||
- samedir_rename
|
||||
- crossdir_rename
|
||||
|
||||
From `/proc/fs/lustre/mdt/*/job_stats`:
|
||||
|
||||
- lustre2
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ import (
|
|||
)
|
||||
|
||||
type tags struct {
|
||||
name, job string
|
||||
name, job, client string
|
||||
}
|
||||
|
||||
// Lustre proc files can change between versions, so we want to future-proof
|
||||
|
|
@ -40,10 +40,12 @@ var sampleConfig = `
|
|||
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
||||
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||
# ]
|
||||
# mds_procfiles = [
|
||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||
# "/proc/fs/lustre/mdt/*/job_stats",
|
||||
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||
# ]
|
||||
`
|
||||
|
||||
|
|
@ -365,13 +367,26 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
|
|||
fieldSplitter := regexp.MustCompile(`[ :]+`)
|
||||
|
||||
for _, file := range files {
|
||||
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
|
||||
* into just the object store target name
|
||||
* Assumption: the target name is always second to last,
|
||||
* which is true in Lustre 2.1->2.8
|
||||
|
||||
/* From /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
|
||||
* extract the object store target name,
|
||||
* and for per-client files under
|
||||
* /proc/fs/lustre/obdfilter/<ost_name>/exports/<client_nid>/stats
|
||||
* and similar, also extract the client NID.
|
||||
* Assumption: the target name is fourth to last
|
||||
* for per-client files and second to last otherwise,
|
||||
* and the client NID is always second to last,
|
||||
* which is true in Lustre 2.1->2.14
|
||||
*/
|
||||
path := strings.Split(file, "/")
|
||||
name := path[len(path)-2]
|
||||
var name, client string
|
||||
if strings.Contains(file, "/exports/") {
|
||||
name = path[len(path)-4]
|
||||
client = path[len(path)-2]
|
||||
} else {
|
||||
name = path[len(path)-2]
|
||||
client = ""
|
||||
}
|
||||
|
||||
//lines, err := internal.ReadLines(file)
|
||||
wholeFile, err := os.ReadFile(file)
|
||||
|
|
@ -401,10 +416,10 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
|
|||
}
|
||||
|
||||
var fields map[string]interface{}
|
||||
fields, ok := l.allFields[tags{name, jobid}]
|
||||
fields, ok := l.allFields[tags{name, jobid, client}]
|
||||
if !ok {
|
||||
fields = make(map[string]interface{})
|
||||
l.allFields[tags{name, jobid}] = fields
|
||||
l.allFields[tags{name, jobid, client}] = fields
|
||||
}
|
||||
|
||||
for _, wanted := range wantedFields {
|
||||
|
|
@ -508,6 +523,9 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
|||
if len(tgs.job) > 0 {
|
||||
tags["jobid"] = tgs.job
|
||||
}
|
||||
if len(tgs.client) > 0 {
|
||||
tags["client"] = tgs.client
|
||||
}
|
||||
acc.AddFields("lustre2", fields, tags)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,6 +204,69 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestLustre2GeneratesClientMetrics(t *testing.T) {
|
||||
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||
ostName := "OST0001"
|
||||
clientName := "10.2.4.27@o2ib1"
|
||||
mdtdir := tempdir + "/mdt/"
|
||||
err := os.MkdirAll(mdtdir+"/"+ostName+"/exports/"+clientName, 0755)
|
||||
require.NoError(t, err)
|
||||
|
||||
obddir := tempdir + "/obdfilter/"
|
||||
err = os.MkdirAll(obddir+"/"+ostName+"/exports/"+clientName, 0755)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = os.WriteFile(mdtdir+"/"+ostName+"/exports/"+clientName+"/stats", []byte(mdtProcContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = os.WriteFile(obddir+"/"+ostName+"/exports/"+clientName+"/stats", []byte(obdfilterProcContents), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Gather per-client stats from the exports/<client>/stats files
|
||||
m := &Lustre2{
|
||||
OstProcfiles: []string{obddir + "/*/exports/*/stats"},
|
||||
MdsProcfiles: []string{mdtdir + "/*/exports/*/stats"},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
|
||||
err = m.Gather(&acc)
|
||||
require.NoError(t, err)
|
||||
|
||||
tags := map[string]string{
|
||||
"name": ostName,
|
||||
"client": clientName,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"close": uint64(873243496),
|
||||
"crossdir_rename": uint64(369571),
|
||||
"getattr": uint64(1503663097),
|
||||
"getxattr": uint64(6145349681),
|
||||
"link": uint64(445),
|
||||
"mkdir": uint64(705499),
|
||||
"mknod": uint64(349042),
|
||||
"open": uint64(1024577037),
|
||||
"read_bytes": uint64(78026117632000),
|
||||
"read_calls": uint64(203238095),
|
||||
"rename": uint64(629196),
|
||||
"rmdir": uint64(227434),
|
||||
"samedir_rename": uint64(259625),
|
||||
"setattr": uint64(1898364),
|
||||
"setxattr": uint64(83969),
|
||||
"statfs": uint64(2916320),
|
||||
"sync": uint64(434081),
|
||||
"unlink": uint64(3549417),
|
||||
"write_bytes": uint64(15201500833981),
|
||||
"write_calls": uint64(71893382),
|
||||
}
|
||||
|
||||
acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
|
||||
|
||||
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
|
||||
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||
ostName := "OST0001"
|
||||
|
|
|
|||
Loading…
Reference in New Issue