feat: In Lustre input plugin, support collecting per-client stats. (#10607)
This commit is contained in:
parent
fba9769720
commit
32222d96ce
|
|
@ -17,10 +17,12 @@ This plugin monitors the Lustre file system using its entries in the proc filesy
|
||||||
# "/proc/fs/lustre/obdfilter/*/stats",
|
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||||
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
||||||
|
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||||
# ]
|
# ]
|
||||||
# mds_procfiles = [
|
# mds_procfiles = [
|
||||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||||
# "/proc/fs/lustre/mdt/*/job_stats",
|
# "/proc/fs/lustre/mdt/*/job_stats",
|
||||||
|
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||||
# ]
|
# ]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -40,6 +42,18 @@ From `/proc/fs/lustre/obdfilter/*/stats` and `/proc/fs/lustre/osd-ldiskfs/*/stat
|
||||||
- cache_miss
|
- cache_miss
|
||||||
- cache_access
|
- cache_access
|
||||||
|
|
||||||
|
From `/proc/fs/lustre/obdfilter/*/exports/*/stats`:
|
||||||
|
|
||||||
|
- lustre2
|
||||||
|
- tags:
|
||||||
|
- name
|
||||||
|
- client
|
||||||
|
- fields:
|
||||||
|
- write_bytes
|
||||||
|
- write_calls
|
||||||
|
- read_bytes
|
||||||
|
- read_calls
|
||||||
|
|
||||||
From `/proc/fs/lustre/obdfilter/*/job_stats`:
|
From `/proc/fs/lustre/obdfilter/*/job_stats`:
|
||||||
|
|
||||||
- lustre2
|
- lustre2
|
||||||
|
|
@ -89,6 +103,30 @@ From `/proc/fs/lustre/mdt/*/md_stats`:
|
||||||
- samedir_rename
|
- samedir_rename
|
||||||
- crossdir_rename
|
- crossdir_rename
|
||||||
|
|
||||||
|
From `/proc/fs/lustre/mdt/*/exports/*/stats`:
|
||||||
|
|
||||||
|
- lustre2
|
||||||
|
- tags:
|
||||||
|
- name
|
||||||
|
- client
|
||||||
|
- fields:
|
||||||
|
- open
|
||||||
|
- close
|
||||||
|
- mknod
|
||||||
|
- link
|
||||||
|
- unlink
|
||||||
|
- mkdir
|
||||||
|
- rmdir
|
||||||
|
- rename
|
||||||
|
- getattr
|
||||||
|
- setattr
|
||||||
|
- getxattr
|
||||||
|
- setxattr
|
||||||
|
- statfs
|
||||||
|
- sync
|
||||||
|
- samedir_rename
|
||||||
|
- crossdir_rename
|
||||||
|
|
||||||
From `/proc/fs/lustre/mdt/*/job_stats`:
|
From `/proc/fs/lustre/mdt/*/job_stats`:
|
||||||
|
|
||||||
- lustre2
|
- lustre2
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type tags struct {
|
type tags struct {
|
||||||
name, job string
|
name, job, client string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Lustre proc files can change between versions, so we want to future-proof
|
// Lustre proc files can change between versions, so we want to future-proof
|
||||||
|
|
@ -40,10 +40,12 @@ var sampleConfig = `
|
||||||
# "/proc/fs/lustre/obdfilter/*/stats",
|
# "/proc/fs/lustre/obdfilter/*/stats",
|
||||||
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
|
||||||
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
# "/proc/fs/lustre/obdfilter/*/job_stats",
|
||||||
|
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
|
||||||
# ]
|
# ]
|
||||||
# mds_procfiles = [
|
# mds_procfiles = [
|
||||||
# "/proc/fs/lustre/mdt/*/md_stats",
|
# "/proc/fs/lustre/mdt/*/md_stats",
|
||||||
# "/proc/fs/lustre/mdt/*/job_stats",
|
# "/proc/fs/lustre/mdt/*/job_stats",
|
||||||
|
# "/proc/fs/lustre/mdt/*/exports/*/stats",
|
||||||
# ]
|
# ]
|
||||||
`
|
`
|
||||||
|
|
||||||
|
|
@ -365,13 +367,26 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
|
||||||
fieldSplitter := regexp.MustCompile(`[ :]+`)
|
fieldSplitter := regexp.MustCompile(`[ :]+`)
|
||||||
|
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
|
|
||||||
* into just the object store target name
|
/* From /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
|
||||||
* Assumption: the target name is always second to last,
|
* extract the object store target name,
|
||||||
* which is true in Lustre 2.1->2.8
|
* and for per-client files under
|
||||||
|
* /proc/fs/lustre/obdfilter/<ost_name>/exports/<client_nid>/stats
|
||||||
|
* and similar, the client NID.
|
||||||
|
* Assumption: the target name is fourth to last
|
||||||
|
* for per-client files and second to last otherwise
|
||||||
|
* and the client NID is always second to last,
|
||||||
|
* which is true in Lustre 2.1->2.14
|
||||||
*/
|
*/
|
||||||
path := strings.Split(file, "/")
|
path := strings.Split(file, "/")
|
||||||
name := path[len(path)-2]
|
var name, client string
|
||||||
|
if strings.Contains(file, "/exports/") {
|
||||||
|
name = path[len(path)-4]
|
||||||
|
client = path[len(path)-2]
|
||||||
|
} else {
|
||||||
|
name = path[len(path)-2]
|
||||||
|
client = ""
|
||||||
|
}
|
||||||
|
|
||||||
//lines, err := internal.ReadLines(file)
|
//lines, err := internal.ReadLines(file)
|
||||||
wholeFile, err := os.ReadFile(file)
|
wholeFile, err := os.ReadFile(file)
|
||||||
|
|
@ -401,10 +416,10 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
|
||||||
}
|
}
|
||||||
|
|
||||||
var fields map[string]interface{}
|
var fields map[string]interface{}
|
||||||
fields, ok := l.allFields[tags{name, jobid}]
|
fields, ok := l.allFields[tags{name, jobid, client}]
|
||||||
if !ok {
|
if !ok {
|
||||||
fields = make(map[string]interface{})
|
fields = make(map[string]interface{})
|
||||||
l.allFields[tags{name, jobid}] = fields
|
l.allFields[tags{name, jobid, client}] = fields
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, wanted := range wantedFields {
|
for _, wanted := range wantedFields {
|
||||||
|
|
@ -508,6 +523,9 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
|
||||||
if len(tgs.job) > 0 {
|
if len(tgs.job) > 0 {
|
||||||
tags["jobid"] = tgs.job
|
tags["jobid"] = tgs.job
|
||||||
}
|
}
|
||||||
|
if len(tgs.client) > 0 {
|
||||||
|
tags["client"] = tgs.client
|
||||||
|
}
|
||||||
acc.AddFields("lustre2", fields, tags)
|
acc.AddFields("lustre2", fields, tags)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,69 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLustre2GeneratesClientMetrics(t *testing.T) {
|
||||||
|
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||||
|
ostName := "OST0001"
|
||||||
|
clientName := "10.2.4.27@o2ib1"
|
||||||
|
mdtdir := tempdir + "/mdt/"
|
||||||
|
err := os.MkdirAll(mdtdir+"/"+ostName+"/exports/"+clientName, 0755)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
obddir := tempdir + "/obdfilter/"
|
||||||
|
err = os.MkdirAll(obddir+"/"+ostName+"/exports/"+clientName, 0755)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = os.WriteFile(mdtdir+"/"+ostName+"/exports/"+clientName+"/stats", []byte(mdtProcContents), 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
err = os.WriteFile(obddir+"/"+ostName+"/exports/"+clientName+"/stats", []byte(obdfilterProcContents), 0644)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Begin by testing standard Lustre stats
|
||||||
|
m := &Lustre2{
|
||||||
|
OstProcfiles: []string{obddir + "/*/exports/*/stats"},
|
||||||
|
MdsProcfiles: []string{mdtdir + "/*/exports/*/stats"},
|
||||||
|
}
|
||||||
|
|
||||||
|
var acc testutil.Accumulator
|
||||||
|
|
||||||
|
err = m.Gather(&acc)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
tags := map[string]string{
|
||||||
|
"name": ostName,
|
||||||
|
"client": clientName,
|
||||||
|
}
|
||||||
|
|
||||||
|
fields := map[string]interface{}{
|
||||||
|
"close": uint64(873243496),
|
||||||
|
"crossdir_rename": uint64(369571),
|
||||||
|
"getattr": uint64(1503663097),
|
||||||
|
"getxattr": uint64(6145349681),
|
||||||
|
"link": uint64(445),
|
||||||
|
"mkdir": uint64(705499),
|
||||||
|
"mknod": uint64(349042),
|
||||||
|
"open": uint64(1024577037),
|
||||||
|
"read_bytes": uint64(78026117632000),
|
||||||
|
"read_calls": uint64(203238095),
|
||||||
|
"rename": uint64(629196),
|
||||||
|
"rmdir": uint64(227434),
|
||||||
|
"samedir_rename": uint64(259625),
|
||||||
|
"setattr": uint64(1898364),
|
||||||
|
"setxattr": uint64(83969),
|
||||||
|
"statfs": uint64(2916320),
|
||||||
|
"sync": uint64(434081),
|
||||||
|
"unlink": uint64(3549417),
|
||||||
|
"write_bytes": uint64(15201500833981),
|
||||||
|
"write_calls": uint64(71893382),
|
||||||
|
}
|
||||||
|
|
||||||
|
acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
|
||||||
|
|
||||||
|
err = os.RemoveAll(os.TempDir() + "/telegraf")
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
|
func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
|
||||||
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
|
||||||
ostName := "OST0001"
|
ostName := "OST0001"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue