feat(inputs.slurm): Add a SLURM input plugin (#15700)
This commit is contained in:
parent
371b9887fb
commit
7b5462692b
|
|
@ -307,6 +307,7 @@ following works:
|
|||
- github.com/opentracing/opentracing-go [Apache License 2.0](https://github.com/opentracing/opentracing-go/blob/master/LICENSE)
|
||||
- github.com/p4lang/p4runtime [Apache License 2.0](https://github.com/p4lang/p4runtime/blob/main/LICENSE)
|
||||
- github.com/pborman/ansi [BSD 3-Clause "New" or "Revised" License](https://github.com/pborman/ansi/blob/master/LICENSE)
|
||||
- github.com/pcolladosoto/goslurm [MIT License](https://github.com/pcolladosoto/goslurm/blob/main/LICENSE)
|
||||
- github.com/peterbourgon/unixtransport [Apache License 2.0](https://github.com/peterbourgon/unixtransport/blob/main/LICENSE)
|
||||
- github.com/philhofer/fwd [MIT License](https://github.com/philhofer/fwd/blob/master/LICENSE.md)
|
||||
- github.com/pierrec/lz4 [BSD 3-Clause "New" or "Revised" License](https://github.com/pierrec/lz4/blob/master/LICENSE)
|
||||
|
|
|
|||
17
go.mod
17
go.mod
|
|
@ -31,7 +31,7 @@ require (
|
|||
github.com/PaesslerAG/gval v1.2.2
|
||||
github.com/SAP/go-hdb v1.9.10
|
||||
github.com/aerospike/aerospike-client-go/v5 v5.11.0
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9
|
||||
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30
|
||||
github.com/alitto/pond v1.9.0
|
||||
github.com/aliyun/alibaba-cloud-sdk-go v1.62.721
|
||||
github.com/amir/raidman v0.0.0-20170415203553-1ccc43bfb9c9
|
||||
|
|
@ -159,6 +159,7 @@ require (
|
|||
github.com/openzipkin/zipkin-go v0.4.3
|
||||
github.com/p4lang/p4runtime v1.3.0
|
||||
github.com/pborman/ansi v1.0.0
|
||||
github.com/pcolladosoto/goslurm v0.1.0
|
||||
github.com/peterbourgon/unixtransport v0.0.4
|
||||
github.com/pion/dtls/v2 v2.2.12
|
||||
github.com/prometheus-community/pro-bing v0.4.1
|
||||
|
|
@ -191,7 +192,7 @@ require (
|
|||
github.com/testcontainers/testcontainers-go v0.32.0
|
||||
github.com/testcontainers/testcontainers-go/modules/kafka v0.32.0
|
||||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0
|
||||
github.com/tidwall/gjson v1.17.0
|
||||
github.com/tidwall/gjson v1.17.1
|
||||
github.com/tidwall/wal v1.1.7
|
||||
github.com/tinylib/msgp v1.2.0
|
||||
github.com/urfave/cli/v2 v2.27.2
|
||||
|
|
@ -271,11 +272,11 @@ require (
|
|||
github.com/abbot/go-http-auth v0.4.0 // indirect
|
||||
github.com/alecthomas/participle v0.4.1 // indirect
|
||||
github.com/andybalholm/brotli v1.1.0 // indirect
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
|
||||
github.com/antlr4-go/antlr/v4 v4.13.1 // indirect
|
||||
github.com/apache/arrow/go/v15 v15.0.2 // indirect
|
||||
github.com/aristanetworks/glog v0.0.0-20191112221043-67e8567f59f3 // indirect
|
||||
github.com/armon/go-metrics v0.4.1 // indirect
|
||||
github.com/awnumar/memcall v0.2.0 // indirect
|
||||
github.com/awnumar/memcall v0.3.0 // indirect
|
||||
github.com/aws/aws-sdk-go v1.53.16 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.13.7 // indirect
|
||||
|
|
@ -460,11 +461,11 @@ require (
|
|||
github.com/signalfx/sapm-proto v0.12.0 // indirect
|
||||
github.com/spf13/cast v1.6.0 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/stoewer/go-strcase v1.2.0 // indirect
|
||||
github.com/stoewer/go-strcase v1.3.0 // indirect
|
||||
github.com/stretchr/objx v0.5.2 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.0 // indirect
|
||||
github.com/tidwall/tinylru v1.1.0 // indirect
|
||||
github.com/tidwall/pretty v1.2.1 // indirect
|
||||
github.com/tidwall/tinylru v1.2.1 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.13 // indirect
|
||||
github.com/tklauser/numcpus v0.7.0 // indirect
|
||||
github.com/twmb/murmur3 v1.1.7 // indirect
|
||||
|
|
@ -496,7 +497,7 @@ require (
|
|||
go.uber.org/atomic v1.11.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.24.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20240529005216-23cca8864a10 // indirect
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
|
||||
golang.org/x/time v0.5.0 // indirect
|
||||
golang.org/x/tools v0.23.0 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
|
||||
|
|
|
|||
32
go.sum
32
go.sum
|
|
@ -797,8 +797,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
|
|||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 h1:ez/4by2iGztzR4L0zgAOR8lTQK9VlyBVVd7G4omaOQs=
|
||||
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9/go.mod h1:OMCwj8VM1Kc9e19TLln2VL61YJF0x1XFtfdL4JdbSyE=
|
||||
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30 h1:t3eaIm0rUkzbrIewtiFmMK5RXHej2XnoXNhxVsAYUfg=
|
||||
github.com/alecthomas/units v0.0.0-20240626203959-61d1e3462e30/go.mod h1:fvzegU4vN3H1qMT+8wDmzjAcDONcgo2/SZ/TyfdUOFs=
|
||||
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
|
||||
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
|
||||
github.com/alicebob/gopher-json v0.0.0-20200520072559-a9ecdc9d1d3a/go.mod h1:SGnFV6hVsYE877CKEZ6tDNTjaSXYUk6QqoIK6PrAtcc=
|
||||
|
|
@ -823,8 +823,8 @@ github.com/antchfx/xpath v1.2.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwq
|
|||
github.com/antchfx/xpath v1.3.1 h1:PNbFuUqHwWl0xRjvUPjJ95Agbmdj2uzzIwmQKgu4oCk=
|
||||
github.com/antchfx/xpath v1.3.1/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
|
||||
github.com/antlr4-go/antlr/v4 v4.13.1/go.mod h1:GKmUxMtwp6ZgGwZSva4eWPC5mS6vUAmOABFgjdkM7Nw=
|
||||
github.com/apache/arrow/go/v10 v10.0.1/go.mod h1:YvhnlEePVnBS4+0z3fhPfUy7W1Ikj0Ih0vcRo/gZ1M0=
|
||||
github.com/apache/arrow/go/v11 v11.0.0/go.mod h1:Eg5OsL5H+e299f7u5ssuXsuHQVEGC4xei5aX110hRiI=
|
||||
github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE=
|
||||
|
|
@ -853,8 +853,8 @@ github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj
|
|||
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||
github.com/awnumar/memcall v0.2.0 h1:sRaogqExTOOkkNwO9pzJsL8jrOV29UuUW7teRMfbqtI=
|
||||
github.com/awnumar/memcall v0.2.0/go.mod h1:S911igBPR9CThzd/hYQQmTc9SWNu3ZHIlCGaWsWsoJo=
|
||||
github.com/awnumar/memcall v0.3.0 h1:8b/3Sptrtgejj2kLgL6M5F2r4OzTf19CTllO+gIXUg8=
|
||||
github.com/awnumar/memcall v0.3.0/go.mod h1:8xOx1YbfyuCg3Fy6TO8DK0kZUua3V42/goA5Ru47E8w=
|
||||
github.com/awnumar/memguard v0.22.5 h1:PH7sbUVERS5DdXh3+mLo8FDcl1eIeVjJVYMnyuYpvuI=
|
||||
github.com/awnumar/memguard v0.22.5/go.mod h1:+APmZGThMBWjnMlKiSM1X7MVpbIVewen2MTkqWkA/zE=
|
||||
github.com/aws/aws-sdk-go v1.19.48/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo=
|
||||
|
|
@ -2085,6 +2085,8 @@ github.com/pborman/ansi v1.0.0 h1:OqjHMhvlSuCCV5JT07yqPuJPQzQl+WXsiZ14gZsqOrQ=
|
|||
github.com/pborman/ansi v1.0.0/go.mod h1:SgWzwMAx1X/Ez7i90VqF8LRiQtx52pWDiQP+x3iGnzw=
|
||||
github.com/pborman/getopt v0.0.0-20190409184431-ee0cd42419d3/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o=
|
||||
github.com/pborman/getopt v1.1.0/go.mod h1:FxXoW1Re00sQG/+KIkuSqRL/LwQgSkv7uyac+STFsbk=
|
||||
github.com/pcolladosoto/goslurm v0.1.0 h1:d2KigvDfsIIeVeHHj/pTtajz2T0cHHqhGk9iJWUdGaM=
|
||||
github.com/pcolladosoto/goslurm v0.1.0/go.mod h1:eLuBFfN/tj4O/HDMrAJXb+3s3rGhdHQVZFcOUV1Sbbo=
|
||||
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
|
||||
github.com/pelletier/go-toml v1.8.1/go.mod h1:T2/BmBdy8dvIRq1a/8aqjN41wvWlN4lrapLU/GW4pbc=
|
||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||
|
|
@ -2312,8 +2314,8 @@ github.com/srebhan/cborquery v1.0.1 h1:cFG1falVzmlfyVI8tY6hYM7RQqLxFzt9STusdxHoy
|
|||
github.com/srebhan/cborquery v1.0.1/go.mod h1:GgsaIoCW+qlqyU+cjSeOpaWhbiiMVkA0uU/H3+PWvjQ=
|
||||
github.com/srebhan/protobufquery v0.0.0-20230803132024-ae4c0d878e55 h1:ksmbrLbJAm+8yxB7fJ245usD0b1v9JHBJrWF+WqGyjs=
|
||||
github.com/srebhan/protobufquery v0.0.0-20230803132024-ae4c0d878e55/go.mod h1:SIB3zq5pZq2Ff7aJtCdRpGiHc/meKyMLPEj8F5Tf1j8=
|
||||
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
|
||||
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
|
||||
github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs=
|
||||
github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE=
|
||||
|
|
@ -2350,14 +2352,16 @@ github.com/testcontainers/testcontainers-go/modules/kafka v0.32.0/go.mod h1:GCPw
|
|||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0 h1:aj2HLHZZM/ClGLIwVp9rrgh+2TOU/w4EiaZHAwCpOgs=
|
||||
github.com/thomasklein94/packer-plugin-libvirt v0.5.0/go.mod h1:GwN82FQ6KxCNKtS8LNUgLbwTZs90GGhBzCmTNkrTCrY=
|
||||
github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/gjson v1.17.0 h1:/Jocvlh98kcTfpN2+JzGQWQcqrPQwDrVEMApx/M5ZwM=
|
||||
github.com/tidwall/gjson v1.17.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/gjson v1.17.1 h1:wlYEnwqAHgzmhNUFfw7Xalt2JzQvsMx2Se4PcoFCT/U=
|
||||
github.com/tidwall/gjson v1.17.1/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk=
|
||||
github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA=
|
||||
github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM=
|
||||
github.com/tidwall/pretty v1.2.0 h1:RWIZEg2iJ8/g6fDDYzMpobmaoGh5OLl4AXtGUGPcqCs=
|
||||
github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/tinylru v1.1.0 h1:XY6IUfzVTU9rpwdhKUF6nQdChgCdGjkMfLzbWyiau6I=
|
||||
github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4=
|
||||
github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU=
|
||||
github.com/tidwall/tinylru v1.1.0/go.mod h1:3+bX+TJ2baOLMWTnlyNWHh4QMnFyARg2TLTQ6OFbzw8=
|
||||
github.com/tidwall/tinylru v1.2.1 h1:VgBr72c2IEr+V+pCdkPZUwiQ0KJknnWIYbhxAVkYfQk=
|
||||
github.com/tidwall/tinylru v1.2.1/go.mod h1:9bQnEduwB6inr2Y7AkBP7JPgCkyrhTV/ZpX0oOOpBI4=
|
||||
github.com/tidwall/wal v1.1.7 h1:emc1TRjIVsdKKSnpwGBAcsAGg0767SvUk8+ygx7Bb+4=
|
||||
github.com/tidwall/wal v1.1.7/go.mod h1:r6lR1j27W9EPalgHiB7zLJDYu3mzW5BQP5KrzBpYY/E=
|
||||
github.com/tinylib/msgp v1.2.0 h1:0uKB/662twsVBpYUPbokj4sTSKhWFKB7LopO2kWK8lY=
|
||||
|
|
@ -2584,8 +2588,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0
|
|||
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
|
||||
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
|
||||
golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
|
||||
golang.org/x/exp v0.0.0-20240529005216-23cca8864a10 h1:vpzMC/iZhYFAjJzHU0Cfuq+w1vLLsF2vLkDrPjzKYck=
|
||||
golang.org/x/exp v0.0.0-20240529005216-23cca8864a10/go.mod h1:XtvwrStGgqGPLc4cjQfWqZHG1YFdYs6swckp8vpsjnc=
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
|
||||
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
|
||||
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
|
||||
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
|
||||
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
//go:build !custom || inputs || inputs.slurm
|
||||
|
||||
package all
|
||||
|
||||
import _ "github.com/influxdata/telegraf/plugins/inputs/slurm" // register plugin
|
||||
|
|
@ -0,0 +1,197 @@
|
|||
# SLURM Input Plugin
|
||||
|
||||
This plugin gathers diag, jobs, nodes, partitions and reservation metrics by
|
||||
leveraging SLURM's REST API as provided by the `slurmrestd` daemon.
|
||||
|
||||
This plugin targets the `openapi/v0.0.38` OpenAPI plugin as defined in SLURM's
|
||||
documentation. That particular plugin should be configured when starting the
|
||||
`slurmrestd` daemon up. For more information, be sure to check SLURM's
|
||||
documentation [here][SLURM Doc].
|
||||
|
||||
A great wealth of information can also be found on the repository of the
|
||||
Go module implementing the API client, [pcolladosoto/goslurm][].
|
||||
|
||||
[SLURM Doc]: https://slurm.schedmd.com/rest.html
|
||||
[pcolladosoto/goslurm]: https://github.com/pcolladosoto/goslurm
|
||||
|
||||
## Global configuration options <!-- @/docs/includes/plugin_config.md -->
|
||||
|
||||
In addition to the plugin-specific configuration settings, plugins support
|
||||
additional global and plugin configuration settings. These settings are used to
|
||||
modify metrics, tags, and field or create aliases and configure ordering, etc.
|
||||
See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
||||
|
||||
[CONFIGURATION.md]: ../../../docs/CONFIGURATION.md#plugins
|
||||
|
||||
## Configuration
|
||||
|
||||
```toml @sample.conf
|
||||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemes.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
## Set to true/false to enforce TLS being enabled/disabled. If not set,
|
||||
## enable TLS only if any of the other options are specified.
|
||||
# tls_enable =
|
||||
## Trusted root certificates for server
|
||||
# tls_ca = "/path/to/cafile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_cert = "/path/to/certfile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_key = "/path/to/keyfile"
|
||||
## Password for the key file if it is encrypted
|
||||
# tls_key_pwd = ""
|
||||
## Send the specified TLS server name via SNI
|
||||
# tls_server_name = "kubernetes.example.com"
|
||||
## Minimal TLS version to accept by the client
|
||||
# tls_min_version = "TLS12"
|
||||
## List of ciphers to accept, by default all secure ciphers will be accepted
|
||||
## See https://pkg.go.dev/crypto/tls#pkg-constants for supported values.
|
||||
## Use "all", "secure" and "insecure" to add all support ciphers, secure
|
||||
## suites or insecure suites respectively.
|
||||
# tls_cipher_suites = ["secure"]
|
||||
## Renegotiation method, "never", "once" or "freely"
|
||||
# tls_renegotiation_method = "never"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
Given the great deal of metrics offered by SLURM's API, an attempt has been
|
||||
made to strike a balance between verbosity and usefulness in terms of the
|
||||
gathered information.
|
||||
|
||||
- slurm_diag
|
||||
- tags:
|
||||
- source
|
||||
- fields:
|
||||
- server_thread_count
|
||||
- jobs_canceled
|
||||
- jobs_submitted
|
||||
- jobs_started
|
||||
- jobs_completed
|
||||
- jobs_failed
|
||||
- jobs_pending
|
||||
- jobs_running
|
||||
- schedule_cycle_last
|
||||
- schedule_cycle_mean
|
||||
- bf_queue_len
|
||||
- bf_queue_len_mean
|
||||
- bf_active
|
||||
- slurm_jobs
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- job_id
|
||||
- fields:
|
||||
- state
|
||||
- state_reason
|
||||
- partition
|
||||
- nodes
|
||||
- node_count
|
||||
- priority
|
||||
- nice
|
||||
- group_id
|
||||
- command
|
||||
- standard_output
|
||||
- standard_error
|
||||
- standard_input
|
||||
- current_working_directory
|
||||
- submit_time
|
||||
- start_time
|
||||
- cpus
|
||||
- tasks
|
||||
- time_limit
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_node
|
||||
- tres_billing
|
||||
- slurm_nodes
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- state
|
||||
- cores
|
||||
- cpus
|
||||
- cpu_load
|
||||
- alloc_cpu
|
||||
- real_memory
|
||||
- free_memory
|
||||
- alloc_memory
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_billing
|
||||
- tres_used_cpu
|
||||
- tres_used_mem
|
||||
- weight
|
||||
- slurmd_version
|
||||
- architecture
|
||||
- slurm_partitions
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- state
|
||||
- total_cpu
|
||||
- total_nodes
|
||||
- nodes
|
||||
- tres_cpu
|
||||
- tres_mem
|
||||
- tres_node
|
||||
- tres_billing
|
||||
- slurm_reservations
|
||||
- tags:
|
||||
- source
|
||||
- name
|
||||
- fields:
|
||||
- core_count
|
||||
- core_spec_count
|
||||
- groups
|
||||
- users
|
||||
- start_time
|
||||
- partition
|
||||
- accounts
|
||||
- node_count
|
||||
- node_list
|
||||
|
||||
## Example Output
|
||||
|
||||
```text
|
||||
slurm_diag,host=hoth,source=slurm_primary.example.net bf_active=false,bf_queue_len=1i,bf_queue_len_mean=1i,jobs_canceled=0i,jobs_completed=137i,jobs_failed=0i,jobs_pending=0i,jobs_running=100i,jobs_started=137i,jobs_submitted=137i,schedule_cycle_last=27i,schedule_cycle_mean=86i,server_thread_count=3i 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23160,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.11BCgQ",cpus=2i,current_working_directory="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878569i,standard_error="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln.comment",standard_input="/dev/null",standard_output="/home/sessiondir/7CQODmQ3uw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmG9JKDmILUkln.comment",start_time=1723354525i,state="RUNNING",state_reason="None",submit_time=1723354525i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23365,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.yRcFYL",cpus=2i,current_working_directory="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878364i,standard_error="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/LgwNDmTLAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm2BKKDm8bFZsm.comment",start_time=1723376763i,state="RUNNING",state_reason="None",submit_time=1723376761i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23366,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.5Y9Ngb",cpus=2i,current_working_directory="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878363i,standard_error="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em.comment",standard_input="/dev/null",standard_output="/home/sessiondir/HFYKDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm3BKKDmiyK3em.comment",start_time=1723376883i,state="RUNNING",state_reason="None",submit_time=1723376882i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23367,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.NmOqMU",cpus=2i,current_working_directory="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878362i,standard_error="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/nnLLDmULAx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm4BKKDmfhjFPn.comment",start_time=1723376883i,state="RUNNING",state_reason="None",submit_time=1723376882i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23385,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.NNsI08",cpus=2i,current_working_directory="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo",group_id=2005i,nice=50i,node_count=1i,nodes="naboo225",partition="atlas",priority=4294878344i,standard_error="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo.comment",standard_input="/dev/null",standard_output="/home/sessiondir/PWvNDmH7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmz7JKDmqgKyRo.comment",start_time=1723378725i,state="RUNNING",state_reason="None",submit_time=1723378725i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23386,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.bcmS4h",cpus=2i,current_working_directory="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878343i,standard_error="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ZNHMDmI7tw5nKG01gq4B3BRpm7wtQmABFKDmbnHPDm27JKDm3Ve66n.comment",start_time=1723379206i,state="RUNNING",state_reason="None",submit_time=1723379205i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23387,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.OgpoQZ",cpus=2i,current_working_directory="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294878342i,standard_error="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/qohNDmUqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmMCKKDmzM4Yhn.comment",start_time=1723379246i,state="RUNNING",state_reason="None",submit_time=1723379245i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23388,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.xYbxSe",cpus=2i,current_working_directory="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878341i,standard_error="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn.comment",standard_input="/dev/null",standard_output="/home/sessiondir/u9HODmXqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmWCKKDmRlccYn.comment",start_time=1723379326i,state="RUNNING",state_reason="None",submit_time=1723379326i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23389,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.QHtIIm",cpus=2i,current_working_directory="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km",group_id=2005i,nice=50i,node_count=1i,nodes="naboo227",partition="atlas",priority=4294878340i,standard_error="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ZLvKDmYqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXCKKDmjp19km.comment",start_time=1723379326i,state="RUNNING",state_reason="None",submit_time=1723379326i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_jobs,host=hoth,job_id=23393,name=gridjob,source=slurm_primary.example.net command="/tmp/SLURM_job_script.IH19bN",cpus=2i,current_working_directory="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo224",partition="atlas",priority=4294878336i,standard_error="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/YdPODmVqBx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmSCKKDmrYDOwm.comment",start_time=1723379767i,state="RUNNING",state_reason="None",submit_time=1723379766i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=1000,tres_node=1 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo145,source=slurm_primary.example.net alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=86450i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo146,source=slurm_primary.example.net alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=92148i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo147,source=slurm_primary.example.net alloc_cpu=36i,alloc_memory=45000i,architecture="x86_64",cores=18i,cpu_load=3826i,cpus=36i,free_memory=1607i,real_memory=94793i,slurmd_version="22.05.9",state="allocated",tres_billing=36,tres_cpu=36,tres_mem=94793,tres_used_cpu=36,tres_used_mem=45000,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo216,source=slurm_primary.example.net alloc_cpu=8i,alloc_memory=8000i,architecture="x86_64",cores=4i,cpu_load=891i,cpus=8i,free_memory=17972i,real_memory=31877i,slurmd_version="22.05.9",state="allocated",tres_billing=8,tres_cpu=8,tres_mem=31877,tres_used_cpu=8,tres_used_mem=8000,weight=1i 1723466497000000000
|
||||
slurm_nodes,host=hoth,name=naboo219,source=slurm_primary.example.net alloc_cpu=16i,alloc_memory=16000i,architecture="x86_64",cores=4i,cpu_load=1382i,cpus=16i,free_memory=15645i,real_memory=31875i,slurmd_version="22.05.9",state="allocated",tres_billing=16,tres_cpu=16,tres_mem=31875,tres_used_cpu=16,tres_used_mem=16000,weight=1i 1723466497000000000
|
||||
slurm_partitions,host=hoth,name=atlas,source=slurm_primary.example.net nodes="naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",state="UP",total_cpu=632i,total_nodes=21i,tres_billing=632,tres_cpu=632,tres_mem=1415207,tres_node=21 1723466497000000000
|
||||
```
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemes.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
## Set to true/false to enforce TLS being enabled/disabled. If not set,
|
||||
## enable TLS only if any of the other options are specified.
|
||||
# tls_enable =
|
||||
## Trusted root certificates for server
|
||||
# tls_ca = "/path/to/cafile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_cert = "/path/to/certfile"
|
||||
## Used for TLS client certificate authentication
|
||||
# tls_key = "/path/to/keyfile"
|
||||
## Password for the key file if it is encrypted
|
||||
# tls_key_pwd = ""
|
||||
## Send the specified TLS server name via SNI
|
||||
# tls_server_name = "kubernetes.example.com"
|
||||
## Minimal TLS version to accept by the client
|
||||
# tls_min_version = "TLS12"
|
||||
## List of ciphers to accept, by default all secure ciphers will be accepted
|
||||
## See https://pkg.go.dev/crypto/tls#pkg-constants for supported values.
|
||||
## Use "all", "secure" and "insecure" to add all support ciphers, secure
|
||||
## suites or insecure suites respectively.
|
||||
# tls_cipher_suites = ["secure"]
|
||||
## Renegotiation method, "never", "once" or "freely"
|
||||
# tls_renegotiation_method = "never"
|
||||
## Use TLS but skip chain & host verification
|
||||
# insecure_skip_verify = false
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Gather SLURM metrics
|
||||
[[inputs.slurm]]
|
||||
## Slurmrestd URL. Both http and https can be used as schemes.
|
||||
url = "http://127.0.0.1:6820"
|
||||
|
||||
## Credentials for JWT-based authentication.
|
||||
# username = "foo"
|
||||
# token = "topSecret"
|
||||
|
||||
## Enabled endpoints
|
||||
## List of endpoints a user can acquire data from.
|
||||
## Available values are: diag, jobs, nodes, partitions, reservations.
|
||||
# enabled_endpoints = ["diag", "jobs", "nodes", "partitions", "reservations"]
|
||||
|
||||
## Maximum time to receive a response. If set to 0s, the
|
||||
## request will not time out.
|
||||
# response_timeout = "5s"
|
||||
|
||||
## Optional TLS Config. Note these options will only
|
||||
## be taken into account when the scheme specified on
|
||||
## the URL parameter is https. They will be silently
|
||||
## ignored otherwise.
|
||||
{{template "/plugins/common/tls/client.conf"}}
|
||||
|
|
@ -0,0 +1,476 @@
|
|||
//go:generate ../../../tools/config_includer/generator
|
||||
//go:generate ../../../tools/readme_config_includer/generator
|
||||
package slurm
|
||||
|
||||
import (
|
||||
"context"
|
||||
_ "embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/plugins/common/tls"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
|
||||
goslurm "github.com/pcolladosoto/goslurm/v0038"
|
||||
)
|
||||
|
||||
//go:embed sample.conf
|
||||
var sampleConfig string
|
||||
|
||||
type Slurm struct {
|
||||
URL string `toml:"url"`
|
||||
Username string `toml:"username"`
|
||||
Token string `toml:"token"`
|
||||
EnabledEndpoints []string `toml:"enabled_endpoints"`
|
||||
ResponseTimeout config.Duration `toml:"response_timeout"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
tls.ClientConfig
|
||||
|
||||
client *goslurm.APIClient
|
||||
baseURL *url.URL
|
||||
endpointMap map[string]bool
|
||||
}
|
||||
|
||||
func (*Slurm) SampleConfig() string {
|
||||
return sampleConfig
|
||||
}
|
||||
|
||||
func (s *Slurm) Init() error {
|
||||
if len(s.EnabledEndpoints) == 0 {
|
||||
s.EnabledEndpoints = []string{"diag", "jobs", "nodes", "partitions", "reservations"}
|
||||
}
|
||||
|
||||
s.endpointMap = make(map[string]bool, len(s.EnabledEndpoints))
|
||||
for _, endpoint := range s.EnabledEndpoints {
|
||||
switch e := strings.ToLower(endpoint); e {
|
||||
case "diag", "jobs", "nodes", "partitions", "reservations":
|
||||
s.endpointMap[e] = true
|
||||
default:
|
||||
return fmt.Errorf("unknown endpoint %q", endpoint)
|
||||
}
|
||||
}
|
||||
|
||||
if s.URL == "" {
|
||||
return errors.New("empty URL provided")
|
||||
}
|
||||
|
||||
u, err := url.Parse(s.URL)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if u.Hostname() == "" {
|
||||
return fmt.Errorf("empty hostname for url %q", s.URL)
|
||||
}
|
||||
|
||||
s.baseURL = u
|
||||
|
||||
if u.Scheme != "http" && u.Scheme != "https" {
|
||||
return fmt.Errorf("invalid scheme %q", u.Scheme)
|
||||
}
|
||||
|
||||
tlsCfg, err := s.ClientConfig.TLSConfig()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if u.Scheme == "http" && tlsCfg != nil {
|
||||
s.Log.Warn("non-empty TLS configuration for a URL with an http scheme. Ignoring it...")
|
||||
tlsCfg = nil
|
||||
}
|
||||
|
||||
configuration := goslurm.NewConfiguration()
|
||||
configuration.Host = u.Host
|
||||
configuration.Scheme = u.Scheme
|
||||
configuration.UserAgent = internal.ProductToken()
|
||||
configuration.HTTPClient = &http.Client{
|
||||
Transport: &http.Transport{
|
||||
TLSClientConfig: tlsCfg,
|
||||
},
|
||||
Timeout: time.Duration(s.ResponseTimeout),
|
||||
}
|
||||
|
||||
s.client = goslurm.NewAPIClient(configuration)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Slurm) parseTres(tres string) map[string]interface{} {
|
||||
tresKVs := strings.Split(tres, ",")
|
||||
parsedValues := make(map[string]interface{}, len(tresKVs))
|
||||
|
||||
for _, tresVal := range tresKVs {
|
||||
parsedTresVal := strings.Split(tresVal, "=")
|
||||
if len(parsedTresVal) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
tag := parsedTresVal[0]
|
||||
val := parsedTresVal[1]
|
||||
var factor float64 = 1
|
||||
|
||||
if tag == "mem" {
|
||||
var ok bool
|
||||
factor, ok = map[string]float64{
|
||||
"K": 1.0 / 1024.0,
|
||||
"M": 1,
|
||||
"G": 1024,
|
||||
"T": 1024 * 1024,
|
||||
"P": 1024 * 1024 * 1024,
|
||||
}[strings.ToUpper(val[len(val)-1:])]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
val = val[:len(val)-1]
|
||||
}
|
||||
|
||||
parsedFloat, err := strconv.ParseFloat(val, 64)
|
||||
if err == nil {
|
||||
parsedValues[tag] = parsedFloat * factor
|
||||
continue
|
||||
}
|
||||
parsedValues[tag] = val
|
||||
}
|
||||
|
||||
return parsedValues
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherDiagMetrics(acc telegraf.Accumulator, diag *goslurm.V0038DiagStatistics) {
|
||||
records := make(map[string]interface{}, 13)
|
||||
tags := map[string]string{"source": s.baseURL.Hostname()}
|
||||
|
||||
if int32Ptr, ok := diag.GetServerThreadCountOk(); ok {
|
||||
records["server_thread_count"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsCanceledOk(); ok {
|
||||
records["jobs_canceled"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsSubmittedOk(); ok {
|
||||
records["jobs_submitted"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsStartedOk(); ok {
|
||||
records["jobs_started"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsCompletedOk(); ok {
|
||||
records["jobs_completed"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsFailedOk(); ok {
|
||||
records["jobs_failed"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsPendingOk(); ok {
|
||||
records["jobs_pending"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetJobsRunningOk(); ok {
|
||||
records["jobs_running"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetScheduleCycleLastOk(); ok {
|
||||
records["schedule_cycle_last"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetScheduleCycleMeanOk(); ok {
|
||||
records["schedule_cycle_mean"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetBfQueueLenOk(); ok {
|
||||
records["bf_queue_len"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := diag.GetBfQueueLenMeanOk(); ok {
|
||||
records["bf_queue_len_mean"] = *int32Ptr
|
||||
}
|
||||
if boolPtr, ok := diag.GetBfActiveOk(); ok {
|
||||
records["bf_active"] = *boolPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_diag", records, tags)
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherJobsMetrics(acc telegraf.Accumulator, jobs []goslurm.V0038JobResponseProperties) {
|
||||
for i := range jobs {
|
||||
records := make(map[string]interface{}, 19)
|
||||
tags := make(map[string]string, 3)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := jobs[i].GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetJobIdOk(); ok {
|
||||
tags["job_id"] = strconv.Itoa(int(*int32Ptr))
|
||||
}
|
||||
|
||||
if strPtr, ok := jobs[i].GetJobStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStateReasonOk(); ok {
|
||||
records["state_reason"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetPartitionOk(); ok {
|
||||
records["partition"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetNodesOk(); ok {
|
||||
records["nodes"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetNodeCountOk(); ok {
|
||||
records["node_count"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetPriorityOk(); ok {
|
||||
records["priority"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetNiceOk(); ok {
|
||||
records["nice"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetGroupIdOk(); ok {
|
||||
records["group_id"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetCommandOk(); ok {
|
||||
records["command"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardOutputOk(); ok {
|
||||
records["standard_output"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardErrorOk(); ok {
|
||||
records["standard_error"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetStandardInputOk(); ok {
|
||||
records["standard_input"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetCurrentWorkingDirectoryOk(); ok {
|
||||
records["current_working_directory"] = strings.ReplaceAll(*strPtr, "\\", "")
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetSubmitTimeOk(); ok {
|
||||
records["submit_time"] = *int64Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetStartTimeOk(); ok {
|
||||
records["start_time"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetCpusOk(); ok {
|
||||
records["cpus"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := jobs[i].GetTasksOk(); ok {
|
||||
records["tasks"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := jobs[i].GetTimeLimitOk(); ok {
|
||||
records["time_limit"] = *int64Ptr
|
||||
}
|
||||
if strPtr, ok := jobs[i].GetTresReqStrOk(); ok {
|
||||
for k, v := range s.parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_jobs", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherNodesMetrics(acc telegraf.Accumulator, nodes []goslurm.V0038Node) {
|
||||
for _, node := range nodes {
|
||||
records := make(map[string]interface{}, 13)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := node.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if strPtr, ok := node.GetStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := node.GetCoresOk(); ok {
|
||||
records["cores"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetCpusOk(); ok {
|
||||
records["cpus"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetCpuLoadOk(); ok {
|
||||
records["cpu_load"] = *int64Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetAllocCpusOk(); ok {
|
||||
records["alloc_cpu"] = *int64Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetRealMemoryOk(); ok {
|
||||
records["real_memory"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := node.GetFreeMemoryOk(); ok {
|
||||
records["free_memory"] = *int32Ptr
|
||||
}
|
||||
if int64Ptr, ok := node.GetAllocMemoryOk(); ok {
|
||||
records["alloc_memory"] = *int64Ptr
|
||||
}
|
||||
if strPtr, ok := node.GetTresOk(); ok {
|
||||
for k, v := range s.parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
if strPtr, ok := node.GetTresUsedOk(); ok {
|
||||
for k, v := range s.parseTres(*strPtr) {
|
||||
records["tres_used_"+k] = v
|
||||
}
|
||||
}
|
||||
if int32Ptr, ok := node.GetWeightOk(); ok {
|
||||
records["weight"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := node.GetSlurmdVersionOk(); ok {
|
||||
records["slurmd_version"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := node.GetArchitectureOk(); ok {
|
||||
records["architecture"] = *strPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_nodes", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherPartitionsMetrics(acc telegraf.Accumulator, partitions []goslurm.V0038Partition) {
|
||||
for _, partition := range partitions {
|
||||
records := make(map[string]interface{}, 5)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := partition.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if strPtr, ok := partition.GetStateOk(); ok {
|
||||
records["state"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := partition.GetTotalCpusOk(); ok {
|
||||
records["total_cpu"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := partition.GetTotalNodesOk(); ok {
|
||||
records["total_nodes"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := partition.GetNodesOk(); ok {
|
||||
records["nodes"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := partition.GetTresOk(); ok {
|
||||
for k, v := range s.parseTres(*strPtr) {
|
||||
records["tres_"+k] = v
|
||||
}
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_partitions", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) gatherReservationsMetrics(acc telegraf.Accumulator, reservations []goslurm.V0038Reservation) {
|
||||
for _, reservation := range reservations {
|
||||
records := make(map[string]interface{}, 9)
|
||||
tags := make(map[string]string, 2)
|
||||
|
||||
tags["source"] = s.baseURL.Hostname()
|
||||
if strPtr, ok := reservation.GetNameOk(); ok {
|
||||
tags["name"] = *strPtr
|
||||
}
|
||||
|
||||
if int32Ptr, ok := reservation.GetCoreCountOk(); ok {
|
||||
records["core_count"] = *int32Ptr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetCoreSpecCntOk(); ok {
|
||||
records["core_spec_count"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetGroupsOk(); ok {
|
||||
records["groups"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := reservation.GetUsersOk(); ok {
|
||||
records["users"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetStartTimeOk(); ok {
|
||||
records["start_time"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetPartitionOk(); ok {
|
||||
records["partition"] = *strPtr
|
||||
}
|
||||
if strPtr, ok := reservation.GetAccountsOk(); ok {
|
||||
records["accounts"] = *strPtr
|
||||
}
|
||||
if int32Ptr, ok := reservation.GetNodeCountOk(); ok {
|
||||
records["node_count"] = *int32Ptr
|
||||
}
|
||||
if strPtr, ok := reservation.GetNodeListOk(); ok {
|
||||
records["node_list"] = *strPtr
|
||||
}
|
||||
|
||||
acc.AddFields("slurm_reservations", records, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Slurm) Gather(acc telegraf.Accumulator) (err error) {
|
||||
auth := context.WithValue(
|
||||
context.Background(),
|
||||
goslurm.ContextAPIKeys,
|
||||
map[string]goslurm.APIKey{
|
||||
"user": {Key: s.Username},
|
||||
"token": {Key: s.Token},
|
||||
},
|
||||
)
|
||||
|
||||
if s.endpointMap["diag"] {
|
||||
diagResp, respRaw, err := s.client.SlurmAPI.SlurmV0038Diag(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting diag: %w", err)
|
||||
}
|
||||
if diag, ok := diagResp.GetStatisticsOk(); ok {
|
||||
s.gatherDiagMetrics(acc, diag)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["jobs"] {
|
||||
jobsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetJobs(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting jobs: %w", err)
|
||||
}
|
||||
if jobs, ok := jobsResp.GetJobsOk(); ok {
|
||||
s.gatherJobsMetrics(acc, jobs)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["nodes"] {
|
||||
nodesResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetNodes(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting nodes: %w", err)
|
||||
}
|
||||
if nodes, ok := nodesResp.GetNodesOk(); ok {
|
||||
s.gatherNodesMetrics(acc, nodes)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["partitions"] {
|
||||
partitionsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetPartitions(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting partitions: %w", err)
|
||||
}
|
||||
if partitions, ok := partitionsResp.GetPartitionsOk(); ok {
|
||||
s.gatherPartitionsMetrics(acc, partitions)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
if s.endpointMap["reservations"] {
|
||||
reservationsResp, respRaw, err := s.client.SlurmAPI.SlurmV0038GetReservations(auth).Execute()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting reservations: %w", err)
|
||||
}
|
||||
if reservations, ok := reservationsResp.GetReservationsOk(); ok {
|
||||
s.gatherReservationsMetrics(acc, reservations)
|
||||
}
|
||||
respRaw.Body.Close()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("slurm", func() telegraf.Input {
|
||||
return &Slurm{
|
||||
ResponseTimeout: config.Duration(5 * time.Second),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
package slurm
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/plugins/parsers/influx"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGoodURLs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
}{
|
||||
{"http", "http://example.com:6820"},
|
||||
{"https", "https://example.com:6820"},
|
||||
{"http no port", "http://example.com"},
|
||||
{"https no port", "https://example.com"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: tt.url,
|
||||
}
|
||||
require.NoError(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrongURLs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
url string
|
||||
}{
|
||||
{"wrong http scheme", "httpp://example.com:6820"},
|
||||
{"wrong https scheme", "httpss://example.com:6820"},
|
||||
{"empty url", ""},
|
||||
{"empty hostname", "http://:6820"},
|
||||
{"only scheme", "http://"},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: tt.url,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWrongEndpoints(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
enabledEndpoints []string
|
||||
}{
|
||||
{"empty endpoint", []string{"diag", "", "jobs"}},
|
||||
{"mistyped endpoint", []string{"diagg", "jobs", "partitions"}},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
plugin := Slurm{
|
||||
URL: "http://example.net",
|
||||
EnabledEndpoints: tt.enabledEndpoints,
|
||||
}
|
||||
require.Error(t, plugin.Init())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCases(t *testing.T) {
|
||||
entries, err := os.ReadDir("testcases")
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
t.Run(entry.Name(), func(t *testing.T) {
|
||||
testcasePath := filepath.Join("testcases", entry.Name())
|
||||
responsesPath := filepath.Join(testcasePath, "responses")
|
||||
expectedFilename := filepath.Join(testcasePath, "expected.out")
|
||||
configFilename := filepath.Join(testcasePath, "telegraf.conf")
|
||||
|
||||
responses, err := os.ReadDir(responsesPath)
|
||||
require.NoError(t, err)
|
||||
|
||||
pathToResponse := map[string][]byte{}
|
||||
for _, response := range responses {
|
||||
if response.IsDir() {
|
||||
continue
|
||||
}
|
||||
fName := response.Name()
|
||||
buf, err := os.ReadFile(filepath.Join(responsesPath, fName))
|
||||
require.NoError(t, err)
|
||||
pathToResponse[strings.TrimSuffix(fName, filepath.Ext(fName))] = buf
|
||||
}
|
||||
|
||||
// Prepare the influx parser for expectations
|
||||
parser := &influx.Parser{}
|
||||
require.NoError(t, parser.Init())
|
||||
|
||||
// Read expected values, if any
|
||||
var expected []telegraf.Metric
|
||||
if _, err := os.Stat(expectedFilename); err == nil {
|
||||
var err error
|
||||
expected, err = testutil.ParseMetricsFromFile(expectedFilename, parser)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
ts := httptest.NewServer(http.NotFoundHandler())
|
||||
defer ts.Close()
|
||||
|
||||
ts.Config.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
resp, ok := pathToResponse[strings.TrimPrefix(r.URL.Path, "/slurm/v0.0.38/")]
|
||||
require.True(t, ok)
|
||||
w.Header().Add("Content-Type", "application/json")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
_, err := w.Write(resp)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
// Load the test-specific configuration
|
||||
cfg := config.NewConfig()
|
||||
cfg.Agent.Quiet = true
|
||||
require.NoError(t, cfg.LoadConfig(configFilename))
|
||||
require.Len(t, cfg.Inputs, 1)
|
||||
|
||||
// Instantiate the plugin. As seen on NewConfig's documentation,
|
||||
// parsing the configuration will instantiate the plugins, so that
|
||||
// we only need to assert the plugin's type!
|
||||
plugin := cfg.Inputs[0].Input.(*Slurm)
|
||||
plugin.URL = "http://" + ts.Listener.Addr().String()
|
||||
plugin.Log = testutil.Logger{}
|
||||
require.NoError(t, plugin.Init())
|
||||
|
||||
var acc testutil.Accumulator
|
||||
require.NoError(t, plugin.Gather(&acc))
|
||||
|
||||
actual := acc.GetTelegrafMetrics()
|
||||
testutil.RequireMetricsEqual(t, expected, actual, testutil.SortMetrics(), testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
slurm_diag,source=127.0.0.1 bf_active=false,bf_queue_len=1i,bf_queue_len_mean=1i,jobs_canceled=0i,jobs_completed=287i,jobs_failed=1i,jobs_pending=0i,jobs_running=100i,jobs_started=287i,jobs_submitted=287i,schedule_cycle_last=298i,schedule_cycle_mean=137i,server_thread_count=3i 1723464650000000000
|
||||
|
||||
slurm_jobs,job_id=20464,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.OjQEIH",cpus=2i,current_working_directory="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294881265i,standard_error="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",start_time=1722989851i,state="RUNNING",state_reason="None",submit_time=1722989851i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723464650000000000
|
||||
slurm_jobs,job_id=20468,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.XTwtdj",cpus=2i,current_working_directory="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n",group_id=2005i,nice=50i,node_count=1i,nodes="naboo222",partition="atlas",priority=4294881261i,standard_error="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",standard_input="/dev/null",standard_output="/home/sessiondir/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",start_time=1722990772i,state="RUNNING",state_reason="None",submit_time=1722990772i,tasks=1i,time_limit=3600i,tres_billing=1,tres_cpu=1,tres_mem=2000,tres_node=1 1723464650000000000
|
||||
slurm_jobs,job_id=23772,name=gridjob,source=127.0.0.1 command="/tmp/SLURM_job_script.8PMmVe",cpus=8i,current_working_directory="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm",group_id=2005i,nice=50i,node_count=1i,nodes="naboo147",partition="atlas",priority=4294877957i,standard_error="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",standard_input="/dev/null",standard_output="/home/sessiondir/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",start_time=1723457333i,state="COMPLETED",state_reason="None",submit_time=1723457333i,tasks=8i,time_limit=3600i,tres_billing=8,tres_cpu=8,tres_mem=16000,tres_node=1 1723464650000000000
|
||||
|
||||
slurm_nodes,name=naboo145,source=127.0.0.1 alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=27i,cpus=36i,free_memory=86423i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723464650000000000
|
||||
slurm_nodes,name=naboo146,source=127.0.0.1 alloc_cpu=0i,alloc_memory=0i,architecture="x86_64",cores=18i,cpu_load=0i,cpus=36i,free_memory=92151i,real_memory=94791i,slurmd_version="22.05.9",state="idle",tres_billing=36,tres_cpu=36,tres_mem=94791,weight=1i 1723464650000000000
|
||||
slurm_nodes,name=naboo147,source=127.0.0.1 alloc_cpu=36i,alloc_memory=56000i,architecture="x86_64",cores=18i,cpu_load=2969i,cpus=36i,free_memory=10908i,real_memory=94793i,slurmd_version="22.05.9",state="allocated",tres_billing=36,tres_cpu=36,tres_mem=94793,tres_used_cpu=36,tres_used_mem=56000,weight=1i 1723464650000000000
|
||||
|
||||
slurm_partitions,name=atlas,source=127.0.0.1 nodes="naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",state="UP",total_cpu=632i,total_nodes=21i,tres_billing=632,tres_cpu=632,tres_mem=1415207,tres_node=21 1723464650000000000
|
||||
|
|
@ -0,0 +1,224 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"statistics": {
|
||||
"rpcs_by_message_type": [
|
||||
{
|
||||
"message_type": "REQUEST_JOB_INFO",
|
||||
"type_id": 2003,
|
||||
"count": 73587,
|
||||
"average_time": 658,
|
||||
"total_time": 48479000
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_PARTITION_INFO",
|
||||
"type_id": 2009,
|
||||
"count": 158967,
|
||||
"average_time": 101,
|
||||
"total_time": 16185440
|
||||
},
|
||||
{
|
||||
"message_type": "MESSAGE_NODE_REGISTRATION_STATUS",
|
||||
"type_id": 1002,
|
||||
"count": 18690,
|
||||
"average_time": 137,
|
||||
"total_time": 2566758
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_COMPLETE_BATCH_SCRIPT",
|
||||
"type_id": 5018,
|
||||
"count": 12233,
|
||||
"average_time": 486,
|
||||
"total_time": 5946490
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_AUTH_TOKEN",
|
||||
"type_id": 5039,
|
||||
"count": 36,
|
||||
"average_time": 291,
|
||||
"total_time": 10489
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_BUILD_INFO",
|
||||
"type_id": 2001,
|
||||
"count": 28201,
|
||||
"average_time": 194,
|
||||
"total_time": 5486061
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_PING",
|
||||
"type_id": 1008,
|
||||
"count": 28201,
|
||||
"average_time": 103,
|
||||
"total_time": 2925195
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_NODE_INFO",
|
||||
"type_id": 2007,
|
||||
"count": 85379,
|
||||
"average_time": 175,
|
||||
"total_time": 15007960
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_FED_INFO",
|
||||
"type_id": 2049,
|
||||
"count": 24466,
|
||||
"average_time": 109,
|
||||
"total_time": 2681655
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_JOB_INFO_SINGLE",
|
||||
"type_id": 2021,
|
||||
"count": 24466,
|
||||
"average_time": 121,
|
||||
"total_time": 2963320
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_SUBMIT_BATCH_JOB",
|
||||
"type_id": 4003,
|
||||
"count": 12233,
|
||||
"average_time": 6504,
|
||||
"total_time": 79574600
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_STATS_INFO",
|
||||
"type_id": 2035,
|
||||
"count": 1040,
|
||||
"average_time": 61,
|
||||
"total_time": 64431
|
||||
},
|
||||
{
|
||||
"message_type": "MESSAGE_EPILOG_COMPLETE",
|
||||
"type_id": 6012,
|
||||
"count": 40,
|
||||
"average_time": 86,
|
||||
"total_time": 3455
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_RESERVATION_INFO",
|
||||
"type_id": 2024,
|
||||
"count": 1017,
|
||||
"average_time": 47,
|
||||
"total_time": 48788
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_LICENSE_INFO",
|
||||
"type_id": 1021,
|
||||
"count": 42,
|
||||
"average_time": 43,
|
||||
"total_time": 1823
|
||||
},
|
||||
{
|
||||
"message_type": "REQUEST_UPDATE_NODE",
|
||||
"type_id": 3002,
|
||||
"count": 2,
|
||||
"average_time": 415,
|
||||
"total_time": 830
|
||||
}
|
||||
],
|
||||
"rpcs_by_user": [
|
||||
{
|
||||
"user": "root",
|
||||
"user_id": 0,
|
||||
"count": 456365,
|
||||
"average_time": 224,
|
||||
"total_time": 102371523
|
||||
},
|
||||
{
|
||||
"user": "atl001",
|
||||
"user_id": 2006,
|
||||
"count": 11699,
|
||||
"average_time": 6611,
|
||||
"total_time": 77353396
|
||||
},
|
||||
{
|
||||
"user": "atl002",
|
||||
"user_id": 2007,
|
||||
"count": 120,
|
||||
"average_time": 3684,
|
||||
"total_time": 442106
|
||||
},
|
||||
{
|
||||
"user": "ops001",
|
||||
"user_id": 18006,
|
||||
"count": 298,
|
||||
"average_time": 4447,
|
||||
"total_time": 1325496
|
||||
},
|
||||
{
|
||||
"user": "ops003",
|
||||
"user_id": 18008,
|
||||
"count": 58,
|
||||
"average_time": 3732,
|
||||
"total_time": 216488
|
||||
},
|
||||
{
|
||||
"user": "ops002",
|
||||
"user_id": 18007,
|
||||
"count": 58,
|
||||
"average_time": 4088,
|
||||
"total_time": 237114
|
||||
},
|
||||
{
|
||||
"user": "99",
|
||||
"user_id": 99,
|
||||
"count": 2,
|
||||
"average_time": 86,
|
||||
"total_time": 172
|
||||
}
|
||||
],
|
||||
"parts_packed": 1,
|
||||
"req_time": 1723103198,
|
||||
"req_time_start": 1723075200,
|
||||
"server_thread_count": 3,
|
||||
"agent_queue_size": 0,
|
||||
"agent_count": 0,
|
||||
"agent_thread_count": 0,
|
||||
"dbd_agent_queue_size": 0,
|
||||
"gettimeofday_latency": 21,
|
||||
"schedule_cycle_max": 1116,
|
||||
"schedule_cycle_last": 298,
|
||||
"schedule_cycle_total": 960,
|
||||
"schedule_cycle_mean": 137,
|
||||
"schedule_cycle_mean_depth": 0,
|
||||
"schedule_cycle_per_minute": 2,
|
||||
"schedule_queue_length": 1,
|
||||
"jobs_submitted": 287,
|
||||
"jobs_started": 287,
|
||||
"jobs_completed": 287,
|
||||
"jobs_canceled": 0,
|
||||
"jobs_failed": 1,
|
||||
"jobs_pending": 0,
|
||||
"jobs_running": 100,
|
||||
"job_states_ts": 1723103172,
|
||||
"bf_backfilled_jobs": 1626,
|
||||
"bf_last_backfilled_jobs": 14,
|
||||
"bf_backfilled_het_jobs": 0,
|
||||
"bf_cycle_counter": 12,
|
||||
"bf_cycle_mean": 440,
|
||||
"bf_depth_mean": 1,
|
||||
"bf_depth_mean_try": 1,
|
||||
"bf_cycle_last": 387,
|
||||
"bf_cycle_max": 811,
|
||||
"bf_queue_len": 1,
|
||||
"bf_queue_len_mean": 1,
|
||||
"bf_table_size": 1,
|
||||
"bf_table_size_mean": 1,
|
||||
"bf_when_last_cycle": 1723102514,
|
||||
"bf_active": false
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,448 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"jobs": [
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1722989851,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo222",
|
||||
"flags": [
|
||||
"JOB_WAS_RUNNING",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.OjQEIH",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 2.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1722989851,
|
||||
"end_time": 1723205851,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 20464,
|
||||
"job_resources": {
|
||||
"nodes": "naboo222",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"0": {
|
||||
"cores": {
|
||||
"0": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo222",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 4000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "RUNNING",
|
||||
"last_sched_evaluation": 1722989851,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo222",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 0,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 2,
|
||||
"node_count": 1,
|
||||
"tasks": 1,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 1,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294881265,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1722989851,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm.comment",
|
||||
"submit_time": 1722989851,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=1,mem=2000M,node=1,billing=1",
|
||||
"tres_alloc_str": "cpu=2,mem=4000M,node=1,billing=2",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/zv6NDmqNcv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmXSJKDmFRYcQm"
|
||||
},
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1722990772,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo222",
|
||||
"flags": [
|
||||
"JOB_WAS_RUNNING",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.XTwtdj",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 2.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1722990772,
|
||||
"end_time": 1723206772,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 20468,
|
||||
"job_resources": {
|
||||
"nodes": "naboo222",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"1": {
|
||||
"cores": {
|
||||
"2": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo222",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 4000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "RUNNING",
|
||||
"last_sched_evaluation": 1722990772,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo222",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 0,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 2,
|
||||
"node_count": 1,
|
||||
"tasks": 1,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 1,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294881261,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1722990772,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n.comment",
|
||||
"submit_time": 1722990772,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=1,mem=2000M,node=1,billing=1",
|
||||
"tres_alloc_str": "cpu=2,mem=4000M,node=1,billing=2",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/ljvLDmQccv5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmcSJKDmor4c2n"
|
||||
},
|
||||
{
|
||||
"account": "",
|
||||
"accrue_time": 1723457333,
|
||||
"admin_comment": "",
|
||||
"array_job_id": 0,
|
||||
"array_task_id": null,
|
||||
"array_max_tasks": 0,
|
||||
"array_task_string": "",
|
||||
"association_id": 0,
|
||||
"batch_features": "",
|
||||
"batch_flag": true,
|
||||
"batch_host": "naboo147",
|
||||
"flags": [
|
||||
"TRES_STR_CALC",
|
||||
"JOB_MEM_SET"
|
||||
],
|
||||
"burst_buffer": "",
|
||||
"burst_buffer_state": "",
|
||||
"cluster": "local",
|
||||
"cluster_features": "",
|
||||
"command": "\/tmp\/SLURM_job_script.8PMmVe",
|
||||
"comment": "",
|
||||
"container": "",
|
||||
"contiguous": false,
|
||||
"core_spec": null,
|
||||
"thread_spec": null,
|
||||
"cores_per_socket": null,
|
||||
"billable_tres": 8.0,
|
||||
"cpus_per_task": null,
|
||||
"cpu_frequency_minimum": null,
|
||||
"cpu_frequency_maximum": null,
|
||||
"cpu_frequency_governor": null,
|
||||
"cpus_per_tres": "",
|
||||
"deadline": 0,
|
||||
"delay_boot": 0,
|
||||
"dependency": "",
|
||||
"derived_exit_code": 0,
|
||||
"eligible_time": 1723457333,
|
||||
"end_time": 1723463525,
|
||||
"excluded_nodes": "",
|
||||
"exit_code": 0,
|
||||
"features": "",
|
||||
"federation_origin": "",
|
||||
"federation_siblings_active": "",
|
||||
"federation_siblings_viable": "",
|
||||
"gres_detail": [
|
||||
],
|
||||
"group_id": 2005,
|
||||
"group_name": "atlas",
|
||||
"job_id": 23772,
|
||||
"job_resources": {
|
||||
"nodes": "naboo147",
|
||||
"allocated_hosts": 1,
|
||||
"allocated_nodes": [
|
||||
{
|
||||
"sockets": {
|
||||
"0": {
|
||||
"cores": {
|
||||
"3": "allocated",
|
||||
"10": "allocated",
|
||||
"12": "allocated",
|
||||
"13": "allocated"
|
||||
}
|
||||
},
|
||||
"1": {
|
||||
"cores": {
|
||||
"8": "allocated",
|
||||
"11": "allocated",
|
||||
"12": "allocated",
|
||||
"13": "allocated"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nodename": "naboo147",
|
||||
"cpus_used": 0,
|
||||
"memory_used": 0,
|
||||
"memory_allocated": 16000
|
||||
}
|
||||
]
|
||||
},
|
||||
"job_state": "COMPLETED",
|
||||
"last_sched_evaluation": 1723457333,
|
||||
"licenses": "",
|
||||
"max_cpus": 0,
|
||||
"max_nodes": 0,
|
||||
"mcs_label": "",
|
||||
"memory_per_tres": "",
|
||||
"name": "gridjob",
|
||||
"nodes": "naboo147",
|
||||
"nice": 50,
|
||||
"tasks_per_core": null,
|
||||
"tasks_per_node": 8,
|
||||
"tasks_per_socket": null,
|
||||
"tasks_per_board": 0,
|
||||
"cpus": 8,
|
||||
"node_count": 1,
|
||||
"tasks": 8,
|
||||
"het_job_id": 0,
|
||||
"het_job_id_set": "",
|
||||
"het_job_offset": 0,
|
||||
"partition": "atlas",
|
||||
"prefer": "",
|
||||
"memory_per_node": null,
|
||||
"memory_per_cpu": 2000,
|
||||
"minimum_cpus_per_node": 8,
|
||||
"minimum_tmp_disk_per_node": 0,
|
||||
"preempt_time": 0,
|
||||
"pre_sus_time": 0,
|
||||
"priority": 4294877957,
|
||||
"profile": null,
|
||||
"qos": "",
|
||||
"reboot": false,
|
||||
"required_nodes": "",
|
||||
"requeue": false,
|
||||
"resize_time": 0,
|
||||
"restart_cnt": 0,
|
||||
"resv_name": "",
|
||||
"shared": null,
|
||||
"show_flags": [
|
||||
"SHOW_ALL",
|
||||
"SHOW_DETAIL",
|
||||
"SHOW_LOCAL"
|
||||
],
|
||||
"sockets_per_board": 0,
|
||||
"sockets_per_node": null,
|
||||
"start_time": 1723457333,
|
||||
"state_description": "",
|
||||
"state_reason": "None",
|
||||
"standard_error": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",
|
||||
"standard_input": "\/dev\/null",
|
||||
"standard_output": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm.comment",
|
||||
"submit_time": 1723457333,
|
||||
"suspend_time": 0,
|
||||
"system_comment": "",
|
||||
"time_limit": 3600,
|
||||
"time_minimum": 0,
|
||||
"threads_per_core": null,
|
||||
"tres_bind": "",
|
||||
"tres_freq": "",
|
||||
"tres_per_job": "",
|
||||
"tres_per_node": "",
|
||||
"tres_per_socket": "",
|
||||
"tres_per_task": "",
|
||||
"tres_req_str": "cpu=8,mem=16000M,node=1,billing=8",
|
||||
"tres_alloc_str": "cpu=8,mem=16000M,node=1,billing=8",
|
||||
"user_id": 2006,
|
||||
"user_name": "atl001",
|
||||
"wckey": "",
|
||||
"current_working_directory": "\/home\/sessiondir\/nN8KDmNMPx5nKG01gq4B3BRpm7wtQmABFKDmbnHPDmeIKKDml0xJjm"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"nodes": [
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719400973,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 27,
|
||||
"extra": "",
|
||||
"free_memory": 86423,
|
||||
"cpus": 36,
|
||||
"last_busy": 1723102876,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo145",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo145",
|
||||
"hostname": "naboo145",
|
||||
"state": "idle",
|
||||
"state_flags": [
|
||||
"DRAIN"
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94791,
|
||||
"reason": "Kill task failed",
|
||||
"reason_changed_at": 1723077306,
|
||||
"reason_set_by_user": "root",
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94791M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 0,
|
||||
"alloc_cpus": 0,
|
||||
"idle_cpus": 36,
|
||||
"tres_used": null,
|
||||
"tres_weighted": 0.0
|
||||
},
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719400759,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 0,
|
||||
"extra": "",
|
||||
"free_memory": 92151,
|
||||
"cpus": 36,
|
||||
"last_busy": 1722780995,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo146",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo146",
|
||||
"hostname": "naboo146",
|
||||
"state": "idle",
|
||||
"state_flags": [
|
||||
"DRAIN"
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94791,
|
||||
"reason": "Kill task failed",
|
||||
"reason_changed_at": 1722748927,
|
||||
"reason_set_by_user": "root",
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94791M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 0,
|
||||
"alloc_cpus": 0,
|
||||
"idle_cpus": 36,
|
||||
"tres_used": null,
|
||||
"tres_weighted": 0.0
|
||||
},
|
||||
{
|
||||
"architecture": "x86_64",
|
||||
"burstbuffer_network_address": "",
|
||||
"boards": 1,
|
||||
"boot_time": 1719406605,
|
||||
"comment": "",
|
||||
"cores": 18,
|
||||
"cpu_binding": 0,
|
||||
"cpu_load": 2969,
|
||||
"extra": "",
|
||||
"free_memory": 10908,
|
||||
"cpus": 36,
|
||||
"last_busy": 1722881704,
|
||||
"features": "",
|
||||
"active_features": "",
|
||||
"gres": "",
|
||||
"gres_drained": "N\/A",
|
||||
"gres_used": "",
|
||||
"mcs_label": "",
|
||||
"name": "naboo147",
|
||||
"next_state_after_reboot": "invalid",
|
||||
"address": "naboo147",
|
||||
"hostname": "naboo147",
|
||||
"state": "allocated",
|
||||
"state_flags": [
|
||||
],
|
||||
"next_state_after_reboot_flags": [
|
||||
],
|
||||
"operating_system": "Linux 5.14.0-427.13.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Apr 30 18:22:29 EDT 2024",
|
||||
"owner": null,
|
||||
"partitions": [
|
||||
"atlas"
|
||||
],
|
||||
"port": 6818,
|
||||
"real_memory": 94793,
|
||||
"reason": "",
|
||||
"reason_changed_at": 0,
|
||||
"reason_set_by_user": null,
|
||||
"slurmd_start_time": 1720394759,
|
||||
"sockets": 2,
|
||||
"threads": 1,
|
||||
"temporary_disk": 0,
|
||||
"weight": 1,
|
||||
"tres": "cpu=36,mem=94793M,billing=36",
|
||||
"slurmd_version": "22.05.9",
|
||||
"alloc_memory": 56000,
|
||||
"alloc_cpus": 36,
|
||||
"idle_cpus": 0,
|
||||
"tres_used": "cpu=36,mem=56000M",
|
||||
"tres_weighted": 36.0
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"partitions": [
|
||||
{
|
||||
"flags": [
|
||||
"default"
|
||||
],
|
||||
"preemption_mode": [
|
||||
"disabled"
|
||||
],
|
||||
"allowed_allocation_nodes": "",
|
||||
"allowed_accounts": "",
|
||||
"allowed_groups": "",
|
||||
"allowed_qos": "",
|
||||
"alternative": "",
|
||||
"billing_weights": "",
|
||||
"default_memory_per_cpu": null,
|
||||
"default_memory_per_node": null,
|
||||
"default_time_limit": null,
|
||||
"denied_accounts": "",
|
||||
"denied_qos": "",
|
||||
"preemption_grace_time": 0,
|
||||
"maximum_cpus_per_node": -1,
|
||||
"maximum_memory_per_cpu": null,
|
||||
"maximum_memory_per_node": null,
|
||||
"maximum_nodes_per_job": -1,
|
||||
"max_time_limit": -1,
|
||||
"min nodes per job": 0,
|
||||
"name": "atlas",
|
||||
"nodes": "naboo145,naboo146,naboo147,naboo216,naboo219,naboo222,naboo224,naboo225,naboo227,naboo228,naboo229,naboo234,naboo235,naboo236,naboo237,naboo238,naboo239,naboo240,naboo241,naboo242,naboo243",
|
||||
"over_time_limit": null,
|
||||
"priority_job_factor": 1,
|
||||
"priority_tier": 1,
|
||||
"qos": "",
|
||||
"state": "UP",
|
||||
"total_cpus": 632,
|
||||
"total_nodes": 21,
|
||||
"tres": "cpu=632,mem=1415207M,node=21,billing=632"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"meta": {
|
||||
"plugin": {
|
||||
"type": "openapi\/v0.0.38",
|
||||
"name": "Slurm OpenAPI v0.0.38"
|
||||
},
|
||||
"Slurm": {
|
||||
"version": {
|
||||
"major": 22,
|
||||
"micro": 9,
|
||||
"minor": 5
|
||||
},
|
||||
"release": "22.05.9"
|
||||
}
|
||||
},
|
||||
"errors": [
|
||||
],
|
||||
"reservations": [
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[[inputs.slurm]]
|
||||
url = "willBeOverriden"
|
||||
response_timeout = "5s"
|
||||
# enabled_endpoints = []
|
||||
|
||||
## Credentials for JWT-based authentication
|
||||
username = "root"
|
||||
token = "topSecret"
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"statistics": {}
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"jobs": []
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"nodes": []
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"partitions": []
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"meta": {},
|
||||
"errors": [],
|
||||
"reservations": []
|
||||
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
[[inputs.slurm]]
|
||||
url = "willBeOverriden"
|
||||
response_timeout = "5s"
|
||||
enabled_endpoints = []
|
||||
|
||||
## Credentials for JWT-based authentication
|
||||
username = "root"
|
||||
token = "topSecret"
|
||||
Loading…
Reference in New Issue