feat(inputs.ipmi_sensor): Collect additional commands (#15495)
This commit is contained in:
parent
784ede96f8
commit
6fb42764e1
|
|
@ -44,42 +44,48 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
```toml @sample.conf
|
||||
# Read metrics from the bare metal servers via IPMI
|
||||
[[inputs.ipmi_sensor]]
|
||||
## optionally specify the path to the ipmitool executable
|
||||
## Specify the path to the ipmitool executable
|
||||
# path = "/usr/bin/ipmitool"
|
||||
##
|
||||
|
||||
## Use sudo
|
||||
## Setting 'use_sudo' to true will make use of sudo to run ipmitool.
|
||||
## Sudo must be configured to allow the telegraf user to run ipmitool
|
||||
## without a password.
|
||||
# use_sudo = false
|
||||
##
|
||||
## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR
|
||||
# privilege = "ADMINISTRATOR"
|
||||
##
|
||||
## optionally specify one or more servers via a url matching
|
||||
|
||||
## Servers
|
||||
## Specify one or more servers via a url. If no servers are specified, local
|
||||
## machine sensor stats will be queried. Uses the format:
|
||||
## [username[:password]@][protocol[(address)]]
|
||||
## e.g.
|
||||
## root:passwd@lan(127.0.0.1)
|
||||
##
|
||||
## if no servers are specified, local machine sensor stats will be queried
|
||||
##
|
||||
## e.g. root:passwd@lan(127.0.0.1)
|
||||
# servers = ["USERID:PASSW0RD@lan(192.168.1.1)"]
|
||||
|
||||
## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid
|
||||
## gaps or overlap in pulled data
|
||||
interval = "30s"
|
||||
## Session privilege level
|
||||
## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR
|
||||
# privilege = "ADMINISTRATOR"
|
||||
|
||||
## Timeout for the ipmitool command to complete. Default is 20 seconds.
|
||||
timeout = "20s"
|
||||
## Timeout
|
||||
## Timeout for the ipmitool command to complete.
|
||||
# timeout = "20s"
|
||||
|
||||
## Schema Version: (Optional, defaults to version 1)
|
||||
metric_version = 2
|
||||
## Metric schema version
|
||||
## See the plugin readme for more information on schema versioning.
|
||||
# metric_version = 1
|
||||
|
||||
## Sensors to collect
|
||||
## Choose from:
|
||||
## * sdr: default, collects sensor data records
|
||||
## * chassis_power_status: collects the power status of the chassis
|
||||
## * dcmi_power_reading: collects the power readings from the Data Center Management Interface
|
||||
# sensors = ["sdr"]
|
||||
|
||||
## Hex key
|
||||
## Optionally provide the hex key for the IPMI connection.
|
||||
# hex_key = ""
|
||||
|
||||
## Cache
|
||||
## If ipmitool should use a cache
|
||||
## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04)
|
||||
## the cache file may not work well for you if some sensors come up late
|
||||
## Using a cache can speed up collection times depending on your device.
|
||||
# use_cache = false
|
||||
|
||||
## Path to the ipmitools cache file (defaults to OS temp dir)
|
||||
|
|
@ -87,6 +93,17 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
# cache_path = ""
|
||||
```
|
||||
|
||||
## Sensors
|
||||
|
||||
By default the plugin collects data via the `sdr` command and returns those
|
||||
values. However, there are additional sensor options that can be enabled:
|
||||
|
||||
- `chassis_power_status` - returns 0 or 1 depending on the output of
|
||||
`chassis power status`
|
||||
- `dcmi_power_reading` - Returns the watt values from `dcmi power reading`
|
||||
|
||||
These sensor options are not affected by the metric version.
|
||||
|
||||
## Metrics
|
||||
|
||||
Version 1 schema:
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ import (
|
|||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/config"
|
||||
"github.com/influxdata/telegraf/internal"
|
||||
"github.com/influxdata/telegraf/internal/choice"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
|
|
@ -31,21 +32,22 @@ var (
|
|||
reV2ParseLine = regexp.MustCompile(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`)
|
||||
reV2ParseDescription = regexp.MustCompile(`^(?P<analogValue>-?[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`)
|
||||
reV2ParseUnit = regexp.MustCompile(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`)
|
||||
dcmiPowerReading = regexp.MustCompile(`^(?P<name>[^|]*)\:(?P<value>.* Watts)?`)
|
||||
)
|
||||
|
||||
// Ipmi stores the configuration values for the ipmi_sensor input plugin
|
||||
type Ipmi struct {
|
||||
Path string
|
||||
Privilege string
|
||||
HexKey string `toml:"hex_key"`
|
||||
Servers []string
|
||||
Timeout config.Duration
|
||||
MetricVersion int
|
||||
UseSudo bool
|
||||
UseCache bool
|
||||
CachePath string
|
||||
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
Path string `toml:"path"`
|
||||
Privilege string `toml:"privilege"`
|
||||
HexKey string `toml:"hex_key"`
|
||||
Servers []string `toml:"servers"`
|
||||
Sensors []string `toml:"sensors"`
|
||||
Timeout config.Duration `toml:"timeout"`
|
||||
MetricVersion int `toml:"metric_version"`
|
||||
UseSudo bool `toml:"use_sudo"`
|
||||
UseCache bool `toml:"use_cache"`
|
||||
CachePath string `toml:"cache_path"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
}
|
||||
|
||||
const cmd = "ipmitool"
|
||||
|
|
@ -66,6 +68,12 @@ func (m *Ipmi) Init() error {
|
|||
if m.CachePath == "" {
|
||||
m.CachePath = os.TempDir()
|
||||
}
|
||||
if len(m.Sensors) == 0 {
|
||||
m.Sensors = []string{"sdr"}
|
||||
}
|
||||
if err := choice.CheckSlice(m.Sensors, []string{"sdr", "chassis_power_status", "dcmi_power_reading"}); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check parameters
|
||||
if m.Path == "" {
|
||||
|
|
@ -87,24 +95,37 @@ func (m *Ipmi) Gather(acc telegraf.Accumulator) error {
|
|||
wg.Add(1)
|
||||
go func(a telegraf.Accumulator, s string) {
|
||||
defer wg.Done()
|
||||
err := m.parse(a, s)
|
||||
if err != nil {
|
||||
a.AddError(err)
|
||||
for _, sensor := range m.Sensors {
|
||||
a.AddError(m.parse(a, s, sensor))
|
||||
}
|
||||
}(acc, server)
|
||||
}
|
||||
wg.Wait()
|
||||
} else {
|
||||
err := m.parse(acc, "")
|
||||
if err != nil {
|
||||
return err
|
||||
for _, sensor := range m.Sensors {
|
||||
err := m.parse(acc, "", sensor)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
|
||||
func (m *Ipmi) parse(acc telegraf.Accumulator, server string, sensor string) error {
|
||||
var command []string
|
||||
switch sensor {
|
||||
case "sdr":
|
||||
command = append(command, "sdr")
|
||||
case "chassis_power_status":
|
||||
command = append(command, "chassis", "power", "status")
|
||||
case "dcmi_power_reading":
|
||||
command = append(command, "dcmi", "power", "reading")
|
||||
default:
|
||||
return fmt.Errorf("unknown sensor type %q", sensor)
|
||||
}
|
||||
|
||||
opts := make([]string, 0)
|
||||
hostname := ""
|
||||
if server != "" {
|
||||
|
|
@ -112,7 +133,9 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
|
|||
hostname = conn.Hostname
|
||||
opts = conn.options()
|
||||
}
|
||||
opts = append(opts, "sdr")
|
||||
|
||||
opts = append(opts, command...)
|
||||
|
||||
if m.UseCache {
|
||||
cacheFile := filepath.Join(m.CachePath, server+"_ipmi_cache")
|
||||
_, err := os.Stat(cacheFile)
|
||||
|
|
@ -134,7 +157,7 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
|
|||
}
|
||||
opts = append(opts, "-S", cacheFile)
|
||||
}
|
||||
if m.MetricVersion == 2 {
|
||||
if m.MetricVersion == 2 && sensor == "sdr" {
|
||||
opts = append(opts, "elist")
|
||||
}
|
||||
name := m.Path
|
||||
|
|
@ -149,10 +172,78 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("failed to run command %q: %w - %s", strings.Join(sanitizeIPMICmd(cmd.Args), " "), err, string(out))
|
||||
}
|
||||
if m.MetricVersion == 2 {
|
||||
return m.parseV2(acc, hostname, out, timestamp)
|
||||
|
||||
switch sensor {
|
||||
case "sdr":
|
||||
if m.MetricVersion == 2 {
|
||||
return m.parseV2(acc, hostname, out, timestamp)
|
||||
} else {
|
||||
return m.parseV1(acc, hostname, out, timestamp)
|
||||
}
|
||||
case "chassis_power_status":
|
||||
return m.parseChassisPowerStatus(acc, hostname, out, timestamp)
|
||||
case "dcmi_power_reading":
|
||||
return m.parseDCMIPowerReading(acc, hostname, out, timestamp)
|
||||
}
|
||||
return m.parseV1(acc, hostname, out, timestamp)
|
||||
|
||||
return fmt.Errorf("unknown sensor type %q", sensor)
|
||||
}
|
||||
|
||||
func (m *Ipmi) parseChassisPowerStatus(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
|
||||
// each line will look something like
|
||||
// Chassis Power is on
|
||||
// Chassis Power is off
|
||||
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.Contains(line, "Chassis Power is on") {
|
||||
acc.AddFields("ipmi_sensor", map[string]interface{}{"value": 1}, map[string]string{"name": "chassis_power_status", "server": hostname}, measuredAt)
|
||||
} else if strings.Contains(line, "Chassis Power is off") {
|
||||
acc.AddFields("ipmi_sensor", map[string]interface{}{"value": 0}, map[string]string{"name": "chassis_power_status", "server": hostname}, measuredAt)
|
||||
}
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
func (m *Ipmi) parseDCMIPowerReading(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
|
||||
// each line will look something like
|
||||
// Current Power Reading : 0.000
|
||||
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
|
||||
for scanner.Scan() {
|
||||
ipmiFields := m.extractFieldsFromRegex(dcmiPowerReading, scanner.Text())
|
||||
if len(ipmiFields) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"name": transform(ipmiFields["name"]),
|
||||
}
|
||||
|
||||
// tag the server is we have one
|
||||
if hostname != "" {
|
||||
tags["server"] = hostname
|
||||
}
|
||||
|
||||
fields := make(map[string]interface{})
|
||||
valunit := strings.Split(ipmiFields["value"], " ")
|
||||
if len(valunit) != 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
var err error
|
||||
fields["value"], err = aToFloat(valunit[0])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if len(valunit) > 1 {
|
||||
tags["unit"] = transform(valunit[1])
|
||||
}
|
||||
|
||||
acc.AddFields("ipmi_sensor", fields, tags, measuredAt)
|
||||
}
|
||||
|
||||
return scanner.Err()
|
||||
}
|
||||
|
||||
func (m *Ipmi) parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measuredAt time.Time) error {
|
||||
|
|
|
|||
|
|
@ -771,6 +771,128 @@ func Test_parseV2(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func Test_parsePowerStatus(t *testing.T) {
|
||||
type args struct {
|
||||
hostname string
|
||||
cmdOut []byte
|
||||
measuredAt time.Time
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
expected []telegraf.Metric
|
||||
}{
|
||||
{
|
||||
name: "Test correct parse power status off",
|
||||
args: args{
|
||||
hostname: "host",
|
||||
cmdOut: []byte("Chassis Power is off"),
|
||||
measuredAt: time.Now(),
|
||||
},
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "chassis_power_status",
|
||||
"server": "host",
|
||||
},
|
||||
map[string]interface{}{"value": 0},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Test correct parse power status on",
|
||||
args: args{
|
||||
hostname: "host",
|
||||
cmdOut: []byte("Chassis Power is on"),
|
||||
measuredAt: time.Now(),
|
||||
},
|
||||
expected: []telegraf.Metric{
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "chassis_power_status",
|
||||
"server": "host",
|
||||
},
|
||||
map[string]interface{}{"value": 1},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
ipmi := &Ipmi{
|
||||
Log: testutil.Logger{},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
err := ipmi.parseChassisPowerStatus(&acc, tt.args.hostname, tt.args.cmdOut, tt.args.measuredAt)
|
||||
require.NoError(t, err)
|
||||
testutil.RequireMetricsEqual(t, tt.expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_parsePowerReading(t *testing.T) {
|
||||
output := `Instantaneous power reading: 167 Watts
|
||||
Minimum during sampling period: 124 Watts
|
||||
Maximum during sampling period: 422 Watts
|
||||
Average power reading over sample period: 156 Watts
|
||||
IPMI timestamp: Mon Aug 1 21:22:51 2016
|
||||
Sampling period: 00699043 Seconds.
|
||||
Power reading state is: activated
|
||||
`
|
||||
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "instantaneous_power_reading",
|
||||
"server": "host",
|
||||
"unit": "watts",
|
||||
},
|
||||
map[string]interface{}{"value": float64(167)},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "minimum_during_sampling_period",
|
||||
"server": "host",
|
||||
"unit": "watts",
|
||||
},
|
||||
map[string]interface{}{"value": float64(124)},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "maximum_during_sampling_period",
|
||||
"server": "host",
|
||||
"unit": "watts",
|
||||
},
|
||||
map[string]interface{}{"value": float64(422)},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
testutil.MustMetric("ipmi_sensor",
|
||||
map[string]string{
|
||||
"name": "average_power_reading_over_sample_period",
|
||||
"server": "host",
|
||||
"unit": "watts",
|
||||
},
|
||||
map[string]interface{}{"value": float64(156)},
|
||||
time.Unix(0, 0),
|
||||
),
|
||||
}
|
||||
|
||||
ipmi := &Ipmi{
|
||||
Log: testutil.Logger{},
|
||||
}
|
||||
|
||||
var acc testutil.Accumulator
|
||||
err := ipmi.parseDCMIPowerReading(&acc, "host", []byte(output), time.Now())
|
||||
require.NoError(t, err)
|
||||
testutil.RequireMetricsEqual(t, expected, acc.GetTelegrafMetrics(), testutil.IgnoreTime())
|
||||
}
|
||||
|
||||
func TestSanitizeIPMICmd(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
|
|
|||
|
|
@ -1,41 +1,47 @@
|
|||
# Read metrics from the bare metal servers via IPMI
|
||||
[[inputs.ipmi_sensor]]
|
||||
## optionally specify the path to the ipmitool executable
|
||||
## Specify the path to the ipmitool executable
|
||||
# path = "/usr/bin/ipmitool"
|
||||
##
|
||||
|
||||
## Use sudo
|
||||
## Setting 'use_sudo' to true will make use of sudo to run ipmitool.
|
||||
## Sudo must be configured to allow the telegraf user to run ipmitool
|
||||
## without a password.
|
||||
# use_sudo = false
|
||||
##
|
||||
## optionally force session privilege level. Can be CALLBACK, USER, OPERATOR, ADMINISTRATOR
|
||||
# privilege = "ADMINISTRATOR"
|
||||
##
|
||||
## optionally specify one or more servers via a url matching
|
||||
|
||||
## Servers
|
||||
## Specify one or more servers via a url. If no servers are specified, local
|
||||
## machine sensor stats will be queried. Uses the format:
|
||||
## [username[:password]@][protocol[(address)]]
|
||||
## e.g.
|
||||
## root:passwd@lan(127.0.0.1)
|
||||
##
|
||||
## if no servers are specified, local machine sensor stats will be queried
|
||||
##
|
||||
## e.g. root:passwd@lan(127.0.0.1)
|
||||
# servers = ["USERID:PASSW0RD@lan(192.168.1.1)"]
|
||||
|
||||
## Recommended: use metric 'interval' that is a multiple of 'timeout' to avoid
|
||||
## gaps or overlap in pulled data
|
||||
interval = "30s"
|
||||
## Session privilege level
|
||||
## Choose from: CALLBACK, USER, OPERATOR, ADMINISTRATOR
|
||||
# privilege = "ADMINISTRATOR"
|
||||
|
||||
## Timeout for the ipmitool command to complete. Default is 20 seconds.
|
||||
timeout = "20s"
|
||||
## Timeout
|
||||
## Timeout for the ipmitool command to complete.
|
||||
# timeout = "20s"
|
||||
|
||||
## Schema Version: (Optional, defaults to version 1)
|
||||
metric_version = 2
|
||||
## Metric schema version
|
||||
## See the plugin readme for more information on schema versioning.
|
||||
# metric_version = 1
|
||||
|
||||
## Sensors to collect
|
||||
## Choose from:
|
||||
## * sdr: default, collects sensor data records
|
||||
## * chassis_power_status: collects the power status of the chassis
|
||||
## * dcmi_power_reading: collects the power readings from the Data Center Management Interface
|
||||
# sensors = ["sdr"]
|
||||
|
||||
## Hex key
|
||||
## Optionally provide the hex key for the IPMI connection.
|
||||
# hex_key = ""
|
||||
|
||||
## Cache
|
||||
## If ipmitool should use a cache
|
||||
## for me ipmitool runs about 2 to 10 times faster with cache enabled on HP G10 servers (when using ubuntu20.04)
|
||||
## the cache file may not work well for you if some sensors come up late
|
||||
## Using a cache can speed up collection times depending on your device.
|
||||
# use_cache = false
|
||||
|
||||
## Path to the ipmitools cache file (defaults to OS temp dir)
|
||||
|
|
|
|||
Loading…
Reference in New Issue