New Intel PowerStat input plugin (#8488)
This commit is contained in:
parent
99287d89e0
commit
9166a16577
|
|
@ -214,6 +214,7 @@ For documentation on the latest development code see the [documentation index][d
|
|||
* [influxdb](./plugins/inputs/influxdb)
|
||||
* [influxdb_listener](./plugins/inputs/influxdb_listener)
|
||||
* [influxdb_v2_listener](./plugins/inputs/influxdb_v2_listener)
|
||||
* [intel_powerstat](plugins/inputs/intel_powerstat)
|
||||
* [intel_rdt](./plugins/inputs/intel_rdt)
|
||||
* [internal](./plugins/inputs/internal)
|
||||
* [interrupts](./plugins/inputs/interrupts)
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ import (
|
|||
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_v2_listener"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/intel_powerstat"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/intel_rdt"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
|
||||
_ "github.com/influxdata/telegraf/plugins/inputs/interrupts"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,206 @@
|
|||
# Intel PowerStat Input Plugin
|
||||
|
||||
Telemetry frameworks allow users to monitor critical platform level metrics.
|
||||
Key source of platform telemetry is power domain that is beneficial for MANO/Monitoring&Analytics systems
|
||||
to take preventive/corrective actions based on platform busyness, CPU temperature, actual CPU utilization
|
||||
and power statistics. Main use cases are power saving and workload migration.
|
||||
|
||||
Intel PowerStat plugin supports Intel based platforms and assumes presence of Linux based OS.
|
||||
|
||||
### Configuration:
|
||||
```toml
|
||||
# Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization.
|
||||
[[inputs.intel_powerstat]]
|
||||
## All global metrics are always collected by Intel PowerStat plugin.
|
||||
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
|
||||
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
|
||||
## telemetry will be exposed by Intel PowerStat plugin.
|
||||
## Supported options:
|
||||
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
|
||||
# cpu_metrics = []
|
||||
```
|
||||
### Example: Configuration with no per-CPU telemetry
|
||||
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
cpu_metrics = []
|
||||
```
|
||||
|
||||
### Example: Configuration with no per-CPU telemetry - equivalent case
|
||||
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
```
|
||||
|
||||
### Example: Configuration for CPU Temperature and Frequency only
|
||||
This configuration allows getting global metrics plus subset of per-CPU metrics (CPU Temperature and Current Frequency):
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
|
||||
```
|
||||
|
||||
### Example: Configuration with all available metrics
|
||||
This configuration allows getting global metrics and all per-CPU metrics:
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"]
|
||||
```
|
||||
|
||||
### SW Dependencies:
|
||||
Plugin is based on Linux Kernel modules that expose specific metrics over `sysfs` or `devfs` interfaces.
|
||||
The following dependencies are expected by plugin:
|
||||
- _intel-rapl_ module which exposes Intel Runtime Power Limiting metrics over `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`),
|
||||
- _msr_ kernel module that provides access to processor model specific registers over `devfs` (`/dev/cpu/cpu%d/msr`),
|
||||
- _cpufreq_ kernel module - which exposes per-CPU Frequency over `sysfs` (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`).
|
||||
|
||||
Minimum kernel version required is 3.13 to satisfy all requirements.
|
||||
|
||||
Please make sure that kernel modules are loaded and running. You might have to manually enable them by using `modprobe`.
|
||||
Exact commands to be executed are:
|
||||
```
|
||||
sudo modprobe cpufreq-stats
|
||||
sudo modprobe msr
|
||||
sudo modprobe intel_rapl
|
||||
```
|
||||
|
||||
**Telegraf with Intel PowerStat plugin enabled may require root access to read model specific registers (MSRs)**
|
||||
to retrieve data for calculation of most critical per-CPU specific metrics:
|
||||
- `cpu_busy_frequency_mhz`
|
||||
- `cpu_temperature_celsius`
|
||||
- `cpu_c1_state_residency_percent`
|
||||
- `cpu_c6_state_residency_percent`
|
||||
- `cpu_busy_cycles_percent`
|
||||
|
||||
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
|
||||
|
||||
### HW Dependencies:
|
||||
Specific metrics require certain processor features to be present, otherwise Intel PowerStat plugin won't be able to
|
||||
read them. When using Linux Kernel based OS, user can detect supported processor features reading `/proc/cpuinfo` file.
|
||||
Plugin assumes crucial properties are the same for all CPU cores in the system.
|
||||
The following processor properties are examined in more detail in this section:
|
||||
processor _cpu family_, _model_ and _flags_.
|
||||
The following processor properties are required by the plugin:
|
||||
- Processor _cpu family_ must be Intel (0x6) - since data used by the plugin assumes Intel specific
|
||||
model specific registers for all features
|
||||
- The following processor flags shall be present:
|
||||
- "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect
|
||||
the following metrics: _powerstat_core.cpu_temperature_, _powerstat_core.cpu_busy_frequency_,
|
||||
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_, _powerstat_core._cpu_c6_state_residency_
|
||||
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat_core.cpu_busy_frequency_,
|
||||
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_
|
||||
- "_dts_" shall be present to collect _powerstat_core.cpu_temperature_
|
||||
- Processor _Model number_ must be one of the following values for plugin to read _powerstat_core.cpu_c1_state_residency_
|
||||
and _powerstat_core.cpu_c6_state_residency_ metrics:
|
||||
|
||||
| Model number | Processor name |
|
||||
|-----|-------------|
|
||||
| 0x37 | Intel Atom® Bay Trail |
|
||||
| 0x4D | Intel Atom® Avaton |
|
||||
| 0x5C | Intel Atom® Apollo Lake |
|
||||
| 0x5F | Intel Atom® Denverton |
|
||||
| 0x7A | Intel Atom® Goldmont |
|
||||
| 0x4C | Intel Atom® Airmont |
|
||||
| 0x86 | Intel Atom® Jacobsville |
|
||||
| 0x96 | Intel Atom® Elkhart Lake |
|
||||
| 0x9C | Intel Atom® Jasper Lake |
|
||||
| 0x1A | Intel Nehalem-EP |
|
||||
| 0x1E | Intel Nehalem |
|
||||
| 0x1F | Intel Nehalem-G |
|
||||
| 0x2E | Intel Nehalem-EX |
|
||||
| 0x25 | Intel Westmere |
|
||||
| 0x2C | Intel Westmere-EP |
|
||||
| 0x2F | Intel Westmere-EX |
|
||||
| 0x2A | Intel Sandybridge |
|
||||
| 0x2D | Intel Sandybridge-X |
|
||||
| 0x3A | Intel Ivybridge |
|
||||
| 0x3E | Intel Ivybridge-X |
|
||||
| 0x4E | Intel Atom® Silvermont-MID |
|
||||
| 0x5E | Intel Skylake |
|
||||
| 0x55 | Intel Skylake-X |
|
||||
| 0x8E | Intel Kabylake-L |
|
||||
| 0x9E | Intel Kabylake |
|
||||
| 0x6A | Intel Icelake-X |
|
||||
| 0x6C | Intel Icelake-D |
|
||||
| 0x7D | Intel Icelake |
|
||||
| 0x7E | Intel Icelake-L |
|
||||
| 0x9D | Intel Icelake-NNPI |
|
||||
| 0x3C | Intel Haswell |
|
||||
| 0x3F | Intel Haswell-X |
|
||||
| 0x45 | Intel Haswell-L |
|
||||
| 0x46 | Intel Haswell-G |
|
||||
| 0x3D | Intel Broadwell |
|
||||
| 0x47 | Intel Broadwell-G |
|
||||
| 0x4F | Intel Broadwell-X |
|
||||
| 0x56 | Intel Broadwell-D |
|
||||
| 0x66 | Intel Cannonlake-L |
|
||||
| 0x57 | Intel Xeon® PHI Knights Landing |
|
||||
| 0x85 | Intel Xeon® PHI Knights Mill |
|
||||
| 0xA5 | Intel CometLake |
|
||||
| 0xA6 | Intel CometLake-L |
|
||||
| 0x8F | Intel Sapphire Rapids X |
|
||||
| 0x8C | Intel TigerLake-L |
|
||||
| 0x8D | Intel TigerLake |
|
||||
|
||||
### Metrics
|
||||
All metrics collected by Intel PowerStat plugin are collected in fixed intervals.
|
||||
Metrics that reports processor C-state residency or power are calculated over elapsed intervals.
|
||||
When starting to measure metrics, plugin skips first iteration of metrics if they are based on deltas with previous value.
|
||||
|
||||
**The following measurements are supported by Intel PowerStat plugin:**
|
||||
- powerstat_core
|
||||
|
||||
- The following Tags are returned by plugin with powerstat_core measurements:
|
||||
|
||||
| Tag | Description |
|
||||
|-----|-------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
| `core_id` | ID of physical processor core |
|
||||
| `cpu_id` | ID of logical processor core |
|
||||
Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key)
|
||||
while core_id and package_id tags are additional topology information.
|
||||
|
||||
- Available metrics for powerstat_core measurement
|
||||
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----|-------------|-----|
|
||||
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
|
||||
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
|
||||
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
|
||||
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
|
||||
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
|
||||
| `cpu_busy_cycles_percent` | CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
|
||||
|
||||
|
||||
|
||||
- powerstat_package
|
||||
|
||||
- The following Tags are returned by plugin with powerstat_package measurements:
|
||||
|
||||
| Tag | Description |
|
||||
|-----|-------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
Measurement powerstat_package metrics are collected per processor package - _package_id_ tag indicates which
|
||||
package metric refers to.
|
||||
|
||||
- Available metrics for powerstat_package measurement
|
||||
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----|-------------|-----|
|
||||
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
|
||||
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
|
||||
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
|
||||
|
||||
|
||||
### Example Output:
|
||||
|
||||
```
|
||||
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_cycles_percent=0.8 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000
|
||||
```
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
package intel_powerstat
|
||||
|
||||
type msrData struct {
|
||||
mperf uint64
|
||||
aperf uint64
|
||||
timeStampCounter uint64
|
||||
c3 uint64
|
||||
c6 uint64
|
||||
c7 uint64
|
||||
throttleTemp uint64
|
||||
temp uint64
|
||||
mperfDelta uint64
|
||||
aperfDelta uint64
|
||||
timeStampCounterDelta uint64
|
||||
c3Delta uint64
|
||||
c6Delta uint64
|
||||
c7Delta uint64
|
||||
readDate int64
|
||||
}
|
||||
|
||||
type raplData struct {
|
||||
dramCurrentEnergy float64
|
||||
socketCurrentEnergy float64
|
||||
socketEnergy float64
|
||||
dramEnergy float64
|
||||
readDate int64
|
||||
}
|
||||
|
||||
type cpuInfo struct {
|
||||
physicalID string
|
||||
coreID string
|
||||
cpuID string
|
||||
vendorID string
|
||||
cpuFamily string
|
||||
model string
|
||||
flags string
|
||||
}
|
||||
|
|
@ -0,0 +1,154 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// fileService is responsible for handling operations on files.
|
||||
type fileService interface {
|
||||
getCPUInfoStats() (map[string]*cpuInfo, error)
|
||||
getStringsMatchingPatternOnPath(path string) ([]string, error)
|
||||
readFile(path string) ([]byte, error)
|
||||
readFileToFloat64(reader io.Reader) (float64, int64, error)
|
||||
readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error)
|
||||
}
|
||||
|
||||
type fileServiceImpl struct {
|
||||
}
|
||||
|
||||
// getCPUInfoStats retrieves basic information about CPU from /proc/cpuinfo.
|
||||
func (fs *fileServiceImpl) getCPUInfoStats() (map[string]*cpuInfo, error) {
|
||||
path := "/proc/cpuinfo"
|
||||
cpuInfoFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error while reading %s, err: %v", path, err)
|
||||
}
|
||||
defer cpuInfoFile.Close()
|
||||
|
||||
scanner := bufio.NewScanner(cpuInfoFile)
|
||||
|
||||
processorRegexp := regexp.MustCompile(`^processor\t+:\s([0-9]+)\n*$`)
|
||||
physicalIDRegexp := regexp.MustCompile(`^physical id\t+:\s([0-9]+)\n*$`)
|
||||
coreIDRegexp := regexp.MustCompile(`^core id\t+:\s([0-9]+)\n*$`)
|
||||
vendorIDRegexp := regexp.MustCompile(`^vendor_id\t+:\s([a-zA-Z]+)\n*$`)
|
||||
cpuFamilyRegexp := regexp.MustCompile(`^cpu\sfamily\t+:\s([0-9]+)\n*$`)
|
||||
modelRegexp := regexp.MustCompile(`^model\t+:\s([0-9]+)\n*$`)
|
||||
flagsRegexp := regexp.MustCompile(`^flags\t+:\s(.+)\n*$`)
|
||||
|
||||
stats := make(map[string]*cpuInfo)
|
||||
currentInfo := &cpuInfo{}
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
processorRes := processorRegexp.FindStringSubmatch(line)
|
||||
if len(processorRes) > 1 {
|
||||
currentInfo = &cpuInfo{
|
||||
cpuID: processorRes[1],
|
||||
}
|
||||
}
|
||||
|
||||
vendorIDRes := vendorIDRegexp.FindStringSubmatch(line)
|
||||
if len(vendorIDRes) > 1 {
|
||||
currentInfo.vendorID = vendorIDRes[1]
|
||||
}
|
||||
|
||||
physicalIDRes := physicalIDRegexp.FindStringSubmatch(line)
|
||||
if len(physicalIDRes) > 1 {
|
||||
currentInfo.physicalID = physicalIDRes[1]
|
||||
}
|
||||
|
||||
coreIDRes := coreIDRegexp.FindStringSubmatch(line)
|
||||
if len(coreIDRes) > 1 {
|
||||
currentInfo.coreID = coreIDRes[1]
|
||||
}
|
||||
|
||||
cpuFamilyRes := cpuFamilyRegexp.FindStringSubmatch(line)
|
||||
if len(cpuFamilyRes) > 1 {
|
||||
currentInfo.cpuFamily = cpuFamilyRes[1]
|
||||
}
|
||||
|
||||
modelRes := modelRegexp.FindStringSubmatch(line)
|
||||
if len(modelRes) > 1 {
|
||||
currentInfo.model = modelRes[1]
|
||||
}
|
||||
|
||||
flagsRes := flagsRegexp.FindStringSubmatch(line)
|
||||
if len(flagsRes) > 1 {
|
||||
currentInfo.flags = flagsRes[1]
|
||||
|
||||
// Flags is the last value we have to acquire, so currentInfo is added to map.
|
||||
stats[currentInfo.cpuID] = currentInfo
|
||||
}
|
||||
}
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// getStringsMatchingPatternOnPath looks for filenames and directory names on path matching given regexp.
|
||||
// It ignores file system errors such as I/O errors reading directories. The only possible returned error
|
||||
// is ErrBadPattern, when pattern is malformed.
|
||||
func (fs *fileServiceImpl) getStringsMatchingPatternOnPath(path string) ([]string, error) {
|
||||
return filepath.Glob(path)
|
||||
}
|
||||
|
||||
// readFile reads file on path and return string content.
|
||||
func (fs *fileServiceImpl) readFile(path string) ([]byte, error) {
|
||||
out, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return make([]byte, 0), err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// readFileToFloat64 reads file on path and tries to parse content to float64.
|
||||
func (fs *fileServiceImpl) readFileToFloat64(reader io.Reader) (float64, int64, error) {
|
||||
read, err := ioutil.ReadAll(reader)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
readDate := time.Now().UnixNano()
|
||||
|
||||
// Remove new line character
|
||||
trimmedString := strings.TrimRight(string(read), "\n")
|
||||
// Parse result to float64
|
||||
parsedValue, err := strconv.ParseFloat(trimmedString, 64)
|
||||
if err != nil {
|
||||
return 0, 0, fmt.Errorf("error parsing string to float for %s", trimmedString)
|
||||
}
|
||||
|
||||
return parsedValue, readDate, nil
|
||||
}
|
||||
|
||||
// readFileAtOffsetToUint64 reads 8 bytes from passed file at given offset.
|
||||
func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) {
|
||||
buffer := make([]byte, 8)
|
||||
|
||||
if offset == 0 {
|
||||
return 0, fmt.Errorf("file offset %d should not be 0", offset)
|
||||
}
|
||||
|
||||
_, err := reader.ReadAt(buffer, offset)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error on reading file at offset %d, err: %v", offset, err)
|
||||
}
|
||||
|
||||
return binary.LittleEndian.Uint64(buffer), nil
|
||||
}
|
||||
|
||||
func newFileService() *fileServiceImpl {
|
||||
return &fileServiceImpl{}
|
||||
}
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
io "io"
|
||||
|
||||
mock "github.com/stretchr/testify/mock"
|
||||
)
|
||||
|
||||
// mockFileService is an autogenerated mock type for the fileService type
|
||||
type mockFileService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
// getCPUInfoStats provides a mock function with given fields:
|
||||
func (_m *mockFileService) getCPUInfoStats() (map[string]*cpuInfo, error) {
|
||||
ret := _m.Called()
|
||||
|
||||
var r0 map[string]*cpuInfo
|
||||
if rf, ok := ret.Get(0).(func() map[string]*cpuInfo); ok {
|
||||
r0 = rf()
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).(map[string]*cpuInfo)
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func() error); ok {
|
||||
r1 = rf()
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// getStringsMatchingPatternOnPath provides a mock function with given fields: path
|
||||
func (_m *mockFileService) getStringsMatchingPatternOnPath(path string) ([]string, error) {
|
||||
ret := _m.Called(path)
|
||||
|
||||
var r0 []string
|
||||
if rf, ok := ret.Get(0).(func(string) []string); ok {
|
||||
r0 = rf(path)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).([]string)
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(string) error); ok {
|
||||
r1 = rf(path)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// readFile provides a mock function with given fields: path
|
||||
func (_m *mockFileService) readFile(path string) ([]byte, error) {
|
||||
ret := _m.Called(path)
|
||||
|
||||
var r0 []byte
|
||||
if rf, ok := ret.Get(0).(func(string) []byte); ok {
|
||||
r0 = rf(path)
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).([]byte)
|
||||
}
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(string) error); ok {
|
||||
r1 = rf(path)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// readFileAtOffsetToUint64 provides a mock function with given fields: reader, offset
|
||||
func (_m *mockFileService) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) {
|
||||
ret := _m.Called(reader, offset)
|
||||
|
||||
var r0 uint64
|
||||
if rf, ok := ret.Get(0).(func(io.ReaderAt, int64) uint64); ok {
|
||||
r0 = rf(reader, offset)
|
||||
} else {
|
||||
r0 = ret.Get(0).(uint64)
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(io.ReaderAt, int64) error); ok {
|
||||
r1 = rf(reader, offset)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// readFileToFloat64 provides a mock function with given fields: reader
|
||||
func (_m *mockFileService) readFileToFloat64(reader io.Reader) (float64, int64, error) {
|
||||
ret := _m.Called(reader)
|
||||
|
||||
var r0 float64
|
||||
if rf, ok := ret.Get(0).(func(io.Reader) float64); ok {
|
||||
r0 = rf(reader)
|
||||
} else {
|
||||
r0 = ret.Get(0).(float64)
|
||||
}
|
||||
|
||||
var r1 int64
|
||||
if rf, ok := ret.Get(1).(func(io.Reader) int64); ok {
|
||||
r1 = rf(reader)
|
||||
} else {
|
||||
r1 = ret.Get(1).(int64)
|
||||
}
|
||||
|
||||
var r2 error
|
||||
if rf, ok := ret.Get(2).(func(io.Reader) error); ok {
|
||||
r2 = rf(reader)
|
||||
} else {
|
||||
r2 = ret.Error(2)
|
||||
}
|
||||
|
||||
return r0, r1, r2
|
||||
}
|
||||
|
|
@ -0,0 +1,486 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/big"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/plugins/inputs"
|
||||
)
|
||||
|
||||
const (
|
||||
cpuFrequency = "cpu_frequency"
|
||||
cpuBusyFrequency = "cpu_busy_frequency"
|
||||
cpuTemperature = "cpu_temperature"
|
||||
cpuC1StateResidency = "cpu_c1_state_residency"
|
||||
cpuC6StateResidency = "cpu_c6_state_residency"
|
||||
cpuBusyCycles = "cpu_busy_cycles"
|
||||
percentageMultiplier = 100
|
||||
)
|
||||
|
||||
// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.
|
||||
type PowerStat struct {
|
||||
CPUMetrics []string `toml:"cpu_metrics"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
fs fileService
|
||||
rapl raplService
|
||||
msr msrService
|
||||
|
||||
cpuFrequency bool
|
||||
cpuBusyFrequency bool
|
||||
cpuTemperature bool
|
||||
cpuC1StateResidency bool
|
||||
cpuC6StateResidency bool
|
||||
cpuBusyCycles bool
|
||||
cpuInfo map[string]*cpuInfo
|
||||
skipFirstIteration bool
|
||||
}
|
||||
|
||||
// Description returns a one-sentence description on the plugin.
|
||||
func (p *PowerStat) Description() string {
|
||||
return `Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.`
|
||||
}
|
||||
|
||||
// SampleConfig returns the default configuration of the plugin.
|
||||
func (p *PowerStat) SampleConfig() string {
|
||||
return `
|
||||
## All global metrics are always collected by Intel PowerStat plugin.
|
||||
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
|
||||
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
|
||||
## telemetry will be exposed by Intel PowerStat plugin.
|
||||
## Supported options:
|
||||
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
|
||||
# cpu_metrics = []
|
||||
`
|
||||
}
|
||||
|
||||
// Init performs one time setup of the plugin.
|
||||
func (p *PowerStat) Init() error {
|
||||
p.parseCPUMetricsConfig()
|
||||
err := p.verifyProcessor()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Initialize MSR service only when there is at least one core metric enabled.
|
||||
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC1StateResidency ||
|
||||
p.cpuC6StateResidency || p.cpuBusyCycles {
|
||||
p.msr = newMsrServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Gather takes in an accumulator and adds the metrics that the Input gathers.
|
||||
func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
|
||||
p.addGlobalMetrics(acc)
|
||||
|
||||
if p.areCoreMetricsEnabled() {
|
||||
p.addPerCoreMetrics(acc)
|
||||
}
|
||||
|
||||
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations.
|
||||
p.skipFirstIteration = false
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
|
||||
// Prepare RAPL data each gather because there is a possibility to disable rapl kernel module
|
||||
p.rapl.initializeRaplData()
|
||||
|
||||
for socketID := range p.rapl.getRaplData() {
|
||||
err := p.rapl.retrieveAndCalculateData(socketID)
|
||||
if err != nil {
|
||||
// In case of an error skip calculating metrics for this socket
|
||||
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
|
||||
continue
|
||||
}
|
||||
p.addThermalDesignPowerMetric(socketID, acc)
|
||||
if p.skipFirstIteration {
|
||||
continue
|
||||
}
|
||||
p.addCurrentSocketPowerConsumption(socketID, acc)
|
||||
p.addCurrentDramPowerConsumption(socketID, acc)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) addThermalDesignPowerMetric(socketID string, acc telegraf.Accumulator) {
|
||||
maxPower, err := p.rapl.getConstraintMaxPowerWatts(socketID)
|
||||
if err != nil {
|
||||
p.Log.Errorf("error while retrieving TDP of the socket %s, err: %v", socketID, err)
|
||||
return
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"package_id": socketID,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"thermal_design_power_watts": roundFloatToNearestTwoDecimalPlaces(maxPower),
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_package", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCurrentSocketPowerConsumption(socketID string, acc telegraf.Accumulator) {
|
||||
tags := map[string]string{
|
||||
"package_id": socketID,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"current_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].socketCurrentEnergy),
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_package", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCurrentDramPowerConsumption(socketID string, acc telegraf.Accumulator) {
|
||||
tags := map[string]string{
|
||||
"package_id": socketID,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"current_dram_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].dramCurrentEnergy),
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_package", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addPerCoreMetrics(acc telegraf.Accumulator) {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(p.msr.getCPUCoresData()))
|
||||
|
||||
for cpuID := range p.msr.getCPUCoresData() {
|
||||
go p.addMetricsForSingleCore(cpuID, acc, &wg)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulator, wg *sync.WaitGroup) {
|
||||
defer wg.Done()
|
||||
|
||||
if p.cpuFrequency {
|
||||
p.addCPUFrequencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
// Read data from MSR only if required
|
||||
if p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature ||
|
||||
p.cpuBusyFrequency {
|
||||
err := p.msr.openAndReadMsr(cpuID)
|
||||
if err != nil {
|
||||
// In case of an error exit the function. All metrics past this point are dependant on MSR.
|
||||
p.Log.Debugf("error while reading msr: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if p.cpuTemperature {
|
||||
p.addCPUTemperatureMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle.
|
||||
if p.cpuBusyFrequency {
|
||||
p.addCPUBusyFrequencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if !p.skipFirstIteration {
|
||||
if p.cpuC1StateResidency {
|
||||
p.addCPUC1StateResidencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if p.cpuC6StateResidency {
|
||||
p.addCPUC6StateResidencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if p.cpuBusyCycles {
|
||||
p.addCPUBusyCyclesMetric(cpuID, acc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
frequency, err := p.msr.retrieveCPUFrequencyForCore(cpuID)
|
||||
|
||||
// In case of an error leave func
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
|
||||
fields := map[string]interface{}{
|
||||
"cpu_frequency_mhz": roundFloatToNearestTwoDecimalPlaces(frequency),
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
temp := coresData[cpuID].throttleTemp - coresData[cpuID].temp
|
||||
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_temperature_celsius": temp,
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
mperfDelta := coresData[cpuID].mperfDelta
|
||||
// Avoid division by 0
|
||||
if mperfDelta == 0 {
|
||||
p.Log.Errorf("mperf delta should not equal 0 on core %s", cpuID)
|
||||
return
|
||||
}
|
||||
aperfMperf := float64(coresData[cpuID].aperfDelta) / float64(mperfDelta)
|
||||
tsc := convertProcessorCyclesToHertz(coresData[cpuID].timeStampCounterDelta)
|
||||
timeNow := time.Now().UnixNano()
|
||||
interval := convertNanoSecondsToSeconds(timeNow - coresData[cpuID].readDate)
|
||||
coresData[cpuID].readDate = timeNow
|
||||
|
||||
if p.skipFirstIteration {
|
||||
return
|
||||
}
|
||||
|
||||
if interval == 0 {
|
||||
p.Log.Errorf("interval between last two Telegraf cycles is 0")
|
||||
return
|
||||
}
|
||||
|
||||
busyMhzValue := roundFloatToNearestTwoDecimalPlaces(tsc * aperfMperf / interval)
|
||||
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_busy_frequency_mhz": busyMhzValue,
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUC1StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
timestampDeltaBig := new(big.Int).SetUint64(coresData[cpuID].timeStampCounterDelta)
|
||||
// Avoid division by 0
|
||||
if timestampDeltaBig.Sign() < 1 {
|
||||
p.Log.Errorf("timestamp delta value %v should not be lower than 1", timestampDeltaBig)
|
||||
return
|
||||
}
|
||||
|
||||
// Since counter collection is not atomic it may happen that sum of C0, C1, C3, C6 and C7
|
||||
// is bigger value than TSC, in such case C1 residency shall be set to 0.
|
||||
// Operating on big.Int to avoid overflow
|
||||
mperfDeltaBig := new(big.Int).SetUint64(coresData[cpuID].mperfDelta)
|
||||
c3DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c3Delta)
|
||||
c6DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c6Delta)
|
||||
c7DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c7Delta)
|
||||
|
||||
c1Big := new(big.Int).Sub(timestampDeltaBig, mperfDeltaBig)
|
||||
c1Big.Sub(c1Big, c3DeltaBig)
|
||||
c1Big.Sub(c1Big, c6DeltaBig)
|
||||
c1Big.Sub(c1Big, c7DeltaBig)
|
||||
|
||||
if c1Big.Sign() < 0 {
|
||||
c1Big = c1Big.SetInt64(0)
|
||||
}
|
||||
c1Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1Big.Uint64()) / float64(timestampDeltaBig.Uint64()))
|
||||
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_c1_state_residency_percent": c1Value,
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
// Avoid division by 0
|
||||
if coresData[cpuID].timeStampCounterDelta == 0 {
|
||||
p.Log.Errorf("timestamp counter on offset %s should not equal 0 on cpuID %s",
|
||||
timestampCounterLocation, cpuID)
|
||||
return
|
||||
}
|
||||
c6Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
float64(coresData[cpuID].c6Delta) / float64(coresData[cpuID].timeStampCounterDelta))
|
||||
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_c6_state_residency_percent": c6Value,
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
// Avoid division by 0
|
||||
if coresData[cpuID].timeStampCounterDelta == 0 {
|
||||
p.Log.Errorf("timestamp counter on offset %s should not equal 0 on cpuID %s",
|
||||
timestampCounterLocation, cpuID)
|
||||
return
|
||||
}
|
||||
busyCyclesValue := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta))
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
"package_id": cpu.physicalID,
|
||||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_busy_cycles_percent": busyCyclesValue,
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) parseCPUMetricsConfig() {
|
||||
if len(p.CPUMetrics) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuFrequency) {
|
||||
p.cpuFrequency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuC1StateResidency) {
|
||||
p.cpuC1StateResidency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuC6StateResidency) {
|
||||
p.cpuC6StateResidency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuBusyCycles) {
|
||||
p.cpuBusyCycles = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuBusyFrequency) {
|
||||
p.cpuBusyFrequency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuTemperature) {
|
||||
p.cpuTemperature = true
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) verifyProcessor() error {
|
||||
allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C,
|
||||
0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E,
|
||||
0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56,
|
||||
0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8F, 0x8C, 0x8D}
|
||||
stats, err := p.fs.getCPUInfoStats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
p.cpuInfo = stats
|
||||
|
||||
// First CPU is sufficient for verification.
|
||||
firstCPU := p.cpuInfo["0"]
|
||||
if firstCPU == nil {
|
||||
return fmt.Errorf("first core not found while parsing /proc/cpuinfo")
|
||||
}
|
||||
|
||||
if firstCPU.vendorID != "GenuineIntel" || firstCPU.cpuFamily != "6" {
|
||||
return fmt.Errorf("Intel processor not found, vendorId: %s", firstCPU.vendorID)
|
||||
}
|
||||
|
||||
if !contains(convertIntegerArrayToStringArray(allowedProcessorModelsForC1C6), firstCPU.model) {
|
||||
p.cpuC1StateResidency = false
|
||||
p.cpuC6StateResidency = false
|
||||
}
|
||||
|
||||
if !strings.Contains(firstCPU.flags, "msr") {
|
||||
p.cpuTemperature = false
|
||||
p.cpuC6StateResidency = false
|
||||
p.cpuBusyCycles = false
|
||||
p.cpuBusyFrequency = false
|
||||
p.cpuC1StateResidency = false
|
||||
}
|
||||
|
||||
if !strings.Contains(firstCPU.flags, "aperfmperf") {
|
||||
p.cpuBusyFrequency = false
|
||||
p.cpuBusyCycles = false
|
||||
p.cpuC1StateResidency = false
|
||||
}
|
||||
|
||||
if !strings.Contains(firstCPU.flags, "dts") {
|
||||
p.cpuTemperature = false
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func contains(slice []string, str string) bool {
|
||||
for _, v := range slice {
|
||||
if v == str {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (p *PowerStat) areCoreMetricsEnabled() bool {
|
||||
return p.msr != nil && len(p.msr.getCPUCoresData()) > 0
|
||||
}
|
||||
|
||||
// newPowerStat creates and returns PowerStat struct.
|
||||
func newPowerStat(fs fileService) *PowerStat {
|
||||
p := &PowerStat{
|
||||
cpuFrequency: false,
|
||||
cpuC1StateResidency: false,
|
||||
cpuC6StateResidency: false,
|
||||
cpuBusyCycles: false,
|
||||
cpuTemperature: false,
|
||||
cpuBusyFrequency: false,
|
||||
skipFirstIteration: true,
|
||||
fs: fs,
|
||||
}
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
func init() {
|
||||
inputs.Add("intel_powerstat", func() telegraf.Input {
|
||||
return newPowerStat(newFileService())
|
||||
})
|
||||
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
// +build !linux
|
||||
|
||||
package intel_powerstat
|
||||
|
|
@ -0,0 +1,494 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestInitPlugin(t *testing.T) {
|
||||
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
|
||||
power, fsMock, _, _ := getPowerWithMockedServices()
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
Return(nil, errors.New("error getting cpu stats")).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
Return(make(map[string]*cpuInfo), nil).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
Return(map[string]*cpuInfo{"0": {
|
||||
vendorID: "GenuineIntel",
|
||||
cpuFamily: "test",
|
||||
}}, nil).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(cores, nil).Once().
|
||||
On("getCPUInfoStats", mock.Anything).
|
||||
Return(map[string]*cpuInfo{"0": {
|
||||
vendorID: "GenuineIntel",
|
||||
cpuFamily: "6",
|
||||
}}, nil)
|
||||
// Verify MSR service initialization.
|
||||
power.cpuFrequency = true
|
||||
require.NoError(t, power.Init())
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, len(cores), len(power.msr.getCPUCoresData()))
|
||||
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once()
|
||||
|
||||
// In case of an error when fetching cpu cores plugin should proceed with execution.
|
||||
require.NoError(t, power.Init())
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, 0, len(power.msr.getCPUCoresData()))
|
||||
}
|
||||
|
||||
func TestParseCPUMetricsConfig(t *testing.T) {
|
||||
power, _, _, _ := getPowerWithMockedServices()
|
||||
disableCoreMetrics(power)
|
||||
|
||||
power.CPUMetrics = []string{
|
||||
"cpu_frequency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
|
||||
"cpu_busy_frequency",
|
||||
}
|
||||
power.parseCPUMetricsConfig()
|
||||
verifyCoreMetrics(t, power, true)
|
||||
disableCoreMetrics(power)
|
||||
verifyCoreMetrics(t, power, false)
|
||||
|
||||
power.CPUMetrics = []string{}
|
||||
power.parseCPUMetricsConfig()
|
||||
|
||||
power.CPUMetrics = []string{"cpu_c6_state_residency", "#@$sdkjdfsdf3@", "1pu_c1_state_residency"}
|
||||
power.parseCPUMetricsConfig()
|
||||
require.Equal(t, false, power.cpuC1StateResidency)
|
||||
require.Equal(t, true, power.cpuC6StateResidency)
|
||||
disableCoreMetrics(power)
|
||||
verifyCoreMetrics(t, power, false)
|
||||
|
||||
power.CPUMetrics = []string{"#@$sdkjdfsdf3@", "1pu_c1_state_residency", "123"}
|
||||
power.parseCPUMetricsConfig()
|
||||
verifyCoreMetrics(t, power, false)
|
||||
}
|
||||
|
||||
func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) {
|
||||
require.Equal(t, enabled, power.cpuFrequency)
|
||||
require.Equal(t, enabled, power.cpuC1StateResidency)
|
||||
require.Equal(t, enabled, power.cpuC6StateResidency)
|
||||
require.Equal(t, enabled, power.cpuBusyCycles)
|
||||
require.Equal(t, enabled, power.cpuBusyFrequency)
|
||||
require.Equal(t, enabled, power.cpuTemperature)
|
||||
}
|
||||
|
||||
func TestGather(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
packageIDs := []string{"0", "1"}
|
||||
coreIDs := []string{"0", "1", "2", "3"}
|
||||
socketCurrentEnergy := 13213852.2
|
||||
dramCurrentEnergy := 784552.0
|
||||
preparedCPUData := getPreparedCPUData(coreIDs)
|
||||
raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy)
|
||||
|
||||
power, _, raplMock, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfo(power, coreIDs, packageIDs)
|
||||
enableCoreMetrics(power)
|
||||
power.skipFirstIteration = false
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
|
||||
msrMock.On("getCPUCoresData").Return(preparedCPUData).
|
||||
On("openAndReadMsr", mock.Anything).Return(nil).
|
||||
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
|
||||
|
||||
require.NoError(t, power.Gather(&acc))
|
||||
// Number of global metrics : 3
|
||||
// Number of per core metrics : 6
|
||||
require.Equal(t, 3*len(packageIDs)+6*len(coreIDs), len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddGlobalMetricsNegative(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
socketCurrentEnergy := 13213852.2
|
||||
dramCurrentEnergy := 784552.0
|
||||
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
power.skipFirstIteration = false
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(raplDataMap).Once().
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap))
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
raplMock.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(make(map[string]*raplData)).Once()
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
raplMock.AssertNotCalled(t, "retrieveAndCalculateData")
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Once().
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once().
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(12313851.5, nil).Twice()
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 3, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddGlobalMetricsPositive(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
socketCurrentEnergy := 3644574.4
|
||||
dramCurrentEnergy := 124234872.5
|
||||
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
|
||||
maxPower := 546783852.9
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
power.skipFirstIteration = false
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice().
|
||||
On("getCurrentDramPowerConsumption", mock.Anything).Return(dramCurrentEnergy)
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 6, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedResults := getGlobalMetrics(maxPower, socketCurrentEnergy, dramCurrentEnergy)
|
||||
for _, test := range expectedResults {
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_package", test.fields, test.tags)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddMetricsForSingleCoreNegative(t *testing.T) {
|
||||
var wg sync.WaitGroup
|
||||
var acc testutil.Accumulator
|
||||
core := "0"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
|
||||
msrMock.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
|
||||
|
||||
// Skip generating metric for CPU frequency.
|
||||
power.cpuFrequency = false
|
||||
|
||||
wg.Add(1)
|
||||
power.addMetricsForSingleCore(core, &acc, &wg)
|
||||
wg.Wait()
|
||||
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddCPUFrequencyMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
frequency := 1200000.2
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
|
||||
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).
|
||||
Return(float64(0), errors.New("error on reading file")).Once()
|
||||
|
||||
power.addCPUFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
|
||||
|
||||
power.addCPUFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedFrequency := roundFloatToNearestTwoDecimalPlaces(frequency)
|
||||
expectedMetric := getPowerCoreMetric("cpu_frequency_mhz", expectedFrequency, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
}
|
||||
|
||||
func TestAddCoreCPUTemperatureMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Once()
|
||||
power.addCPUTemperatureMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedMetric := getPowerCoreMetric("cpu_temperature_celsius", expectedTemp, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
}
|
||||
|
||||
func TestAddC6StateResidencyMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUC6StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedMetric := getPowerCoreMetric("cpu_c6_state_residency_percent", expectedC6, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
|
||||
acc.ClearMetrics()
|
||||
preparedData[cpuID].timeStampCounterDelta = 0
|
||||
|
||||
power.addCPUC6StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddProcessorBusyCyclesMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) /
|
||||
float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUBusyCyclesMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedMetric := getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
|
||||
acc.ClearMetrics()
|
||||
preparedData[cpuID].timeStampCounterDelta = 0
|
||||
power.addCPUBusyCyclesMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddProcessorBusyFrequencyMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
power.skipFirstIteration = false
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUBusyFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
acc.ClearMetrics()
|
||||
preparedData[cpuID].mperfDelta = 0
|
||||
power.addCPUBusyFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddC1StateResidencyMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta -
|
||||
preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta
|
||||
expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
|
||||
power.addCPUC1StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedMetric := getPowerCoreMetric("cpu_c1_state_residency_percent", expectedC1, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
|
||||
acc.ClearMetrics()
|
||||
preparedData[cpuID].timeStampCounterDelta = 0
|
||||
power.addCPUC1StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddThermalDesignPowerMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
sockets := []string{"0"}
|
||||
maxPower := 195720672.1
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
|
||||
raplMock.On("getConstraintMaxPowerWatts", mock.Anything).
|
||||
Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once().
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once()
|
||||
|
||||
power.addThermalDesignPowerMetric(sockets[0], &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
power.addThermalDesignPowerMetric(sockets[0], &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedTDP := roundFloatToNearestTwoDecimalPlaces(maxPower)
|
||||
expectedMetric := getPowerGlobalMetric("thermal_design_power_watts", expectedTDP, sockets[0])
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags)
|
||||
}
|
||||
|
||||
func getPreparedCPUData(cores []string) map[string]*msrData {
|
||||
msrDataMap := make(map[string]*msrData)
|
||||
|
||||
for _, core := range cores {
|
||||
msrDataMap[core] = &msrData{
|
||||
mperf: 43079,
|
||||
aperf: 82001,
|
||||
timeStampCounter: 15514,
|
||||
c3: 52829,
|
||||
c6: 86930,
|
||||
c7: 25340,
|
||||
throttleTemp: 88150,
|
||||
temp: 40827,
|
||||
mperfDelta: 23515,
|
||||
aperfDelta: 33866,
|
||||
timeStampCounterDelta: 13686000,
|
||||
c3Delta: 20003,
|
||||
c6Delta: 44518,
|
||||
c7Delta: 20979,
|
||||
}
|
||||
}
|
||||
|
||||
return msrDataMap
|
||||
}
|
||||
|
||||
func getGlobalMetrics(maxPower float64, socketCurrentEnergy float64, dramCurrentEnergy float64) []struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
} {
|
||||
return []struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
}{
|
||||
getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "0"),
|
||||
getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "1"),
|
||||
getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "0"),
|
||||
getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "1"),
|
||||
getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "0"),
|
||||
getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "1"),
|
||||
}
|
||||
}
|
||||
|
||||
func getPowerCoreMetric(name string, value interface{}, coreID string, packageID string, cpuID string) struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
} {
|
||||
return getPowerMetric(name, value, map[string]string{"package_id": packageID, "core_id": coreID, "cpu_id": cpuID})
|
||||
}
|
||||
|
||||
func getPowerGlobalMetric(name string, value interface{}, socketID string) struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
} {
|
||||
return getPowerMetric(name, value, map[string]string{"package_id": socketID})
|
||||
}
|
||||
|
||||
func getPowerMetric(name string, value interface{}, tags map[string]string) struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
} {
|
||||
return struct {
|
||||
fields map[string]interface{}
|
||||
tags map[string]string
|
||||
}{
|
||||
map[string]interface{}{
|
||||
name: value,
|
||||
},
|
||||
tags,
|
||||
}
|
||||
}
|
||||
|
||||
func prepareCPUInfoForSingleCPU(power *PowerStat, cpuID string, coreID string, packageID string) {
|
||||
power.cpuInfo = make(map[string]*cpuInfo)
|
||||
power.cpuInfo[cpuID] = &cpuInfo{
|
||||
physicalID: packageID,
|
||||
coreID: coreID,
|
||||
cpuID: cpuID,
|
||||
}
|
||||
}
|
||||
|
||||
func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) {
|
||||
power.cpuInfo = make(map[string]*cpuInfo)
|
||||
currentCPU := 0
|
||||
for _, packageID := range packageIDs {
|
||||
for _, coreID := range coreIDs {
|
||||
cpuID := strconv.Itoa(currentCPU)
|
||||
power.cpuInfo[cpuID] = &cpuInfo{
|
||||
physicalID: packageID,
|
||||
cpuID: cpuID,
|
||||
coreID: coreID,
|
||||
}
|
||||
currentCPU++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func enableCoreMetrics(power *PowerStat) {
|
||||
power.cpuC1StateResidency = true
|
||||
power.cpuC6StateResidency = true
|
||||
power.cpuTemperature = true
|
||||
power.cpuBusyFrequency = true
|
||||
power.cpuFrequency = true
|
||||
power.cpuBusyCycles = true
|
||||
}
|
||||
|
||||
func disableCoreMetrics(power *PowerStat) {
|
||||
power.cpuC1StateResidency = false
|
||||
power.cpuC6StateResidency = false
|
||||
power.cpuTemperature = false
|
||||
power.cpuBusyFrequency = false
|
||||
power.cpuFrequency = false
|
||||
power.cpuBusyCycles = false
|
||||
}
|
||||
|
||||
func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData {
|
||||
raplDataMap := make(map[string]*raplData, len(socketIDs))
|
||||
for _, socketID := range socketIDs {
|
||||
raplDataMap[socketID] = &raplData{
|
||||
socketCurrentEnergy: socketCurrentEnergy,
|
||||
dramCurrentEnergy: dramCurrentEnergy,
|
||||
}
|
||||
}
|
||||
|
||||
return raplDataMap
|
||||
}
|
||||
|
||||
func getPowerWithMockedServices() (*PowerStat, *mockFileService, *mockRaplService, *mockMsrService) {
|
||||
fsMock := &mockFileService{}
|
||||
msrMock := &mockMsrService{}
|
||||
raplMock := &mockRaplService{}
|
||||
logger := testutil.Logger{Name: "PowerPluginTest"}
|
||||
p := newPowerStat(fsMock)
|
||||
p.Log = logger
|
||||
p.fs = fsMock
|
||||
p.rapl = raplMock
|
||||
p.msr = msrMock
|
||||
|
||||
return p, fsMock, raplMock, msrMock
|
||||
}
|
||||
|
|
@ -0,0 +1,207 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
)
|
||||
|
||||
const (
|
||||
systemCPUPath = "/sys/devices/system/cpu/"
|
||||
cpuCurrentFreqPartialPath = "/sys/devices/system/cpu/cpu%s/cpufreq/scaling_cur_freq"
|
||||
msrPartialPath = "/dev/cpu/%s/msr"
|
||||
c3StateResidencyLocation = 0x3FC
|
||||
c6StateResidencyLocation = 0x3FD
|
||||
c7StateResidencyLocation = 0x3FE
|
||||
maximumFrequencyClockCountLocation = 0xE7
|
||||
actualFrequencyClockCountLocation = 0xE8
|
||||
throttleTemperatureLocation = 0x1A2
|
||||
temperatureLocation = 0x19C
|
||||
timestampCounterLocation = 0x10
|
||||
)
|
||||
|
||||
// msrService is responsible for interactions with MSR.
|
||||
type msrService interface {
|
||||
getCPUCoresData() map[string]*msrData
|
||||
retrieveCPUFrequencyForCore(core string) (float64, error)
|
||||
openAndReadMsr(core string) error
|
||||
}
|
||||
|
||||
type msrServiceImpl struct {
|
||||
cpuCoresData map[string]*msrData
|
||||
msrOffsets []int64
|
||||
fs fileService
|
||||
log telegraf.Logger
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
|
||||
return m.cpuCoresData
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
|
||||
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
|
||||
cpuFreqFile, err := os.Open(cpuFreqPath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err)
|
||||
}
|
||||
defer cpuFreqFile.Close()
|
||||
|
||||
cpuFreq, _, err := m.fs.readFileToFloat64(cpuFreqFile)
|
||||
return convertKiloHertzToMegaHertz(cpuFreq), err
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) openAndReadMsr(core string) error {
|
||||
path := fmt.Sprintf(msrPartialPath, core)
|
||||
msrFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
|
||||
}
|
||||
defer msrFile.Close()
|
||||
|
||||
err = m.readDataFromMsr(core, msrFile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading data from MSR for core %s, err: %v", core, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error {
|
||||
g, ctx := errgroup.WithContext(context.Background())
|
||||
|
||||
// Create and populate a map that contains msr offsets along with their respective channels
|
||||
msrOffsetsWithChannels := make(map[int64]chan uint64)
|
||||
for _, offset := range m.msrOffsets {
|
||||
msrOffsetsWithChannels[offset] = make(chan uint64)
|
||||
}
|
||||
|
||||
// Start a goroutine for each msr offset
|
||||
for offset, channel := range msrOffsetsWithChannels {
|
||||
// Wrap around function to avoid race on loop counter
|
||||
func(off int64, ch chan uint64) {
|
||||
g.Go(func() error {
|
||||
defer close(ch)
|
||||
|
||||
err := m.readValueFromFileAtOffset(ctx, ch, reader, off)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading MSR file, err: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}(offset, channel)
|
||||
}
|
||||
|
||||
newC3 := <-msrOffsetsWithChannels[c3StateResidencyLocation]
|
||||
newC6 := <-msrOffsetsWithChannels[c6StateResidencyLocation]
|
||||
newC7 := <-msrOffsetsWithChannels[c7StateResidencyLocation]
|
||||
newMperf := <-msrOffsetsWithChannels[maximumFrequencyClockCountLocation]
|
||||
newAperf := <-msrOffsetsWithChannels[actualFrequencyClockCountLocation]
|
||||
newTsc := <-msrOffsetsWithChannels[timestampCounterLocation]
|
||||
newThrottleTemp := <-msrOffsetsWithChannels[throttleTemperatureLocation]
|
||||
newTemp := <-msrOffsetsWithChannels[temperatureLocation]
|
||||
|
||||
if err := g.Wait(); err != nil {
|
||||
return fmt.Errorf("received error during reading MSR values in goroutines: %v", err)
|
||||
}
|
||||
|
||||
m.cpuCoresData[core].c3Delta = newC3 - m.cpuCoresData[core].c3
|
||||
m.cpuCoresData[core].c6Delta = newC6 - m.cpuCoresData[core].c6
|
||||
m.cpuCoresData[core].c7Delta = newC7 - m.cpuCoresData[core].c7
|
||||
m.cpuCoresData[core].mperfDelta = newMperf - m.cpuCoresData[core].mperf
|
||||
m.cpuCoresData[core].aperfDelta = newAperf - m.cpuCoresData[core].aperf
|
||||
m.cpuCoresData[core].timeStampCounterDelta = newTsc - m.cpuCoresData[core].timeStampCounter
|
||||
|
||||
m.cpuCoresData[core].c3 = newC3
|
||||
m.cpuCoresData[core].c6 = newC6
|
||||
m.cpuCoresData[core].c7 = newC7
|
||||
m.cpuCoresData[core].mperf = newMperf
|
||||
m.cpuCoresData[core].aperf = newAperf
|
||||
m.cpuCoresData[core].timeStampCounter = newTsc
|
||||
// MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16.
|
||||
m.cpuCoresData[core].throttleTemp = (newThrottleTemp >> 16) & 0xFF
|
||||
// MSR (19Ch) IA32_THERM_STATUS bits 22:16.
|
||||
m.cpuCoresData[core].temp = (newTemp >> 16) & 0x7F
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) readValueFromFileAtOffset(ctx context.Context, ch chan uint64, reader io.ReaderAt, offset int64) error {
|
||||
value, err := m.fs.readFileAtOffsetToUint64(reader, offset)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Detect context cancellation and return an error if other goroutine fails
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case ch <- value:
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setCPUCores initialize cpuCoresData map.
|
||||
func (m *msrServiceImpl) setCPUCores() error {
|
||||
m.cpuCoresData = make(map[string]*msrData)
|
||||
cpuPrefix := "cpu"
|
||||
cpuCore := fmt.Sprintf("%s%s", cpuPrefix, "[0-9]*")
|
||||
cpuCorePattern := fmt.Sprintf("%s/%s", systemCPUPath, cpuCore)
|
||||
cpuPaths, err := m.fs.getStringsMatchingPatternOnPath(cpuCorePattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(cpuPaths) == 0 {
|
||||
m.log.Debugf("CPU core data wasn't found using pattern: %s", cpuCorePattern)
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, cpuPath := range cpuPaths {
|
||||
core := strings.TrimPrefix(filepath.Base(cpuPath), cpuPrefix)
|
||||
m.cpuCoresData[core] = &msrData{
|
||||
mperf: 0,
|
||||
aperf: 0,
|
||||
timeStampCounter: 0,
|
||||
c3: 0,
|
||||
c6: 0,
|
||||
c7: 0,
|
||||
throttleTemp: 0,
|
||||
temp: 0,
|
||||
mperfDelta: 0,
|
||||
aperfDelta: 0,
|
||||
timeStampCounterDelta: 0,
|
||||
c3Delta: 0,
|
||||
c6Delta: 0,
|
||||
c7Delta: 0,
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl {
|
||||
msrService := &msrServiceImpl{
|
||||
fs: fs,
|
||||
log: logger,
|
||||
}
|
||||
err := msrService.setCPUCores()
|
||||
if err != nil {
|
||||
// This error does not prevent plugin from working thus it is not returned.
|
||||
msrService.log.Error(err)
|
||||
}
|
||||
|
||||
msrService.msrOffsets = []int64{c3StateResidencyLocation, c6StateResidencyLocation, c7StateResidencyLocation,
|
||||
maximumFrequencyClockCountLocation, actualFrequencyClockCountLocation, timestampCounterLocation,
|
||||
throttleTemperatureLocation, temperatureLocation}
|
||||
|
||||
return msrService
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import mock "github.com/stretchr/testify/mock"
|
||||
|
||||
// mockMsrService is an autogenerated mock type for the msrService type
|
||||
type mockMsrService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
// getCPUCoresData provides a mock function with given fields:
|
||||
func (_m *mockMsrService) getCPUCoresData() map[string]*msrData {
|
||||
ret := _m.Called()
|
||||
|
||||
var r0 map[string]*msrData
|
||||
if rf, ok := ret.Get(0).(func() map[string]*msrData); ok {
|
||||
r0 = rf()
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).(map[string]*msrData)
|
||||
}
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// openAndReadMsr provides a mock function with given fields: core
|
||||
func (_m *mockMsrService) openAndReadMsr(core string) error {
|
||||
ret := _m.Called(core)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(string) error); ok {
|
||||
r0 = rf(core)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// retrieveCPUFrequencyForCore provides a mock function with given fields: core
|
||||
func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) {
|
||||
ret := _m.Called(core)
|
||||
|
||||
var r0 float64
|
||||
if rf, ok := ret.Get(0).(func(string) float64); ok {
|
||||
r0 = rf(core)
|
||||
} else {
|
||||
r0 = ret.Get(0).(float64)
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(string) error); ok {
|
||||
r1 = rf(core)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestReadDataFromMsrPositive(t *testing.T) {
|
||||
firstValue := uint64(1000000)
|
||||
secondValue := uint64(5000000)
|
||||
delta := secondValue - firstValue
|
||||
cpuCores := []string{"cpu0", "cpu1"}
|
||||
msr, fsMock := getMsrServiceWithMockedFs()
|
||||
prepareTestData(fsMock, cpuCores, msr, t)
|
||||
cores := trimCPUFromCores(cpuCores)
|
||||
|
||||
methodCallNumberForFirstValue := len(msr.msrOffsets) * len(cores)
|
||||
methodCallNumberForSecondValue := methodCallNumberForFirstValue * 2
|
||||
|
||||
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(firstValue, nil).Times(methodCallNumberForFirstValue)
|
||||
for _, core := range cores {
|
||||
require.NoError(t, msr.readDataFromMsr(core, nil))
|
||||
}
|
||||
fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForFirstValue)
|
||||
verifyCPUCoresData(cores, t, msr, firstValue, false, 0)
|
||||
|
||||
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(secondValue, nil).Times(methodCallNumberForFirstValue)
|
||||
for _, core := range cores {
|
||||
require.NoError(t, msr.readDataFromMsr(core, nil))
|
||||
}
|
||||
fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForSecondValue)
|
||||
verifyCPUCoresData(cores, t, msr, secondValue, true, delta)
|
||||
}
|
||||
|
||||
func trimCPUFromCores(cpuCores []string) []string {
|
||||
cores := make([]string, 0)
|
||||
for _, core := range cpuCores {
|
||||
cores = append(cores, strings.TrimPrefix(core, "cpu"))
|
||||
}
|
||||
return cores
|
||||
}
|
||||
|
||||
func TestReadDataFromMsrNegative(t *testing.T) {
|
||||
firstValue := uint64(1000000)
|
||||
cpuCores := []string{"cpu0", "cpu1"}
|
||||
msr, fsMock := getMsrServiceWithMockedFs()
|
||||
|
||||
prepareTestData(fsMock, cpuCores, msr, t)
|
||||
cores := trimCPUFromCores(cpuCores)
|
||||
|
||||
methodCallNumberPerCore := len(msr.msrOffsets)
|
||||
|
||||
// Normal execution for first core.
|
||||
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(firstValue, nil).Times(methodCallNumberPerCore).
|
||||
// Fail to read file for second core.
|
||||
On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(uint64(0), errors.New("error reading file")).Times(methodCallNumberPerCore)
|
||||
|
||||
require.NoError(t, msr.readDataFromMsr(cores[0], nil))
|
||||
require.Error(t, msr.readDataFromMsr(cores[1], nil))
|
||||
}
|
||||
|
||||
func TestReadValueFromFileAtOffset(t *testing.T) {
|
||||
cores := []string{"cpu0", "cpu1"}
|
||||
msr, fsMock := getMsrServiceWithMockedFs()
|
||||
ctx := context.Background()
|
||||
testChannel := make(chan uint64, 1)
|
||||
defer close(testChannel)
|
||||
zero := uint64(0)
|
||||
|
||||
prepareTestData(fsMock, cores, msr, t)
|
||||
|
||||
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(zero, errors.New("error reading file")).Once()
|
||||
require.Error(t, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0))
|
||||
|
||||
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
|
||||
Return(zero, nil).Once()
|
||||
require.Equal(t, nil, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0))
|
||||
require.Equal(t, zero, <-testChannel)
|
||||
}
|
||||
|
||||
func prepareTestData(fsMock *mockFileService, cores []string, msr *msrServiceImpl, t *testing.T) {
|
||||
// Prepare MSR offsets and CPUCoresData for test.
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(cores, nil).Once()
|
||||
require.NoError(t, msr.setCPUCores())
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
}
|
||||
|
||||
func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expectedValue uint64, verifyDelta bool, delta uint64) {
|
||||
for _, core := range cores {
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].c3)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].c6)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].c7)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter)
|
||||
require.Equal(t, (expectedValue>>16)&0xFF, msr.cpuCoresData[core].throttleTemp)
|
||||
require.Equal(t, (expectedValue>>16)&0x7F, msr.cpuCoresData[core].temp)
|
||||
|
||||
if verifyDelta {
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].c3Delta)
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].c6Delta)
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].c7Delta)
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].mperfDelta)
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].aperfDelta)
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].timeStampCounterDelta)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getMsrServiceWithMockedFs() (*msrServiceImpl, *mockFileService) {
|
||||
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
|
||||
logger := testutil.Logger{Name: "PowerPluginTest"}
|
||||
fsMock := &mockFileService{}
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(cores, nil).Once()
|
||||
msr := newMsrServiceWithFs(logger, fsMock)
|
||||
|
||||
return msr, fsMock
|
||||
}
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
)
|
||||
|
||||
const (
|
||||
intelRaplPath = "/sys/devices/virtual/powercap/intel-rapl"
|
||||
intelRaplSocketPartialPath = "%s/intel-rapl:%s"
|
||||
energyUjPartialPath = "%s/energy_uj"
|
||||
maxEnergyRangeUjPartialPath = "%s/max_energy_range_uj"
|
||||
maxPowerUwPartialPath = "%s/constraint_0_max_power_uw"
|
||||
intelRaplDramPartialPath = "%s/intel-rapl:%s/%s"
|
||||
intelRaplDramNamePartialPath = "%s/name"
|
||||
)
|
||||
|
||||
// raplService is responsible for interactions with RAPL.
|
||||
type raplService interface {
|
||||
initializeRaplData()
|
||||
getRaplData() map[string]*raplData
|
||||
retrieveAndCalculateData(socketID string) error
|
||||
getConstraintMaxPowerWatts(socketID string) (float64, error)
|
||||
}
|
||||
|
||||
type raplServiceImpl struct {
|
||||
log telegraf.Logger
|
||||
data map[string]*raplData
|
||||
dramFolders map[string]string
|
||||
fs fileService
|
||||
}
|
||||
|
||||
// initializeRaplData looks for RAPL folders and initializes data map with fetched information.
|
||||
func (r *raplServiceImpl) initializeRaplData() {
|
||||
r.prepareData()
|
||||
r.findDramFolders()
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) getRaplData() map[string]*raplData {
|
||||
return r.data
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
||||
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
|
||||
socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath)
|
||||
socketEnergyUjFile, err := os.Open(socketEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err)
|
||||
}
|
||||
defer socketEnergyUjFile.Close()
|
||||
|
||||
dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID])
|
||||
dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath)
|
||||
dramEnergyUjFile, err := os.Open(dramEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err)
|
||||
}
|
||||
defer dramEnergyUjFile.Close()
|
||||
|
||||
socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath)
|
||||
socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err)
|
||||
}
|
||||
defer socketMaxEnergyUjFile.Close()
|
||||
|
||||
dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath)
|
||||
dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err)
|
||||
}
|
||||
defer dramMaxEnergyUjFile.Close()
|
||||
|
||||
return r.calculateData(socketID, socketEnergyUjFile, dramEnergyUjFile, socketMaxEnergyUjFile, dramMaxEnergyUjFile)
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) {
|
||||
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
|
||||
socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath)
|
||||
socketMaxPowerFile, err := os.Open(socketMaxPowerPath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err)
|
||||
}
|
||||
defer socketMaxPowerFile.Close()
|
||||
|
||||
socketMaxPower, _, err := r.fs.readFileToFloat64(socketMaxPowerFile)
|
||||
return convertMicroWattToWatt(socketMaxPower), err
|
||||
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) prepareData() {
|
||||
intelRaplPrefix := "intel-rapl:"
|
||||
intelRapl := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*")
|
||||
raplPattern := fmt.Sprintf("%s/%s", intelRaplPath, intelRapl)
|
||||
|
||||
raplPaths, err := r.fs.getStringsMatchingPatternOnPath(raplPattern)
|
||||
if err != nil {
|
||||
r.log.Errorf("error while preparing RAPL data: %v", err)
|
||||
r.data = make(map[string]*raplData)
|
||||
return
|
||||
}
|
||||
if len(raplPaths) == 0 {
|
||||
r.log.Debugf("RAPL data wasn't found using pattern: %s", raplPattern)
|
||||
r.data = make(map[string]*raplData)
|
||||
return
|
||||
}
|
||||
|
||||
// If RAPL exists initialize data map (if it wasn't initialized before).
|
||||
if len(r.data) == 0 {
|
||||
for _, raplPath := range raplPaths {
|
||||
socketID := strings.TrimPrefix(filepath.Base(raplPath), intelRaplPrefix)
|
||||
r.data[socketID] = &raplData{
|
||||
socketCurrentEnergy: 0,
|
||||
dramCurrentEnergy: 0,
|
||||
socketEnergy: 0,
|
||||
dramEnergy: 0,
|
||||
readDate: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) findDramFolders() {
|
||||
intelRaplPrefix := "intel-rapl:"
|
||||
intelRaplDram := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*[0-9]*")
|
||||
// Clean existing map
|
||||
r.dramFolders = make(map[string]string)
|
||||
|
||||
for socketID := range r.data {
|
||||
path := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
|
||||
raplFoldersPattern := fmt.Sprintf("%s/%s", path, intelRaplDram)
|
||||
pathsToRaplFolders, err := r.fs.getStringsMatchingPatternOnPath(raplFoldersPattern)
|
||||
if err != nil {
|
||||
r.log.Errorf("error during lookup for rapl dram: %v", err)
|
||||
continue
|
||||
}
|
||||
if len(pathsToRaplFolders) == 0 {
|
||||
r.log.Debugf("RAPL folders weren't found using pattern: %s", raplFoldersPattern)
|
||||
continue
|
||||
}
|
||||
|
||||
raplFolders := make([]string, 0)
|
||||
for _, folderPath := range pathsToRaplFolders {
|
||||
raplFolders = append(raplFolders, filepath.Base(folderPath))
|
||||
}
|
||||
|
||||
r.findDramFolder(raplFolders, socketID)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) {
|
||||
for _, raplFolder := range raplFolders {
|
||||
potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder)
|
||||
nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath)
|
||||
read, err := r.fs.readFile(nameFilePath)
|
||||
if err != nil {
|
||||
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove new line character
|
||||
trimmedString := strings.TrimRight(string(read), "\n")
|
||||
if trimmedString == "dram" {
|
||||
// There should be only one DRAM folder per socket
|
||||
r.dramFolders[socketID] = raplFolder
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.Reader, dramEnergyUjFile io.Reader,
|
||||
socketMaxEnergyUjFile io.Reader, dramMaxEnergyUjFile io.Reader) error {
|
||||
|
||||
newSocketEnergy, _, err := r.readEnergyInJoules(socketEnergyUjFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
newDramEnergy, readDate, err := r.readEnergyInJoules(dramEnergyUjFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
interval := convertNanoSecondsToSeconds(readDate - r.data[socketID].readDate)
|
||||
r.data[socketID].readDate = readDate
|
||||
if interval == 0 {
|
||||
return fmt.Errorf("interval between last two Telegraf cycles is 0")
|
||||
}
|
||||
|
||||
if newSocketEnergy > r.data[socketID].socketEnergy {
|
||||
r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval
|
||||
} else {
|
||||
socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// When socket energy_uj counter reaches maximum value defined in max_energy_range_uj file it
|
||||
// starts counting from 0.
|
||||
r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval
|
||||
}
|
||||
|
||||
if newDramEnergy > r.data[socketID].dramEnergy {
|
||||
r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval
|
||||
} else {
|
||||
dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// When dram energy_uj counter reaches maximum value defined in max_energy_range_uj file it
|
||||
// starts counting from 0.
|
||||
r.data[socketID].dramCurrentEnergy = (dramMaxEnergy - r.data[socketID].dramEnergy + newDramEnergy) / interval
|
||||
}
|
||||
r.data[socketID].socketEnergy = newSocketEnergy
|
||||
r.data[socketID].dramEnergy = newDramEnergy
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *raplServiceImpl) readEnergyInJoules(reader io.Reader) (float64, int64, error) {
|
||||
currentEnergy, readDate, err := r.fs.readFileToFloat64(reader)
|
||||
return convertMicroJoulesToJoules(currentEnergy), readDate, err
|
||||
}
|
||||
|
||||
func newRaplServiceWithFs(logger telegraf.Logger, fs fileService) *raplServiceImpl {
|
||||
return &raplServiceImpl{
|
||||
log: logger,
|
||||
data: make(map[string]*raplData),
|
||||
dramFolders: make(map[string]string),
|
||||
fs: fs,
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import mock "github.com/stretchr/testify/mock"
|
||||
|
||||
// mockRaplService is an autogenerated mock type for the raplService type
|
||||
type mockRaplService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
||||
// getConstraintMaxPowerWatts provides a mock function with given fields: socketID
|
||||
func (_m *mockRaplService) getConstraintMaxPowerWatts(socketID string) (float64, error) {
|
||||
ret := _m.Called(socketID)
|
||||
|
||||
var r0 float64
|
||||
if rf, ok := ret.Get(0).(func(string) float64); ok {
|
||||
r0 = rf(socketID)
|
||||
} else {
|
||||
r0 = ret.Get(0).(float64)
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(string) error); ok {
|
||||
r1 = rf(socketID)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// getRaplData provides a mock function with given fields:
|
||||
func (_m *mockRaplService) getRaplData() map[string]*raplData {
|
||||
ret := _m.Called()
|
||||
|
||||
var r0 map[string]*raplData
|
||||
if rf, ok := ret.Get(0).(func() map[string]*raplData); ok {
|
||||
r0 = rf()
|
||||
} else {
|
||||
if ret.Get(0) != nil {
|
||||
r0 = ret.Get(0).(map[string]*raplData)
|
||||
}
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
||||
// initializeRaplData provides a mock function with given fields:
|
||||
func (_m *mockRaplService) initializeRaplData() {
|
||||
_m.Called()
|
||||
}
|
||||
|
||||
// retrieveAndCalculateData provides a mock function with given fields: socketID
|
||||
func (_m *mockRaplService) retrieveAndCalculateData(socketID string) error {
|
||||
ret := _m.Called(socketID)
|
||||
|
||||
var r0 error
|
||||
if rf, ok := ret.Get(0).(func(string) error); ok {
|
||||
r0 = rf(socketID)
|
||||
} else {
|
||||
r0 = ret.Error(0)
|
||||
}
|
||||
|
||||
return r0
|
||||
}
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
func TestPrepareData(t *testing.T) {
|
||||
sockets := []string{"intel-rapl:0", "intel-rapl:1"}
|
||||
rapl, fsMock := getRaplWithMockedFs()
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).Return(sockets, nil).Twice()
|
||||
rapl.prepareData()
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, len(sockets), len(rapl.getRaplData()))
|
||||
|
||||
// Verify no data is wiped in the next calls
|
||||
socketEnergy := 74563813417.0
|
||||
socketID := "0"
|
||||
rapl.data[socketID].socketEnergy = socketEnergy
|
||||
|
||||
rapl.prepareData()
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, len(sockets), len(rapl.getRaplData()))
|
||||
require.Equal(t, socketEnergy, rapl.data[socketID].socketEnergy)
|
||||
|
||||
// Verify data is wiped once there is no RAPL folders
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(nil, errors.New("missing RAPL")).Once()
|
||||
rapl.prepareData()
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, 0, len(rapl.getRaplData()))
|
||||
}
|
||||
|
||||
func TestFindDramFolders(t *testing.T) {
|
||||
sockets := []string{"0", "1"}
|
||||
raplFolders := []string{"intel-rapl:0:1", "intel-rapl:0:2", "intel-rapl:0:3"}
|
||||
rapl, fsMock := getRaplWithMockedFs()
|
||||
|
||||
for _, socketID := range sockets {
|
||||
rapl.data[socketID] = &raplData{}
|
||||
}
|
||||
|
||||
firstPath := fmt.Sprintf(intelRaplDramNamePartialPath,
|
||||
fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "0", raplFolders[2]))
|
||||
secondPath := fmt.Sprintf(intelRaplDramNamePartialPath,
|
||||
fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "1", raplFolders[1]))
|
||||
|
||||
fsMock.
|
||||
On("getStringsMatchingPatternOnPath", mock.Anything).Return(raplFolders, nil).Twice().
|
||||
On("readFile", firstPath).Return([]byte("dram"), nil).Once().
|
||||
On("readFile", secondPath).Return([]byte("dram"), nil).Once().
|
||||
On("readFile", mock.Anything).Return([]byte("random"), nil)
|
||||
|
||||
rapl.findDramFolders()
|
||||
|
||||
require.Equal(t, len(sockets), len(rapl.dramFolders))
|
||||
require.Equal(t, raplFolders[2], rapl.dramFolders["0"])
|
||||
require.Equal(t, raplFolders[1], rapl.dramFolders["1"])
|
||||
fsMock.AssertNumberOfCalls(t, "readFile", 5)
|
||||
}
|
||||
|
||||
func TestCalculateDataOverflowCases(t *testing.T) {
|
||||
socketID := "1"
|
||||
rapl, fsMock := getRaplWithMockedFs()
|
||||
|
||||
rapl.data[socketID] = &raplData{}
|
||||
rapl.data[socketID].socketEnergy = convertMicroJoulesToJoules(23424123.1)
|
||||
rapl.data[socketID].dramEnergy = convertMicroJoulesToJoules(345611233.2)
|
||||
rapl.data[socketID].readDate = 54123
|
||||
|
||||
interval := int64(54343)
|
||||
convertedInterval := convertNanoSecondsToSeconds(interval - rapl.data[socketID].readDate)
|
||||
|
||||
newEnergy := 3343443.4
|
||||
maxEnergy := 234324546456.6
|
||||
convertedNewEnergy := convertMicroJoulesToJoules(newEnergy)
|
||||
convertedMaxNewEnergy := convertMicroJoulesToJoules(maxEnergy)
|
||||
|
||||
maxDramEnergy := 981230834098.3
|
||||
newDramEnergy := 4533311.1
|
||||
convertedMaxDramEnergy := convertMicroJoulesToJoules(maxDramEnergy)
|
||||
convertedDramEnergy := convertMicroJoulesToJoules(newDramEnergy)
|
||||
|
||||
expectedCurrentEnergy := (convertedMaxNewEnergy - rapl.data[socketID].socketEnergy + convertedNewEnergy) / convertedInterval
|
||||
expectedDramCurrentEnergy := (convertedMaxDramEnergy - rapl.data[socketID].dramEnergy + convertedDramEnergy) / convertedInterval
|
||||
|
||||
fsMock.
|
||||
On("readFileToFloat64", mock.Anything).Return(newEnergy, int64(12321), nil).Once().
|
||||
On("readFileToFloat64", mock.Anything).Return(newDramEnergy, interval, nil).Once().
|
||||
On("readFileToFloat64", mock.Anything).Return(maxEnergy, int64(64534), nil).Once().
|
||||
On("readFileToFloat64", mock.Anything).Return(maxDramEnergy, int64(98342), nil).Once()
|
||||
|
||||
require.NoError(t, rapl.calculateData(socketID, strings.NewReader(mock.Anything), strings.NewReader(mock.Anything),
|
||||
strings.NewReader(mock.Anything), strings.NewReader(mock.Anything)))
|
||||
|
||||
require.Equal(t, expectedCurrentEnergy, rapl.data[socketID].socketCurrentEnergy)
|
||||
require.Equal(t, expectedDramCurrentEnergy, rapl.data[socketID].dramCurrentEnergy)
|
||||
}
|
||||
|
||||
func getRaplWithMockedFs() (*raplServiceImpl, *mockFileService) {
|
||||
logger := testutil.Logger{Name: "PowerPluginTest"}
|
||||
fsMock := &mockFileService{}
|
||||
rapl := newRaplServiceWithFs(logger, fsMock)
|
||||
|
||||
return rapl, fsMock
|
||||
}
|
||||
|
|
@ -0,0 +1,49 @@
|
|||
// +build linux
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
const (
|
||||
microJouleToJoule = 1.0 / 1000000
|
||||
microWattToWatt = 1.0 / 1000000
|
||||
kiloHertzToMegaHertz = 1.0 / 1000
|
||||
nanoSecondsToSeconds = 1.0 / 1000000000
|
||||
cyclesToHertz = 1.0 / 1000000
|
||||
)
|
||||
|
||||
func convertMicroJoulesToJoules(mJ float64) float64 {
|
||||
return mJ * microJouleToJoule
|
||||
}
|
||||
|
||||
func convertMicroWattToWatt(mW float64) float64 {
|
||||
return mW * microWattToWatt
|
||||
}
|
||||
|
||||
func convertKiloHertzToMegaHertz(kHz float64) float64 {
|
||||
return kHz * kiloHertzToMegaHertz
|
||||
}
|
||||
|
||||
func convertNanoSecondsToSeconds(ns int64) float64 {
|
||||
return float64(ns) * nanoSecondsToSeconds
|
||||
}
|
||||
|
||||
func convertProcessorCyclesToHertz(pc uint64) float64 {
|
||||
return float64(pc) * cyclesToHertz
|
||||
}
|
||||
|
||||
func roundFloatToNearestTwoDecimalPlaces(n float64) float64 {
|
||||
return math.Round(n*100) / 100
|
||||
}
|
||||
|
||||
func convertIntegerArrayToStringArray(array []int64) []string {
|
||||
stringArray := make([]string, 0)
|
||||
for _, value := range array {
|
||||
stringArray = append(stringArray, strconv.FormatInt(value, 10))
|
||||
}
|
||||
|
||||
return stringArray
|
||||
}
|
||||
Loading…
Reference in New Issue