New Intel PowerStat input plugin (#8488)

This commit is contained in:
MaciejMis 2020-12-10 21:23:27 +01:00 committed by GitHub
parent 99287d89e0
commit 9166a16577
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 2384 additions and 0 deletions

View File

@ -214,6 +214,7 @@ For documentation on the latest development code see the [documentation index][d
* [influxdb](./plugins/inputs/influxdb)
* [influxdb_listener](./plugins/inputs/influxdb_listener)
* [influxdb_v2_listener](./plugins/inputs/influxdb_v2_listener)
* [intel_powerstat](plugins/inputs/intel_powerstat)
* [intel_rdt](./plugins/inputs/intel_rdt)
* [internal](./plugins/inputs/internal)
* [interrupts](./plugins/inputs/interrupts)

View File

@ -63,6 +63,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_v2_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/intel_powerstat"
_ "github.com/influxdata/telegraf/plugins/inputs/intel_rdt"
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
_ "github.com/influxdata/telegraf/plugins/inputs/interrupts"

View File

@ -0,0 +1,206 @@
# Intel PowerStat Input Plugin
Telemetry frameworks allow users to monitor critical platform level metrics.
Key source of platform telemetry is power domain that is beneficial for MANO/Monitoring&Analytics systems
to take preventive/corrective actions based on platform busyness, CPU temperature, actual CPU utilization
and power statistics. Main use cases are power saving and workload migration.
Intel PowerStat plugin supports Intel based platforms and assumes presence of Linux based OS.
### Configuration:
```toml
# Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization.
[[inputs.intel_powerstat]]
## All global metrics are always collected by Intel PowerStat plugin.
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
## telemetry will be exposed by Intel PowerStat plugin.
## Supported options:
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
# cpu_metrics = []
```
### Example: Configuration with no per-CPU telemetry
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
cpu_metrics = []
```
### Example: Configuration with no per-CPU telemetry - equivalent case
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
```
### Example: Configuration for CPU Temperature and Frequency only
This configuration allows getting global metrics plus subset of per-CPU metrics (CPU Temperature and Current Frequency):
```toml
[[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
```
### Example: Configuration with all available metrics
This configuration allows getting global metrics and all per-CPU metrics:
```toml
[[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"]
```
### SW Dependencies:
Plugin is based on Linux Kernel modules that expose specific metrics over `sysfs` or `devfs` interfaces.
The following dependencies are expected by plugin:
- _intel-rapl_ module which exposes Intel Runtime Power Limiting metrics over `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`),
- _msr_ kernel module that provides access to processor model specific registers over `devfs` (`/dev/cpu/cpu%d/msr`),
- _cpufreq_ kernel module - which exposes per-CPU Frequency over `sysfs` (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`).
Minimum kernel version required is 3.13 to satisfy all requirements.
Please make sure that kernel modules are loaded and running. You might have to manually enable them by using `modprobe`.
Exact commands to be executed are:
```
sudo modprobe cpufreq-stats
sudo modprobe msr
sudo modprobe intel_rapl
```
**Telegraf with Intel PowerStat plugin enabled may require root access to read model specific registers (MSRs)**
to retrieve data for calculation of most critical per-CPU specific metrics:
- `cpu_busy_frequency_mhz`
- `cpu_temperature_celsius`
- `cpu_c1_state_residency_percent`
- `cpu_c6_state_residency_percent`
- `cpu_busy_cycles_percent`
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
### HW Dependencies:
Specific metrics require certain processor features to be present, otherwise Intel PowerStat plugin won't be able to
read them. When using Linux Kernel based OS, user can detect supported processor features reading `/proc/cpuinfo` file.
Plugin assumes crucial properties are the same for all CPU cores in the system.
The following processor properties are examined in more detail in this section:
processor _cpu family_, _model_ and _flags_.
The following processor properties are required by the plugin:
- Processor _cpu family_ must be Intel (0x6) - since data used by the plugin assumes Intel specific
model specific registers for all features
- The following processor flags shall be present:
- "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect
the following metrics: _powerstat_core.cpu_temperature_, _powerstat_core.cpu_busy_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_, _powerstat_core._cpu_c6_state_residency_
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat_core.cpu_busy_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_
- "_dts_" shall be present to collect _powerstat_core.cpu_temperature_
- Processor _Model number_ must be one of the following values for plugin to read _powerstat_core.cpu_c1_state_residency_
and _powerstat_core.cpu_c6_state_residency_ metrics:
| Model number | Processor name |
|-----|-------------|
| 0x37 | Intel Atom® Bay Trail |
| 0x4D | Intel Atom® Avaton |
| 0x5C | Intel Atom® Apollo Lake |
| 0x5F | Intel Atom® Denverton |
| 0x7A | Intel Atom® Goldmont |
| 0x4C | Intel Atom® Airmont |
| 0x86 | Intel Atom® Jacobsville |
| 0x96 | Intel Atom® Elkhart Lake |
| 0x9C | Intel Atom® Jasper Lake |
| 0x1A | Intel Nehalem-EP |
| 0x1E | Intel Nehalem |
| 0x1F | Intel Nehalem-G |
| 0x2E | Intel Nehalem-EX |
| 0x25 | Intel Westmere |
| 0x2C | Intel Westmere-EP |
| 0x2F | Intel Westmere-EX |
| 0x2A | Intel Sandybridge |
| 0x2D | Intel Sandybridge-X |
| 0x3A | Intel Ivybridge |
| 0x3E | Intel Ivybridge-X |
| 0x4E | Intel Atom® Silvermont-MID |
| 0x5E | Intel Skylake |
| 0x55 | Intel Skylake-X |
| 0x8E | Intel Kabylake-L |
| 0x9E | Intel Kabylake |
| 0x6A | Intel Icelake-X |
| 0x6C | Intel Icelake-D |
| 0x7D | Intel Icelake |
| 0x7E | Intel Icelake-L |
| 0x9D | Intel Icelake-NNPI |
| 0x3C | Intel Haswell |
| 0x3F | Intel Haswell-X |
| 0x45 | Intel Haswell-L |
| 0x46 | Intel Haswell-G |
| 0x3D | Intel Broadwell |
| 0x47 | Intel Broadwell-G |
| 0x4F | Intel Broadwell-X |
| 0x56 | Intel Broadwell-D |
| 0x66 | Intel Cannonlake-L |
| 0x57 | Intel Xeon® PHI Knights Landing |
| 0x85 | Intel Xeon® PHI Knights Mill |
| 0xA5 | Intel CometLake |
| 0xA6 | Intel CometLake-L |
| 0x8F | Intel Sapphire Rapids X |
| 0x8C | Intel TigerLake-L |
| 0x8D | Intel TigerLake |
### Metrics
All metrics collected by Intel PowerStat plugin are collected in fixed intervals.
Metrics that reports processor C-state residency or power are calculated over elapsed intervals.
When starting to measure metrics, plugin skips first iteration of metrics if they are based on deltas with previous value.
**The following measurements are supported by Intel PowerStat plugin:**
- powerstat_core
- The following Tags are returned by plugin with powerstat_core measurements:
| Tag | Description |
|-----|-------------|
| `package_id` | ID of platform package/socket |
| `core_id` | ID of physical processor core |
| `cpu_id` | ID of logical processor core |
Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key)
while core_id and package_id tags are additional topology information.
- Available metrics for powerstat_core measurement
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
| `cpu_busy_cycles_percent` | CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
- powerstat_package
- The following Tags are returned by plugin with powerstat_package measurements:
| Tag | Description |
|-----|-------------|
| `package_id` | ID of platform package/socket |
Measurement powerstat_package metrics are collected per processor package - _package_id_ tag indicates which
package metric refers to.
- Available metrics for powerstat_package measurement
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
### Example Output:
```
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_cycles_percent=0.8 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000
```

View File

@ -0,0 +1,37 @@
package intel_powerstat
type msrData struct {
mperf uint64
aperf uint64
timeStampCounter uint64
c3 uint64
c6 uint64
c7 uint64
throttleTemp uint64
temp uint64
mperfDelta uint64
aperfDelta uint64
timeStampCounterDelta uint64
c3Delta uint64
c6Delta uint64
c7Delta uint64
readDate int64
}
type raplData struct {
dramCurrentEnergy float64
socketCurrentEnergy float64
socketEnergy float64
dramEnergy float64
readDate int64
}
type cpuInfo struct {
physicalID string
coreID string
cpuID string
vendorID string
cpuFamily string
model string
flags string
}

View File

@ -0,0 +1,154 @@
// +build linux
package intel_powerstat
import (
"bufio"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
)
// fileService is responsible for handling operations on files.
type fileService interface {
getCPUInfoStats() (map[string]*cpuInfo, error)
getStringsMatchingPatternOnPath(path string) ([]string, error)
readFile(path string) ([]byte, error)
readFileToFloat64(reader io.Reader) (float64, int64, error)
readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error)
}
type fileServiceImpl struct {
}
// getCPUInfoStats retrieves basic information about CPU from /proc/cpuinfo.
func (fs *fileServiceImpl) getCPUInfoStats() (map[string]*cpuInfo, error) {
path := "/proc/cpuinfo"
cpuInfoFile, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("error while reading %s, err: %v", path, err)
}
defer cpuInfoFile.Close()
scanner := bufio.NewScanner(cpuInfoFile)
processorRegexp := regexp.MustCompile(`^processor\t+:\s([0-9]+)\n*$`)
physicalIDRegexp := regexp.MustCompile(`^physical id\t+:\s([0-9]+)\n*$`)
coreIDRegexp := regexp.MustCompile(`^core id\t+:\s([0-9]+)\n*$`)
vendorIDRegexp := regexp.MustCompile(`^vendor_id\t+:\s([a-zA-Z]+)\n*$`)
cpuFamilyRegexp := regexp.MustCompile(`^cpu\sfamily\t+:\s([0-9]+)\n*$`)
modelRegexp := regexp.MustCompile(`^model\t+:\s([0-9]+)\n*$`)
flagsRegexp := regexp.MustCompile(`^flags\t+:\s(.+)\n*$`)
stats := make(map[string]*cpuInfo)
currentInfo := &cpuInfo{}
for scanner.Scan() {
line := scanner.Text()
processorRes := processorRegexp.FindStringSubmatch(line)
if len(processorRes) > 1 {
currentInfo = &cpuInfo{
cpuID: processorRes[1],
}
}
vendorIDRes := vendorIDRegexp.FindStringSubmatch(line)
if len(vendorIDRes) > 1 {
currentInfo.vendorID = vendorIDRes[1]
}
physicalIDRes := physicalIDRegexp.FindStringSubmatch(line)
if len(physicalIDRes) > 1 {
currentInfo.physicalID = physicalIDRes[1]
}
coreIDRes := coreIDRegexp.FindStringSubmatch(line)
if len(coreIDRes) > 1 {
currentInfo.coreID = coreIDRes[1]
}
cpuFamilyRes := cpuFamilyRegexp.FindStringSubmatch(line)
if len(cpuFamilyRes) > 1 {
currentInfo.cpuFamily = cpuFamilyRes[1]
}
modelRes := modelRegexp.FindStringSubmatch(line)
if len(modelRes) > 1 {
currentInfo.model = modelRes[1]
}
flagsRes := flagsRegexp.FindStringSubmatch(line)
if len(flagsRes) > 1 {
currentInfo.flags = flagsRes[1]
// Flags is the last value we have to acquire, so currentInfo is added to map.
stats[currentInfo.cpuID] = currentInfo
}
}
return stats, nil
}
// getStringsMatchingPatternOnPath looks for filenames and directory names on path matching given regexp.
// It ignores file system errors such as I/O errors reading directories. The only possible returned error
// is ErrBadPattern, when pattern is malformed.
func (fs *fileServiceImpl) getStringsMatchingPatternOnPath(path string) ([]string, error) {
return filepath.Glob(path)
}
// readFile reads file on path and return string content.
func (fs *fileServiceImpl) readFile(path string) ([]byte, error) {
out, err := ioutil.ReadFile(path)
if err != nil {
return make([]byte, 0), err
}
return out, nil
}
// readFileToFloat64 reads file on path and tries to parse content to float64.
func (fs *fileServiceImpl) readFileToFloat64(reader io.Reader) (float64, int64, error) {
read, err := ioutil.ReadAll(reader)
if err != nil {
return 0, 0, err
}
readDate := time.Now().UnixNano()
// Remove new line character
trimmedString := strings.TrimRight(string(read), "\n")
// Parse result to float64
parsedValue, err := strconv.ParseFloat(trimmedString, 64)
if err != nil {
return 0, 0, fmt.Errorf("error parsing string to float for %s", trimmedString)
}
return parsedValue, readDate, nil
}
// readFileAtOffsetToUint64 reads 8 bytes from passed file at given offset.
func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) {
buffer := make([]byte, 8)
if offset == 0 {
return 0, fmt.Errorf("file offset %d should not be 0", offset)
}
_, err := reader.ReadAt(buffer, offset)
if err != nil {
return 0, fmt.Errorf("error on reading file at offset %d, err: %v", offset, err)
}
return binary.LittleEndian.Uint64(buffer), nil
}
func newFileService() *fileServiceImpl {
return &fileServiceImpl{}
}

View File

@ -0,0 +1,132 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package intel_powerstat
import (
io "io"
mock "github.com/stretchr/testify/mock"
)
// mockFileService is an autogenerated mock type for the fileService type
type mockFileService struct {
mock.Mock
}
// getCPUInfoStats provides a mock function with given fields:
func (_m *mockFileService) getCPUInfoStats() (map[string]*cpuInfo, error) {
ret := _m.Called()
var r0 map[string]*cpuInfo
if rf, ok := ret.Get(0).(func() map[string]*cpuInfo); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(map[string]*cpuInfo)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// getStringsMatchingPatternOnPath provides a mock function with given fields: path
func (_m *mockFileService) getStringsMatchingPatternOnPath(path string) ([]string, error) {
ret := _m.Called(path)
var r0 []string
if rf, ok := ret.Get(0).(func(string) []string); ok {
r0 = rf(path)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]string)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(path)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// readFile provides a mock function with given fields: path
func (_m *mockFileService) readFile(path string) ([]byte, error) {
ret := _m.Called(path)
var r0 []byte
if rf, ok := ret.Get(0).(func(string) []byte); ok {
r0 = rf(path)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]byte)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(path)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// readFileAtOffsetToUint64 provides a mock function with given fields: reader, offset
func (_m *mockFileService) readFileAtOffsetToUint64(reader io.ReaderAt, offset int64) (uint64, error) {
ret := _m.Called(reader, offset)
var r0 uint64
if rf, ok := ret.Get(0).(func(io.ReaderAt, int64) uint64); ok {
r0 = rf(reader, offset)
} else {
r0 = ret.Get(0).(uint64)
}
var r1 error
if rf, ok := ret.Get(1).(func(io.ReaderAt, int64) error); ok {
r1 = rf(reader, offset)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// readFileToFloat64 provides a mock function with given fields: reader
func (_m *mockFileService) readFileToFloat64(reader io.Reader) (float64, int64, error) {
ret := _m.Called(reader)
var r0 float64
if rf, ok := ret.Get(0).(func(io.Reader) float64); ok {
r0 = rf(reader)
} else {
r0 = ret.Get(0).(float64)
}
var r1 int64
if rf, ok := ret.Get(1).(func(io.Reader) int64); ok {
r1 = rf(reader)
} else {
r1 = ret.Get(1).(int64)
}
var r2 error
if rf, ok := ret.Get(2).(func(io.Reader) error); ok {
r2 = rf(reader)
} else {
r2 = ret.Error(2)
}
return r0, r1, r2
}

View File

@ -0,0 +1,486 @@
// +build linux
package intel_powerstat
import (
"fmt"
"math/big"
"strings"
"sync"
"time"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)
const (
cpuFrequency = "cpu_frequency"
cpuBusyFrequency = "cpu_busy_frequency"
cpuTemperature = "cpu_temperature"
cpuC1StateResidency = "cpu_c1_state_residency"
cpuC6StateResidency = "cpu_c6_state_residency"
cpuBusyCycles = "cpu_busy_cycles"
percentageMultiplier = 100
)
// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.
type PowerStat struct {
CPUMetrics []string `toml:"cpu_metrics"`
Log telegraf.Logger `toml:"-"`
fs fileService
rapl raplService
msr msrService
cpuFrequency bool
cpuBusyFrequency bool
cpuTemperature bool
cpuC1StateResidency bool
cpuC6StateResidency bool
cpuBusyCycles bool
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
}
// Description returns a one-sentence description on the plugin.
func (p *PowerStat) Description() string {
return `Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.`
}
// SampleConfig returns the default configuration of the plugin.
func (p *PowerStat) SampleConfig() string {
return `
## All global metrics are always collected by Intel PowerStat plugin.
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
## telemetry will be exposed by Intel PowerStat plugin.
## Supported options:
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
# cpu_metrics = []
`
}
// Init performs one time setup of the plugin.
func (p *PowerStat) Init() error {
p.parseCPUMetricsConfig()
err := p.verifyProcessor()
if err != nil {
return err
}
// Initialize MSR service only when there is at least one core metric enabled.
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles {
p.msr = newMsrServiceWithFs(p.Log, p.fs)
}
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
return nil
}
// Gather takes in an accumulator and adds the metrics that the Input gathers.
func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
p.addGlobalMetrics(acc)
if p.areCoreMetricsEnabled() {
p.addPerCoreMetrics(acc)
}
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations.
p.skipFirstIteration = false
return nil
}
func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
// Prepare RAPL data each gather because there is a possibility to disable rapl kernel module
p.rapl.initializeRaplData()
for socketID := range p.rapl.getRaplData() {
err := p.rapl.retrieveAndCalculateData(socketID)
if err != nil {
// In case of an error skip calculating metrics for this socket
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
continue
}
p.addThermalDesignPowerMetric(socketID, acc)
if p.skipFirstIteration {
continue
}
p.addCurrentSocketPowerConsumption(socketID, acc)
p.addCurrentDramPowerConsumption(socketID, acc)
}
}
func (p *PowerStat) addThermalDesignPowerMetric(socketID string, acc telegraf.Accumulator) {
maxPower, err := p.rapl.getConstraintMaxPowerWatts(socketID)
if err != nil {
p.Log.Errorf("error while retrieving TDP of the socket %s, err: %v", socketID, err)
return
}
tags := map[string]string{
"package_id": socketID,
}
fields := map[string]interface{}{
"thermal_design_power_watts": roundFloatToNearestTwoDecimalPlaces(maxPower),
}
acc.AddGauge("powerstat_package", fields, tags)
}
func (p *PowerStat) addCurrentSocketPowerConsumption(socketID string, acc telegraf.Accumulator) {
tags := map[string]string{
"package_id": socketID,
}
fields := map[string]interface{}{
"current_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].socketCurrentEnergy),
}
acc.AddGauge("powerstat_package", fields, tags)
}
func (p *PowerStat) addCurrentDramPowerConsumption(socketID string, acc telegraf.Accumulator) {
tags := map[string]string{
"package_id": socketID,
}
fields := map[string]interface{}{
"current_dram_power_consumption_watts": roundFloatToNearestTwoDecimalPlaces(p.rapl.getRaplData()[socketID].dramCurrentEnergy),
}
acc.AddGauge("powerstat_package", fields, tags)
}
func (p *PowerStat) addPerCoreMetrics(acc telegraf.Accumulator) {
var wg sync.WaitGroup
wg.Add(len(p.msr.getCPUCoresData()))
for cpuID := range p.msr.getCPUCoresData() {
go p.addMetricsForSingleCore(cpuID, acc, &wg)
}
wg.Wait()
}
func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulator, wg *sync.WaitGroup) {
defer wg.Done()
if p.cpuFrequency {
p.addCPUFrequencyMetric(cpuID, acc)
}
// Read data from MSR only if required
if p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature ||
p.cpuBusyFrequency {
err := p.msr.openAndReadMsr(cpuID)
if err != nil {
// In case of an error exit the function. All metrics past this point are dependant on MSR.
p.Log.Debugf("error while reading msr: %v", err)
return
}
}
if p.cpuTemperature {
p.addCPUTemperatureMetric(cpuID, acc)
}
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle.
if p.cpuBusyFrequency {
p.addCPUBusyFrequencyMetric(cpuID, acc)
}
if !p.skipFirstIteration {
if p.cpuC1StateResidency {
p.addCPUC1StateResidencyMetric(cpuID, acc)
}
if p.cpuC6StateResidency {
p.addCPUC6StateResidencyMetric(cpuID, acc)
}
if p.cpuBusyCycles {
p.addCPUBusyCyclesMetric(cpuID, acc)
}
}
}
func (p *PowerStat) addCPUFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
frequency, err := p.msr.retrieveCPUFrequencyForCore(cpuID)
// In case of an error leave func
if err != nil {
p.Log.Debugf("error while reading file: %v", err)
return
}
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_frequency_mhz": roundFloatToNearestTwoDecimalPlaces(frequency),
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
temp := coresData[cpuID].throttleTemp - coresData[cpuID].temp
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_temperature_celsius": temp,
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
mperfDelta := coresData[cpuID].mperfDelta
// Avoid division by 0
if mperfDelta == 0 {
p.Log.Errorf("mperf delta should not equal 0 on core %s", cpuID)
return
}
aperfMperf := float64(coresData[cpuID].aperfDelta) / float64(mperfDelta)
tsc := convertProcessorCyclesToHertz(coresData[cpuID].timeStampCounterDelta)
timeNow := time.Now().UnixNano()
interval := convertNanoSecondsToSeconds(timeNow - coresData[cpuID].readDate)
coresData[cpuID].readDate = timeNow
if p.skipFirstIteration {
return
}
if interval == 0 {
p.Log.Errorf("interval between last two Telegraf cycles is 0")
return
}
busyMhzValue := roundFloatToNearestTwoDecimalPlaces(tsc * aperfMperf / interval)
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_busy_frequency_mhz": busyMhzValue,
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUC1StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
timestampDeltaBig := new(big.Int).SetUint64(coresData[cpuID].timeStampCounterDelta)
// Avoid division by 0
if timestampDeltaBig.Sign() < 1 {
p.Log.Errorf("timestamp delta value %v should not be lower than 1", timestampDeltaBig)
return
}
// Since counter collection is not atomic it may happen that sum of C0, C1, C3, C6 and C7
// is bigger value than TSC, in such case C1 residency shall be set to 0.
// Operating on big.Int to avoid overflow
mperfDeltaBig := new(big.Int).SetUint64(coresData[cpuID].mperfDelta)
c3DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c3Delta)
c6DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c6Delta)
c7DeltaBig := new(big.Int).SetUint64(coresData[cpuID].c7Delta)
c1Big := new(big.Int).Sub(timestampDeltaBig, mperfDeltaBig)
c1Big.Sub(c1Big, c3DeltaBig)
c1Big.Sub(c1Big, c6DeltaBig)
c1Big.Sub(c1Big, c7DeltaBig)
if c1Big.Sign() < 0 {
c1Big = c1Big.SetInt64(0)
}
c1Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1Big.Uint64()) / float64(timestampDeltaBig.Uint64()))
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_c1_state_residency_percent": c1Value,
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
// Avoid division by 0
if coresData[cpuID].timeStampCounterDelta == 0 {
p.Log.Errorf("timestamp counter on offset %s should not equal 0 on cpuID %s",
timestampCounterLocation, cpuID)
return
}
c6Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(coresData[cpuID].c6Delta) / float64(coresData[cpuID].timeStampCounterDelta))
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_c6_state_residency_percent": c6Value,
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
// Avoid division by 0
if coresData[cpuID].timeStampCounterDelta == 0 {
p.Log.Errorf("timestamp counter on offset %s should not equal 0 on cpuID %s",
timestampCounterLocation, cpuID)
return
}
busyCyclesValue := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta))
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
"package_id": cpu.physicalID,
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_busy_cycles_percent": busyCyclesValue,
}
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) parseCPUMetricsConfig() {
if len(p.CPUMetrics) == 0 {
return
}
if contains(p.CPUMetrics, cpuFrequency) {
p.cpuFrequency = true
}
if contains(p.CPUMetrics, cpuC1StateResidency) {
p.cpuC1StateResidency = true
}
if contains(p.CPUMetrics, cpuC6StateResidency) {
p.cpuC6StateResidency = true
}
if contains(p.CPUMetrics, cpuBusyCycles) {
p.cpuBusyCycles = true
}
if contains(p.CPUMetrics, cpuBusyFrequency) {
p.cpuBusyFrequency = true
}
if contains(p.CPUMetrics, cpuTemperature) {
p.cpuTemperature = true
}
}
func (p *PowerStat) verifyProcessor() error {
allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C,
0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E,
0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56,
0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8F, 0x8C, 0x8D}
stats, err := p.fs.getCPUInfoStats()
if err != nil {
return err
}
p.cpuInfo = stats
// First CPU is sufficient for verification.
firstCPU := p.cpuInfo["0"]
if firstCPU == nil {
return fmt.Errorf("first core not found while parsing /proc/cpuinfo")
}
if firstCPU.vendorID != "GenuineIntel" || firstCPU.cpuFamily != "6" {
return fmt.Errorf("Intel processor not found, vendorId: %s", firstCPU.vendorID)
}
if !contains(convertIntegerArrayToStringArray(allowedProcessorModelsForC1C6), firstCPU.model) {
p.cpuC1StateResidency = false
p.cpuC6StateResidency = false
}
if !strings.Contains(firstCPU.flags, "msr") {
p.cpuTemperature = false
p.cpuC6StateResidency = false
p.cpuBusyCycles = false
p.cpuBusyFrequency = false
p.cpuC1StateResidency = false
}
if !strings.Contains(firstCPU.flags, "aperfmperf") {
p.cpuBusyFrequency = false
p.cpuBusyCycles = false
p.cpuC1StateResidency = false
}
if !strings.Contains(firstCPU.flags, "dts") {
p.cpuTemperature = false
}
return nil
}
func contains(slice []string, str string) bool {
for _, v := range slice {
if v == str {
return true
}
}
return false
}
func (p *PowerStat) areCoreMetricsEnabled() bool {
return p.msr != nil && len(p.msr.getCPUCoresData()) > 0
}
// newPowerStat creates and returns PowerStat struct.
func newPowerStat(fs fileService) *PowerStat {
p := &PowerStat{
cpuFrequency: false,
cpuC1StateResidency: false,
cpuC6StateResidency: false,
cpuBusyCycles: false,
cpuTemperature: false,
cpuBusyFrequency: false,
skipFirstIteration: true,
fs: fs,
}
return p
}
func init() {
inputs.Add("intel_powerstat", func() telegraf.Input {
return newPowerStat(newFileService())
})
}

View File

@ -0,0 +1,3 @@
// +build !linux
package intel_powerstat

View File

@ -0,0 +1,494 @@
// +build linux
package intel_powerstat
import (
"errors"
"strconv"
"sync"
"testing"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf/testutil"
)
func TestInitPlugin(t *testing.T) {
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
power, fsMock, _, _ := getPowerWithMockedServices()
fsMock.On("getCPUInfoStats", mock.Anything).
Return(nil, errors.New("error getting cpu stats")).Once()
require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything).
Return(make(map[string]*cpuInfo), nil).Once()
require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": {
vendorID: "GenuineIntel",
cpuFamily: "test",
}}, nil).Once()
require.Error(t, power.Init())
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once().
On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": {
vendorID: "GenuineIntel",
cpuFamily: "6",
}}, nil)
// Verify MSR service initialization.
power.cpuFrequency = true
require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, len(cores), len(power.msr.getCPUCoresData()))
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once()
// In case of an error when fetching cpu cores plugin should proceed with execution.
require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, 0, len(power.msr.getCPUCoresData()))
}
func TestParseCPUMetricsConfig(t *testing.T) {
power, _, _, _ := getPowerWithMockedServices()
disableCoreMetrics(power)
power.CPUMetrics = []string{
"cpu_frequency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
"cpu_busy_frequency",
}
power.parseCPUMetricsConfig()
verifyCoreMetrics(t, power, true)
disableCoreMetrics(power)
verifyCoreMetrics(t, power, false)
power.CPUMetrics = []string{}
power.parseCPUMetricsConfig()
power.CPUMetrics = []string{"cpu_c6_state_residency", "#@$sdkjdfsdf3@", "1pu_c1_state_residency"}
power.parseCPUMetricsConfig()
require.Equal(t, false, power.cpuC1StateResidency)
require.Equal(t, true, power.cpuC6StateResidency)
disableCoreMetrics(power)
verifyCoreMetrics(t, power, false)
power.CPUMetrics = []string{"#@$sdkjdfsdf3@", "1pu_c1_state_residency", "123"}
power.parseCPUMetricsConfig()
verifyCoreMetrics(t, power, false)
}
func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) {
require.Equal(t, enabled, power.cpuFrequency)
require.Equal(t, enabled, power.cpuC1StateResidency)
require.Equal(t, enabled, power.cpuC6StateResidency)
require.Equal(t, enabled, power.cpuBusyCycles)
require.Equal(t, enabled, power.cpuBusyFrequency)
require.Equal(t, enabled, power.cpuTemperature)
}
func TestGather(t *testing.T) {
var acc testutil.Accumulator
packageIDs := []string{"0", "1"}
coreIDs := []string{"0", "1", "2", "3"}
socketCurrentEnergy := 13213852.2
dramCurrentEnergy := 784552.0
preparedCPUData := getPreparedCPUData(coreIDs)
raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, msrMock := getPowerWithMockedServices()
prepareCPUInfo(power, coreIDs, packageIDs)
enableCoreMetrics(power)
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
msrMock.On("getCPUCoresData").Return(preparedCPUData).
On("openAndReadMsr", mock.Anything).Return(nil).
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
require.NoError(t, power.Gather(&acc))
// Number of global metrics : 3
// Number of per core metrics : 6
require.Equal(t, 3*len(packageIDs)+6*len(coreIDs), len(acc.GetTelegrafMetrics()))
}
func TestAddGlobalMetricsNegative(t *testing.T) {
var acc testutil.Accumulator
socketCurrentEnergy := 13213852.2
dramCurrentEnergy := 784552.0
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, _ := getPowerWithMockedServices()
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap))
power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
raplMock.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(make(map[string]*raplData)).Once()
power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNotCalled(t, "retrieveAndCalculateData")
raplMock.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once().
On("getConstraintMaxPowerWatts", mock.Anything).Return(12313851.5, nil).Twice()
power.addGlobalMetrics(&acc)
require.Equal(t, 3, len(acc.GetTelegrafMetrics()))
}
func TestAddGlobalMetricsPositive(t *testing.T) {
var acc testutil.Accumulator
socketCurrentEnergy := 3644574.4
dramCurrentEnergy := 124234872.5
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
maxPower := 546783852.9
power, _, raplMock, _ := getPowerWithMockedServices()
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice().
On("getCurrentDramPowerConsumption", mock.Anything).Return(dramCurrentEnergy)
power.addGlobalMetrics(&acc)
require.Equal(t, 6, len(acc.GetTelegrafMetrics()))
expectedResults := getGlobalMetrics(maxPower, socketCurrentEnergy, dramCurrentEnergy)
for _, test := range expectedResults {
acc.AssertContainsTaggedFields(t, "powerstat_package", test.fields, test.tags)
}
}
func TestAddMetricsForSingleCoreNegative(t *testing.T) {
var wg sync.WaitGroup
var acc testutil.Accumulator
core := "0"
power, _, _, msrMock := getPowerWithMockedServices()
msrMock.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
// Skip generating metric for CPU frequency.
power.cpuFrequency = false
wg.Add(1)
power.addMetricsForSingleCore(core, &acc, &wg)
wg.Wait()
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddCPUFrequencyMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
frequency := 1200000.2
power, _, _, msrMock := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).
Return(float64(0), errors.New("error on reading file")).Once()
power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedFrequency := roundFloatToNearestTwoDecimalPlaces(frequency)
expectedMetric := getPowerCoreMetric("cpu_frequency_mhz", expectedFrequency, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
}
func TestAddCoreCPUTemperatureMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
preparedData := getPreparedCPUData([]string{cpuID})
expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("getCPUCoresData").Return(preparedData).Once()
power.addCPUTemperatureMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_temperature_celsius", expectedTemp, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
}
func TestAddC6StateResidencyMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC6StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_c6_state_residency_percent", expectedC6, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
acc.ClearMetrics()
preparedData[cpuID].timeStampCounterDelta = 0
power.addCPUC6StateResidencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddProcessorBusyCyclesMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) /
float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyCyclesMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
acc.ClearMetrics()
preparedData[cpuID].timeStampCounterDelta = 0
power.addCPUBusyCyclesMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddProcessorBusyFrequencyMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
power.skipFirstIteration = false
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
acc.ClearMetrics()
preparedData[cpuID].mperfDelta = 0
power.addCPUBusyFrequencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddC1StateResidencyMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta -
preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta
expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC1StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_c1_state_residency_percent", expectedC1, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
acc.ClearMetrics()
preparedData[cpuID].timeStampCounterDelta = 0
power.addCPUC1StateResidencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddThermalDesignPowerMetric(t *testing.T) {
var acc testutil.Accumulator
sockets := []string{"0"}
maxPower := 195720672.1
power, _, raplMock, _ := getPowerWithMockedServices()
raplMock.On("getConstraintMaxPowerWatts", mock.Anything).
Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once().
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once()
power.addThermalDesignPowerMetric(sockets[0], &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
power.addThermalDesignPowerMetric(sockets[0], &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
expectedTDP := roundFloatToNearestTwoDecimalPlaces(maxPower)
expectedMetric := getPowerGlobalMetric("thermal_design_power_watts", expectedTDP, sockets[0])
acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags)
}
func getPreparedCPUData(cores []string) map[string]*msrData {
msrDataMap := make(map[string]*msrData)
for _, core := range cores {
msrDataMap[core] = &msrData{
mperf: 43079,
aperf: 82001,
timeStampCounter: 15514,
c3: 52829,
c6: 86930,
c7: 25340,
throttleTemp: 88150,
temp: 40827,
mperfDelta: 23515,
aperfDelta: 33866,
timeStampCounterDelta: 13686000,
c3Delta: 20003,
c6Delta: 44518,
c7Delta: 20979,
}
}
return msrDataMap
}
func getGlobalMetrics(maxPower float64, socketCurrentEnergy float64, dramCurrentEnergy float64) []struct {
fields map[string]interface{}
tags map[string]string
} {
return []struct {
fields map[string]interface{}
tags map[string]string
}{
getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "0"),
getPowerGlobalMetric("thermal_design_power_watts", roundFloatToNearestTwoDecimalPlaces(maxPower), "1"),
getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "0"),
getPowerGlobalMetric("current_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(socketCurrentEnergy), "1"),
getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "0"),
getPowerGlobalMetric("current_dram_power_consumption_watts", roundFloatToNearestTwoDecimalPlaces(dramCurrentEnergy), "1"),
}
}
func getPowerCoreMetric(name string, value interface{}, coreID string, packageID string, cpuID string) struct {
fields map[string]interface{}
tags map[string]string
} {
return getPowerMetric(name, value, map[string]string{"package_id": packageID, "core_id": coreID, "cpu_id": cpuID})
}
func getPowerGlobalMetric(name string, value interface{}, socketID string) struct {
fields map[string]interface{}
tags map[string]string
} {
return getPowerMetric(name, value, map[string]string{"package_id": socketID})
}
func getPowerMetric(name string, value interface{}, tags map[string]string) struct {
fields map[string]interface{}
tags map[string]string
} {
return struct {
fields map[string]interface{}
tags map[string]string
}{
map[string]interface{}{
name: value,
},
tags,
}
}
func prepareCPUInfoForSingleCPU(power *PowerStat, cpuID string, coreID string, packageID string) {
power.cpuInfo = make(map[string]*cpuInfo)
power.cpuInfo[cpuID] = &cpuInfo{
physicalID: packageID,
coreID: coreID,
cpuID: cpuID,
}
}
func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) {
power.cpuInfo = make(map[string]*cpuInfo)
currentCPU := 0
for _, packageID := range packageIDs {
for _, coreID := range coreIDs {
cpuID := strconv.Itoa(currentCPU)
power.cpuInfo[cpuID] = &cpuInfo{
physicalID: packageID,
cpuID: cpuID,
coreID: coreID,
}
currentCPU++
}
}
}
func enableCoreMetrics(power *PowerStat) {
power.cpuC1StateResidency = true
power.cpuC6StateResidency = true
power.cpuTemperature = true
power.cpuBusyFrequency = true
power.cpuFrequency = true
power.cpuBusyCycles = true
}
func disableCoreMetrics(power *PowerStat) {
power.cpuC1StateResidency = false
power.cpuC6StateResidency = false
power.cpuTemperature = false
power.cpuBusyFrequency = false
power.cpuFrequency = false
power.cpuBusyCycles = false
}
func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData {
raplDataMap := make(map[string]*raplData, len(socketIDs))
for _, socketID := range socketIDs {
raplDataMap[socketID] = &raplData{
socketCurrentEnergy: socketCurrentEnergy,
dramCurrentEnergy: dramCurrentEnergy,
}
}
return raplDataMap
}
func getPowerWithMockedServices() (*PowerStat, *mockFileService, *mockRaplService, *mockMsrService) {
fsMock := &mockFileService{}
msrMock := &mockMsrService{}
raplMock := &mockRaplService{}
logger := testutil.Logger{Name: "PowerPluginTest"}
p := newPowerStat(fsMock)
p.Log = logger
p.fs = fsMock
p.rapl = raplMock
p.msr = msrMock
return p, fsMock, raplMock, msrMock
}

View File

@ -0,0 +1,207 @@
// +build linux
package intel_powerstat
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"golang.org/x/sync/errgroup"
"github.com/influxdata/telegraf"
)
const (
systemCPUPath = "/sys/devices/system/cpu/"
cpuCurrentFreqPartialPath = "/sys/devices/system/cpu/cpu%s/cpufreq/scaling_cur_freq"
msrPartialPath = "/dev/cpu/%s/msr"
c3StateResidencyLocation = 0x3FC
c6StateResidencyLocation = 0x3FD
c7StateResidencyLocation = 0x3FE
maximumFrequencyClockCountLocation = 0xE7
actualFrequencyClockCountLocation = 0xE8
throttleTemperatureLocation = 0x1A2
temperatureLocation = 0x19C
timestampCounterLocation = 0x10
)
// msrService is responsible for interactions with MSR.
type msrService interface {
getCPUCoresData() map[string]*msrData
retrieveCPUFrequencyForCore(core string) (float64, error)
openAndReadMsr(core string) error
}
type msrServiceImpl struct {
cpuCoresData map[string]*msrData
msrOffsets []int64
fs fileService
log telegraf.Logger
}
func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
return m.cpuCoresData
}
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
cpuFreqFile, err := os.Open(cpuFreqPath)
if err != nil {
return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err)
}
defer cpuFreqFile.Close()
cpuFreq, _, err := m.fs.readFileToFloat64(cpuFreqFile)
return convertKiloHertzToMegaHertz(cpuFreq), err
}
func (m *msrServiceImpl) openAndReadMsr(core string) error {
path := fmt.Sprintf(msrPartialPath, core)
msrFile, err := os.Open(path)
if err != nil {
return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
}
defer msrFile.Close()
err = m.readDataFromMsr(core, msrFile)
if err != nil {
return fmt.Errorf("error reading data from MSR for core %s, err: %v", core, err)
}
return nil
}
func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error {
g, ctx := errgroup.WithContext(context.Background())
// Create and populate a map that contains msr offsets along with their respective channels
msrOffsetsWithChannels := make(map[int64]chan uint64)
for _, offset := range m.msrOffsets {
msrOffsetsWithChannels[offset] = make(chan uint64)
}
// Start a goroutine for each msr offset
for offset, channel := range msrOffsetsWithChannels {
// Wrap around function to avoid race on loop counter
func(off int64, ch chan uint64) {
g.Go(func() error {
defer close(ch)
err := m.readValueFromFileAtOffset(ctx, ch, reader, off)
if err != nil {
return fmt.Errorf("error reading MSR file, err: %v", err)
}
return nil
})
}(offset, channel)
}
newC3 := <-msrOffsetsWithChannels[c3StateResidencyLocation]
newC6 := <-msrOffsetsWithChannels[c6StateResidencyLocation]
newC7 := <-msrOffsetsWithChannels[c7StateResidencyLocation]
newMperf := <-msrOffsetsWithChannels[maximumFrequencyClockCountLocation]
newAperf := <-msrOffsetsWithChannels[actualFrequencyClockCountLocation]
newTsc := <-msrOffsetsWithChannels[timestampCounterLocation]
newThrottleTemp := <-msrOffsetsWithChannels[throttleTemperatureLocation]
newTemp := <-msrOffsetsWithChannels[temperatureLocation]
if err := g.Wait(); err != nil {
return fmt.Errorf("received error during reading MSR values in goroutines: %v", err)
}
m.cpuCoresData[core].c3Delta = newC3 - m.cpuCoresData[core].c3
m.cpuCoresData[core].c6Delta = newC6 - m.cpuCoresData[core].c6
m.cpuCoresData[core].c7Delta = newC7 - m.cpuCoresData[core].c7
m.cpuCoresData[core].mperfDelta = newMperf - m.cpuCoresData[core].mperf
m.cpuCoresData[core].aperfDelta = newAperf - m.cpuCoresData[core].aperf
m.cpuCoresData[core].timeStampCounterDelta = newTsc - m.cpuCoresData[core].timeStampCounter
m.cpuCoresData[core].c3 = newC3
m.cpuCoresData[core].c6 = newC6
m.cpuCoresData[core].c7 = newC7
m.cpuCoresData[core].mperf = newMperf
m.cpuCoresData[core].aperf = newAperf
m.cpuCoresData[core].timeStampCounter = newTsc
// MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16.
m.cpuCoresData[core].throttleTemp = (newThrottleTemp >> 16) & 0xFF
// MSR (19Ch) IA32_THERM_STATUS bits 22:16.
m.cpuCoresData[core].temp = (newTemp >> 16) & 0x7F
return nil
}
func (m *msrServiceImpl) readValueFromFileAtOffset(ctx context.Context, ch chan uint64, reader io.ReaderAt, offset int64) error {
value, err := m.fs.readFileAtOffsetToUint64(reader, offset)
if err != nil {
return err
}
// Detect context cancellation and return an error if other goroutine fails
select {
case <-ctx.Done():
return ctx.Err()
case ch <- value:
}
return nil
}
// setCPUCores initialize cpuCoresData map.
func (m *msrServiceImpl) setCPUCores() error {
m.cpuCoresData = make(map[string]*msrData)
cpuPrefix := "cpu"
cpuCore := fmt.Sprintf("%s%s", cpuPrefix, "[0-9]*")
cpuCorePattern := fmt.Sprintf("%s/%s", systemCPUPath, cpuCore)
cpuPaths, err := m.fs.getStringsMatchingPatternOnPath(cpuCorePattern)
if err != nil {
return err
}
if len(cpuPaths) == 0 {
m.log.Debugf("CPU core data wasn't found using pattern: %s", cpuCorePattern)
return nil
}
for _, cpuPath := range cpuPaths {
core := strings.TrimPrefix(filepath.Base(cpuPath), cpuPrefix)
m.cpuCoresData[core] = &msrData{
mperf: 0,
aperf: 0,
timeStampCounter: 0,
c3: 0,
c6: 0,
c7: 0,
throttleTemp: 0,
temp: 0,
mperfDelta: 0,
aperfDelta: 0,
timeStampCounterDelta: 0,
c3Delta: 0,
c6Delta: 0,
c7Delta: 0,
}
}
return nil
}
func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl {
msrService := &msrServiceImpl{
fs: fs,
log: logger,
}
err := msrService.setCPUCores()
if err != nil {
// This error does not prevent plugin from working thus it is not returned.
msrService.log.Error(err)
}
msrService.msrOffsets = []int64{c3StateResidencyLocation, c6StateResidencyLocation, c7StateResidencyLocation,
maximumFrequencyClockCountLocation, actualFrequencyClockCountLocation, timestampCounterLocation,
throttleTemperatureLocation, temperatureLocation}
return msrService
}

View File

@ -0,0 +1,61 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package intel_powerstat
import mock "github.com/stretchr/testify/mock"
// mockMsrService is an autogenerated mock type for the msrService type
type mockMsrService struct {
mock.Mock
}
// getCPUCoresData provides a mock function with given fields:
func (_m *mockMsrService) getCPUCoresData() map[string]*msrData {
ret := _m.Called()
var r0 map[string]*msrData
if rf, ok := ret.Get(0).(func() map[string]*msrData); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(map[string]*msrData)
}
}
return r0
}
// openAndReadMsr provides a mock function with given fields: core
func (_m *mockMsrService) openAndReadMsr(core string) error {
ret := _m.Called(core)
var r0 error
if rf, ok := ret.Get(0).(func(string) error); ok {
r0 = rf(core)
} else {
r0 = ret.Error(0)
}
return r0
}
// retrieveCPUFrequencyForCore provides a mock function with given fields: core
func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) {
ret := _m.Called(core)
var r0 float64
if rf, ok := ret.Get(0).(func(string) float64); ok {
r0 = rf(core)
} else {
r0 = ret.Get(0).(float64)
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(core)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

View File

@ -0,0 +1,134 @@
// +build linux
package intel_powerstat
import (
"context"
"errors"
"strings"
"testing"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf/testutil"
)
func TestReadDataFromMsrPositive(t *testing.T) {
firstValue := uint64(1000000)
secondValue := uint64(5000000)
delta := secondValue - firstValue
cpuCores := []string{"cpu0", "cpu1"}
msr, fsMock := getMsrServiceWithMockedFs()
prepareTestData(fsMock, cpuCores, msr, t)
cores := trimCPUFromCores(cpuCores)
methodCallNumberForFirstValue := len(msr.msrOffsets) * len(cores)
methodCallNumberForSecondValue := methodCallNumberForFirstValue * 2
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(firstValue, nil).Times(methodCallNumberForFirstValue)
for _, core := range cores {
require.NoError(t, msr.readDataFromMsr(core, nil))
}
fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForFirstValue)
verifyCPUCoresData(cores, t, msr, firstValue, false, 0)
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(secondValue, nil).Times(methodCallNumberForFirstValue)
for _, core := range cores {
require.NoError(t, msr.readDataFromMsr(core, nil))
}
fsMock.AssertNumberOfCalls(t, "readFileAtOffsetToUint64", methodCallNumberForSecondValue)
verifyCPUCoresData(cores, t, msr, secondValue, true, delta)
}
func trimCPUFromCores(cpuCores []string) []string {
cores := make([]string, 0)
for _, core := range cpuCores {
cores = append(cores, strings.TrimPrefix(core, "cpu"))
}
return cores
}
func TestReadDataFromMsrNegative(t *testing.T) {
firstValue := uint64(1000000)
cpuCores := []string{"cpu0", "cpu1"}
msr, fsMock := getMsrServiceWithMockedFs()
prepareTestData(fsMock, cpuCores, msr, t)
cores := trimCPUFromCores(cpuCores)
methodCallNumberPerCore := len(msr.msrOffsets)
// Normal execution for first core.
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(firstValue, nil).Times(methodCallNumberPerCore).
// Fail to read file for second core.
On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(uint64(0), errors.New("error reading file")).Times(methodCallNumberPerCore)
require.NoError(t, msr.readDataFromMsr(cores[0], nil))
require.Error(t, msr.readDataFromMsr(cores[1], nil))
}
func TestReadValueFromFileAtOffset(t *testing.T) {
cores := []string{"cpu0", "cpu1"}
msr, fsMock := getMsrServiceWithMockedFs()
ctx := context.Background()
testChannel := make(chan uint64, 1)
defer close(testChannel)
zero := uint64(0)
prepareTestData(fsMock, cores, msr, t)
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(zero, errors.New("error reading file")).Once()
require.Error(t, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0))
fsMock.On("readFileAtOffsetToUint64", mock.Anything, mock.Anything).
Return(zero, nil).Once()
require.Equal(t, nil, msr.readValueFromFileAtOffset(ctx, testChannel, nil, 0))
require.Equal(t, zero, <-testChannel)
}
func prepareTestData(fsMock *mockFileService, cores []string, msr *msrServiceImpl, t *testing.T) {
// Prepare MSR offsets and CPUCoresData for test.
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once()
require.NoError(t, msr.setCPUCores())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
}
func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expectedValue uint64, verifyDelta bool, delta uint64) {
for _, core := range cores {
require.Equal(t, expectedValue, msr.cpuCoresData[core].c3)
require.Equal(t, expectedValue, msr.cpuCoresData[core].c6)
require.Equal(t, expectedValue, msr.cpuCoresData[core].c7)
require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter)
require.Equal(t, (expectedValue>>16)&0xFF, msr.cpuCoresData[core].throttleTemp)
require.Equal(t, (expectedValue>>16)&0x7F, msr.cpuCoresData[core].temp)
if verifyDelta {
require.Equal(t, delta, msr.cpuCoresData[core].c3Delta)
require.Equal(t, delta, msr.cpuCoresData[core].c6Delta)
require.Equal(t, delta, msr.cpuCoresData[core].c7Delta)
require.Equal(t, delta, msr.cpuCoresData[core].mperfDelta)
require.Equal(t, delta, msr.cpuCoresData[core].aperfDelta)
require.Equal(t, delta, msr.cpuCoresData[core].timeStampCounterDelta)
}
}
}
func getMsrServiceWithMockedFs() (*msrServiceImpl, *mockFileService) {
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
logger := testutil.Logger{Name: "PowerPluginTest"}
fsMock := &mockFileService{}
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once()
msr := newMsrServiceWithFs(logger, fsMock)
return msr, fsMock
}

View File

@ -0,0 +1,238 @@
// +build linux
package intel_powerstat
import (
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/influxdata/telegraf"
)
const (
intelRaplPath = "/sys/devices/virtual/powercap/intel-rapl"
intelRaplSocketPartialPath = "%s/intel-rapl:%s"
energyUjPartialPath = "%s/energy_uj"
maxEnergyRangeUjPartialPath = "%s/max_energy_range_uj"
maxPowerUwPartialPath = "%s/constraint_0_max_power_uw"
intelRaplDramPartialPath = "%s/intel-rapl:%s/%s"
intelRaplDramNamePartialPath = "%s/name"
)
// raplService is responsible for interactions with RAPL.
type raplService interface {
initializeRaplData()
getRaplData() map[string]*raplData
retrieveAndCalculateData(socketID string) error
getConstraintMaxPowerWatts(socketID string) (float64, error)
}
type raplServiceImpl struct {
log telegraf.Logger
data map[string]*raplData
dramFolders map[string]string
fs fileService
}
// initializeRaplData looks for RAPL folders and initializes data map with fetched information.
func (r *raplServiceImpl) initializeRaplData() {
r.prepareData()
r.findDramFolders()
}
func (r *raplServiceImpl) getRaplData() map[string]*raplData {
return r.data
}
func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath)
socketEnergyUjFile, err := os.Open(socketEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err)
}
defer socketEnergyUjFile.Close()
dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID])
dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath)
dramEnergyUjFile, err := os.Open(dramEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err)
}
defer dramEnergyUjFile.Close()
socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath)
socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err)
}
defer socketMaxEnergyUjFile.Close()
dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath)
dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err)
}
defer dramMaxEnergyUjFile.Close()
return r.calculateData(socketID, socketEnergyUjFile, dramEnergyUjFile, socketMaxEnergyUjFile, dramMaxEnergyUjFile)
}
func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath)
socketMaxPowerFile, err := os.Open(socketMaxPowerPath)
if err != nil {
return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err)
}
defer socketMaxPowerFile.Close()
socketMaxPower, _, err := r.fs.readFileToFloat64(socketMaxPowerFile)
return convertMicroWattToWatt(socketMaxPower), err
}
func (r *raplServiceImpl) prepareData() {
intelRaplPrefix := "intel-rapl:"
intelRapl := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*")
raplPattern := fmt.Sprintf("%s/%s", intelRaplPath, intelRapl)
raplPaths, err := r.fs.getStringsMatchingPatternOnPath(raplPattern)
if err != nil {
r.log.Errorf("error while preparing RAPL data: %v", err)
r.data = make(map[string]*raplData)
return
}
if len(raplPaths) == 0 {
r.log.Debugf("RAPL data wasn't found using pattern: %s", raplPattern)
r.data = make(map[string]*raplData)
return
}
// If RAPL exists initialize data map (if it wasn't initialized before).
if len(r.data) == 0 {
for _, raplPath := range raplPaths {
socketID := strings.TrimPrefix(filepath.Base(raplPath), intelRaplPrefix)
r.data[socketID] = &raplData{
socketCurrentEnergy: 0,
dramCurrentEnergy: 0,
socketEnergy: 0,
dramEnergy: 0,
readDate: 0,
}
}
}
}
func (r *raplServiceImpl) findDramFolders() {
intelRaplPrefix := "intel-rapl:"
intelRaplDram := fmt.Sprintf("%s%s", intelRaplPrefix, "[0-9]*[0-9]*")
// Clean existing map
r.dramFolders = make(map[string]string)
for socketID := range r.data {
path := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
raplFoldersPattern := fmt.Sprintf("%s/%s", path, intelRaplDram)
pathsToRaplFolders, err := r.fs.getStringsMatchingPatternOnPath(raplFoldersPattern)
if err != nil {
r.log.Errorf("error during lookup for rapl dram: %v", err)
continue
}
if len(pathsToRaplFolders) == 0 {
r.log.Debugf("RAPL folders weren't found using pattern: %s", raplFoldersPattern)
continue
}
raplFolders := make([]string, 0)
for _, folderPath := range pathsToRaplFolders {
raplFolders = append(raplFolders, filepath.Base(folderPath))
}
r.findDramFolder(raplFolders, socketID)
}
}
func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) {
for _, raplFolder := range raplFolders {
potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder)
nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath)
read, err := r.fs.readFile(nameFilePath)
if err != nil {
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
continue
}
// Remove new line character
trimmedString := strings.TrimRight(string(read), "\n")
if trimmedString == "dram" {
// There should be only one DRAM folder per socket
r.dramFolders[socketID] = raplFolder
return
}
}
}
func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.Reader, dramEnergyUjFile io.Reader,
socketMaxEnergyUjFile io.Reader, dramMaxEnergyUjFile io.Reader) error {
newSocketEnergy, _, err := r.readEnergyInJoules(socketEnergyUjFile)
if err != nil {
return err
}
newDramEnergy, readDate, err := r.readEnergyInJoules(dramEnergyUjFile)
if err != nil {
return err
}
interval := convertNanoSecondsToSeconds(readDate - r.data[socketID].readDate)
r.data[socketID].readDate = readDate
if interval == 0 {
return fmt.Errorf("interval between last two Telegraf cycles is 0")
}
if newSocketEnergy > r.data[socketID].socketEnergy {
r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval
} else {
socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile)
if err != nil {
return err
}
// When socket energy_uj counter reaches maximum value defined in max_energy_range_uj file it
// starts counting from 0.
r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval
}
if newDramEnergy > r.data[socketID].dramEnergy {
r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval
} else {
dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile)
if err != nil {
return err
}
// When dram energy_uj counter reaches maximum value defined in max_energy_range_uj file it
// starts counting from 0.
r.data[socketID].dramCurrentEnergy = (dramMaxEnergy - r.data[socketID].dramEnergy + newDramEnergy) / interval
}
r.data[socketID].socketEnergy = newSocketEnergy
r.data[socketID].dramEnergy = newDramEnergy
return nil
}
func (r *raplServiceImpl) readEnergyInJoules(reader io.Reader) (float64, int64, error) {
currentEnergy, readDate, err := r.fs.readFileToFloat64(reader)
return convertMicroJoulesToJoules(currentEnergy), readDate, err
}
func newRaplServiceWithFs(logger telegraf.Logger, fs fileService) *raplServiceImpl {
return &raplServiceImpl{
log: logger,
data: make(map[string]*raplData),
dramFolders: make(map[string]string),
fs: fs,
}
}

View File

@ -0,0 +1,66 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package intel_powerstat
import mock "github.com/stretchr/testify/mock"
// mockRaplService is an autogenerated mock type for the raplService type
type mockRaplService struct {
mock.Mock
}
// getConstraintMaxPowerWatts provides a mock function with given fields: socketID
func (_m *mockRaplService) getConstraintMaxPowerWatts(socketID string) (float64, error) {
ret := _m.Called(socketID)
var r0 float64
if rf, ok := ret.Get(0).(func(string) float64); ok {
r0 = rf(socketID)
} else {
r0 = ret.Get(0).(float64)
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(socketID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// getRaplData provides a mock function with given fields:
func (_m *mockRaplService) getRaplData() map[string]*raplData {
ret := _m.Called()
var r0 map[string]*raplData
if rf, ok := ret.Get(0).(func() map[string]*raplData); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(map[string]*raplData)
}
}
return r0
}
// initializeRaplData provides a mock function with given fields:
func (_m *mockRaplService) initializeRaplData() {
_m.Called()
}
// retrieveAndCalculateData provides a mock function with given fields: socketID
func (_m *mockRaplService) retrieveAndCalculateData(socketID string) error {
ret := _m.Called(socketID)
var r0 error
if rf, ok := ret.Get(0).(func(string) error); ok {
r0 = rf(socketID)
} else {
r0 = ret.Error(0)
}
return r0
}

View File

@ -0,0 +1,115 @@
// +build linux
package intel_powerstat
import (
"errors"
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/influxdata/telegraf/testutil"
)
func TestPrepareData(t *testing.T) {
sockets := []string{"intel-rapl:0", "intel-rapl:1"}
rapl, fsMock := getRaplWithMockedFs()
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).Return(sockets, nil).Twice()
rapl.prepareData()
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, len(sockets), len(rapl.getRaplData()))
// Verify no data is wiped in the next calls
socketEnergy := 74563813417.0
socketID := "0"
rapl.data[socketID].socketEnergy = socketEnergy
rapl.prepareData()
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, len(sockets), len(rapl.getRaplData()))
require.Equal(t, socketEnergy, rapl.data[socketID].socketEnergy)
// Verify data is wiped once there is no RAPL folders
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(nil, errors.New("missing RAPL")).Once()
rapl.prepareData()
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, 0, len(rapl.getRaplData()))
}
func TestFindDramFolders(t *testing.T) {
sockets := []string{"0", "1"}
raplFolders := []string{"intel-rapl:0:1", "intel-rapl:0:2", "intel-rapl:0:3"}
rapl, fsMock := getRaplWithMockedFs()
for _, socketID := range sockets {
rapl.data[socketID] = &raplData{}
}
firstPath := fmt.Sprintf(intelRaplDramNamePartialPath,
fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "0", raplFolders[2]))
secondPath := fmt.Sprintf(intelRaplDramNamePartialPath,
fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, "1", raplFolders[1]))
fsMock.
On("getStringsMatchingPatternOnPath", mock.Anything).Return(raplFolders, nil).Twice().
On("readFile", firstPath).Return([]byte("dram"), nil).Once().
On("readFile", secondPath).Return([]byte("dram"), nil).Once().
On("readFile", mock.Anything).Return([]byte("random"), nil)
rapl.findDramFolders()
require.Equal(t, len(sockets), len(rapl.dramFolders))
require.Equal(t, raplFolders[2], rapl.dramFolders["0"])
require.Equal(t, raplFolders[1], rapl.dramFolders["1"])
fsMock.AssertNumberOfCalls(t, "readFile", 5)
}
func TestCalculateDataOverflowCases(t *testing.T) {
socketID := "1"
rapl, fsMock := getRaplWithMockedFs()
rapl.data[socketID] = &raplData{}
rapl.data[socketID].socketEnergy = convertMicroJoulesToJoules(23424123.1)
rapl.data[socketID].dramEnergy = convertMicroJoulesToJoules(345611233.2)
rapl.data[socketID].readDate = 54123
interval := int64(54343)
convertedInterval := convertNanoSecondsToSeconds(interval - rapl.data[socketID].readDate)
newEnergy := 3343443.4
maxEnergy := 234324546456.6
convertedNewEnergy := convertMicroJoulesToJoules(newEnergy)
convertedMaxNewEnergy := convertMicroJoulesToJoules(maxEnergy)
maxDramEnergy := 981230834098.3
newDramEnergy := 4533311.1
convertedMaxDramEnergy := convertMicroJoulesToJoules(maxDramEnergy)
convertedDramEnergy := convertMicroJoulesToJoules(newDramEnergy)
expectedCurrentEnergy := (convertedMaxNewEnergy - rapl.data[socketID].socketEnergy + convertedNewEnergy) / convertedInterval
expectedDramCurrentEnergy := (convertedMaxDramEnergy - rapl.data[socketID].dramEnergy + convertedDramEnergy) / convertedInterval
fsMock.
On("readFileToFloat64", mock.Anything).Return(newEnergy, int64(12321), nil).Once().
On("readFileToFloat64", mock.Anything).Return(newDramEnergy, interval, nil).Once().
On("readFileToFloat64", mock.Anything).Return(maxEnergy, int64(64534), nil).Once().
On("readFileToFloat64", mock.Anything).Return(maxDramEnergy, int64(98342), nil).Once()
require.NoError(t, rapl.calculateData(socketID, strings.NewReader(mock.Anything), strings.NewReader(mock.Anything),
strings.NewReader(mock.Anything), strings.NewReader(mock.Anything)))
require.Equal(t, expectedCurrentEnergy, rapl.data[socketID].socketCurrentEnergy)
require.Equal(t, expectedDramCurrentEnergy, rapl.data[socketID].dramCurrentEnergy)
}
func getRaplWithMockedFs() (*raplServiceImpl, *mockFileService) {
logger := testutil.Logger{Name: "PowerPluginTest"}
fsMock := &mockFileService{}
rapl := newRaplServiceWithFs(logger, fsMock)
return rapl, fsMock
}

View File

@ -0,0 +1,49 @@
// +build linux
package intel_powerstat
import (
"math"
"strconv"
)
const (
microJouleToJoule = 1.0 / 1000000
microWattToWatt = 1.0 / 1000000
kiloHertzToMegaHertz = 1.0 / 1000
nanoSecondsToSeconds = 1.0 / 1000000000
cyclesToHertz = 1.0 / 1000000
)
func convertMicroJoulesToJoules(mJ float64) float64 {
return mJ * microJouleToJoule
}
func convertMicroWattToWatt(mW float64) float64 {
return mW * microWattToWatt
}
func convertKiloHertzToMegaHertz(kHz float64) float64 {
return kHz * kiloHertzToMegaHertz
}
func convertNanoSecondsToSeconds(ns int64) float64 {
return float64(ns) * nanoSecondsToSeconds
}
func convertProcessorCyclesToHertz(pc uint64) float64 {
return float64(pc) * cyclesToHertz
}
func roundFloatToNearestTwoDecimalPlaces(n float64) float64 {
return math.Round(n*100) / 100
}
func convertIntegerArrayToStringArray(array []int64) []string {
stringArray := make([]string, 0)
for _, value := range array {
stringArray = append(stringArray, strconv.FormatInt(value, 10))
}
return stringArray
}