feat(intel_powerstat): add Max Turbo Frequency and introduce improvements (#11035)

This commit is contained in:
bkotlowski 2022-05-23 21:02:32 +02:00 committed by GitHub
parent 4f972daa2b
commit df3e9ec2a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 658 additions and 182 deletions

View File

@ -11,18 +11,25 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur
```toml
# Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization.
[[inputs.intel_powerstat]]
## All global metrics are always collected by Intel PowerStat plugin.
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
## telemetry will be exposed by Intel PowerStat plugin.
## The user can choose which package metrics are monitored by the plugin with the package_metrics setting:
## - The default will collect "current_power_consumption", "current_dram_power_consumption" and "thermal_design_power"
## - Setting this value to an empty array means no package metrics will be collected
## - Finally, a user can specify individual metrics to capture from the supported options list
## Supported options:
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
## Empty or missing array means no per-CPU specific metrics will be collected by the plugin.
## Supported options:
## "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", "cpu_busy_frequency"
## ATTENTION: cpu_busy_cycles option is DEPRECATED - superseded by cpu_c0_state_residency
# cpu_metrics = []
```
## Example: Configuration with no per-CPU telemetry
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
@ -31,28 +38,39 @@ This configuration allows getting global metrics (processor package specific), n
## Example: Configuration with no per-CPU telemetry - equivalent case
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
```
## Example: Configuration for CPU Temperature and Frequency only
## Example: Configuration for CPU Temperature and CPU Frequency
This configuration allows getting global metrics plus subset of per-CPU metrics (CPU Temperature and Current Frequency):
This configuration allows getting default processor package specific metrics, plus subset of per-CPU metrics (CPU Temperature and CPU Frequency):
```toml
[[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
```
## Example: Configuration with all available metrics
## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics
This configuration allows getting global metrics and all per-CPU metrics:
This configuration allows getting only a subset of per-CPU metrics (CPU Temperature and CPU Frequency):
```toml
[[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"]
package_metrics = []
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
```
## Example: Configuration with all available metrics
This configuration allows getting all processor package specific metrics and all per-CPU metrics:
```toml
[[inputs.intel_powerstat]]
package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"]
```
## SW Dependencies
@ -66,11 +84,17 @@ The following dependencies are expected by plugin:
Minimum kernel version required is 3.13 to satisfy all requirements.
Please make sure that kernel modules are loaded and running. You might have to manually enable them by using `modprobe`.
Exact commands to be executed are:
Please make sure that kernel modules are loaded and running (cpufreq is integrated in kernel). Modules might have to be manually enabled by using `modprobe`.
Depending on the kernel version, run commands:
```sh
sudo modprobe cpufreq-stats
# kernel 5.x.x:
sudo modprobe rapl
sudo modprobe msr
sudo modprobe intel_rapl_common
sudo modprobe intel_rapl_msr
# kernel 4.x.x:
sudo modprobe msr
sudo modprobe intel_rapl
```
@ -80,9 +104,13 @@ to retrieve data for calculation of most critical per-CPU specific metrics:
- `cpu_busy_frequency_mhz`
- `cpu_temperature_celsius`
- `cpu_c0_state_residency_percent`
- `cpu_c1_state_residency_percent`
- `cpu_c6_state_residency_percent`
- `cpu_busy_cycles_percent`
and to retrieve data for calculation of the per-package specific metric:
- `max_turbo_frequency_mhz`
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
@ -99,13 +127,13 @@ The following processor properties are required by the plugin:
model specific registers for all features
- The following processor flags shall be present:
- "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect
the following metrics: _powerstat_core.cpu_temperature_, _powerstat_core.cpu_busy_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_, _powerstat_core._cpu_c6_state_residency_
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat_core.cpu_busy_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_
- "_dts_" shall be present to collect _powerstat_core.cpu_temperature_
- Processor _Model number_ must be one of the following values for plugin to read _powerstat_core.cpu_c1_state_residency_
and _powerstat_core.cpu_c6_state_residency_ metrics:
the following metrics: _powerstat\_core.cpu\_temperature_, _powerstat\_core.cpu\_busy\_frequency_,
_powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_, _powerstat\_core.cpu\_c6\_state\_residency_
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat\_core.cpu\_busy\_frequency_,
_powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_
- "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_
- Processor _Model number_ must be one of the following values for plugin to read _powerstat\_core.cpu\_c1\_state\_residency_
and _powerstat\_core.cpu\_c6\_state\_residency_ metrics:
| Model number | Processor name |
|-----|-------------|
@ -168,50 +196,59 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
- The following Tags are returned by plugin with powerstat_core measurements:
```text
| Tag | Description |
|-----|-------------|
| `package_id` | ID of platform package/socket |
| `core_id` | ID of physical processor core |
| `cpu_id` | ID of logical processor core |
| Tag | Description |
|--------------|-------------------------------|
| `package_id` | ID of platform package/socket |
| `core_id` | ID of physical processor core |
| `cpu_id` | ID of logical processor core |
Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key)
while core_id and package_id tags are additional topology information.
```
- Available metrics for powerstat_core measurement
```text
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
| `cpu_busy_cycles_percent` | CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
```
| Metric name (field) | Description | Units |
|---------------------|-------------|-------|
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
| `cpu_c0_state_residency_percent` | Percentage of time that CPU Core spent in C0 Core residency state | % |
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
| `cpu_busy_cycles_percent` | (**DEPRECATED** - superseded by cpu_c0_state_residency_percent) CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
- powerstat_package
- The following Tags are returned by plugin with powerstat_package measurements:
```text
| Tag | Description |
|-----|-------------|
| `package_id` | ID of platform package/socket |
Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which
package metric refers to.
```
| Tag | Description |
|-----|-------------|
| `package_id` | ID of platform package/socket |
| `active_cores` | Specific tag for the `max_turbo_frequency_mhz` metric. The maximum number of active cores for which the turbo frequency is reachable |
Measurement powerstat_package metrics are collected per processor package - the _package_id_ tag indicates which package the metric refers to.
- Available metrics for powerstat_package measurement
```text
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
```
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
| `max_turbo_frequency_mhz` | Maximum reachable turbo frequency for number of cores active | MHz |
### Known issues
From linux kernel version v5.4.77 with [this kernel change](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94)
resources like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for security reasons, so this plugin needs root privileges to work properly.
If such strict security restrictions are not relevant, reading permissions to files in `/sys/devices/virtual/powercap/intel-rapl/`
directory can be manually changed for example with `chmod` command with custom parameters.
For example to give all users permission to all files in `intel-rapl` directory:
```bash
sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
```
### Example Output
@ -219,10 +256,12 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_cycles_percent=0.8 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_state_residency_percent=0.8 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000
```

View File

@ -7,8 +7,8 @@ type msrData struct {
c3 uint64
c6 uint64
c7 uint64
throttleTemp uint64
temp uint64
throttleTemp int64
temp int64
mperfDelta uint64
aperfDelta uint64
timeStampCounterDelta uint64

View File

@ -152,3 +152,22 @@ func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset i
func newFileService() *fileServiceImpl {
return &fileServiceImpl{}
}
// checkFile verifies that path names an existing filesystem entry that is not
// a symbolic link.
//
// The entry itself is inspected with os.Lstat, so a symlink is reported as a
// symlink rather than being followed. An error is returned when:
//   - path is empty,
//   - nothing exists at path,
//   - the entry's metadata cannot be obtained (the underlying error is wrapped
//     with %w so callers can examine it with errors.Is / errors.As),
//   - the entry is a symlink.
//
// It returns nil otherwise.
func checkFile(path string) error {
	if path == "" {
		return fmt.Errorf("empty path given")
	}

	lInfo, err := os.Lstat(path)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("file `%s` doesn't exist", path)
		}
		// Keep the cause in the error chain instead of flattening it to text.
		return fmt.Errorf("cannot obtain file info of `%s`: %w", path, err)
	}

	if lInfo.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf("file `%s` is a symlink", path)
	}
	return nil
}

View File

@ -1,4 +1,4 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
// Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat
@ -8,7 +8,7 @@ import (
mock "github.com/stretchr/testify/mock"
)
// mockFileService is an autogenerated mock type for the fileService type
// mockFileService is an autogenerated mock type for the mockFileService type
type mockFileService struct {
mock.Mock
}

View File

@ -6,6 +6,7 @@ package intel_powerstat
import (
"fmt"
"math/big"
"strconv"
"strings"
"sync"
"time"
@ -15,60 +16,81 @@ import (
)
const (
cpuFrequency = "cpu_frequency"
cpuBusyFrequency = "cpu_busy_frequency"
cpuTemperature = "cpu_temperature"
cpuC1StateResidency = "cpu_c1_state_residency"
cpuC6StateResidency = "cpu_c6_state_residency"
cpuBusyCycles = "cpu_busy_cycles"
percentageMultiplier = 100
cpuFrequency = "cpu_frequency"
cpuBusyFrequency = "cpu_busy_frequency"
cpuTemperature = "cpu_temperature"
cpuC0StateResidency = "cpu_c0_state_residency"
cpuC1StateResidency = "cpu_c1_state_residency"
cpuC6StateResidency = "cpu_c6_state_residency"
cpuBusyCycles = "cpu_busy_cycles"
packageCurrentPowerConsumption = "current_power_consumption"
packageCurrentDramPowerConsumption = "current_dram_power_consumption"
packageThermalDesignPower = "thermal_design_power"
packageTurboLimit = "max_turbo_frequency"
percentageMultiplier = 100
)
// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.
type PowerStat struct {
CPUMetrics []string `toml:"cpu_metrics"`
Log telegraf.Logger `toml:"-"`
CPUMetrics []string `toml:"cpu_metrics"`
PackageMetrics []string `toml:"package_metrics"`
Log telegraf.Logger `toml:"-"`
fs fileService
rapl raplService
msr msrService
cpuFrequency bool
cpuBusyFrequency bool
cpuTemperature bool
cpuC1StateResidency bool
cpuC6StateResidency bool
cpuBusyCycles bool
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
cpuFrequency bool
cpuBusyFrequency bool
cpuTemperature bool
cpuC0StateResidency bool
cpuC1StateResidency bool
cpuC6StateResidency bool
cpuBusyCycles bool
packageTurboLimit bool
packageCurrentPowerConsumption bool
packageCurrentDramPowerConsumption bool
packageThermalDesignPower bool
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
logOnce map[string]error
}
// Init performs one time setup of the plugin.
// Init performs one time setup of the plugin
func (p *PowerStat) Init() error {
p.parsePackageMetricsConfig()
p.parseCPUMetricsConfig()
err := p.verifyProcessor()
if err != nil {
return err
}
// Initialize MSR service only when there is at least one core metric enabled.
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles {
// Initialize MSR service only when there is at least one metric enabled
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit {
p.msr = newMsrServiceWithFs(p.Log, p.fs)
}
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit {
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
}
if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() {
return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather")
}
return nil
}
// Gather takes in an accumulator and adds the metrics that the Input gathers.
// Gather takes in an accumulator and adds the metrics that the Input gathers
func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
p.addGlobalMetrics(acc)
if p.areGlobalMetricsEnabled() {
p.addGlobalMetrics(acc)
}
if p.areCoreMetricsEnabled() {
p.addPerCoreMetrics(acc)
}
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations.
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations
p.skipFirstIteration = false
return nil
@ -79,18 +101,36 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
p.rapl.initializeRaplData()
for socketID := range p.rapl.getRaplData() {
if p.packageTurboLimit {
p.addTurboRatioLimit(socketID, acc)
}
err := p.rapl.retrieveAndCalculateData(socketID)
if err != nil {
// In case of an error skip calculating metrics for this socket
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
if val := p.logOnce[socketID]; val == nil || val.Error() != err.Error() {
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
// Remember that specific error occurs for socketID to omit logging next time
p.logOnce[socketID] = err
}
continue
}
p.addThermalDesignPowerMetric(socketID, acc)
// If error stops occurring, clear logOnce indicator
p.logOnce[socketID] = nil
if p.packageThermalDesignPower {
p.addThermalDesignPowerMetric(socketID, acc)
}
if p.skipFirstIteration {
continue
}
p.addCurrentSocketPowerConsumption(socketID, acc)
p.addCurrentDramPowerConsumption(socketID, acc)
if p.packageCurrentPowerConsumption {
p.addCurrentSocketPowerConsumption(socketID, acc)
}
if p.packageCurrentDramPowerConsumption {
p.addCurrentDramPowerConsumption(socketID, acc)
}
}
}
@ -155,11 +195,10 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
}
// Read data from MSR only if required
if p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature ||
p.cpuBusyFrequency {
if p.cpuC0StateResidency || p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature || p.cpuBusyFrequency {
err := p.msr.openAndReadMsr(cpuID)
if err != nil {
// In case of an error exit the function. All metrics past this point are dependant on MSR.
// In case of an error exit the function. All metrics past this point are dependent on MSR
p.Log.Debugf("error while reading msr: %v", err)
return
}
@ -169,12 +208,16 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
p.addCPUTemperatureMetric(cpuID, acc)
}
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle.
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle
if p.cpuBusyFrequency {
p.addCPUBusyFrequencyMetric(cpuID, acc)
}
if !p.skipFirstIteration {
if p.cpuC0StateResidency || p.cpuBusyCycles {
p.addCPUC0StateResidencyMetric(cpuID, acc)
}
if p.cpuC1StateResidency {
p.addCPUC1StateResidencyMetric(cpuID, acc)
}
@ -182,10 +225,6 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
if p.cpuC6StateResidency {
p.addCPUC6StateResidencyMetric(cpuID, acc)
}
if p.cpuBusyCycles {
p.addCPUBusyCyclesMetric(cpuID, acc)
}
}
}
@ -229,6 +268,153 @@ func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulat
acc.AddGauge("powerstat_core", fields, tags)
}
// calculateTurboRatioGroup decodes up to eight 8-bit buckets and records, in
// group, the frequency assigned to each active-core count.
//
// Byte i of coreCounts holds the highest active-core count covered by bucket
// i; byte i of msr holds that bucket's frequency ratio, which is multiplied by
// 100 before being stored. Bucket i covers every core count from one past the
// previous bucket's limit (or from the first bucket's own limit, for bucket 0)
// up to its own limit. Decoding stops at the first bucket whose core count is
// zero; a bucket whose ratio is zero is skipped without writing to group.
func calculateTurboRatioGroup(coreCounts uint64, msr uint64, group map[int]uint64) {
	const (
		bucketBits  = 8
		bucketMask  = 0xFF
		bucketCount = 8
		ratioScale  = 100
	)

	firstCore := coreCounts & bucketMask
	for bucket := 0; bucket < bucketCount; bucket++ {
		shift := uint(bucket * bucketBits)

		lastCore := (coreCounts >> shift) & bucketMask
		if lastCore == 0 {
			return
		}

		if ratio := (msr >> shift) & bucketMask; ratio != 0 {
			for cores := firstCore; cores <= lastCore; cores++ {
				group[int(cores)] = ratio * ratioScale
			}
		}
		firstCore = lastCore + 1
	}
}
// isCPUModel reports whether model equals the base-10 string form of any of
// the given model IDs.
func isCPUModel(model string, ids ...int64) bool {
	for _, id := range ids {
		if model == strconv.FormatInt(id, 10) {
			return true
		}
	}
	return false
}

// addTurboRatioLimit emits one "max_turbo_frequency_mhz" gauge per active-core
// count for the package identified by socketID.
//
// A CPU whose physicalID matches socketID is looked up in p.cpuInfo; its cpuID
// is used for the MSR reads and its model selects which turbo-ratio registers
// are decoded. The per-model sections are labelled with the names of the
// corresponding turbostat routines. If no matching CPU is found, or any MSR
// read fails, the problem is logged at debug level and nothing is emitted.
// Each gauge is tagged with "package_id" and "active_cores".
func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator) {
	turboRatioLimitGroups := make(map[int]uint64)

	var cpuID, model string
	for _, v := range p.cpuInfo {
		if v.physicalID == socketID {
			cpuID = v.cpuID
			model = v.model
		}
	}
	if cpuID == "" || model == "" {
		p.Log.Debugf("error while reading socket ID")
		return
	}

	// dump_hsw_turbo_ratio_limit
	if isCPUModel(model, 0x3F) { // INTEL_FAM6_HASWELL_X
		coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18
		msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit2, turboRatioLimitGroups)
	}

	// dump_ivt_turbo_ratio_limit
	if isCPUModel(model,
		0x3E, // INTEL_FAM6_IVYBRIDGE_X
		0x3F, // INTEL_FAM6_HASWELL_X
	) {
		coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16
		msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups)
	}

	if !isCPUModel(model,
		0x37, // INTEL_FAM6_ATOM_SILVERMONT
		0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID
		0x5A, // INTEL_FAM6_ATOM_AIRMONT_MID
		0x2E, // INTEL_FAM6_NEHALEM_EX
		0x2F, // INTEL_FAM6_WESTMERE_EX
		0x57, // INTEL_FAM6_XEON_PHI_KNL
		0x85, // INTEL_FAM6_XEON_PHI_KNM
	) {
		// Default value (counting the number of active cores 1 to 8).
		// For the models below it is replaced by the content of MSR_TURBO_RATIO_LIMIT1.
		coreCounts := uint64(0x0807060504030201)
		if isCPUModel(model,
			0x5C,             // INTEL_FAM6_ATOM_GOLDMONT
			0x55,             // INTEL_FAM6_SKYLAKE_X
			0x6C, 0x8F, 0x6A, // INTEL_FAM6_ICELAKE_X
			0x5F, // INTEL_FAM6_ATOM_GOLDMONT_D
			0x86, // INTEL_FAM6_ATOM_TREMONT_D
		) {
			var err error
			coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
			if err != nil {
				p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
				return
			}
		}

		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups)
	}

	// dump_atom_turbo_ratio_limits
	if isCPUModel(model,
		0x37, // INTEL_FAM6_ATOM_SILVERMONT
		0x4A, // INTEL_FAM6_ATOM_SILVERMONT_MID
		0x5A, // INTEL_FAM6_ATOM_AIRMONT_MID
	) {
		coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4
		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS")
		if err != nil {
			p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err)
			return
		}

		// The freq ratio is stored in 6-bit blocks, one per byte. Keep each ratio
		// in its own byte so that byte i lines up with byte i of coreCounts.
		// The shift count must never be negative: Go panics on a negative shift.
		var value uint64
		for i := 0; i < 4; i++ { // value "4" is specific for this group of processors
			ratio := (msrTurboRatioLimit >> (8 * i)) & 0x3F
			value |= ratio << (8 * i)
		}
		calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
	}

	// dump_knl_turbo_ratio_limits
	if isCPUModel(model, 0x57) { // INTEL_FAM6_XEON_PHI_KNL
		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
			return
		}

		// value of freq ratio of bucket 1 is saved in bits 15 to 8.
		// each next value is calculated as the previous value - delta.
		// Delta is stored in 3-bit blocks every 8 bits (start at 21 (2*8+5))
		value := (msrTurboRatioLimit >> 8) & 0xFF
		newValue := value
		for i := 2; i < 8; i++ {
			newValue -= (msrTurboRatioLimit >> (8*i + 5)) & 0x7
			value += newValue << ((i - 1) * 8)
		}

		// value of number of active cores of bucket 1 is saved in bits 1 to 7.
		// each next value is calculated as the previous value + delta.
		// Delta is stored in 5-bit blocks every 8 bits (start at 16 (2*8))
		coreCounts := (msrTurboRatioLimit & 0xFF) >> 1
		newBucket := coreCounts
		for i := 2; i < 8; i++ {
			newBucket += (msrTurboRatioLimit >> (8 * i)) & 0x1F
			coreCounts += newBucket << ((i - 1) * 8)
		}
		calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
	}

	for key, val := range turboRatioLimitGroups {
		tags := map[string]string{
			"package_id":   socketID,
			"active_cores": strconv.Itoa(key),
		}
		fields := map[string]interface{}{
			"max_turbo_frequency_mhz": val,
		}
		acc.AddGauge("powerstat_package", fields, tags)
	}
}
func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
mperfDelta := coresData[cpuID].mperfDelta
@ -331,7 +517,7 @@ func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accu
acc.AddGauge("powerstat_core", fields, tags)
}
func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulator) {
func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData()
// Avoid division by 0
if coresData[cpuID].timeStampCounterDelta == 0 {
@ -339,7 +525,7 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
timestampCounterLocation, cpuID)
return
}
busyCyclesValue := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
c0Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta))
cpu := p.cpuInfo[cpuID]
tags := map[string]string{
@ -347,11 +533,42 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
"core_id": cpu.coreID,
"cpu_id": cpu.cpuID,
}
fields := map[string]interface{}{
"cpu_busy_cycles_percent": busyCyclesValue,
if p.cpuC0StateResidency {
fields := map[string]interface{}{
"cpu_c0_state_residency_percent": c0Value,
}
acc.AddGauge("powerstat_core", fields, tags)
}
if p.cpuBusyCycles {
deprecatedFields := map[string]interface{}{
"cpu_busy_cycles_percent": c0Value,
}
acc.AddGauge("powerstat_core", deprecatedFields, tags)
}
}
func (p *PowerStat) parsePackageMetricsConfig() {
if p.PackageMetrics == nil {
// if Package Metric config is empty, use the default settings.
p.packageCurrentPowerConsumption = true
p.packageCurrentDramPowerConsumption = true
p.packageThermalDesignPower = true
return
}
acc.AddGauge("powerstat_core", fields, tags)
if contains(p.PackageMetrics, packageTurboLimit) {
p.packageTurboLimit = true
}
if contains(p.PackageMetrics, packageCurrentPowerConsumption) {
p.packageCurrentPowerConsumption = true
}
if contains(p.PackageMetrics, packageCurrentDramPowerConsumption) {
p.packageCurrentDramPowerConsumption = true
}
if contains(p.PackageMetrics, packageThermalDesignPower) {
p.packageThermalDesignPower = true
}
}
func (p *PowerStat) parseCPUMetricsConfig() {
@ -363,6 +580,10 @@ func (p *PowerStat) parseCPUMetricsConfig() {
p.cpuFrequency = true
}
if contains(p.CPUMetrics, cpuC0StateResidency) {
p.cpuC0StateResidency = true
}
if contains(p.CPUMetrics, cpuC1StateResidency) {
p.cpuC1StateResidency = true
}
@ -396,7 +617,7 @@ func (p *PowerStat) verifyProcessor() error {
p.cpuInfo = stats
// First CPU is sufficient for verification.
// First CPU is sufficient for verification
firstCPU := p.cpuInfo["0"]
if firstCPU == nil {
return fmt.Errorf("first core not found while parsing /proc/cpuinfo")
@ -414,14 +635,16 @@ func (p *PowerStat) verifyProcessor() error {
if !strings.Contains(firstCPU.flags, "msr") {
p.cpuTemperature = false
p.cpuC6StateResidency = false
p.cpuC0StateResidency = false
p.cpuBusyCycles = false
p.cpuBusyFrequency = false
p.cpuC1StateResidency = false
}
if !strings.Contains(firstCPU.flags, "aperfmperf") {
p.cpuBusyFrequency = false
p.cpuBusyCycles = false
p.cpuBusyFrequency = false
p.cpuC0StateResidency = false
p.cpuC1StateResidency = false
}
@ -438,7 +661,6 @@ func contains(slice []string, str string) bool {
return true
}
}
return false
}
@ -446,17 +668,27 @@ func (p *PowerStat) areCoreMetricsEnabled() bool {
return p.msr != nil && len(p.msr.getCPUCoresData()) > 0
}
// newPowerStat creates and returns PowerStat struct.
// areGlobalMetricsEnabled reports whether the RAPL service has been set up,
// i.e. whether p.rapl is non-nil. Gather only collects global (package)
// metrics when this returns true.
func (p *PowerStat) areGlobalMetricsEnabled() bool {
return p.rapl != nil
}
// newPowerStat creates and returns PowerStat struct
func newPowerStat(fs fileService) *PowerStat {
p := &PowerStat{
cpuFrequency: false,
cpuC1StateResidency: false,
cpuC6StateResidency: false,
cpuBusyCycles: false,
cpuTemperature: false,
cpuBusyFrequency: false,
skipFirstIteration: true,
fs: fs,
cpuFrequency: false,
cpuC0StateResidency: false,
cpuC1StateResidency: false,
cpuC6StateResidency: false,
cpuBusyCycles: false,
cpuTemperature: false,
cpuBusyFrequency: false,
packageTurboLimit: false,
packageCurrentPowerConsumption: false,
packageCurrentDramPowerConsumption: false,
packageThermalDesignPower: false,
skipFirstIteration: true,
fs: fs,
logOnce: make(map[string]error),
}
return p

View File

@ -15,26 +15,32 @@ import (
"github.com/influxdata/telegraf/testutil"
)
// MockServices groups the mock implementations of the file, MSR and RAPL
// services so tests can set expectations on each of them through one value.
type MockServices struct {
// fs is the mocked file service.
fs *mockFileService
// msr is the mocked MSR service.
msr *mockMsrService
// rapl is the mocked RAPL service.
rapl *mockRaplService
}
func TestInitPlugin(t *testing.T) {
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
power, fsMock, _, _ := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
fsMock.On("getCPUInfoStats", mock.Anything).
mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(nil, errors.New("error getting cpu stats")).Once()
require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything).
mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(make(map[string]*cpuInfo), nil).Once()
require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything).
mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": {
vendorID: "GenuineIntel",
cpuFamily: "test",
}}, nil).Once()
require.Error(t, power.Init())
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once().
On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": {
@ -44,24 +50,24 @@ func TestInitPlugin(t *testing.T) {
// Verify MSR service initialization.
power.cpuFrequency = true
require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, len(cores), len(power.msr.getCPUCoresData()))
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once()
// In case of an error when fetching cpu cores plugin should proceed with execution.
require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, 0, len(power.msr.getCPUCoresData()))
}
func TestParseCPUMetricsConfig(t *testing.T) {
power, _, _, _ := getPowerWithMockedServices()
power, _ := getPowerWithMockedServices()
disableCoreMetrics(power)
power.CPUMetrics = []string{
"cpu_frequency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
"cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
"cpu_busy_frequency",
}
power.parseCPUMetricsConfig()
@ -88,6 +94,7 @@ func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) {
require.Equal(t, enabled, power.cpuFrequency)
require.Equal(t, enabled, power.cpuC1StateResidency)
require.Equal(t, enabled, power.cpuC6StateResidency)
require.Equal(t, enabled, power.cpuC0StateResidency)
require.Equal(t, enabled, power.cpuBusyCycles)
require.Equal(t, enabled, power.cpuBusyFrequency)
require.Equal(t, enabled, power.cpuTemperature)
@ -102,23 +109,23 @@ func TestGather(t *testing.T) {
preparedCPUData := getPreparedCPUData(coreIDs)
raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfo(power, coreIDs, packageIDs)
enableCoreMetrics(power)
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).
mockServices.rapl.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
msrMock.On("getCPUCoresData").Return(preparedCPUData).
mockServices.msr.On("getCPUCoresData").Return(preparedCPUData).
On("openAndReadMsr", mock.Anything).Return(nil).
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
require.NoError(t, power.Gather(&acc))
// Number of global metrics : 3
// Number of per core metrics : 6
require.Equal(t, 3*len(packageIDs)+6*len(coreIDs), len(acc.GetTelegrafMetrics()))
// Number of per core metrics : 7
require.Equal(t, 3*len(packageIDs)+7*len(coreIDs), len(acc.GetTelegrafMetrics()))
}
func TestAddGlobalMetricsNegative(t *testing.T) {
@ -126,24 +133,24 @@ func TestAddGlobalMetricsNegative(t *testing.T) {
socketCurrentEnergy := 13213852.2
dramCurrentEnergy := 784552.0
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, _ := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).Once().
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap))
power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
mockServices.rapl.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
raplMock.On("initializeRaplData", mock.Anything).Once().
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(make(map[string]*raplData)).Once()
power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNotCalled(t, "retrieveAndCalculateData")
mockServices.rapl.AssertNotCalled(t, "retrieveAndCalculateData")
raplMock.On("initializeRaplData", mock.Anything).Once().
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once().
@ -159,10 +166,10 @@ func TestAddGlobalMetricsPositive(t *testing.T) {
dramCurrentEnergy := 124234872.5
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
maxPower := 546783852.9
power, _, raplMock, _ := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).
mockServices.rapl.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice().
@ -181,9 +188,9 @@ func TestAddMetricsForSingleCoreNegative(t *testing.T) {
var wg sync.WaitGroup
var acc testutil.Accumulator
core := "0"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
msrMock.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
mockServices.msr.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
// Skip generating metric for CPU frequency.
power.cpuFrequency = false
@ -201,16 +208,16 @@ func TestAddCPUFrequencyMetric(t *testing.T) {
coreID := "3"
packageID := "0"
frequency := 1200000.2
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).
mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).
Return(float64(0), errors.New("error on reading file")).Once()
power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -225,12 +232,12 @@ func TestAddCoreCPUTemperatureMetric(t *testing.T) {
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
preparedData := getPreparedCPUData([]string{cpuID})
expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("getCPUCoresData").Return(preparedData).Once()
mockServices.msr.On("getCPUCoresData").Return(preparedData).Once()
power.addCPUTemperatureMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -243,13 +250,13 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC6StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -263,27 +270,32 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
func TestAddProcessorBusyCyclesMetric(t *testing.T) {
func TestAddC0StateResidencyMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) /
float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyCyclesMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.cpuBusyCycles, power.cpuC0StateResidency = true, true
power.addCPUC0StateResidencyMetric(cpuID, &acc)
require.Equal(t, 2, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
expectedMetric := getPowerCoreMetric("cpu_c0_state_residency_percent", expectedBusyCycles, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
// Deprecated
expectedMetric = getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
acc.ClearMetrics()
preparedData[cpuID].timeStampCounterDelta = 0
power.addCPUBusyCyclesMetric(cpuID, &acc)
power.addCPUC0StateResidencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
}
@ -292,12 +304,12 @@ func TestAddProcessorBusyFrequencyMetric(t *testing.T) {
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
power.skipFirstIteration = false
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -312,14 +324,14 @@ func TestAddC1StateResidencyMetric(t *testing.T) {
cpuID := "0"
coreID := "2"
packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID})
c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta -
preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta
expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC1StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -337,9 +349,9 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
var acc testutil.Accumulator
sockets := []string{"0"}
maxPower := 195720672.1
power, _, raplMock, _ := getPowerWithMockedServices()
power, mockServices := getPowerWithMockedServices()
raplMock.On("getConstraintMaxPowerWatts", mock.Anything).
mockServices.rapl.On("getConstraintMaxPowerWatts", mock.Anything).
Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once().
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once()
@ -354,6 +366,80 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags)
}
// TestCalculateTurboRatioGroup calls calculateTurboRatioGroup with several
// core-count/MSR value pairs and verifies the contents of the group map after
// each call, first while reusing one map across calls and finally with a
// freshly created map.
func TestCalculateTurboRatioGroup(t *testing.T) {
	limits := make(map[int]uint64)

	// Same packed value for both arguments: groups 1..8 are expected to hold
	// 100, 200, ..., 800.
	calculateTurboRatioGroup(uint64(0x0807060504030201), uint64(0x0807060504030201), limits)
	require.Equal(t, 8, len(limits))
	for group := 1; group <= 8; group++ {
		require.Equal(t, uint64(group*100), limits[group])
	}

	// Same MSR value, core counts in steps of two: every pair of consecutive
	// groups (1-2, 3-4, ..., 15-16) is expected to share one limit.
	calculateTurboRatioGroup(uint64(0x100e0c0a08060402), uint64(0x0807060504030201), limits)
	require.Equal(t, 16, len(limits))
	for group := 1; group <= 16; group++ {
		require.Equal(t, uint64(((group+1)/2)*100), limits[group])
	}

	// Two more groups are added on top of the existing sixteen.
	calculateTurboRatioGroup(uint64(0x1211), uint64(0xfffe), limits)
	require.Equal(t, 18, len(limits))
	require.Equal(t, uint64(25400), limits[17])
	require.Equal(t, uint64(25500), limits[18])

	// Every one of the eighteen existing groups is expected to be overwritten
	// with 200; the map size stays the same.
	calculateTurboRatioGroup(uint64(0x1201), uint64(0x0202), limits)
	require.Equal(t, 18, len(limits))
	for group := 1; group <= 18; group++ {
		require.Equal(t, uint64(200), limits[group])
	}

	// With an empty map only groups 17 and 18 are expected to be present.
	limits = make(map[int]uint64)
	calculateTurboRatioGroup(uint64(0x1211), uint64(0xfffe), limits)
	require.Equal(t, 2, len(limits))
	require.Equal(t, uint64(25400), limits[17])
	require.Equal(t, uint64(25500), limits[18])
}
func getPreparedCPUData(cores []string) map[string]*msrData {
msrDataMap := make(map[string]*msrData)
@ -451,6 +537,7 @@ func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) {
}
func enableCoreMetrics(power *PowerStat) {
power.cpuC0StateResidency = true
power.cpuC1StateResidency = true
power.cpuC6StateResidency = true
power.cpuTemperature = true
@ -460,12 +547,13 @@ func enableCoreMetrics(power *PowerStat) {
}
func disableCoreMetrics(power *PowerStat) {
power.cpuC0StateResidency = false
power.cpuC1StateResidency = false
power.cpuC6StateResidency = false
power.cpuBusyCycles = false
power.cpuTemperature = false
power.cpuBusyFrequency = false
power.cpuFrequency = false
power.cpuBusyCycles = false
}
func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData {
@ -480,16 +568,18 @@ func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCur
return raplDataMap
}
func getPowerWithMockedServices() (*PowerStat, *mockFileService, *mockRaplService, *mockMsrService) {
fsMock := &mockFileService{}
msrMock := &mockMsrService{}
raplMock := &mockRaplService{}
logger := testutil.Logger{Name: "PowerPluginTest"}
p := newPowerStat(fsMock)
p.Log = logger
p.fs = fsMock
p.rapl = raplMock
p.msr = msrMock
func getPowerWithMockedServices() (*PowerStat, *MockServices) {
var mockServices MockServices
mockServices.fs = &mockFileService{}
mockServices.msr = &mockMsrService{}
mockServices.rapl = &mockRaplService{}
p := newPowerStat(mockServices.fs)
p.Log = testutil.Logger{Name: "PowerPluginTest"}
p.rapl = mockServices.rapl
p.msr = mockServices.msr
p.packageCurrentPowerConsumption = true
p.packageCurrentDramPowerConsumption = true
p.packageThermalDesignPower = true
return p, fsMock, raplMock, msrMock
return p, &mockServices
}

View File

@ -28,6 +28,10 @@ const (
throttleTemperatureLocation = 0x1A2
temperatureLocation = 0x19C
timestampCounterLocation = 0x10
turboRatioLimitLocation = 0x1AD
turboRatioLimit1Location = 0x1AE
turboRatioLimit2Location = 0x1AF
atomCoreTurboRatiosLocation = 0x66C
)
// msrService is responsible for interactions with MSR.
@ -35,6 +39,7 @@ type msrService interface {
getCPUCoresData() map[string]*msrData
retrieveCPUFrequencyForCore(core string) (float64, error)
openAndReadMsr(core string) error
readSingleMsr(core string, msr string) (uint64, error)
}
type msrServiceImpl struct {
@ -50,6 +55,10 @@ func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
err := checkFile(cpuFreqPath)
if err != nil {
return 0, err
}
cpuFreqFile, err := os.Open(cpuFreqPath)
if err != nil {
return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err)
@ -62,6 +71,10 @@ func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, erro
func (m *msrServiceImpl) openAndReadMsr(core string) error {
path := fmt.Sprintf(msrPartialPath, core)
err := checkFile(path)
if err != nil {
return err
}
msrFile, err := os.Open(path)
if err != nil {
return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
@ -75,6 +88,40 @@ func (m *msrServiceImpl) openAndReadMsr(core string) error {
return nil
}
// readSingleMsr reads a single uint64 value from the MSR file of the given
// core. msr selects the register by name; the supported names are
// "MSR_TURBO_RATIO_LIMIT", "MSR_TURBO_RATIO_LIMIT1", "MSR_TURBO_RATIO_LIMIT2"
// and "MSR_ATOM_CORE_TURBO_RATIOS".
//
// An error is returned when the name is not one of the supported ones, when
// the MSR file does not pass checkFile or cannot be opened, or when reading at
// the register offset fails. The returned value is 0 whenever an error is
// returned.
func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) {
	// Resolve the register offset first so an unsupported name is rejected
	// without opening the MSR file at all.
	var msrAddress int64
	switch msr {
	case "MSR_TURBO_RATIO_LIMIT":
		msrAddress = turboRatioLimitLocation
	case "MSR_TURBO_RATIO_LIMIT1":
		msrAddress = turboRatioLimit1Location
	case "MSR_TURBO_RATIO_LIMIT2":
		msrAddress = turboRatioLimit2Location
	case "MSR_ATOM_CORE_TURBO_RATIOS":
		msrAddress = atomCoreTurboRatiosLocation
	default:
		return 0, fmt.Errorf("incorrect name of MSR %s", msr)
	}

	path := fmt.Sprintf(msrPartialPath, core)
	if err := checkFile(path); err != nil {
		return 0, err
	}

	msrFile, err := os.Open(path)
	if err != nil {
		return 0, fmt.Errorf("error opening MSR file on path %s, err: %w", path, err)
	}
	defer msrFile.Close()

	value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress)
	if err != nil {
		return 0, err
	}
	return value, nil
}
func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error {
g, ctx := errgroup.WithContext(context.Background())
@ -128,9 +175,9 @@ func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error
m.cpuCoresData[core].aperf = newAperf
m.cpuCoresData[core].timeStampCounter = newTsc
// MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16.
m.cpuCoresData[core].throttleTemp = (newThrottleTemp >> 16) & 0xFF
m.cpuCoresData[core].throttleTemp = int64((newThrottleTemp >> 16) & 0xFF)
// MSR (19Ch) IA32_THERM_STATUS bits 22:16.
m.cpuCoresData[core].temp = (newTemp >> 16) & 0x7F
m.cpuCoresData[core].temp = int64((newTemp >> 16) & 0x7F)
return nil
}

View File

@ -1,10 +1,10 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
// Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat
import mock "github.com/stretchr/testify/mock"
// mockMsrService is an autogenerated mock type for the msrService type
// mockMsrService is an autogenerated mock type for the mockMsrService type
type mockMsrService struct {
mock.Mock
}
@ -39,6 +39,27 @@ func (_m *mockMsrService) openAndReadMsr(core string) error {
return r0
}
// readSingleMsr provides a mock function with given fields: core, msr
//
// Each configured return value may be given either directly (a uint64 and an
// error) or as a function taking (core, msr) that computes it from the call
// arguments.
func (_m *mockMsrService) readSingleMsr(core string, msr string) (uint64, error) {
ret := _m.Called(core, msr)

// First return value: call the supplied function if there is one, otherwise
// use the stored value, which must then be a uint64 (the type assertion is
// unchecked).
var r0 uint64
if rf, ok := ret.Get(0).(func(string, string) uint64); ok {
r0 = rf(core, msr)
} else {
r0 = ret.Get(0).(uint64)
}

// Second return value: same scheme for the error.
var r1 error
if rf, ok := ret.Get(1).(func(string, string) error); ok {
r1 = rf(core, msr)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// retrieveCPUFrequencyForCore provides a mock function with given fields: core
func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) {
ret := _m.Called(core)

View File

@ -109,8 +109,8 @@ func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expec
require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter)
require.Equal(t, (expectedValue>>16)&0xFF, msr.cpuCoresData[core].throttleTemp)
require.Equal(t, (expectedValue>>16)&0x7F, msr.cpuCoresData[core].temp)
require.Equal(t, int64((expectedValue>>16)&0xFF), msr.cpuCoresData[core].throttleTemp)
require.Equal(t, int64((expectedValue>>16)&0x7F), msr.cpuCoresData[core].temp)
if verifyDelta {
require.Equal(t, delta, msr.cpuCoresData[core].c3Delta)

View File

@ -36,6 +36,7 @@ type raplServiceImpl struct {
data map[string]*raplData
dramFolders map[string]string
fs fileService
logOnce map[string]error
}
// initializeRaplData looks for RAPL folders and initializes data map with fetched information.
@ -51,6 +52,10 @@ func (r *raplServiceImpl) getRaplData() map[string]*raplData {
func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath)
err := checkFile(socketEnergyUjPath)
if err != nil {
return err
}
socketEnergyUjFile, err := os.Open(socketEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err)
@ -59,6 +64,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID])
dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath)
err = checkFile(dramEnergyUjPath)
if err != nil {
return err
}
dramEnergyUjFile, err := os.Open(dramEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err)
@ -66,6 +75,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
defer dramEnergyUjFile.Close()
socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath)
err = checkFile(socketMaxEnergyUjPath)
if err != nil {
return err
}
socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err)
@ -73,6 +86,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
defer socketMaxEnergyUjFile.Close()
dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath)
err = checkFile(dramMaxEnergyUjPath)
if err != nil {
return err
}
dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath)
if err != nil {
return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err)
@ -85,6 +102,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath)
err := checkFile(socketMaxPowerPath)
if err != nil {
return 0, err
}
socketMaxPowerFile, err := os.Open(socketMaxPowerPath)
if err != nil {
return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err)
@ -156,15 +177,22 @@ func (r *raplServiceImpl) findDramFolders() {
}
func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) {
if r.logOnce == nil {
r.logOnce = make(map[string]error)
}
for _, raplFolder := range raplFolders {
potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder)
nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath)
read, err := r.fs.readFile(nameFilePath)
if err != nil {
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
if val := r.logOnce[nameFilePath]; val == nil || val.Error() != err.Error() {
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
r.logOnce[nameFilePath] = err
}
continue
}
r.logOnce[nameFilePath] = nil
// Remove new line character
trimmedString := strings.TrimRight(string(read), "\n")
if trimmedString == "dram" {
@ -194,7 +222,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
return fmt.Errorf("interval between last two Telegraf cycles is 0")
}
if newSocketEnergy > r.data[socketID].socketEnergy {
if newSocketEnergy >= r.data[socketID].socketEnergy {
r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval
} else {
socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile)
@ -206,7 +234,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval
}
if newDramEnergy > r.data[socketID].dramEnergy {
if newDramEnergy >= r.data[socketID].dramEnergy {
r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval
} else {
dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile)

View File

@ -1,10 +1,10 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
// Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat
import mock "github.com/stretchr/testify/mock"
// mockRaplService is an autogenerated mock type for the raplService type
// mockRaplService is an autogenerated mock type for the mockRaplService type
type mockRaplService struct {
mock.Mock
}