feat(intel_powerstat): add Max Turbo Frequency and introduce improvements (#11035)

This commit is contained in:
bkotlowski 2022-05-23 21:02:32 +02:00 committed by GitHub
parent 4f972daa2b
commit df3e9ec2a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 658 additions and 182 deletions

View File

@ -11,18 +11,25 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur
```toml ```toml
# Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization. # Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization.
[[inputs.intel_powerstat]] [[inputs.intel_powerstat]]
## All global metrics are always collected by Intel PowerStat plugin. ## The user can choose which package metrics are monitored by the plugin with the package_metrics setting:
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array. ## - The default, will collect "current_power_consumption", "current_dram_power_consumption" and "thermal_design_power"
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level ## - Setting this value to an empty array means no package metrics will be collected
## telemetry will be exposed by Intel PowerStat plugin. ## - Finally, a user can specify individual metrics to capture from the supported options list
## Supported options: ## Supported options:
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles" ## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
## Empty or missing array means no per-CPU specific metrics will be collected by the plugin.
## Supported options:
## "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", "cpu_busy_frequency"
## ATTENTION: cpu_busy_cycles option is DEPRECATED - superseded by cpu_c0_state_residency
# cpu_metrics = [] # cpu_metrics = []
``` ```
## Example: Configuration with no per-CPU telemetry ## Example: Configuration with no per-CPU telemetry
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected: This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
```toml ```toml
[[inputs.intel_powerstat]] [[inputs.intel_powerstat]]
@ -31,28 +38,39 @@ This configuration allows getting global metrics (processor package specific), n
## Example: Configuration with no per-CPU telemetry - equivalent case ## Example: Configuration with no per-CPU telemetry - equivalent case
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected: This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
```toml ```toml
[[inputs.intel_powerstat]] [[inputs.intel_powerstat]]
``` ```
## Example: Configuration for CPU Temperature and Frequency only ## Example: Configuration for CPU Temperature and CPU Frequency
This configuration allows getting global metrics plus subset of per-CPU metrics (CPU Temperature and Current Frequency): This configuration allows getting default processor package specific metrics, plus subset of per-CPU metrics (CPU Temperature and CPU Frequency):
```toml ```toml
[[inputs.intel_powerstat]] [[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_temperature"] cpu_metrics = ["cpu_frequency", "cpu_temperature"]
``` ```
## Example: Configuration with all available metrics ## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics
This configuration allows getting global metrics and all per-CPU metrics: This configuration allows getting only a subset of per-CPU metrics (CPU Temperature and CPU Frequency):
```toml ```toml
[[inputs.intel_powerstat]] [[inputs.intel_powerstat]]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"] package_metrics = []
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
```
## Example: Configuration with all available metrics
This configuration allows getting all processor package specific metrics and all per-CPU metrics:
```toml
[[inputs.intel_powerstat]]
package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"]
``` ```
## SW Dependencies ## SW Dependencies
@ -66,11 +84,17 @@ The following dependencies are expected by plugin:
Minimum kernel version required is 3.13 to satisfy all requirements. Minimum kernel version required is 3.13 to satisfy all requirements.
Please make sure that kernel modules are loaded and running. You might have to manually enable them by using `modprobe`. Please make sure that kernel modules are loaded and running (cpufreq is integrated in kernel). Modules might have to be manually enabled by using `modprobe`.
Exact commands to be executed are: Depending on the kernel version, run commands:
```sh ```sh
sudo modprobe cpufreq-stats # kernel 5.x.x:
sudo modprobe rapl
sudo modprobe msr
sudo modprobe intel_rapl_common
sudo modprobe intel_rapl_msr
# kernel 4.x.x:
sudo modprobe msr sudo modprobe msr
sudo modprobe intel_rapl sudo modprobe intel_rapl
``` ```
@ -80,9 +104,13 @@ to retrieve data for calculation of most critical per-CPU specific metrics:
- `cpu_busy_frequency_mhz` - `cpu_busy_frequency_mhz`
- `cpu_temperature_celsius` - `cpu_temperature_celsius`
- `cpu_c0_state_residency_percent`
- `cpu_c1_state_residency_percent` - `cpu_c1_state_residency_percent`
- `cpu_c6_state_residency_percent` - `cpu_c6_state_residency_percent`
- `cpu_busy_cycles_percent`
and to retrieve data for calculation of the per-package specific metric:
- `max_turbo_frequency_mhz`
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration). To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
@ -99,13 +127,13 @@ The following processor properties are required by the plugin:
model specific registers for all features model specific registers for all features
- The following processor flags shall be present: - The following processor flags shall be present:
- "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect - "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect
the following metrics: _powerstat_core.cpu_temperature_, _powerstat_core.cpu_busy_frequency_, the following metrics: _powerstat\_core.cpu\_temperature_, _powerstat\_core.cpu\_busy\_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_, _powerstat_core._cpu_c6_state_residency_ _powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_, _powerstat\_core.cpu\_c6\_state\_residency_
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat_core.cpu_busy_frequency_, - "_aperfmperf_" shall be present to collect the following metrics: _powerstat\_core.cpu\_busy\_frequency_,
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_ _powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_
- "_dts_" shall be present to collect _powerstat_core.cpu_temperature_ - "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_
- Processor _Model number_ must be one of the following values for plugin to read _powerstat_core.cpu_c1_state_residency_ - Processor _Model number_ must be one of the following values for plugin to read _powerstat\_core.cpu\_c1\_state\_residency_
and _powerstat_core.cpu_c6_state_residency_ metrics: and _powerstat\_core.cpu\_c6\_state\_residency_ metrics:
| Model number | Processor name | | Model number | Processor name |
|-----|-------------| |-----|-------------|
@ -168,50 +196,59 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
- The following Tags are returned by plugin with powerstat_core measurements: - The following Tags are returned by plugin with powerstat_core measurements:
```text | Tag | Description |
| Tag | Description | |--------------|-------------------------------|
|-----|-------------| | `package_id` | ID of platform package/socket |
| `package_id` | ID of platform package/socket | | `core_id` | ID of physical processor core |
| `core_id` | ID of physical processor core | | `cpu_id` | ID of logical processor core |
| `cpu_id` | ID of logical processor core |
Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key) Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key)
while core_id and package_id tags are additional topology information. while core_id and package_id tags are additional topology information.
```
- Available metrics for powerstat_core measurement - Available metrics for powerstat_core measurement
```text | Metric name (field) | Description | Units |
| Metric name (field) | Description | Units | |---------------------|-------------|-------|
|-----|-------------|-----| | `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz | | `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz | | `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees | | `cpu_c0_state_residency_percent` | Percentage of time that CPU Core spent in C0 Core residency state | % |
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % | | `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % | | `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
| `cpu_busy_cycles_percent` | CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % | | `cpu_busy_cycles_percent` | (**DEPRECATED** - superseded by cpu_c0_state_residency_percent) CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
```
- powerstat_package - powerstat_package
- The following Tags are returned by plugin with powerstat_package measurements: - The following Tags are returned by plugin with powerstat_package measurements:
```text | Tag | Description |
| Tag | Description | |-----|-------------|
|-----|-------------| | `package_id` | ID of platform package/socket |
| `package_id` | ID of platform package/socket | | `active_cores`| Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency
Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which
package metric refers to. Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which package metric refers to.
```
- Available metrics for powerstat_package measurement - Available metrics for powerstat_package measurement
```text | Metric name (field) | Description | Units |
| Metric name (field) | Description | Units | |-----|-------------|-----|
|-----|-------------|-----| | `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts | | `current_power_consumption_watts` | Current power consumption of processor package | Watts |
| `current_power_consumption_watts` | Current power consumption of processor package | Watts | | `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts | | `max_turbo_frequency_mhz`| Maximum reachable turbo frequency for number of cores active | MHz |
```
### Known issues
Starting with Linux kernel version v5.4.77 (see [this kernel change](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94)),
resources like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for security reasons, so this plugin needs root privileges to work properly.
If such strict security restrictions are not relevant, reading permissions to files in `/sys/devices/virtual/powercap/intel-rapl/`
directory can be manually changed for example with `chmod` command with custom parameters.
For example to give all users permission to all files in `intel-rapl` directory:
```bash
sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
```
### Example Output ### Example Output
@ -219,10 +256,12 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_cycles_percent=0.8 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_state_residency_percent=0.8 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000 powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000
``` ```

View File

@ -7,8 +7,8 @@ type msrData struct {
c3 uint64 c3 uint64
c6 uint64 c6 uint64
c7 uint64 c7 uint64
throttleTemp uint64 throttleTemp int64
temp uint64 temp int64
mperfDelta uint64 mperfDelta uint64
aperfDelta uint64 aperfDelta uint64
timeStampCounterDelta uint64 timeStampCounterDelta uint64

View File

@ -152,3 +152,22 @@ func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset i
func newFileService() *fileServiceImpl { func newFileService() *fileServiceImpl {
return &fileServiceImpl{} return &fileServiceImpl{}
} }
// checkFile verifies that path names an existing filesystem entry that is not
// a symbolic link.
//
// It returns an error when path is empty, when the entry does not exist, when
// its metadata cannot be read, or when the entry itself is a symlink. For the
// "cannot obtain file info" case the underlying error is wrapped, so callers
// can still inspect it with errors.Is / errors.As.
func checkFile(path string) error {
	if path == "" {
		return fmt.Errorf("empty path given")
	}

	// Lstat does not follow links, so the final path component itself is
	// inspected rather than whatever it points to.
	lInfo, err := os.Lstat(path)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("file `%s` doesn't exist", path)
		}
		return fmt.Errorf("cannot obtain file info of `%s`: %w", path, err)
	}

	if lInfo.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf("file `%s` is a symlink", path)
	}
	return nil
}

View File

@ -1,4 +1,4 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT. // Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat package intel_powerstat
@ -8,7 +8,7 @@ import (
mock "github.com/stretchr/testify/mock" mock "github.com/stretchr/testify/mock"
) )
// mockFileService is an autogenerated mock type for the fileService type // mockFileService is an autogenerated mock type for the mockFileService type
type mockFileService struct { type mockFileService struct {
mock.Mock mock.Mock
} }

View File

@ -6,6 +6,7 @@ package intel_powerstat
import ( import (
"fmt" "fmt"
"math/big" "math/big"
"strconv"
"strings" "strings"
"sync" "sync"
"time" "time"
@ -15,60 +16,81 @@ import (
) )
const ( const (
cpuFrequency = "cpu_frequency" cpuFrequency = "cpu_frequency"
cpuBusyFrequency = "cpu_busy_frequency" cpuBusyFrequency = "cpu_busy_frequency"
cpuTemperature = "cpu_temperature" cpuTemperature = "cpu_temperature"
cpuC1StateResidency = "cpu_c1_state_residency" cpuC0StateResidency = "cpu_c0_state_residency"
cpuC6StateResidency = "cpu_c6_state_residency" cpuC1StateResidency = "cpu_c1_state_residency"
cpuBusyCycles = "cpu_busy_cycles" cpuC6StateResidency = "cpu_c6_state_residency"
percentageMultiplier = 100 cpuBusyCycles = "cpu_busy_cycles"
packageCurrentPowerConsumption = "current_power_consumption"
packageCurrentDramPowerConsumption = "current_dram_power_consumption"
packageThermalDesignPower = "thermal_design_power"
packageTurboLimit = "max_turbo_frequency"
percentageMultiplier = 100
) )
// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization. // PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.
type PowerStat struct { type PowerStat struct {
CPUMetrics []string `toml:"cpu_metrics"` CPUMetrics []string `toml:"cpu_metrics"`
Log telegraf.Logger `toml:"-"` PackageMetrics []string `toml:"package_metrics"`
Log telegraf.Logger `toml:"-"`
fs fileService fs fileService
rapl raplService rapl raplService
msr msrService msr msrService
cpuFrequency bool cpuFrequency bool
cpuBusyFrequency bool cpuBusyFrequency bool
cpuTemperature bool cpuTemperature bool
cpuC1StateResidency bool cpuC0StateResidency bool
cpuC6StateResidency bool cpuC1StateResidency bool
cpuBusyCycles bool cpuC6StateResidency bool
cpuInfo map[string]*cpuInfo cpuBusyCycles bool
skipFirstIteration bool packageTurboLimit bool
packageCurrentPowerConsumption bool
packageCurrentDramPowerConsumption bool
packageThermalDesignPower bool
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
logOnce map[string]error
} }
// Init performs one time setup of the plugin. // Init performs one time setup of the plugin
func (p *PowerStat) Init() error { func (p *PowerStat) Init() error {
p.parsePackageMetricsConfig()
p.parseCPUMetricsConfig() p.parseCPUMetricsConfig()
err := p.verifyProcessor() err := p.verifyProcessor()
if err != nil { if err != nil {
return err return err
} }
// Initialize MSR service only when there is at least one core metric enabled. // Initialize MSR service only when there is at least one metric enabled
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC1StateResidency || if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles { p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit {
p.msr = newMsrServiceWithFs(p.Log, p.fs) p.msr = newMsrServiceWithFs(p.Log, p.fs)
} }
p.rapl = newRaplServiceWithFs(p.Log, p.fs) if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit {
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
}
if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() {
return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather")
}
return nil return nil
} }
// Gather takes in an accumulator and adds the metrics that the Input gathers. // Gather takes in an accumulator and adds the metrics that the Input gathers
func (p *PowerStat) Gather(acc telegraf.Accumulator) error { func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
p.addGlobalMetrics(acc) if p.areGlobalMetricsEnabled() {
p.addGlobalMetrics(acc)
}
if p.areCoreMetricsEnabled() { if p.areCoreMetricsEnabled() {
p.addPerCoreMetrics(acc) p.addPerCoreMetrics(acc)
} }
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations. // Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations
p.skipFirstIteration = false p.skipFirstIteration = false
return nil return nil
@ -79,18 +101,36 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
p.rapl.initializeRaplData() p.rapl.initializeRaplData()
for socketID := range p.rapl.getRaplData() { for socketID := range p.rapl.getRaplData() {
if p.packageTurboLimit {
p.addTurboRatioLimit(socketID, acc)
}
err := p.rapl.retrieveAndCalculateData(socketID) err := p.rapl.retrieveAndCalculateData(socketID)
if err != nil { if err != nil {
// In case of an error skip calculating metrics for this socket // In case of an error skip calculating metrics for this socket
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err) if val := p.logOnce[socketID]; val == nil || val.Error() != err.Error() {
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
// Remember that specific error occurs for socketID to omit logging next time
p.logOnce[socketID] = err
}
continue continue
} }
p.addThermalDesignPowerMetric(socketID, acc)
// If error stops occurring, clear logOnce indicator
p.logOnce[socketID] = nil
if p.packageThermalDesignPower {
p.addThermalDesignPowerMetric(socketID, acc)
}
if p.skipFirstIteration { if p.skipFirstIteration {
continue continue
} }
p.addCurrentSocketPowerConsumption(socketID, acc) if p.packageCurrentPowerConsumption {
p.addCurrentDramPowerConsumption(socketID, acc) p.addCurrentSocketPowerConsumption(socketID, acc)
}
if p.packageCurrentDramPowerConsumption {
p.addCurrentDramPowerConsumption(socketID, acc)
}
} }
} }
@ -155,11 +195,10 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
} }
// Read data from MSR only if required // Read data from MSR only if required
if p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature || if p.cpuC0StateResidency || p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature || p.cpuBusyFrequency {
p.cpuBusyFrequency {
err := p.msr.openAndReadMsr(cpuID) err := p.msr.openAndReadMsr(cpuID)
if err != nil { if err != nil {
// In case of an error exit the function. All metrics past this point are dependant on MSR. // In case of an error exit the function. All metrics past this point are dependent on MSR
p.Log.Debugf("error while reading msr: %v", err) p.Log.Debugf("error while reading msr: %v", err)
return return
} }
@ -169,12 +208,16 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
p.addCPUTemperatureMetric(cpuID, acc) p.addCPUTemperatureMetric(cpuID, acc)
} }
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle. // cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle
if p.cpuBusyFrequency { if p.cpuBusyFrequency {
p.addCPUBusyFrequencyMetric(cpuID, acc) p.addCPUBusyFrequencyMetric(cpuID, acc)
} }
if !p.skipFirstIteration { if !p.skipFirstIteration {
if p.cpuC0StateResidency || p.cpuBusyCycles {
p.addCPUC0StateResidencyMetric(cpuID, acc)
}
if p.cpuC1StateResidency { if p.cpuC1StateResidency {
p.addCPUC1StateResidencyMetric(cpuID, acc) p.addCPUC1StateResidencyMetric(cpuID, acc)
} }
@ -182,10 +225,6 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
if p.cpuC6StateResidency { if p.cpuC6StateResidency {
p.addCPUC6StateResidencyMetric(cpuID, acc) p.addCPUC6StateResidencyMetric(cpuID, acc)
} }
if p.cpuBusyCycles {
p.addCPUBusyCyclesMetric(cpuID, acc)
}
} }
} }
@ -229,6 +268,153 @@ func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulat
acc.AddGauge("powerstat_core", fields, tags) acc.AddGauge("powerstat_core", fields, tags)
} }
// calculateTurboRatioGroup decodes up to eight (core count, frequency ratio)
// buckets and writes the resulting values into group, keyed by number of
// active cores.
//
// coreCounts and msr are both read as eight 8-bit fields, lowest byte first:
// byte i of coreCounts is the upper core-count bound of bucket i and byte i of
// msr is that bucket's ratio. Decoding stops at the first bucket whose core
// count is zero. A bucket with a zero ratio writes nothing, but the following
// bucket still starts right after it. The stored value is ratio * 100.
//
// The first bucket's range starts at its own core count, so it contributes a
// single entry; every later bucket covers previous bound + 1 through its own
// bound.
func calculateTurboRatioGroup(coreCounts uint64, msr uint64, group map[int]uint64) {
	const (
		fieldBits  = 8
		fieldMask  = 0xFF
		fieldCount = 8
	)

	lower := coreCounts & fieldMask
	for shift := uint(0); shift < fieldCount*fieldBits; shift += fieldBits {
		upper := (coreCounts >> shift) & fieldMask
		if upper == 0 {
			// A zero core count terminates the bucket list.
			return
		}

		if ratio := (msr >> shift) & fieldMask; ratio != 0 {
			freq := ratio * 100
			for cores := lower; cores <= upper; cores++ {
				group[int(cores)] = freq
			}
		}
		lower = upper + 1
	}
}
// addTurboRatioLimit reads the turbo ratio limit MSRs for one processor package
// and adds a "max_turbo_frequency_mhz" gauge to acc for every decoded
// active-core count (tags: package_id, active_cores).
//
// A CPU belonging to socketID is looked up in p.cpuInfo; its model number
// selects which MSRs are read and how they are decoded. Any MSR read failure
// is logged at debug level and aborts the function without emitting metrics.
func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator) {
	var err error
	turboRatioLimitGroups := make(map[int]uint64)

	// Pick a CPU of this socket: its ID is used for the MSR reads and its model
	// number for choosing the decoding scheme.
	var cpuID = ""
	var model = ""
	for _, v := range p.cpuInfo {
		if v.physicalID == socketID {
			cpuID = v.cpuID
			model = v.model
		}
	}
	if cpuID == "" || model == "" {
		p.Log.Debugf("error while reading socket ID")
		return
	}

	// dump_hsw_turbo_ratio_limit
	if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X
		coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18
		msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit2, turboRatioLimitGroups)
	}

	// dump_ivt_turbo_ratio_limit
	if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X
		(model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X
		coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16
		msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups)
	}

	if (model != strconv.FormatInt(0x37, 10)) && // INTEL_FAM6_ATOM_SILVERMONT
		(model != strconv.FormatInt(0x4A, 10)) && // INTEL_FAM6_ATOM_SILVERMONT_MID:
		(model != strconv.FormatInt(0x5A, 10)) && // INTEL_FAM6_ATOM_AIRMONT_MID:
		(model != strconv.FormatInt(0x2E, 10)) && // INTEL_FAM6_NEHALEM_EX
		(model != strconv.FormatInt(0x2F, 10)) && // INTEL_FAM6_WESTMERE_EX
		(model != strconv.FormatInt(0x57, 10)) && // INTEL_FAM6_XEON_PHI_KNL
		(model != strconv.FormatInt(0x85, 10)) { // INTEL_FAM6_XEON_PHI_KNM
		coreCounts := uint64(0x0807060504030201)      // default value (counting the number of active cores 1 to 8). May be changed in "if" segment below
		if (model == strconv.FormatInt(0x5C, 10)) || // INTEL_FAM6_ATOM_GOLDMONT
			(model == strconv.FormatInt(0x55, 10)) || // INTEL_FAM6_SKYLAKE_X
			(model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X
			(model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D
			(model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D
			// On these models the core counts of the buckets are themselves read from an MSR.
			coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
			if err != nil {
				p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
				return
			}
		}

		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
			return
		}
		calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups)
	}

	// dump_atom_turbo_ratio_limits
	if model == strconv.FormatInt(0x37, 10) || // INTEL_FAM6_ATOM_SILVERMONT
		model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID:
		model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID
		coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4
		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS")

		if err != nil {
			p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err)
			return
		}
		value := uint64(0)
		for i := 0; i < 4; i++ { // value "4" is specific for this group of processors
			// value of freq ratio is stored in 6-bit blocks, saved every 8 bits
			ratio := (msrTurboRatioLimit >> (8 * i)) & 0x3F
			// Repack the ratio into byte i so that it lines up with byte i of coreCounts.
			// The shift count must never be negative: Go panics at run time on a
			// negative shift amount.
			value |= ratio << (8 * i)
		}

		calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
	}
	// dump_knl_turbo_ratio_limits
	if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL
		msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
		if err != nil {
			p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
			return
		}

		// value of freq ratio of bucket 1 is saved in bits 15 to 8.
		// each next value is calculated as the previous value - delta. Delta is stored in 3-bit blocks every 8 bits (start at 21 (2*8+5))
		value := (msrTurboRatioLimit >> 8) & 0xFF
		newValue := value
		for i := 2; i < 8; i++ {
			newValue = newValue - (msrTurboRatioLimit>>(8*i+5))&0x7
			value = value + (newValue << ((i - 1) * 8))
		}

		// value of number of active cores of bucket 1 is saved in bits 1 to 7.
		// each next value is calculated as the previous value + delta. Delta is stored in 5-bit blocks every 8 bits (start at 16 (2*8))
		coreCounts := (msrTurboRatioLimit & 0xFF) >> 1
		newBucket := coreCounts
		for i := 2; i < 8; i++ {
			newBucket = newBucket + (msrTurboRatioLimit>>(8*i))&0x1F
			coreCounts = coreCounts + (newBucket << ((i - 1) * 8))
		}
		calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
	}

	// Emit one gauge per decoded active-core count.
	for key, val := range turboRatioLimitGroups {
		tags := map[string]string{
			"package_id":   socketID,
			"active_cores": strconv.Itoa(key),
		}
		fields := map[string]interface{}{
			"max_turbo_frequency_mhz": val,
		}
		acc.AddGauge("powerstat_package", fields, tags)
	}
}
func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) { func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData() coresData := p.msr.getCPUCoresData()
mperfDelta := coresData[cpuID].mperfDelta mperfDelta := coresData[cpuID].mperfDelta
@ -331,7 +517,7 @@ func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accu
acc.AddGauge("powerstat_core", fields, tags) acc.AddGauge("powerstat_core", fields, tags)
} }
func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulator) { func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
coresData := p.msr.getCPUCoresData() coresData := p.msr.getCPUCoresData()
// Avoid division by 0 // Avoid division by 0
if coresData[cpuID].timeStampCounterDelta == 0 { if coresData[cpuID].timeStampCounterDelta == 0 {
@ -339,7 +525,7 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
timestampCounterLocation, cpuID) timestampCounterLocation, cpuID)
return return
} }
busyCyclesValue := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * c0Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta)) float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta))
cpu := p.cpuInfo[cpuID] cpu := p.cpuInfo[cpuID]
tags := map[string]string{ tags := map[string]string{
@ -347,11 +533,42 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
"core_id": cpu.coreID, "core_id": cpu.coreID,
"cpu_id": cpu.cpuID, "cpu_id": cpu.cpuID,
} }
fields := map[string]interface{}{ if p.cpuC0StateResidency {
"cpu_busy_cycles_percent": busyCyclesValue, fields := map[string]interface{}{
"cpu_c0_state_residency_percent": c0Value,
}
acc.AddGauge("powerstat_core", fields, tags)
}
if p.cpuBusyCycles {
deprecatedFields := map[string]interface{}{
"cpu_busy_cycles_percent": c0Value,
}
acc.AddGauge("powerstat_core", deprecatedFields, tags)
}
}
func (p *PowerStat) parsePackageMetricsConfig() {
if p.PackageMetrics == nil {
// if Package Metric config is empty, use the default settings.
p.packageCurrentPowerConsumption = true
p.packageCurrentDramPowerConsumption = true
p.packageThermalDesignPower = true
return
} }
acc.AddGauge("powerstat_core", fields, tags) if contains(p.PackageMetrics, packageTurboLimit) {
p.packageTurboLimit = true
}
if contains(p.PackageMetrics, packageCurrentPowerConsumption) {
p.packageCurrentPowerConsumption = true
}
if contains(p.PackageMetrics, packageCurrentDramPowerConsumption) {
p.packageCurrentDramPowerConsumption = true
}
if contains(p.PackageMetrics, packageThermalDesignPower) {
p.packageThermalDesignPower = true
}
} }
func (p *PowerStat) parseCPUMetricsConfig() { func (p *PowerStat) parseCPUMetricsConfig() {
@ -363,6 +580,10 @@ func (p *PowerStat) parseCPUMetricsConfig() {
p.cpuFrequency = true p.cpuFrequency = true
} }
if contains(p.CPUMetrics, cpuC0StateResidency) {
p.cpuC0StateResidency = true
}
if contains(p.CPUMetrics, cpuC1StateResidency) { if contains(p.CPUMetrics, cpuC1StateResidency) {
p.cpuC1StateResidency = true p.cpuC1StateResidency = true
} }
@ -396,7 +617,7 @@ func (p *PowerStat) verifyProcessor() error {
p.cpuInfo = stats p.cpuInfo = stats
// First CPU is sufficient for verification. // First CPU is sufficient for verification
firstCPU := p.cpuInfo["0"] firstCPU := p.cpuInfo["0"]
if firstCPU == nil { if firstCPU == nil {
return fmt.Errorf("first core not found while parsing /proc/cpuinfo") return fmt.Errorf("first core not found while parsing /proc/cpuinfo")
@ -414,14 +635,16 @@ func (p *PowerStat) verifyProcessor() error {
if !strings.Contains(firstCPU.flags, "msr") { if !strings.Contains(firstCPU.flags, "msr") {
p.cpuTemperature = false p.cpuTemperature = false
p.cpuC6StateResidency = false p.cpuC6StateResidency = false
p.cpuC0StateResidency = false
p.cpuBusyCycles = false p.cpuBusyCycles = false
p.cpuBusyFrequency = false p.cpuBusyFrequency = false
p.cpuC1StateResidency = false p.cpuC1StateResidency = false
} }
if !strings.Contains(firstCPU.flags, "aperfmperf") { if !strings.Contains(firstCPU.flags, "aperfmperf") {
p.cpuBusyFrequency = false
p.cpuBusyCycles = false p.cpuBusyCycles = false
p.cpuBusyFrequency = false
p.cpuC0StateResidency = false
p.cpuC1StateResidency = false p.cpuC1StateResidency = false
} }
@ -438,7 +661,6 @@ func contains(slice []string, str string) bool {
return true return true
} }
} }
return false return false
} }
@ -446,17 +668,27 @@ func (p *PowerStat) areCoreMetricsEnabled() bool {
return p.msr != nil && len(p.msr.getCPUCoresData()) > 0 return p.msr != nil && len(p.msr.getCPUCoresData()) > 0
} }
// newPowerStat creates and returns PowerStat struct. func (p *PowerStat) areGlobalMetricsEnabled() bool {
return p.rapl != nil
}
// newPowerStat creates and returns PowerStat struct
func newPowerStat(fs fileService) *PowerStat { func newPowerStat(fs fileService) *PowerStat {
p := &PowerStat{ p := &PowerStat{
cpuFrequency: false, cpuFrequency: false,
cpuC1StateResidency: false, cpuC0StateResidency: false,
cpuC6StateResidency: false, cpuC1StateResidency: false,
cpuBusyCycles: false, cpuC6StateResidency: false,
cpuTemperature: false, cpuBusyCycles: false,
cpuBusyFrequency: false, cpuTemperature: false,
skipFirstIteration: true, cpuBusyFrequency: false,
fs: fs, packageTurboLimit: false,
packageCurrentPowerConsumption: false,
packageCurrentDramPowerConsumption: false,
packageThermalDesignPower: false,
skipFirstIteration: true,
fs: fs,
logOnce: make(map[string]error),
} }
return p return p

View File

@ -15,26 +15,32 @@ import (
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
) )
// MockServices bundles the mocked file, MSR and RAPL services used by the
// tests in this file, so helpers can hand all of them back as a single value.
type MockServices struct {
	// fs is the mocked file service.
	fs *mockFileService
	// msr is the mocked MSR service.
	msr *mockMsrService
	// rapl is the mocked RAPL service.
	rapl *mockRaplService
}
func TestInitPlugin(t *testing.T) { func TestInitPlugin(t *testing.T) {
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"} cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
power, fsMock, _, _ := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
fsMock.On("getCPUInfoStats", mock.Anything). mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(nil, errors.New("error getting cpu stats")).Once() Return(nil, errors.New("error getting cpu stats")).Once()
require.Error(t, power.Init()) require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything). mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(make(map[string]*cpuInfo), nil).Once() Return(make(map[string]*cpuInfo), nil).Once()
require.Error(t, power.Init()) require.Error(t, power.Init())
fsMock.On("getCPUInfoStats", mock.Anything). mockServices.fs.On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": { Return(map[string]*cpuInfo{"0": {
vendorID: "GenuineIntel", vendorID: "GenuineIntel",
cpuFamily: "test", cpuFamily: "test",
}}, nil).Once() }}, nil).Once()
require.Error(t, power.Init()) require.Error(t, power.Init())
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(cores, nil).Once(). Return(cores, nil).Once().
On("getCPUInfoStats", mock.Anything). On("getCPUInfoStats", mock.Anything).
Return(map[string]*cpuInfo{"0": { Return(map[string]*cpuInfo{"0": {
@ -44,24 +50,24 @@ func TestInitPlugin(t *testing.T) {
// Verify MSR service initialization. // Verify MSR service initialization.
power.cpuFrequency = true power.cpuFrequency = true
require.NoError(t, power.Init()) require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, len(cores), len(power.msr.getCPUCoresData())) require.Equal(t, len(cores), len(power.msr.getCPUCoresData()))
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything). mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once() Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once()
// In case of an error when fetching cpu cores plugin should proceed with execution. // In case of an error when fetching cpu cores plugin should proceed with execution.
require.NoError(t, power.Init()) require.NoError(t, power.Init())
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything) mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
require.Equal(t, 0, len(power.msr.getCPUCoresData())) require.Equal(t, 0, len(power.msr.getCPUCoresData()))
} }
func TestParseCPUMetricsConfig(t *testing.T) { func TestParseCPUMetricsConfig(t *testing.T) {
power, _, _, _ := getPowerWithMockedServices() power, _ := getPowerWithMockedServices()
disableCoreMetrics(power) disableCoreMetrics(power)
power.CPUMetrics = []string{ power.CPUMetrics = []string{
"cpu_frequency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
"cpu_busy_frequency", "cpu_busy_frequency",
} }
power.parseCPUMetricsConfig() power.parseCPUMetricsConfig()
@ -88,6 +94,7 @@ func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) {
require.Equal(t, enabled, power.cpuFrequency) require.Equal(t, enabled, power.cpuFrequency)
require.Equal(t, enabled, power.cpuC1StateResidency) require.Equal(t, enabled, power.cpuC1StateResidency)
require.Equal(t, enabled, power.cpuC6StateResidency) require.Equal(t, enabled, power.cpuC6StateResidency)
require.Equal(t, enabled, power.cpuC0StateResidency)
require.Equal(t, enabled, power.cpuBusyCycles) require.Equal(t, enabled, power.cpuBusyCycles)
require.Equal(t, enabled, power.cpuBusyFrequency) require.Equal(t, enabled, power.cpuBusyFrequency)
require.Equal(t, enabled, power.cpuTemperature) require.Equal(t, enabled, power.cpuTemperature)
@ -102,23 +109,23 @@ func TestGather(t *testing.T) {
preparedCPUData := getPreparedCPUData(coreIDs) preparedCPUData := getPreparedCPUData(coreIDs)
raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy) raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfo(power, coreIDs, packageIDs) prepareCPUInfo(power, coreIDs, packageIDs)
enableCoreMetrics(power) enableCoreMetrics(power)
power.skipFirstIteration = false power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything). mockServices.rapl.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap). On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)). On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil) On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
msrMock.On("getCPUCoresData").Return(preparedCPUData). mockServices.msr.On("getCPUCoresData").Return(preparedCPUData).
On("openAndReadMsr", mock.Anything).Return(nil). On("openAndReadMsr", mock.Anything).Return(nil).
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil) On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
require.NoError(t, power.Gather(&acc)) require.NoError(t, power.Gather(&acc))
// Number of global metrics : 3 // Number of global metrics : 3
// Number of per core metrics : 6 // Number of per core metrics : 7
require.Equal(t, 3*len(packageIDs)+6*len(coreIDs), len(acc.GetTelegrafMetrics())) require.Equal(t, 3*len(packageIDs)+7*len(coreIDs), len(acc.GetTelegrafMetrics()))
} }
func TestAddGlobalMetricsNegative(t *testing.T) { func TestAddGlobalMetricsNegative(t *testing.T) {
@ -126,24 +133,24 @@ func TestAddGlobalMetricsNegative(t *testing.T) {
socketCurrentEnergy := 13213852.2 socketCurrentEnergy := 13213852.2
dramCurrentEnergy := 784552.0 dramCurrentEnergy := 784552.0
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy) raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
power, _, raplMock, _ := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
power.skipFirstIteration = false power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything).Once(). mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap).Once(). On("getRaplData").Return(raplDataMap).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap)) On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap))
power.addGlobalMetrics(&acc) power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics())) require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap)) mockServices.rapl.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
raplMock.On("initializeRaplData", mock.Anything).Once(). mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(make(map[string]*raplData)).Once() On("getRaplData").Return(make(map[string]*raplData)).Once()
power.addGlobalMetrics(&acc) power.addGlobalMetrics(&acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics())) require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
raplMock.AssertNotCalled(t, "retrieveAndCalculateData") mockServices.rapl.AssertNotCalled(t, "retrieveAndCalculateData")
raplMock.On("initializeRaplData", mock.Anything).Once(). mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
On("getRaplData").Return(raplDataMap). On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Once(). On("retrieveAndCalculateData", mock.Anything).Return(nil).Once().
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once(). On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once().
@ -159,10 +166,10 @@ func TestAddGlobalMetricsPositive(t *testing.T) {
dramCurrentEnergy := 124234872.5 dramCurrentEnergy := 124234872.5
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy) raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
maxPower := 546783852.9 maxPower := 546783852.9
power, _, raplMock, _ := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
power.skipFirstIteration = false power.skipFirstIteration = false
raplMock.On("initializeRaplData", mock.Anything). mockServices.rapl.On("initializeRaplData", mock.Anything).
On("getRaplData").Return(raplDataMap). On("getRaplData").Return(raplDataMap).
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)). On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice(). On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice().
@ -181,9 +188,9 @@ func TestAddMetricsForSingleCoreNegative(t *testing.T) {
var wg sync.WaitGroup var wg sync.WaitGroup
var acc testutil.Accumulator var acc testutil.Accumulator
core := "0" core := "0"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
msrMock.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once() mockServices.msr.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
// Skip generating metric for CPU frequency. // Skip generating metric for CPU frequency.
power.cpuFrequency = false power.cpuFrequency = false
@ -201,16 +208,16 @@ func TestAddCPUFrequencyMetric(t *testing.T) {
coreID := "3" coreID := "3"
packageID := "0" packageID := "0"
frequency := 1200000.2 frequency := 1200000.2
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything). mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).
Return(float64(0), errors.New("error on reading file")).Once() Return(float64(0), errors.New("error on reading file")).Once()
power.addCPUFrequencyMetric(cpuID, &acc) power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics())) require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once() mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
power.addCPUFrequencyMetric(cpuID, &acc) power.addCPUFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -225,12 +232,12 @@ func TestAddCoreCPUTemperatureMetric(t *testing.T) {
cpuID := "0" cpuID := "0"
coreID := "2" coreID := "2"
packageID := "1" packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
preparedData := getPreparedCPUData([]string{cpuID}) preparedData := getPreparedCPUData([]string{cpuID})
expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
msrMock.On("getCPUCoresData").Return(preparedData).Once() mockServices.msr.On("getCPUCoresData").Return(preparedData).Once()
power.addCPUTemperatureMetric(cpuID, &acc) power.addCPUTemperatureMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -243,13 +250,13 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
cpuID := "0" cpuID := "0"
coreID := "2" coreID := "2"
packageID := "1" packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID}) preparedData := getPreparedCPUData([]string{cpuID})
expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta)) float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice() mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC6StateResidencyMetric(cpuID, &acc) power.addCPUC6StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -263,27 +270,32 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
require.Equal(t, 0, len(acc.GetTelegrafMetrics())) require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
} }
func TestAddProcessorBusyCyclesMetric(t *testing.T) { func TestAddC0StateResidencyMetric(t *testing.T) {
var acc testutil.Accumulator var acc testutil.Accumulator
cpuID := "0" cpuID := "0"
coreID := "2" coreID := "2"
packageID := "1" packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID}) preparedData := getPreparedCPUData([]string{cpuID})
expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) / expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) /
float64(preparedData[cpuID].timeStampCounterDelta)) float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice() mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyCyclesMetric(cpuID, &acc) power.cpuBusyCycles, power.cpuC0StateResidency = true, true
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) power.addCPUC0StateResidencyMetric(cpuID, &acc)
require.Equal(t, 2, len(acc.GetTelegrafMetrics()))
expectedMetric := getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID) expectedMetric := getPowerCoreMetric("cpu_c0_state_residency_percent", expectedBusyCycles, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
// Deprecated
expectedMetric = getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags) acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
acc.ClearMetrics() acc.ClearMetrics()
preparedData[cpuID].timeStampCounterDelta = 0 preparedData[cpuID].timeStampCounterDelta = 0
power.addCPUBusyCyclesMetric(cpuID, &acc) power.addCPUC0StateResidencyMetric(cpuID, &acc)
require.Equal(t, 0, len(acc.GetTelegrafMetrics())) require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
} }
@ -292,12 +304,12 @@ func TestAddProcessorBusyFrequencyMetric(t *testing.T) {
cpuID := "0" cpuID := "0"
coreID := "2" coreID := "2"
packageID := "1" packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID}) preparedData := getPreparedCPUData([]string{cpuID})
power.skipFirstIteration = false power.skipFirstIteration = false
msrMock.On("getCPUCoresData").Return(preparedData).Twice() mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUBusyFrequencyMetric(cpuID, &acc) power.addCPUBusyFrequencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -312,14 +324,14 @@ func TestAddC1StateResidencyMetric(t *testing.T) {
cpuID := "0" cpuID := "0"
coreID := "2" coreID := "2"
packageID := "1" packageID := "1"
power, _, _, msrMock := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID) prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
preparedData := getPreparedCPUData([]string{cpuID}) preparedData := getPreparedCPUData([]string{cpuID})
c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta - c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta -
preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta
expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta)) expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta))
msrMock.On("getCPUCoresData").Return(preparedData).Twice() mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
power.addCPUC1StateResidencyMetric(cpuID, &acc) power.addCPUC1StateResidencyMetric(cpuID, &acc)
require.Equal(t, 1, len(acc.GetTelegrafMetrics())) require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
@ -337,9 +349,9 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
var acc testutil.Accumulator var acc testutil.Accumulator
sockets := []string{"0"} sockets := []string{"0"}
maxPower := 195720672.1 maxPower := 195720672.1
power, _, raplMock, _ := getPowerWithMockedServices() power, mockServices := getPowerWithMockedServices()
raplMock.On("getConstraintMaxPowerWatts", mock.Anything). mockServices.rapl.On("getConstraintMaxPowerWatts", mock.Anything).
Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once(). Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once().
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once() On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once()
@ -354,6 +366,80 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags) acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags)
} }
// TestCalculateTurboRatioGroup feeds packed core-count and ratio words into
// calculateTurboRatioGroup and verifies the resulting map from active-core
// count to frequency, both when the map is reused across calls and when it
// starts out empty.
func TestCalculateTurboRatioGroup(t *testing.T) {
	groups := make(map[int]uint64)

	// Eight buckets covering one core each.
	packedCores := uint64(0x0807060504030201)
	packedRatios := uint64(0x0807060504030201)
	calculateTurboRatioGroup(packedCores, packedRatios, groups)
	require.Equal(t, 8, len(groups))
	for cores := 1; cores <= 8; cores++ {
		require.Equal(t, uint64(cores*100), groups[cores])
	}

	// Eight buckets covering two cores each: consecutive pairs share a value.
	packedCores = uint64(0x100e0c0a08060402)
	calculateTurboRatioGroup(packedCores, packedRatios, groups)
	require.Equal(t, 16, len(groups))
	for cores := 1; cores <= 16; cores++ {
		require.Equal(t, uint64(((cores+1)/2)*100), groups[cores])
	}

	// Two more entries are added on top of the sixteen already present.
	packedCores = uint64(0x1211)
	packedRatios = uint64(0xfffe)
	calculateTurboRatioGroup(packedCores, packedRatios, groups)
	require.Equal(t, 18, len(groups))
	require.Equal(t, uint64(25400), groups[17])
	require.Equal(t, uint64(25500), groups[18])

	// Every existing entry is overwritten with the same value; the size is unchanged.
	packedCores = uint64(0x1201)
	packedRatios = uint64(0x0202)
	calculateTurboRatioGroup(packedCores, packedRatios, groups)
	require.Equal(t, 18, len(groups))
	for cores := 1; cores <= 18; cores++ {
		require.Equal(t, uint64(200), groups[cores])
	}

	// With a fresh map only the two supplied buckets appear.
	packedCores = uint64(0x1211)
	packedRatios = uint64(0xfffe)
	groups = make(map[int]uint64)
	calculateTurboRatioGroup(packedCores, packedRatios, groups)
	require.Equal(t, 2, len(groups))
	require.Equal(t, uint64(25400), groups[17])
	require.Equal(t, uint64(25500), groups[18])
}
func getPreparedCPUData(cores []string) map[string]*msrData { func getPreparedCPUData(cores []string) map[string]*msrData {
msrDataMap := make(map[string]*msrData) msrDataMap := make(map[string]*msrData)
@ -451,6 +537,7 @@ func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) {
} }
func enableCoreMetrics(power *PowerStat) { func enableCoreMetrics(power *PowerStat) {
power.cpuC0StateResidency = true
power.cpuC1StateResidency = true power.cpuC1StateResidency = true
power.cpuC6StateResidency = true power.cpuC6StateResidency = true
power.cpuTemperature = true power.cpuTemperature = true
@ -460,12 +547,13 @@ func enableCoreMetrics(power *PowerStat) {
} }
func disableCoreMetrics(power *PowerStat) { func disableCoreMetrics(power *PowerStat) {
power.cpuC0StateResidency = false
power.cpuC1StateResidency = false power.cpuC1StateResidency = false
power.cpuC6StateResidency = false power.cpuC6StateResidency = false
power.cpuBusyCycles = false
power.cpuTemperature = false power.cpuTemperature = false
power.cpuBusyFrequency = false power.cpuBusyFrequency = false
power.cpuFrequency = false power.cpuFrequency = false
power.cpuBusyCycles = false
} }
func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData { func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData {
@ -480,16 +568,18 @@ func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCur
return raplDataMap return raplDataMap
} }
func getPowerWithMockedServices() (*PowerStat, *mockFileService, *mockRaplService, *mockMsrService) { func getPowerWithMockedServices() (*PowerStat, *MockServices) {
fsMock := &mockFileService{} var mockServices MockServices
msrMock := &mockMsrService{} mockServices.fs = &mockFileService{}
raplMock := &mockRaplService{} mockServices.msr = &mockMsrService{}
logger := testutil.Logger{Name: "PowerPluginTest"} mockServices.rapl = &mockRaplService{}
p := newPowerStat(fsMock) p := newPowerStat(mockServices.fs)
p.Log = logger p.Log = testutil.Logger{Name: "PowerPluginTest"}
p.fs = fsMock p.rapl = mockServices.rapl
p.rapl = raplMock p.msr = mockServices.msr
p.msr = msrMock p.packageCurrentPowerConsumption = true
p.packageCurrentDramPowerConsumption = true
p.packageThermalDesignPower = true
return p, fsMock, raplMock, msrMock return p, &mockServices
} }

View File

@ -28,6 +28,10 @@ const (
throttleTemperatureLocation = 0x1A2 throttleTemperatureLocation = 0x1A2
temperatureLocation = 0x19C temperatureLocation = 0x19C
timestampCounterLocation = 0x10 timestampCounterLocation = 0x10
turboRatioLimitLocation = 0x1AD
turboRatioLimit1Location = 0x1AE
turboRatioLimit2Location = 0x1AF
atomCoreTurboRatiosLocation = 0x66C
) )
// msrService is responsible for interactions with MSR. // msrService is responsible for interactions with MSR.
@ -35,6 +39,7 @@ type msrService interface {
getCPUCoresData() map[string]*msrData getCPUCoresData() map[string]*msrData
retrieveCPUFrequencyForCore(core string) (float64, error) retrieveCPUFrequencyForCore(core string) (float64, error)
openAndReadMsr(core string) error openAndReadMsr(core string) error
readSingleMsr(core string, msr string) (uint64, error)
} }
type msrServiceImpl struct { type msrServiceImpl struct {
@ -50,6 +55,10 @@ func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) { func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core) cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
err := checkFile(cpuFreqPath)
if err != nil {
return 0, err
}
cpuFreqFile, err := os.Open(cpuFreqPath) cpuFreqFile, err := os.Open(cpuFreqPath)
if err != nil { if err != nil {
return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err) return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err)
@ -62,6 +71,10 @@ func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, erro
func (m *msrServiceImpl) openAndReadMsr(core string) error { func (m *msrServiceImpl) openAndReadMsr(core string) error {
path := fmt.Sprintf(msrPartialPath, core) path := fmt.Sprintf(msrPartialPath, core)
err := checkFile(path)
if err != nil {
return err
}
msrFile, err := os.Open(path) msrFile, err := os.Open(path)
if err != nil { if err != nil {
return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err) return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
@ -75,6 +88,40 @@ func (m *msrServiceImpl) openAndReadMsr(core string) error {
return nil return nil
} }
// readSingleMsr reads the value of one model-specific register for a core.
//
// core is substituted into msrPartialPath to build the path of the MSR file.
// msr is the symbolic name of the register to read; the supported names are
// "MSR_TURBO_RATIO_LIMIT", "MSR_TURBO_RATIO_LIMIT1", "MSR_TURBO_RATIO_LIMIT2"
// and "MSR_ATOM_CORE_TURBO_RATIOS".
//
// It returns the value read at the register's offset, or an error when the
// name is not supported, the MSR file is rejected by checkFile, the file
// cannot be opened, or the read at the register offset fails.
func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) {
	// Resolve the register offset first so that an unsupported name is
	// rejected before any file system access takes place.
	var msrAddress int64
	switch msr {
	case "MSR_TURBO_RATIO_LIMIT":
		msrAddress = turboRatioLimitLocation
	case "MSR_TURBO_RATIO_LIMIT1":
		msrAddress = turboRatioLimit1Location
	case "MSR_TURBO_RATIO_LIMIT2":
		msrAddress = turboRatioLimit2Location
	case "MSR_ATOM_CORE_TURBO_RATIOS":
		msrAddress = atomCoreTurboRatiosLocation
	default:
		return 0, fmt.Errorf("incorrect name of MSR %s", msr)
	}

	path := fmt.Sprintf(msrPartialPath, core)
	if err := checkFile(path); err != nil {
		return 0, err
	}

	msrFile, err := os.Open(path)
	if err != nil {
		// %w keeps the underlying error reachable through errors.Is/errors.As
		// while producing the same message text as before.
		return 0, fmt.Errorf("error opening MSR file on path %s, err: %w", path, err)
	}
	defer msrFile.Close()

	value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress)
	if err != nil {
		return 0, err
	}
	return value, nil
}
func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error { func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error {
g, ctx := errgroup.WithContext(context.Background()) g, ctx := errgroup.WithContext(context.Background())
@ -128,9 +175,9 @@ func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error
m.cpuCoresData[core].aperf = newAperf m.cpuCoresData[core].aperf = newAperf
m.cpuCoresData[core].timeStampCounter = newTsc m.cpuCoresData[core].timeStampCounter = newTsc
// MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16. // MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16.
m.cpuCoresData[core].throttleTemp = (newThrottleTemp >> 16) & 0xFF m.cpuCoresData[core].throttleTemp = int64((newThrottleTemp >> 16) & 0xFF)
// MSR (19Ch) IA32_THERM_STATUS bits 22:16. // MSR (19Ch) IA32_THERM_STATUS bits 22:16.
m.cpuCoresData[core].temp = (newTemp >> 16) & 0x7F m.cpuCoresData[core].temp = int64((newTemp >> 16) & 0x7F)
return nil return nil
} }

View File

@ -1,10 +1,10 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT. // Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat package intel_powerstat
import mock "github.com/stretchr/testify/mock" import mock "github.com/stretchr/testify/mock"
// mockMsrService is an autogenerated mock type for the msrService type // mockMsrService is an autogenerated mock type for the mockMsrService type
type mockMsrService struct { type mockMsrService struct {
mock.Mock mock.Mock
} }
@ -39,6 +39,27 @@ func (_m *mockMsrService) openAndReadMsr(core string) error {
return r0 return r0
} }
// readSingleMsr provides a mock function with given fields: core, msr
//
// The call is forwarded to the embedded mock with both arguments, and the two
// results are taken from whatever return values were configured for it.
func (_m *mockMsrService) readSingleMsr(core string, msr string) (uint64, error) {
ret := _m.Called(core, msr)
// First result: either a func(string, string) uint64, which is invoked with
// the call arguments, or a plain uint64 value.
var r0 uint64
if rf, ok := ret.Get(0).(func(string, string) uint64); ok {
r0 = rf(core, msr)
} else {
// Unchecked type assertion: panics when the configured value is not a uint64.
r0 = ret.Get(0).(uint64)
}
// Second result: either a func(string, string) error, which is invoked with
// the call arguments, or the value obtained through ret.Error(1).
var r1 error
if rf, ok := ret.Get(1).(func(string, string) error); ok {
r1 = rf(core, msr)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// retrieveCPUFrequencyForCore provides a mock function with given fields: core // retrieveCPUFrequencyForCore provides a mock function with given fields: core
func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) { func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) {
ret := _m.Called(core) ret := _m.Called(core)

View File

@ -109,8 +109,8 @@ func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expec
require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf) require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf) require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf)
require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter) require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter)
require.Equal(t, (expectedValue>>16)&0xFF, msr.cpuCoresData[core].throttleTemp) require.Equal(t, int64((expectedValue>>16)&0xFF), msr.cpuCoresData[core].throttleTemp)
require.Equal(t, (expectedValue>>16)&0x7F, msr.cpuCoresData[core].temp) require.Equal(t, int64((expectedValue>>16)&0x7F), msr.cpuCoresData[core].temp)
if verifyDelta { if verifyDelta {
require.Equal(t, delta, msr.cpuCoresData[core].c3Delta) require.Equal(t, delta, msr.cpuCoresData[core].c3Delta)

View File

@ -36,6 +36,7 @@ type raplServiceImpl struct {
data map[string]*raplData data map[string]*raplData
dramFolders map[string]string dramFolders map[string]string
fs fileService fs fileService
logOnce map[string]error
} }
// initializeRaplData looks for RAPL folders and initializes data map with fetched information. // initializeRaplData looks for RAPL folders and initializes data map with fetched information.
@ -51,6 +52,10 @@ func (r *raplServiceImpl) getRaplData() map[string]*raplData {
func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error { func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID) socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath) socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath)
err := checkFile(socketEnergyUjPath)
if err != nil {
return err
}
socketEnergyUjFile, err := os.Open(socketEnergyUjPath) socketEnergyUjFile, err := os.Open(socketEnergyUjPath)
if err != nil { if err != nil {
return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err) return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err)
@ -59,6 +64,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID]) dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID])
dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath) dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath)
err = checkFile(dramEnergyUjPath)
if err != nil {
return err
}
dramEnergyUjFile, err := os.Open(dramEnergyUjPath) dramEnergyUjFile, err := os.Open(dramEnergyUjPath)
if err != nil { if err != nil {
return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err) return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err)
@ -66,6 +75,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
defer dramEnergyUjFile.Close() defer dramEnergyUjFile.Close()
socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath) socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath)
err = checkFile(socketMaxEnergyUjPath)
if err != nil {
return err
}
socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath) socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath)
if err != nil { if err != nil {
return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err) return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err)
@ -73,6 +86,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
defer socketMaxEnergyUjFile.Close() defer socketMaxEnergyUjFile.Close()
dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath) dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath)
err = checkFile(dramMaxEnergyUjPath)
if err != nil {
return err
}
dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath) dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath)
if err != nil { if err != nil {
return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err) return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err)
@ -85,6 +102,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) { func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) {
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID) socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath) socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath)
err := checkFile(socketMaxPowerPath)
if err != nil {
return 0, err
}
socketMaxPowerFile, err := os.Open(socketMaxPowerPath) socketMaxPowerFile, err := os.Open(socketMaxPowerPath)
if err != nil { if err != nil {
return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err) return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err)
@ -156,15 +177,22 @@ func (r *raplServiceImpl) findDramFolders() {
} }
func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) { func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) {
if r.logOnce == nil {
r.logOnce = make(map[string]error)
}
for _, raplFolder := range raplFolders { for _, raplFolder := range raplFolders {
potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder) potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder)
nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath) nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath)
read, err := r.fs.readFile(nameFilePath) read, err := r.fs.readFile(nameFilePath)
if err != nil { if err != nil {
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err) if val := r.logOnce[nameFilePath]; val == nil || val.Error() != err.Error() {
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
r.logOnce[nameFilePath] = err
}
continue continue
} }
r.logOnce[nameFilePath] = nil
// Remove new line character // Remove new line character
trimmedString := strings.TrimRight(string(read), "\n") trimmedString := strings.TrimRight(string(read), "\n")
if trimmedString == "dram" { if trimmedString == "dram" {
@ -194,7 +222,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
return fmt.Errorf("interval between last two Telegraf cycles is 0") return fmt.Errorf("interval between last two Telegraf cycles is 0")
} }
if newSocketEnergy > r.data[socketID].socketEnergy { if newSocketEnergy >= r.data[socketID].socketEnergy {
r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval
} else { } else {
socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile) socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile)
@ -206,7 +234,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval
} }
if newDramEnergy > r.data[socketID].dramEnergy { if newDramEnergy >= r.data[socketID].dramEnergy {
r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval
} else { } else {
dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile) dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile)

View File

@ -1,10 +1,10 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT. // Code generated by mockery v2.10.0. DO NOT EDIT.
package intel_powerstat package intel_powerstat
import mock "github.com/stretchr/testify/mock" import mock "github.com/stretchr/testify/mock"
// mockRaplService is an autogenerated mock type for the raplService type // mockRaplService is an autogenerated mock type for the mockRaplService type
type mockRaplService struct { type mockRaplService struct {
mock.Mock mock.Mock
} }