feat(intel_powerstat): add Max Turbo Frequency and introduce improvements (#11035)
This commit is contained in:
parent
4f972daa2b
commit
df3e9ec2a2
|
|
@ -11,18 +11,25 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur
|
|||
```toml
|
||||
# Intel PowerStat plugin enables monitoring of platform metrics (power, TDP) and per-CPU metrics like temperature, power and utilization.
|
||||
[[inputs.intel_powerstat]]
|
||||
## All global metrics are always collected by Intel PowerStat plugin.
|
||||
## User can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
|
||||
## Empty array means no per-CPU specific metrics will be collected by the plugin - in this case only platform level
|
||||
## telemetry will be exposed by Intel PowerStat plugin.
|
||||
## The user can choose which package metrics are monitored by the plugin with the package_metrics setting:
|
||||
## - The default, will collect "current_power_consumption", "current_dram_power_consumption" and "thermal_design_power"
|
||||
## - Setting this value to an empty array means no package metrics will be collected
|
||||
## - Finally, a user can specify individual metrics to capture from the supported options list
|
||||
## Supported options:
|
||||
## "cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"
|
||||
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"
|
||||
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
|
||||
|
||||
## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
|
||||
## Empty or missing array means no per-CPU specific metrics will be collected by the plugin.
|
||||
## Supported options:
|
||||
## "cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature", "cpu_busy_frequency"
|
||||
## ATTENTION: cpu_busy_cycles option is DEPRECATED - superseded by cpu_c0_state_residency
|
||||
# cpu_metrics = []
|
||||
```
|
||||
|
||||
## Example: Configuration with no per-CPU telemetry
|
||||
|
||||
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
|
||||
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
|
||||
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
|
|
@ -31,28 +38,39 @@ This configuration allows getting global metrics (processor package specific), n
|
|||
|
||||
## Example: Configuration with no per-CPU telemetry - equivalent case
|
||||
|
||||
This configuration allows getting global metrics (processor package specific), no per-CPU metrics are collected:
|
||||
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
|
||||
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
```
|
||||
|
||||
## Example: Configuration for CPU Temperature and Frequency only
|
||||
## Example: Configuration for CPU Temperature and CPU Frequency
|
||||
|
||||
This configuration allows getting global metrics plus subset of per-CPU metrics (CPU Temperature and Current Frequency):
|
||||
This configuration allows getting default processor package specific metrics, plus subset of per-CPU metrics (CPU Temperature and CPU Frequency):
|
||||
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
|
||||
```
|
||||
|
||||
## Example: Configuration with all available metrics
|
||||
## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics
|
||||
|
||||
This configuration allows getting global metrics and all per-CPU metrics:
|
||||
This configuration allows getting only a subset of per-CPU metrics (CPU Temperature and CPU Frequency):
|
||||
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles"]
|
||||
package_metrics = []
|
||||
cpu_metrics = ["cpu_frequency", "cpu_temperature"]
|
||||
```
|
||||
|
||||
## Example: Configuration with all available metrics
|
||||
|
||||
This configuration allows getting all processor package specific metrics and all per-CPU metrics:
|
||||
|
||||
```toml
|
||||
[[inputs.intel_powerstat]]
|
||||
package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"]
|
||||
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"]
|
||||
```
|
||||
|
||||
## SW Dependencies
|
||||
|
|
@ -66,11 +84,17 @@ The following dependencies are expected by plugin:
|
|||
|
||||
Minimum kernel version required is 3.13 to satisfy all requirements.
|
||||
|
||||
Please make sure that kernel modules are loaded and running. You might have to manually enable them by using `modprobe`.
|
||||
Exact commands to be executed are:
|
||||
Please make sure that kernel modules are loaded and running (cpufreq is integrated in kernel). Modules might have to be manually enabled by using `modprobe`.
|
||||
Depending on the kernel version, run commands:
|
||||
|
||||
```sh
|
||||
sudo modprobe cpufreq-stats
|
||||
# kernel 5.x.x:
|
||||
sudo modprobe rapl
|
||||
sudo modprobe msr
|
||||
sudo modprobe intel_rapl_common
|
||||
sudo modprobe intel_rapl_msr
|
||||
|
||||
# kernel 4.x.x:
|
||||
sudo modprobe msr
|
||||
sudo modprobe intel_rapl
|
||||
```
|
||||
|
|
@ -80,9 +104,13 @@ to retrieve data for calculation of most critical per-CPU specific metrics:
|
|||
|
||||
- `cpu_busy_frequency_mhz`
|
||||
- `cpu_temperature_celsius`
|
||||
- `cpu_c0_state_residency_percent`
|
||||
- `cpu_c1_state_residency_percent`
|
||||
- `cpu_c6_state_residency_percent`
|
||||
- `cpu_busy_cycles_percent`
|
||||
|
||||
and to retrieve data for calculating the per-package specific metric:
|
||||
|
||||
- `max_turbo_frequency_mhz`
|
||||
|
||||
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
|
||||
|
||||
|
|
@ -99,13 +127,13 @@ The following processor properties are required by the plugin:
|
|||
model specific registers for all features
|
||||
- The following processor flags shall be present:
|
||||
- "_msr_" shall be present for plugin to read platform data from processor model specific registers and collect
|
||||
the following metrics: _powerstat_core.cpu_temperature_, _powerstat_core.cpu_busy_frequency_,
|
||||
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_, _powerstat_core._cpu_c6_state_residency_
|
||||
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat_core.cpu_busy_frequency_,
|
||||
_powerstat_core.cpu_busy_cycles_, _powerstat_core.cpu_c1_state_residency_
|
||||
- "_dts_" shall be present to collect _powerstat_core.cpu_temperature_
|
||||
- Processor _Model number_ must be one of the following values for plugin to read _powerstat_core.cpu_c1_state_residency_
|
||||
and _powerstat_core.cpu_c6_state_residency_ metrics:
|
||||
the following metrics: _powerstat\_core.cpu\_temperature_, _powerstat\_core.cpu\_busy\_frequency_,
|
||||
_powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_, _powerstat\_core.cpu\_c6\_state\_residency_
|
||||
- "_aperfmperf_" shall be present to collect the following metrics: _powerstat\_core.cpu\_busy\_frequency_,
|
||||
_powerstat\_core.cpu\_c0\_state\_residency_, _powerstat\_core.cpu\_c1\_state\_residency_
|
||||
- "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_
|
||||
- Processor _Model number_ must be one of the following values for plugin to read _powerstat\_core.cpu\_c1\_state\_residency_
|
||||
and _powerstat\_core.cpu\_c6\_state\_residency_ metrics:
|
||||
|
||||
| Model number | Processor name |
|
||||
|-----|-------------|
|
||||
|
|
@ -168,50 +196,59 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
|
|||
|
||||
- The following Tags are returned by plugin with powerstat_core measurements:
|
||||
|
||||
```text
|
||||
| Tag | Description |
|
||||
|-----|-------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
| `core_id` | ID of physical processor core |
|
||||
| `cpu_id` | ID of logical processor core |
|
||||
| Tag | Description |
|
||||
|--------------|-------------------------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
| `core_id` | ID of physical processor core |
|
||||
| `cpu_id` | ID of logical processor core |
|
||||
|
||||
Measurement powerstat_core metrics are collected per-CPU (cpu_id is the key)
|
||||
while core_id and package_id tags are additional topology information.
|
||||
```
|
||||
|
||||
- Available metrics for powerstat_core measurement
|
||||
|
||||
```text
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----|-------------|-----|
|
||||
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
|
||||
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
|
||||
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
|
||||
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
|
||||
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
|
||||
| `cpu_busy_cycles_percent` | CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
|
||||
```
|
||||
| Metric name (field) | Description | Units |
|
||||
|---------------------|-------------|-------|
|
||||
| `cpu_frequency_mhz` | Current operational frequency of CPU Core | MHz |
|
||||
| `cpu_busy_frequency_mhz` | CPU Core Busy Frequency measured as frequency adjusted to CPU Core busy cycles | MHz |
|
||||
| `cpu_temperature_celsius` | Current temperature of CPU Core | Celsius degrees |
|
||||
| `cpu_c0_state_residency_percent` | Percentage of time that CPU Core spent in C0 Core residency state | % |
|
||||
| `cpu_c1_state_residency_percent` | Percentage of time that CPU Core spent in C1 Core residency state | % |
|
||||
| `cpu_c6_state_residency_percent` | Percentage of time that CPU Core spent in C6 Core residency state | % |
|
||||
| `cpu_busy_cycles_percent` | (**DEPRECATED** - superseded by cpu_c0_state_residency_percent) CPU Core Busy cycles as a ratio of Cycles spent in C0 state residency to all cycles executed by CPU Core | % |
|
||||
|
||||
- powerstat_package
|
||||
|
||||
- The following Tags are returned by plugin with powerstat_package measurements:
|
||||
|
||||
```text
|
||||
| Tag | Description |
|
||||
|-----|-------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which
|
||||
package metric refers to.
|
||||
```
|
||||
| Tag | Description |
|
||||
|-----|-------------|
|
||||
| `package_id` | ID of platform package/socket |
|
||||
| `active_cores`| Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency |
|
||||
|
||||
Measurement powerstat_package metrics are collected per processor package - the _package_id_ tag indicates which package the metric refers to.
|
||||
|
||||
- Available metrics for powerstat_package measurement
|
||||
|
||||
```text
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----|-------------|-----|
|
||||
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
|
||||
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
|
||||
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
|
||||
```
|
||||
| Metric name (field) | Description | Units |
|
||||
|-----|-------------|-----|
|
||||
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
|
||||
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
|
||||
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
|
||||
| `max_turbo_frequency_mhz`| Maximum reachable turbo frequency for number of cores active | MHz |
|
||||
|
||||
### Known issues
|
||||
|
||||
From linux kernel version v5.4.77 with [this kernel change](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94)
|
||||
resources like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for security reasons, so this plugin needs root privileges to work properly.
|
||||
|
||||
If such strict security restrictions are not relevant, reading permissions to files in `/sys/devices/virtual/powercap/intel-rapl/`
|
||||
directory can be manually changed for example with `chmod` command with custom parameters.
|
||||
For example to give all users permission to all files in `intel-rapl` directory:
|
||||
|
||||
```bash
|
||||
sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
|
||||
```
|
||||
|
||||
### Example Output
|
||||
|
||||
|
|
@ -219,10 +256,12 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
|
|||
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_cycles_percent=0.8 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c1_state_residency_percent=6.68 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c0_state_residency_percent=0.8 1606494744000000000
|
||||
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_busy_frequency_mhz=1213.24 1606494744000000000
|
||||
```
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ type msrData struct {
|
|||
c3 uint64
|
||||
c6 uint64
|
||||
c7 uint64
|
||||
throttleTemp uint64
|
||||
temp uint64
|
||||
throttleTemp int64
|
||||
temp int64
|
||||
mperfDelta uint64
|
||||
aperfDelta uint64
|
||||
timeStampCounterDelta uint64
|
||||
|
|
|
|||
|
|
@ -152,3 +152,22 @@ func (fs *fileServiceImpl) readFileAtOffsetToUint64(reader io.ReaderAt, offset i
|
|||
func newFileService() *fileServiceImpl {
|
||||
return &fileServiceImpl{}
|
||||
}
|
||||
|
||||
// checkFile verifies that path names an existing filesystem entry that is not
// a symbolic link.
//
// It returns an error when path is empty, when the entry does not exist, when
// its file info cannot be obtained for any other reason, or when the entry
// itself is a symlink. It returns nil otherwise.
func checkFile(path string) error {
	if path == "" {
		return fmt.Errorf("empty path given")
	}

	// Lstat (not Stat) so that a symlink is reported as such instead of being
	// followed to its target.
	lInfo, err := os.Lstat(path)
	if err != nil {
		if os.IsNotExist(err) {
			return fmt.Errorf("file `%s` doesn't exist", path)
		}
		// Wrap with %w so callers can still inspect the underlying error with
		// errors.Is / errors.As; the rendered message is unchanged.
		return fmt.Errorf("cannot obtain file info of `%s`: %w", path, err)
	}

	if lInfo.Mode()&os.ModeSymlink != 0 {
		return fmt.Errorf("file `%s` is a symlink", path)
	}
	return nil
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
// Code generated by mockery v2.10.0. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
|
|
@ -8,7 +8,7 @@ import (
|
|||
mock "github.com/stretchr/testify/mock"
|
||||
)
|
||||
|
||||
// mockFileService is an autogenerated mock type for the fileService type
|
||||
// mockFileService is an autogenerated mock type for the mockFileService type
|
||||
type mockFileService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ package intel_powerstat
|
|||
import (
|
||||
"fmt"
|
||||
"math/big"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
|
@ -15,60 +16,81 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
cpuFrequency = "cpu_frequency"
|
||||
cpuBusyFrequency = "cpu_busy_frequency"
|
||||
cpuTemperature = "cpu_temperature"
|
||||
cpuC1StateResidency = "cpu_c1_state_residency"
|
||||
cpuC6StateResidency = "cpu_c6_state_residency"
|
||||
cpuBusyCycles = "cpu_busy_cycles"
|
||||
percentageMultiplier = 100
|
||||
cpuFrequency = "cpu_frequency"
|
||||
cpuBusyFrequency = "cpu_busy_frequency"
|
||||
cpuTemperature = "cpu_temperature"
|
||||
cpuC0StateResidency = "cpu_c0_state_residency"
|
||||
cpuC1StateResidency = "cpu_c1_state_residency"
|
||||
cpuC6StateResidency = "cpu_c6_state_residency"
|
||||
cpuBusyCycles = "cpu_busy_cycles"
|
||||
packageCurrentPowerConsumption = "current_power_consumption"
|
||||
packageCurrentDramPowerConsumption = "current_dram_power_consumption"
|
||||
packageThermalDesignPower = "thermal_design_power"
|
||||
packageTurboLimit = "max_turbo_frequency"
|
||||
percentageMultiplier = 100
|
||||
)
|
||||
|
||||
// PowerStat plugin enables monitoring of platform metrics (power, TDP) and Core metrics like temperature, power and utilization.
|
||||
type PowerStat struct {
|
||||
CPUMetrics []string `toml:"cpu_metrics"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
CPUMetrics []string `toml:"cpu_metrics"`
|
||||
PackageMetrics []string `toml:"package_metrics"`
|
||||
Log telegraf.Logger `toml:"-"`
|
||||
|
||||
fs fileService
|
||||
rapl raplService
|
||||
msr msrService
|
||||
|
||||
cpuFrequency bool
|
||||
cpuBusyFrequency bool
|
||||
cpuTemperature bool
|
||||
cpuC1StateResidency bool
|
||||
cpuC6StateResidency bool
|
||||
cpuBusyCycles bool
|
||||
cpuInfo map[string]*cpuInfo
|
||||
skipFirstIteration bool
|
||||
cpuFrequency bool
|
||||
cpuBusyFrequency bool
|
||||
cpuTemperature bool
|
||||
cpuC0StateResidency bool
|
||||
cpuC1StateResidency bool
|
||||
cpuC6StateResidency bool
|
||||
cpuBusyCycles bool
|
||||
packageTurboLimit bool
|
||||
packageCurrentPowerConsumption bool
|
||||
packageCurrentDramPowerConsumption bool
|
||||
packageThermalDesignPower bool
|
||||
cpuInfo map[string]*cpuInfo
|
||||
skipFirstIteration bool
|
||||
logOnce map[string]error
|
||||
}
|
||||
|
||||
// Init performs one time setup of the plugin.
|
||||
// Init performs one time setup of the plugin
|
||||
func (p *PowerStat) Init() error {
|
||||
p.parsePackageMetricsConfig()
|
||||
p.parseCPUMetricsConfig()
|
||||
err := p.verifyProcessor()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Initialize MSR service only when there is at least one core metric enabled.
|
||||
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC1StateResidency ||
|
||||
p.cpuC6StateResidency || p.cpuBusyCycles {
|
||||
// Initialize MSR service only when there is at least one metric enabled
|
||||
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
|
||||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit {
|
||||
p.msr = newMsrServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
|
||||
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit {
|
||||
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
|
||||
if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() {
|
||||
return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Gather takes in an accumulator and adds the metrics that the Input gathers.
|
||||
// Gather takes in an accumulator and adds the metrics that the Input gathers
|
||||
func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
|
||||
p.addGlobalMetrics(acc)
|
||||
if p.areGlobalMetricsEnabled() {
|
||||
p.addGlobalMetrics(acc)
|
||||
}
|
||||
|
||||
if p.areCoreMetricsEnabled() {
|
||||
p.addPerCoreMetrics(acc)
|
||||
}
|
||||
|
||||
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations.
|
||||
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations
|
||||
p.skipFirstIteration = false
|
||||
|
||||
return nil
|
||||
|
|
@ -79,18 +101,36 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
|
|||
p.rapl.initializeRaplData()
|
||||
|
||||
for socketID := range p.rapl.getRaplData() {
|
||||
if p.packageTurboLimit {
|
||||
p.addTurboRatioLimit(socketID, acc)
|
||||
}
|
||||
|
||||
err := p.rapl.retrieveAndCalculateData(socketID)
|
||||
if err != nil {
|
||||
// In case of an error skip calculating metrics for this socket
|
||||
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
|
||||
if val := p.logOnce[socketID]; val == nil || val.Error() != err.Error() {
|
||||
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
|
||||
// Remember that specific error occurs for socketID to omit logging next time
|
||||
p.logOnce[socketID] = err
|
||||
}
|
||||
continue
|
||||
}
|
||||
p.addThermalDesignPowerMetric(socketID, acc)
|
||||
|
||||
// If error stops occurring, clear logOnce indicator
|
||||
p.logOnce[socketID] = nil
|
||||
if p.packageThermalDesignPower {
|
||||
p.addThermalDesignPowerMetric(socketID, acc)
|
||||
}
|
||||
|
||||
if p.skipFirstIteration {
|
||||
continue
|
||||
}
|
||||
p.addCurrentSocketPowerConsumption(socketID, acc)
|
||||
p.addCurrentDramPowerConsumption(socketID, acc)
|
||||
if p.packageCurrentPowerConsumption {
|
||||
p.addCurrentSocketPowerConsumption(socketID, acc)
|
||||
}
|
||||
if p.packageCurrentDramPowerConsumption {
|
||||
p.addCurrentDramPowerConsumption(socketID, acc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -155,11 +195,10 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
|
|||
}
|
||||
|
||||
// Read data from MSR only if required
|
||||
if p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature ||
|
||||
p.cpuBusyFrequency {
|
||||
if p.cpuC0StateResidency || p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles || p.cpuTemperature || p.cpuBusyFrequency {
|
||||
err := p.msr.openAndReadMsr(cpuID)
|
||||
if err != nil {
|
||||
// In case of an error exit the function. All metrics past this point are dependant on MSR.
|
||||
// In case of an error exit the function. All metrics past this point are dependent on MSR
|
||||
p.Log.Debugf("error while reading msr: %v", err)
|
||||
return
|
||||
}
|
||||
|
|
@ -169,12 +208,16 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
|
|||
p.addCPUTemperatureMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle.
|
||||
// cpuBusyFrequency metric does some calculations inside that are required in another plugin cycle
|
||||
if p.cpuBusyFrequency {
|
||||
p.addCPUBusyFrequencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if !p.skipFirstIteration {
|
||||
if p.cpuC0StateResidency || p.cpuBusyCycles {
|
||||
p.addCPUC0StateResidencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if p.cpuC1StateResidency {
|
||||
p.addCPUC1StateResidencyMetric(cpuID, acc)
|
||||
}
|
||||
|
|
@ -182,10 +225,6 @@ func (p *PowerStat) addMetricsForSingleCore(cpuID string, acc telegraf.Accumulat
|
|||
if p.cpuC6StateResidency {
|
||||
p.addCPUC6StateResidencyMetric(cpuID, acc)
|
||||
}
|
||||
|
||||
if p.cpuBusyCycles {
|
||||
p.addCPUBusyCyclesMetric(cpuID, acc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -229,6 +268,153 @@ func (p *PowerStat) addCPUTemperatureMetric(cpuID string, acc telegraf.Accumulat
|
|||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
// calculateTurboRatioGroup decodes up to eight 8-bit buckets and records the
// resulting turbo frequency for every active-core count they cover.
//
// Byte i of coreCounts is the highest active-core count of bucket i; byte i of
// msr is that bucket's frequency ratio. A bucket covers the core counts from
// the previous bucket's upper bound plus one up to its own upper bound; the
// first bucket starts at its own upper bound. Decoding stops at the first
// bucket whose core count is zero, and buckets with a zero ratio are skipped.
// Each stored value is the ratio multiplied by 100. Results are written into
// group, keyed by active-core count.
func calculateTurboRatioGroup(coreCounts uint64, msr uint64, group map[int]uint64) {
	const (
		bucketWidth     = 8
		bucketMask      = 0xFF
		totalBits       = 64
		ratioMultiplier = 100
	)

	lower := coreCounts & bucketMask
	for shift := uint(0); shift < totalBits; shift += bucketWidth {
		upper := (coreCounts >> shift) & bucketMask
		if upper == 0 {
			// No further buckets are populated.
			return
		}

		if ratio := (msr >> shift) & bucketMask; ratio != 0 {
			for cores := lower; cores <= upper; cores++ {
				group[int(cores)] = ratio * ratioMultiplier
			}
		}
		// The next bucket begins right after this one's upper bound.
		lower = upper + 1
	}
}
|
||||
|
||||
func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator) {
|
||||
var err error
|
||||
turboRatioLimitGroups := make(map[int]uint64)
|
||||
|
||||
var cpuID = ""
|
||||
var model = ""
|
||||
for _, v := range p.cpuInfo {
|
||||
if v.physicalID == socketID {
|
||||
cpuID = v.cpuID
|
||||
model = v.model
|
||||
}
|
||||
}
|
||||
if cpuID == "" || model == "" {
|
||||
p.Log.Debugf("error while reading socket ID")
|
||||
return
|
||||
}
|
||||
// dump_hsw_turbo_ratio_limit
|
||||
if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X
|
||||
coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18
|
||||
msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2")
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit2, turboRatioLimitGroups)
|
||||
}
|
||||
|
||||
// dump_ivt_turbo_ratio_limit
|
||||
if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X
|
||||
(model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X
|
||||
coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16
|
||||
msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
|
||||
return
|
||||
}
|
||||
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups)
|
||||
}
|
||||
|
||||
if (model != strconv.FormatInt(0x37, 10)) && // INTEL_FAM6_ATOM_SILVERMONT
|
||||
(model != strconv.FormatInt(0x4A, 10)) && // INTEL_FAM6_ATOM_SILVERMONT_MID:
|
||||
(model != strconv.FormatInt(0x5A, 10)) && // INTEL_FAM6_ATOM_AIRMONT_MID:
|
||||
(model != strconv.FormatInt(0x2E, 10)) && // INTEL_FAM6_NEHALEM_EX
|
||||
(model != strconv.FormatInt(0x2F, 10)) && // INTEL_FAM6_WESTMERE_EX
|
||||
(model != strconv.FormatInt(0x57, 10)) && // INTEL_FAM6_XEON_PHI_KNL
|
||||
(model != strconv.FormatInt(0x85, 10)) { // INTEL_FAM6_XEON_PHI_KNM
|
||||
coreCounts := uint64(0x0807060504030201) // default value (counting the number of active cores 1 to 8). May be changed in "if" segment below
|
||||
if (model == strconv.FormatInt(0x5C, 10)) || // INTEL_FAM6_ATOM_GOLDMONT
|
||||
(model == strconv.FormatInt(0x55, 10)) || // INTEL_FAM6_SKYLAKE_X
|
||||
(model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X
|
||||
(model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D
|
||||
(model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D
|
||||
coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
|
||||
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
|
||||
return
|
||||
}
|
||||
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups)
|
||||
}
|
||||
// dump_atom_turbo_ratio_limits
|
||||
if model == strconv.FormatInt(0x37, 10) || // INTEL_FAM6_ATOM_SILVERMONT
|
||||
model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID:
|
||||
model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID
|
||||
coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS")
|
||||
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err)
|
||||
return
|
||||
}
|
||||
value := uint64(0)
|
||||
newValue := uint64(0)
|
||||
|
||||
for i := 0; i < 4; i++ { // value "4" is specific for this group of processors
|
||||
newValue = (msrTurboRatioLimit >> (8 * (i))) & 0x3F // value of freq ratio is stored in 6-bit blocks, saved every 8 bits
|
||||
value = value + (newValue << ((i - 1) * 8)) // now value of freq ratio is stored in 8-bit blocks, saved every 8 bits
|
||||
}
|
||||
|
||||
calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
|
||||
}
|
||||
// dump_knl_turbo_ratio_limits
|
||||
if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
// value of freq ratio of bucket 1 is saved in bits 15 to 8.
|
||||
// each next value is calculated as the previous value - delta. Delta is stored in 3-bit blocks every 8 bits (start at 21 (2*8+5))
|
||||
value := (msrTurboRatioLimit >> 8) & 0xFF
|
||||
newValue := value
|
||||
for i := 2; i < 8; i++ {
|
||||
newValue = newValue - (msrTurboRatioLimit>>(8*i+5))&0x7
|
||||
value = value + (newValue << ((i - 1) * 8))
|
||||
}
|
||||
|
||||
// value of number of active cores of bucket 1 is saved in bits 1 to 7.
|
||||
// each next value is calculated as the previous value + delta. Delta is stored in 5-bit blocks every 8 bits (start at 16 (2*8))
|
||||
coreCounts := (msrTurboRatioLimit & 0xFF) >> 1
|
||||
newBucket := coreCounts
|
||||
for i := 2; i < 8; i++ {
|
||||
newBucket = newBucket + (msrTurboRatioLimit>>(8*i))&0x1F
|
||||
coreCounts = coreCounts + (newBucket << ((i - 1) * 8))
|
||||
}
|
||||
calculateTurboRatioGroup(coreCounts, value, turboRatioLimitGroups)
|
||||
}
|
||||
|
||||
for key, val := range turboRatioLimitGroups {
|
||||
tags := map[string]string{
|
||||
"package_id": socketID,
|
||||
"active_cores": strconv.Itoa(key),
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"max_turbo_frequency_mhz": val,
|
||||
}
|
||||
acc.AddGauge("powerstat_package", fields, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUBusyFrequencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
mperfDelta := coresData[cpuID].mperfDelta
|
||||
|
|
@ -331,7 +517,7 @@ func (p *PowerStat) addCPUC6StateResidencyMetric(cpuID string, acc telegraf.Accu
|
|||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accumulator) {
|
||||
coresData := p.msr.getCPUCoresData()
|
||||
// Avoid division by 0
|
||||
if coresData[cpuID].timeStampCounterDelta == 0 {
|
||||
|
|
@ -339,7 +525,7 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
|
|||
timestampCounterLocation, cpuID)
|
||||
return
|
||||
}
|
||||
busyCyclesValue := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
c0Value := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
float64(coresData[cpuID].mperfDelta) / float64(coresData[cpuID].timeStampCounterDelta))
|
||||
cpu := p.cpuInfo[cpuID]
|
||||
tags := map[string]string{
|
||||
|
|
@ -347,11 +533,42 @@ func (p *PowerStat) addCPUBusyCyclesMetric(cpuID string, acc telegraf.Accumulato
|
|||
"core_id": cpu.coreID,
|
||||
"cpu_id": cpu.cpuID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_busy_cycles_percent": busyCyclesValue,
|
||||
if p.cpuC0StateResidency {
|
||||
fields := map[string]interface{}{
|
||||
"cpu_c0_state_residency_percent": c0Value,
|
||||
}
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
}
|
||||
if p.cpuBusyCycles {
|
||||
deprecatedFields := map[string]interface{}{
|
||||
"cpu_busy_cycles_percent": c0Value,
|
||||
}
|
||||
acc.AddGauge("powerstat_core", deprecatedFields, tags)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) parsePackageMetricsConfig() {
|
||||
if p.PackageMetrics == nil {
|
||||
// if Package Metric config is empty, use the default settings.
|
||||
p.packageCurrentPowerConsumption = true
|
||||
p.packageCurrentDramPowerConsumption = true
|
||||
p.packageThermalDesignPower = true
|
||||
return
|
||||
}
|
||||
|
||||
acc.AddGauge("powerstat_core", fields, tags)
|
||||
if contains(p.PackageMetrics, packageTurboLimit) {
|
||||
p.packageTurboLimit = true
|
||||
}
|
||||
if contains(p.PackageMetrics, packageCurrentPowerConsumption) {
|
||||
p.packageCurrentPowerConsumption = true
|
||||
}
|
||||
|
||||
if contains(p.PackageMetrics, packageCurrentDramPowerConsumption) {
|
||||
p.packageCurrentDramPowerConsumption = true
|
||||
}
|
||||
if contains(p.PackageMetrics, packageThermalDesignPower) {
|
||||
p.packageThermalDesignPower = true
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) parseCPUMetricsConfig() {
|
||||
|
|
@ -363,6 +580,10 @@ func (p *PowerStat) parseCPUMetricsConfig() {
|
|||
p.cpuFrequency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuC0StateResidency) {
|
||||
p.cpuC0StateResidency = true
|
||||
}
|
||||
|
||||
if contains(p.CPUMetrics, cpuC1StateResidency) {
|
||||
p.cpuC1StateResidency = true
|
||||
}
|
||||
|
|
@ -396,7 +617,7 @@ func (p *PowerStat) verifyProcessor() error {
|
|||
|
||||
p.cpuInfo = stats
|
||||
|
||||
// First CPU is sufficient for verification.
|
||||
// First CPU is sufficient for verification
|
||||
firstCPU := p.cpuInfo["0"]
|
||||
if firstCPU == nil {
|
||||
return fmt.Errorf("first core not found while parsing /proc/cpuinfo")
|
||||
|
|
@ -414,14 +635,16 @@ func (p *PowerStat) verifyProcessor() error {
|
|||
if !strings.Contains(firstCPU.flags, "msr") {
|
||||
p.cpuTemperature = false
|
||||
p.cpuC6StateResidency = false
|
||||
p.cpuC0StateResidency = false
|
||||
p.cpuBusyCycles = false
|
||||
p.cpuBusyFrequency = false
|
||||
p.cpuC1StateResidency = false
|
||||
}
|
||||
|
||||
if !strings.Contains(firstCPU.flags, "aperfmperf") {
|
||||
p.cpuBusyFrequency = false
|
||||
p.cpuBusyCycles = false
|
||||
p.cpuBusyFrequency = false
|
||||
p.cpuC0StateResidency = false
|
||||
p.cpuC1StateResidency = false
|
||||
}
|
||||
|
||||
|
|
@ -438,7 +661,6 @@ func contains(slice []string, str string) bool {
|
|||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
|
|
@ -446,17 +668,27 @@ func (p *PowerStat) areCoreMetricsEnabled() bool {
|
|||
return p.msr != nil && len(p.msr.getCPUCoresData()) > 0
|
||||
}
|
||||
|
||||
// newPowerStat creates and returns PowerStat struct.
|
||||
// areGlobalMetricsEnabled reports whether the RAPL service has been set on p,
// i.e. whether p.rapl is non-nil.
func (p *PowerStat) areGlobalMetricsEnabled() bool {
|
||||
return p.rapl != nil
|
||||
}
|
||||
|
||||
// newPowerStat creates and returns PowerStat struct
|
||||
func newPowerStat(fs fileService) *PowerStat {
|
||||
p := &PowerStat{
|
||||
cpuFrequency: false,
|
||||
cpuC1StateResidency: false,
|
||||
cpuC6StateResidency: false,
|
||||
cpuBusyCycles: false,
|
||||
cpuTemperature: false,
|
||||
cpuBusyFrequency: false,
|
||||
skipFirstIteration: true,
|
||||
fs: fs,
|
||||
cpuFrequency: false,
|
||||
cpuC0StateResidency: false,
|
||||
cpuC1StateResidency: false,
|
||||
cpuC6StateResidency: false,
|
||||
cpuBusyCycles: false,
|
||||
cpuTemperature: false,
|
||||
cpuBusyFrequency: false,
|
||||
packageTurboLimit: false,
|
||||
packageCurrentPowerConsumption: false,
|
||||
packageCurrentDramPowerConsumption: false,
|
||||
packageThermalDesignPower: false,
|
||||
skipFirstIteration: true,
|
||||
fs: fs,
|
||||
logOnce: make(map[string]error),
|
||||
}
|
||||
|
||||
return p
|
||||
|
|
|
|||
|
|
@ -15,26 +15,32 @@ import (
|
|||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
// MockServices groups the mocked file, MSR and RAPL services so a test can
// receive all of them as a single value and set expectations on each.
type MockServices struct {
|
||||
fs *mockFileService
|
||||
msr *mockMsrService
|
||||
rapl *mockRaplService
|
||||
}
|
||||
|
||||
func TestInitPlugin(t *testing.T) {
|
||||
cores := []string{"cpu0", "cpu1", "cpu2", "cpu3"}
|
||||
power, fsMock, _, _ := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
mockServices.fs.On("getCPUInfoStats", mock.Anything).
|
||||
Return(nil, errors.New("error getting cpu stats")).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
mockServices.fs.On("getCPUInfoStats", mock.Anything).
|
||||
Return(make(map[string]*cpuInfo), nil).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getCPUInfoStats", mock.Anything).
|
||||
mockServices.fs.On("getCPUInfoStats", mock.Anything).
|
||||
Return(map[string]*cpuInfo{"0": {
|
||||
vendorID: "GenuineIntel",
|
||||
cpuFamily: "test",
|
||||
}}, nil).Once()
|
||||
require.Error(t, power.Init())
|
||||
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(cores, nil).Once().
|
||||
On("getCPUInfoStats", mock.Anything).
|
||||
Return(map[string]*cpuInfo{"0": {
|
||||
|
|
@ -44,24 +50,24 @@ func TestInitPlugin(t *testing.T) {
|
|||
// Verify MSR service initialization.
|
||||
power.cpuFrequency = true
|
||||
require.NoError(t, power.Init())
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, len(cores), len(power.msr.getCPUCoresData()))
|
||||
|
||||
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
mockServices.fs.On("getStringsMatchingPatternOnPath", mock.Anything).
|
||||
Return(nil, errors.New("error during getStringsMatchingPatternOnPath")).Once()
|
||||
|
||||
// In case of an error when fetching cpu cores plugin should proceed with execution.
|
||||
require.NoError(t, power.Init())
|
||||
fsMock.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
mockServices.fs.AssertCalled(t, "getStringsMatchingPatternOnPath", mock.Anything)
|
||||
require.Equal(t, 0, len(power.msr.getCPUCoresData()))
|
||||
}
|
||||
|
||||
func TestParseCPUMetricsConfig(t *testing.T) {
|
||||
power, _, _, _ := getPowerWithMockedServices()
|
||||
power, _ := getPowerWithMockedServices()
|
||||
disableCoreMetrics(power)
|
||||
|
||||
power.CPUMetrics = []string{
|
||||
"cpu_frequency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
|
||||
"cpu_frequency", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency", "cpu_busy_cycles", "cpu_temperature",
|
||||
"cpu_busy_frequency",
|
||||
}
|
||||
power.parseCPUMetricsConfig()
|
||||
|
|
@ -88,6 +94,7 @@ func verifyCoreMetrics(t *testing.T, power *PowerStat, enabled bool) {
|
|||
require.Equal(t, enabled, power.cpuFrequency)
|
||||
require.Equal(t, enabled, power.cpuC1StateResidency)
|
||||
require.Equal(t, enabled, power.cpuC6StateResidency)
|
||||
require.Equal(t, enabled, power.cpuC0StateResidency)
|
||||
require.Equal(t, enabled, power.cpuBusyCycles)
|
||||
require.Equal(t, enabled, power.cpuBusyFrequency)
|
||||
require.Equal(t, enabled, power.cpuTemperature)
|
||||
|
|
@ -102,23 +109,23 @@ func TestGather(t *testing.T) {
|
|||
preparedCPUData := getPreparedCPUData(coreIDs)
|
||||
raplDataMap := prepareRaplDataMap(packageIDs, socketCurrentEnergy, dramCurrentEnergy)
|
||||
|
||||
power, _, raplMock, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfo(power, coreIDs, packageIDs)
|
||||
enableCoreMetrics(power)
|
||||
power.skipFirstIteration = false
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).
|
||||
mockServices.rapl.On("initializeRaplData", mock.Anything).
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
|
||||
msrMock.On("getCPUCoresData").Return(preparedCPUData).
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedCPUData).
|
||||
On("openAndReadMsr", mock.Anything).Return(nil).
|
||||
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
|
||||
|
||||
require.NoError(t, power.Gather(&acc))
|
||||
// Number of global metrics : 3
|
||||
// Number of per core metrics : 6
|
||||
require.Equal(t, 3*len(packageIDs)+6*len(coreIDs), len(acc.GetTelegrafMetrics()))
|
||||
// Number of per core metrics : 7
|
||||
require.Equal(t, 3*len(packageIDs)+7*len(coreIDs), len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddGlobalMetricsNegative(t *testing.T) {
|
||||
|
|
@ -126,24 +133,24 @@ func TestAddGlobalMetricsNegative(t *testing.T) {
|
|||
socketCurrentEnergy := 13213852.2
|
||||
dramCurrentEnergy := 784552.0
|
||||
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
power.skipFirstIteration = false
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(raplDataMap).Once().
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Times(len(raplDataMap))
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
raplMock.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
|
||||
mockServices.rapl.AssertNumberOfCalls(t, "retrieveAndCalculateData", len(raplDataMap))
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(make(map[string]*raplData)).Once()
|
||||
|
||||
power.addGlobalMetrics(&acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
raplMock.AssertNotCalled(t, "retrieveAndCalculateData")
|
||||
mockServices.rapl.AssertNotCalled(t, "retrieveAndCalculateData")
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).Once().
|
||||
mockServices.rapl.On("initializeRaplData", mock.Anything).Once().
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Once().
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(errors.New("error while calculating data")).Once().
|
||||
|
|
@ -159,10 +166,10 @@ func TestAddGlobalMetricsPositive(t *testing.T) {
|
|||
dramCurrentEnergy := 124234872.5
|
||||
raplDataMap := prepareRaplDataMap([]string{"0", "1"}, socketCurrentEnergy, dramCurrentEnergy)
|
||||
maxPower := 546783852.9
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
power.skipFirstIteration = false
|
||||
|
||||
raplMock.On("initializeRaplData", mock.Anything).
|
||||
mockServices.rapl.On("initializeRaplData", mock.Anything).
|
||||
On("getRaplData").Return(raplDataMap).
|
||||
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Twice().
|
||||
|
|
@ -181,9 +188,9 @@ func TestAddMetricsForSingleCoreNegative(t *testing.T) {
|
|||
var wg sync.WaitGroup
|
||||
var acc testutil.Accumulator
|
||||
core := "0"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
|
||||
msrMock.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
|
||||
mockServices.msr.On("openAndReadMsr", core).Return(errors.New("error reading MSR file")).Once()
|
||||
|
||||
// Skip generating metric for CPU frequency.
|
||||
power.cpuFrequency = false
|
||||
|
|
@ -201,16 +208,16 @@ func TestAddCPUFrequencyMetric(t *testing.T) {
|
|||
coreID := "3"
|
||||
packageID := "0"
|
||||
frequency := 1200000.2
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
|
||||
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).
|
||||
mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).
|
||||
Return(float64(0), errors.New("error on reading file")).Once()
|
||||
|
||||
power.addCPUFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
msrMock.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
|
||||
mockServices.msr.On("retrieveCPUFrequencyForCore", mock.Anything).Return(frequency, nil).Once()
|
||||
|
||||
power.addCPUFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
|
@ -225,12 +232,12 @@ func TestAddCoreCPUTemperatureMetric(t *testing.T) {
|
|||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedTemp := preparedData[cpuID].throttleTemp - preparedData[cpuID].temp
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Once()
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedData).Once()
|
||||
power.addCPUTemperatureMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
|
|
@ -243,13 +250,13 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
|
|||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedC6 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier *
|
||||
float64(preparedData[cpuID].c6Delta) / float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUC6StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
|
|
@ -263,27 +270,32 @@ func TestAddC6StateResidencyMetric(t *testing.T) {
|
|||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
func TestAddProcessorBusyCyclesMetric(t *testing.T) {
|
||||
func TestAddC0StateResidencyMetric(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
expectedBusyCycles := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(preparedData[cpuID].mperfDelta) /
|
||||
float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUBusyCyclesMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.cpuBusyCycles, power.cpuC0StateResidency = true, true
|
||||
power.addCPUC0StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 2, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
expectedMetric := getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
|
||||
expectedMetric := getPowerCoreMetric("cpu_c0_state_residency_percent", expectedBusyCycles, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
|
||||
// Deprecated
|
||||
expectedMetric = getPowerCoreMetric("cpu_busy_cycles_percent", expectedBusyCycles, coreID, packageID, cpuID)
|
||||
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
|
||||
|
||||
acc.ClearMetrics()
|
||||
preparedData[cpuID].timeStampCounterDelta = 0
|
||||
power.addCPUBusyCyclesMetric(cpuID, &acc)
|
||||
power.addCPUC0StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 0, len(acc.GetTelegrafMetrics()))
|
||||
}
|
||||
|
||||
|
|
@ -292,12 +304,12 @@ func TestAddProcessorBusyFrequencyMetric(t *testing.T) {
|
|||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
power.skipFirstIteration = false
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
power.addCPUBusyFrequencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
||||
|
|
@ -312,14 +324,14 @@ func TestAddC1StateResidencyMetric(t *testing.T) {
|
|||
cpuID := "0"
|
||||
coreID := "2"
|
||||
packageID := "1"
|
||||
power, _, _, msrMock := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
|
||||
preparedData := getPreparedCPUData([]string{cpuID})
|
||||
c1 := preparedData[cpuID].timeStampCounterDelta - preparedData[cpuID].mperfDelta - preparedData[cpuID].c3Delta -
|
||||
preparedData[cpuID].c6Delta - preparedData[cpuID].c7Delta
|
||||
expectedC1 := roundFloatToNearestTwoDecimalPlaces(percentageMultiplier * float64(c1) / float64(preparedData[cpuID].timeStampCounterDelta))
|
||||
|
||||
msrMock.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
mockServices.msr.On("getCPUCoresData").Return(preparedData).Twice()
|
||||
|
||||
power.addCPUC1StateResidencyMetric(cpuID, &acc)
|
||||
require.Equal(t, 1, len(acc.GetTelegrafMetrics()))
|
||||
|
|
@ -337,9 +349,9 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
|
|||
var acc testutil.Accumulator
|
||||
sockets := []string{"0"}
|
||||
maxPower := 195720672.1
|
||||
power, _, raplMock, _ := getPowerWithMockedServices()
|
||||
power, mockServices := getPowerWithMockedServices()
|
||||
|
||||
raplMock.On("getConstraintMaxPowerWatts", mock.Anything).
|
||||
mockServices.rapl.On("getConstraintMaxPowerWatts", mock.Anything).
|
||||
Return(float64(0), errors.New("getConstraintMaxPowerWatts error")).Once().
|
||||
On("getConstraintMaxPowerWatts", mock.Anything).Return(maxPower, nil).Once()
|
||||
|
||||
|
|
@ -354,6 +366,80 @@ func TestAddThermalDesignPowerMetric(t *testing.T) {
|
|||
acc.AssertContainsTaggedFields(t, "powerstat_package", expectedMetric.fields, expectedMetric.tags)
|
||||
}
|
||||
|
||||
func TestCalculateTurboRatioGroup(t *testing.T) {
|
||||
coreCounts := uint64(0x0807060504030201)
|
||||
msr := uint64(0x0807060504030201)
|
||||
turboRatioLimitGroups := make(map[int]uint64)
|
||||
|
||||
calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups)
|
||||
require.Equal(t, 8, len(turboRatioLimitGroups))
|
||||
require.Equal(t, uint64(100), turboRatioLimitGroups[1])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[2])
|
||||
require.Equal(t, uint64(300), turboRatioLimitGroups[3])
|
||||
require.Equal(t, uint64(400), turboRatioLimitGroups[4])
|
||||
require.Equal(t, uint64(500), turboRatioLimitGroups[5])
|
||||
require.Equal(t, uint64(600), turboRatioLimitGroups[6])
|
||||
require.Equal(t, uint64(700), turboRatioLimitGroups[7])
|
||||
require.Equal(t, uint64(800), turboRatioLimitGroups[8])
|
||||
|
||||
coreCounts = uint64(0x100e0c0a08060402)
|
||||
calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups)
|
||||
require.Equal(t, 16, len(turboRatioLimitGroups))
|
||||
require.Equal(t, uint64(100), turboRatioLimitGroups[1])
|
||||
require.Equal(t, uint64(100), turboRatioLimitGroups[2])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[3])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[4])
|
||||
require.Equal(t, uint64(300), turboRatioLimitGroups[5])
|
||||
require.Equal(t, uint64(300), turboRatioLimitGroups[6])
|
||||
require.Equal(t, uint64(400), turboRatioLimitGroups[7])
|
||||
require.Equal(t, uint64(400), turboRatioLimitGroups[8])
|
||||
require.Equal(t, uint64(500), turboRatioLimitGroups[9])
|
||||
require.Equal(t, uint64(500), turboRatioLimitGroups[10])
|
||||
require.Equal(t, uint64(600), turboRatioLimitGroups[11])
|
||||
require.Equal(t, uint64(600), turboRatioLimitGroups[12])
|
||||
require.Equal(t, uint64(700), turboRatioLimitGroups[13])
|
||||
require.Equal(t, uint64(700), turboRatioLimitGroups[14])
|
||||
require.Equal(t, uint64(800), turboRatioLimitGroups[15])
|
||||
require.Equal(t, uint64(800), turboRatioLimitGroups[16])
|
||||
coreCounts = uint64(0x1211)
|
||||
msr = uint64(0xfffe)
|
||||
calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups)
|
||||
require.Equal(t, 18, len(turboRatioLimitGroups))
|
||||
require.Equal(t, uint64(25400), turboRatioLimitGroups[17])
|
||||
require.Equal(t, uint64(25500), turboRatioLimitGroups[18])
|
||||
|
||||
coreCounts = uint64(0x1201)
|
||||
msr = uint64(0x0202)
|
||||
calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups)
|
||||
require.Equal(t, 18, len(turboRatioLimitGroups))
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[1])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[2])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[3])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[4])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[5])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[6])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[7])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[8])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[9])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[10])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[11])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[12])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[13])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[14])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[15])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[16])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[17])
|
||||
require.Equal(t, uint64(200), turboRatioLimitGroups[18])
|
||||
|
||||
coreCounts = uint64(0x1211)
|
||||
msr = uint64(0xfffe)
|
||||
turboRatioLimitGroups = make(map[int]uint64)
|
||||
calculateTurboRatioGroup(coreCounts, msr, turboRatioLimitGroups)
|
||||
require.Equal(t, 2, len(turboRatioLimitGroups))
|
||||
require.Equal(t, uint64(25400), turboRatioLimitGroups[17])
|
||||
require.Equal(t, uint64(25500), turboRatioLimitGroups[18])
|
||||
}
|
||||
|
||||
func getPreparedCPUData(cores []string) map[string]*msrData {
|
||||
msrDataMap := make(map[string]*msrData)
|
||||
|
||||
|
|
@ -451,6 +537,7 @@ func prepareCPUInfo(power *PowerStat, coreIDs []string, packageIDs []string) {
|
|||
}
|
||||
|
||||
func enableCoreMetrics(power *PowerStat) {
|
||||
power.cpuC0StateResidency = true
|
||||
power.cpuC1StateResidency = true
|
||||
power.cpuC6StateResidency = true
|
||||
power.cpuTemperature = true
|
||||
|
|
@ -460,12 +547,13 @@ func enableCoreMetrics(power *PowerStat) {
|
|||
}
|
||||
|
||||
func disableCoreMetrics(power *PowerStat) {
|
||||
power.cpuC0StateResidency = false
|
||||
power.cpuC1StateResidency = false
|
||||
power.cpuC6StateResidency = false
|
||||
power.cpuBusyCycles = false
|
||||
power.cpuTemperature = false
|
||||
power.cpuBusyFrequency = false
|
||||
power.cpuFrequency = false
|
||||
power.cpuBusyCycles = false
|
||||
}
|
||||
|
||||
func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCurrentEnergy float64) map[string]*raplData {
|
||||
|
|
@ -480,16 +568,18 @@ func prepareRaplDataMap(socketIDs []string, socketCurrentEnergy float64, dramCur
|
|||
return raplDataMap
|
||||
}
|
||||
|
||||
func getPowerWithMockedServices() (*PowerStat, *mockFileService, *mockRaplService, *mockMsrService) {
|
||||
fsMock := &mockFileService{}
|
||||
msrMock := &mockMsrService{}
|
||||
raplMock := &mockRaplService{}
|
||||
logger := testutil.Logger{Name: "PowerPluginTest"}
|
||||
p := newPowerStat(fsMock)
|
||||
p.Log = logger
|
||||
p.fs = fsMock
|
||||
p.rapl = raplMock
|
||||
p.msr = msrMock
|
||||
func getPowerWithMockedServices() (*PowerStat, *MockServices) {
|
||||
var mockServices MockServices
|
||||
mockServices.fs = &mockFileService{}
|
||||
mockServices.msr = &mockMsrService{}
|
||||
mockServices.rapl = &mockRaplService{}
|
||||
p := newPowerStat(mockServices.fs)
|
||||
p.Log = testutil.Logger{Name: "PowerPluginTest"}
|
||||
p.rapl = mockServices.rapl
|
||||
p.msr = mockServices.msr
|
||||
p.packageCurrentPowerConsumption = true
|
||||
p.packageCurrentDramPowerConsumption = true
|
||||
p.packageThermalDesignPower = true
|
||||
|
||||
return p, fsMock, raplMock, msrMock
|
||||
return p, &mockServices
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,10 @@ const (
|
|||
throttleTemperatureLocation = 0x1A2
|
||||
temperatureLocation = 0x19C
|
||||
timestampCounterLocation = 0x10
|
||||
turboRatioLimitLocation = 0x1AD
|
||||
turboRatioLimit1Location = 0x1AE
|
||||
turboRatioLimit2Location = 0x1AF
|
||||
atomCoreTurboRatiosLocation = 0x66C
|
||||
)
|
||||
|
||||
// msrService is responsible for interactions with MSR.
|
||||
|
|
@ -35,6 +39,7 @@ type msrService interface {
|
|||
getCPUCoresData() map[string]*msrData
|
||||
retrieveCPUFrequencyForCore(core string) (float64, error)
|
||||
openAndReadMsr(core string) error
|
||||
readSingleMsr(core string, msr string) (uint64, error)
|
||||
}
|
||||
|
||||
type msrServiceImpl struct {
|
||||
|
|
@ -50,6 +55,10 @@ func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
|
|||
|
||||
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
|
||||
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
|
||||
err := checkFile(cpuFreqPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
cpuFreqFile, err := os.Open(cpuFreqPath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening scaling_cur_freq file on path %s, err: %v", cpuFreqPath, err)
|
||||
|
|
@ -62,6 +71,10 @@ func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, erro
|
|||
|
||||
func (m *msrServiceImpl) openAndReadMsr(core string) error {
|
||||
path := fmt.Sprintf(msrPartialPath, core)
|
||||
err := checkFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
msrFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
|
||||
|
|
@ -75,6 +88,40 @@ func (m *msrServiceImpl) openAndReadMsr(core string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) {
|
||||
path := fmt.Sprintf(msrPartialPath, core)
|
||||
err := checkFile(path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
msrFile, err := os.Open(path)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening MSR file on path %s, err: %v", path, err)
|
||||
}
|
||||
defer msrFile.Close()
|
||||
|
||||
var msrAddress int64
|
||||
switch msr {
|
||||
case "MSR_TURBO_RATIO_LIMIT":
|
||||
msrAddress = turboRatioLimitLocation
|
||||
case "MSR_TURBO_RATIO_LIMIT1":
|
||||
msrAddress = turboRatioLimit1Location
|
||||
case "MSR_TURBO_RATIO_LIMIT2":
|
||||
msrAddress = turboRatioLimit2Location
|
||||
case "MSR_ATOM_CORE_TURBO_RATIOS":
|
||||
msrAddress = atomCoreTurboRatiosLocation
|
||||
default:
|
||||
return 0, fmt.Errorf("incorect name of MSR %s", msr)
|
||||
}
|
||||
|
||||
value, err := m.fs.readFileAtOffsetToUint64(msrFile, msrAddress)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error {
|
||||
g, ctx := errgroup.WithContext(context.Background())
|
||||
|
||||
|
|
@ -128,9 +175,9 @@ func (m *msrServiceImpl) readDataFromMsr(core string, reader io.ReaderAt) error
|
|||
m.cpuCoresData[core].aperf = newAperf
|
||||
m.cpuCoresData[core].timeStampCounter = newTsc
|
||||
// MSR (1A2h) IA32_TEMPERATURE_TARGET bits 23:16.
|
||||
m.cpuCoresData[core].throttleTemp = (newThrottleTemp >> 16) & 0xFF
|
||||
m.cpuCoresData[core].throttleTemp = int64((newThrottleTemp >> 16) & 0xFF)
|
||||
// MSR (19Ch) IA32_THERM_STATUS bits 22:16.
|
||||
m.cpuCoresData[core].temp = (newTemp >> 16) & 0x7F
|
||||
m.cpuCoresData[core].temp = int64((newTemp >> 16) & 0x7F)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
// Code generated by mockery v2.10.0. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import mock "github.com/stretchr/testify/mock"
|
||||
|
||||
// mockMsrService is an autogenerated mock type for the msrService type
|
||||
// mockMsrService is an autogenerated mock type for the mockMsrService type
|
||||
type mockMsrService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
|
@ -39,6 +39,27 @@ func (_m *mockMsrService) openAndReadMsr(core string) error {
|
|||
return r0
|
||||
}
|
||||
|
||||
// readSingleMsr provides a mock function with given fields: core, msr
|
||||
func (_m *mockMsrService) readSingleMsr(core string, msr string) (uint64, error) {
|
||||
ret := _m.Called(core, msr)
|
||||
|
||||
var r0 uint64
|
||||
if rf, ok := ret.Get(0).(func(string, string) uint64); ok {
|
||||
r0 = rf(core, msr)
|
||||
} else {
|
||||
r0 = ret.Get(0).(uint64)
|
||||
}
|
||||
|
||||
var r1 error
|
||||
if rf, ok := ret.Get(1).(func(string, string) error); ok {
|
||||
r1 = rf(core, msr)
|
||||
} else {
|
||||
r1 = ret.Error(1)
|
||||
}
|
||||
|
||||
return r0, r1
|
||||
}
|
||||
|
||||
// retrieveCPUFrequencyForCore provides a mock function with given fields: core
|
||||
func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, error) {
|
||||
ret := _m.Called(core)
|
||||
|
|
|
|||
|
|
@ -109,8 +109,8 @@ func verifyCPUCoresData(cores []string, t *testing.T, msr *msrServiceImpl, expec
|
|||
require.Equal(t, expectedValue, msr.cpuCoresData[core].mperf)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].aperf)
|
||||
require.Equal(t, expectedValue, msr.cpuCoresData[core].timeStampCounter)
|
||||
require.Equal(t, (expectedValue>>16)&0xFF, msr.cpuCoresData[core].throttleTemp)
|
||||
require.Equal(t, (expectedValue>>16)&0x7F, msr.cpuCoresData[core].temp)
|
||||
require.Equal(t, int64((expectedValue>>16)&0xFF), msr.cpuCoresData[core].throttleTemp)
|
||||
require.Equal(t, int64((expectedValue>>16)&0x7F), msr.cpuCoresData[core].temp)
|
||||
|
||||
if verifyDelta {
|
||||
require.Equal(t, delta, msr.cpuCoresData[core].c3Delta)
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ type raplServiceImpl struct {
|
|||
data map[string]*raplData
|
||||
dramFolders map[string]string
|
||||
fs fileService
|
||||
logOnce map[string]error
|
||||
}
|
||||
|
||||
// initializeRaplData looks for RAPL folders and initializes data map with fetched information.
|
||||
|
|
@ -51,6 +52,10 @@ func (r *raplServiceImpl) getRaplData() map[string]*raplData {
|
|||
func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
||||
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
|
||||
socketEnergyUjPath := fmt.Sprintf(energyUjPartialPath, socketRaplPath)
|
||||
err := checkFile(socketEnergyUjPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
socketEnergyUjFile, err := os.Open(socketEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening socket energy_uj file on path %s, err: %v", socketEnergyUjPath, err)
|
||||
|
|
@ -59,6 +64,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
|||
|
||||
dramRaplPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, r.dramFolders[socketID])
|
||||
dramEnergyUjPath := fmt.Sprintf(energyUjPartialPath, dramRaplPath)
|
||||
err = checkFile(dramEnergyUjPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dramEnergyUjFile, err := os.Open(dramEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening dram energy_uj file on path %s, err: %v", dramEnergyUjPath, err)
|
||||
|
|
@ -66,6 +75,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
|||
defer dramEnergyUjFile.Close()
|
||||
|
||||
socketMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, socketRaplPath)
|
||||
err = checkFile(socketMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
socketMaxEnergyUjFile, err := os.Open(socketMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening socket max_energy_range_uj file on path %s, err: %v", socketMaxEnergyUjPath, err)
|
||||
|
|
@ -73,6 +86,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
|||
defer socketMaxEnergyUjFile.Close()
|
||||
|
||||
dramMaxEnergyUjPath := fmt.Sprintf(maxEnergyRangeUjPartialPath, dramRaplPath)
|
||||
err = checkFile(dramMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dramMaxEnergyUjFile, err := os.Open(dramMaxEnergyUjPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening dram max_energy_range_uj file on path %s, err: %v", dramMaxEnergyUjPath, err)
|
||||
|
|
@ -85,6 +102,10 @@ func (r *raplServiceImpl) retrieveAndCalculateData(socketID string) error {
|
|||
func (r *raplServiceImpl) getConstraintMaxPowerWatts(socketID string) (float64, error) {
|
||||
socketRaplPath := fmt.Sprintf(intelRaplSocketPartialPath, intelRaplPath, socketID)
|
||||
socketMaxPowerPath := fmt.Sprintf(maxPowerUwPartialPath, socketRaplPath)
|
||||
err := checkFile(socketMaxPowerPath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
socketMaxPowerFile, err := os.Open(socketMaxPowerPath)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error opening constraint_0_max_power_uw file on path %s, err: %v", socketMaxPowerPath, err)
|
||||
|
|
@ -156,15 +177,22 @@ func (r *raplServiceImpl) findDramFolders() {
|
|||
}
|
||||
|
||||
func (r *raplServiceImpl) findDramFolder(raplFolders []string, socketID string) {
|
||||
if r.logOnce == nil {
|
||||
r.logOnce = make(map[string]error)
|
||||
}
|
||||
|
||||
for _, raplFolder := range raplFolders {
|
||||
potentialDramPath := fmt.Sprintf(intelRaplDramPartialPath, intelRaplPath, socketID, raplFolder)
|
||||
nameFilePath := fmt.Sprintf(intelRaplDramNamePartialPath, potentialDramPath)
|
||||
read, err := r.fs.readFile(nameFilePath)
|
||||
if err != nil {
|
||||
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
|
||||
if val := r.logOnce[nameFilePath]; val == nil || val.Error() != err.Error() {
|
||||
r.log.Errorf("error reading file on path: %s, err: %v", nameFilePath, err)
|
||||
r.logOnce[nameFilePath] = err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
r.logOnce[nameFilePath] = nil
|
||||
// Remove new line character
|
||||
trimmedString := strings.TrimRight(string(read), "\n")
|
||||
if trimmedString == "dram" {
|
||||
|
|
@ -194,7 +222,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
|
|||
return fmt.Errorf("interval between last two Telegraf cycles is 0")
|
||||
}
|
||||
|
||||
if newSocketEnergy > r.data[socketID].socketEnergy {
|
||||
if newSocketEnergy >= r.data[socketID].socketEnergy {
|
||||
r.data[socketID].socketCurrentEnergy = (newSocketEnergy - r.data[socketID].socketEnergy) / interval
|
||||
} else {
|
||||
socketMaxEnergy, _, err := r.readEnergyInJoules(socketMaxEnergyUjFile)
|
||||
|
|
@ -206,7 +234,7 @@ func (r *raplServiceImpl) calculateData(socketID string, socketEnergyUjFile io.R
|
|||
r.data[socketID].socketCurrentEnergy = (socketMaxEnergy - r.data[socketID].socketEnergy + newSocketEnergy) / interval
|
||||
}
|
||||
|
||||
if newDramEnergy > r.data[socketID].dramEnergy {
|
||||
if newDramEnergy >= r.data[socketID].dramEnergy {
|
||||
r.data[socketID].dramCurrentEnergy = (newDramEnergy - r.data[socketID].dramEnergy) / interval
|
||||
} else {
|
||||
dramMaxEnergy, _, err := r.readEnergyInJoules(dramMaxEnergyUjFile)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
|
||||
// Code generated by mockery v2.10.0. DO NOT EDIT.
|
||||
|
||||
package intel_powerstat
|
||||
|
||||
import mock "github.com/stretchr/testify/mock"
|
||||
|
||||
// mockRaplService is an autogenerated mock type for the raplService type
|
||||
// mockRaplService is an autogenerated mock type for the raplService type.
// It embeds mock.Mock from github.com/stretchr/testify/mock.
|
||||
type mockRaplService struct {
|
||||
mock.Mock
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue