feat(intel_powerstat): add uncore frequency metrics (#11254)

This commit is contained in:
bkotlowski 2022-06-06 17:23:48 +02:00 committed by GitHub
parent d3ee1b74fd
commit 6d829c199d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 429 additions and 45 deletions

View File

@ -1,10 +1,13 @@
# Intel PowerStat Input Plugin
This input plugin monitors power statistics on Intel-based platforms and assumes presence of Linux based OS.
This input plugin monitors power statistics on Intel-based platforms and assumes
presence of Linux based OS.
Main use cases are power saving and workload migration. Telemetry frameworks allow users to monitor critical platform level metrics.
Key source of platform telemetry is power domain that is beneficial for MANO/Monitoring&Analytics systems
to take preventive/corrective actions based on platform busyness, CPU temperature, actual CPU utilization and power statistics.
Main use cases are power saving and workload migration. Telemetry frameworks
allow users to monitor critical platform level metrics. Key source of platform
telemetry is power domain that is beneficial for MANO Monitoring&Analytics
systems to take preventive/corrective actions based on platform busyness, CPU
temperature, actual CPU utilization and power statistics.
## Configuration
@ -16,7 +19,7 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur
## - Setting this value to an empty array means no package metrics will be collected
## - Finally, a user can specify individual metrics to capture from the supported options list
## Supported options:
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.
@ -29,7 +32,8 @@ to take preventive/corrective actions based on platform busyness, CPU temperatur
## Example: Configuration with no per-CPU telemetry
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
This configuration allows getting default processor package specific metrics, no
per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
@ -38,7 +42,8 @@ This configuration allows getting default processor package specific metrics, no
## Example: Configuration with no per-CPU telemetry - equivalent case
This configuration allows getting default processor package specific metrics, no per-CPU metrics are collected:
This configuration allows getting default processor package specific metrics, no
per-CPU metrics are collected:
```toml
[[inputs.intel_powerstat]]
@ -46,7 +51,8 @@ This configuration allows getting default processor package specific metrics, no
## Example: Configuration for CPU Temperature and CPU Frequency
This configuration allows getting default processor package specific metrics, plus subset of per-CPU metrics (CPU Temperature and CPU Frequency):
This configuration allows getting default processor package specific metrics,
plus subset of per-CPU metrics (CPU Temperature and CPU Frequency):
```toml
[[inputs.intel_powerstat]]
@ -55,7 +61,8 @@ This configuration allows getting default processor package specific metrics, pl
## Example: Configuration for CPU Temperature and CPU Frequency without default package metrics
This configuration allows getting only a subset of per-CPU metrics (CPU Temperature and CPU Frequency):
This configuration allows getting only a subset of per-CPU metrics (CPU
Temperature and CPU Frequency):
```toml
[[inputs.intel_powerstat]]
@ -65,27 +72,33 @@ This configuration allows getting only a subset of per-CPU metrics (CPU Temperat
## Example: Configuration with all available metrics
This configuration allows getting all processor package specific metrics and all per-CPU metrics:
This configuration allows getting all processor package specific metrics and all
per-CPU metrics:
```toml
[[inputs.intel_powerstat]]
package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"]
package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency"]
cpu_metrics = ["cpu_frequency", "cpu_busy_frequency", "cpu_temperature", "cpu_c0_state_residency", "cpu_c1_state_residency", "cpu_c6_state_residency"]
```
## SW Dependencies
Plugin is based on Linux Kernel modules that expose specific metrics over `sysfs` or `devfs` interfaces.
The following dependencies are expected by plugin:
Plugin is based on Linux Kernel modules that expose specific metrics over
`sysfs` or `devfs` interfaces. The following dependencies are expected by
plugin:
- _intel-rapl_ module which exposes Intel Runtime Power Limiting metrics over `sysfs` (`/sys/devices/virtual/powercap/intel-rapl`),
- _msr_ kernel module that provides access to processor model specific registers over `devfs` (`/dev/cpu/cpu%d/msr`),
- _cpufreq_ kernel module - which exposes per-CPU Frequency over `sysfs` (`/sys/devices/system/cpu/cpu%d/cpufreq/scaling_cur_freq`),
- _intel-uncore-frequency_ module which exposes Intel uncore frequency metrics over `sysfs` (`/sys/devices/system/cpu/intel_uncore_frequency`).
Minimum kernel version required is 3.13 to satisfy all requirements.
Minimum kernel version required is 3.13 to satisfy most of the requirements.
The `uncore_frequency` metrics additionally require the
`intel-uncore-frequency` module (available since kernel 5.6).
Please make sure that kernel modules are loaded and running (cpufreq is integrated in kernel). Modules might have to be manually enabled by using `modprobe`.
Depending on the kernel version, run commands:
Please make sure that kernel modules are loaded and running (cpufreq is
integrated in kernel). Modules might have to be manually enabled by using
`modprobe`. Depending on the kernel version, run commands:
```sh
# kernel 5.x.x:
@ -94,13 +107,17 @@ subo modprobe msr
sudo modprobe intel_rapl_common
sudo modprobe intel_rapl_msr
# also for kernel >= 5.6.0
sudo modprobe intel-uncore-frequency
# kernel 4.x.x:
sudo modprobe msr
sudo modprobe intel_rapl
```
**Telegraf with Intel PowerStat plugin enabled may require root access to read model specific registers (MSRs)**
to retrieve data for calculation of most critical per-CPU specific metrics:
**Telegraf with Intel PowerStat plugin enabled may require root access to read
model specific registers (MSRs)** to retrieve data for calculation of most
critical per-CPU specific metrics:
- `cpu_busy_frequency_mhz`
- `cpu_temperature_celsius`
@ -111,17 +128,20 @@ to retrieve data for calculation of most critical per-CPU specific metrics:
and to retrieve data for calculation of per-package specific metrics:
- `max_turbo_frequency_mhz`
- `uncore_frequency_mhz_cur`
To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration).
To expose other Intel PowerStat metrics root access may or may not be required
(depending on OS type or configuration).
## HW Dependencies
Specific metrics require certain processor features to be present, otherwise Intel PowerStat plugin won't be able to
read them. When using Linux Kernel based OS, user can detect supported processor features reading `/proc/cpuinfo` file.
Specific metrics require certain processor features to be present, otherwise
Intel PowerStat plugin won't be able to read them. When using Linux Kernel based
OS, user can detect supported processor features reading `/proc/cpuinfo` file.
Plugin assumes crucial properties are the same for all CPU cores in the system.
The following processor properties are examined in more detail in this section:
processor _cpu family_, _model_ and _flags_.
The following processor properties are required by the plugin:
processor _cpu family_, _model_ and _flags_. The following processor properties
are required by the plugin:
- Processor _cpu family_ must be Intel (0x6) - since data used by the plugin assumes Intel specific
model specific registers for all features
@ -186,9 +206,11 @@ and _powerstat\_core.cpu\_c6\_state\_residency_ metrics:
## Metrics
All metrics collected by Intel PowerStat plugin are collected in fixed intervals.
Metrics that reports processor C-state residency or power are calculated over elapsed intervals.
When starting to measure metrics, plugin skips first iteration of metrics if they are based on deltas with previous value.
All metrics collected by Intel PowerStat plugin are collected in fixed
intervals. Metrics that report processor C-state residency or power are
calculated over elapsed intervals. When starting to measure metrics, the plugin
skips the first iteration of metrics if they are based on deltas with the
previous value.
**The following measurements are supported by Intel PowerStat plugin:**
@ -225,6 +247,8 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
|-----|-------------|
| `package_id` | ID of platform package/socket |
| `active_cores`| Specific tag for `max_turbo_frequency_mhz` metric. The maximum number of activated cores for reachable turbo frequency |
| `die`| Specific tag for all `uncore_frequency` metrics. ID of die |
| `type`| Specific tag for all `uncore_frequency` metrics. Type of uncore frequency (current or initial) |
Measurement powerstat_package metrics are collected per processor package -_package_id_ tag indicates which package metric refers to.
@ -232,25 +256,32 @@ When starting to measure metrics, plugin skips first iteration of metrics if the
| Metric name (field) | Description | Units |
|-----|-------------|-----|
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `thermal_design_power_watts` | Maximum Thermal Design Power (TDP) available for processor package | Watts |
| `current_power_consumption_watts` | Current power consumption of processor package | Watts |
| `current_dram_power_consumption_watts` | Current power consumption of processor package DRAM subsystem | Watts |
| `max_turbo_frequency_mhz`| Maximum reachable turbo frequency for number of cores active | MHz |
| `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz |
| `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz |
| `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only when the `type` tag is `current`. Since this value is not yet available from the `intel-uncore-frequency` module, it needs to be read via MSR. If the `msr` module is not loaded, only the `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz |
### Known issues
From linux kernel version v5.4.77 with [this kernel change](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94)
resources like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for security reasons, so this plugin needs root privileges to work properly.
From linux kernel version v5.4.77 with [this kernel change][19f6d91b] resources
like `/sys/class/powercap/intel-rapl*/*/energy_uj` are readable only by root for
security reasons, so this plugin needs root privileges to work properly.
If such strict security restrictions are not relevant, reading permissions to files in `/sys/devices/virtual/powercap/intel-rapl/`
directory can be manually changed for example with `chmod` command with custom parameters.
For example to give all users permission to all files in `intel-rapl` directory:
If such strict security restrictions are not relevant, reading permissions to
files in `/sys/devices/virtual/powercap/intel-rapl/` directory can be manually
changed for example with `chmod` command with custom parameters. For example to
give all users permission to all files in `intel-rapl` directory:
```bash
sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
```
### Example Output
[19f6d91b]: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=v5.4.77&id=19f6d91bdad42200aac557a683c17b1f65ee6c94
## Example Output
```shell
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
@ -258,6 +289,8 @@ powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 16
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000
powerstat_package,die=0,host=ubuntu,package_id=0,type=initial uncore_frequency_limit_mhz_min=800,uncore_frequency_limit_mhz_max=2400 1606494744000000000
powerstat_package,die=0,host=ubuntu,package_id=0,type=current uncore_frequency_mhz_cur=800i,uncore_frequency_limit_mhz_min=800,uncore_frequency_limit_mhz_max=2400 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_frequency_mhz=1200.29 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_temperature_celsius=34i 1606494744000000000
powerstat_core,core_id=0,cpu_id=0,host=ubuntu,package_id=0 cpu_c6_state_residency_percent=92.52 1606494744000000000

View File

@ -1,4 +1,4 @@
// Code generated by mockery v2.10.0. DO NOT EDIT.
// Code generated by mockery v2.12.3. DO NOT EDIT.
package intel_powerstat
@ -130,3 +130,18 @@ func (_m *mockFileService) readFileToFloat64(reader io.Reader) (float64, int64,
return r0, r1, r2
}
// newmockFileServiceT is the testing handle accepted by newmockFileService:
// the mock package's TestingT plus the ability to register a cleanup callback.
type newmockFileServiceT interface {
	mock.TestingT
	Cleanup(func())
}

// newmockFileService returns a fresh mockFileService bound to t. A cleanup
// callback is registered on t that asserts the mock's expectations.
func newmockFileService(t newmockFileServiceT) *mockFileService {
	// Named so that the local does not shadow the imported mock package.
	fileServiceMock := new(mockFileService)
	fileServiceMock.Mock.Test(t)

	t.Cleanup(func() {
		fileServiceMock.AssertExpectations(t)
	})

	return fileServiceMock
}

View File

@ -6,6 +6,7 @@ package intel_powerstat
import (
_ "embed"
"errors"
"fmt"
"math/big"
"strconv"
@ -33,6 +34,7 @@ const (
packageCurrentDramPowerConsumption = "current_dram_power_consumption"
packageThermalDesignPower = "thermal_design_power"
packageTurboLimit = "max_turbo_frequency"
packageUncoreFrequency = "uncore_frequency"
percentageMultiplier = 100
)
@ -57,6 +59,7 @@ type PowerStat struct {
packageCurrentPowerConsumption bool
packageCurrentDramPowerConsumption bool
packageThermalDesignPower bool
packageUncoreFrequency bool
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
logOnce map[string]error
@ -76,10 +79,10 @@ func (p *PowerStat) Init() error {
}
// Initialize MSR service only when there is at least one metric enabled
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit {
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency {
p.msr = newMsrServiceWithFs(p.Log, p.fs)
}
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit {
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit || p.packageUncoreFrequency {
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
}
@ -97,7 +100,17 @@ func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
}
if p.areCoreMetricsEnabled() {
p.addPerCoreMetrics(acc)
if p.msr.isMsrLoaded() {
p.logOnce["msr"] = nil
p.addPerCoreMetrics(acc)
} else {
err := errors.New("error while trying to read MSR (probably msr module was not loaded)")
if val := p.logOnce["msr"]; val == nil || val.Error() != err.Error() {
p.Log.Errorf("%v", err)
// Remember that specific error occurs to omit logging next time
p.logOnce["msr"] = err
}
}
}
// Gathering the first iteration of metrics was skipped for most of them because they are based on delta calculations
@ -109,25 +122,31 @@ func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
// Prepare RAPL data each gather because there is a possibility to disable rapl kernel module
p.rapl.initializeRaplData()
for socketID := range p.rapl.getRaplData() {
if p.packageTurboLimit {
p.addTurboRatioLimit(socketID, acc)
}
if p.packageUncoreFrequency {
die := maxDiePerSocket(socketID)
for actualDie := 0; actualDie < die; actualDie++ {
p.addUncoreFreq(socketID, strconv.Itoa(actualDie), acc)
}
}
err := p.rapl.retrieveAndCalculateData(socketID)
if err != nil {
// In case of an error skip calculating metrics for this socket
if val := p.logOnce[socketID]; val == nil || val.Error() != err.Error() {
if val := p.logOnce[socketID+"rapl"]; val == nil || val.Error() != err.Error() {
p.Log.Errorf("error fetching rapl data for socket %s, err: %v", socketID, err)
// Remember that specific error occurs for socketID to omit logging next time
p.logOnce[socketID] = err
p.logOnce[socketID+"rapl"] = err
}
continue
}
// If error stops occurring, clear logOnce indicator
p.logOnce[socketID] = nil
p.logOnce[socketID+"rapl"] = nil
if p.packageThermalDesignPower {
p.addThermalDesignPowerMetric(socketID, acc)
}
@ -143,6 +162,84 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
}
}
}
// maxDiePerSocket returns the number of dies assumed to exist on a socket.
// The socket ID argument is currently ignored.
//
// TODO: At the moment, linux does not distinguish between more dies per
// socket. This piece of code will need to be upgraded in the future.
// https://github.com/torvalds/linux/blob/v5.17/arch/x86/include/asm/topology.h#L153
func maxDiePerSocket(_ string) int {
	const diesPerSocket = 1
	return diesPerSocket
}
// addUncoreFreq reports the uncore frequency metrics of one die of a socket.
//
// When checkFile reports an error for the intel_uncore_frequency sysfs
// directory, nothing is collected; the problem is logged once (tracked in
// p.logOnce under "intel_uncore_frequency") until it disappears again.
// Otherwise the "initial" and then the "current" frequency data are read via
// readUncoreFreq and added to acc.
func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accumulator) {
	const logKey = "intel_uncore_frequency"

	if checkFile("/sys/devices/system/cpu/intel_uncore_frequency") != nil {
		moduleErr := fmt.Errorf("error while checking existing intel_uncore_frequency (probably intel-uncore-frequency module was not loaded)")
		// Log only when this exact error was not already reported last time.
		if logged := p.logOnce[logKey]; logged == nil || logged.Error() != moduleErr.Error() {
			p.Log.Errorf("%v", moduleErr)
			p.logOnce[logKey] = moduleErr
		}
		return
	}

	// The directory is reachable again: re-arm the log-once guard.
	p.logOnce[logKey] = nil

	for _, typeFreq := range []string{"initial", "current"} {
		p.readUncoreFreq(typeFreq, socketID, die, acc)
	}
}
// readUncoreFreq gathers the uncore frequency limits of one die and adds them
// to acc as a single "powerstat_package" gauge tagged with package_id, type
// and die.
//
// typeFreq selects which limits are read ("initial" or "current"). For
// "current" the function additionally reads MSR_UNCORE_PERF_STATUS from a CPU
// belonging to socketID and stores (value & 0x3F) * 100 under
// "uncore_frequency_mhz_cur". If core metrics are disabled or the MSR is not
// readable, that field is skipped (logged once per socket) and only the
// limits are reported. If no CPU matches the socket, or the MSR read or either
// limit read fails, nothing is added to acc.
func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) {
	fields := make(map[string]interface{})

	if typeFreq == "current" {
		msrLogKey := socketID + "msr"

		if !p.areCoreMetricsEnabled() || !p.msr.isMsrLoaded() {
			msrErr := errors.New("error while trying to read MSR (probably msr module was not loaded), uncore_frequency_mhz_cur metric will not be collected")
			// Report the problem only once per socket until it goes away.
			if logged := p.logOnce[msrLogKey]; logged == nil || logged.Error() != msrErr.Error() {
				p.Log.Errorf("%v", msrErr)
				p.logOnce[msrLogKey] = msrErr
			}
		} else {
			p.logOnce[msrLogKey] = nil

			// Pick a CPU that belongs to the requested socket.
			var cpuID string
			for _, info := range p.cpuInfo {
				if info.physicalID == socketID {
					cpuID = info.cpuID
				}
			}
			if cpuID == "" {
				p.Log.Debugf("error while reading socket ID")
				return
			}

			rawStatus, err := p.msr.readSingleMsr(cpuID, "MSR_UNCORE_PERF_STATUS")
			if err != nil {
				p.Log.Debugf("error while reading MSR_UNCORE_PERF_STATUS: %v", err)
				return
			}
			fields["uncore_frequency_mhz_cur"] = (rawStatus & 0x3F) * 100
		}
	}

	minLimit, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "min", die)
	if err != nil {
		p.Log.Errorf("error while retrieving minimum uncore frequency of the socket %s, err: %v", socketID, err)
		return
	}
	maxLimit, err := p.msr.retrieveUncoreFrequency(socketID, typeFreq, "max", die)
	if err != nil {
		p.Log.Errorf("error while retrieving maximum uncore frequency of the socket %s, err: %v", socketID, err)
		return
	}

	fields["uncore_frequency_limit_mhz_min"] = minLimit
	fields["uncore_frequency_limit_mhz_max"] = maxLimit

	acc.AddGauge("powerstat_package", fields, map[string]string{
		"package_id": socketID,
		"type":       typeFreq,
		"die":        die,
	})
}
func (p *PowerStat) addThermalDesignPowerMetric(socketID string, acc telegraf.Accumulator) {
maxPower, err := p.rapl.getConstraintMaxPowerWatts(socketID)
@ -579,6 +676,9 @@ func (p *PowerStat) parsePackageMetricsConfig() {
if contains(p.PackageMetrics, packageThermalDesignPower) {
p.packageThermalDesignPower = true
}
if contains(p.PackageMetrics, packageUncoreFrequency) {
p.packageUncoreFrequency = true
}
}
func (p *PowerStat) parseCPUMetricsConfig() {
@ -693,6 +793,7 @@ func newPowerStat(fs fileService) *PowerStat {
cpuTemperature: false,
cpuBusyFrequency: false,
packageTurboLimit: false,
packageUncoreFrequency: false,
packageCurrentPowerConsumption: false,
packageCurrentDramPowerConsumption: false,
packageThermalDesignPower: false,

View File

@ -119,6 +119,7 @@ func TestGather(t *testing.T) {
On("retrieveAndCalculateData", mock.Anything).Return(nil).Times(len(raplDataMap)).
On("getConstraintMaxPowerWatts", mock.Anything).Return(546783852.3, nil)
mockServices.msr.On("getCPUCoresData").Return(preparedCPUData).
On("isMsrLoaded", mock.Anything).Return(true).
On("openAndReadMsr", mock.Anything).Return(nil).
On("retrieveCPUFrequencyForCore", mock.Anything).Return(1200000.2, nil)
@ -227,6 +228,43 @@ func TestAddCPUFrequencyMetric(t *testing.T) {
acc.AssertContainsTaggedFields(t, "powerstat_core", expectedMetric.fields, expectedMetric.tags)
}
// TestReadUncoreFreq checks that readUncoreFreq emits one powerstat_package
// metric per frequency type: limits only for "initial", and limits plus the
// MSR-derived current frequency for "current".
func TestReadUncoreFreq(t *testing.T) {
	cpuID, coreID, packageID, die := "0", "0", "0", "0"
	acc := &testutil.Accumulator{}

	power, mockServices := getPowerWithMockedServices()
	prepareCPUInfoForSingleCPU(power, cpuID, coreID, packageID)
	preparedData := getPreparedCPUData([]string{cpuID})

	mockServices.msr.On("getCPUCoresData").Return(preparedData)
	mockServices.msr.On("isMsrLoaded").Return(true)
	// A raw MSR value of 10 is expected to be reported as 1000.
	mockServices.msr.On("readSingleMsr", "0", "MSR_UNCORE_PERF_STATUS").Return(uint64(10), nil)

	limits := []struct {
		typeFreq string
		kind     string
		mhz      float64
	}{
		{typeFreq: "initial", kind: "min", mhz: 500},
		{typeFreq: "initial", kind: "max", mhz: 1200},
		{typeFreq: "current", kind: "min", mhz: 600},
		{typeFreq: "current", kind: "max", mhz: 1100},
	}
	for _, limit := range limits {
		mockServices.msr.On("retrieveUncoreFrequency", "0", limit.typeFreq, limit.kind, "0").
			Return(limit.mhz, nil)
	}

	power.readUncoreFreq("current", packageID, die, acc)
	power.readUncoreFreq("initial", packageID, die, acc)

	require.Len(t, acc.GetTelegrafMetrics(), 2)

	wantInitial := getPowerUncoreFreqMetric("initial", float64(500), float64(1200), nil, packageID, die)
	acc.AssertContainsTaggedFields(t, "powerstat_package", wantInitial.fields, wantInitial.tags)

	wantCurrent := getPowerUncoreFreqMetric("current", float64(600), float64(1100), uint64(1000), packageID, die)
	acc.AssertContainsTaggedFields(t, "powerstat_package", wantCurrent.fields, wantCurrent.tags)
}
func TestAddCoreCPUTemperatureMetric(t *testing.T) {
var acc testutil.Accumulator
cpuID := "0"
@ -496,6 +534,27 @@ func getPowerGlobalMetric(name string, value interface{}, socketID string) struc
return getPowerMetric(name, value, map[string]string{"package_id": socketID})
}
// getPowerUncoreFreqMetric builds the expected fields and tags of an uncore
// frequency metric for use in test assertions.
//
// The tags always contain package_id, die and type. The fields always contain
// the min and max limits; "uncore_frequency_mhz_cur" is set to current only
// when typeFreq is "current".
func getPowerUncoreFreqMetric(typeFreq string, limitMin interface{}, limitMax interface{}, current interface{}, socketID string, die string) struct {
	fields map[string]interface{}
	tags   map[string]string
} {
	fields := map[string]interface{}{
		"uncore_frequency_limit_mhz_min": limitMin,
		"uncore_frequency_limit_mhz_max": limitMax,
	}
	if typeFreq == "current" {
		fields["uncore_frequency_mhz_cur"] = current
	}

	return struct {
		fields map[string]interface{}
		tags   map[string]string
	}{
		fields: fields,
		tags: map[string]string{
			"package_id": socketID,
			"die":        die,
			"type":       typeFreq,
		},
	}
}
func getPowerMetric(name string, value interface{}, tags map[string]string) struct {
fields map[string]interface{}
tags map[string]string

View File

@ -20,6 +20,7 @@ const (
systemCPUPath = "/sys/devices/system/cpu/"
cpuCurrentFreqPartialPath = "/sys/devices/system/cpu/cpu%s/cpufreq/scaling_cur_freq"
msrPartialPath = "/dev/cpu/%s/msr"
uncoreFreqPath = "/sys/devices/system/cpu/intel_uncore_frequency/package_%s_die_%s/%s%s_freq_khz"
c3StateResidencyLocation = 0x3FC
c6StateResidencyLocation = 0x3FD
c7StateResidencyLocation = 0x3FE
@ -32,14 +33,17 @@ const (
turboRatioLimit1Location = 0x1AE
turboRatioLimit2Location = 0x1AF
atomCoreTurboRatiosLocation = 0x66C
uncorePerfStatusLocation = 0x621
)
// msrService is responsible for interactions with MSR.
// It also exposes the sysfs-based CPU and uncore frequency readers used by
// the plugin.
type msrService interface {
// getCPUCoresData returns the per-CPU MSR data, keyed by a string CPU identifier.
getCPUCoresData() map[string]*msrData
// retrieveCPUFrequencyForCore returns the current frequency of the given
// core, converted from kHz to MHz.
retrieveCPUFrequencyForCore(core string) (float64, error)
// retrieveUncoreFrequency returns the uncore frequency limit for the given
// socket and die, converted from kHz to MHz. typeFreq is "initial" or
// "current"; kind is "min" or "max".
retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error)
// openAndReadMsr reads the MSR device of the given core.
// NOTE(review): presumably refreshes the cached per-CPU data - confirm
// against the implementation.
openAndReadMsr(core string) error
// readSingleMsr reads one MSR, selected by name (for example
// "MSR_UNCORE_PERF_STATUS"), for the given core.
readSingleMsr(core string, msr string) (uint64, error)
// isMsrLoaded reports whether MSR data can currently be read.
isMsrLoaded() bool
}
type msrServiceImpl struct {
@ -53,6 +57,15 @@ func (m *msrServiceImpl) getCPUCoresData() map[string]*msrData {
return m.cpuCoresData
}
// isMsrLoaded reports whether the MSR can be read for at least one known CPU.
// It calls openAndReadMsr for the CPUs returned by getCPUCoresData and returns
// true on the first success; false if every attempt fails or no CPU is known.
func (m *msrServiceImpl) isMsrLoaded() bool {
	knownCPUs := m.getCPUCoresData()
	for cpuID := range knownCPUs {
		if m.openAndReadMsr(cpuID) != nil {
			continue
		}
		return true
	}
	return false
}
func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, error) {
cpuFreqPath := fmt.Sprintf(cpuCurrentFreqPartialPath, core)
err := checkFile(cpuFreqPath)
@ -69,6 +82,49 @@ func (m *msrServiceImpl) retrieveCPUFrequencyForCore(core string) (float64, erro
return convertKiloHertzToMegaHertz(cpuFreq), err
}
// retrieveUncoreFrequency reads one uncore frequency limit from sysfs and
// returns it converted from kHz to MHz.
//
// socketID and die select the package/die directory, typeFreq is "initial" or
// "current" and kind is "min" or "max" (validated by createUncoreFreqPath).
// On any error the returned frequency is 0 and the error is returned: path
// construction and open failures are wrapped with context (and stay
// inspectable via errors.Is/As), while checkFile and read errors are passed
// through unchanged.
func (m *msrServiceImpl) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) {
	// Named "path" so the local does not shadow the uncoreFreqPath constant.
	path, err := createUncoreFreqPath(socketID, typeFreq, kind, die)
	if err != nil {
		return 0, fmt.Errorf("unable to create uncore freq read path for socketID %s, and frequency type %s err: %w", socketID, typeFreq, err)
	}

	if err := checkFile(path); err != nil {
		return 0, err
	}

	uncoreFreqFile, err := os.Open(path)
	if err != nil {
		return 0, fmt.Errorf("error opening uncore frequency file on %s, err: %w", path, err)
	}
	defer uncoreFreqFile.Close()

	uncoreFreqKHz, _, err := m.fs.readFileToFloat64(uncoreFreqFile)
	if err != nil {
		// Do not hand back a converted value that the caller must ignore anyway.
		return 0, err
	}
	return convertKiloHertzToMegaHertz(uncoreFreqKHz), nil
}
// createUncoreFreqPath builds the sysfs path of an uncore frequency limit file
// from the uncoreFreqPath template.
//
// socketID and die are zero-padded to two characters when they are a single
// decimal digit ("3" -> "03"); longer values such as "10" are used unchanged.
// typeFreq must be "initial" (file prefix "initial_") or "current" (no
// prefix); kind must be "min" or "max". Any other typeFreq or kind yields an
// empty path and an error.
func createUncoreFreqPath(socketID string, typeFreq string, kind string, die string) (string, error) {
	// The padding decision is based on length and digit-ness. A lexicographic
	// string range check would also match multi-digit IDs such as "10"
	// (because "10" < "9") and wrongly turn them into "010".
	zeroPad := func(id string) string {
		if len(id) == 1 && id[0] >= '0' && id[0] <= '9' {
			return "0" + id
		}
		return id
	}

	var prefix string
	switch typeFreq {
	case "initial":
		prefix = "initial_"
	case "current":
		prefix = ""
	default:
		return "", fmt.Errorf("unknown frequency type %s, only 'initial' and 'current' are supported", typeFreq)
	}

	if kind != "min" && kind != "max" {
		return "", fmt.Errorf("unknown frequency type %s, only 'min' and 'max' are supported", kind)
	}

	return fmt.Sprintf(uncoreFreqPath, zeroPad(socketID), zeroPad(die), prefix, kind), nil
}
func (m *msrServiceImpl) openAndReadMsr(core string) error {
path := fmt.Sprintf(msrPartialPath, core)
err := checkFile(path)
@ -110,6 +166,8 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error)
msrAddress = turboRatioLimit2Location
case "MSR_ATOM_CORE_TURBO_RATIOS":
msrAddress = atomCoreTurboRatiosLocation
case "MSR_UNCORE_PERF_STATUS":
msrAddress = uncorePerfStatusLocation
default:
return 0, fmt.Errorf("incorect name of MSR %s", msr)
}
@ -250,6 +308,5 @@ func newMsrServiceWithFs(logger telegraf.Logger, fs fileService) *msrServiceImpl
msrService.msrOffsets = []int64{c3StateResidencyLocation, c6StateResidencyLocation, c7StateResidencyLocation,
maximumFrequencyClockCountLocation, actualFrequencyClockCountLocation, timestampCounterLocation,
throttleTemperatureLocation, temperatureLocation}
return msrService
}

View File

@ -1,4 +1,4 @@
// Code generated by mockery v2.10.0. DO NOT EDIT.
// Code generated by mockery v2.12.3. DO NOT EDIT.
package intel_powerstat
@ -9,6 +9,20 @@ type mockMsrService struct {
mock.Mock
}
// isMsrLoaded provides a mock function with given fields:
// The configured return value may be either a bool or a func() bool that is
// invoked to produce the result.
func (_m *mockMsrService) isMsrLoaded() bool {
	ret := _m.Called()

	if provider, isFunc := ret.Get(0).(func() bool); isFunc {
		return provider()
	}
	return ret.Get(0).(bool)
}
// getCPUCoresData provides a mock function with given fields:
func (_m *mockMsrService) getCPUCoresData() map[string]*msrData {
ret := _m.Called()
@ -80,3 +94,39 @@ func (_m *mockMsrService) retrieveCPUFrequencyForCore(core string) (float64, err
return r0, r1
}
// retrieveUncoreFrequency provides a mock function with given fields: socketID, typeFreq, kind, die
// Each configured return value may be given directly or as a function taking
// the same four string arguments; functions are invoked to produce the result.
func (_m *mockMsrService) retrieveUncoreFrequency(socketID string, typeFreq string, kind string, die string) (float64, error) {
	ret := _m.Called(socketID, typeFreq, kind, die)

	// The frequency is resolved before the error.
	var freq float64
	switch provider := ret.Get(0).(type) {
	case func(string, string, string, string) float64:
		freq = provider(socketID, typeFreq, kind, die)
	default:
		freq = ret.Get(0).(float64)
	}

	errProvider, isFunc := ret.Get(1).(func(string, string, string, string) error)
	if !isFunc {
		return freq, ret.Error(1)
	}
	return freq, errProvider(socketID, typeFreq, kind, die)
}
// newmockMsrServiceT is the testing handle accepted by newmockMsrService:
// the mock package's TestingT plus the ability to register a cleanup callback.
type newmockMsrServiceT interface {
	mock.TestingT
	Cleanup(func())
}

// newmockMsrService returns a fresh mockMsrService bound to t. A cleanup
// callback is registered on t that asserts the mock's expectations.
func newmockMsrService(t newmockMsrServiceT) *mockMsrService {
	// Named so that the local does not shadow the imported mock package.
	msrServiceMock := new(mockMsrService)
	msrServiceMock.Mock.Test(t)

	t.Cleanup(func() {
		msrServiceMock.AssertExpectations(t)
	})

	return msrServiceMock
}

View File

@ -93,6 +93,60 @@ func TestReadValueFromFileAtOffset(t *testing.T) {
require.Equal(t, zero, <-testChannel)
}
// TestCreateUncoreFreqPath verifies the sysfs paths produced for valid
// socket/die/type/kind combinations and the errors returned for an unknown
// frequency type or kind.
func TestCreateUncoreFreqPath(t *testing.T) {
	testCases := []struct {
		socketID string
		typeFreq string
		kind     string
		die      string
		wantPath string
		wantErr  error
	}{
		{socketID: "0", typeFreq: "initial", kind: "min", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_min_freq_khz"},
		{socketID: "0", typeFreq: "initial", kind: "max", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/initial_max_freq_khz"},
		{socketID: "0", typeFreq: "current", kind: "min", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/min_freq_khz"},
		{socketID: "0", typeFreq: "current", kind: "max", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/max_freq_khz"},
		{socketID: "9", typeFreq: "current", kind: "max", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_09_die_00/max_freq_khz"},
		{socketID: "99", typeFreq: "current", kind: "max", die: "0",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_99_die_00/max_freq_khz"},
		{socketID: "0", typeFreq: "current", kind: "max", die: "9",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_09/max_freq_khz"},
		{socketID: "0", typeFreq: "current", kind: "max", die: "99",
			wantPath: "/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_99/max_freq_khz"},
		{socketID: "0", typeFreq: "foo", kind: "max", die: "0",
			wantPath: "",
			wantErr:  errors.New("unknown frequency type foo, only 'initial' and 'current' are supported")},
		{socketID: "0", typeFreq: "current", kind: "bar", die: "0",
			wantPath: "",
			wantErr:  errors.New("unknown frequency type bar, only 'min' and 'max' are supported")},
	}

	for _, tc := range testCases {
		path, err := createUncoreFreqPath(tc.socketID, tc.typeFreq, tc.kind, tc.die)
		// A nil wantErr means the call is expected to succeed.
		require.Equal(t, tc.wantErr, err)
		require.Equal(t, tc.wantPath, path)
	}
}
func prepareTestData(fsMock *mockFileService, cores []string, msr *msrServiceImpl, t *testing.T) {
// Prepare MSR offsets and CPUCoresData for test.
fsMock.On("getStringsMatchingPatternOnPath", mock.Anything).

View File

@ -1,4 +1,4 @@
// Code generated by mockery v2.10.0. DO NOT EDIT.
// Code generated by mockery v2.12.3. DO NOT EDIT.
package intel_powerstat
@ -64,3 +64,18 @@ func (_m *mockRaplService) retrieveAndCalculateData(socketID string) error {
return r0
}
// newmockRaplServiceT is the testing handle accepted by newmockRaplService:
// the mock package's TestingT plus the ability to register a cleanup callback.
type newmockRaplServiceT interface {
	mock.TestingT
	Cleanup(func())
}

// newmockRaplService returns a fresh mockRaplService bound to t. A cleanup
// callback is registered on t that asserts the mock's expectations.
func newmockRaplService(t newmockRaplServiceT) *mockRaplService {
	// Named so that the local does not shadow the imported mock package.
	raplServiceMock := new(mockRaplService)
	raplServiceMock.Mock.Test(t)

	t.Cleanup(func() {
		raplServiceMock.AssertExpectations(t)
	})

	return raplServiceMock
}

View File

@ -5,7 +5,7 @@
## - Setting this value to an empty array means no package metrics will be collected
## - Finally, a user can specify individual metrics to capture from the supported options list
## Supported options:
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency"
## "current_power_consumption", "current_dram_power_consumption", "thermal_design_power", "max_turbo_frequency", "uncore_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in cpu_metrics array.