feat(intel_powerstat): Add CPU base frequency metric and add support for new platforms (#12452)

This commit is contained in:
Paweł Żak 2023-01-18 14:10:00 +01:00 committed by GitHub
parent 7725896ff4
commit 65b23f112e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 464 additions and 121 deletions

View File

@ -33,7 +33,8 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
## supported options list ## supported options list
## Supported options: ## Supported options:
## "current_power_consumption", "current_dram_power_consumption", ## "current_power_consumption", "current_dram_power_consumption",
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency" ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency",
## "cpu_base_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in ## The user can choose which per-CPU metrics are monitored by the plugin in
@ -125,7 +126,7 @@ integrated in kernel). Modules might have to be manually enabled by using
```sh ```sh
# kernel 5.x.x: # kernel 5.x.x:
sudo modprobe rapl sudo modprobe rapl
subo modprobe msr sudo modprobe msr
sudo modprobe intel_rapl_common sudo modprobe intel_rapl_common
sudo modprobe intel_rapl_msr sudo modprobe intel_rapl_msr
@ -151,6 +152,7 @@ and to retrieve data for calculation per-package specific metric:
- `max_turbo_frequency_mhz` - `max_turbo_frequency_mhz`
- `uncore_frequency_mhz_cur` - `uncore_frequency_mhz_cur`
- `cpu_base_frequency_mhz`
To expose other Intel PowerStat metrics root access may or may not be required To expose other Intel PowerStat metrics root access may or may not be required
(depending on OS type or configuration). (depending on OS type or configuration).
@ -180,57 +182,68 @@ are required by the plugin:
_powerstat\_core.cpu\_c1\_state\_residency_ _powerstat\_core.cpu\_c1\_state\_residency_
- "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_ - "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_
- Processor _Model number_ must be one of the following values for plugin to - Processor _Model number_ must be one of the following values for plugin to
read _powerstat\_core.cpu\_c1\_state\_residency_ and read _powerstat\_core.cpu\_c1\_state\_residency_ /
_powerstat\_core.cpu\_c6\_state\_residency_ metrics: _powerstat\_core.cpu\_c6\_state\_residency_ and
_powerstat\_package.cpu\_base\_frequency_ metrics:
| Model number | Processor name | | Model number | Processor name |
|-----|-------------| |--------------|---------------------------------|
| 0x37 | Intel Atom® Bay Trail | | 0x37 | Intel Atom® Bay Trail |
| 0x4D | Intel Atom® Avaton | | 0x4D | Intel Atom® Avaton |
| 0x5C | Intel Atom® Apollo Lake | | 0x5C | Intel Atom® Apollo Lake |
| 0x5F | Intel Atom® Denverton | | 0x5F | Intel Atom® Denverton |
| 0x7A | Intel Atom® Goldmont | | 0x7A | Intel Atom® Goldmont |
| 0x4C | Intel Atom® Airmont | | 0x4C | Intel Atom® Airmont |
| 0x86 | Intel Atom® Jacobsville | | 0x86 | Intel Atom® Jacobsville |
| 0x96 | Intel Atom® Elkhart Lake | | 0x96 | Intel Atom® Elkhart Lake |
| 0x9C | Intel Atom® Jasper Lake | | 0x9C | Intel Atom® Jasper Lake |
| 0x1A | Intel Nehalem-EP | | 0x1A | Intel Nehalem-EP |
| 0x1E | Intel Nehalem | | 0x1E | Intel Nehalem |
| 0x1F | Intel Nehalem-G | | 0x1F | Intel Nehalem-G |
| 0x2E | Intel Nehalem-EX | | 0x2E | Intel Nehalem-EX |
| 0x25 | Intel Westmere | | 0x25 | Intel Westmere |
| 0x2C | Intel Westmere-EP | | 0x2C | Intel Westmere-EP |
| 0x2F | Intel Westmere-EX | | 0x2F | Intel Westmere-EX |
| 0x2A | Intel Sandybridge | | 0x2A | Intel Sandybridge |
| 0x2D | Intel Sandybridge-X | | 0x2D | Intel Sandybridge-X |
| 0x3A | Intel Ivybridge | | 0x3A | Intel Ivybridge |
| 0x3E | Intel Ivybridge-X | | 0x3E | Intel Ivybridge-X |
| 0x4E | Intel Atom® Silvermont-MID | | 0x4E | Intel Atom® Silvermont-MID |
| 0x5E | Intel Skylake | | 0x5E | Intel Skylake |
| 0x55 | Intel Skylake-X | | 0x55 | Intel Skylake-X |
| 0x8E | Intel Kabylake-L | | 0x8E | Intel KabyLake-L |
| 0x9E | Intel Kabylake | | 0x9E | Intel KabyLake |
| 0x6A | Intel Icelake-X | | 0x6A | Intel IceLake-X |
| 0x6C | Intel Icelake-D | | 0x6C | Intel IceLake-D |
| 0x7D | Intel Icelake | | 0x7D | Intel IceLake |
| 0x7E | Intel Icelake-L | | 0x7E | Intel IceLake-L |
| 0x9D | Intel Icelake-NNPI | | 0x9D | Intel IceLake-NNPI |
| 0x3C | Intel Haswell | | 0x3C | Intel Haswell |
| 0x3F | Intel Haswell-X | | 0x3F | Intel Haswell-X |
| 0x45 | Intel Haswell-L | | 0x45 | Intel Haswell-L |
| 0x46 | Intel Haswell-G | | 0x46 | Intel Haswell-G |
| 0x3D | Intel Broadwell | | 0x3D | Intel Broadwell |
| 0x47 | Intel Broadwell-G | | 0x47 | Intel Broadwell-G |
| 0x4F | Intel Broadwell-X | | 0x4F | Intel Broadwell-X |
| 0x56 | Intel Broadwell-D | | 0x56 | Intel Broadwell-D |
| 0x66 | Intel Cannonlake-L | | 0x66 | Intel CannonLake-L |
| 0x57 | Intel Xeon® PHI Knights Landing | | 0x57 | Intel Xeon® PHI Knights Landing |
| 0x85 | Intel Xeon® PHI Knights Mill | | 0x85 | Intel Xeon® PHI Knights Mill |
| 0xA5 | Intel CometLake | | 0xA5 | Intel CometLake |
| 0xA6 | Intel CometLake-L | | 0xA6 | Intel CometLake-L |
| 0x8F | Intel Sapphire Rapids X | | 0x8A | Intel Lakefield |
| 0x8C | Intel TigerLake-L | | 0x8F | Intel Sapphire Rapids X |
| 0x8D | Intel TigerLake | | 0x8C | Intel TigerLake-L |
| 0x8D | Intel TigerLake |
| 0xA7 | Intel RocketLake |
| 0x97 | Intel AlderLake |
| 0x9A | Intel AlderLake-L |
| 0xBE | Intel AlderLake-N |
| 0xB7 | Intel RaptorLake |
| 0xBA | Intel RaptorLake-P |
| 0xBF | Intel RaptorLake-S |
| 0xAC | Intel MeteorLake |
| 0xAA | Intel MeteorLake-L |
## Metrics ## Metrics
@ -290,6 +303,7 @@ value.
| `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz | `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz
| `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz | `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz
| `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from `intel-uncore-frequency` module it needs to be accessed via MSR. In case of lack of loaded msr, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz | `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from `intel-uncore-frequency` module it needs to be accessed via MSR. In case of lack of loaded msr, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz
| `cpu_base_frequency_mhz`| CPU Base Frequency (maximum non-turbo frequency) for the processor package | MHz
### Known issues ### Known issues
@ -310,9 +324,10 @@ sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
## Example Output ## Example Output
```shell ```text
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
powerstat_package,host=ubuntu,package_id=0 cpu_base_frequency_mhz=2400i 1669118424000000000
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000

View File

@ -33,6 +33,7 @@ const (
packageThermalDesignPower = "thermal_design_power" packageThermalDesignPower = "thermal_design_power"
packageTurboLimit = "max_turbo_frequency" packageTurboLimit = "max_turbo_frequency"
packageUncoreFrequency = "uncore_frequency" packageUncoreFrequency = "uncore_frequency"
packageCPUBaseFrequency = "cpu_base_frequency"
percentageMultiplier = 100 percentageMultiplier = 100
) )
@ -46,21 +47,25 @@ type PowerStat struct {
rapl raplService rapl raplService
msr msrService msr msrService
cpuFrequency bool cpuFrequency bool
cpuBusyFrequency bool cpuBusyFrequency bool
cpuTemperature bool cpuTemperature bool
cpuC0StateResidency bool cpuC0StateResidency bool
cpuC1StateResidency bool cpuC1StateResidency bool
cpuC6StateResidency bool cpuC6StateResidency bool
cpuBusyCycles bool cpuBusyCycles bool
packageTurboLimit bool packageTurboLimit bool
packageCurrentPowerConsumption bool packageCurrentPowerConsumption bool
packageCurrentDramPowerConsumption bool packageCurrentDramPowerConsumption bool
packageThermalDesignPower bool packageThermalDesignPower bool
packageUncoreFrequency bool packageUncoreFrequency bool
cpuInfo map[string]*cpuInfo packageCPUBaseFrequency bool
skipFirstIteration bool
logOnce map[string]error cpuBusClockValue float64
cpuInfo map[string]*cpuInfo
skipFirstIteration bool
logOnce map[string]error
} }
func (*PowerStat) SampleConfig() string { func (*PowerStat) SampleConfig() string {
@ -75,23 +80,48 @@ func (p *PowerStat) Init() error {
if err != nil { if err != nil {
return err return err
} }
// Initialize MSR service only when there is at least one metric enabled
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency || p.initMSR()
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency { p.initRaplService()
p.msr = newMsrServiceWithFs(p.Log, p.fs)
}
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit ||
p.packageUncoreFrequency {
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
}
if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() { if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() {
return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather") return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather")
} }
p.fillCPUBusClock()
return nil return nil
} }
// initMSR creates the MSR service when at least one enabled metric is listed
// below as requiring it; otherwise p.msr is left untouched.
func (p *PowerStat) initMSR() {
	perCPUMetric := p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature ||
		p.cpuC0StateResidency || p.cpuC1StateResidency || p.cpuC6StateResidency || p.cpuBusyCycles
	perPackageMetric := p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency

	// Nothing that needs MSR access is enabled.
	if !perCPUMetric && !perPackageMetric {
		return
	}

	p.msr = newMsrServiceWithFs(p.Log, p.fs)
}
// initRaplService creates the RAPL service when any package-level metric is
// enabled; otherwise p.rapl is left untouched.
func (p *PowerStat) initRaplService() {
	powerMetric := p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower
	frequencyMetric := p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency

	// No package metric is enabled.
	if !powerMetric && !frequencyMetric {
		return
	}

	p.rapl = newRaplServiceWithFs(p.Log, p.fs)
}
// fillCPUBusClock stores the bus clock value in p.cpuBusClockValue when the
// cpu_base_frequency package metric is enabled. If getBusClock reports 0, the
// metric is switched off and a warning is logged instead.
func (p *PowerStat) fillCPUBusClock() {
	if !p.packageCPUBaseFrequency {
		return
	}

	// cpuBusClock is the same for every core/socket, so CPU "0" is queried.
	clock := p.getBusClock("0")
	if clock != 0 {
		p.cpuBusClockValue = clock
		return
	}

	p.Log.Warn("Disabling package metric: cpu_base_frequency_mhz. Can't detect bus clock value")
	p.packageCPUBaseFrequency = false
}
// Gather takes in an accumulator and adds the metrics that the Input gathers // Gather takes in an accumulator and adds the metrics that the Input gathers
func (p *PowerStat) Gather(acc telegraf.Accumulator) error { func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
if p.areGlobalMetricsEnabled() { if p.areGlobalMetricsEnabled() {
@ -133,6 +163,10 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
} }
} }
if p.packageCPUBaseFrequency {
p.addCPUBaseFreq(socketID, acc)
}
err := p.rapl.retrieveAndCalculateData(socketID) err := p.rapl.retrieveAndCalculateData(socketID)
if err != nil { if err != nil {
// In case of an error skip calculating metrics for this socket // In case of an error skip calculating metrics for this socket
@ -189,22 +223,17 @@ func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accu
func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) { func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) {
fields := map[string]interface{}{} fields := map[string]interface{}{}
cpuID := ""
if typeFreq == "current" { if typeFreq == "current" {
if p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() { if p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() {
p.logOnce[socketID+"msr"] = nil p.logOnce[socketID+"msr"] = nil
for _, v := range p.cpuInfo { cpuID, err := p.GetCPUIDFromSocketID(socketID)
if v.physicalID == socketID { if err != nil {
cpuID = v.cpuID p.Log.Debugf("error while reading socket ID: %v", err)
}
}
if cpuID == "" {
p.Log.Debugf("error while reading socket ID")
return return
} }
actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, "MSR_UNCORE_PERF_STATUS") actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, msrUncorePerfStatusString)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_UNCORE_PERF_STATUS: %v", err) p.Log.Debugf("error while reading %s: %v", msrUncorePerfStatusString, err)
return return
} }
actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100 actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100
@ -406,15 +435,15 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
} }
} }
if cpuID == "" || model == "" { if cpuID == "" || model == "" {
p.Log.Debugf("error while reading socket ID") p.Log.Debug("error while reading socket ID")
return return
} }
// dump_hsw_turbo_ratio_limit // dump_hsw_turbo_ratio_limit
if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X
coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18 coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18
msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2") msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit2String)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err) p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit2String, err)
return return
} }
@ -425,9 +454,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X
(model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X (model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X
coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16 coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16
msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1") msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err) p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err)
return return
} }
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups) calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups)
@ -446,17 +475,17 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
(model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X (model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X
(model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D (model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D
(model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D (model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D
coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1") coreCounts, err = p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err) p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err)
return return
} }
} }
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT") msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err) p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err)
return return
} }
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups) calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups)
@ -466,10 +495,10 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID: model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID:
model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID
coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4 coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS") msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrAtomCoreTurboRatiosString)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err) p.Log.Debugf("error while reading %s: %v", msrAtomCoreTurboRatiosString, err)
return return
} }
value := uint64(0) value := uint64(0)
@ -484,9 +513,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
} }
// dump_knl_turbo_ratio_limits // dump_knl_turbo_ratio_limits
if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT") msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString)
if err != nil { if err != nil {
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err) p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err)
return return
} }
@ -654,6 +683,79 @@ func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accu
} }
} }
// addCPUBaseFreq reads MSR_PLATFORM_INFO on a CPU belonging to socketID and
// adds the cpu_base_frequency_mhz gauge to the powerstat_package measurement.
// Failures (no CPU for the socket, MSR read error, zero frequency) are logged
// at debug level and no metric is added.
func (p *PowerStat) addCPUBaseFreq(socketID string, acc telegraf.Accumulator) {
	cpuID, err := p.GetCPUIDFromSocketID(socketID)
	if err != nil {
		p.Log.Debugf("error while getting CPU ID from Socket ID: %v", err)
		return
	}

	platformInfo, err := p.msr.readSingleMsr(cpuID, msrPlatformInfoString)
	if err != nil {
		p.Log.Debugf("error while reading %s: %v", msrPlatformInfoString, err)
		return
	}

	// The frequency ratio lives in bits 15 to 8 of the MSR value;
	// frequency = ratio * bus clock.
	ratio := (platformInfo >> 8) & 0xFF
	baseFreq := float64(ratio) * p.cpuBusClockValue
	if baseFreq == 0 {
		p.Log.Debugf("error while adding CPU base frequency, cpuBaseFreq is zero for the socket: %s", socketID)
		return
	}

	acc.AddGauge(
		"powerstat_package",
		map[string]interface{}{"cpu_base_frequency_mhz": uint64(baseFreq)},
		map[string]string{"package_id": socketID},
	)
}
// getBusClock returns the bus clock value for the given CPU, selected by the
// CPU model found in p.cpuInfo:
//   - 100.0 for models listed in busClock100,
//   - 133.0 for models listed in busClock133,
//   - the value derived from MSR_FSB_FREQ for models listed in busClockCalculate.
//
// It returns 0 when the CPU is not present in p.cpuInfo or the model is not in
// any of the lists; both cases are logged at debug level.
func (p *PowerStat) getBusClock(cpuID string) float64 {
	info, ok := p.cpuInfo[cpuID]
	if !ok {
		p.Log.Debugf("cannot find cpuInfo for cpu: %s", cpuID)
		return 0
	}

	// The model is kept as a decimal string, so the integer model lists are
	// converted to strings before the lookup.
	model := info.model
	busClock100 := []int64{0x2A, 0x2D, 0x3A, 0x3E, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, 0x4E, 0x5E, 0x55, 0x8E, 0x9E, 0xA5, 0xA6, 0x66, 0x6A, 0x6C,
		0x7D, 0x7E, 0x9D, 0x8A, 0xA7, 0x8C, 0x8D, 0x8F, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA, 0x5C, 0x5F, 0x7A, 0x86, 0x96, 0x9C, 0x57, 0x85}
	busClock133 := []int64{0x1E, 0x1F, 0x1A, 0x2E, 0x25, 0x2C, 0x2F, 0x4C}
	busClockCalculate := []int64{0x37, 0x4D}

	switch {
	case contains(convertIntegerArrayToStringArray(busClock100), model):
		return 100.0
	case contains(convertIntegerArrayToStringArray(busClock133), model):
		return 133.0
	case contains(convertIntegerArrayToStringArray(busClockCalculate), model):
		return p.getSilvermontBusClock(cpuID)
	}

	// model is a string: %s is the matching verb (%d would print "%!d(string=...)").
	p.Log.Debugf("couldn't find the freq for the model: %s", model)
	return 0.0
}
// getSilvermontBusClock reads MSR_FSB_FREQ for the given CPU and maps its low
// four bits onto silvermontFreqTable. An index outside the table falls back to
// entry 3 (116.7). It returns 0 when the MSR cannot be read; the error is
// logged at debug level.
func (p *PowerStat) getSilvermontBusClock(cpuID string) float64 {
	// Index used when the MSR reports a value the table does not cover.
	const defaultFreqIndex = 3

	silvermontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0}
	msr, err := p.msr.readSingleMsr(cpuID, msrFSBFreqString)
	if err != nil {
		p.Log.Debugf("error while reading %s: %v", msrFSBFreqString, err)
		return 0.0
	}

	i := int(msr & 0xf)
	if i >= len(silvermontFreqTable) {
		// The table entry is a float64, so it is formatted with %.1f rather than %d.
		p.Log.Debugf("unknown msr value: %d, using default bus clock value: %.1f", i, silvermontFreqTable[defaultFreqIndex])
		// same behaviour as in turbostat
		i = defaultFreqIndex
	}

	return silvermontFreqTable[i]
}
func (p *PowerStat) parsePackageMetricsConfig() { func (p *PowerStat) parsePackageMetricsConfig() {
if p.PackageMetrics == nil { if p.PackageMetrics == nil {
// if Package Metric config is empty, use the default settings. // if Package Metric config is empty, use the default settings.
@ -679,6 +781,9 @@ func (p *PowerStat) parsePackageMetricsConfig() {
if contains(p.PackageMetrics, packageUncoreFrequency) { if contains(p.PackageMetrics, packageUncoreFrequency) {
p.packageUncoreFrequency = true p.packageUncoreFrequency = true
} }
if contains(p.PackageMetrics, packageCPUBaseFrequency) {
p.packageCPUBaseFrequency = true
}
} }
func (p *PowerStat) parseCPUMetricsConfig() { func (p *PowerStat) parseCPUMetricsConfig() {
@ -719,7 +824,7 @@ func (p *PowerStat) verifyProcessor() error {
allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C, allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C,
0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E, 0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E,
0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, 0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56,
0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8F, 0x8C, 0x8D} 0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8A, 0x8F, 0x8C, 0x8D, 0xA7, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA}
stats, err := p.fs.getCPUInfoStats() stats, err := p.fs.getCPUInfoStats()
if err != nil { if err != nil {
return err return err
@ -743,6 +848,7 @@ func (p *PowerStat) verifyProcessor() error {
} }
if !strings.Contains(firstCPU.flags, "msr") { if !strings.Contains(firstCPU.flags, "msr") {
p.packageCPUBaseFrequency = false
p.cpuTemperature = false p.cpuTemperature = false
p.cpuC6StateResidency = false p.cpuC6StateResidency = false
p.cpuC0StateResidency = false p.cpuC0StateResidency = false
@ -765,9 +871,9 @@ func (p *PowerStat) verifyProcessor() error {
return nil return nil
} }
func contains(slice []string, str string) bool { func contains[T comparable](s []T, e T) bool {
for _, v := range slice { for _, v := range s {
if v == str { if v == e {
return true return true
} }
} }
@ -782,24 +888,21 @@ func (p *PowerStat) areGlobalMetricsEnabled() bool {
return p.rapl != nil return p.rapl != nil
} }
// GetCPUIDFromSocketID returns the cpuID of an entry in p.cpuInfo whose
// physicalID equals socketID. When several entries match, which one is
// returned is unspecified because map iteration order is not defined.
// An error is returned if no entry matches.
func (p *PowerStat) GetCPUIDFromSocketID(socketID string) (string, error) {
	for _, info := range p.cpuInfo {
		if info.physicalID != socketID {
			continue
		}
		return info.cpuID, nil
	}

	return "", fmt.Errorf("can't find cpuID for socketID: %s", socketID)
}
// newPowerStat creates and returns PowerStat struct // newPowerStat creates and returns PowerStat struct
func newPowerStat(fs fileService) *PowerStat { func newPowerStat(fs fileService) *PowerStat {
p := &PowerStat{ p := &PowerStat{
cpuFrequency: false, skipFirstIteration: true,
cpuC0StateResidency: false, fs: fs,
cpuC1StateResidency: false, logOnce: make(map[string]error),
cpuC6StateResidency: false,
cpuBusyCycles: false,
cpuTemperature: false,
cpuBusyFrequency: false,
packageTurboLimit: false,
packageUncoreFrequency: false,
packageCurrentPowerConsumption: false,
packageCurrentDramPowerConsumption: false,
packageThermalDesignPower: false,
skipFirstIteration: true,
fs: fs,
logOnce: make(map[string]error),
} }
return p return p

View File

@ -7,10 +7,12 @@ import (
"strconv" "strconv"
"sync" "sync"
"testing" "testing"
"time"
"github.com/stretchr/testify/mock" "github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/testutil" "github.com/influxdata/telegraf/testutil"
) )
@ -241,7 +243,7 @@ func TestReadUncoreFreq(t *testing.T) {
mockServices.msr.On("isMsrLoaded").Return(true) mockServices.msr.On("isMsrLoaded").Return(true)
mockServices.msr.On("readSingleMsr", "0", "MSR_UNCORE_PERF_STATUS").Return(uint64(10), nil) mockServices.msr.On("readSingleMsr", "0", msrUncorePerfStatusString).Return(uint64(10), nil)
mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0"). mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0").
Return(float64(500), nil) Return(float64(500), nil)
@ -641,3 +643,209 @@ func getPowerWithMockedServices() (*PowerStat, *MockServices) {
return p, &mockServices return p, &mockServices
} }
// TestGetBusClock checks the value returned by getBusClock for an unknown CPU
// model, for models with a fixed bus clock, and for models whose bus clock is
// derived from MSR_FSB_FREQ (including the read-error and out-of-table cases).
func TestGetBusClock(t *testing.T) {
tests := []struct {
name string
modelCPU uint64
socketID string
msrFSBFreqValue uint64
readSingleMsrErrFSB error
cpuBusClockValue float64
}{
{
name: "Error_withUnknownCPUmodel",
socketID: "0",
modelCPU: 0xFF,
cpuBusClockValue: 0,
},
{
name: "OK_withFBS100",
socketID: "0",
modelCPU: 106,
msrFSBFreqValue: 1,
cpuBusClockValue: 100.0,
},
{
name: "OK_withFBS133",
socketID: "0",
modelCPU: 0x1F,
cpuBusClockValue: 133,
},
{
// cpuBusClockValue is left at its zero value: a failed MSR read must yield 0.
name: "Error_withFBSCalculated",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 0,
readSingleMsrErrFSB: errors.New("something is wrong"),
},
{
name: "OK_withFBSCalculated83.3",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 0,
cpuBusClockValue: 83.3,
},
{
name: "OK_withFBSCalculated100",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 1,
cpuBusClockValue: 100,
},
{
name: "OK_withFBSCalculated133.3",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 2,
cpuBusClockValue: 133.3,
},
{
name: "OK_withFBSCalculated116.7",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 3,
cpuBusClockValue: 116.7,
},
{
name: "OK_withFBSCalculated80",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 4,
cpuBusClockValue: 80,
},
{
// MSR value 5 is outside the frequency table, so the fallback value is expected.
name: "OK_withFBSCalculatedUnknownFSBFreq",
socketID: "0",
modelCPU: 0x37,
msrFSBFreqValue: 5,
cpuBusClockValue: 116.7,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, mockServices := getPowerWithMockedServices()
// Models for which getBusClock is expected to read MSR_FSB_FREQ.
busClockCalculate := []uint64{0x37, 0x4D}
p.cpuInfo = map[string]*cpuInfo{
tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID, model: strconv.FormatUint(tt.modelCPU, 10)},
}
// Register the MSR read only for those models, so AssertExpectations
// does not fail for cases that never touch the MSR service.
if contains(busClockCalculate, tt.modelCPU) {
mockServices.msr.On("readSingleMsr", mock.Anything, msrFSBFreqString).Return(tt.msrFSBFreqValue, tt.readSingleMsrErrFSB)
}
defer mockServices.msr.AssertExpectations(t)
value := p.getBusClock(tt.socketID)
require.Equal(t, tt.cpuBusClockValue, value)
})
}
}
// TestFillCPUBusClock checks that fillCPUBusClock stores the bus clock for a
// known CPU model and clears packageCPUBaseFrequency for an unknown one.
func TestFillCPUBusClock(t *testing.T) {
tests := []struct {
name string
modelCPU uint64
busClockValue float64
packageCPUBaseFrequencySet bool
}{
{
// packageCPUBaseFrequencySet stays false: the metric must end up disabled.
name: "NotSet_0",
modelCPU: 0xFF,
busClockValue: 0,
},
{
name: "Set_100",
modelCPU: 0x2A,
busClockValue: 100,
packageCPUBaseFrequencySet: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p, _ := getPowerWithMockedServices()
// The metric starts enabled in every case; fillCPUBusClock decides whether it stays enabled.
p.packageCPUBaseFrequency = true
p.cpuInfo = map[string]*cpuInfo{
"0": {cpuID: "0", physicalID: "0", model: strconv.FormatUint(tt.modelCPU, 10)},
}
p.fillCPUBusClock()
require.Equal(t, tt.busClockValue, p.cpuBusClockValue)
require.Equal(t, tt.packageCPUBaseFrequencySet, p.packageCPUBaseFrequency)
})
}
}
// TestAddCPUBaseFreq checks that addCPUBaseFreq emits the
// cpu_base_frequency_mhz gauge only when MSR_PLATFORM_INFO is readable and the
// resulting frequency is non-zero.
func TestAddCPUBaseFreq(t *testing.T) {
tests := []struct {
name string
socketID string
readSingleMsrErrRatio error
msrPlatformInfoValue uint64
setupPowerstat func(t *testing.T)
clockBusValue float64
nonTurboRatio float64
metricExpected bool
}{
{
name: "Error_reading_msr",
socketID: "0",
clockBusValue: 100,
readSingleMsrErrRatio: errors.New("can't read msr"),
metricExpected: false,
},
{
// Bits 15..8 of the MSR value are 0x00, so the ratio is 0.
name: "NoMetric_Ratio_is_0",
socketID: "0",
msrPlatformInfoValue: 0x8008082FF2810000,
clockBusValue: 100,
nonTurboRatio: 0,
metricExpected: false,
},
{
// Bits 15..8 of the MSR value are 0x18, so the ratio is 24.
name: "OK_Ratio_is_24",
socketID: "0",
msrPlatformInfoValue: 0x8008082FF2811800,
clockBusValue: 100,
nonTurboRatio: 24,
metricExpected: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
var acc testutil.Accumulator
p, mockServices := getPowerWithMockedServices()
p.cpuInfo = map[string]*cpuInfo{
tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID},
}
p.cpuBusClockValue = tt.clockBusValue
mockServices.msr.On("readSingleMsr", mock.Anything, msrPlatformInfoString).Return(tt.msrPlatformInfoValue, tt.readSingleMsrErrRatio)
defer mockServices.msr.AssertExpectations(t)
p.addCPUBaseFreq(tt.socketID, &acc)
actual := acc.GetTelegrafMetrics()
// Cases without an expected metric only verify that nothing was emitted.
if !tt.metricExpected {
require.Len(t, actual, 0)
return
}
require.Len(t, actual, 1)
expected := []telegraf.Metric{
testutil.MustMetric(
"powerstat_package",
map[string]string{
"package_id": tt.socketID,
},
map[string]interface{}{
"cpu_base_frequency_mhz": uint64(tt.nonTurboRatio * tt.clockBusValue),
},
time.Unix(0, 0),
telegraf.Gauge,
),
}
// The timestamp is a placeholder; metrics are compared without it.
testutil.RequireMetricsEqual(t, expected, actual, testutil.IgnoreTime())
})
}
}

View File

@ -33,6 +33,18 @@ const (
turboRatioLimit2Location = 0x1AF turboRatioLimit2Location = 0x1AF
atomCoreTurboRatiosLocation = 0x66C atomCoreTurboRatiosLocation = 0x66C
uncorePerfStatusLocation = 0x621 uncorePerfStatusLocation = 0x621
platformInfo = 0xCE
fsbFreq = 0xCD
)
const (
msrTurboRatioLimitString = "MSR_TURBO_RATIO_LIMIT"
msrTurboRatioLimit1String = "MSR_TURBO_RATIO_LIMIT1"
msrTurboRatioLimit2String = "MSR_TURBO_RATIO_LIMIT2"
msrAtomCoreTurboRatiosString = "MSR_ATOM_CORE_TURBO_RATIOS"
msrUncorePerfStatusString = "MSR_UNCORE_PERF_STATUS"
msrPlatformInfoString = "MSR_PLATFORM_INFO"
msrFSBFreqString = "MSR_FSB_FREQ"
) )
// msrService is responsible for interactions with MSR. // msrService is responsible for interactions with MSR.
@ -157,16 +169,20 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error)
var msrAddress int64 var msrAddress int64
switch msr { switch msr {
case "MSR_TURBO_RATIO_LIMIT": case msrTurboRatioLimitString:
msrAddress = turboRatioLimitLocation msrAddress = turboRatioLimitLocation
case "MSR_TURBO_RATIO_LIMIT1": case msrTurboRatioLimit1String:
msrAddress = turboRatioLimit1Location msrAddress = turboRatioLimit1Location
case "MSR_TURBO_RATIO_LIMIT2": case msrTurboRatioLimit2String:
msrAddress = turboRatioLimit2Location msrAddress = turboRatioLimit2Location
case "MSR_ATOM_CORE_TURBO_RATIOS": case msrAtomCoreTurboRatiosString:
msrAddress = atomCoreTurboRatiosLocation msrAddress = atomCoreTurboRatiosLocation
case "MSR_UNCORE_PERF_STATUS": case msrUncorePerfStatusString:
msrAddress = uncorePerfStatusLocation msrAddress = uncorePerfStatusLocation
case msrPlatformInfoString:
msrAddress = platformInfo
case msrFSBFreqString:
msrAddress = fsbFreq
default: default:
return 0, fmt.Errorf("incorect name of MSR %s", msr) return 0, fmt.Errorf("incorect name of MSR %s", msr)
} }

View File

@ -10,7 +10,8 @@
## supported options list ## supported options list
## Supported options: ## Supported options:
## "current_power_consumption", "current_dram_power_consumption", ## "current_power_consumption", "current_dram_power_consumption",
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency" ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency",
## "cpu_base_frequency"
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
## The user can choose which per-CPU metrics are monitored by the plugin in ## The user can choose which per-CPU metrics are monitored by the plugin in