From 65b23f112ea5aacd354350f8b36b53d06ca29729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20=C5=BBak?= Date: Wed, 18 Jan 2023 14:10:00 +0100 Subject: [PATCH] feat(intel_powerstat): Add CPU base frequency metric and add support for new platforms (#12452) --- plugins/inputs/intel_powerstat/README.md | 121 +++++----- .../inputs/intel_powerstat/intel_powerstat.go | 225 +++++++++++++----- .../intel_powerstat/intel_powerstat_test.go | 210 +++++++++++++++- plugins/inputs/intel_powerstat/msr.go | 26 +- plugins/inputs/intel_powerstat/sample.conf | 3 +- 5 files changed, 464 insertions(+), 121 deletions(-) diff --git a/plugins/inputs/intel_powerstat/README.md b/plugins/inputs/intel_powerstat/README.md index cf806b886..120c8a0c4 100644 --- a/plugins/inputs/intel_powerstat/README.md +++ b/plugins/inputs/intel_powerstat/README.md @@ -33,7 +33,8 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details. ## supported options list ## Supported options: ## "current_power_consumption", "current_dram_power_consumption", - ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency" + ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency", + ## "cpu_base_frequency" # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] ## The user can choose which per-CPU metrics are monitored by the plugin in @@ -125,7 +126,7 @@ integrated in kernel). Modules might have to be manually enabled by using ```sh # kernel 5.x.x: sudo modprobe rapl -subo modprobe msr +sudo modprobe msr sudo modprobe intel_rapl_common sudo modprobe intel_rapl_msr @@ -151,6 +152,7 @@ and to retrieve data for calculation per-package specific metric: - `max_turbo_frequency_mhz` - `uncore_frequency_mhz_cur` +- `cpu_base_frequency_mhz` To expose other Intel PowerStat metrics root access may or may not be required (depending on OS type or configuration). @@ -180,57 +182,68 @@ are required by the plugin: _powerstat\_core.cpu\_c1\_state\_residency_ - "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_ - Processor _Model number_ must be one of the following values for plugin to - read _powerstat\_core.cpu\_c1\_state\_residency_ and - _powerstat\_core.cpu\_c6\_state\_residency_ metrics: + read _powerstat\_core.cpu\_c1\_state\_residency_ / + _powerstat\_core.cpu\_c6\_state\_residency_ and + _powerstat\_package.cpu\_base\_frequency_ metrics: -| Model number | Processor name | -|-----|-------------| -| 0x37 | Intel Atom® Bay Trail | -| 0x4D | Intel Atom® Avaton | -| 0x5C | Intel Atom® Apollo Lake | -| 0x5F | Intel Atom® Denverton | -| 0x7A | Intel Atom® Goldmont | -| 0x4C | Intel Atom® Airmont | -| 0x86 | Intel Atom® Jacobsville | -| 0x96 | Intel Atom® Elkhart Lake | -| 0x9C | Intel Atom® Jasper Lake | -| 0x1A | Intel Nehalem-EP | -| 0x1E | Intel Nehalem | -| 0x1F | Intel Nehalem-G | -| 0x2E | Intel Nehalem-EX | -| 0x25 | Intel Westmere | -| 0x2C | Intel Westmere-EP | -| 0x2F | Intel Westmere-EX | -| 0x2A | Intel Sandybridge | -| 0x2D | Intel Sandybridge-X | -| 0x3A | Intel Ivybridge | -| 0x3E | Intel Ivybridge-X | -| 0x4E | Intel Atom® Silvermont-MID | -| 0x5E | Intel Skylake | -| 0x55 | Intel Skylake-X | -| 0x8E | Intel Kabylake-L | -| 0x9E | Intel Kabylake | -| 0x6A | Intel Icelake-X | -| 0x6C | Intel Icelake-D | -| 0x7D | Intel Icelake | -| 0x7E | Intel Icelake-L | -| 0x9D | Intel Icelake-NNPI | -| 0x3C | Intel Haswell | -| 0x3F | Intel Haswell-X | -| 0x45 | Intel Haswell-L | -| 0x46 | Intel Haswell-G | -| 0x3D | Intel Broadwell | -| 0x47 | Intel Broadwell-G | -| 0x4F | Intel Broadwell-X | -| 0x56 | Intel Broadwell-D | -| 0x66 | Intel Cannonlake-L | -| 0x57 | Intel Xeon® PHI Knights Landing | -| 0x85 | Intel Xeon® PHI Knights Mill | -| 0xA5 | Intel CometLake | -| 0xA6 | Intel CometLake-L | -| 0x8F | Intel Sapphire Rapids X | -| 0x8C | Intel TigerLake-L | -| 0x8D | Intel TigerLake | +| Model number | Processor name | +|--------------|---------------------------------| +| 0x37 | Intel Atom® Bay Trail | +| 0x4D | Intel Atom® Avaton | +| 0x5C | Intel Atom® Apollo Lake | +| 0x5F | Intel Atom® Denverton | +| 0x7A | Intel Atom® Goldmont | +| 0x4C | Intel Atom® Airmont | +| 0x86 | Intel Atom® Jacobsville | +| 0x96 | Intel Atom® Elkhart Lake | +| 0x9C | Intel Atom® Jasper Lake | +| 0x1A | Intel Nehalem-EP | +| 0x1E | Intel Nehalem | +| 0x1F | Intel Nehalem-G | +| 0x2E | Intel Nehalem-EX | +| 0x25 | Intel Westmere | +| 0x2C | Intel Westmere-EP | +| 0x2F | Intel Westmere-EX | +| 0x2A | Intel Sandybridge | +| 0x2D | Intel Sandybridge-X | +| 0x3A | Intel Ivybridge | +| 0x3E | Intel Ivybridge-X | +| 0x4E | Intel Atom® Silvermont-MID | +| 0x5E | Intel Skylake | +| 0x55 | Intel Skylake-X | +| 0x8E | Intel KabyLake-L | +| 0x9E | Intel KabyLake | +| 0x6A | Intel IceLake-X | +| 0x6C | Intel IceLake-D | +| 0x7D | Intel IceLake | +| 0x7E | Intel IceLake-L | +| 0x9D | Intel IceLake-NNPI | +| 0x3C | Intel Haswell | +| 0x3F | Intel Haswell-X | +| 0x45 | Intel Haswell-L | +| 0x46 | Intel Haswell-G | +| 0x3D | Intel Broadwell | +| 0x47 | Intel Broadwell-G | +| 0x4F | Intel Broadwell-X | +| 0x56 | Intel Broadwell-D | +| 0x66 | Intel CannonLake-L | +| 0x57 | Intel Xeon® PHI Knights Landing | +| 0x85 | Intel Xeon® PHI Knights Mill | +| 0xA5 | Intel CometLake | +| 0xA6 | Intel CometLake-L | +| 0x8A | Intel Lakefield | +| 0x8F | Intel Sapphire Rapids X | +| 0x8C | Intel TigerLake-L | +| 0x8D | Intel TigerLake | +| 0xA7 | Intel RocketLake | +| 0x97 | Intel AlderLake | +| 0x9A | Intel AlderLake-L | +| 0xBE | Intel AlderLake-N | +| 0xB7 | Intel RaptorLake | +| 0xBA | Intel RaptorLake-P | +| 0xBF | Intel RaptorLake-S | +| 0xAC | Intel MeteorLake | +| 0xAA | Intel MeteorLake-L | ## Metrics @@ -290,6 +303,7 @@ value. | `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz | `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz | `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from `intel-uncore-frequency` module it needs to be accessed via MSR. In case of lack of loaded msr, only `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz + | `cpu_base_frequency_mhz`| CPU Base Frequency (maximum non-turbo frequency) for the processor package | MHz ### Known issues @@ -310,9 +324,10 @@ sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/ ## Example Output -```shell +```text powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000 powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000 +powerstat_package,host=ubuntu,package_id=0 cpu_base_frequency_mhz=2400i 1669118424000000000 powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000 powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000 diff --git a/plugins/inputs/intel_powerstat/intel_powerstat.go b/plugins/inputs/intel_powerstat/intel_powerstat.go index 295c1c69f..25cdd96b7 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat.go @@ -33,6 +33,7 @@ const ( packageThermalDesignPower = "thermal_design_power" packageTurboLimit = "max_turbo_frequency" packageUncoreFrequency = "uncore_frequency" + packageCPUBaseFrequency = "cpu_base_frequency" percentageMultiplier = 100 ) @@ -46,21 +47,25 @@ type PowerStat struct { rapl raplService msr msrService - cpuFrequency bool - cpuBusyFrequency bool - cpuTemperature bool - cpuC0StateResidency bool - cpuC1StateResidency bool - cpuC6StateResidency bool - cpuBusyCycles bool + cpuFrequency bool + cpuBusyFrequency bool + cpuTemperature bool + cpuC0StateResidency bool + cpuC1StateResidency bool + cpuC6StateResidency bool + cpuBusyCycles bool + packageTurboLimit bool packageCurrentPowerConsumption bool packageCurrentDramPowerConsumption bool packageThermalDesignPower bool packageUncoreFrequency bool - cpuInfo map[string]*cpuInfo - skipFirstIteration bool - logOnce map[string]error + packageCPUBaseFrequency bool + + cpuBusClockValue float64 + cpuInfo map[string]*cpuInfo + skipFirstIteration bool + logOnce map[string]error } func (*PowerStat) SampleConfig() string { @@ -75,23 +80,48 @@ func (p *PowerStat) Init() error { if err != nil { return err } - // Initialize MSR service only when there is at least one metric enabled - if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency || - p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency { - p.msr = newMsrServiceWithFs(p.Log, p.fs) - } - if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit || - p.packageUncoreFrequency { - p.rapl = newRaplServiceWithFs(p.Log, p.fs) - } + + p.initMSR() + p.initRaplService() if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() { return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather") } + p.fillCPUBusClock() return nil } +func (p *PowerStat) initMSR() { + // Initialize MSR service only when there is at least one metric enabled + if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency || + p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency { + p.msr = newMsrServiceWithFs(p.Log, p.fs) + } +} + +func (p *PowerStat) initRaplService() { + if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit || + p.packageUncoreFrequency || p.packageCPUBaseFrequency { + p.rapl = newRaplServiceWithFs(p.Log, p.fs) + } +} + +// fill CPUBusClockValue if required +func (p *PowerStat) fillCPUBusClock() { + if p.packageCPUBaseFrequency { + // cpuBusClock is the same for every core/socket. + busClockInfo := p.getBusClock("0") + if busClockInfo == 0 { + p.Log.Warn("Disabling package metric: cpu_base_frequency_mhz. Can't detect bus clock value") + p.packageCPUBaseFrequency = false + return + } + + p.cpuBusClockValue = busClockInfo + } +} + // Gather takes in an accumulator and adds the metrics that the Input gathers func (p *PowerStat) Gather(acc telegraf.Accumulator) error { if p.areGlobalMetricsEnabled() { @@ -133,6 +163,10 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) { } } + if p.packageCPUBaseFrequency { + p.addCPUBaseFreq(socketID, acc) + } + err := p.rapl.retrieveAndCalculateData(socketID) if err != nil { // In case of an error skip calculating metrics for this socket @@ -189,22 +223,17 @@ func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accu func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) { fields := map[string]interface{}{} - cpuID := "" if typeFreq == "current" { if p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() { p.logOnce[socketID+"msr"] = nil - for _, v := range p.cpuInfo { - if v.physicalID == socketID { - cpuID = v.cpuID - } - } - if cpuID == "" { - p.Log.Debugf("error while reading socket ID") + cpuID, err := p.GetCPUIDFromSocketID(socketID) + if err != nil { + p.Log.Debugf("error while reading socket ID: %v", err) return } - actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, "MSR_UNCORE_PERF_STATUS") + actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, msrUncorePerfStatusString) if err != nil { - p.Log.Debugf("error while reading MSR_UNCORE_PERF_STATUS: %v", err) + p.Log.Debugf("error while reading %s: %v", msrUncorePerfStatusString, err) return } actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100 @@ -406,15 +435,15 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator } } if cpuID == "" || model == "" { - p.Log.Debugf("error while reading socket ID") + p.Log.Debug("error while reading socket ID") return } // dump_hsw_turbo_ratio_limit if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18 - msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2") + msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit2String) if err != nil { - p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err) + p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit2String, err) return } @@ -425,9 +454,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X (model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16 - msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1") + msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String) if err != nil { - p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err) + p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err) return } calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups) @@ -446,17 +475,17 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator (model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X (model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D (model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D - coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1") + coreCounts, err = p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String) if err != nil { - p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err) + p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err) return } } - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT") + msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString) if err != nil { - p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err) + p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err) return } calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups) @@ -466,10 +495,10 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID: model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4 - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS") + msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrAtomCoreTurboRatiosString) if err != nil { - p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err) + p.Log.Debugf("error while reading %s: %v", msrAtomCoreTurboRatiosString, err) return } value := uint64(0) @@ -484,9 +513,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator } // dump_knl_turbo_ratio_limits if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL - msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT") + msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString) if err != nil { - p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err) + p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err) return } @@ -654,6 +683,79 @@ func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accu } } +func (p *PowerStat) addCPUBaseFreq(socketID string, acc telegraf.Accumulator) { + cpuID, err := p.GetCPUIDFromSocketID(socketID) + if err != nil { + p.Log.Debugf("error while getting CPU ID from Socket ID: %v", err) + return + } + + msrPlatformInfoMsr, err := p.msr.readSingleMsr(cpuID, msrPlatformInfoString) + if err != nil { + p.Log.Debugf("error while reading %s: %v", msrPlatformInfoString, err) + return + } + + // the value of the freq ratio is saved in bits 15 to 8. + // to get the freq -> ratio * busClock + cpuBaseFreq := float64((msrPlatformInfoMsr>>8)&0xFF) * p.cpuBusClockValue + if cpuBaseFreq == 0 { + p.Log.Debugf("error while adding CPU base frequency, cpuBaseFreq is zero for the socket: %s", socketID) + return + } + + tags := map[string]string{ + "package_id": socketID, + } + fields := map[string]interface{}{ + "cpu_base_frequency_mhz": uint64(cpuBaseFreq), + } + acc.AddGauge("powerstat_package", fields, tags) +} + +func (p *PowerStat) getBusClock(cpuID string) float64 { + cpuInfo, ok := p.cpuInfo[cpuID] + if !ok { + p.Log.Debugf("cannot find cpuInfo for cpu: %s", cpuID) + return 0 + } + + model := cpuInfo.model + busClock100 := []int64{0x2A, 0x2D, 0x3A, 0x3E, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, 0x4E, 0x5E, 0x55, 0x8E, 0x9E, 0xA5, 0xA6, 0x66, 0x6A, 0x6C, + 0x7D, 0x7E, 0x9D, 0x8A, 0xA7, 0x8C, 0x8D, 0x8F, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA, 0x5C, 0x5F, 0x7A, 0x86, 0x96, 0x9C, 0x57, 0x85} + busClock133 := []int64{0x1E, 0x1F, 0x1A, 0x2E, 0x25, 0x2C, 0x2F, 0x4C} + busClockCalculate := []int64{0x37, 0x4D} + + if contains(convertIntegerArrayToStringArray(busClock100), model) { + return 100.0 + } else if contains(convertIntegerArrayToStringArray(busClock133), model) { + return 133.0 + } else if contains(convertIntegerArrayToStringArray(busClockCalculate), model) { + return p.getSilvermontBusClock(cpuID) + } + + p.Log.Debugf("couldn't find the freq for the model: %d", model) + return 0.0 +} + +func (p *PowerStat) getSilvermontBusClock(cpuID string) float64 { + silvermontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0} + msr, err := p.msr.readSingleMsr(cpuID, msrFSBFreqString) + if err != nil { + p.Log.Debugf("error while reading %s: %v", msrFSBFreqString, err) + return 0.0 + } + + i := int(msr & 0xf) + if i >= len(silvermontFreqTable) { + p.Log.Debugf("unknown msr value: %d, using default bus clock value: %d", i, silvermontFreqTable[3]) + //same behaviour as in turbostat + i = 3 + } + + return silvermontFreqTable[i] +} + func (p *PowerStat) parsePackageMetricsConfig() { if p.PackageMetrics == nil { // if Package Metric config is empty, use the default settings. @@ -679,6 +781,9 @@ func (p *PowerStat) parsePackageMetricsConfig() { if contains(p.PackageMetrics, packageUncoreFrequency) { p.packageUncoreFrequency = true } + if contains(p.PackageMetrics, packageCPUBaseFrequency) { + p.packageCPUBaseFrequency = true + } } func (p *PowerStat) parseCPUMetricsConfig() { @@ -719,7 +824,7 @@ func (p *PowerStat) verifyProcessor() error { allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C, 0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E, 0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, - 0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8F, 0x8C, 0x8D} + 0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8A, 0x8F, 0x8C, 0x8D, 0xA7, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA} stats, err := p.fs.getCPUInfoStats() if err != nil { return err @@ -743,6 +848,7 @@ func (p *PowerStat) verifyProcessor() error { } if !strings.Contains(firstCPU.flags, "msr") { + p.packageCPUBaseFrequency = false p.cpuTemperature = false p.cpuC6StateResidency = false p.cpuC0StateResidency = false @@ -765,9 +871,9 @@ func (p *PowerStat) verifyProcessor() error { return nil } -func contains(slice []string, str string) bool { - for _, v := range slice { - if v == str { +func contains[T comparable](s []T, e T) bool { + for _, v := range s { + if v == e { return true } } @@ -782,24 +888,21 @@ func (p *PowerStat) areGlobalMetricsEnabled() bool { return p.rapl != nil } +func (p *PowerStat) GetCPUIDFromSocketID(socketID string) (string, error) { + for _, v := range p.cpuInfo { + if v.physicalID == socketID { + return v.cpuID, nil + } + } + return "", fmt.Errorf("can't find cpuID for socketID: %s", socketID) +} + // newPowerStat creates and returns PowerStat struct func newPowerStat(fs fileService) *PowerStat { p := &PowerStat{ - cpuFrequency: false, - cpuC0StateResidency: false, - cpuC1StateResidency: false, - cpuC6StateResidency: false, - cpuBusyCycles: false, - cpuTemperature: false, - cpuBusyFrequency: false, - packageTurboLimit: false, - packageUncoreFrequency: false, - packageCurrentPowerConsumption: false, - packageCurrentDramPowerConsumption: false, - packageThermalDesignPower: false, - skipFirstIteration: true, - fs: fs, - logOnce: make(map[string]error), + skipFirstIteration: true, + fs: fs, + logOnce: make(map[string]error), } return p diff --git a/plugins/inputs/intel_powerstat/intel_powerstat_test.go b/plugins/inputs/intel_powerstat/intel_powerstat_test.go index 095626585..8f80a5a8a 100644 --- a/plugins/inputs/intel_powerstat/intel_powerstat_test.go +++ b/plugins/inputs/intel_powerstat/intel_powerstat_test.go @@ -7,10 +7,12 @@ import ( "strconv" "sync" "testing" + "time" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/testutil" ) @@ -241,7 +243,7 @@ func TestReadUncoreFreq(t *testing.T) { mockServices.msr.On("isMsrLoaded").Return(true) - mockServices.msr.On("readSingleMsr", "0", "MSR_UNCORE_PERF_STATUS").Return(uint64(10), nil) + mockServices.msr.On("readSingleMsr", "0", msrUncorePerfStatusString).Return(uint64(10), nil) mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0"). Return(float64(500), nil) @@ -641,3 +643,209 @@ func getPowerWithMockedServices() (*PowerStat, *MockServices) { return p, &mockServices } + +func TestGetBusClock(t *testing.T) { + tests := []struct { + name string + modelCPU uint64 + socketID string + msrFSBFreqValue uint64 + readSingleMsrErrFSB error + cpuBusClockValue float64 + }{ + { + name: "Error_withUnknownCPUmodel", + socketID: "0", + modelCPU: 0xFF, + cpuBusClockValue: 0, + }, + { + name: "OK_withFBS100", + socketID: "0", + modelCPU: 106, + msrFSBFreqValue: 1, + cpuBusClockValue: 100.0, + }, + { + name: "OK_withFBS133", + socketID: "0", + modelCPU: 0x1F, + cpuBusClockValue: 133, + }, + { + name: "Error_withFBSCalculated", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 0, + readSingleMsrErrFSB: errors.New("something is wrong"), + }, + { + name: "OK_withFBSCalculated83.3", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 0, + cpuBusClockValue: 83.3, + }, + { + name: "OK_withFBSCalculated100", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 1, + cpuBusClockValue: 100, + }, + { + name: "OK_withFBSCalculated133.3", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 2, + cpuBusClockValue: 133.3, + }, + { + name: "OK_withFBSCalculated116.7", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 3, + cpuBusClockValue: 116.7, + }, + { + name: "OK_withFBSCalculated80", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 4, + cpuBusClockValue: 80, + }, + { + name: "OK_withFBSCalculatedUnknownFSBFreq", + socketID: "0", + modelCPU: 0x37, + msrFSBFreqValue: 5, + cpuBusClockValue: 116.7, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, mockServices := getPowerWithMockedServices() + busClockCalculate := []uint64{0x37, 0x4D} + p.cpuInfo = map[string]*cpuInfo{ + tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID, model: strconv.FormatUint(tt.modelCPU, 10)}, + } + if contains(busClockCalculate, tt.modelCPU) { + mockServices.msr.On("readSingleMsr", mock.Anything, msrFSBFreqString).Return(tt.msrFSBFreqValue, tt.readSingleMsrErrFSB) + } + defer mockServices.msr.AssertExpectations(t) + + value := p.getBusClock(tt.socketID) + require.Equal(t, tt.cpuBusClockValue, value) + }) + } +} + +func TestFillCPUBusClock(t *testing.T) { + tests := []struct { + name string + modelCPU uint64 + busClockValue float64 + packageCPUBaseFrequencySet bool + }{ + { + name: "NotSet_0", + modelCPU: 0xFF, + busClockValue: 0, + }, + { + name: "Set_100", + modelCPU: 0x2A, + busClockValue: 100, + packageCPUBaseFrequencySet: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, _ := getPowerWithMockedServices() + p.packageCPUBaseFrequency = true + p.cpuInfo = map[string]*cpuInfo{ + "0": {cpuID: "0", physicalID: "0", model: strconv.FormatUint(tt.modelCPU, 10)}, + } + + p.fillCPUBusClock() + require.Equal(t, tt.busClockValue, p.cpuBusClockValue) + require.Equal(t, tt.packageCPUBaseFrequencySet, p.packageCPUBaseFrequency) + }) + } +} + +func TestAddCPUBaseFreq(t *testing.T) { + tests := []struct { + name string + socketID string + readSingleMsrErrRatio error + msrPlatformInfoValue uint64 + setupPowerstat func(t *testing.T) + clockBusValue float64 + nonTurboRatio float64 + metricExpected bool + }{ + { + name: "Error_reading_msr", + socketID: "0", + clockBusValue: 100, + readSingleMsrErrRatio: errors.New("can't read msr"), + metricExpected: false, + }, + { + name: "NoMetric_Ratio_is_0", + socketID: "0", + msrPlatformInfoValue: 0x8008082FF2810000, + clockBusValue: 100, + nonTurboRatio: 0, + metricExpected: false, + }, + { + name: "OK_Ratio_is_24", + socketID: "0", + msrPlatformInfoValue: 0x8008082FF2811800, + clockBusValue: 100, + nonTurboRatio: 24, + metricExpected: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var acc testutil.Accumulator + p, mockServices := getPowerWithMockedServices() + + p.cpuInfo = map[string]*cpuInfo{ + tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID}, + } + p.cpuBusClockValue = tt.clockBusValue + + mockServices.msr.On("readSingleMsr", mock.Anything, msrPlatformInfoString).Return(tt.msrPlatformInfoValue, tt.readSingleMsrErrRatio) + defer mockServices.msr.AssertExpectations(t) + + p.addCPUBaseFreq(tt.socketID, &acc) + actual := acc.GetTelegrafMetrics() + if !tt.metricExpected { + require.Len(t, actual, 0) + return + } + + require.Len(t, actual, 1) + expected := []telegraf.Metric{ + testutil.MustMetric( + "powerstat_package", + map[string]string{ + "package_id": tt.socketID, + }, + map[string]interface{}{ + "cpu_base_frequency_mhz": uint64(tt.nonTurboRatio * tt.clockBusValue), + }, + time.Unix(0, 0), + telegraf.Gauge, + ), + } + testutil.RequireMetricsEqual(t, expected, actual, testutil.IgnoreTime()) + }) + } +} diff --git a/plugins/inputs/intel_powerstat/msr.go b/plugins/inputs/intel_powerstat/msr.go index 5fc49b6b0..733bf86a0 100644 --- a/plugins/inputs/intel_powerstat/msr.go +++ b/plugins/inputs/intel_powerstat/msr.go @@ -33,6 +33,18 @@ const ( turboRatioLimit2Location = 0x1AF atomCoreTurboRatiosLocation = 0x66C uncorePerfStatusLocation = 0x621 + platformInfo = 0xCE + fsbFreq = 0xCD +) + +const ( + msrTurboRatioLimitString = "MSR_TURBO_RATIO_LIMIT" + msrTurboRatioLimit1String = "MSR_TURBO_RATIO_LIMIT1" + msrTurboRatioLimit2String = "MSR_TURBO_RATIO_LIMIT2" + msrAtomCoreTurboRatiosString = "MSR_ATOM_CORE_TURBO_RATIOS" + msrUncorePerfStatusString = "MSR_UNCORE_PERF_STATUS" + msrPlatformInfoString = "MSR_PLATFORM_INFO" + msrFSBFreqString = "MSR_FSB_FREQ" ) // msrService is responsible for interactions with MSR. @@ -157,16 +169,20 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error) var msrAddress int64 switch msr { - case "MSR_TURBO_RATIO_LIMIT": + case msrTurboRatioLimitString: msrAddress = turboRatioLimitLocation - case "MSR_TURBO_RATIO_LIMIT1": + case msrTurboRatioLimit1String: msrAddress = turboRatioLimit1Location - case "MSR_TURBO_RATIO_LIMIT2": + case msrTurboRatioLimit2String: msrAddress = turboRatioLimit2Location - case "MSR_ATOM_CORE_TURBO_RATIOS": + case msrAtomCoreTurboRatiosString: msrAddress = atomCoreTurboRatiosLocation - case "MSR_UNCORE_PERF_STATUS": + case msrUncorePerfStatusString: msrAddress = uncorePerfStatusLocation + case msrPlatformInfoString: + msrAddress = platformInfo + case msrFSBFreqString: + msrAddress = fsbFreq default: return 0, fmt.Errorf("incorect name of MSR %s", msr) } diff --git a/plugins/inputs/intel_powerstat/sample.conf b/plugins/inputs/intel_powerstat/sample.conf index 0307e8269..16d7b9b26 100644 --- a/plugins/inputs/intel_powerstat/sample.conf +++ b/plugins/inputs/intel_powerstat/sample.conf @@ -10,7 +10,8 @@ ## supported options list ## Supported options: ## "current_power_consumption", "current_dram_power_consumption", - ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency" + ## "thermal_design_power", "max_turbo_frequency", "uncore_frequency", + ## "cpu_base_frequency" # package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"] ## The user can choose which per-CPU metrics are monitored by the plugin in