feat(intel_powerstat): Add CPU base frequency metric and add support for new platforms (#12452)
This commit is contained in:
parent
7725896ff4
commit
65b23f112e
|
|
@ -33,7 +33,8 @@ See the [CONFIGURATION.md][CONFIGURATION.md] for more details.
|
|||
## supported options list
|
||||
## Supported options:
|
||||
## "current_power_consumption", "current_dram_power_consumption",
|
||||
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency"
|
||||
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency",
|
||||
## "cpu_base_frequency"
|
||||
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
|
||||
|
||||
## The user can choose which per-CPU metrics are monitored by the plugin in
|
||||
|
|
@ -125,7 +126,7 @@ integrated in kernel). Modules might have to be manually enabled by using
|
|||
```sh
|
||||
# kernel 5.x.x:
|
||||
sudo modprobe rapl
|
||||
subo modprobe msr
|
||||
sudo modprobe msr
|
||||
sudo modprobe intel_rapl_common
|
||||
sudo modprobe intel_rapl_msr
|
||||
|
||||
|
|
@ -151,6 +152,7 @@ and to retrieve data for calculation per-package specific metric:
|
|||
|
||||
- `max_turbo_frequency_mhz`
|
||||
- `uncore_frequency_mhz_cur`
|
||||
- `cpu_base_frequency_mhz`
|
||||
|
||||
To expose other Intel PowerStat metrics root access may or may not be required
|
||||
(depending on OS type or configuration).
|
||||
|
|
@ -180,57 +182,68 @@ are required by the plugin:
|
|||
_powerstat\_core.cpu\_c1\_state\_residency_
|
||||
- "_dts_" shall be present to collect _powerstat\_core.cpu\_temperature_
|
||||
- Processor _Model number_ must be one of the following values for plugin to
|
||||
read _powerstat\_core.cpu\_c1\_state\_residency_ and
|
||||
_powerstat\_core.cpu\_c6\_state\_residency_ metrics:
|
||||
read _powerstat\_core.cpu\_c1\_state\_residency_ /
|
||||
_powerstat\_core.cpu\_c6\_state\_residency_ and
|
||||
_powerstat\_package.cpu\_base\_frequency_ metrics:
|
||||
|
||||
| Model number | Processor name |
|
||||
|-----|-------------|
|
||||
| 0x37 | Intel Atom® Bay Trail |
|
||||
| 0x4D | Intel Atom® Avaton |
|
||||
| 0x5C | Intel Atom® Apollo Lake |
|
||||
| 0x5F | Intel Atom® Denverton |
|
||||
| 0x7A | Intel Atom® Goldmont |
|
||||
| 0x4C | Intel Atom® Airmont |
|
||||
| 0x86 | Intel Atom® Jacobsville |
|
||||
| 0x96 | Intel Atom® Elkhart Lake |
|
||||
| 0x9C | Intel Atom® Jasper Lake |
|
||||
| 0x1A | Intel Nehalem-EP |
|
||||
| 0x1E | Intel Nehalem |
|
||||
| 0x1F | Intel Nehalem-G |
|
||||
| 0x2E | Intel Nehalem-EX |
|
||||
| 0x25 | Intel Westmere |
|
||||
| 0x2C | Intel Westmere-EP |
|
||||
| 0x2F | Intel Westmere-EX |
|
||||
| 0x2A | Intel Sandybridge |
|
||||
| 0x2D | Intel Sandybridge-X |
|
||||
| 0x3A | Intel Ivybridge |
|
||||
| 0x3E | Intel Ivybridge-X |
|
||||
| 0x4E | Intel Atom® Silvermont-MID |
|
||||
| 0x5E | Intel Skylake |
|
||||
| 0x55 | Intel Skylake-X |
|
||||
| 0x8E | Intel Kabylake-L |
|
||||
| 0x9E | Intel Kabylake |
|
||||
| 0x6A | Intel Icelake-X |
|
||||
| 0x6C | Intel Icelake-D |
|
||||
| 0x7D | Intel Icelake |
|
||||
| 0x7E | Intel Icelake-L |
|
||||
| 0x9D | Intel Icelake-NNPI |
|
||||
| 0x3C | Intel Haswell |
|
||||
| 0x3F | Intel Haswell-X |
|
||||
| 0x45 | Intel Haswell-L |
|
||||
| 0x46 | Intel Haswell-G |
|
||||
| 0x3D | Intel Broadwell |
|
||||
| 0x47 | Intel Broadwell-G |
|
||||
| 0x4F | Intel Broadwell-X |
|
||||
| 0x56 | Intel Broadwell-D |
|
||||
| 0x66 | Intel Cannonlake-L |
|
||||
| 0x57 | Intel Xeon® PHI Knights Landing |
|
||||
| 0x85 | Intel Xeon® PHI Knights Mill |
|
||||
| 0xA5 | Intel CometLake |
|
||||
| 0xA6 | Intel CometLake-L |
|
||||
| 0x8F | Intel Sapphire Rapids X |
|
||||
| 0x8C | Intel TigerLake-L |
|
||||
| 0x8D | Intel TigerLake |
|
||||
| Model number | Processor name |
|
||||
|--------------|---------------------------------|
|
||||
| 0x37 | Intel Atom® Bay Trail |
|
||||
| 0x4D | Intel Atom® Avoton |
|
||||
| 0x5C | Intel Atom® Apollo Lake |
|
||||
| 0x5F | Intel Atom® Denverton |
|
||||
| 0x7A | Intel Atom® Goldmont |
|
||||
| 0x4C | Intel Atom® Airmont |
|
||||
| 0x86 | Intel Atom® Jacobsville |
|
||||
| 0x96 | Intel Atom® Elkhart Lake |
|
||||
| 0x9C | Intel Atom® Jasper Lake |
|
||||
| 0x1A | Intel Nehalem-EP |
|
||||
| 0x1E | Intel Nehalem |
|
||||
| 0x1F | Intel Nehalem-G |
|
||||
| 0x2E | Intel Nehalem-EX |
|
||||
| 0x25 | Intel Westmere |
|
||||
| 0x2C | Intel Westmere-EP |
|
||||
| 0x2F | Intel Westmere-EX |
|
||||
| 0x2A | Intel Sandybridge |
|
||||
| 0x2D | Intel Sandybridge-X |
|
||||
| 0x3A | Intel Ivybridge |
|
||||
| 0x3E | Intel Ivybridge-X |
|
||||
| 0x4E | Intel Atom® Silvermont-MID |
|
||||
| 0x5E | Intel Skylake |
|
||||
| 0x55 | Intel Skylake-X |
|
||||
| 0x8E | Intel KabyLake-L |
|
||||
| 0x9E | Intel KabyLake |
|
||||
| 0x6A | Intel IceLake-X |
|
||||
| 0x6C | Intel IceLake-D |
|
||||
| 0x7D | Intel IceLake |
|
||||
| 0x7E | Intel IceLake-L |
|
||||
| 0x9D | Intel IceLake-NNPI |
|
||||
| 0x3C | Intel Haswell |
|
||||
| 0x3F | Intel Haswell-X |
|
||||
| 0x45 | Intel Haswell-L |
|
||||
| 0x46 | Intel Haswell-G |
|
||||
| 0x3D | Intel Broadwell |
|
||||
| 0x47 | Intel Broadwell-G |
|
||||
| 0x4F | Intel Broadwell-X |
|
||||
| 0x56 | Intel Broadwell-D |
|
||||
| 0x66 | Intel CannonLake-L |
|
||||
| 0x57 | Intel Xeon® PHI Knights Landing |
|
||||
| 0x85 | Intel Xeon® PHI Knights Mill |
|
||||
| 0xA5 | Intel CometLake |
|
||||
| 0xA6 | Intel CometLake-L |
|
||||
| 0x8A | Intel Lakefield |
|
||||
| 0x8F | Intel Sapphire Rapids X |
|
||||
| 0x8C | Intel TigerLake-L |
|
||||
| 0x8D | Intel TigerLake |
|
||||
| 0xA7 | Intel RocketLake |
|
||||
| 0x97 | Intel AlderLake |
|
||||
| 0x9A | Intel AlderLake-L |
|
||||
| 0xBE | Intel AlderLake-N |
|
||||
| 0xB7 | Intel RaptorLake |
|
||||
| 0xBA | Intel RaptorLake-P |
|
||||
| 0xBF | Intel RaptorLake-S |
|
||||
| 0xAC | Intel MeteorLake |
|
||||
| 0xAA | Intel MeteorLake-L |
|
||||
|
||||
## Metrics
|
||||
|
||||
|
|
@ -290,6 +303,7 @@ value.
|
|||
| `uncore_frequency_limit_mhz_min`| Minimum uncore frequency limit for die in processor package | MHz
|
||||
| `uncore_frequency_limit_mhz_max`| Maximum uncore frequency limit for die in processor package | MHz
|
||||
| `uncore_frequency_mhz_cur`| Current uncore frequency for die in processor package. Available only with tag `current`. Since this value is not yet available from the `intel-uncore-frequency` module, it needs to be accessed via MSR. If the `msr` module is not loaded, only the `uncore_frequency_limit_mhz_min` and `uncore_frequency_limit_mhz_max` metrics will be collected | MHz
|
||||
| `cpu_base_frequency_mhz`| CPU Base Frequency (maximum non-turbo frequency) for the processor package | MHz
|
||||
|
||||
### Known issues
|
||||
|
||||
|
|
@ -310,9 +324,10 @@ sudo chmod -R a+rx /sys/devices/virtual/powercap/intel-rapl/
|
|||
|
||||
## Example Output
|
||||
|
||||
```shell
|
||||
```text
|
||||
powerstat_package,host=ubuntu,package_id=0 thermal_design_power_watts=160 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_power_consumption_watts=35 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 cpu_base_frequency_mhz=2400i 1669118424000000000
|
||||
powerstat_package,host=ubuntu,package_id=0 current_dram_power_consumption_watts=13.94 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0,active_cores=0 max_turbo_frequency_mhz=3000i 1606494744000000000
|
||||
powerstat_package,host=ubuntu,package_id=0,active_cores=1 max_turbo_frequency_mhz=2800i 1606494744000000000
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ const (
|
|||
packageThermalDesignPower = "thermal_design_power"
|
||||
packageTurboLimit = "max_turbo_frequency"
|
||||
packageUncoreFrequency = "uncore_frequency"
|
||||
packageCPUBaseFrequency = "cpu_base_frequency"
|
||||
percentageMultiplier = 100
|
||||
)
|
||||
|
||||
|
|
@ -46,21 +47,25 @@ type PowerStat struct {
|
|||
rapl raplService
|
||||
msr msrService
|
||||
|
||||
cpuFrequency bool
|
||||
cpuBusyFrequency bool
|
||||
cpuTemperature bool
|
||||
cpuC0StateResidency bool
|
||||
cpuC1StateResidency bool
|
||||
cpuC6StateResidency bool
|
||||
cpuBusyCycles bool
|
||||
cpuFrequency bool
|
||||
cpuBusyFrequency bool
|
||||
cpuTemperature bool
|
||||
cpuC0StateResidency bool
|
||||
cpuC1StateResidency bool
|
||||
cpuC6StateResidency bool
|
||||
cpuBusyCycles bool
|
||||
|
||||
packageTurboLimit bool
|
||||
packageCurrentPowerConsumption bool
|
||||
packageCurrentDramPowerConsumption bool
|
||||
packageThermalDesignPower bool
|
||||
packageUncoreFrequency bool
|
||||
cpuInfo map[string]*cpuInfo
|
||||
skipFirstIteration bool
|
||||
logOnce map[string]error
|
||||
packageCPUBaseFrequency bool
|
||||
|
||||
cpuBusClockValue float64
|
||||
cpuInfo map[string]*cpuInfo
|
||||
skipFirstIteration bool
|
||||
logOnce map[string]error
|
||||
}
|
||||
|
||||
func (*PowerStat) SampleConfig() string {
|
||||
|
|
@ -75,23 +80,48 @@ func (p *PowerStat) Init() error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Initialize MSR service only when there is at least one metric enabled
|
||||
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
|
||||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency {
|
||||
p.msr = newMsrServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit ||
|
||||
p.packageUncoreFrequency {
|
||||
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
|
||||
p.initMSR()
|
||||
p.initRaplService()
|
||||
|
||||
if !p.areCoreMetricsEnabled() && !p.areGlobalMetricsEnabled() {
|
||||
return fmt.Errorf("all configuration options are empty or invalid. Did not find anything to gather")
|
||||
}
|
||||
|
||||
p.fillCPUBusClock()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *PowerStat) initMSR() {
|
||||
// Initialize MSR service only when there is at least one metric enabled
|
||||
if p.cpuFrequency || p.cpuBusyFrequency || p.cpuTemperature || p.cpuC0StateResidency || p.cpuC1StateResidency ||
|
||||
p.cpuC6StateResidency || p.cpuBusyCycles || p.packageTurboLimit || p.packageUncoreFrequency || p.packageCPUBaseFrequency {
|
||||
p.msr = newMsrServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) initRaplService() {
|
||||
if p.packageCurrentPowerConsumption || p.packageCurrentDramPowerConsumption || p.packageThermalDesignPower || p.packageTurboLimit ||
|
||||
p.packageUncoreFrequency || p.packageCPUBaseFrequency {
|
||||
p.rapl = newRaplServiceWithFs(p.Log, p.fs)
|
||||
}
|
||||
}
|
||||
|
||||
// fill CPUBusClockValue if required
|
||||
func (p *PowerStat) fillCPUBusClock() {
|
||||
if p.packageCPUBaseFrequency {
|
||||
// cpuBusClock is the same for every core/socket.
|
||||
busClockInfo := p.getBusClock("0")
|
||||
if busClockInfo == 0 {
|
||||
p.Log.Warn("Disabling package metric: cpu_base_frequency_mhz. Can't detect bus clock value")
|
||||
p.packageCPUBaseFrequency = false
|
||||
return
|
||||
}
|
||||
|
||||
p.cpuBusClockValue = busClockInfo
|
||||
}
|
||||
}
|
||||
|
||||
// Gather takes in an accumulator and adds the metrics that the Input gathers
|
||||
func (p *PowerStat) Gather(acc telegraf.Accumulator) error {
|
||||
if p.areGlobalMetricsEnabled() {
|
||||
|
|
@ -133,6 +163,10 @@ func (p *PowerStat) addGlobalMetrics(acc telegraf.Accumulator) {
|
|||
}
|
||||
}
|
||||
|
||||
if p.packageCPUBaseFrequency {
|
||||
p.addCPUBaseFreq(socketID, acc)
|
||||
}
|
||||
|
||||
err := p.rapl.retrieveAndCalculateData(socketID)
|
||||
if err != nil {
|
||||
// In case of an error skip calculating metrics for this socket
|
||||
|
|
@ -189,22 +223,17 @@ func (p *PowerStat) addUncoreFreq(socketID string, die string, acc telegraf.Accu
|
|||
|
||||
func (p *PowerStat) readUncoreFreq(typeFreq string, socketID string, die string, acc telegraf.Accumulator) {
|
||||
fields := map[string]interface{}{}
|
||||
cpuID := ""
|
||||
if typeFreq == "current" {
|
||||
if p.areCoreMetricsEnabled() && p.msr.isMsrLoaded() {
|
||||
p.logOnce[socketID+"msr"] = nil
|
||||
for _, v := range p.cpuInfo {
|
||||
if v.physicalID == socketID {
|
||||
cpuID = v.cpuID
|
||||
}
|
||||
}
|
||||
if cpuID == "" {
|
||||
p.Log.Debugf("error while reading socket ID")
|
||||
cpuID, err := p.GetCPUIDFromSocketID(socketID)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading socket ID: %v", err)
|
||||
return
|
||||
}
|
||||
actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, "MSR_UNCORE_PERF_STATUS")
|
||||
actualUncoreFreq, err := p.msr.readSingleMsr(cpuID, msrUncorePerfStatusString)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_UNCORE_PERF_STATUS: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrUncorePerfStatusString, err)
|
||||
return
|
||||
}
|
||||
actualUncoreFreq = (actualUncoreFreq & 0x3F) * 100
|
||||
|
|
@ -406,15 +435,15 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
|
|||
}
|
||||
}
|
||||
if cpuID == "" || model == "" {
|
||||
p.Log.Debugf("error while reading socket ID")
|
||||
p.Log.Debug("error while reading socket ID")
|
||||
return
|
||||
}
|
||||
// dump_hsw_turbo_ratio_limit
|
||||
if model == strconv.FormatInt(0x3F, 10) { // INTEL_FAM6_HASWELL_X
|
||||
coreCounts := uint64(0x1211) // counting the number of active cores 17 and 18
|
||||
msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT2")
|
||||
msrTurboRatioLimit2, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit2String)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT2: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit2String, err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -425,9 +454,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
|
|||
if (model == strconv.FormatInt(0x3E, 10)) || // INTEL_FAM6_IVYBRIDGE_X
|
||||
(model == strconv.FormatInt(0x3F, 10)) { // INTEL_FAM6_HASWELL_X
|
||||
coreCounts := uint64(0x100F0E0D0C0B0A09) // counting the number of active cores 9 to 16
|
||||
msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
|
||||
msrTurboRatioLimit1, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err)
|
||||
return
|
||||
}
|
||||
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit1, turboRatioLimitGroups)
|
||||
|
|
@ -446,17 +475,17 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
|
|||
(model == strconv.FormatInt(0x6C, 10) || model == strconv.FormatInt(0x8F, 10) || model == strconv.FormatInt(0x6A, 10)) || // INTEL_FAM6_ICELAKE_X
|
||||
(model == strconv.FormatInt(0x5F, 10)) || // INTEL_FAM6_ATOM_GOLDMONT_D
|
||||
(model == strconv.FormatInt(0x86, 10)) { // INTEL_FAM6_ATOM_TREMONT_D
|
||||
coreCounts, err = p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT1")
|
||||
coreCounts, err = p.msr.readSingleMsr(cpuID, msrTurboRatioLimit1String)
|
||||
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT1: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimit1String, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err)
|
||||
return
|
||||
}
|
||||
calculateTurboRatioGroup(coreCounts, msrTurboRatioLimit, turboRatioLimitGroups)
|
||||
|
|
@ -466,10 +495,10 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
|
|||
model == strconv.FormatInt(0x4A, 10) || // INTEL_FAM6_ATOM_SILVERMONT_MID:
|
||||
model == strconv.FormatInt(0x5A, 10) { // INTEL_FAM6_ATOM_AIRMONT_MID
|
||||
coreCounts := uint64(0x04030201) // counting the number of active cores 1 to 4
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_ATOM_CORE_TURBO_RATIOS")
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrAtomCoreTurboRatiosString)
|
||||
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_ATOM_CORE_TURBO_RATIOS: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrAtomCoreTurboRatiosString, err)
|
||||
return
|
||||
}
|
||||
value := uint64(0)
|
||||
|
|
@ -484,9 +513,9 @@ func (p *PowerStat) addTurboRatioLimit(socketID string, acc telegraf.Accumulator
|
|||
}
|
||||
// dump_knl_turbo_ratio_limits
|
||||
if model == strconv.FormatInt(0x57, 10) { // INTEL_FAM6_XEON_PHI_KNL
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, "MSR_TURBO_RATIO_LIMIT")
|
||||
msrTurboRatioLimit, err := p.msr.readSingleMsr(cpuID, msrTurboRatioLimitString)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading MSR_TURBO_RATIO_LIMIT: %v", err)
|
||||
p.Log.Debugf("error while reading %s: %v", msrTurboRatioLimitString, err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -654,6 +683,79 @@ func (p *PowerStat) addCPUC0StateResidencyMetric(cpuID string, acc telegraf.Accu
|
|||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) addCPUBaseFreq(socketID string, acc telegraf.Accumulator) {
|
||||
cpuID, err := p.GetCPUIDFromSocketID(socketID)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while getting CPU ID from Socket ID: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
msrPlatformInfoMsr, err := p.msr.readSingleMsr(cpuID, msrPlatformInfoString)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading %s: %v", msrPlatformInfoString, err)
|
||||
return
|
||||
}
|
||||
|
||||
// the value of the freq ratio is saved in bits 15 to 8.
|
||||
// to get the freq -> ratio * busClock
|
||||
cpuBaseFreq := float64((msrPlatformInfoMsr>>8)&0xFF) * p.cpuBusClockValue
|
||||
if cpuBaseFreq == 0 {
|
||||
p.Log.Debugf("error while adding CPU base frequency, cpuBaseFreq is zero for the socket: %s", socketID)
|
||||
return
|
||||
}
|
||||
|
||||
tags := map[string]string{
|
||||
"package_id": socketID,
|
||||
}
|
||||
fields := map[string]interface{}{
|
||||
"cpu_base_frequency_mhz": uint64(cpuBaseFreq),
|
||||
}
|
||||
acc.AddGauge("powerstat_package", fields, tags)
|
||||
}
|
||||
|
||||
func (p *PowerStat) getBusClock(cpuID string) float64 {
|
||||
cpuInfo, ok := p.cpuInfo[cpuID]
|
||||
if !ok {
|
||||
p.Log.Debugf("cannot find cpuInfo for cpu: %s", cpuID)
|
||||
return 0
|
||||
}
|
||||
|
||||
model := cpuInfo.model
|
||||
busClock100 := []int64{0x2A, 0x2D, 0x3A, 0x3E, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56, 0x4E, 0x5E, 0x55, 0x8E, 0x9E, 0xA5, 0xA6, 0x66, 0x6A, 0x6C,
|
||||
0x7D, 0x7E, 0x9D, 0x8A, 0xA7, 0x8C, 0x8D, 0x8F, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA, 0x5C, 0x5F, 0x7A, 0x86, 0x96, 0x9C, 0x57, 0x85}
|
||||
busClock133 := []int64{0x1E, 0x1F, 0x1A, 0x2E, 0x25, 0x2C, 0x2F, 0x4C}
|
||||
busClockCalculate := []int64{0x37, 0x4D}
|
||||
|
||||
if contains(convertIntegerArrayToStringArray(busClock100), model) {
|
||||
return 100.0
|
||||
} else if contains(convertIntegerArrayToStringArray(busClock133), model) {
|
||||
return 133.0
|
||||
} else if contains(convertIntegerArrayToStringArray(busClockCalculate), model) {
|
||||
return p.getSilvermontBusClock(cpuID)
|
||||
}
|
||||
|
||||
p.Log.Debugf("couldn't find the freq for the model: %d", model)
|
||||
return 0.0
|
||||
}
|
||||
|
||||
func (p *PowerStat) getSilvermontBusClock(cpuID string) float64 {
|
||||
silvermontFreqTable := []float64{83.3, 100.0, 133.3, 116.7, 80.0}
|
||||
msr, err := p.msr.readSingleMsr(cpuID, msrFSBFreqString)
|
||||
if err != nil {
|
||||
p.Log.Debugf("error while reading %s: %v", msrFSBFreqString, err)
|
||||
return 0.0
|
||||
}
|
||||
|
||||
i := int(msr & 0xf)
|
||||
if i >= len(silvermontFreqTable) {
|
||||
p.Log.Debugf("unknown msr value: %d, using default bus clock value: %d", i, silvermontFreqTable[3])
|
||||
//same behaviour as in turbostat
|
||||
i = 3
|
||||
}
|
||||
|
||||
return silvermontFreqTable[i]
|
||||
}
|
||||
|
||||
func (p *PowerStat) parsePackageMetricsConfig() {
|
||||
if p.PackageMetrics == nil {
|
||||
// if Package Metric config is empty, use the default settings.
|
||||
|
|
@ -679,6 +781,9 @@ func (p *PowerStat) parsePackageMetricsConfig() {
|
|||
if contains(p.PackageMetrics, packageUncoreFrequency) {
|
||||
p.packageUncoreFrequency = true
|
||||
}
|
||||
if contains(p.PackageMetrics, packageCPUBaseFrequency) {
|
||||
p.packageCPUBaseFrequency = true
|
||||
}
|
||||
}
|
||||
|
||||
func (p *PowerStat) parseCPUMetricsConfig() {
|
||||
|
|
@ -719,7 +824,7 @@ func (p *PowerStat) verifyProcessor() error {
|
|||
allowedProcessorModelsForC1C6 := []int64{0x37, 0x4D, 0x5C, 0x5F, 0x7A, 0x4C, 0x86, 0x96, 0x9C,
|
||||
0x1A, 0x1E, 0x1F, 0x2E, 0x25, 0x2C, 0x2F, 0x2A, 0x2D, 0x3A, 0x3E, 0x4E, 0x5E, 0x55, 0x8E,
|
||||
0x9E, 0x6A, 0x6C, 0x7D, 0x7E, 0x9D, 0x3C, 0x3F, 0x45, 0x46, 0x3D, 0x47, 0x4F, 0x56,
|
||||
0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8F, 0x8C, 0x8D}
|
||||
0x66, 0x57, 0x85, 0xA5, 0xA6, 0x8A, 0x8F, 0x8C, 0x8D, 0xA7, 0x97, 0x9A, 0xBE, 0xB7, 0xBA, 0xBF, 0xAC, 0xAA}
|
||||
stats, err := p.fs.getCPUInfoStats()
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
@ -743,6 +848,7 @@ func (p *PowerStat) verifyProcessor() error {
|
|||
}
|
||||
|
||||
if !strings.Contains(firstCPU.flags, "msr") {
|
||||
p.packageCPUBaseFrequency = false
|
||||
p.cpuTemperature = false
|
||||
p.cpuC6StateResidency = false
|
||||
p.cpuC0StateResidency = false
|
||||
|
|
@ -765,9 +871,9 @@ func (p *PowerStat) verifyProcessor() error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func contains(slice []string, str string) bool {
|
||||
for _, v := range slice {
|
||||
if v == str {
|
||||
func contains[T comparable](s []T, e T) bool {
|
||||
for _, v := range s {
|
||||
if v == e {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
|
@ -782,24 +888,21 @@ func (p *PowerStat) areGlobalMetricsEnabled() bool {
|
|||
return p.rapl != nil
|
||||
}
|
||||
|
||||
func (p *PowerStat) GetCPUIDFromSocketID(socketID string) (string, error) {
|
||||
for _, v := range p.cpuInfo {
|
||||
if v.physicalID == socketID {
|
||||
return v.cpuID, nil
|
||||
}
|
||||
}
|
||||
return "", fmt.Errorf("can't find cpuID for socketID: %s", socketID)
|
||||
}
|
||||
|
||||
// newPowerStat creates and returns PowerStat struct
|
||||
func newPowerStat(fs fileService) *PowerStat {
|
||||
p := &PowerStat{
|
||||
cpuFrequency: false,
|
||||
cpuC0StateResidency: false,
|
||||
cpuC1StateResidency: false,
|
||||
cpuC6StateResidency: false,
|
||||
cpuBusyCycles: false,
|
||||
cpuTemperature: false,
|
||||
cpuBusyFrequency: false,
|
||||
packageTurboLimit: false,
|
||||
packageUncoreFrequency: false,
|
||||
packageCurrentPowerConsumption: false,
|
||||
packageCurrentDramPowerConsumption: false,
|
||||
packageThermalDesignPower: false,
|
||||
skipFirstIteration: true,
|
||||
fs: fs,
|
||||
logOnce: make(map[string]error),
|
||||
skipFirstIteration: true,
|
||||
fs: fs,
|
||||
logOnce: make(map[string]error),
|
||||
}
|
||||
|
||||
return p
|
||||
|
|
|
|||
|
|
@ -7,10 +7,12 @@ import (
|
|||
"strconv"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/mock"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/influxdata/telegraf"
|
||||
"github.com/influxdata/telegraf/testutil"
|
||||
)
|
||||
|
||||
|
|
@ -241,7 +243,7 @@ func TestReadUncoreFreq(t *testing.T) {
|
|||
|
||||
mockServices.msr.On("isMsrLoaded").Return(true)
|
||||
|
||||
mockServices.msr.On("readSingleMsr", "0", "MSR_UNCORE_PERF_STATUS").Return(uint64(10), nil)
|
||||
mockServices.msr.On("readSingleMsr", "0", msrUncorePerfStatusString).Return(uint64(10), nil)
|
||||
|
||||
mockServices.msr.On("retrieveUncoreFrequency", "0", "initial", "min", "0").
|
||||
Return(float64(500), nil)
|
||||
|
|
@ -641,3 +643,209 @@ func getPowerWithMockedServices() (*PowerStat, *MockServices) {
|
|||
|
||||
return p, &mockServices
|
||||
}
|
||||
|
||||
func TestGetBusClock(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
modelCPU uint64
|
||||
socketID string
|
||||
msrFSBFreqValue uint64
|
||||
readSingleMsrErrFSB error
|
||||
cpuBusClockValue float64
|
||||
}{
|
||||
{
|
||||
name: "Error_withUnknownCPUmodel",
|
||||
socketID: "0",
|
||||
modelCPU: 0xFF,
|
||||
cpuBusClockValue: 0,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBS100",
|
||||
socketID: "0",
|
||||
modelCPU: 106,
|
||||
msrFSBFreqValue: 1,
|
||||
cpuBusClockValue: 100.0,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBS133",
|
||||
socketID: "0",
|
||||
modelCPU: 0x1F,
|
||||
cpuBusClockValue: 133,
|
||||
},
|
||||
{
|
||||
name: "Error_withFBSCalculated",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 0,
|
||||
readSingleMsrErrFSB: errors.New("something is wrong"),
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculated83.3",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 0,
|
||||
cpuBusClockValue: 83.3,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculated100",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 1,
|
||||
cpuBusClockValue: 100,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculated133.3",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 2,
|
||||
cpuBusClockValue: 133.3,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculated116.7",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 3,
|
||||
cpuBusClockValue: 116.7,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculated80",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 4,
|
||||
cpuBusClockValue: 80,
|
||||
},
|
||||
{
|
||||
name: "OK_withFBSCalculatedUnknownFSBFreq",
|
||||
socketID: "0",
|
||||
modelCPU: 0x37,
|
||||
msrFSBFreqValue: 5,
|
||||
cpuBusClockValue: 116.7,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p, mockServices := getPowerWithMockedServices()
|
||||
busClockCalculate := []uint64{0x37, 0x4D}
|
||||
p.cpuInfo = map[string]*cpuInfo{
|
||||
tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID, model: strconv.FormatUint(tt.modelCPU, 10)},
|
||||
}
|
||||
if contains(busClockCalculate, tt.modelCPU) {
|
||||
mockServices.msr.On("readSingleMsr", mock.Anything, msrFSBFreqString).Return(tt.msrFSBFreqValue, tt.readSingleMsrErrFSB)
|
||||
}
|
||||
defer mockServices.msr.AssertExpectations(t)
|
||||
|
||||
value := p.getBusClock(tt.socketID)
|
||||
require.Equal(t, tt.cpuBusClockValue, value)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFillCPUBusClock(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
modelCPU uint64
|
||||
busClockValue float64
|
||||
packageCPUBaseFrequencySet bool
|
||||
}{
|
||||
{
|
||||
name: "NotSet_0",
|
||||
modelCPU: 0xFF,
|
||||
busClockValue: 0,
|
||||
},
|
||||
{
|
||||
name: "Set_100",
|
||||
modelCPU: 0x2A,
|
||||
busClockValue: 100,
|
||||
packageCPUBaseFrequencySet: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p, _ := getPowerWithMockedServices()
|
||||
p.packageCPUBaseFrequency = true
|
||||
p.cpuInfo = map[string]*cpuInfo{
|
||||
"0": {cpuID: "0", physicalID: "0", model: strconv.FormatUint(tt.modelCPU, 10)},
|
||||
}
|
||||
|
||||
p.fillCPUBusClock()
|
||||
require.Equal(t, tt.busClockValue, p.cpuBusClockValue)
|
||||
require.Equal(t, tt.packageCPUBaseFrequencySet, p.packageCPUBaseFrequency)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAddCPUBaseFreq(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
socketID string
|
||||
readSingleMsrErrRatio error
|
||||
msrPlatformInfoValue uint64
|
||||
setupPowerstat func(t *testing.T)
|
||||
clockBusValue float64
|
||||
nonTurboRatio float64
|
||||
metricExpected bool
|
||||
}{
|
||||
{
|
||||
name: "Error_reading_msr",
|
||||
socketID: "0",
|
||||
clockBusValue: 100,
|
||||
readSingleMsrErrRatio: errors.New("can't read msr"),
|
||||
metricExpected: false,
|
||||
},
|
||||
{
|
||||
name: "NoMetric_Ratio_is_0",
|
||||
socketID: "0",
|
||||
msrPlatformInfoValue: 0x8008082FF2810000,
|
||||
clockBusValue: 100,
|
||||
nonTurboRatio: 0,
|
||||
metricExpected: false,
|
||||
},
|
||||
{
|
||||
name: "OK_Ratio_is_24",
|
||||
socketID: "0",
|
||||
msrPlatformInfoValue: 0x8008082FF2811800,
|
||||
clockBusValue: 100,
|
||||
nonTurboRatio: 24,
|
||||
metricExpected: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
var acc testutil.Accumulator
|
||||
p, mockServices := getPowerWithMockedServices()
|
||||
|
||||
p.cpuInfo = map[string]*cpuInfo{
|
||||
tt.socketID: {cpuID: tt.socketID, physicalID: tt.socketID},
|
||||
}
|
||||
p.cpuBusClockValue = tt.clockBusValue
|
||||
|
||||
mockServices.msr.On("readSingleMsr", mock.Anything, msrPlatformInfoString).Return(tt.msrPlatformInfoValue, tt.readSingleMsrErrRatio)
|
||||
defer mockServices.msr.AssertExpectations(t)
|
||||
|
||||
p.addCPUBaseFreq(tt.socketID, &acc)
|
||||
actual := acc.GetTelegrafMetrics()
|
||||
if !tt.metricExpected {
|
||||
require.Len(t, actual, 0)
|
||||
return
|
||||
}
|
||||
|
||||
require.Len(t, actual, 1)
|
||||
expected := []telegraf.Metric{
|
||||
testutil.MustMetric(
|
||||
"powerstat_package",
|
||||
map[string]string{
|
||||
"package_id": tt.socketID,
|
||||
},
|
||||
map[string]interface{}{
|
||||
"cpu_base_frequency_mhz": uint64(tt.nonTurboRatio * tt.clockBusValue),
|
||||
},
|
||||
time.Unix(0, 0),
|
||||
telegraf.Gauge,
|
||||
),
|
||||
}
|
||||
testutil.RequireMetricsEqual(t, expected, actual, testutil.IgnoreTime())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,18 @@ const (
|
|||
turboRatioLimit2Location = 0x1AF
|
||||
atomCoreTurboRatiosLocation = 0x66C
|
||||
uncorePerfStatusLocation = 0x621
|
||||
platformInfo = 0xCE
|
||||
fsbFreq = 0xCD
|
||||
)
|
||||
|
||||
// MSR names. These are the values readSingleMsr switches on to pick the
// register address to read.
const (
	// Turbo ratio limit registers.
	msrTurboRatioLimitString     = "MSR_TURBO_RATIO_LIMIT"
	msrTurboRatioLimit1String    = "MSR_TURBO_RATIO_LIMIT1"
	msrTurboRatioLimit2String    = "MSR_TURBO_RATIO_LIMIT2"
	msrAtomCoreTurboRatiosString = "MSR_ATOM_CORE_TURBO_RATIOS"

	// Uncore status register.
	msrUncorePerfStatusString = "MSR_UNCORE_PERF_STATUS"

	// Registers used to compute the CPU base frequency.
	msrPlatformInfoString = "MSR_PLATFORM_INFO"
	msrFSBFreqString      = "MSR_FSB_FREQ"
)
|
||||
|
||||
// msrService is responsible for interactions with MSR.
|
||||
|
|
@ -157,16 +169,20 @@ func (m *msrServiceImpl) readSingleMsr(core string, msr string) (uint64, error)
|
|||
|
||||
var msrAddress int64
|
||||
switch msr {
|
||||
case "MSR_TURBO_RATIO_LIMIT":
|
||||
case msrTurboRatioLimitString:
|
||||
msrAddress = turboRatioLimitLocation
|
||||
case "MSR_TURBO_RATIO_LIMIT1":
|
||||
case msrTurboRatioLimit1String:
|
||||
msrAddress = turboRatioLimit1Location
|
||||
case "MSR_TURBO_RATIO_LIMIT2":
|
||||
case msrTurboRatioLimit2String:
|
||||
msrAddress = turboRatioLimit2Location
|
||||
case "MSR_ATOM_CORE_TURBO_RATIOS":
|
||||
case msrAtomCoreTurboRatiosString:
|
||||
msrAddress = atomCoreTurboRatiosLocation
|
||||
case "MSR_UNCORE_PERF_STATUS":
|
||||
case msrUncorePerfStatusString:
|
||||
msrAddress = uncorePerfStatusLocation
|
||||
case msrPlatformInfoString:
|
||||
msrAddress = platformInfo
|
||||
case msrFSBFreqString:
|
||||
msrAddress = fsbFreq
|
||||
default:
|
||||
return 0, fmt.Errorf("incorect name of MSR %s", msr)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@
|
|||
## supported options list
|
||||
## Supported options:
|
||||
## "current_power_consumption", "current_dram_power_consumption",
|
||||
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency"
|
||||
## "thermal_design_power", "max_turbo_frequency", "uncore_frequency",
|
||||
## "cpu_base_frequency"
|
||||
# package_metrics = ["current_power_consumption", "current_dram_power_consumption", "thermal_design_power"]
|
||||
|
||||
## The user can choose which per-CPU metrics are monitored by the plugin in
|
||||
|
|
|
|||
Loading…
Reference in New Issue