From d61f2e435a83ac6051fb439404fbd281548779d1 Mon Sep 17 00:00:00 2001 From: Sebastian Spaink <3441183+sspaink@users.noreply.github.com> Date: Tue, 1 Feb 2022 16:06:44 -0600 Subject: [PATCH] feat(inputs.win_perf_counter): allow errors to be ignored (#10535) --- plugins/inputs/win_perf_counters/README.md | 9 ++ plugins/inputs/win_perf_counters/pdh.go | 93 ++++++++++++++++++- .../win_perf_counters/win_perf_counters.go | 22 ++++- .../win_perf_counters_test.go | 40 ++++++++ 4 files changed, 162 insertions(+), 2 deletions(-) diff --git a/plugins/inputs/win_perf_counters/README.md b/plugins/inputs/win_perf_counters/README.md index dcc15d638..3d3b16768 100644 --- a/plugins/inputs/win_perf_counters/README.md +++ b/plugins/inputs/win_perf_counters/README.md @@ -105,6 +105,15 @@ Supported on Windows Vista/Windows Server 2008 and newer Example: `UsePerfCounterTime=true` +#### IgnoredErrors + +IgnoredErrors accepts a list of PDH error code names, as defined in pdh.go; any listed error that is encountered is ignored. +For example, you can provide "PDH_NO_DATA" to ignore performance counters with no instances. By default, no errors are ignored. +You can find the list of possible errors here: [PDH errors](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/win_perf_counters/pdh.go) + +Example: +`IgnoredErrors=["PDH_NO_DATA"]` + ### Object See Entry below. diff --git a/plugins/inputs/win_perf_counters/pdh.go b/plugins/inputs/win_perf_counters/pdh.go index d4e5f14a1..46048bee0 100644 --- a/plugins/inputs/win_perf_counters/pdh.go +++ b/plugins/inputs/win_perf_counters/pdh.go @@ -38,8 +38,9 @@ import ( "syscall" "unsafe" - "golang.org/x/sys/windows" "time" + + "golang.org/x/sys/windows" ) // Error codes @@ -55,6 +56,7 @@ type ( ) // PDH error codes, which can be returned by all Pdh* functions. Taken from mingw-w64 pdhmsg.h + const ( PDH_CSTATUS_VALID_DATA = 0x00000000 // The returned data is valid.
 	PDH_CSTATUS_NEW_DATA                       = 0x00000001 // The return data value is valid and different from the last sample.
@@ -144,6 +146,97 @@ const (
 	PDH_QUERY_PERF_DATA_TIMEOUT                = 0xC0000BFE
 )
 
+// PDHErrors maps PDH status codes to the names of their constants.
+// checkError in win_perf_counters.go compares these names with IgnoredErrors.
+var PDHErrors = map[uint32]string{
+	PDH_CSTATUS_VALID_DATA:                     "PDH_CSTATUS_VALID_DATA",
+	PDH_CSTATUS_NEW_DATA:                       "PDH_CSTATUS_NEW_DATA",
+	PDH_CSTATUS_NO_MACHINE:                     "PDH_CSTATUS_NO_MACHINE",
+	PDH_CSTATUS_NO_INSTANCE:                    "PDH_CSTATUS_NO_INSTANCE",
+	PDH_MORE_DATA:                              "PDH_MORE_DATA",
+	PDH_CSTATUS_ITEM_NOT_VALIDATED:             "PDH_CSTATUS_ITEM_NOT_VALIDATED",
+	PDH_RETRY:                                  "PDH_RETRY",
+	PDH_NO_DATA:                                "PDH_NO_DATA",
+	PDH_CALC_NEGATIVE_DENOMINATOR:              "PDH_CALC_NEGATIVE_DENOMINATOR",
+	PDH_CALC_NEGATIVE_TIMEBASE:                 "PDH_CALC_NEGATIVE_TIMEBASE",
+	PDH_CALC_NEGATIVE_VALUE:                    "PDH_CALC_NEGATIVE_VALUE",
+	PDH_DIALOG_CANCELLED:                       "PDH_DIALOG_CANCELLED",
+	PDH_END_OF_LOG_FILE:                        "PDH_END_OF_LOG_FILE",
+	PDH_ASYNC_QUERY_TIMEOUT:                    "PDH_ASYNC_QUERY_TIMEOUT",
+	PDH_CANNOT_SET_DEFAULT_REALTIME_DATASOURCE: "PDH_CANNOT_SET_DEFAULT_REALTIME_DATASOURCE",
+	PDH_CSTATUS_NO_OBJECT:                      "PDH_CSTATUS_NO_OBJECT",
+	PDH_CSTATUS_NO_COUNTER:                     "PDH_CSTATUS_NO_COUNTER",
+	PDH_CSTATUS_INVALID_DATA:                   "PDH_CSTATUS_INVALID_DATA",
+	PDH_MEMORY_ALLOCATION_FAILURE:              "PDH_MEMORY_ALLOCATION_FAILURE",
+	PDH_INVALID_HANDLE:                         "PDH_INVALID_HANDLE",
+	PDH_INVALID_ARGUMENT:                       "PDH_INVALID_ARGUMENT",
+	PDH_FUNCTION_NOT_FOUND:                     "PDH_FUNCTION_NOT_FOUND",
+	PDH_CSTATUS_NO_COUNTERNAME:                 "PDH_CSTATUS_NO_COUNTERNAME",
+	PDH_CSTATUS_BAD_COUNTERNAME:                "PDH_CSTATUS_BAD_COUNTERNAME",
+	PDH_INVALID_BUFFER:                         "PDH_INVALID_BUFFER",
+	PDH_INSUFFICIENT_BUFFER:                    "PDH_INSUFFICIENT_BUFFER",
+	PDH_CANNOT_CONNECT_MACHINE:                 "PDH_CANNOT_CONNECT_MACHINE",
+	PDH_INVALID_PATH:                           "PDH_INVALID_PATH",
+	PDH_INVALID_INSTANCE:                       "PDH_INVALID_INSTANCE",
+	PDH_INVALID_DATA:                           "PDH_INVALID_DATA",
+	PDH_NO_DIALOG_DATA:                         "PDH_NO_DIALOG_DATA",
+	PDH_CANNOT_READ_NAME_STRINGS:               "PDH_CANNOT_READ_NAME_STRINGS",
+	PDH_LOG_FILE_CREATE_ERROR:                  "PDH_LOG_FILE_CREATE_ERROR",
+	PDH_LOG_FILE_OPEN_ERROR:                    "PDH_LOG_FILE_OPEN_ERROR",
+	PDH_LOG_TYPE_NOT_FOUND:                     "PDH_LOG_TYPE_NOT_FOUND",
+	PDH_NO_MORE_DATA:                           "PDH_NO_MORE_DATA",
+	PDH_ENTRY_NOT_IN_LOG_FILE:                  "PDH_ENTRY_NOT_IN_LOG_FILE",
+	PDH_DATA_SOURCE_IS_LOG_FILE:                "PDH_DATA_SOURCE_IS_LOG_FILE",
+	PDH_DATA_SOURCE_IS_REAL_TIME:               "PDH_DATA_SOURCE_IS_REAL_TIME",
+	PDH_UNABLE_READ_LOG_HEADER:                 "PDH_UNABLE_READ_LOG_HEADER",
+	PDH_FILE_NOT_FOUND:                         "PDH_FILE_NOT_FOUND",
+	PDH_FILE_ALREADY_EXISTS:                    "PDH_FILE_ALREADY_EXISTS",
+	PDH_NOT_IMPLEMENTED:                        "PDH_NOT_IMPLEMENTED",
+	PDH_STRING_NOT_FOUND:                       "PDH_STRING_NOT_FOUND",
+	PDH_UNABLE_MAP_NAME_FILES:                  "PDH_UNABLE_MAP_NAME_FILES",
+	PDH_UNKNOWN_LOG_FORMAT:                     "PDH_UNKNOWN_LOG_FORMAT",
+	PDH_UNKNOWN_LOGSVC_COMMAND:                 "PDH_UNKNOWN_LOGSVC_COMMAND",
+	PDH_LOGSVC_QUERY_NOT_FOUND:                 "PDH_LOGSVC_QUERY_NOT_FOUND",
+	PDH_LOGSVC_NOT_OPENED:                      "PDH_LOGSVC_NOT_OPENED",
+	PDH_WBEM_ERROR:                             "PDH_WBEM_ERROR",
+	PDH_ACCESS_DENIED:                          "PDH_ACCESS_DENIED",
+	PDH_LOG_FILE_TOO_SMALL:                     "PDH_LOG_FILE_TOO_SMALL",
+	PDH_INVALID_DATASOURCE:                     "PDH_INVALID_DATASOURCE",
+	PDH_INVALID_SQLDB:                          "PDH_INVALID_SQLDB",
+	PDH_NO_COUNTERS:                            "PDH_NO_COUNTERS",
+	PDH_SQL_ALLOC_FAILED:                       "PDH_SQL_ALLOC_FAILED",
+	PDH_SQL_ALLOCCON_FAILED:                    "PDH_SQL_ALLOCCON_FAILED",
+	PDH_SQL_EXEC_DIRECT_FAILED:                 "PDH_SQL_EXEC_DIRECT_FAILED",
+	PDH_SQL_FETCH_FAILED:                       "PDH_SQL_FETCH_FAILED",
+	PDH_SQL_ROWCOUNT_FAILED:                    "PDH_SQL_ROWCOUNT_FAILED",
+	PDH_SQL_MORE_RESULTS_FAILED:                "PDH_SQL_MORE_RESULTS_FAILED",
+	PDH_SQL_CONNECT_FAILED:                     "PDH_SQL_CONNECT_FAILED",
+	PDH_SQL_BIND_FAILED:                        "PDH_SQL_BIND_FAILED",
+	PDH_CANNOT_CONNECT_WMI_SERVER:              "PDH_CANNOT_CONNECT_WMI_SERVER",
+	PDH_PLA_COLLECTION_ALREADY_RUNNING:         "PDH_PLA_COLLECTION_ALREADY_RUNNING",
+	PDH_PLA_ERROR_SCHEDULE_OVERLAP:             "PDH_PLA_ERROR_SCHEDULE_OVERLAP",
+	PDH_PLA_COLLECTION_NOT_FOUND:               "PDH_PLA_COLLECTION_NOT_FOUND",
+	PDH_PLA_ERROR_SCHEDULE_ELAPSED:             "PDH_PLA_ERROR_SCHEDULE_ELAPSED",
+	PDH_PLA_ERROR_NOSTART:                      "PDH_PLA_ERROR_NOSTART",
+	PDH_PLA_ERROR_ALREADY_EXISTS:               "PDH_PLA_ERROR_ALREADY_EXISTS",
+	PDH_PLA_ERROR_TYPE_MISMATCH:                "PDH_PLA_ERROR_TYPE_MISMATCH",
+	PDH_PLA_ERROR_FILEPATH:                     "PDH_PLA_ERROR_FILEPATH",
+	PDH_PLA_SERVICE_ERROR:                      "PDH_PLA_SERVICE_ERROR",
+	PDH_PLA_VALIDATION_ERROR:                   "PDH_PLA_VALIDATION_ERROR",
+	PDH_PLA_VALIDATION_WARNING:                 "PDH_PLA_VALIDATION_WARNING",
+	PDH_PLA_ERROR_NAME_TOO_LONG:                "PDH_PLA_ERROR_NAME_TOO_LONG",
+	PDH_INVALID_SQL_LOG_FORMAT:                 "PDH_INVALID_SQL_LOG_FORMAT",
+	PDH_COUNTER_ALREADY_IN_QUERY:               "PDH_COUNTER_ALREADY_IN_QUERY",
+	PDH_BINARY_LOG_CORRUPT:                     "PDH_BINARY_LOG_CORRUPT",
+	PDH_LOG_SAMPLE_TOO_SMALL:                   "PDH_LOG_SAMPLE_TOO_SMALL",
+	PDH_OS_LATER_VERSION:                       "PDH_OS_LATER_VERSION",
+	PDH_OS_EARLIER_VERSION:                     "PDH_OS_EARLIER_VERSION",
+	PDH_INCORRECT_APPEND_TIME:                  "PDH_INCORRECT_APPEND_TIME",
+	PDH_UNMATCHED_APPEND_COUNTER:               "PDH_UNMATCHED_APPEND_COUNTER",
+	PDH_SQL_ALTER_DETAIL_FAILED:                "PDH_SQL_ALTER_DETAIL_FAILED",
+	PDH_QUERY_PERF_DATA_TIMEOUT:                "PDH_QUERY_PERF_DATA_TIMEOUT",
+}
+
 // Formatting options for GetFormattedCounterValue().
 const (
 	PDH_FMT_RAW          = 0x00000010
diff --git a/plugins/inputs/win_perf_counters/win_perf_counters.go b/plugins/inputs/win_perf_counters/win_perf_counters.go
index a126db4ea..9cf45a256 100644
--- a/plugins/inputs/win_perf_counters/win_perf_counters.go
+++ b/plugins/inputs/win_perf_counters/win_perf_counters.go
@@ -35,6 +35,12 @@ var sampleConfig = `
   #LocalizeWildcardsExpansion = true
   # Period after which counters will be reread from configuration and wildcards in counter paths expanded
   CountersRefreshInterval="1m"
+  ## Accepts a list of PDH error codes which are defined in pdh.go, if this error is encountered it will be ignored
+  ## For example, you can provide "PDH_NO_DATA" to ignore performance counters with no instances
+  ## By default no errors are ignored
+  ## You can find the list here: https://github.com/influxdata/telegraf/blob/master/plugins/inputs/win_perf_counters/pdh.go
+  ## e.g.: IgnoredErrors = ["PDH_NO_DATA"]
+  # IgnoredErrors = []
 
   [[inputs.win_perf_counters.object]]
     # Processor usage, alternative to native, reports on a
per core.
@@ -152,6 +158,7 @@ type Win_PerfCounters struct {
 	CountersRefreshInterval    config.Duration
 	UseWildcardsExpansion      bool
 	LocalizeWildcardsExpansion bool
+	IgnoredErrors              []string `toml:"IgnoredErrors"`
 
 	Log telegraf.Logger
 
@@ -389,6 +396,33 @@ func (m *Win_PerfCounters) ParseConfig() error {
 
 }
 
+// checkError filters err against the configured IgnoredErrors list.
+//
+// It returns nil when err is a *PdhError whose code has a name in PDHErrors
+// and that name appears in m.IgnoredErrors. Every other error, including a
+// nil err, is returned unchanged.
+func (m *Win_PerfCounters) checkError(err error) error {
+	pdhErr, ok := err.(*PdhError)
+	if !ok {
+		return err
+	}
+
+	// Codes missing from PDHErrors have no name. Without this guard the map
+	// lookup would yield "" and an empty IgnoredErrors entry would match
+	// every unknown code.
+	name, known := PDHErrors[pdhErr.ErrorCode]
+	if !known {
+		return err
+	}
+
+	for _, ignored := range m.IgnoredErrors {
+		if ignored == name {
+			return nil
+		}
+	}
+	return err
+}
+
 func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error {
 	// Parse the config once
 	var err error
@@ -407,7 +441,7 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error {
 		}
 		//some counters need two data samples before computing a value
 		if err = m.query.CollectData(); err != nil {
-			return err
+			return m.checkError(err)
 		}
 		m.lastRefreshed = time.Now()
 
diff --git a/plugins/inputs/win_perf_counters/win_perf_counters_test.go b/plugins/inputs/win_perf_counters/win_perf_counters_test.go
index 5519e3d37..97787cb31 100644
--- a/plugins/inputs/win_perf_counters/win_perf_counters_test.go
+++ b/plugins/inputs/win_perf_counters/win_perf_counters_test.go
@@ -1068,3 +1068,43 @@ func TestLocalizeWildcardsExpansion(t *testing.T) {
 	//be English.
require.Contains(t, acc.Metrics[0].Fields, sanitizedChars.Replace(counter)) } + +func TestCheckError(t *testing.T) { + tests := []struct { + Name string + Err error + IgnoredErrors []string + ExpectedErr error + }{ + { + Name: "Ignore PDH_NO_DATA", + Err: &PdhError{ + ErrorCode: uint32(PDH_NO_DATA), + }, + IgnoredErrors: []string{ + "PDH_NO_DATA", + }, + ExpectedErr: nil, + }, + { + Name: "Don't ignore PDH_NO_DATA", + Err: &PdhError{ + ErrorCode: uint32(PDH_NO_DATA), + }, + ExpectedErr: &PdhError{ + ErrorCode: uint32(PDH_NO_DATA), + }, + }, + } + + for _, tc := range tests { + t.Run(tc.Name, func(t *testing.T) { + m := Win_PerfCounters{ + IgnoredErrors: tc.IgnoredErrors, + } + + err := m.checkError(tc.Err) + require.Equal(t, tc.ExpectedErr, err) + }) + } +}