Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(inputs.win_perf_counter): allow errors to be ignored #10535

Merged
merged 10 commits into from
Feb 1, 2022
9 changes: 9 additions & 0 deletions plugins/inputs/win_perf_counters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ Supported on Windows Vista/Windows Server 2008 and newer
Example:
`UsePerfCounterTime=true`

#### IgnoredErrors

IgnoredErrors accepts a list of PDH error codes, which are defined in pdh.go; if one of these errors is encountered, it will be ignored.
For example, you can provide "PDH_NO_DATA" to ignore performance counters with no instances, but by default no errors are ignored.
You can find the list of possible errors here: [PDH errors](https://github.com/influxdata/telegraf/blob/master/plugins/inputs/win_perf_counters/pdh.go)

Example:
`IgnoredErrors=["PDH_NO_DATA"]`

### Object

See Entry below.
Expand Down
93 changes: 92 additions & 1 deletion plugins/inputs/win_perf_counters/pdh.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ import (
"syscall"
"unsafe"

"golang.org/x/sys/windows"
"time"

"golang.org/x/sys/windows"
)

// Error codes
Expand All @@ -55,6 +56,7 @@ type (
)

// PDH error codes, which can be returned by all Pdh* functions. Taken from mingw-w64 pdhmsg.h

const (
PDH_CSTATUS_VALID_DATA = 0x00000000 // The returned data is valid.
PDH_CSTATUS_NEW_DATA = 0x00000001 // The return data value is valid and different from the last sample.
Expand Down Expand Up @@ -144,6 +146,95 @@ const (
PDH_QUERY_PERF_DATA_TIMEOUT = 0xC0000BFE
)

// PDHErrors maps each PDH status/error code constant to its symbolic name
// spelled as a string (the value is always the identifier of the key).
// Win_PerfCounters.checkError looks a returned code up in this table and
// compares the resulting name against the entries of the IgnoredErrors
// configuration option.
var PDHErrors = map[uint32]string{
PDH_CSTATUS_VALID_DATA: "PDH_CSTATUS_VALID_DATA",
PDH_CSTATUS_NEW_DATA: "PDH_CSTATUS_NEW_DATA",
PDH_CSTATUS_NO_MACHINE: "PDH_CSTATUS_NO_MACHINE",
PDH_CSTATUS_NO_INSTANCE: "PDH_CSTATUS_NO_INSTANCE",
PDH_MORE_DATA: "PDH_MORE_DATA",
PDH_CSTATUS_ITEM_NOT_VALIDATED: "PDH_CSTATUS_ITEM_NOT_VALIDATED",
PDH_RETRY: "PDH_RETRY",
PDH_NO_DATA: "PDH_NO_DATA",
PDH_CALC_NEGATIVE_DENOMINATOR: "PDH_CALC_NEGATIVE_DENOMINATOR",
PDH_CALC_NEGATIVE_TIMEBASE: "PDH_CALC_NEGATIVE_TIMEBASE",
PDH_CALC_NEGATIVE_VALUE: "PDH_CALC_NEGATIVE_VALUE",
PDH_DIALOG_CANCELLED: "PDH_DIALOG_CANCELLED",
PDH_END_OF_LOG_FILE: "PDH_END_OF_LOG_FILE",
PDH_ASYNC_QUERY_TIMEOUT: "PDH_ASYNC_QUERY_TIMEOUT",
PDH_CANNOT_SET_DEFAULT_REALTIME_DATASOURCE: "PDH_CANNOT_SET_DEFAULT_REALTIME_DATASOURCE",
PDH_CSTATUS_NO_OBJECT: "PDH_CSTATUS_NO_OBJECT",
PDH_CSTATUS_NO_COUNTER: "PDH_CSTATUS_NO_COUNTER",
PDH_CSTATUS_INVALID_DATA: "PDH_CSTATUS_INVALID_DATA",
PDH_MEMORY_ALLOCATION_FAILURE: "PDH_MEMORY_ALLOCATION_FAILURE",
PDH_INVALID_HANDLE: "PDH_INVALID_HANDLE",
PDH_INVALID_ARGUMENT: "PDH_INVALID_ARGUMENT",
PDH_FUNCTION_NOT_FOUND: "PDH_FUNCTION_NOT_FOUND",
PDH_CSTATUS_NO_COUNTERNAME: "PDH_CSTATUS_NO_COUNTERNAME",
PDH_CSTATUS_BAD_COUNTERNAME: "PDH_CSTATUS_BAD_COUNTERNAME",
PDH_INVALID_BUFFER: "PDH_INVALID_BUFFER",
PDH_INSUFFICIENT_BUFFER: "PDH_INSUFFICIENT_BUFFER",
PDH_CANNOT_CONNECT_MACHINE: "PDH_CANNOT_CONNECT_MACHINE",
PDH_INVALID_PATH: "PDH_INVALID_PATH",
PDH_INVALID_INSTANCE: "PDH_INVALID_INSTANCE",
PDH_INVALID_DATA: "PDH_INVALID_DATA",
PDH_NO_DIALOG_DATA: "PDH_NO_DIALOG_DATA",
PDH_CANNOT_READ_NAME_STRINGS: "PDH_CANNOT_READ_NAME_STRINGS",
PDH_LOG_FILE_CREATE_ERROR: "PDH_LOG_FILE_CREATE_ERROR",
PDH_LOG_FILE_OPEN_ERROR: "PDH_LOG_FILE_OPEN_ERROR",
PDH_LOG_TYPE_NOT_FOUND: "PDH_LOG_TYPE_NOT_FOUND",
PDH_NO_MORE_DATA: "PDH_NO_MORE_DATA",
PDH_ENTRY_NOT_IN_LOG_FILE: "PDH_ENTRY_NOT_IN_LOG_FILE",
PDH_DATA_SOURCE_IS_LOG_FILE: "PDH_DATA_SOURCE_IS_LOG_FILE",
PDH_DATA_SOURCE_IS_REAL_TIME: "PDH_DATA_SOURCE_IS_REAL_TIME",
PDH_UNABLE_READ_LOG_HEADER: "PDH_UNABLE_READ_LOG_HEADER",
PDH_FILE_NOT_FOUND: "PDH_FILE_NOT_FOUND",
PDH_FILE_ALREADY_EXISTS: "PDH_FILE_ALREADY_EXISTS",
PDH_NOT_IMPLEMENTED: "PDH_NOT_IMPLEMENTED",
PDH_STRING_NOT_FOUND: "PDH_STRING_NOT_FOUND",
PDH_UNABLE_MAP_NAME_FILES: "PDH_UNABLE_MAP_NAME_FILES",
PDH_UNKNOWN_LOG_FORMAT: "PDH_UNKNOWN_LOG_FORMAT",
PDH_UNKNOWN_LOGSVC_COMMAND: "PDH_UNKNOWN_LOGSVC_COMMAND",
PDH_LOGSVC_QUERY_NOT_FOUND: "PDH_LOGSVC_QUERY_NOT_FOUND",
PDH_LOGSVC_NOT_OPENED: "PDH_LOGSVC_NOT_OPENED",
PDH_WBEM_ERROR: "PDH_WBEM_ERROR",
PDH_ACCESS_DENIED: "PDH_ACCESS_DENIED",
PDH_LOG_FILE_TOO_SMALL: "PDH_LOG_FILE_TOO_SMALL",
PDH_INVALID_DATASOURCE: "PDH_INVALID_DATASOURCE",
PDH_INVALID_SQLDB: "PDH_INVALID_SQLDB",
PDH_NO_COUNTERS: "PDH_NO_COUNTERS",
PDH_SQL_ALLOC_FAILED: "PDH_SQL_ALLOC_FAILED",
PDH_SQL_ALLOCCON_FAILED: "PDH_SQL_ALLOCCON_FAILED",
PDH_SQL_EXEC_DIRECT_FAILED: "PDH_SQL_EXEC_DIRECT_FAILED",
PDH_SQL_FETCH_FAILED: "PDH_SQL_FETCH_FAILED",
PDH_SQL_ROWCOUNT_FAILED: "PDH_SQL_ROWCOUNT_FAILED",
PDH_SQL_MORE_RESULTS_FAILED: "PDH_SQL_MORE_RESULTS_FAILED",
PDH_SQL_CONNECT_FAILED: "PDH_SQL_CONNECT_FAILED",
PDH_SQL_BIND_FAILED: "PDH_SQL_BIND_FAILED",
PDH_CANNOT_CONNECT_WMI_SERVER: "PDH_CANNOT_CONNECT_WMI_SERVER",
PDH_PLA_COLLECTION_ALREADY_RUNNING: "PDH_PLA_COLLECTION_ALREADY_RUNNING",
PDH_PLA_ERROR_SCHEDULE_OVERLAP: "PDH_PLA_ERROR_SCHEDULE_OVERLAP",
PDH_PLA_COLLECTION_NOT_FOUND: "PDH_PLA_COLLECTION_NOT_FOUND",
PDH_PLA_ERROR_SCHEDULE_ELAPSED: "PDH_PLA_ERROR_SCHEDULE_ELAPSED",
PDH_PLA_ERROR_NOSTART: "PDH_PLA_ERROR_NOSTART",
PDH_PLA_ERROR_ALREADY_EXISTS: "PDH_PLA_ERROR_ALREADY_EXISTS",
PDH_PLA_ERROR_TYPE_MISMATCH: "PDH_PLA_ERROR_TYPE_MISMATCH",
PDH_PLA_ERROR_FILEPATH: "PDH_PLA_ERROR_FILEPATH",
PDH_PLA_SERVICE_ERROR: "PDH_PLA_SERVICE_ERROR",
PDH_PLA_VALIDATION_ERROR: "PDH_PLA_VALIDATION_ERROR",
PDH_PLA_VALIDATION_WARNING: "PDH_PLA_VALIDATION_WARNING",
PDH_PLA_ERROR_NAME_TOO_LONG: "PDH_PLA_ERROR_NAME_TOO_LONG",
PDH_INVALID_SQL_LOG_FORMAT: "PDH_INVALID_SQL_LOG_FORMAT",
PDH_COUNTER_ALREADY_IN_QUERY: "PDH_COUNTER_ALREADY_IN_QUERY",
PDH_BINARY_LOG_CORRUPT: "PDH_BINARY_LOG_CORRUPT",
PDH_LOG_SAMPLE_TOO_SMALL: "PDH_LOG_SAMPLE_TOO_SMALL",
PDH_OS_LATER_VERSION: "PDH_OS_LATER_VERSION",
PDH_OS_EARLIER_VERSION: "PDH_OS_EARLIER_VERSION",
PDH_INCORRECT_APPEND_TIME: "PDH_INCORRECT_APPEND_TIME",
PDH_UNMATCHED_APPEND_COUNTER: "PDH_UNMATCHED_APPEND_COUNTER",
PDH_SQL_ALTER_DETAIL_FAILED: "PDH_SQL_ALTER_DETAIL_FAILED",
PDH_QUERY_PERF_DATA_TIMEOUT: "PDH_QUERY_PERF_DATA_TIMEOUT",
}

// Formatting options for GetFormattedCounterValue().
const (
PDH_FMT_RAW = 0x00000010
Expand Down
22 changes: 21 additions & 1 deletion plugins/inputs/win_perf_counters/win_perf_counters.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ var sampleConfig = `
#LocalizeWildcardsExpansion = true
# Period after which counters will be reread from configuration and wildcards in counter paths expanded
CountersRefreshInterval="1m"
## Accepts a list of PDH error codes which are defined in pdh.go, if this error is encountered it will be ignored
## For example, you can provide "PDH_NO_DATA" to ignore performance counters with no instances
## By default no errors are ignored
## You can find the list here: https://github.com/influxdata/telegraf/blob/master/plugins/inputs/win_perf_counters/pdh.go
## e.g.: IgnoredErrors = ["PDH_NO_DATA"]
# IgnoredErrors = []
Comment on lines +38 to +43
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs to be copied into the readme.md too

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


[[inputs.win_perf_counters.object]]
# Processor usage, alternative to native, reports on a per core.
Expand Down Expand Up @@ -152,6 +158,7 @@ type Win_PerfCounters struct {
CountersRefreshInterval config.Duration
UseWildcardsExpansion bool
LocalizeWildcardsExpansion bool
IgnoredErrors []string `toml:"IgnoredErrors"`

Log telegraf.Logger

Expand Down Expand Up @@ -389,6 +396,19 @@ func (m *Win_PerfCounters) ParseConfig() error {

}

// checkError filters err against the configured IgnoredErrors list.
//
// It returns nil when err is a *PdhError whose code, translated to its
// symbolic name through PDHErrors, equals one of the names in
// m.IgnoredErrors. Any other error (including PDH errors that are not
// listed, and errors of other types) is returned unchanged.
func (m *Win_PerfCounters) checkError(err error) error {
	pdhErr, isPdh := err.(*PdhError)
	if !isPdh {
		// Only PDH errors can be suppressed by configuration.
		return err
	}

	for i := range m.IgnoredErrors {
		if m.IgnoredErrors[i] == PDHErrors[pdhErr.ErrorCode] {
			return nil
		}
	}
	return err
}

func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error {
// Parse the config once
var err error
Expand All @@ -407,7 +427,7 @@ func (m *Win_PerfCounters) Gather(acc telegraf.Accumulator) error {
}
//some counters need two data samples before computing a value
if err = m.query.CollectData(); err != nil {
return err
return m.checkError(err)
}
m.lastRefreshed = time.Now()

Expand Down
40 changes: 40 additions & 0 deletions plugins/inputs/win_perf_counters/win_perf_counters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1068,3 +1068,43 @@ func TestLocalizeWildcardsExpansion(t *testing.T) {
//be English.
require.Contains(t, acc.Metrics[0].Fields, sanitizedChars.Replace(counter))
}

// TestCheckError verifies that checkError returns nil for a PDH error whose
// name is listed in IgnoredErrors, and returns the error unchanged when the
// list does not contain it.
func TestCheckError(t *testing.T) {
	// Build a fresh value on every call so that the input and the expected
	// error of a case never share a pointer.
	newNoData := func() error {
		return &PdhError{
			ErrorCode: uint32(PDH_NO_DATA),
		}
	}

	cases := []struct {
		name    string
		ignored []string
		in      error
		want    error
	}{
		{
			name:    "Ignore PDH_NO_DATA",
			ignored: []string{"PDH_NO_DATA"},
			in:      newNoData(),
			want:    nil,
		},
		{
			name: "Don't ignore PDH_NO_DATA",
			in:   newNoData(),
			want: newNoData(),
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			plugin := Win_PerfCounters{IgnoredErrors: c.ignored}

			got := plugin.checkError(c.in)
			require.Equal(t, c.want, got)
		})
	}
}