Skip to content

Commit

Permalink
feat(metrics) add failure reason (#2965)
Browse files Browse the repository at this point in the history
Add failure_reason label to ingress_controller_configuration_push_count metric.

Co-authored-by: Patryk Małek <patryk.malek@konghq.com>
Co-authored-by: Travis Raines <571832+rainest@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 27, 2022
1 parent 6eb2b9c commit ac4d48b
Show file tree
Hide file tree
Showing 5 changed files with 222 additions and 41 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ instructions and the [revised Kong 3.x upgrade instructions](https://docs.konghq
overriden by setting a `konghq.com/regex-prefix` annotation, for routes that
need their paths to actually begin with `/~`
[#2956](https://github.com/Kong/kubernetes-ingress-controller/pull/2956)
- Prometheus metrics now highlight configuration push failures caused by
conflicts. The `ingress_controller_configuration_push_count` Prometheus
metric now reports `success="false"` with a `failure_reason="conflict|other"`
label, distinguishing configuration conflicts from other errors (transient
network errors, Kong offline, Kong reported non-conflict error, etc.).
[#2965](https://github.com/Kong/kubernetes-ingress-controller/pull/2965)

### Fixed

Expand Down
7 changes: 3 additions & 4 deletions grafana.json
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@
"targets": [
{
"exemplar": true,
"expr": "ingress_controller_configuration_push_count{success=\"false\"}",
"expr": "sum by (failure_reason) (ingress_controller_configuration_push_count{success=\"false\"})",
"instant": false,
"interval": "",
"legendFormat": "",
Expand Down Expand Up @@ -375,6 +375,5 @@
"timezone": "",
"title": "Kong ingress controller",
"uid": "SaGwMOtnz",
"version": 1,
"weekStart": ""
}
"version": 2
}
111 changes: 89 additions & 22 deletions internal/dataplane/sendconfig/sendconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"context"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"net"
"net/http"
"reflect"
"sync"
Expand All @@ -16,6 +18,7 @@ import (
"github.com/kong/deck/file"
"github.com/kong/deck/state"
deckutils "github.com/kong/deck/utils"
"github.com/kong/go-kong/kong"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"

Expand Down Expand Up @@ -85,8 +88,9 @@ func PerformUpdate(ctx context.Context,

if err != nil {
promMetrics.ConfigPushCount.With(prometheus.Labels{
metrics.SuccessKey: metrics.SuccessFalse,
metrics.ProtocolKey: metricsProtocol,
metrics.SuccessKey: metrics.SuccessFalse,
metrics.ProtocolKey: metricsProtocol,
metrics.FailureReasonKey: pushFailureReason(err),
}).Inc()
promMetrics.ConfigPushDuration.With(prometheus.Labels{
metrics.SuccessKey: metrics.SuccessFalse,
Expand All @@ -96,8 +100,9 @@ func PerformUpdate(ctx context.Context,
}

promMetrics.ConfigPushCount.With(prometheus.Labels{
metrics.SuccessKey: metrics.SuccessTrue,
metrics.ProtocolKey: metricsProtocol,
metrics.SuccessKey: metrics.SuccessTrue,
metrics.ProtocolKey: metricsProtocol,
metrics.FailureReasonKey: "",
}).Inc()
promMetrics.ConfigPushDuration.With(prometheus.Labels{
metrics.SuccessKey: metrics.SuccessTrue,
Expand Down Expand Up @@ -204,45 +209,55 @@ func onUpdateDBMode(ctx context.Context,
skipCACertificates bool,
) error {
dumpConfig := dump.Config{SelectorTags: selectorTags, SkipCACerts: skipCACertificates}
// read the current state
rawState, err := dump.Get(ctx, kongConfig.Client, dumpConfig)
if err != nil {
return fmt.Errorf("loading configuration from kong: %w", err)
}
currentState, err := state.Get(rawState)
if err != nil {
return err
}

// read the target state
rawState, err = file.Get(ctx, targetContent, file.RenderConfig{
CurrentState: currentState,
KongVersion: kongConfig.Version,
}, dumpConfig, kongConfig.Client)
cs, err := currentState(ctx, kongConfig, dumpConfig)
if err != nil {
return err
}
targetState, err := state.Get(rawState)

ts, err := targetState(ctx, targetContent, cs, kongConfig, dumpConfig)
if err != nil {
return err
return deckConfigConflictError{err}
}

syncer, err := diff.NewSyncer(diff.SyncerOpts{
CurrentState: currentState,
TargetState: targetState,
CurrentState: cs,
TargetState: ts,
KongClient: kongConfig.Client,
SilenceWarnings: true,
})
if err != nil {
return fmt.Errorf("creating a new syncer: %w", err)
}

_, errs := syncer.Solve(ctx, kongConfig.Concurrency, false)
if errs != nil {
return deckutils.ErrArray{Errors: errs}
}
return nil
}

func currentState(ctx context.Context, kongConfig *Kong, dumpConfig dump.Config) (*state.KongState, error) {
rawState, err := dump.Get(ctx, kongConfig.Client, dumpConfig)
if err != nil {
return nil, fmt.Errorf("loading configuration from kong: %w", err)
}

return state.Get(rawState)
}

func targetState(ctx context.Context, targetContent *file.Content, currentState *state.KongState, kongConfig *Kong, dumpConfig dump.Config) (*state.KongState, error) {
rawState, err := file.Get(ctx, targetContent, file.RenderConfig{
CurrentState: currentState,
KongVersion: kongConfig.Version,
}, dumpConfig, kongConfig.Client)
if err != nil {
return nil, err
}

return state.Get(rawState)
}

func equalSHA(a, b []byte) bool {
return reflect.DeepEqual(a, b)
}
Expand Down Expand Up @@ -273,3 +288,55 @@ func hasSHAUpdateAlreadyBeenReported(latestUpdateSHA []byte) bool {
latestReportedSHA = latestUpdateSHA
return false
}

// deckConfigConflictError is an error used to wrap deck config conflict errors returned from deck functions
// transforming KongRawState to KongState (e.g. state.Get, dump.Get).
type deckConfigConflictError struct {
err error
}

func (e deckConfigConflictError) Error() string {
return e.err.Error()
}

func (e deckConfigConflictError) Is(target error) bool {
_, ok := target.(deckConfigConflictError)
return ok
}

func (e deckConfigConflictError) Unwrap() error {
return e.err
}

// pushFailureReason extracts config push failure reason from an error returned from onUpdateInMemoryMode or onUpdateDBMode.
func pushFailureReason(err error) string {
var netErr net.Error
if errors.As(err, &netErr) {
return metrics.FailureReasonNetwork
}

if isConflictErr(err) {
return metrics.FailureReasonConflict
}

return metrics.FailureReasonOther
}

func isConflictErr(err error) bool {
var apiErr *kong.APIError
if errors.As(err, &apiErr) && apiErr.Code() == http.StatusConflict ||
errors.Is(err, deckConfigConflictError{}) {
return true
}

var deckErrArray deckutils.ErrArray
if errors.As(err, &deckErrArray) {
for _, err := range deckErrArray.Errors {
if isConflictErr(err) {
return true
}
}
}

return false
}
86 changes: 86 additions & 0 deletions internal/dataplane/sendconfig/sendconfig_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
package sendconfig

import (
"errors"
"fmt"
"net"
"net/http"
"reflect"
"testing"

"github.com/kong/deck/file"
deckutils "github.com/kong/deck/utils"
"github.com/kong/go-kong/kong"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/kong/kubernetes-ingress-controller/v2/internal/metrics"
)

func Test_renderConfigWithCustomEntities(t *testing.T) {
Expand Down Expand Up @@ -128,3 +136,81 @@ func Test_updateReportingUtilities(t *testing.T) {
assert.True(t, hasSHAUpdateAlreadyBeenReported([]byte("yet-another-fake-sha")))
assert.True(t, hasSHAUpdateAlreadyBeenReported([]byte("yet-another-fake-sha")))
}

func Test_pushFailureReason(t *testing.T) {
apiConflictErr := kong.NewAPIError(http.StatusConflict, "conflict api error")
networkErr := net.UnknownNetworkError("network error")
genericError := errors.New("generic error")

testCases := []struct {
name string
err error
expectedReason string
}{
{
name: "generic_error",
err: genericError,
expectedReason: metrics.FailureReasonOther,
},
{
name: "api_conflict_error",
err: apiConflictErr,
expectedReason: metrics.FailureReasonConflict,
},
{
name: "api_conflict_error_wrapped",
err: fmt.Errorf("wrapped conflict api err: %w", apiConflictErr),
expectedReason: metrics.FailureReasonConflict,
},
{
name: "deck_config_conflict_error_empty",
err: deckConfigConflictError{},
expectedReason: metrics.FailureReasonConflict,
},
{
name: "deck_config_conflict_error_with_generic_error",
err: deckConfigConflictError{genericError},
expectedReason: metrics.FailureReasonConflict,
},
{
name: "deck_err_array_with_api_conflict_error",
err: deckutils.ErrArray{Errors: []error{apiConflictErr}},
expectedReason: metrics.FailureReasonConflict,
},
{
name: "wrapped_deck_err_array_with_api_conflict_error",
err: fmt.Errorf("wrapped: %w", deckutils.ErrArray{Errors: []error{apiConflictErr}}),
expectedReason: metrics.FailureReasonConflict,
},
{
name: "deck_err_array_with_generic_error",
err: deckutils.ErrArray{Errors: []error{genericError}},
expectedReason: metrics.FailureReasonOther,
},
{
name: "deck_err_array_empty",
err: deckutils.ErrArray{Errors: []error{genericError}},
expectedReason: metrics.FailureReasonOther,
},
{
name: "network_error",
err: networkErr,
expectedReason: metrics.FailureReasonNetwork,
},
{
name: "network_error_wrapped_in_deck_config_conflict_error",
err: deckConfigConflictError{networkErr},
expectedReason: metrics.FailureReasonNetwork,
},
}

for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()

reason := pushFailureReason(tc.err)
require.Equal(t, tc.expectedReason, reason)
})
}
}
Loading

0 comments on commit ac4d48b

Please sign in to comment.