Skip to content

Commit

Permalink
Change SyncerSyncResult to NegErrorCount metrics
Browse files Browse the repository at this point in the history
Update SyncerSyncResult as NegErrorCount so we also track API server
errors from GCE/K8s.
  • Loading branch information
sawsa307 committed May 2, 2023
1 parent 09b5657 commit e019327
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 15 deletions.
46 changes: 46 additions & 0 deletions pkg/neg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,15 @@ limitations under the License.
package metrics

import (
"errors"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"google.golang.org/api/googleapi"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
negtypes "k8s.io/ingress-gce/pkg/neg/types"
"k8s.io/ingress-gce/pkg/utils"
)

const (
Expand All @@ -34,6 +39,7 @@ const (
syncerStalenessKey = "syncer_staleness"
epsStalenessKey = "endpointslice_staleness"
degradedModeCorrectnessKey = "degraded_mode_correctness"
negSyncErrorCountKey = "neg_sync_error_count"

resultSuccess = "success"
resultError = "error"
Expand All @@ -43,6 +49,9 @@ const (

NotInDegradedEndpoints = "not_in_degraded_endpoints"
OnlyInDegradedEndpoints = "only_in_degraded_endpoints"

gceServerError = "GCE_server_err"
k8sServerError = "K8s_server_err"
)

type syncType string
Expand Down Expand Up @@ -169,6 +178,16 @@ var (
},
degradedModeCorrectnessLabels,
)

// NegSyncErrorCount counts NEG sync errors, partitioned by the "error_type"
// label. The label carries either an internal sync error reason or a
// GCE/K8s API server error category.
NegSyncErrorCount = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Subsystem: negControllerSubsystem,
		Name:      negSyncErrorCountKey,
		// The previous help text described sync results; this counter is
		// only incremented for errors, so describe it as such.
		Help: "Number of NEG sync errors, by error type",
	},
	[]string{"error_type"},
)
)

var register sync.Once
Expand All @@ -188,6 +207,7 @@ func RegisterMetrics() {
prometheus.MustRegister(LabelNumber)
prometheus.MustRegister(AnnotationSize)
prometheus.MustRegister(DegradeModeCorrectness)
prometheus.MustRegister(NegSyncErrorCount)

RegisterSyncerMetrics()
})
Expand Down Expand Up @@ -233,9 +253,35 @@ func PublishDegradedModeCorrectnessMetrics(count int, endpointType string, negTy
DegradeModeCorrectness.WithLabelValues(negType, endpointType).Observe(float64(count))
}

func PublishNegSyncErrorCountMetrics(err error) {
if err == nil {
return
}
NegSyncErrorCount.WithLabelValues(getErrorType(err)).Inc()
}

func getResult(err error) string {
if err != nil {
return resultError
}
return resultSuccess
}

// getErrorType returns the "error_type" label value for err.
//
// Errors that are, or wrap, a GCE (*googleapi.Error) or K8s
// (*k8serrors.StatusError) API error are reported as gceServerError or
// k8sServerError when the utils helpers classify them as server errors, and
// as OtherError when they do not. Every other error is treated as an internal
// sync error and labeled with the reason from negtypes.ClassifyError.
func getErrorType(err error) string {
	var (
		gceAPIErr *googleapi.Error
		k8sAPIErr *k8serrors.StatusError
	)

	// Internal sync errors: nothing in the chain is an API error.
	isAPIErr := errors.As(err, &gceAPIErr) || errors.As(err, &k8sAPIErr)
	if !isAPIErr {
		return string(negtypes.ClassifyError(err).Reason)
	}

	// API errors (GCE/K8s): only server-side failures get a dedicated label.
	switch {
	case utils.IsGCEServerError(err):
		return gceServerError
	case utils.IsK8sServerError(err):
		return k8sServerError
	default:
		return OtherError
	}
}
3 changes: 1 addition & 2 deletions pkg/neg/metrics/neg_metrics_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ func FakeSyncerMetrics() *SyncerMetrics {

// RegisterSyncerMetrics registers syncer related metrics
func RegisterSyncerMetrics() {
prometheus.MustRegister(syncerSyncResult)
prometheus.MustRegister(syncerSyncerState)
}

Expand Down Expand Up @@ -102,8 +101,8 @@ func (sm *SyncerMetrics) UpdateSyncerStatusInMetrics(key negtypes.NegSyncerKey,
if err != nil {
syncErr := negtypes.ClassifyError(err)
reason = syncErr.Reason
PublishNegSyncErrorCountMetrics(syncErr)
}
syncerSyncResult.WithLabelValues(string(reason)).Inc()
sm.mu.Lock()
defer sm.mu.Unlock()
if sm.syncerStatusMap == nil {
Expand Down
13 changes: 0 additions & 13 deletions pkg/neg/metrics/syncer_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,6 @@ import (
)

const (
syncResultLabel = "result"
syncResultKey = "sync_result"

syncerStateLabel = "state"
syncerStateKey = "syncer_state"

Expand All @@ -48,16 +45,6 @@ const (
)

var (
// syncerSyncResult tracks the count for each sync result
syncerSyncResult = prometheus.NewCounterVec(
prometheus.CounterOpts{
Subsystem: negControllerSubsystem,
Name: syncResultKey,
Help: "Current count for each sync result",
},
[]string{syncResultLabel},
)

// syncerSyncerState tracks the count of syncer in different states
syncerSyncerState = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down
37 changes: 37 additions & 0 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ const (
// exclude from load balancers created by a cloud provider. This label is deprecated and will
// be removed in 1.18.
LabelAlphaNodeRoleExcludeBalancer = "alpha.service-controller.kubernetes.io/exclude-balancer"

GCEError = "GCE"
K8sError = "K8s"
)

var networkTierErrorRegexp = regexp.MustCompile(`The network tier of external IP is STANDARD|PREMIUM, that of Address must be the same.`)
Expand Down Expand Up @@ -268,6 +271,40 @@ func GetErrorType(err error) string {
return ""
}

// IsGCEServerError reports whether err is, or wraps, a *googleapi.Error whose
// HTTP status code is at least http.StatusInternalServerError (a 5xx
// response). It returns false for a nil err and for errors that contain no
// *googleapi.Error.
func IsGCEServerError(err error) bool {
	var gerr *googleapi.Error
	// errors.As returns false for a nil err and, on success, stores the
	// matching error in gerr. Use that result directly: re-walking the chain
	// with errors.Unwrap never terminates when the match is only reachable
	// through an As method or a multi-error Unwrap() []error, because
	// errors.Unwrap returns nil for those and the walk would spin on nil.
	if !errors.As(err, &gerr) || gerr == nil {
		return false
	}
	return gerr.Code >= http.StatusInternalServerError
}

// IsK8sServerError reports whether err is, or wraps, a
// *k8serrors.StatusError whose status code is at least
// http.StatusInternalServerError (a 5xx response). It returns false for a nil
// err and for errors that contain no *k8serrors.StatusError.
func IsK8sServerError(err error) bool {
	var k8serr *k8serrors.StatusError
	// errors.As returns false for a nil err and, on success, stores the
	// matching error in k8serr. Use that result directly: re-walking the
	// chain with errors.Unwrap never terminates when the match is only
	// reachable through an As method or a multi-error Unwrap() []error,
	// because errors.Unwrap returns nil for those and the walk would spin on
	// nil.
	if !errors.As(err, &k8serr) || k8serr == nil {
		return false
	}
	return k8serr.ErrStatus.Code >= http.StatusInternalServerError
}

// PrettyJson marshals an object in a human-friendly format.
func PrettyJson(data interface{}) (string, error) {
buffer := new(bytes.Buffer)
Expand Down

0 comments on commit e019327

Please sign in to comment.