Skip to content

Commit

Permalink
Record metrics for temporary error.
Browse files Browse the repository at this point in the history
  • Loading branch information
eipi-one committed Oct 3, 2024
1 parent 2fbd1f1 commit f6d8964
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 1 deletion.
19 changes: 18 additions & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License.
package metrics

import (
"errors"
"fmt"
"net/http"
"os"
Expand All @@ -23,6 +24,8 @@ import (
"google.golang.org/grpc/status"
"k8s.io/component-base/metrics"
"k8s.io/klog/v2"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/cloud_provider/file"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/common"
)

const (
Expand Down Expand Up @@ -147,7 +150,7 @@ func (mm *MetricsManager) recordComponentVersionMetric() error {
}

// RecordOperationMetrics observes opDuration, in seconds, on the operationSeconds
// metric. The observation is labeled with an error code derived from opErr, the
// methodName, and the filestoreMode.
func (mm *MetricsManager) RecordOperationMetrics(opErr error, methodName string, filestoreMode string, opDuration time.Duration) {
// NOTE(review): this hunk appears to be a diff rendering whose +/- markers were
// lost. The getErrorCode line below is presumably the line removed by the commit
// and the errorCodeLabelValue line its replacement, so the real file would make a
// single Observe call per operation — confirm against the repository.
operationSeconds.WithLabelValues(getErrorCode(opErr), methodName, filestoreMode).Observe(opDuration.Seconds())
operationSeconds.WithLabelValues(errorCodeLabelValue(opErr), methodName, filestoreMode).Observe(opDuration.Seconds())
}

func (mm *MetricsManager) RecordKubeAPIMetrics(opErr error, resourceType, opType, opSource string, opDuration time.Duration) {
Expand Down Expand Up @@ -194,6 +197,20 @@ func (mm *MetricsManager) EmitGKEComponentVersion() error {
return nil
}

// errorCodeLabelValue maps an operation error to the string used as the
// error-code label on operation metrics.
//
// A nil operationErr yields codes.OK.String(). Otherwise the error is passed
// through file.StatusError and the result is handed to getErrorCode.
func errorCodeLabelValue(operationErr error) string {
	if operationErr == nil {
		return codes.OK.String()
	}

	// When a *common.TemporaryError is found in the chain, classify the result
	// of its Unwrap method rather than the temporary wrapper itself.
	var temporary *common.TemporaryError
	if errors.As(operationErr, &temporary) {
		operationErr = temporary.Unwrap()
	}

	return getErrorCode(file.StatusError(operationErr))
}

// Server represents any type that could serve HTTP requests for the metrics
// endpoint.
type Server interface {
Expand Down
83 changes: 83 additions & 0 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
package metrics

import (
"context"
"errors"
"fmt"
"net/http"
"testing"

"google.golang.org/api/googleapi"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"github.com/google/go-cmp/cmp"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/common"
)

const (
Expand All @@ -24,3 +34,76 @@ func TestProcessStartTimeMetricExist(t *testing.T) {

t.Fatalf("Metrics does not contain %v. Scraped content: %v", ProcessStartTimeMetric, metricsFamilies)
}


// TestErrorCodeLabelValue checks the metric label errorCodeLabelValue produces
// for a nil error, plain errors, googleapi errors, context errors, gRPC status
// errors, and TemporaryErrors wrapping each of those kinds.
func TestErrorCodeLabelValue(t *testing.T) {
	testCases := []struct {
		name          string
		operationErr  error
		wantErrorCode string
	}{
		{
			// errorCodeLabelValue returns codes.OK.String() when there is no error.
			name:          "nil error",
			operationErr:  nil,
			wantErrorCode: "OK",
		},
		{
			name:          "Not googleapi.Error",
			operationErr:  errors.New("I am not a googleapi.Error"),
			wantErrorCode: "Internal",
		},
		{
			name:          "User error",
			operationErr:  &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
			wantErrorCode: "InvalidArgument",
		},
		{
			name:          "googleapi.Error but not a user error",
			operationErr:  &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
			wantErrorCode: "Internal",
		},
		{
			name:          "context canceled error",
			operationErr:  context.Canceled,
			wantErrorCode: "Canceled",
		},
		{
			name:          "context deadline exceeded error",
			operationErr:  context.DeadlineExceeded,
			wantErrorCode: "DeadlineExceeded",
		},
		{
			name:          "status error with Aborted error code",
			operationErr:  status.Error(codes.Aborted, "aborted error"),
			wantErrorCode: "Aborted",
		},
		{
			name:          "user multiattach error",
			operationErr:  fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'"),
			wantErrorCode: "Internal",
		},
		{
			name:          "TemporaryError that wraps googleapi error",
			operationErr:  common.NewTemporaryError(codes.Unavailable, &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"}),
			wantErrorCode: "InvalidArgument",
		},
		{
			name:          "TemporaryError that wraps fmt.Errorf, which wraps googleapi error",
			operationErr:  common.NewTemporaryError(codes.Aborted, fmt.Errorf("got error: %w", &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"})),
			wantErrorCode: "InvalidArgument",
		},
		{
			name:          "TemporaryError that wraps status error",
			operationErr:  common.NewTemporaryError(codes.Aborted, status.Error(codes.InvalidArgument, "User error with bad request")),
			wantErrorCode: "InvalidArgument",
		},
		{
			name:          "TemporaryError that wraps multiattach error",
			operationErr:  common.NewTemporaryError(codes.Unavailable, fmt.Errorf("The disk resource 'projects/foo/disk/bar' is already being used by 'projects/foo/instances/1'")),
			wantErrorCode: "Internal",
		},
	}

	for _, tc := range testCases {
		// Run every case as a named subtest so failures are reported per case
		// and a single case can be selected with -run. The subtests are not
		// parallel, so capturing tc in the closure is safe on any Go version.
		t.Run(tc.name, func(t *testing.T) {
			errCode := errorCodeLabelValue(tc.operationErr)
			if diff := cmp.Diff(tc.wantErrorCode, errCode); diff != "" {
				t.Errorf("%s: -want err, +got err\n%s", tc.name, diff)
			}
		})
	}
}

0 comments on commit f6d8964

Please sign in to comment.