Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: do not fail TaskRun for concurrent modification errors #7467

Merged
merged 1 commit into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1.TaskRun) error {
// it has probably evicted. We can return the error, but we consider it a permanent one.
return controller.NewPermanentError(err)
} else if err != nil {
// It is admissible for Pods to fail with concurrentModification errors
// when stopping sideCars. Instead of failing the TaskRun, we shall just
// let the reconciler requeue.
if isConcurrentModificationError(err) {
return controller.NewRequeueAfter(time.Second)
}
logger.Errorf("Error stopping sidecars for TaskRun %q: %v", tr.Name, err)
tr.Status.MarkResourceFailed(v1.TaskRunReasonStopSidecarFailed, err)
}
Expand Down Expand Up @@ -1014,6 +1020,28 @@ func isResourceQuotaConflictError(err error) bool {
return k8ErrStatus.Details != nil && k8ErrStatus.Details.Kind == "resourcequotas"
}

const (
// TODO(#7466) Currently this appears as a local constant due to upstream dependencies bump blocker.
Copy link
Member

@Yongxuanzhang Yongxuanzhang Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

~is 7466 a pr?~
Oh I see, #7466 this one

// This shall reference to k8s.io/apiserver/pkg/registry/generic/registry.OptimisticLockErrorMsg
// once #7464 is unblocked.
optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
)

// isConcurrentModificationError determines whether it is a concurrent
// modification error depending on its error type and error message.
func isConcurrentModificationError(err error) bool {
if !k8serrors.IsConflict(err) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why just checking k8serrors.IsConflict() isn't sufficient and we have to check the exact optimisticLockErrorMsg?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are actual other cases where the NewConflict() errors are thrown i.e. there is an invalid storage error or uid mismatch where there could be a case of an object was missing unexpectedly.

It might be safe to only retry on concurrent modifications for this case IIUC.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, makes sense.

return false
}

var se *k8serrors.StatusError
if !errors.As(err, &se) {
return false
}

return strings.Contains(err.Error(), optimisticLockErrorMsg)
}

// retryTaskRun archives taskRun.Status to taskRun.Status.RetriesStatus, and set
// taskRun status to Unknown with Reason v1.TaskRunReasonToBeRetried.
func retryTaskRun(tr *v1.TaskRun, message string) {
Expand Down
28 changes: 28 additions & 0 deletions pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ import (
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
k8sapierrors "k8s.io/apimachinery/pkg/api/errors"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -6282,6 +6283,33 @@ status:
}
}

func TestIsConcurrentModificationError(t *testing.T) {
tcs := []struct {
description string
err error
want bool
}{{
description: "conflict error not concurrent modification",
err: k8serrors.NewConflict(schema.ParseGroupResource("foo"), "bar", errors.New("not concurrent modification")),
want: false,
}, {
description: "concurrent modification error",
err: k8serrors.NewConflict(schema.ParseGroupResource("foo"), "bar", errors.New(optimisticLockErrorMsg)),
want: true,
}, {
description: "not conflict error",
err: k8serrors.NewNotFound(schema.ParseGroupResource("foo"), "bar"),
want: false,
}}
for _, tc := range tcs {
t.Run(tc.description, func(t *testing.T) {
if isConcurrentModificationError(tc.err) != tc.want {
t.Errorf("Unexpected concurrent modification error state")
}
})
}
}

// getResolvedResolutionRequest is a helper function to return the ResolutionRequest and the data is filled with resourceBytes,
// the ResolutionRequest's name is generated by resolverName, namespace and runName.
func getResolvedResolutionRequest(t *testing.T, resolverName string, resourceBytes []byte, namespace string, runName string) resolutionv1beta1.ResolutionRequest {
Expand Down
Loading