From 5f07c377db6d0604140d1978fdff214849b014fd Mon Sep 17 00:00:00 2001 From: JeromeJu Date: Thu, 7 Dec 2023 16:14:04 +0000 Subject: [PATCH] Fix: do not fail TaskRun for concurrent modification errors This commit fixes the behaviour that a concurrent modification error when stopping sidecar will fail the TaskRun, which could cause successful Tasks to fail even though it could succeed after retrying. /kind bug --- pkg/reconciler/taskrun/taskrun.go | 28 ++++++++++++++++++++++++++ pkg/reconciler/taskrun/taskrun_test.go | 28 ++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index e4e55371502..b05dfcc5e50 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -280,6 +280,12 @@ func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1.TaskRun) error { // it has probably evicted. We can return the error, but we consider it a permanent one. return controller.NewPermanentError(err) } else if err != nil { + // It is admissible for Pods to fail with concurrentModification errors + // when stopping sideCars. Instead of failing the TaskRun, we shall just + // let the reconciler requeue. + if isConcurrentModificationError(err) { + return controller.NewRequeueAfter(time.Second) + } logger.Errorf("Error stopping sidecars for TaskRun %q: %v", tr.Name, err) tr.Status.MarkResourceFailed(v1.TaskRunReasonStopSidecarFailed, err) } @@ -1014,6 +1020,28 @@ func isResourceQuotaConflictError(err error) bool { return k8ErrStatus.Details != nil && k8ErrStatus.Details.Kind == "resourcequotas" } +const ( + // TODO(#7466) Currently this appears as a local constant due to upstream dependencies bump blocker. + // This shall reference to k8s.io/apiserver/pkg/registry/generic/registry.OptimisticLockErrorMsg + // once #7464 is unblocked. + optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again" +) + +// isConcurrentModificationError determines whether it is a concurrent +// modification error depending on its error type and error message. +func isConcurrentModificationError(err error) bool { + if !k8serrors.IsConflict(err) { + return false + } + + var se *k8serrors.StatusError + if !errors.As(err, &se) { + return false + } + + return strings.Contains(err.Error(), optimisticLockErrorMsg) +} + // retryTaskRun archives taskRun.Status to taskRun.Status.RetriesStatus, and set // taskRun status to Unknown with Reason v1.TaskRunReasonToBeRetried. func retryTaskRun(tr *v1.TaskRun, message string) { diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index 5148ac1b9d6..f5e728b978e 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -65,6 +65,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" k8sapierrors "k8s.io/apimachinery/pkg/api/errors" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -6282,6 +6283,33 @@ status: } } +func TestIsConcurrentModificationError(t *testing.T) { + tcs := []struct { + description string + err error + want bool + }{{ + description: "conflict error not concurrent modification", + err: k8serrors.NewConflict(schema.ParseGroupResource("foo"), "bar", errors.New("not concurrent modification")), + want: false, + }, { + description: "concurrent modification error", + err: k8serrors.NewConflict(schema.ParseGroupResource("foo"), "bar", errors.New(optimisticLockErrorMsg)), + want: true, + }, { + description: "not conflict error", + err: k8serrors.NewNotFound(schema.ParseGroupResource("foo"), "bar"), + want: false, + }} + for _, tc := range tcs { + t.Run(tc.description, func(t *testing.T) { + if isConcurrentModificationError(tc.err) != tc.want { + t.Errorf("Unexpected concurrent modification error state") + } + }) + } +} + // getResolvedResolutionRequest is a helper function to return the ResolutionRequest and the data is filled with resourceBytes, // the ResolutionRequest's name is generated by resolverName, namespace and runName. func getResolvedResolutionRequest(t *testing.T, resolverName string, resourceBytes []byte, namespace string, runName string) resolutionv1beta1.ResolutionRequest {