From 7adb32cec1007c239e1f840f332480a1423edc8d Mon Sep 17 00:00:00 2001 From: Andrea Frittoli Date: Wed, 12 Jul 2023 12:54:08 +0100 Subject: [PATCH] Fail fast on invalid image The kubernets pod treats an invallid image failures as potentially ephemeral errors, because even if the format of the image reference is not syntactically correct, users may update image without recreating the Pod. Tekton, however, uses Pod to provide workloads that run to completion, and users are not allowed to change the specification of steps during execution. This commits changes the handling of the InvalidImageName pod reason, so that the TaskRun is marked as failed and the Pod deleted. Fixes: #6105 Signed-off-by: Andrea Frittoli --- pkg/reconciler/taskrun/taskrun.go | 28 ++-- pkg/reconciler/taskrun/taskrun_test.go | 180 ++++++++++++------------- 2 files changed, 104 insertions(+), 104 deletions(-) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index 9e32d319ff9..f1aa048bd60 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -92,9 +92,15 @@ type Reconciler struct { tracerProvider trace.TracerProvider } -// Check that our Reconciler implements taskrunreconciler.Interface var ( + // Check that our Reconciler implements taskrunreconciler.Interface _ taskrunreconciler.Interface = (*Reconciler)(nil) + + // Pod failure reasons that trigger failure of the TaskRun + podFailureReasons = map[string]struct{}{ + "ImagePullBackOff": {}, + "InvalidImageName": {}, + } ) // ReconcileKind compares the actual state with the desired, and attempts to @@ -211,17 +217,21 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1.TaskRun) pkgrecon func (c *Reconciler) checkPodFailed(tr *v1.TaskRun) (bool, v1.TaskRunReason, string) { for _, step := range tr.Status.Steps { - if step.Waiting != nil && step.Waiting.Reason == "ImagePullBackOff" { - image := step.ImageID - message := fmt.Sprintf(`The step %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, step.Name, tr.Name, image, step.Waiting.Message) - return true, v1.TaskRunReasonImagePullFailed, message + if step.Waiting != nil { + if _, found := podFailureReasons[step.Waiting.Reason]; found { + image := step.ImageID + message := fmt.Sprintf(`The step %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, step.Name, tr.Name, image, step.Waiting.Message) + return true, v1.TaskRunReasonImagePullFailed, message + } } } for _, sidecar := range tr.Status.Sidecars { - if sidecar.Waiting != nil && sidecar.Waiting.Reason == "ImagePullBackOff" { - image := sidecar.ImageID - message := fmt.Sprintf(`The sidecar %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, sidecar.Name, tr.Name, image, sidecar.Waiting.Message) - return true, v1.TaskRunReasonImagePullFailed, message + if sidecar.Waiting != nil { + if _, found := podFailureReasons[sidecar.Waiting.Reason]; found { + image := sidecar.ImageID + message := fmt.Sprintf(`The sidecar %q in TaskRun %q failed to pull the image %q. The pod errored with the message: "%s."`, sidecar.Name, tr.Name, image, sidecar.Waiting.Message) + return true, v1.TaskRunReasonImagePullFailed, message + } } } return false, "", "" diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index 8a57fb18f95..c65b2b6fc65 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -2314,65 +2314,36 @@ status: } } -func TestReconcilePodFailuresStepImagePullFailed(t *testing.T) { - taskRun := parse.MustParseV1TaskRun(t, ` -metadata: - name: test-imagepull-fail - namespace: foo -spec: - taskSpec: - steps: - - image: whatever -status: - steps: - - container: step-unnamed-0 - name: unnamed-0 - imageID: whatever - waiting: - message: Back-off pulling image "whatever" - reason: ImagePullBackOff - taskSpec: - steps: - - image: whatever -`) - expectedStatus := &apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionFalse, - Reason: "TaskRunImagePullFailed", - Message: `The step "unnamed-0" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "Back-off pulling image "whatever"."`, - } - - wantEvents := []string{ - "Normal Started ", - `Warning Failed The step "unnamed-0" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "Back-off pulling image "whatever".`, - } - d := test.Data{ - TaskRuns: []*v1.TaskRun{taskRun}, - } - testAssets, cancel := getTaskRunController(t, d) - defer cancel() - c := testAssets.Controller - clients := testAssets.Clients - - if err := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(taskRun)); err != nil { - t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err) - } - newTr, err := clients.Pipeline.TektonV1().TaskRuns(taskRun.Namespace).Get(testAssets.Ctx, taskRun.Name, metav1.GetOptions{}) - if err != nil { - t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err) - } - condition := newTr.Status.GetCondition(apis.ConditionSucceeded) - if d := cmp.Diff(expectedStatus, condition, ignoreLastTransitionTime); d != "" { - t.Fatalf("Did not get expected condition %s", diff.PrintWantGot(d)) - } - err = k8sevent.CheckEventsOrdered(t, testAssets.Recorder.Events, taskRun.Name, wantEvents) - if err != nil { - t.Errorf(err.Error()) - } -} - -func TestReconcilePodFailuresSidecarImagePullFailed(t *testing.T) { - taskRun := parse.MustParseV1TaskRun(t, ` +func TestReconcilePodFailuresImageFailures(t *testing.T) { + var stepNumber int8 + for _, tc := range []struct { + desc string + reason string + message string + failure string // "step" or "sidecar" + }{{ + desc: "image pull failed sidecar", + reason: "ImagePullBackOff", + message: "Back-off pulling image \"whatever\"", + failure: "sidecar", + }, { + desc: "invalid image sidecar", + reason: "InvalidImageName", + message: "Invalid image \"whatever\"", + failure: "sidecar", + }, { + desc: "image pull failed step", + reason: "ImagePullBackOff", + message: "Back-off pulling image \"whatever\"", + failure: "step", + }, { + desc: "invalid image step", + reason: "InvalidImageName", + message: "Invalid image \"whatever\"", + failure: "step", + }} { + t.Run(tc.desc, func(t *testing.T) { + taskRun := parse.MustParseV1TaskRun(t, ` metadata: name: test-imagepull-fail namespace: foo @@ -2392,14 +2363,10 @@ status: - container: step-unnamed-1 name: unnamed-1 imageID: whatever - waiting: - message: Back-off pulling image "whatever" - reason: ImagePullBackOff steps: - container: step-unnamed-2 name: unnamed-2 - running: - startedAt: "2022-06-09T10:13:41Z" + imageID: whatever taskSpec: sidecars: - image: ubuntu @@ -2407,39 +2374,62 @@ status: steps: - image: alpine `) - expectedStatus := &apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionFalse, - Reason: "TaskRunImagePullFailed", - Message: `The sidecar "unnamed-1" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "Back-off pulling image "whatever"."`, - } + startTime, _ := time.Parse(time.RFC3339, "2022-06-09T10:13:41Z") + if tc.failure == "step" { + taskRun.Status.Steps[0].Waiting = &corev1.ContainerStateWaiting{ + Message: tc.message, + Reason: tc.reason, + } + taskRun.Status.Sidecars[1].Running = &corev1.ContainerStateRunning{ + StartedAt: metav1.NewTime(startTime), + } + stepNumber = 2 + } else { + taskRun.Status.Sidecars[1].Waiting = &corev1.ContainerStateWaiting{ + Message: tc.message, + Reason: tc.reason, + } + taskRun.Status.Steps[0].Running = &corev1.ContainerStateRunning{ + StartedAt: metav1.NewTime(startTime), + } + stepNumber = 1 + } - wantEvents := []string{ - "Normal Started ", - `Warning Failed The sidecar "unnamed-1" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "Back-off pulling image "whatever".`, - } - d := test.Data{ - TaskRuns: []*v1.TaskRun{taskRun}, - } - testAssets, cancel := getTaskRunController(t, d) - defer cancel() - c := testAssets.Controller - clients := testAssets.Clients + expectedStatus := &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionFalse, + Reason: "TaskRunImagePullFailed", + Message: fmt.Sprintf(`The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s."`, tc.failure, stepNumber, tc.message), + } - if err := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(taskRun)); err != nil { - t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err) - } - newTr, err := clients.Pipeline.TektonV1().TaskRuns(taskRun.Namespace).Get(testAssets.Ctx, taskRun.Name, metav1.GetOptions{}) - if err != nil { - t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err) - } - condition := newTr.Status.GetCondition(apis.ConditionSucceeded) - if d := cmp.Diff(expectedStatus, condition, ignoreLastTransitionTime); d != "" { - t.Fatalf("Did not get expected condition %s", diff.PrintWantGot(d)) - } - err = k8sevent.CheckEventsOrdered(t, testAssets.Recorder.Events, taskRun.Name, wantEvents) - if err != nil { - t.Errorf(err.Error()) + wantEvents := []string{ + "Normal Started ", + fmt.Sprintf(`Warning Failed The %s "unnamed-%d" in TaskRun "test-imagepull-fail" failed to pull the image "whatever". The pod errored with the message: "%s.`, tc.failure, stepNumber, tc.message), + } + d := test.Data{ + TaskRuns: []*v1.TaskRun{taskRun}, + } + testAssets, cancel := getTaskRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + + if err := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(taskRun)); err != nil { + t.Fatalf("Unexpected error when reconciling completed TaskRun : %v", err) + } + newTr, err := clients.Pipeline.TektonV1().TaskRuns(taskRun.Namespace).Get(testAssets.Ctx, taskRun.Name, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Expected completed TaskRun %s to exist but instead got error when getting it: %v", taskRun.Name, err) + } + condition := newTr.Status.GetCondition(apis.ConditionSucceeded) + if d := cmp.Diff(expectedStatus, condition, ignoreLastTransitionTime); d != "" { + t.Fatalf("Did not get expected condition %s", diff.PrintWantGot(d)) + } + err = k8sevent.CheckEventsOrdered(t, testAssets.Recorder.Events, taskRun.Name, wantEvents) + if err != nil { + t.Errorf(err.Error()) + } + }) } }