From 98c3b9085f6fd1fd36884904238ef0e6f61668e6 Mon Sep 17 00:00:00 2001 From: JeromeJu Date: Fri, 17 Nov 2023 18:19:31 +0000 Subject: [PATCH] Error sweep: refactor steps termination when failing TaskRun This commit refactors the step termination logics once the pod for the TaskRun is marked failed. It adds to the container termination message and complete the prerequisites for differentiating pod termination reason with TaskRunReasons which are currently used to cover container termination reasons. --- pkg/reconciler/taskrun/taskrun.go | 25 ++++++++++++++++--------- pkg/reconciler/taskrun/taskrun_test.go | 9 +++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index a8463fe2b25..7939a38f46f 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -724,25 +724,32 @@ func (c *Reconciler) failTaskRun(ctx context.Context, tr *v1.TaskRun, reason v1. var err error if reason == v1.TaskRunReasonCancelled && (config.FromContextOrDefaults(ctx).FeatureFlags.EnableKeepPodOnCancel && config.FromContextOrDefaults(ctx).FeatureFlags.EnableAPIFields == config.AlphaAPIFields) { - logger.Infof("canceling task run %q by entrypoint", tr.Name) + logger.Infof("Canceling task run %q by entrypoint", tr.Name) err = podconvert.CancelPod(ctx, c.KubeClientSet, tr.Namespace, tr.Status.PodName) } else { err = c.KubeClientSet.CoreV1().Pods(tr.Namespace).Delete(ctx, tr.Status.PodName, metav1.DeleteOptions{}) } if err != nil && !k8serrors.IsNotFound(err) { - logger.Infof("Failed to terminate pod: %v", err) + logger.Errorf("Failed to terminate pod %s: %v", tr.Status.PodName, err) return err } - // Update step states for TaskRun on TaskRun object since pod has been deleted for cancel or timeout + terminateStepsInPod(tr, reason) + return nil +} + +// terminateStepsInPod updates step states for TaskRun on TaskRun object since pod has been deleted for cancel or timeout +func terminateStepsInPod(tr *v1.TaskRun, taskRunReason v1.TaskRunReason) { for i, step := range tr.Status.Steps { // If running, include StartedAt for when step began running if step.Running != nil { step.Terminated = &corev1.ContainerStateTerminated{ ExitCode: 1, StartedAt: step.Running.StartedAt, - FinishedAt: completionTime, - Reason: reason.String(), + FinishedAt: *tr.Status.CompletionTime, + // TODO(#7385): replace with more pod/container termination reason instead of overloading taskRunReason + Reason: taskRunReason.String(), + Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } step.Running = nil tr.Status.Steps[i] = step @@ -751,15 +758,15 @@ func (c *Reconciler) failTaskRun(ctx context.Context, tr *v1.TaskRun, reason v1. if step.Waiting != nil { step.Terminated = &corev1.ContainerStateTerminated{ ExitCode: 1, - FinishedAt: completionTime, - Reason: reason.String(), + FinishedAt: *tr.Status.CompletionTime, + // TODO(#7385): replace with more pod/container termination reason instead of overloading taskRunReason + Reason: taskRunReason.String(), + Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } step.Waiting = nil tr.Status.Steps[i] = step } } - - return nil } // createPod creates a Pod based on the Task's configuration, with pvcName as a volumeMount diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index 5c870577793..aaa2d2de0f1 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -4189,6 +4189,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonCancelled.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4234,6 +4235,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonCancelled.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4275,6 +4277,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4331,6 +4334,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4339,6 +4343,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4384,6 +4389,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4392,6 +4398,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4400,6 +4407,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, }, @@ -4453,6 +4461,7 @@ status: Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, Reason: v1.TaskRunReasonTimedOut.String(), + Message: "Step terminated as pod foo-is-bar is terminated", }, }, },