From 26c4755936df55493881f48d82d83f00a1ba0315 Mon Sep 17 00:00:00 2001 From: Renzo Rojas Silva Date: Fri, 12 Jan 2024 19:36:30 -0500 Subject: [PATCH] Add granular termination reason in container termination message Related with #7539 and #7223 To report specific Steps termination reasons we need to know why its continer finished; we use the termination message to store a new "state" with this information. We are adding a new field to store this information per step. Co-authored-by: JeromeJu <46675578+JeromeJu@users.noreply.github.com> Co-authored-by: Chitrang Patel --- cmd/entrypoint/main.go | 2 +- cmd/entrypoint/waiter.go | 8 +- cmd/entrypoint/waiter_test.go | 4 +- docs/pipeline-api.md | 10 + docs/stepactions.md | 1 + docs/taskruns.md | 2 + pkg/apis/pipeline/v1/openapi_generated.go | 6 + pkg/apis/pipeline/v1/swagger.json | 3 + pkg/apis/pipeline/v1/taskrun_types.go | 1 + .../pipeline/v1beta1/taskrun_conversion.go | 5 + pkg/entrypoint/entrypointer.go | 38 ++- pkg/entrypoint/entrypointer_test.go | 186 ++++++++++- pkg/pod/pod.go | 12 + pkg/pod/status.go | 38 ++- pkg/pod/status_test.go | 208 ++++++++++++- pkg/reconciler/taskrun/taskrun.go | 2 + pkg/reconciler/taskrun/taskrun_test.go | 11 + test/conversion_test.go | 2 + test/propagated_params_test.go | 2 +- test/taskrun_test.go | 293 +++++++++++++++++- test/upgrade_test.go | 2 + 21 files changed, 795 insertions(+), 41 deletions(-) diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go index dead6fa6581..adf7a0a9d4a 100644 --- a/cmd/entrypoint/main.go +++ b/cmd/entrypoint/main.go @@ -178,7 +178,7 @@ func main() { if err := e.Go(); err != nil { breakpointExitPostFile := e.PostFile + breakpointExitSuffix switch t := err.(type) { //nolint:errorlint // checking for multiple types with errors.As is ugly. - case skipError: + case entrypoint.SkipError: log.Print("Skipping step because a previous step failed") os.Exit(1) case termination.MessageLengthError: diff --git a/cmd/entrypoint/waiter.go b/cmd/entrypoint/waiter.go index 656b015af74..3de2a3bc5f5 100644 --- a/cmd/entrypoint/waiter.go +++ b/cmd/entrypoint/waiter.go @@ -71,7 +71,7 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool, if breakpointOnFailure { return nil } - return skipError("error file present, bail and skip the step") + return entrypoint.ErrSkipPreviousStepFailed } select { case <-ctx.Done(): @@ -86,9 +86,3 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool, } } } - -type skipError string - -func (e skipError) Error() string { - return string(e) -} diff --git a/cmd/entrypoint/waiter_test.go b/cmd/entrypoint/waiter_test.go index c8d2b016a30..2f7500defd5 100644 --- a/cmd/entrypoint/waiter_test.go +++ b/cmd/entrypoint/waiter_test.go @@ -153,7 +153,7 @@ func TestRealWaiterWaitWithErrorWaitfile(t *testing.T) { if err == nil { t.Errorf("expected skipError upon encounter error waitfile") } - var skipErr skipError + var skipErr entrypoint.SkipError if errors.As(err, &skipErr) { close(doneCh) } else { @@ -292,7 +292,7 @@ func TestRealWaiterWaitContextWithErrorWaitfile(t *testing.T) { if err == nil { t.Errorf("expected skipError upon encounter error waitfile") } - var skipErr skipError + var skipErr entrypoint.SkipError if errors.As(err, &skipErr) { close(doneCh) } else { diff --git a/docs/pipeline-api.md b/docs/pipeline-api.md index 4d09b848f60..c2f4e3a750a 100644 --- a/docs/pipeline-api.md +++ b/docs/pipeline-api.md @@ -4642,6 +4642,16 @@ string + + +terminationReason
+ +string + + + + +

StepTemplate diff --git a/docs/stepactions.md b/docs/stepactions.md index 8dc3ed91c5b..4c9962e8f7f 100644 --- a/docs/stepactions.md +++ b/docs/stepactions.md @@ -411,6 +411,7 @@ status: - container: step-action-runner imageID: docker.io/library/alpine@sha256:eece025e432126ce23f223450a0326fbebde39cdf496a85d8c016293fc851978 name: action-runner + terminationReason: Completed terminated: containerID: containerd://46a836588967202c05b594696077b147a0eb0621976534765478925bb7ce57f6 exitCode: 0 diff --git a/docs/taskruns.md b/docs/taskruns.md index 6db9c375f1b..1e161902048 100644 --- a/docs/taskruns.md +++ b/docs/taskruns.md @@ -799,6 +799,7 @@ The `status` field defines the observed state of `TaskRun` - `refSource`: the source from where a remote `Task` definition was fetched. - `featureFlags`: Identifies the feature flags used during the `TaskRun`. - `steps` - Contains the `state` of each `step` container. + - `steps[].terminationReason` - When the step is terminated, it stores the step's final state. - `retriesStatus` - Contains the history of `TaskRun`'s `status` in case of a retry in order to keep record of failures. No `status` stored within `retriesStatus` will have any `date` within as it is redundant. - [`sidecars`](tasks.md#using-a-sidecar-in-a-task) - This field is a list. The list has one entry per `sidecar` in the manifest. Each entry represents the imageid of the corresponding sidecar. @@ -831,6 +832,7 @@ steps: - container: step-hello imageID: docker-pullable://busybox@sha256:895ab622e92e18d6b461d671081757af7dbaa3b00e3e28e12505af7817f73649 name: hello + terminationReason: Completed terminated: containerID: docker://d5a54f5bbb8e7a6fd3bc7761b78410403244cf4c9c5822087fb0209bf59e3621 exitCode: 0 diff --git a/pkg/apis/pipeline/v1/openapi_generated.go b/pkg/apis/pipeline/v1/openapi_generated.go index c6a73d7afc3..d6f61ef5faf 100644 --- a/pkg/apis/pipeline/v1/openapi_generated.go +++ b/pkg/apis/pipeline/v1/openapi_generated.go @@ -3142,6 +3142,12 @@ func schema_pkg_apis_pipeline_v1_StepState(ref common.ReferenceCallback) common. }, }, }, + "terminationReason": { + SchemaProps: spec.SchemaProps{ + Type: []string{"string"}, + Format: "", + }, + }, }, }, }, diff --git a/pkg/apis/pipeline/v1/swagger.json b/pkg/apis/pipeline/v1/swagger.json index 1424609e3b6..397a1ae46e1 100644 --- a/pkg/apis/pipeline/v1/swagger.json +++ b/pkg/apis/pipeline/v1/swagger.json @@ -1609,6 +1609,9 @@ "description": "Details about a terminated container", "$ref": "#/definitions/v1.ContainerStateTerminated" }, + "terminationReason": { + "type": "string" + }, "waiting": { "description": "Details about a waiting container", "$ref": "#/definitions/v1.ContainerStateWaiting" diff --git a/pkg/apis/pipeline/v1/taskrun_types.go b/pkg/apis/pipeline/v1/taskrun_types.go index 7c3cf232ee5..3454fa4ebdb 100644 --- a/pkg/apis/pipeline/v1/taskrun_types.go +++ b/pkg/apis/pipeline/v1/taskrun_types.go @@ -357,6 +357,7 @@ type StepState struct { Container string `json:"container,omitempty"` ImageID string `json:"imageID,omitempty"` Results []TaskRunStepResult `json:"results,omitempty"` + TerminationReason string `json:"terminationReason,omitempty"` } // SidecarState reports the results of running a sidecar in a Task. diff --git a/pkg/apis/pipeline/v1beta1/taskrun_conversion.go b/pkg/apis/pipeline/v1beta1/taskrun_conversion.go index de2243f25a2..9706fbdb041 100644 --- a/pkg/apis/pipeline/v1beta1/taskrun_conversion.go +++ b/pkg/apis/pipeline/v1beta1/taskrun_conversion.go @@ -344,6 +344,11 @@ func (ss StepState) convertTo(ctx context.Context, sink *v1.StepState) { sink.Container = ss.ContainerName sink.ImageID = ss.ImageID sink.Results = nil + + if ss.ContainerState.Terminated != nil { + sink.TerminationReason = ss.ContainerState.Terminated.Reason + } + for _, r := range ss.Results { new := v1.TaskRunStepResult{} r.convertTo(ctx, &new) diff --git a/pkg/entrypoint/entrypointer.go b/pkg/entrypoint/entrypointer.go index d27d3554682..a1d4893c8ee 100644 --- a/pkg/entrypoint/entrypointer.go +++ b/pkg/entrypoint/entrypointer.go @@ -55,11 +55,19 @@ func (e ContextError) Error() string { return string(e) } +type SkipError string + +func (e SkipError) Error() string { + return string(e) +} + var ( // ErrContextDeadlineExceeded is the error returned when the context deadline is exceeded ErrContextDeadlineExceeded = ContextError(context.DeadlineExceeded.Error()) // ErrContextCanceled is the error returned when the context is canceled ErrContextCanceled = ContextError(context.Canceled.Error()) + // ErrSkipPreviousStepFailed is the error returned when the step is skipped due to previous step error + ErrSkipPreviousStepFailed = SkipError("error file present, bail and skip the step") ) // IsContextDeadlineError determine whether the error is context deadline @@ -167,6 +175,11 @@ func (e Entrypointer) Go() error { Value: time.Now().Format(timeFormat), ResultType: result.InternalTektonResultType, }) + + if errors.Is(err, ErrSkipPreviousStepFailed) { + output = append(output, e.outputRunResult(pod.TerminationReasonSkipped)) + } + return err } } @@ -199,26 +212,18 @@ func (e Entrypointer) Go() error { } }() err = e.Runner.Run(ctx, e.Command...) - if errors.Is(err, ErrContextDeadlineExceeded) { - output = append(output, result.RunResult{ - Key: "Reason", - Value: "TimeoutExceeded", - ResultType: result.InternalTektonResultType, - }) - } } var ee *exec.ExitError switch { case err != nil && errors.Is(err, ErrContextCanceled): logger.Info("Step was canceling") - output = append(output, result.RunResult{ - Key: "Reason", - Value: "Cancelled", - ResultType: result.InternalTektonResultType, - }) + output = append(output, e.outputRunResult(pod.TerminationReasonCancelled)) e.WritePostFile(e.PostFile, ErrContextCanceled) e.WriteExitCodeFile(e.StepMetadataDir, syscall.SIGKILL.String()) + case errors.Is(err, ErrContextDeadlineExceeded): + e.WritePostFile(e.PostFile, err) + output = append(output, e.outputRunResult(pod.TerminationReasonTimeoutExceeded)) case err != nil && e.BreakpointOnFailure: logger.Info("Skipping writing to PostFile") case e.OnError == ContinueOnError && errors.As(err, &ee): @@ -453,3 +458,12 @@ func (e *Entrypointer) applyStepResultSubstitutions(stepDir string) error { e.Command = newCommand return nil } + +// outputRunResult returns the run reason for a termination +func (e Entrypointer) outputRunResult(terminationReason string) result.RunResult { + return result.RunResult{ + Key: "Reason", + Value: terminationReason, + ResultType: result.InternalTektonResultType, + } +} diff --git a/pkg/entrypoint/entrypointer_test.go b/pkg/entrypoint/entrypointer_test.go index 159a809fa64..29d3563ef0c 100644 --- a/pkg/entrypoint/entrypointer_test.go +++ b/pkg/entrypoint/entrypointer_test.go @@ -901,29 +901,203 @@ func TestIsContextCanceledError(t *testing.T) { } } +func TestTerminationReason(t *testing.T) { + tests := []struct { + desc string + waitFiles []string + onError string + runError error + expectedRunErr error + expectedExitCode *string + expectedWrotefile *string + expectedStatus []result.RunResult + }{ + { + desc: "reason completed", + expectedExitCode: ptr("0"), + expectedWrotefile: ptr("postfile"), + expectedStatus: []result.RunResult{ + { + Key: "StartedAt", + ResultType: result.InternalTektonResultType, + }, + }, + }, + { + desc: "reason continued", + onError: ContinueOnError, + runError: ptr(exec.ExitError{}), + expectedRunErr: ptr(exec.ExitError{}), + expectedExitCode: ptr("-1"), + expectedWrotefile: ptr("postfile"), + expectedStatus: []result.RunResult{ + { + Key: "ExitCode", + Value: "-1", + ResultType: result.InternalTektonResultType, + }, + { + Key: "StartedAt", + ResultType: result.InternalTektonResultType, + }, + }, + }, + { + desc: "reason errored", + runError: ptr(exec.Error{}), + expectedRunErr: ptr(exec.Error{}), + expectedWrotefile: ptr("postfile.err"), + expectedStatus: []result.RunResult{ + { + Key: "StartedAt", + ResultType: result.InternalTektonResultType, + }, + }, + }, + { + desc: "reason timedout", + runError: ErrContextDeadlineExceeded, + expectedRunErr: ErrContextDeadlineExceeded, + expectedWrotefile: ptr("postfile.err"), + expectedStatus: []result.RunResult{ + { + Key: "Reason", + Value: pod.TerminationReasonTimeoutExceeded, + ResultType: result.InternalTektonResultType, + }, + { + Key: "StartedAt", + ResultType: result.InternalTektonResultType, + }, + }, + }, + { + desc: "reason skipped", + waitFiles: []string{"file"}, + expectedRunErr: ErrSkipPreviousStepFailed, + expectedWrotefile: ptr("postfile.err"), + expectedStatus: []result.RunResult{ + { + Key: "Reason", + Value: pod.TerminationReasonSkipped, + ResultType: result.InternalTektonResultType, + }, + { + Key: "StartedAt", + ResultType: result.InternalTektonResultType, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + fw, fr, fpw := &fakeWaiter{skipStep: true}, &fakeRunner{runError: test.runError}, &fakePostWriter{} + + tmpFolder, err := os.MkdirTemp("", "") + if err != nil { + t.Fatalf("unexpected error creating temporary folder: %v", err) + } else { + defer os.RemoveAll(tmpFolder) + } + + terminationFile, err := os.CreateTemp(tmpFolder, "termination") + if err != nil { + t.Fatalf("unexpected error creating termination file: %v", err) + } + + e := Entrypointer{ + Command: append([]string{}, []string{}...), + WaitFiles: test.waitFiles, + PostFile: "postfile", + Waiter: fw, + Runner: fr, + PostWriter: fpw, + TerminationPath: terminationFile.Name(), + BreakpointOnFailure: false, + StepMetadataDir: tmpFolder, + OnError: test.onError, + } + + err = e.Go() + + if d := cmp.Diff(test.expectedRunErr, err); d != "" { + t.Fatalf("entrypoint error doesn't match %s", diff.PrintWantGot(d)) + } + + if d := cmp.Diff(test.expectedExitCode, fpw.exitCode); d != "" { + t.Fatalf("exitCode doesn't match %s", diff.PrintWantGot(d)) + } + + if d := cmp.Diff(test.expectedWrotefile, fpw.wrote); d != "" { + t.Fatalf("wrote file doesn't match %s", diff.PrintWantGot(d)) + } + + termination, err := getTermination(t, terminationFile.Name()) + if err != nil { + t.Fatalf("error getting termination output: %v", err) + } + + if d := cmp.Diff(test.expectedStatus, termination); d != "" { + t.Fatalf("termination status doesn't match %s", diff.PrintWantGot(d)) + } + }) + } +} + +func getTermination(t *testing.T, terminationFile string) ([]result.RunResult, error) { + t.Helper() + fileContents, err := os.ReadFile(terminationFile) + if err != nil { + return nil, err + } + + logger, _ := logging.NewLogger("", "status") + terminationStatus, err := termination.ParseMessage(logger, string(fileContents)) + if err != nil { + return nil, err + } + + for i, termination := range terminationStatus { + if termination.Key == "StartedAt" { + terminationStatus[i].Value = "" + } + } + + return terminationStatus, nil +} + type fakeWaiter struct { sync.Mutex waited []string waitCancelDuration time.Duration + skipStep bool } func (f *fakeWaiter) Wait(ctx context.Context, file string, _ bool, _ bool) error { - if file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0 { + switch { + case file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0: time.Sleep(f.waitCancelDuration) - } else if file == pod.DownwardMountCancelFile { + case file == pod.DownwardMountCancelFile: return nil + case f.skipStep: + return ErrSkipPreviousStepFailed } + f.Lock() f.waited = append(f.waited, file) f.Unlock() return nil } -type fakeRunner struct{ args *[]string } +type fakeRunner struct { + args *[]string + runError error +} func (f *fakeRunner) Run(ctx context.Context, args ...string) error { f.args = &args - return nil + return f.runError } type fakePostWriter struct { @@ -1057,3 +1231,7 @@ func getMockSpireClient(ctx context.Context) (spire.EntrypointerAPIClient, spire return sc, sc, tr } + +func ptr[T any](value T) *T { + return &value +} diff --git a/pkg/pod/pod.go b/pkg/pod/pod.go index 709696aff92..d63111db427 100644 --- a/pkg/pod/pod.go +++ b/pkg/pod/pod.go @@ -61,6 +61,18 @@ const ( // osSelectorLabel is the label Kubernetes uses for OS-specific workloads (https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetes-io-os) osSelectorLabel = "kubernetes.io/os" + + // TerminationReasonTimeoutExceeded indicates a step execution timed out. + TerminationReasonTimeoutExceeded = "TimeoutExceeded" + + // TerminationReasonSkipped indicates a step execution was skipped due to previous step failed. + TerminationReasonSkipped = "Skipped" + + // TerminationReasonContinued indicates a step errored but was ignored since onError was set to continue. + TerminationReasonContinued = "Continued" + + // TerminationReasonCancelled indicates a step was cancelled. + TerminationReasonCancelled = "Cancelled" ) // These are effectively const, but Go doesn't have such an annotation. diff --git a/pkg/pod/status.go b/pkg/pod/status.go index 2a5351763ac..2b1cff90d40 100644 --- a/pkg/pod/status.go +++ b/pkg/pod/status.go @@ -272,6 +272,7 @@ func setTaskRunStatusBasedOnStepStatus(ctx context.Context, logger *zap.SugaredL } // Parse termination messages + terminationReason := "" if state.Terminated != nil && len(state.Terminated.Message) != 0 { msg := state.Terminated.Message @@ -311,14 +312,18 @@ func setTaskRunStatusBasedOnStepStatus(ctx context.Context, logger *zap.SugaredL if exitCode != nil { state.Terminated.ExitCode = *exitCode } + + terminationFromResults := extractTerminationReasonFromResults(results) + terminationReason = getTerminationReason(state.Terminated.Reason, terminationFromResults, exitCode) } } trs.Steps = append(trs.Steps, v1.StepState{ - ContainerState: *state, - Name: trimStepPrefix(s.Name), - Container: s.Name, - ImageID: s.ImageID, - Results: taskRunStepResults, + ContainerState: *state, + Name: trimStepPrefix(s.Name), + Container: s.Name, + ImageID: s.ImageID, + Results: taskRunStepResults, + TerminationReason: terminationReason, }) } @@ -488,6 +493,27 @@ func extractExitCodeFromResults(results []result.RunResult) (*int32, error) { return nil, nil //nolint:nilnil // would be more ergonomic to return a sentinel error } +func extractTerminationReasonFromResults(results []result.RunResult) string { + for _, r := range results { + if r.ResultType == result.InternalTektonResultType && r.Key == "Reason" { + return r.Value + } + } + return "" +} + +func getTerminationReason(terminatedStateReason string, terminationFromResults string, exitCodeFromResults *int32) string { + if terminationFromResults != "" { + return terminationFromResults + } + + if exitCodeFromResults != nil { + return TerminationReasonContinued + } + + return terminatedStateReason +} + func updateCompletedTaskRunStatus(logger *zap.SugaredLogger, trs *v1.TaskRunStatus, pod *corev1.Pod, onError v1.PipelineTaskOnErrorType) { if DidTaskRunFail(pod) { msg := getFailureMessage(logger, pod) @@ -612,7 +638,7 @@ func extractContainerFailureMessage(logger *zap.SugaredLogger, status corev1.Con msg := status.State.Terminated.Message r, _ := termination.ParseMessage(logger, msg) for _, runResult := range r { - if runResult.ResultType == result.InternalTektonResultType && runResult.Key == "Reason" && runResult.Value == "TimeoutExceeded" { + if runResult.ResultType == result.InternalTektonResultType && runResult.Key == "Reason" && runResult.Value == TerminationReasonTimeoutExceeded { return fmt.Sprintf("%q exited because the step exceeded the specified timeout limit", status.Name) } } diff --git a/pkg/pod/status_test.go b/pkg/pod/status_test.go index aa35cd05042..5686acbf0e0 100644 --- a/pkg/pod/status_test.go +++ b/pkg/pod/status_test.go @@ -1332,8 +1332,9 @@ func TestMakeTaskRunStatus(t *testing.T) { ExitCode: 11, }, }, - Name: "first", - Container: "step-first", + TerminationReason: "Continued", + Name: "first", + Container: "step-first", }, { ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ @@ -2341,6 +2342,209 @@ func TestGetStepResultsFromSidecarLogs_Error(t *testing.T) { } } +func TestGetStepTerminationReasonFromContainerStatus(t *testing.T) { + tests := []struct { + desc string + pod corev1.Pod + expectedTerminationReason map[string]string + }{ + { + desc: "Step skipped", + expectedTerminationReason: map[string]string{ + "step-1": "Skipped", + }, + pod: corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "step-1"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "step-1", + ImageID: "image-id-1", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `[{"key":"StartedAt","value":"2023-11-26T19:53:29.452Z","type":3},{"key":"Reason","value":"Skipped","type":3}]`, + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + }, + }, + { + desc: "Step continued", + expectedTerminationReason: map[string]string{ + "step-1": "Continued", + }, + pod: corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "step-1"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "step-1", + ImageID: "image-id-1", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `[{"key":"StartedAt","value":"2023-11-26T19:53:29.452Z","type":3},{"key":"ExitCode","value":"1","type":3}]`, + ExitCode: 0, + Reason: "Completed", + }, + }, + }, + }, + }, + }, + }, + { + desc: "Step timedout", + expectedTerminationReason: map[string]string{ + "step-1": "TimeoutExceeded", + }, + pod: corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "step-1"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "step-1", + ImageID: "image-id-1", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `[{"key":"StartedAt","value":"2023-11-26T19:53:29.452Z","type":3},{"key":"Reason","value":"TimeoutExceeded","type":3}]`, + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + }, + }, + { + desc: "Step completed", + expectedTerminationReason: map[string]string{ + "step-1": "Completed", + }, + pod: corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "step-1"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "step-1", + ImageID: "image-id-1", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `[{"key":"StartedAt","value":"2023-11-26T19:53:29.452Z","type":3}]`, + ExitCode: 0, + Reason: "Completed", + }, + }, + }, + }, + }, + }, + }, + { + desc: "Step error", + expectedTerminationReason: map[string]string{ + "step-1": "Error", + }, + pod: corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + {Name: "step-1"}, + }, + }, + Status: corev1.PodStatus{ + Phase: corev1.PodFailed, + ContainerStatuses: []corev1.ContainerStatus{ + { + Name: "step-1", + ImageID: "image-id-1", + State: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + Message: `[{"key":"StartedAt","value":"2023-11-26T19:53:29.452Z","type":3}]`, + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + startTime := time.Date(2010, 1, 1, 1, 1, 1, 1, time.UTC) + tr := v1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "task-run", + }, + Status: v1.TaskRunStatus{ + TaskRunStatusFields: v1.TaskRunStatusFields{ + StartTime: &metav1.Time{Time: startTime}, + }, + }, + } + logger, _ := logging.NewLogger("", "status") + kubeclient := fakek8s.NewSimpleClientset() + + trs, err := MakeTaskRunStatus(context.Background(), logger, tr, &test.pod, kubeclient, &v1.TaskSpec{}) + if err != nil { + t.Errorf("MakeTaskRunResult: %s", err) + } + + for _, step := range trs.Steps { + if step.TerminationReason == "" { + t.Errorf("Terminated status not found for %s", step.Name) + } + want := test.expectedTerminationReason[step.Container] + got := step.TerminationReason + if d := cmp.Diff(want, got, ignoreVolatileTime); d != "" { + t.Errorf("Diff %s", diff.PrintWantGot(d)) + } + } + }) + } +} + func TestGetTaskResultsFromSidecarLogs(t *testing.T) { sidecarLogResults := []result.RunResult{{ Key: "step-foo.step-res", diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index 09ca1b7a151..add42af3a30 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -756,6 +756,7 @@ func terminateStepsInPod(tr *v1.TaskRun, taskRunReason v1.TaskRunReason) { Reason: taskRunReason.String(), Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } + step.TerminationReason = taskRunReason.String() step.Running = nil tr.Status.Steps[i] = step } @@ -768,6 +769,7 @@ func terminateStepsInPod(tr *v1.TaskRun, taskRunReason v1.TaskRunReason) { Reason: taskRunReason.String(), Message: fmt.Sprintf("Step %s terminated as pod %s is terminated", step.Name, tr.Status.PodName), } + step.TerminationReason = taskRunReason.String() step.Waiting = nil tr.Status.Steps[i] = step } diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index c681f8ae04b..03864f320d4 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -4618,6 +4618,7 @@ status: }, expectedStepStates: []v1.StepState{ { + TerminationReason: v1.TaskRunReasonCancelled.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4664,6 +4665,7 @@ status: }, expectedStepStates: []v1.StepState{ { + TerminationReason: v1.TaskRunReasonCancelled.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4706,6 +4708,7 @@ status: }, expectedStepStates: []v1.StepState{ { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4763,6 +4766,7 @@ status: }, }, { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4772,6 +4776,7 @@ status: }, }, { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4818,6 +4823,7 @@ status: }, expectedStepStates: []v1.StepState{ { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4827,6 +4833,7 @@ status: }, }, { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4836,6 +4843,7 @@ status: }, }, { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, @@ -4865,6 +4873,7 @@ status: finishedAt: "2022-01-01T00:00:00Z" reason: Completed startedAt: "2022-01-01T00:00:00Z" + terminationReason: Completed - running: startedAt: "2022-01-01T00:00:00Z" `), @@ -4882,6 +4891,7 @@ status: }, expectedStepStates: []v1.StepState{ { + TerminationReason: "Completed", ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 12, @@ -4890,6 +4900,7 @@ status: }, }, { + TerminationReason: v1.TaskRunReasonTimedOut.String(), ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 1, diff --git a/test/conversion_test.go b/test/conversion_test.go index 9977390de9e..1bbe8d3e200 100644 --- a/test/conversion_test.go +++ b/test/conversion_test.go @@ -468,6 +468,7 @@ status: steps: - container: step-echo name: step-echo + terminationReason: Completed terminated: reason: Completed ` @@ -734,6 +735,7 @@ status: - image: alpine name: hello script: 'echo Hello' + terminationReason: Completed terminated: reason: Completed ` diff --git a/test/propagated_params_test.go b/test/propagated_params_test.go index 2a1824d1771..727f496b901 100644 --- a/test/propagated_params_test.go +++ b/test/propagated_params_test.go @@ -43,7 +43,7 @@ var ( ignoreTaskRunStatus = cmpopts.IgnoreFields(v1.TaskRunStatusFields{}, "StartTime", "CompletionTime") ignoreConditions = cmpopts.IgnoreFields(duckv1.Status{}, "Conditions") ignoreContainerStates = cmpopts.IgnoreFields(corev1.ContainerState{}, "Terminated") - ignoreStepState = cmpopts.IgnoreFields(v1.StepState{}, "ImageID") + ignoreStepState = cmpopts.IgnoreFields(v1.StepState{}, "ImageID", "TerminationReason") // ignoreSATaskRunSpec ignores the service account in the TaskRunSpec as it may differ across platforms ignoreSATaskRunSpec = cmpopts.IgnoreFields(v1.TaskRunSpec{}, "ServiceAccountName") // ignoreSAPipelineRunSpec ignores the service account in the PipelineRunSpec as it may differ across platforms diff --git a/test/taskrun_test.go b/test/taskrun_test.go index f772c53eff1..fe38c6ae33c 100644 --- a/test/taskrun_test.go +++ b/test/taskrun_test.go @@ -21,6 +21,7 @@ package test import ( "context" + "encoding/json" "fmt" "regexp" "strings" @@ -31,8 +32,10 @@ import ( v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" "github.com/tektoncd/pipeline/pkg/pod" "github.com/tektoncd/pipeline/test/parse" + jsonpatch "gomodules.xyz/jsonpatch/v2" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" knativetest "knative.dev/pkg/test" "knative.dev/pkg/test/helpers" ) @@ -99,8 +102,9 @@ spec: Reason: "Completed", }, }, - Name: "unnamed-0", - Container: "step-unnamed-0", + TerminationReason: "Completed", + Name: "unnamed-0", + Container: "step-unnamed-0", }, { ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ @@ -108,8 +112,9 @@ spec: Reason: "Error", }, }, - Name: "unnamed-1", - Container: "step-unnamed-1", + TerminationReason: "Error", + Name: "unnamed-1", + Container: "step-unnamed-1", }, { ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ @@ -117,8 +122,9 @@ spec: Reason: "Error", }, }, - Name: "unnamed-2", - Container: "step-unnamed-2", + TerminationReason: "Skipped", + Name: "unnamed-2", + Container: "step-unnamed-2", }} ignoreTerminatedFields := cmpopts.IgnoreFields(corev1.ContainerStateTerminated{}, "StartedAt", "FinishedAt", "ContainerID") ignoreStepFields := cmpopts.IgnoreFields(v1.StepState{}, "ImageID") @@ -185,6 +191,7 @@ spec: } expectedStepState := []v1.StepState{{ + TerminationReason: "Completed", ContainerState: corev1.ContainerState{ Terminated: &corev1.ContainerStateTerminated{ ExitCode: 0, @@ -209,3 +216,277 @@ spec: t.Fatalf("-got, +want: %v", d) } } + +func TestTaskRunStepsTerminationReasons(t *testing.T) { + ctx := context.Background() + c, namespace := setup(ctx, t) + defer tearDown(ctx, t, c, namespace) + fqImageName := getTestImage(busyboxImage) + + tests := []struct { + description string + shouldSucceed bool + taskRun string + shouldCancel bool + expectedStepStatus []v1.StepState + }{ + { + description: "termination completed", + shouldSucceed: true, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + name: first + command: ['/bin/sh'] + args: ['-c', 'echo hello']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "Completed", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 0, + Reason: "Completed", + }, + }, + }, + }, + }, + { + description: "termination continued", + shouldSucceed: true, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + onError: continue + name: first + command: ['/bin/sh'] + args: ['-c', 'echo hello; exit 1']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "Continued", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Completed", + }, + }, + }, + }, + }, + { + description: "termination errored", + shouldSucceed: false, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + name: first + command: ['/bin/sh'] + args: ['-c', 'echo hello; exit 1']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "Error", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + { + description: "termination timedout", + shouldSucceed: false, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + name: first + timeout: 1s + command: ['/bin/sh'] + args: ['-c', 'echo hello; sleep 5s']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "TimeoutExceeded", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + { + description: "termination skipped", + shouldSucceed: false, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + name: first + command: ['/bin/sh'] + args: ['-c', 'echo hello; exit 1'] + - image: %v + name: second + command: ['/bin/sh'] + args: ['-c', 'echo hello']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "Error", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Error", + }, + }, + }, + { + Container: "step-second", + Name: "second", + TerminationReason: "Skipped", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "Error", + }, + }, + }, + }, + }, + { + description: "termination cancelled", + shouldSucceed: false, + shouldCancel: true, + taskRun: ` +metadata: + name: %v + namespace: %v +spec: + taskSpec: + steps: + - image: %v + name: first + command: ['/bin/sh'] + args: ['-c', 'sleep infinity; echo hello']`, + expectedStepStatus: []v1.StepState{ + { + Container: "step-first", + Name: "first", + TerminationReason: "TaskRunCancelled", + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + ExitCode: 1, + Reason: "TaskRunCancelled", + }, + }, + }, + }, + }, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + taskRunName := helpers.ObjectNameForTest(t) + values := []interface{}{taskRunName, namespace} + for range test.expectedStepStatus { + values = append(values, fqImageName) + } + taskRunYaml := fmt.Sprintf(test.taskRun, values...) + taskRun := parse.MustParseV1TaskRun(t, taskRunYaml) + + if _, err := c.V1TaskRunClient.Create(ctx, taskRun, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create TaskRun: %s", err) + } + + expectedTaskRunState := TaskRunFailed(taskRunName) + finalStatus := "Failed" + if test.shouldSucceed { + expectedTaskRunState = TaskRunSucceed(taskRunName) + finalStatus = "Succeeded" + } + + if test.shouldCancel { + expectedTaskRunState = FailedWithReason("TaskRunCancelled", taskRunName) + if err := cancelTaskRun(t, ctx, taskRunName, c); err != nil { + t.Fatalf("Error cancelling taskrun: %s", err) + } + } + + err := WaitForTaskRunState(ctx, c, taskRunName, expectedTaskRunState, finalStatus, v1Version) + if err != nil { + t.Fatalf("Error waiting for TaskRun to finish: %s", err) + } + + taskRunState, err := c.V1TaskRunClient.Get(ctx, taskRunName, metav1.GetOptions{}) + if err != nil { + t.Fatalf("Couldn't get expected TaskRun %s: %s", taskRunName, err) + } + + ignoreTerminatedFields := cmpopts.IgnoreFields(corev1.ContainerStateTerminated{}, "StartedAt", "FinishedAt", "ContainerID", "Message") + ignoreStepFields := cmpopts.IgnoreFields(v1.StepState{}, "ImageID") + if d := cmp.Diff(taskRunState.Status.Steps, test.expectedStepStatus, ignoreTerminatedFields, ignoreStepFields); d != "" { + t.Fatalf("-got, +want: %v", d) + } + }) + } +} + +func cancelTaskRun(t *testing.T, ctx context.Context, taskRunName string, c *clients) error { + t.Helper() + + err := WaitForTaskRunState(ctx, c, taskRunName, Running(taskRunName), "Running", v1Version) + if err != nil { + t.Fatalf("Error waiting for TaskRun to start running before cancelling: %s", err) + } + + patches := []jsonpatch.JsonPatchOperation{{ + Operation: "add", + Path: "/spec/status", + Value: "TaskRunCancelled", + }} + + patchBytes, err := json.Marshal(patches) + if err != nil { + return err + } + + if _, err := c.V1TaskRunClient.Patch(ctx, taskRunName, types.JSONPatchType, patchBytes, metav1.PatchOptions{}, ""); err != nil { + return err + } + + return nil +} diff --git a/test/upgrade_test.go b/test/upgrade_test.go index 331c6d5a5e5..7d30b233fa2 100644 --- a/test/upgrade_test.go +++ b/test/upgrade_test.go @@ -147,10 +147,12 @@ status: steps: - container: step-echo name: step-echo + terminationReason: Completed terminated: reason: Completed - container: check-workspace name: check-workspace + terminationReason: Completed terminated: reason: Completed `