Skip to content

Commit

Permalink
[TEP-0050] Implement PipelineTask OnError
Browse files Browse the repository at this point in the history
Part of [#7165][#7165]. In [TEP-0050][tep-0050], we proposed to add an `OnError` API field under `PipelineTask` to configure error handling strategy.

This commits leverages `PipelineTask.OnError` API field introduced in the previous PR, implement the error handling strategy, update related docs and tests.

/kind feature

[tep-0050]: https://github.com/tektoncd/community/blob/main/teps/0050-ignore-task-failures.md
[#7165]: #7165
  • Loading branch information
QuanZhang-William committed Nov 27, 2023
1 parent 23581c5 commit 0a34d79
Show file tree
Hide file tree
Showing 14 changed files with 401 additions and 17 deletions.
2 changes: 0 additions & 2 deletions docs/pipelines.md
Original file line number Diff line number Diff line change
Expand Up @@ -658,8 +658,6 @@ tasks:

> :seedling: **Specifying `onError` in `PipelineTasks` is an [alpha](additional-configs.md#alpha-features) feature.** The `enable-api-fields` feature flag must be set to `"alpha"` to specify `onError` in a `PipelineTask`.

> :seedling: This feature is in **Preview Only** mode and not yet supported/implemented.

When a `PipelineTask` fails, the rest of the `PipelineTasks` are skipped and the `PipelineRun` is declared a failure. If you would like to
ignore such `PipelineTask` failure and continue executing the rest of the `PipelineTasks`, you can specify `onError` for such a `PipelineTask`.

Expand Down
25 changes: 25 additions & 0 deletions examples/v1/pipelineruns/alpha/ignore-task-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
apiVersion: tekton.dev/v1
kind: PipelineRun
metadata:
generateName: pipelinerun-with-failing-task-
spec:
pipelineSpec:
tasks:
- name: echo-continue
onError: continue
taskSpec:
steps:
- name: write
image: alpine
script: |
echo "this is a failing task"
exit 1
- name: echo
runAfter:
- echo-continue
taskSpec:
steps:
- name: write
image: alpine
script: |
echo "this is a success task"
2 changes: 0 additions & 2 deletions pkg/apis/pipeline/v1/pipeline_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,6 @@ type PipelineTask struct {

// OnError defines the exiting behavior of a PipelineRun on error
// can be set to [ continue | stopAndFail ]
// Note: OnError is in preview mode and not yet supported
// TODO(#7165)
// +optional
OnError PipelineTaskOnErrorType `json:"onError,omitempty"`
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/pipeline/v1/pipelinerun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ const (
PipelineRunReasonCELEvaluationFailed PipelineRunReason = "CELEvaluationFailed"
)

// PipelineTaskOnErrorAnnotation is used to pass the failure strategy to TaskRun pods
const PipelineTaskOnErrorAnnotation = "pipeline.tekton.dev/pipeline-task-on-error"

func (t PipelineRunReason) String() string {
return string(t)
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/pipeline/v1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ const (
TaskRunReasonStopSidecarFailed = "TaskRunStopSidecarFailed"
// TaskRunReasonInvalidParamValue indicates that the TaskRun Param input value is not allowed.
TaskRunReasonInvalidParamValue = "InvalidParamValue"
// TaskRunReasonFailureIgnored is the reason set when the Taskrun has failed and the failure is ignored for the owning PipelineRun
TaskRunReasonFailureIgnored TaskRunReason = "FailureIgnored"
)

func (t TaskRunReason) String() string {
Expand Down
2 changes: 0 additions & 2 deletions pkg/apis/pipeline/v1beta1/pipeline_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,6 @@ type PipelineTask struct {

// OnError defines the exiting behavior of a PipelineRun on error
// can be set to [ continue | stopAndFail ]
// Note: OnError is in preview mode and not yet supported
// TODO(#7165)
// +optional
OnError PipelineTaskOnErrorType `json:"onError,omitempty"`
}
Expand Down
15 changes: 12 additions & 3 deletions pkg/pod/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,12 @@ func MakeTaskRunStatus(ctx context.Context, logger *zap.SugaredLogger, tr v1.Tas
complete := areStepsComplete(pod) || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed

if complete {
updateCompletedTaskRunStatus(logger, trs, pod)
onError, ok := tr.Annotations[v1.PipelineTaskOnErrorAnnotation]
if ok {
updateCompletedTaskRunStatus(logger, trs, pod, v1.PipelineTaskOnErrorType(onError))
} else {
updateCompletedTaskRunStatus(logger, trs, pod, "")
}
} else {
updateIncompleteTaskRunStatus(trs, pod)
}
Expand Down Expand Up @@ -359,10 +364,14 @@ func extractExitCodeFromResults(results []result.RunResult) (*int32, error) {
return nil, nil //nolint:nilnil // would be more ergonomic to return a sentinel error
}

func updateCompletedTaskRunStatus(logger *zap.SugaredLogger, trs *v1.TaskRunStatus, pod *corev1.Pod) {
func updateCompletedTaskRunStatus(logger *zap.SugaredLogger, trs *v1.TaskRunStatus, pod *corev1.Pod, onError v1.PipelineTaskOnErrorType) {
if DidTaskRunFail(pod) {
msg := getFailureMessage(logger, pod)
markStatusFailure(trs, v1.TaskRunReasonFailed.String(), msg)
if onError == v1.PipelineTaskContinue {
markStatusFailure(trs, v1.TaskRunReasonFailureIgnored.String(), msg)
} else {
markStatusFailure(trs, v1.TaskRunReasonFailed.String(), msg)
}
} else {
markStatusSuccess(trs)
}
Expand Down
69 changes: 69 additions & 0 deletions pkg/pod/status_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1293,6 +1293,75 @@ func TestMakeTaskRunStatus(t *testing.T) {
}
}

func TestMakeRunStatus_OnError(t *testing.T) {
for _, c := range []struct {
name string
podStatus corev1.PodStatus
onError v1.PipelineTaskOnErrorType
want v1.TaskRunStatus
}{{
name: "onError: continue",
podStatus: corev1.PodStatus{
Phase: corev1.PodFailed,
Message: "boom",
},
onError: v1.PipelineTaskContinue,
want: v1.TaskRunStatus{
Status: statusFailure(string(v1.TaskRunReasonFailureIgnored), "boom"),
},
}, {
name: "onError: stopAndFail",
podStatus: corev1.PodStatus{
Phase: corev1.PodFailed,
Message: "boom",
},
onError: v1.PipelineTaskStopAndFail,
want: v1.TaskRunStatus{
Status: statusFailure(string(v1.TaskRunReasonFailed), "boom"),
},
}, {
name: "stand alone TaskRun",
podStatus: corev1.PodStatus{
Phase: corev1.PodFailed,
Message: "boom",
},
want: v1.TaskRunStatus{
Status: statusFailure(string(v1.TaskRunReasonFailed), "boom"),
},
}} {
t.Run(c.name, func(t *testing.T) {
pod := corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "pod",
Namespace: "foo",
},
Status: c.podStatus,
}
tr := v1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: "task-run",
Namespace: "foo",
},
}
if c.onError != "" {
tr.Annotations = map[string]string{}
tr.Annotations[v1.PipelineTaskOnErrorAnnotation] = string(c.onError)
}

logger, _ := logging.NewLogger("", "status")
kubeclient := fakek8s.NewSimpleClientset()
got, err := MakeTaskRunStatus(context.Background(), logger, tr, &pod, kubeclient, &v1.TaskSpec{})
if err != nil {
t.Errorf("Unexpected err in MakeTaskRunResult: %s", err)
}

if d := cmp.Diff(c.want.Status, got.Status, ignoreVolatileTime); d != "" {
t.Errorf("Diff %s", diff.PrintWantGot(d))
}
})
}
}

func TestMakeTaskRunStatusAlpha(t *testing.T) {
for _, c := range []struct {
desc string
Expand Down
3 changes: 3 additions & 0 deletions pkg/reconciler/pipelinerun/pipelinerun.go
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,9 @@ func (c *Reconciler) createTaskRun(ctx context.Context, taskRunName string, para
if spanContext, err := getMarshalledSpanFromContext(ctx); err == nil {
tr.Annotations[TaskRunSpanContextAnnotation] = spanContext
}
if rpt.PipelineTask.OnError == v1.PipelineTaskContinue {
tr.Annotations[v1.PipelineTaskOnErrorAnnotation] = string(v1.PipelineTaskContinue)
}

if rpt.PipelineTask.Timeout != nil {
tr.Spec.Timeout = rpt.PipelineTask.Timeout
Expand Down
70 changes: 70 additions & 0 deletions pkg/reconciler/pipelinerun/pipelinerun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,76 @@ spec:
}
}

// TestPipelineTaskErrorIsIgnored tests that a resource dependent PipelineTask with onError:continue is skipped
// if the parent PipelineTask fails to produce the result
func TestPipelineTaskErrorIsIgnored(t *testing.T) {
prs := []*v1.PipelineRun{parse.MustParseV1PipelineRun(t, `
metadata:
name: test-pipeline-missing-results
namespace: foo
spec:
serviceAccountName: test-sa-0
pipelineSpec:
tasks:
- name: task1
taskSpec:
results:
- name: result1
type: string
steps:
- name: failing-step
onError: continue
image: busybox
script: exit 1; echo -n 123 | tee $(results.result1.path)'
- name: task2
onError: continue
params:
- name: param1
value: $(tasks.task1.results.result1)
taskSpec:
params:
- name: param1
type: string
steps:
- name: foo
image: busybox
script: 'echo $(params.param1)'
`)}
trs := []*v1.TaskRun{mustParseTaskRunWithObjectMeta(t,
taskRunObjectMeta("test-pipeline-missing-results-task1", "foo",
"test-pipeline-missing-results", "test-pipeline", "task1", true),
`
spec:
serviceAccountName: test-sa
timeout: 1h0m0s
status:
conditions:
- status: "True"
type: Succeeded
`)}
cms := []*corev1.ConfigMap{withEnabledAlphaAPIFields(newFeatureFlagsConfigMap())}

d := test.Data{
PipelineRuns: prs,
TaskRuns: trs,
ConfigMaps: cms,
}
prt := newPipelineRunTest(t, d)
defer prt.Cancel()

reconciledRun, _ := prt.reconcileRun("foo", "test-pipeline-missing-results", []string{}, false)
cond := reconciledRun.Status.Conditions[0]
if cond.Status != corev1.ConditionTrue {
t.Fatalf("expected PipelineRun status to be True but got: %s", cond.Status)
}
if len(reconciledRun.Status.SkippedTasks) != 1 {
t.Fatalf("expected 1 skipped Task but got %v", len(reconciledRun.Status.SkippedTasks))
}
if reconciledRun.Status.SkippedTasks[0].Reason != v1.MissingResultsSkip {
t.Fatalf("expected 1 skipped Task with reason %s, but got %v", v1.MissingResultsSkip, reconciledRun.Status.SkippedTasks[0].Reason)
}
}

func TestMissingResultWhenStepErrorIsIgnored(t *testing.T) {
prs := []*v1.PipelineRun{parse.MustParseV1PipelineRun(t, `
metadata:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,9 @@ func (t *ResolvedPipelineTask) skipBecauseResultReferencesAreMissing(facts *Pipe
resolvedResultRefs, pt, err := ResolveResultRefs(facts.State, PipelineRunState{t})
rpt := facts.State.ToMap()[pt]
if rpt != nil {
if err != nil && (t.IsFinalTask(facts) || rpt.Skip(facts).SkippingReason == v1.WhenExpressionsSkip) {
if err != nil &&
(t.PipelineTask.OnError == v1.PipelineTaskContinue ||
(t.IsFinalTask(facts) || rpt.Skip(facts).SkippingReason == v1.WhenExpressionsSkip)) {
return true
}
}
Expand Down
28 changes: 21 additions & 7 deletions pkg/reconciler/pipelinerun/resources/pipelinerunstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ type pipelineRunStatusCount struct {
Succeeded int
// failed tasks count
Failed int
// failed but ignored tasks count
IgnoredFailed int
// cancelled tasks count
Cancelled int
// number of tasks which are still pending, have not executed
Expand Down Expand Up @@ -278,11 +280,11 @@ func (state PipelineRunState) getNextTasks(candidateTasks sets.String) []*Resolv
}

// IsStopping returns true if the PipelineRun won't be scheduling any new Task because
// at least one task already failed or was cancelled in the specified dag
// at least one task already failed (with onError: stopAndFail) or was cancelled in the specified dag
func (facts *PipelineRunFacts) IsStopping() bool {
for _, t := range facts.State {
if facts.isDAGTask(t.PipelineTask.Name) {
if t.isFailure() {
if t.isFailure() && t.PipelineTask.OnError != v1.PipelineTaskContinue {
return true
}
}
Expand Down Expand Up @@ -417,7 +419,8 @@ func (facts *PipelineRunFacts) GetPipelineConditionStatus(ctx context.Context, p
// get the count of successful tasks, failed tasks, cancelled tasks, skipped task, and incomplete tasks
s := facts.getPipelineTasksCount()
// completed task is a collection of successful, failed, cancelled tasks (skipped tasks are reported separately)
cmTasks := s.Succeeded + s.Failed + s.Cancelled
cmTasks := s.Succeeded + s.Failed + s.Cancelled + s.IgnoredFailed
totalFailedTasks := s.Failed + s.IgnoredFailed

// The completion reason is set from the TaskRun completion reason
// by default, set it to ReasonRunning
Expand All @@ -427,8 +430,14 @@ func (facts *PipelineRunFacts) GetPipelineConditionStatus(ctx context.Context, p
if s.Incomplete == 0 {
status := corev1.ConditionTrue
reason := v1.PipelineRunReasonSuccessful.String()
message := fmt.Sprintf("Tasks Completed: %d (Failed: %d, Cancelled %d), Skipped: %d",
cmTasks, s.Failed, s.Cancelled, s.Skipped)
var message string
if s.IgnoredFailed > 0 {
message = fmt.Sprintf("Tasks Completed: %d (Failed: %d (Ignored: %d), Cancelled %d), Skipped: %d",
cmTasks, totalFailedTasks, s.IgnoredFailed, s.Cancelled, s.Skipped)
} else {
message = fmt.Sprintf("Tasks Completed: %d (Failed: %d, Cancelled %d), Skipped: %d",
cmTasks, totalFailedTasks, s.Cancelled, s.Skipped)
}
// Set reason to ReasonCompleted - At least one is skipped
if s.Skipped > 0 {
reason = v1.PipelineRunReasonCompleted.String()
Expand Down Expand Up @@ -604,6 +613,7 @@ func (facts *PipelineRunFacts) getPipelineTasksCount() pipelineRunStatusCount {
Cancelled: 0,
Incomplete: 0,
SkippedDueToTimeout: 0,
IgnoredFailed: 0,
}
for _, t := range facts.State {
switch {
Expand All @@ -616,9 +626,13 @@ func (facts *PipelineRunFacts) getPipelineTasksCount() pipelineRunStatusCount {
// increment cancelled counter since the task is cancelled
case t.isCancelled():
s.Cancelled++
// increment failure counter since the task has failed
// increment failure counter based on Task OnError type since the task has failed
case t.isFailure():
s.Failed++
if t.PipelineTask.OnError == v1.PipelineTaskContinue {
s.IgnoredFailed++
} else {
s.Failed++
}
// increment skipped and skipped due to timeout counters since the task was skipped due to the pipeline, tasks, or finally timeout being reached before the task was launched
case t.Skip(facts).SkippingReason == v1.PipelineTimedOutSkip ||
t.Skip(facts).SkippingReason == v1.TasksTimedOutSkip ||
Expand Down
Loading

0 comments on commit 0a34d79

Please sign in to comment.