Skip to content

Commit

Permalink
TEP-0090: Failure Strategies - Remove Fail Fast
Browse files Browse the repository at this point in the history
In tektoncd#4951, we implemented
`isFailure` for matrixed `TaskRuns` where we applied fail-fast
failure strategy. We discussed failure strategies further in this
PR - tektoncd/community#724 - and API WG
on 13 June 2022. We agreed to leave early termination upon failure
out of scope for the initial release of Matrix. We plan to explore
failure strategies, including fail-fast, in future work. And these
failure strategies may apply more broadly beyond Matrix.

In this change, we update `isFailure` to evaluate to `true` only when
there's a failure and there are no running `TaskRuns` in the `rprt`.
  • Loading branch information
jerop authored and tekton-robot committed Jun 14, 2022
1 parent 72f20dc commit cfc931c
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 20 deletions.
27 changes: 15 additions & 12 deletions pkg/reconciler/pipelinerun/resources/pipelinerunresolution.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ func (t ResolvedPipelineRunTask) isSuccessful() bool {
}

// isFailure returns true only if the run has failed and will not be retried.
// If the PipelineTask has a Matrix, isFailure returns true if any run has failed and will not be retried.
// If the PipelineTask has a Matrix, isFailure returns true if any run has failed (no remaining retries)
// and all other runs are done.
func (t ResolvedPipelineRunTask) isFailure() bool {
if t.isCancelled() {
return true
Expand All @@ -130,14 +131,14 @@ func (t ResolvedPipelineRunTask) isFailure() bool {
if len(t.TaskRuns) == 0 {
return false
}
isDone = true
atLeastOneFailed := false
for _, taskRun := range t.TaskRuns {
c = taskRun.Status.GetCondition(apis.ConditionSucceeded)
isDone = taskRun.IsDone()
if isDone && c.IsFalse() && !t.hasRemainingRetries() {
return true
}
isDone = isDone && taskRun.IsDone()
taskRunFailed := taskRun.Status.GetCondition(apis.ConditionSucceeded).IsFalse() && !t.hasRemainingRetries()
atLeastOneFailed = atLeastOneFailed || taskRunFailed
}
return false
return atLeastOneFailed && isDone
default:
if t.TaskRun == nil {
return false
Expand Down Expand Up @@ -180,6 +181,7 @@ func (t ResolvedPipelineRunTask) hasRemainingRetries() bool {
}

// isCancelled returns true only if the run is cancelled
// If the PipelineTask has a Matrix, isCancelled returns true if any run is cancelled and all other runs are done.
func (t ResolvedPipelineRunTask) isCancelled() bool {
switch {
case t.IsCustomTask():
Expand All @@ -192,14 +194,15 @@ func (t ResolvedPipelineRunTask) isCancelled() bool {
if len(t.TaskRuns) == 0 {
return false
}
// is cancelled when any TaskRun is cancelled to fail fast
isDone := true
atLeastOneCancelled := false
for _, taskRun := range t.TaskRuns {
isDone = isDone && taskRun.IsDone()
c := taskRun.Status.GetCondition(apis.ConditionSucceeded)
if c != nil && c.IsFalse() && c.Reason == v1beta1.TaskRunReasonCancelled.String() {
return true
}
taskRunCancelled := c.IsFalse() && c.Reason == v1beta1.TaskRunReasonCancelled.String()
atLeastOneCancelled = atLeastOneCancelled || taskRunCancelled
}
return false
return atLeastOneCancelled && isDone
default:
if t.TaskRun == nil {
return false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1120,12 +1120,12 @@ func TestIsFailure(t *testing.T) {
},
want: true,
}, {
name: "one matrixed taskruns failed",
name: "one matrixed taskrun failed, one matrixed taskrun running",
rprt: ResolvedPipelineRunTask{
PipelineTask: matrixedPipelineTask,
TaskRuns: []*v1beta1.TaskRun{makeFailed(trs[0]), makeStarted(trs[1])},
},
want: true,
want: false,
}, {
name: "matrixed taskruns failed: retries remaining",
rprt: ResolvedPipelineRunTask{
Expand Down Expand Up @@ -1155,12 +1155,12 @@ func TestIsFailure(t *testing.T) {
},
want: true,
}, {
name: "one matrixed taskrun cancelled",
name: "one matrixed taskrun cancelled, one matrixed taskrun running",
rprt: ResolvedPipelineRunTask{
PipelineTask: matrixedPipelineTask,
TaskRuns: []*v1beta1.TaskRun{withCancelled(makeFailed(trs[0])), makeStarted(trs[1])},
},
want: true,
want: false,
}, {
name: "matrixed taskruns cancelled but not failed",
rprt: ResolvedPipelineRunTask{
Expand All @@ -1183,12 +1183,12 @@ func TestIsFailure(t *testing.T) {
},
want: true,
}, {
name: "one matrixed taskrun cancelled: retries remaining",
name: "one matrixed taskrun cancelled: retries remaining, one matrixed taskrun running",
rprt: ResolvedPipelineRunTask{
PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1),
TaskRuns: []*v1beta1.TaskRun{withCancelled(makeFailed(trs[0])), makeStarted(trs[1])},
},
want: true,
want: false,
}, {
name: "matrixed taskruns cancelled: no retries remaining",
rprt: ResolvedPipelineRunTask{
Expand All @@ -1197,12 +1197,12 @@ func TestIsFailure(t *testing.T) {
},
want: true,
}, {
name: "one matrixed taskrun cancelled: no retries remaining",
name: "one matrixed taskrun cancelled: no retries remaining, one matrixed taskrun running",
rprt: ResolvedPipelineRunTask{
PipelineTask: withPipelineTaskRetries(*matrixedPipelineTask, 1),
TaskRuns: []*v1beta1.TaskRun{withCancelled(withRetries(makeFailed(trs[0]))), makeStarted(trs[1])},
},
want: true,
want: false,
}} {
t.Run(tc.name, func(t *testing.T) {
if got := tc.rprt.isFailure(); got != tc.want {
Expand Down

0 comments on commit cfc931c

Please sign in to comment.