Skip to content

Commit

Permalink
specifying onError for a step
Browse files Browse the repository at this point in the history
this commit implements tep-0049 - ignore a step error

When a `step` in a `task` results in a failure, the rest of the steps in the
`task` are skipped and the `taskRun` is declared a failure. If you would like
to ignore such step errors and continue executing the rest of the steps in
the task, you can specify `onError` for such a `step`.

`onError` can be set to either `continue` or `fail` as part of the
step definition. If `onError` is set to `continue`, the entrypoint sets the
original failed exit code of the script in the container terminated state.
A `step` with `onError` set to `continue` does not fail the `taskRun` and
continues executing the rest of the steps in a task.

This is an alpha feature. The `enable-api-fields` feature flag must be set to
`"alpha"` to specify `onError` for a `step`.

This commit includes the following changes:

* Changing entrypoint to include three new flags `onError`, `stepPath`, and
`stepPathLink`.
* Adding two new functions, `WriteFileContent` and `CreatePath`, to the entrypoint's post writer
* Creating a volume `/tekton/steps/`
* Supporting a path variable $(steps.step-<stepName>.exitCode.path) and
$(steps.step-unnamed-<stepIndex>.exitCode.path)
* API spec `onError` while defining a step
* Writing exitCode at /tekton/steps/step-<step-name>/exitCode or
/tekton/steps/step-unnamed-<step-index>/exitCode
* Set the exitCode of a terminated state to a non-zero exit code
* Doc, unit test, and examples for this feature
  • Loading branch information
pritidesai committed Jul 20, 2021
1 parent 51f3ce8 commit c56c41e
Show file tree
Hide file tree
Showing 27 changed files with 1,200 additions and 46 deletions.
20 changes: 18 additions & 2 deletions cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,13 @@ var (
results = flag.String("results", "", "If specified, list of file names that might contain task results")
timeout = flag.Duration("timeout", time.Duration(0), "If specified, sets timeout for step")
breakpointOnFailure = flag.Bool("breakpoint_on_failure", false, "If specified, expect steps to not skip on failure")
onError = flag.String("on_error", "", "Set to \"continue\" to ignore an error and "+
"continue when a container terminates with a non-zero exit code. "+
"Set to \"fail\" to declare a failure with a step error and stop executing the rest of the steps.")
stepPath = flag.String("step_path", "", "Relative step path, creates the specified path under /tekton/steps/ which "+
"can be used to store the step metadata e.g. <step-name> in /tekton/steps/<step-name>/")
stepPathLink = flag.String("step_path_link", "", "Relative step path, creates a symbolic link to the "+
"specified step path e.g. <step-index> in /tekton/steps/<step-index>")
)

const (
Expand Down Expand Up @@ -108,6 +115,9 @@ func main() {
Results: strings.Split(*results, ","),
Timeout: timeout,
BreakpointOnFailure: *breakpointOnFailure,
OnError: *onError,
StepPath: *stepPath,
StepPathLink: *stepPathLink,
}

// Copy any creds injected by the controller into the $HOME directory of the current
Expand All @@ -134,9 +144,15 @@ func main() {
// same signature.
if status, ok := t.Sys().(syscall.WaitStatus); ok {
checkForBreakpointOnFailure(e, breakpointExitPostFile)
os.Exit(status.ExitStatus())
// ignore a step error i.e. do not exit if a container terminates with a non-zero exit code when onError is set to "continue"
if e.OnError != entrypoint.ContinueOnError {
os.Exit(status.ExitStatus())
}
}
// log and exit only if a step error must cause run failure
if e.OnError != entrypoint.ContinueOnError {
log.Fatalf("Error executing command (ExitError): %v", err)
}
log.Fatalf("Error executing command (ExitError): %v", err)
default:
checkForBreakpointOnFailure(e, breakpointExitPostFile)
log.Fatalf("Error executing command: %v", err)
Expand Down
39 changes: 39 additions & 0 deletions cmd/entrypoint/post_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,42 @@ func (*realPostWriter) Write(file string) {
log.Fatalf("Creating %q: %v", file, err)
}
}

// WriteFileContent creates (or truncates) the named file and writes content to
// it. The directory that holds the file must already exist. An empty file name
// is a no-op. A failure to create, write, or close the file terminates the
// process through log.Fatalf.
func (*realPostWriter) WriteFileContent(file, content string) {
	if file == "" {
		return
	}
	f, err := os.Create(file)
	if err != nil {
		log.Fatalf("Creating %q: %v", file, err)
	}

	if _, err := f.WriteString(content); err != nil {
		// log.Fatalf exits without running deferred calls, so the file is
		// closed explicitly here; the write error is the one worth reporting.
		_ = f.Close()
		log.Fatalf("Writing %q: %v", file, err)
	}
	// Close may report a write failure that WriteString did not, so its
	// result is checked instead of being deferred and discarded.
	if err := f.Close(); err != nil {
		log.Fatalf("Closing %q: %v", file, err)
	}
}

// CreatePath makes the directory named by source, including any missing
// parents, and then, when link is non-empty, creates a symbolic link at link
// that points to source. An empty source is a no-op. The link is only created
// when os.Stat reports that link does not exist and source can be stat'ed.
// A failure to create the directory or the link terminates the process
// through log.Fatalf.
func (*realPostWriter) CreatePath(source, link string) {
	if source == "" {
		return
	}
	if mkErr := os.MkdirAll(source, 0770); mkErr != nil {
		log.Fatalf("Creating file path %q: %v", source, mkErr)
	}

	if link == "" {
		return
	}
	// Anything other than a "does not exist" answer for the link (it is
	// already there, or Stat failed for another reason) leaves it untouched.
	if _, statErr := os.Stat(link); !os.IsNotExist(statErr) {
		return
	}
	// Only link to a source that can actually be stat'ed.
	if _, statErr := os.Stat(source); statErr != nil {
		return
	}
	if linkErr := os.Symlink(source, link); linkErr != nil {
		log.Fatalf("Creating a symlink %q: %v", link, linkErr)
	}
}
73 changes: 73 additions & 0 deletions cmd/entrypoint/post_writer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package main

import (
	"os"
	"path/filepath"
	"testing"
)

// TestRealPostWriter_WriteFileContent verifies that WriteFileContent stores
// the given content in the named file and does nothing for an empty file name.
func TestRealPostWriter_WriteFileContent(t *testing.T) {
	tests := []struct {
		name, file, content string
	}{{
		name:    "write a file content",
		file:    "sample.txt",
		content: "this is a sample file",
	}, {
		name: "write a file without specifying any path",
	}}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Place the file in a per-test temporary directory so the test
			// never touches the package directory and needs no manual cleanup.
			file := tt.file
			if file != "" {
				file = filepath.Join(t.TempDir(), file)
			}

			rw := realPostWriter{}
			rw.WriteFileContent(file, tt.content)
			if file == "" {
				// Nothing to verify: an empty file name is a no-op.
				return
			}

			b, err := os.ReadFile(file)
			if err != nil {
				t.Fatalf("Failed to read the file %q: %v", file, err)
			}
			if got := string(b); got != tt.content {
				t.Fatalf("File %q contains %q, want %q", file, got, tt.content)
			}
		})
	}
}

// TestRealPostWriter_CreateStepPath verifies that CreatePath creates the
// source directory, creates a symbolic link to it when a link is given, and
// creates nothing when the source is empty.
func TestRealPostWriter_CreateStepPath(t *testing.T) {
	tests := []struct {
		name, source, link string
	}{{
		name:   "Create a path with a file",
		source: "sample.txt",
		link:   "0",
	}, {
		name: "Create a path without specifying any path",
	}, {
		name:   "Create a sym link without specifying any link path",
		source: "sample.txt",
	}, {
		name: "Create a sym link without specifying any source",
		link: "0.txt",
	}}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Resolve both paths inside a per-test temporary directory so a
			// leftover from an earlier run cannot influence the outcome.
			dir := t.TempDir()
			source, link := tt.source, tt.link
			if source != "" {
				source = filepath.Join(dir, source)
			}
			if link != "" {
				link = filepath.Join(dir, link)
			}

			rw := realPostWriter{}
			rw.CreatePath(source, link)

			if source != "" {
				fi, err := os.Stat(source)
				if err != nil {
					t.Fatalf("Failed to create the path %q: %v", source, err)
				}
				if !fi.IsDir() {
					t.Fatalf("Expected %q to be a directory", source)
				}
			}
			if link == "" {
				return
			}

			// Lstat inspects the link itself instead of following it.
			fi, err := os.Lstat(link)
			if source == "" {
				// CreatePath returns before touching the link when the source is empty.
				if !os.IsNotExist(err) {
					t.Fatalf("Expected no sym link %q without a source, got err: %v", link, err)
				}
				return
			}
			if err != nil {
				t.Fatalf("Failed to create a sym link %q: %v", link, err)
			}
			if fi.Mode()&os.ModeSymlink == 0 {
				t.Fatalf("Expected %q to be a sym link, got mode %v", link, fi.Mode())
			}
			target, err := os.Readlink(link)
			if err != nil {
				t.Fatalf("Failed to read the sym link %q: %v", link, err)
			}
			if target != source {
				t.Fatalf("Sym link %q points to %q, want %q", link, target, source)
			}
		})
	}
}
2 changes: 2 additions & 0 deletions docs/developers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ of how this directory is used:
* These folders are [part of the Tekton API](../api_compatibility_policy.md):
* `/tekton/results` is where [results](#results) are written to
(path available to `Task` authors via [`$(results.name.path)`](../variables.md))
* `/tekton/steps` is where the `step` exitCodes are written to
(path available to `Task` authors via [`$(steps.step-<stepName>.exitCode.path)`](../variables.md#variables-available-in-a-task))
* These folders are implementation details of Tekton and **users should not
rely on this specific behavior as it may change in the future**:
* `/tekton/tools` contains tools like the [entrypoint binary](#entrypoint-rewriting-and-step-ordering)
Expand Down
86 changes: 86 additions & 0 deletions docs/tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ weight: 200
- [Reserved directories](#reserved-directories)
- [Running scripts within `Steps`](#running-scripts-within-steps)
- [Specifying a timeout](#specifying-a-timeout)
- [Specifying `onError` for a `step`](#specifying-onerror-for-a-step)
- [Accessing Step's `exitCode` in subsequent `Steps`](#accessing-steps-exitcode-in-subsequent-steps)
- [Specifying `Parameters`](#specifying-parameters)
- [Specifying `Resources`](#specifying-resources)
- [Specifying `Workspaces`](#specifying-workspaces)
Expand Down Expand Up @@ -282,6 +284,90 @@ steps:
sleep 60
timeout: 5s
```

#### Specifying `onError` for a `step`

This is an alpha feature. The `enable-api-fields` feature flag [must be set to `"alpha"`](./install.md)
to specify `onError` for a `step`.

When a `step` in a `task` results in a failure, the rest of the steps in the `task` are skipped and the `taskRun` is
declared a failure. If you would like to ignore such step errors and continue executing the rest of the steps in
the task, you can specify `onError` for such a `step`.

`onError` can be set to either `continue` or `fail` as part of the step definition. If `onError` is
set to `continue`, the entrypoint sets the original failed exit code of the [script](#running-scripts-within-steps)
in the container terminated state. A `step` with `onError` set to `continue` does not fail the `taskRun` and continues
executing the rest of the steps in a task.

To ignore a step error, set `onError` to `continue`:

```yaml
steps:
- image: docker.io/library/golang:latest
name: ignore-unit-test-failure
onError: continue
script: |
go test .
```

The original failed exit code of the [script](#running-scripts-within-steps) is available in the terminated state of
the container.

```
kubectl get tr taskrun-unit-test-t6qcl -o json | jq .status
{
"conditions": [
{
"message": "All Steps have completed executing",
"reason": "Succeeded",
"status": "True",
"type": "Succeeded"
}
],
"steps": [
{
"container": "step-ignore-unit-test-failure",
"imageID": "...",
"name": "ignore-unit-test-failure",
"terminated": {
"containerID": "...",
"exitCode": 1,
"reason": "Completed"
}
}
]
}
```

For an end-to-end example, see [the taskRun ignoring a step error](../examples/v1beta1/taskruns/alpha/ignore-step-error.yaml)
and [the pipelineRun ignoring a step error](../examples/v1beta1/pipelineruns/alpha/ignore-step-error.yaml).

#### Accessing Step's `exitCode` in subsequent `Steps`

A step can access the exit code of any previous step through a `path` variable, similar to the path of a task result, for example:

```shell
$(steps.step-<step-name>.exitCode.path)
```

The `exitCode` of a step without any name can be referenced using:

```shell
$(steps.step-unnamed-<step-index>.exitCode.path)
```

You can also read the exit code directly from the Tekton internal path. This is not recommended, because the path
might change in the future:

```shell
cat /tekton/steps/step-<step-name>/exitCode
```

To read the exit code of a step without a name:

```shell
cat /tekton/steps/step-unnamed-<step-index>/exitCode
```

### Specifying `Parameters`

You can specify parameters, such as compilation flags or artifact names, that you want to supply to the `Task` at execution time.
Expand Down
2 changes: 2 additions & 0 deletions docs/variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ For instructions on using variable substitutions see the relevant section of [th
| `context.taskRun.uid` | The uid of the `TaskRun` that this `Task` is running in. |
| `context.task.name` | The name of this `Task`. |
| `context.task.retry-count` | The current retry number of this `Task`. |
| `steps.step-<stepName>.exitCode.path` | The path to the file where the exit code of the step is stored. |
| `steps.step-unnamed-<stepIndex>.exitCode.path` | The path to the file where the exit code of a step without a name is stored. |

### `PipelineResource` variables available in a `Task`

Expand Down
59 changes: 59 additions & 0 deletions examples/v1beta1/pipelineruns/alpha/ignore-step-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# PipelineRun example for the alpha `onError` step field.
# Several steps exit with a non-zero code but are marked `onError: continue`.
# task1 still writes the result `task1-result`, and task2 (which runs after
# task1) checks that the result it receives as a param equals 123.
kind: PipelineRun
apiVersion: tekton.dev/v1beta1
metadata:
generateName: pipelinerun-with-failing-step-
spec:
serviceAccountName: 'default'
pipelineSpec:
tasks:
- name: task1
taskSpec:
steps:
# not really doing anything here, just a hurdle to test the "ignore step error"
- image: alpine
onError: continue
name: exit-with-1
script: |
exit 1
# initialize a task result which will be validated by the next task
- image: alpine
name: write-a-result
onError: continue
script: |
echo -n 123 | tee $(results.task1-result.path)
exit 11
results:
- name: task1-result
description: result of a task1
- name: task2
runAfter: [ "task1" ]
params:
- name: task1-result
value: $(tasks.task1.results.task1-result)
taskSpec:
params:
- name: task1-result
steps:
# again, not really doing anything here, just a hurdle to test the "ignore step error"
- image: alpine
onError: continue
name: exit-with-255
script: |
exit 255
# verify that the task result was produced by the first task, fail if the result does not match
- image: alpine
name: verify-a-task-result
script: |
ls /tekton/results/
if [ $(params.task1-result) == 123 ]; then
echo "Yay! the task result matches which was initialized in the previous task while ignoring the step error"
else
echo "the task result does not match."
exit 1
fi
# the last step of a task and one more hurdle
- image: alpine
name: exit-with-20
onError: continue
script: |
exit 20
49 changes: 49 additions & 0 deletions examples/v1beta1/taskruns/alpha/ignore-step-error.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# TaskRun example for the alpha `onError` step field.
# The first step exits with 1 and is marked `onError: continue`. The second
# step reads the recorded exit code three ways and fails if any check fails:
# through the $(steps.step-exit-with-1.exitCode.path) variable, through
# /tekton/steps/step-exit-with-1/exitCode, and through /tekton/steps/0/exitCode.
kind: TaskRun
apiVersion: tekton.dev/v1beta1
metadata:
generateName: taskrun-with-failing-step-
spec:
taskSpec:
steps:
# exit with 1 and ignore non zero exit code
- image: alpine
onError: continue
name: exit-with-1
script: |
exit 1
# check if the /tekton/steps/step-<step-name>/exitCode got created and contains the exit code
# check if the symlink /tekton/steps/0/ got created
- image: alpine
name: verify-step-path
script: |
exitCode=`cat $(steps.step-exit-with-1.exitCode.path)`
if [ $exitCode == 1 ]; then
echo "Yay! the exit code can be accessed using the path variable and matches the previous step exit code"
else
echo "the exit code does not match."
exit 1
fi
FILE=/tekton/steps/step-exit-with-1/exitCode
if [ -f "$FILE" ]; then
echo "$FILE exists."
echo "Yay! the file exists which was created by the controller to record the step exit code."
else
echo "$FILE does not exist."
exit 1
fi
FILE=/tekton/steps/0/exitCode
if [ -f "$FILE" ]; then
echo "$FILE exists."
echo "Yay! the symlink exists which was created by the controller to record the step exit code."
else
echo "$FILE does not exist."
exit 1
fi
exitCode=`cat $FILE`
if [ $exitCode == 1 ]; then
echo "Yay! the exit code matches to the previous step exit code"
else
echo "the exit code does not match."
exit 1
fi
---
7 changes: 7 additions & 0 deletions internal/builder/v1beta1/step.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,10 @@ func StepScript(script string) StepOp {
step.Script = script
}
}

// StepOnError returns a StepOp that assigns e to the OnError field of the
// step it is applied to.
func StepOnError(e string) StepOp {
	setOnError := func(s *v1beta1.Step) {
		s.OnError = e
	}
	return setOnError
}
Loading

0 comments on commit c56c41e

Please sign in to comment.