specifying onError for a step

This commit implements TEP-0049 (ignore a step error).

When a `step` in a `task` results in a failure, the rest of the steps in the `task` are skipped and the `taskRun` is declared a failure. If you would like to ignore such step errors and continue executing the rest of the steps in the task, you can specify `onError` for such a `step`. `onError` can be set to either `continue` or `fail` as part of the step definition. If `onError` is set to `continue`, the entrypoint sets the original failed exit code of the script in the container terminated state. A `step` with `onError` set to `continue` does not fail the `taskRun` and continues executing the rest of the steps in a task.

This is an alpha feature. The `enable-api-fields` feature flag must be set to `"alpha"` to specify `onError` for a `step`.

This commit includes the following changes:
* Changing the entrypoint to include three new flags: `on_error`, `step_path`, and `step_path_link`.
* Adding two new functions to the post writer: `WriteFileContent` and `CreatePath`.
* Creating a volume `/tekton/steps/`.
* Supporting the path variables `$(steps.step-<stepName>.exitCode.path)` and `$(steps.step-unnamed-<stepIndex>.exitCode.path)`.
* Adding the API spec `onError` for defining a step.
* Writing the exit code at `/tekton/steps/step-<step-name>/exitCode` or `/tekton/steps/step-unnamed-<step-index>/exitCode`.
* Setting the exitCode of a terminated state to a non-zero exit code.
* Docs, unit tests, and examples for this feature.
tektoncd · Jul 20, 2021 · c56c41e · c56c41e
1 parent 51f3ce8
commit c56c41e
Show file tree

Hide file tree

Showing 27 changed files with 1,200 additions and 46 deletions.
diff --git a/cmd/entrypoint/main.go b/cmd/entrypoint/main.go
@@ -43,6 +43,13 @@ var (
 	results             = flag.String("results", "", "If specified, list of file names that might contain task results")
 	timeout             = flag.Duration("timeout", time.Duration(0), "If specified, sets timeout for step")
 	breakpointOnFailure = flag.Bool("breakpoint_on_failure", false, "If specified, expect steps to not skip on failure")
+	onError             = flag.String("on_error", "", "Set to \"continue\" to ignore an error and "+
+		"continue when a container terminates with a non-zero exit code. "+
+		"Set to \"fail\" to declare a failure with a step error and stop executing the rest of the steps.")
+	stepPath = flag.String("step_path", "", "Relative step path, creates the specified path under /tekton/steps/ which "+
+		"can be used to store the step metadata e.g. <step-name> in /tekton/steps/<step-name>/")
+	stepPathLink = flag.String("step_path_link", "", "Relative step path, creates a symbolic link to the "+
+		"specified step path e.g. <step-index> in /tekton/steps/<step-index>")
 )
 
 const (
@@ -108,6 +115,9 @@ func main() {
 		Results:             strings.Split(*results, ","),
 		Timeout:             timeout,
 		BreakpointOnFailure: *breakpointOnFailure,
+		OnError:             *onError,
+		StepPath:            *stepPath,
+		StepPathLink:        *stepPathLink,
 	}
 
 	// Copy any creds injected by the controller into the $HOME directory of the current
@@ -134,9 +144,15 @@ func main() {
 			// same signature.
 			if status, ok := t.Sys().(syscall.WaitStatus); ok {
 				checkForBreakpointOnFailure(e, breakpointExitPostFile)
-				os.Exit(status.ExitStatus())
+				// ignore a step error i.e. do not exit if a container terminates with a non-zero exit code when onError is set to "continue"
+				if e.OnError != entrypoint.ContinueOnError {
+					os.Exit(status.ExitStatus())
+				}
+			}
+			// log and exit only if a step error must cause run failure
+			if e.OnError != entrypoint.ContinueOnError {
+				log.Fatalf("Error executing command (ExitError): %v", err)
 			}
-			log.Fatalf("Error executing command (ExitError): %v", err)
 		default:
 			checkForBreakpointOnFailure(e, breakpointExitPostFile)
 			log.Fatalf("Error executing command: %v", err)

diff --git a/cmd/entrypoint/post_writer.go b/cmd/entrypoint/post_writer.go
@@ -20,3 +20,42 @@ func (*realPostWriter) Write(file string) {
 		log.Fatalf("Creating %q: %v", file, err)
 	}
 }
+
+// WriteFileContent creates the file (truncating it if it already exists) and writes the
+// specified content to it. The directory structure must already exist. An empty file name
+// is a no-op. Any failure to create, write, or close the file terminates the process.
+func (*realPostWriter) WriteFileContent(file, content string) {
+	if file == "" {
+		return
+	}
+	f, err := os.Create(file)
+	if err != nil {
+		log.Fatalf("Creating %q: %v", file, err)
+	}
+
+	// Close explicitly rather than with defer: log.Fatalf exits the process without
+	// running deferred calls, and a failed Close means the content may not be on disk.
+	_, err = f.WriteString(content)
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		log.Fatalf("Writing %q: %v", file, err)
+	}
+}
+
+// CreatePath creates the directory source (including any missing parents) and, when link
+// is non-empty and nothing exists at that path yet, a symbolic link at link that points
+// to source. An empty source is a no-op. Failing to create either one is fatal.
+func (*realPostWriter) CreatePath(source, link string) {
+	if source == "" {
+		return
+	}
+	if err := os.MkdirAll(source, 0770); err != nil {
+		log.Fatalf("Creating file path %q: %v", source, err)
+	}
+
+	if link == "" {
+		return
+	}
+	// Use Lstat so the link itself is inspected instead of its target: os.Stat reports a
+	// dangling symlink as missing, and the os.Symlink call would then fail because the
+	// link already exists.
+	if _, err := os.Lstat(link); !os.IsNotExist(err) {
+		return
+	}
+	// source is known to exist here because MkdirAll succeeded above.
+	if err := os.Symlink(source, link); err != nil {
+		log.Fatalf("Creating a symlink %q: %v", link, err)
+	}
+}
diff --git a/cmd/entrypoint/post_writer_test.go b/cmd/entrypoint/post_writer_test.go
@@ -0,0 +1,73 @@
+package main
+
+import (
+	"os"
+	"testing"
+)
+
+// TestRealPostWriter_WriteFileContent checks that WriteFileContent stores the given content
+// in the named file, and that it can be called with an empty file name.
+func TestRealPostWriter_WriteFileContent(t *testing.T) {
+	type testCase struct {
+		name, file, content string
+	}
+	cases := []testCase{
+		{
+			name:    "write a file content",
+			file:    "sample.txt",
+			content: "this is a sample file",
+		},
+		{
+			name: "write a file without specifying any path",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			writer := realPostWriter{}
+			writer.WriteFileContent(tc.file, tc.content)
+			if tc.file == "" {
+				// no file name was given, so there is no file to verify
+				return
+			}
+			defer os.Remove(tc.file)
+			if _, statErr := os.Stat(tc.file); statErr != nil {
+				t.Fatalf("Failed to create a file %q", tc.file)
+			}
+			got, readErr := os.ReadFile(tc.file)
+			if readErr != nil {
+				t.Fatalf("Failed to read the file %q", tc.file)
+			}
+			if string(got) != tc.content {
+				t.Fatalf("Failed to write the desired content %q to the file %q", tc.content, tc.file)
+			}
+		})
+	}
+}
+
+// TestRealPostWriter_CreateStepPath checks that CreatePath creates the source path and,
+// when both a source and a link are given, a link that resolves to an existing path.
+func TestRealPostWriter_CreateStepPath(t *testing.T) {
+	type testCase struct {
+		name, source, link string
+	}
+	cases := []testCase{
+		{
+			name:   "Create a path with a file",
+			source: "sample.txt",
+			link:   "0",
+		},
+		{
+			name: "Create a path without specifying any path",
+		},
+		{
+			name:   "Create a sym link without specifying any link path",
+			source: "sample.txt",
+		},
+		{
+			name: "Create a sym link without specifying any source",
+			link: "0.txt",
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			writer := realPostWriter{}
+			writer.CreatePath(tc.source, tc.link)
+			if tc.source == "" {
+				// without a source nothing is checked, whether or not a link was given
+				return
+			}
+			defer os.Remove(tc.source)
+			if _, statErr := os.Stat(tc.source); statErr != nil {
+				t.Fatalf("Failed to create a file %q", tc.source)
+			}
+			if tc.link == "" {
+				return
+			}
+			defer os.Remove(tc.link)
+			if _, statErr := os.Stat(tc.link); statErr != nil {
+				t.Fatalf("Failed to create a sym link %q", tc.link)
+			}
+		})
+	}
+}
diff --git a/docs/developers/README.md b/docs/developers/README.md
@@ -142,6 +142,8 @@ of how this directory is used:
   * These folders are [part of the Tekton API](../api_compatibility_policy.md):
     * `/tekton/results` is where [results](#results) are written to
       (path available to `Task` authors via [`$(results.name.path)`](../variables.md))
+    * `/tekton/steps` is where the `step` exit codes are written to
+      (path available to `Task` authors via [`$(steps.step-<stepName>.exitCode.path)`](../variables.md#variables-available-in-a-task))
   * These folders are implementation details of Tekton and **users should not
     rely on this specific behavior as it may change in the future**:
     * `/tekton/tools` contains tools like the [entrypoint binary](#entrypoint-rewriting-and-step-ordering)

diff --git a/docs/tasks.md b/docs/tasks.md
@@ -13,6 +13,8 @@ weight: 200
     - [Reserved directories](#reserved-directories)
     - [Running scripts within `Steps`](#running-scripts-within-steps)
     - [Specifying a timeout](#specifying-a-timeout)
+    - [Specifying `onError` for a `step`](#specifying-onerror-for-a-step)
+    - [Accessing Step's `exitCode` in subsequent `Steps`](#accessing-steps-exitcode-in-subsequent-steps)
   - [Specifying `Parameters`](#specifying-parameters)
   - [Specifying `Resources`](#specifying-resources)
   - [Specifying `Workspaces`](#specifying-workspaces)
@@ -282,6 +284,90 @@ steps:
       sleep 60
     timeout: 5s
 ``` 
+
+#### Specifying `onError` for a `step`
+
+This is an alpha feature. The `enable-api-fields` feature flag [must be set to `"alpha"`](./install.md)
+to specify `onError` for a `step`.
+
+When a `step` in a `task` results in a failure, the rest of the steps in the `task` are skipped and the `taskRun` is
+declared a failure. If you would like to ignore such step errors and continue executing the rest of the steps in
+the task, you can specify `onError` for such a `step`.
+
+`onError` can be set to either `continue` or `fail` as part of the step definition. If `onError` is
+set to `continue`, the entrypoint sets the original failed exit code of the [script](#running-scripts-within-steps)
+in the container terminated state. A `step` with `onError` set to `continue` does not fail the `taskRun` and continues
+executing the rest of the steps in a task.
+
+To ignore a step error, set `onError` to `continue`:
+
+```yaml
+steps:
+  - image: docker.io/library/golang:latest
+    name: ignore-unit-test-failure
+    onError: continue
+    script: |
+      go test .
+```
+
+The original failed exit code of the [script](#running-scripts-within-steps) is available in the terminated state of
+the container.
+
+```
+kubectl get tr taskrun-unit-test-t6qcl -o json | jq .status
+{
+  "conditions": [
+    {
+      "message": "All Steps have completed executing",
+      "reason": "Succeeded",
+      "status": "True",
+      "type": "Succeeded"
+    }
+  ],
+  "steps": [
+    {
+      "container": "step-ignore-unit-test-failure",
+      "imageID": "...",
+      "name": "ignore-unit-test-failure",
+      "terminated": {
+        "containerID": "...",
+        "exitCode": 1,
+        "reason": "Completed"
+      }
+    }
+  ]
+}
+```
+
+For an end-to-end example, see [the taskRun ignoring a step error](../examples/v1beta1/taskruns/alpha/ignore-step-error.yaml)
+and [the pipelineRun ignoring a step error](../examples/v1beta1/pipelineruns/alpha/ignore-step-error.yaml).
+
+#### Accessing Step's `exitCode` in subsequent `Steps`
+
+A step can access the exit code of any previous step using a `path` variable, similar to the way a task result path is referenced, for example:
+
+```shell
+$(steps.step-<step-name>.exitCode.path)
+```
+
+The `exitCode` of a step without any name can be referenced using:
+
+```shell
+$(steps.step-unnamed-<step-index>.exitCode.path)
+```
+
+You can also read the exit code directly from the Tekton internal path, although this is
+not recommended because the path might change in the future:
+
+```shell
+cat /tekton/steps/step-<step-name>/exitCode
+```
+
+Similarly, to read the exit code of a step without a name:
+
+```shell
+cat /tekton/steps/step-unnamed-<step-index>/exitCode
+```
+
 ### Specifying `Parameters`
 
 You can specify parameters, such as compilation flags or artifact names, that you want to supply to the `Task` at execution time.

diff --git a/docs/variables.md b/docs/variables.md
@@ -45,6 +45,8 @@ For instructions on using variable substitutions see the relevant section of [th
 | `context.taskRun.uid` | The uid of the `TaskRun` that this `Task` is running in. |
 | `context.task.name` | The name of this `Task`. |
 | `context.task.retry-count` | The current retry number of this `Task`. |
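+| `steps.step-<stepName>.exitCode.path` | The path to the file where the exit code of the step named `<stepName>` is stored. |
+| `steps.step-unnamed-<stepIndex>.exitCode.path` | The path to the file where the exit code of the unnamed step at index `<stepIndex>` is stored. |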
 
 ### `PipelineResource` variables available in a `Task`
 

diff --git a/examples/v1beta1/pipelineruns/alpha/ignore-step-error.yaml b/examples/v1beta1/pipelineruns/alpha/ignore-step-error.yaml
@@ -0,0 +1,59 @@
+kind: PipelineRun
+apiVersion: tekton.dev/v1beta1
+metadata:
+  generateName: pipelinerun-with-failing-step-
+spec:
+  serviceAccountName: 'default'
+  pipelineSpec:
+    tasks:
+      - name: task1
+        taskSpec:
+          steps:
+            # not really doing anything here, just a hurdle to test the "ignore step error"
+            - image: alpine
+              onError: continue
+              name: exit-with-1
+              script: |
+                exit 1
+            # initialize a task result which will be validated by the next task
+            - image: alpine
+              name: write-a-result
+              onError: continue
+              script: |
+                echo -n 123 | tee $(results.task1-result.path)
+                exit 11
+          results:
+            - name: task1-result
+              description: result of a task1
+      - name: task2
+        runAfter: [ "task1" ]
+        params:
+          - name: task1-result
+            value: $(tasks.task1.results.task1-result)
+        taskSpec:
+          params:
+            - name: task1-result
+          steps:
+            # again, not really doing anything here, just a hurdle to test the "ignore step error"
+            - image: alpine
+              onError: continue
+              name: exit-with-255
+              script: |
+                exit 255
+            # verify that the task result was produced by the first task, fail if the result does not match
+            - image: alpine
+              name: verify-a-task-result
+              script: |
+                ls /tekton/results/
+                if [ $(params.task1-result) == 123 ]; then
+                    echo "Yay! the task result matches which was initialized in the previous task while ignoring the step error"
+                else
+                    echo "the task result does not match."
+                    exit 1
+                fi
+            # the last step of a task and one more hurdle
+            - image: alpine
+              name: exit-with-20
+              onError: continue
+              script: |
+                exit 20
diff --git a/examples/v1beta1/taskruns/alpha/ignore-step-error.yaml b/examples/v1beta1/taskruns/alpha/ignore-step-error.yaml
@@ -0,0 +1,49 @@
+kind: TaskRun
+apiVersion: tekton.dev/v1beta1
+metadata:
+  generateName: taskrun-with-failing-step-
+spec:
+  taskSpec:
+    steps:
+      # exit with 1 and ignore non zero exit code
+      - image: alpine
+        onError: continue
+        name: exit-with-1
+        script: |
+          exit 1
+      # check if the /tekton/steps/step-<step-name>/exitCode got created and contains the exit code
+      # check if the symlink /tekton/steps/0/ got created
+      - image: alpine
+        name: verify-step-path
+        script: |
+          exitCode=`cat $(steps.step-exit-with-1.exitCode.path)`
+          if [ $exitCode == 1 ]; then
+              echo "Yay! the exit code can be accessed using the path variable and matches the previous step exit code"
+          else
+              echo "the exit code does not match."
+              exit 1
+          fi
+          FILE=/tekton/steps/step-exit-with-1/exitCode
+          if [ -f "$FILE" ]; then
+            echo "$FILE exists."
+            echo "Yay! the file exists which was created by the controller to record the step exit code."
+          else
+            echo "$FILE does not exist."
+            exit 1
+          fi
+          FILE=/tekton/steps/0/exitCode
+          if [ -f "$FILE" ]; then
+            echo "$FILE exists."
+            echo "Yay! the symlink exists which was created by the controller to record the step exit code."
+          else
+            echo "$FILE does not exist."
+            exit 1
+          fi
+          exitCode=`cat $FILE`
+          if [ $exitCode == 1 ]; then
+              echo "Yay! the exit code matches to the previous step exit code"
+          else
+              echo "the exit code does not match."
+              exit 1
+          fi
+---
diff --git a/internal/builder/v1beta1/step.go b/internal/builder/v1beta1/step.go
@@ -86,3 +86,10 @@ func StepScript(script string) StepOp {
 		step.Script = script
 	}
 }
+
+// StepOnError returns a StepOp that assigns e to the OnError field of a step.
+func StepOnError(e string) StepOp {
+	return func(s *v1beta1.Step) {
+		s.OnError = e
+	}
+}