Skip to content

Commit

Permalink
Add granular termination reason in container termination message
Browse files Browse the repository at this point in the history
Related to tektoncd#7539 and tektoncd#7223

To report a specific termination reason for each Step, we need to know why its container finished; we use the termination message to store a new "state" with this information. We are adding a new field to store this information per step.

Co-authored-by: JeromeJu <46675578+JeromeJu@users.noreply.github.com>
Co-authored-by: Chitrang Patel <chitrang@google.com>
  • Loading branch information
3 people committed Jan 15, 2024
1 parent 933f2a0 commit bc8f39b
Show file tree
Hide file tree
Showing 14 changed files with 770 additions and 40 deletions.
2 changes: 1 addition & 1 deletion cmd/entrypoint/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ func main() {
if err := e.Go(); err != nil {
breakpointExitPostFile := e.PostFile + breakpointExitSuffix
switch t := err.(type) { //nolint:errorlint // checking for multiple types with errors.As is ugly.
case skipError:
case entrypoint.SkipError:
log.Print("Skipping step because a previous step failed")
os.Exit(1)
case termination.MessageLengthError:
Expand Down
8 changes: 1 addition & 7 deletions cmd/entrypoint/waiter.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
if breakpointOnFailure {
return nil
}
return skipError("error file present, bail and skip the step")
return entrypoint.ErrSkipPreviousStepFailed
}
select {
case <-ctx.Done():
Expand All @@ -86,9 +86,3 @@ func (rw *realWaiter) Wait(ctx context.Context, file string, expectContent bool,
}
}
}

// skipError is a string-backed error type; the string is the error message.
type skipError string

// Error implements the error interface by returning the underlying string.
func (s skipError) Error() string {
	msg := string(s)
	return msg
}
4 changes: 2 additions & 2 deletions cmd/entrypoint/waiter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func TestRealWaiterWaitWithErrorWaitfile(t *testing.T) {
if err == nil {
t.Errorf("expected skipError upon encounter error waitfile")
}
var skipErr skipError
var skipErr entrypoint.SkipError
if errors.As(err, &skipErr) {
close(doneCh)
} else {
Expand Down Expand Up @@ -292,7 +292,7 @@ func TestRealWaiterWaitContextWithErrorWaitfile(t *testing.T) {
if err == nil {
t.Errorf("expected skipError upon encounter error waitfile")
}
var skipErr skipError
var skipErr entrypoint.SkipError
if errors.As(err, &skipErr) {
close(doneCh)
} else {
Expand Down
10 changes: 10 additions & 0 deletions docs/pipeline-api.md
Original file line number Diff line number Diff line change
Expand Up @@ -4638,6 +4638,16 @@ string
<td>
</td>
</tr>
<tr>
<td>
<code>terminationReason</code><br/>
<em>
string
</em>
</td>
<td>
</td>
</tr>
</tbody>
</table>
<h3 id="tekton.dev/v1.StepTemplate">StepTemplate
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/pipeline/v1/openapi_generated.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pkg/apis/pipeline/v1/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,9 @@
"description": "Details about a terminated container",
"$ref": "#/definitions/v1.ContainerStateTerminated"
},
"terminationReason": {
"type": "string"
},
"waiting": {
"description": "Details about a waiting container",
"$ref": "#/definitions/v1.ContainerStateWaiting"
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/pipeline/v1/taskrun_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,7 @@ type StepState struct {
Container string `json:"container,omitempty"`
ImageID string `json:"imageID,omitempty"`
Results []TaskRunStepResult `json:"results,omitempty"`
TerminationReason string `json:"terminationReason,omitempty"`
}

// SidecarState reports the results of running a sidecar in a Task.
Expand Down
38 changes: 26 additions & 12 deletions pkg/entrypoint/entrypointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,19 @@ func (e ContextError) Error() string {
return string(e)
}

// SkipError is a string-backed error type; the string is the error message.
type SkipError string

// Error implements the error interface by returning the underlying string.
func (s SkipError) Error() string {
	msg := string(s)
	return msg
}

// Sentinel errors returned by the entrypoint; callers match them with errors.Is.
var (
// ErrContextDeadlineExceeded is the error returned when the context deadline is exceeded.
// It is a ContextError carrying the message of context.DeadlineExceeded.
ErrContextDeadlineExceeded = ContextError(context.DeadlineExceeded.Error())
// ErrContextCanceled is the error returned when the context is canceled.
// It is a ContextError carrying the message of context.Canceled.
ErrContextCanceled = ContextError(context.Canceled.Error())
// ErrSkipPreviousStepFailed is the error returned when the step is skipped
// because the error file of a previous step is present.
ErrSkipPreviousStepFailed = SkipError("error file present, bail and skip the step")
)

// IsContextDeadlineError determine whether the error is context deadline
Expand Down Expand Up @@ -165,6 +173,11 @@ func (e Entrypointer) Go() error {
Value: time.Now().Format(timeFormat),
ResultType: result.InternalTektonResultType,
})

if errors.Is(err, ErrSkipPreviousStepFailed) {
output = append(output, e.outputRunResult(pod.TerminationReasonSkipped))
}

return err
}
}
Expand Down Expand Up @@ -194,26 +207,18 @@ func (e Entrypointer) Go() error {
}
}()
err = e.Runner.Run(ctx, e.Command...)
if errors.Is(err, ErrContextDeadlineExceeded) {
output = append(output, result.RunResult{
Key: "Reason",
Value: "TimeoutExceeded",
ResultType: result.InternalTektonResultType,
})
}
}

var ee *exec.ExitError
switch {
case err != nil && errors.Is(err, ErrContextCanceled):
logger.Info("Step was canceling")
output = append(output, result.RunResult{
Key: "Reason",
Value: "Cancelled",
ResultType: result.InternalTektonResultType,
})
output = append(output, e.outputRunResult(pod.TerminationReasonCancelled))
e.WritePostFile(e.PostFile, ErrContextCanceled)
e.WriteExitCodeFile(e.StepMetadataDir, syscall.SIGKILL.String())
case errors.Is(err, ErrContextDeadlineExceeded):
e.WritePostFile(e.PostFile, err)
output = append(output, e.outputRunResult(pod.TerminationReasonTimeoutExceeded))
case err != nil && e.BreakpointOnFailure:
logger.Info("Skipping writing to PostFile")
case e.OnError == ContinueOnError && errors.As(err, &ee):
Expand Down Expand Up @@ -336,3 +341,12 @@ func (e Entrypointer) waitingCancellation(ctx context.Context, cancel context.Ca
cancel()
return nil
}

// outputRunResult wraps terminationReason in an internal Tekton RunResult
// stored under the "Reason" key.
func (e Entrypointer) outputRunResult(terminationReason string) result.RunResult {
	var reason result.RunResult
	reason.Key = "Reason"
	reason.Value = terminationReason
	reason.ResultType = result.InternalTektonResultType
	return reason
}
186 changes: 182 additions & 4 deletions pkg/entrypoint/entrypointer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -747,29 +747,203 @@ func TestIsContextCanceledError(t *testing.T) {
}
}

// TestTerminationReason runs Entrypointer.Go against fake waiter, runner and
// post-writer implementations and checks, for each way a step can finish, the
// returned error, the recorded exit code, the post file that was written and
// the entries parsed back from the termination message file.
func TestTerminationReason(t *testing.T) {
	tests := []struct {
		desc              string
		waitFiles         []string
		onError           string
		runError          error
		expectedRunErr    error
		expectedExitCode  *string
		expectedWrotefile *string
		expectedStatus    []result.RunResult
	}{
		{
			desc:              "reason completed",
			expectedExitCode:  ptr("0"),
			expectedWrotefile: ptr("postfile"),
			expectedStatus: []result.RunResult{
				{
					Key:        "StartedAt",
					ResultType: result.InternalTektonResultType,
				},
			},
		},
		{
			desc:              "reason continued",
			onError:           ContinueOnError,
			runError:          ptr(exec.ExitError{}),
			expectedRunErr:    ptr(exec.ExitError{}),
			expectedExitCode:  ptr("-1"),
			expectedWrotefile: ptr("postfile"),
			expectedStatus: []result.RunResult{
				{
					Key:        "ExitCode",
					Value:      "-1",
					ResultType: result.InternalTektonResultType,
				},
				{
					Key:        "StartedAt",
					ResultType: result.InternalTektonResultType,
				},
			},
		},
		{
			desc:              "reason errored",
			runError:          ptr(exec.Error{}),
			expectedRunErr:    ptr(exec.Error{}),
			expectedWrotefile: ptr("postfile.err"),
			expectedStatus: []result.RunResult{
				{
					Key:        "StartedAt",
					ResultType: result.InternalTektonResultType,
				},
			},
		},
		{
			desc:              "reason timedout",
			runError:          ErrContextDeadlineExceeded,
			expectedRunErr:    ErrContextDeadlineExceeded,
			expectedWrotefile: ptr("postfile.err"),
			expectedStatus: []result.RunResult{
				{
					Key:        "Reason",
					Value:      pod.TerminationReasonTimeoutExceeded,
					ResultType: result.InternalTektonResultType,
				},
				{
					Key:        "StartedAt",
					ResultType: result.InternalTektonResultType,
				},
			},
		},
		{
			desc:              "reason skipped",
			waitFiles:         []string{"file"},
			expectedRunErr:    ErrSkipPreviousStepFailed,
			expectedWrotefile: ptr("postfile.err"),
			expectedStatus: []result.RunResult{
				{
					Key:        "Reason",
					Value:      pod.TerminationReasonSkipped,
					ResultType: result.InternalTektonResultType,
				},
				{
					Key:        "StartedAt",
					ResultType: result.InternalTektonResultType,
				},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.desc, func(t *testing.T) {
			// The waiter is always configured to skip, but Wait is only invoked
			// for entries in WaitFiles, so only the "skipped" case is affected.
			fw, fr, fpw := &fakeWaiter{skipStep: true}, &fakeRunner{runError: test.runError}, &fakePostWriter{}

			// t.TempDir is removed automatically when the subtest finishes.
			tmpFolder := t.TempDir()

			terminationFile, err := os.CreateTemp(tmpFolder, "termination")
			if err != nil {
				t.Fatalf("unexpected error creating termination file: %v", err)
			}
			// Only the path is needed below; close the handle so it is not
			// leaked and does not block removal of the temporary directory.
			if err := terminationFile.Close(); err != nil {
				t.Fatalf("unexpected error closing termination file: %v", err)
			}

			e := Entrypointer{
				Command:             []string{},
				WaitFiles:           test.waitFiles,
				PostFile:            "postfile",
				Waiter:              fw,
				Runner:              fr,
				PostWriter:          fpw,
				TerminationPath:     terminationFile.Name(),
				BreakpointOnFailure: false,
				StepMetadataDir:     tmpFolder,
				OnError:             test.onError,
			}

			err = e.Go()

			if d := cmp.Diff(test.expectedRunErr, err); d != "" {
				t.Fatalf("entrypoint error doesn't match %s", diff.PrintWantGot(d))
			}

			if d := cmp.Diff(test.expectedExitCode, fpw.exitCode); d != "" {
				t.Fatalf("exitCode doesn't match %s", diff.PrintWantGot(d))
			}

			if d := cmp.Diff(test.expectedWrotefile, fpw.wrote); d != "" {
				t.Fatalf("wrote file doesn't match %s", diff.PrintWantGot(d))
			}

			termination, err := getTermination(t, terminationFile.Name())
			if err != nil {
				t.Fatalf("error getting termination output: %v", err)
			}

			if d := cmp.Diff(test.expectedStatus, termination); d != "" {
				t.Fatalf("termination status doesn't match %s", diff.PrintWantGot(d))
			}
		})
	}
}

// getTermination reads the file at terminationFile, parses its contents with
// termination.ParseMessage and returns the parsed results. The Value of every
// "StartedAt" entry is blanked so callers can compare against fixed
// expectations. Read and parse errors are returned to the caller.
func getTermination(t *testing.T, terminationFile string) ([]result.RunResult, error) {
	t.Helper()

	raw, err := os.ReadFile(terminationFile)
	if err != nil {
		return nil, err
	}

	logger, _ := logging.NewLogger("", "status")
	results, err := termination.ParseMessage(logger, string(raw))
	if err != nil {
		return nil, err
	}

	for i := range results {
		if results[i].Key != "StartedAt" {
			continue
		}
		results[i].Value = ""
	}

	return results, nil
}

// fakeWaiter is a test double assigned to Entrypointer.Waiter; its Wait method
// records the files it was asked to wait for instead of touching the filesystem.
type fakeWaiter struct {
// Mutex is held by Wait while appending to waited.
sync.Mutex
// waited lists the files passed to Wait that were recorded as waited on.
waited []string
// waitCancelDuration, when positive, is how long Wait sleeps when asked to
// wait for pod.DownwardMountCancelFile.
waitCancelDuration time.Duration
// skipStep makes Wait return ErrSkipPreviousStepFailed for any file other
// than pod.DownwardMountCancelFile.
skipStep bool
}

func (f *fakeWaiter) Wait(ctx context.Context, file string, _ bool, _ bool) error {
if file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0 {
switch {
case file == pod.DownwardMountCancelFile && f.waitCancelDuration > 0:
time.Sleep(f.waitCancelDuration)
} else if file == pod.DownwardMountCancelFile {
case file == pod.DownwardMountCancelFile:
return nil
case f.skipStep:
return ErrSkipPreviousStepFailed
}

f.Lock()
f.waited = append(f.waited, file)
f.Unlock()
return nil
}

// fakeRunner is a test double assigned to Entrypointer.Runner. It records the
// arguments it was invoked with and returns a preconfigured error.
type fakeRunner struct {
	// args points at the arguments of the most recent Run call; it is nil
	// until Run has been called.
	args *[]string
	// runError is returned as-is by Run; leave it nil to simulate success.
	runError error
}

// Run stores args on the receiver and returns f.runError without executing
// anything.
func (f *fakeRunner) Run(ctx context.Context, args ...string) error {
	f.args = &args
	return f.runError
}

type fakePostWriter struct {
Expand Down Expand Up @@ -903,3 +1077,7 @@ func getMockSpireClient(ctx context.Context) (spire.EntrypointerAPIClient, spire

return sc, sc, tr
}

// ptr returns a pointer to a freshly allocated copy of value, which is handy
// for building pointer-typed literals in test tables.
func ptr[T any](value T) *T {
	p := new(T)
	*p = value
	return p
}
Loading

0 comments on commit bc8f39b

Please sign in to comment.