diff --git a/docs/events.md b/docs/events.md new file mode 100644 index 00000000000..add381754cc --- /dev/null +++ b/docs/events.md @@ -0,0 +1,39 @@ + +# Events + +Tekton runtime resources, specifically `TaskRuns` and `PipelineRuns`, +emit events when they are executed, so that users can monitor their lifecycle +and react to it. Tekton emits [kubernetes events](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.18/#event-v1-core), that can be retrieve from the resource via +`kubectl describe [resource]`. + +No events are emitted for `Conditions` today (https://github.com/tektoncd/pipeline/issues/2461). + +## TaskRuns + +`TaskRun` events are generated for the following `Reasons`: +- `Started`: this is triggered the first time the `TaskRun` is picked by the + reconciler from its work queue, so it only happens if web-hook validation was + successful. Note that this event does not imply that a step started executing, + as several conditions must be met first: + - task and bound resource validation must be successful + - attached conditions must run successfully + - the `Pod` associated to the `TaskRun` must be successfully scheduled +- `Succeeded`: this is triggered once all steps in the `TaskRun` are executed + successfully, including post-steps injected by Tekton. +- `Failed`: this is triggered if the `TaskRun` is completed, but not successfully. + Causes of failure may be: one the steps failed, the `TaskRun` was cancelled or + the `TaskRun` timed out. + +## PipelineRuns + +`PipelineRun` events are generated for the following `Reasons`: +- `Succeeded`: this is triggered once all `Tasks` reachable via the DAG are + executed successfully. +- `Failed`: this is triggered if the `PipelineRun` is completed, but not + successfully. Causes of failure may be: one the `Tasks` failed or the + `PipelineRun` was cancelled. diff --git a/docs/pipelineruns.md b/docs/pipelineruns.md index 05b2a69469f..89d09fb4944 100644 --- a/docs/pipelineruns.md +++ b/docs/pipelineruns.md @@ -18,6 +18,8 @@ weight: 4 - [Specifying `LimitRange` values](#specifying-limitrange-values) - [Configuring a failure timeout](#configuring-a-failure-timeout) - [Cancelling a `PipelineRun`](#cancelling-a-pipelinerun) +- [Events](events.md#pipelineruns) + ## Overview diff --git a/docs/taskruns.md b/docs/taskruns.md index ce27a5b457d..6d65114a9bf 100644 --- a/docs/taskruns.md +++ b/docs/taskruns.md @@ -22,6 +22,7 @@ weight: 2 - [Monitoring `Steps`](#monitoring-steps) - [Monitoring `Results`](#monitoring-results) - [Cancelling a `TaskRun`](#cancelling-a-taskrun) +- [Events](events.md#taskruns) - [Code examples](#code-examples) - [Example `TaskRun` with a referenced `Task`](#example-taskrun-with-a-referenced-task) - [Example `TaskRun` with an embedded `Task`](#example-taskrun-with-an-embedded-task) diff --git a/pkg/reconciler/event.go b/pkg/reconciler/event.go index e35560b88ef..39c46019643 100644 --- a/pkg/reconciler/event.go +++ b/pkg/reconciler/event.go @@ -17,6 +17,7 @@ package reconciler import ( corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" "knative.dev/pkg/apis" @@ -27,20 +28,25 @@ const ( EventReasonSucceded = "Succeeded" // EventReasonFailed is the reason set for events about unsuccessful completion of TaskRuns / PipelineRuns EventReasonFailed = "Failed" + // EventReasonStarted is the reason set for events about the start of TaskRuns / PipelineRuns + EventReasonStarted = "Started" ) -// EmitEvent emits success or failed event for object -// if afterCondition is different from beforeCondition +// EmitEvent emits an event for object if afterCondition is different from beforeCondition func EmitEvent(c record.EventRecorder, beforeCondition *apis.Condition, afterCondition *apis.Condition, object runtime.Object) { - if beforeCondition != afterCondition && afterCondition != nil { - // Create events when the obj result is in. + if !equality.Semantic.DeepEqual(beforeCondition, afterCondition) && afterCondition != nil { + // If the condition changed, and the target condition is not empty, we send an event switch afterCondition.Status { case corev1.ConditionTrue: c.Event(object, corev1.EventTypeNormal, EventReasonSucceded, afterCondition.Message) - case corev1.ConditionUnknown: - c.Event(object, corev1.EventTypeNormal, afterCondition.Reason, afterCondition.Message) case corev1.ConditionFalse: c.Event(object, corev1.EventTypeWarning, EventReasonFailed, afterCondition.Message) + case corev1.ConditionUnknown: + if beforeCondition == nil { + // If the condition changed, the status is "unknown", and there was no condition before, + // we emit the "Started event". We ignore further updates of the "uknown" status. + c.Event(object, corev1.EventTypeNormal, EventReasonStarted, "") + } } } } diff --git a/pkg/reconciler/event_test.go b/pkg/reconciler/event_test.go index f6c464c23c7..dd21527c684 100644 --- a/pkg/reconciler/event_test.go +++ b/pkg/reconciler/event_test.go @@ -27,21 +27,24 @@ import ( func TestEmitEvent(t *testing.T) { testcases := []struct { - name string - before *apis.Condition - after *apis.Condition - expectEvent bool + name string + before *apis.Condition + after *apis.Condition + expectEvent bool + expectedEvent string }{{ - name: "unknown to true", + name: "unknown to true with message", before: &apis.Condition{ Type: apis.ConditionSucceeded, Status: corev1.ConditionUnknown, }, after: &apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionTrue, + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, + Message: "all done", }, - expectEvent: true, + expectEvent: true, + expectedEvent: "Normal Succeeded all done", }, { name: "true to true", before: &apis.Condition{ @@ -52,7 +55,8 @@ func TestEmitEvent(t *testing.T) { Type: apis.ConditionSucceeded, Status: corev1.ConditionTrue, }, - expectEvent: false, + expectEvent: false, + expectedEvent: "", }, { name: "false to false", before: &apis.Condition{ @@ -63,7 +67,22 @@ func TestEmitEvent(t *testing.T) { Type: apis.ConditionSucceeded, Status: corev1.ConditionFalse, }, - expectEvent: false, + expectEvent: false, + expectedEvent: "", + }, { + name: "unknown to unknown", + before: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + Reason: "foo", + }, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + Reason: "bar", + }, + expectEvent: false, + expectedEvent: "", }, { name: "true to nil", after: nil, @@ -71,7 +90,8 @@ func TestEmitEvent(t *testing.T) { Type: apis.ConditionSucceeded, Status: corev1.ConditionTrue, }, - expectEvent: true, + expectEvent: false, + expectedEvent: "", }, { name: "nil to true", before: nil, @@ -79,7 +99,40 @@ func TestEmitEvent(t *testing.T) { Type: apis.ConditionSucceeded, Status: corev1.ConditionTrue, }, - expectEvent: true, + expectEvent: true, + expectedEvent: "Normal Succeeded ", + }, { + name: "nil to unknown with message", + before: nil, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + Message: "just starting", + }, + expectEvent: true, + expectedEvent: "Normal Started ", + }, { + name: "unknown to false with message", + before: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionUnknown, + }, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionFalse, + Message: "really bad", + }, + expectEvent: true, + expectedEvent: "Warning Failed really bad", + }, { + name: "nil to false", + before: nil, + after: &apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionFalse, + }, + expectEvent: true, + expectedEvent: "Warning Failed ", }} for _, ts := range testcases { @@ -90,12 +143,21 @@ func TestEmitEvent(t *testing.T) { select { case event := <-fr.Events: - if ts.expectEvent && event == "" { - t.Errorf("Expected event but got empty for %s", ts.name) + if event == "" { + // The fake recorder reported empty, it should not happen + t.Fatalf("Expected event but got empty for %s", ts.name) } - case <-timer.C: if !ts.expectEvent { - t.Errorf("Unexpected event but got for %s", ts.name) + // The fake recorder reported an event which we did not expect + t.Errorf("Unxpected event \"%s\" but got one for %s", event, ts.name) + } + if !(event == ts.expectedEvent) { + t.Errorf("Expected event \"%s\" but got \"%s\" instead for %s", ts.expectedEvent, event, ts.name) + } + case <-timer.C: + if ts.expectEvent { + // The fake recorder did not report, the timer imeout expired + t.Errorf("Expected event but got none for %s", ts.name) } } } diff --git a/pkg/reconciler/taskrun/cancel.go b/pkg/reconciler/taskrun/cancel.go deleted file mode 100644 index 8eff915d7da..00000000000 --- a/pkg/reconciler/taskrun/cancel.go +++ /dev/null @@ -1,77 +0,0 @@ -/* -Copyright 2019 The Tekton Authors - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package taskrun - -import ( - "fmt" - "time" - - "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1" - podconvert "github.com/tektoncd/pipeline/pkg/pod" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "knative.dev/pkg/apis" -) - -type logger interface { - Warn(args ...interface{}) - Warnf(template string, args ...interface{}) - Infof(template string, args ...interface{}) -} - -func killTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, - logger logger, reason, message string) error { - - logger.Warn("stopping task run %q because of %q", tr.Name, reason) - tr.Status.SetCondition(&apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionFalse, - Reason: reason, - Message: message, - }) - - // update tr completed time - tr.Status.CompletionTime = &metav1.Time{Time: time.Now()} - - if tr.Status.PodName == "" { - logger.Warnf("task run %q has no pod running yet", tr.Name) - return nil - } - - // tr.Status.PodName will be empty if the pod was never successfully created. This condition - // can be reached, for example, by the pod never being schedulable due to limits imposed by - // a namespace's ResourceQuota. - err := clientSet.CoreV1().Pods(tr.Namespace).Delete(tr.Status.PodName, &metav1.DeleteOptions{}) - if err != nil && !errors.IsNotFound(err) { - logger.Warnf("Failed to terminate pod: %v", err) - return err - } - return nil -} - -// cancelTaskRun marks the TaskRun as cancelled and delete pods linked to it. -func cancelTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error { - message := fmt.Sprintf("TaskRun %q was cancelled", tr.Name) - return killTaskRun(tr, clientSet, logger, "TaskRunCancelled", message) -} - -func timeoutTaskRun(tr *v1alpha1.TaskRun, clientSet kubernetes.Interface, logger logger) error { - message := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, tr.Spec.Timeout.Duration.String()) - return killTaskRun(tr, clientSet, logger, podconvert.ReasonTimedOut, message) -} diff --git a/pkg/reconciler/taskrun/controller.go b/pkg/reconciler/taskrun/controller.go index 4642b801947..193cc8a3b29 100644 --- a/pkg/reconciler/taskrun/controller.go +++ b/pkg/reconciler/taskrun/controller.go @@ -44,6 +44,7 @@ const ( resyncPeriod = 10 * time.Hour ) +// NewController instantiates a new controller.Impl from knative.dev/pkg/controller func NewController(images pipeline.Images) func(context.Context, configmap.Watcher) *controller.Impl { return func(ctx context.Context, cmw configmap.Watcher) *controller.Impl { logger := logging.FromContext(ctx) diff --git a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go index 04fde4fbc85..694a3ebe2fc 100644 --- a/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go +++ b/pkg/reconciler/taskrun/resources/cloudevent/cloud_event_controller.go @@ -66,8 +66,7 @@ func cloudEventDeliveryFromTargets(targets []string) []v1alpha1.CloudEventDelive } // SendCloudEvents is used by the TaskRun controller to send cloud events once -// the TaskRun is complete. `tr` is used to obtain the list of targets but also -// to construct the body of the +// the TaskRun is complete. `tr` is used to obtain the list of targets func SendCloudEvents(tr *v1alpha1.TaskRun, ceclient CEClient, logger *zap.SugaredLogger) error { logger = logger.With(zap.String("taskrun", tr.Name)) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index e3f109410ff..d3e13e4c5fa 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -103,11 +103,15 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { // If the TaskRun is just starting, this will also set the starttime, // from which the timeout will immediately begin counting down. - tr.Status.InitializeConditions() - // In case node time was not synchronized, when controller has been scheduled to other nodes. - if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { - c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) - tr.Status.StartTime = &tr.CreationTimestamp + if !tr.HasStarted() { + tr.Status.InitializeConditions() + // In case node time was not synchronized, when controller has been scheduled to other nodes. + if tr.Status.StartTime.Sub(tr.CreationTimestamp.Time) < 0 { + c.Logger.Warnf("TaskRun %s createTimestamp %s is after the taskRun started %s", tr.GetRunKey(), tr.CreationTimestamp, tr.Status.StartTime) + tr.Status.StartTime = &tr.CreationTimestamp + } + // Run asnyc startup hooks + go c.preRunAsyncHook(ctx, tr) } // If the TaskRun is complete, run some post run fixtures when applicable @@ -161,36 +165,20 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { // If the TaskRun is cancelled, kill resources and update status if tr.IsCancelled() { before := tr.Status.GetCondition(apis.ConditionSucceeded) -<<<<<<< HEAD message := fmt.Sprintf("TaskRun %q was cancelled", tr.Name) err := c.failTaskRun(tr, v1beta1.TaskRunReasonCancelled, message) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + go c.postRunAsyncHook(ctx, tr, before) return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() -======= - err := cancelTaskRun(tr, c.KubeClientSet, c.Logger) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) - return err ->>>>>>> Consolidate cancel and timeout logic } // Check if the TaskRun has timed out; if it is, this will set its status // accordingly. if tr.HasTimedOut() { before := tr.Status.GetCondition(apis.ConditionSucceeded) -<<<<<<< HEAD message := fmt.Sprintf("TaskRun %q failed to finish within %q", tr.Name, tr.GetTimeout()) err := c.failTaskRun(tr, podconvert.ReasonTimedOut, message) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + go c.postRunAsyncHook(ctx, tr, before) return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() -======= - err := timeoutTaskRun(tr, c.KubeClientSet, c.Logger) - after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) - return err ->>>>>>> Consolidate cancel and timeout logic } // prepare fetches all required resources, validates them together with the @@ -215,13 +203,42 @@ func (c *Reconciler) Reconcile(ctx context.Context, key string) error { c.Logger.Errorf("Reconcile error: %v", err.Error()) } - // Emit events (only when ConditionSucceeded was changed) + // Run post run hook (Emit events) (only when ConditionSucceeded was changed) after := tr.Status.GetCondition(apis.ConditionSucceeded) - reconciler.EmitEvent(c.Recorder, before, after, tr) + // If after is different from before and status is not Unknown, the taskrun + // has completed its work - except for post-run tasks like emitting events, + // recording metrics, sending cloud events. + // Once tr.isDone becomes true, even when this key is queued, `reconcile` + // won't be invoked so we won't pass through here again + if tr.IsDone() && after != before { + go c.postRunAsyncHook(ctx, tr, before) + } else { + reconciler.EmitEvent(c.Recorder, before, after, tr) + } return multierror.Append(err, c.updateStatusLabelsAndAnnotations(tr, original)).ErrorOrNil() } +// Run any async logic that may be required at start-up time. This method is used +// to emit events, notifications or any other async operation +func (c *Reconciler) preRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun) { + c.Logger.Infof("preRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, nil, afterCondition, tr) +} + +// Run any async logic that may be required once the tr is successfully reconciled +// This method is used to emit events, notifications or any other async operation +func (c *Reconciler) postRunAsyncHook(ctx context.Context, tr *v1alpha1.TaskRun, beforeCondition *apis.Condition) { + c.Logger.Infof("postRunAsyncHook: %s", tr.Name) + + // Emit event + afterCondition := tr.Status.GetCondition(apis.ConditionSucceeded) + reconciler.EmitEvent(c.Recorder, beforeCondition, afterCondition, tr) +} + // `prepare` fetches resources the taskrun depends on, runs validation and convertion // It may report errors back to Reconcile, it updates the taskrun status in case of // error but it does not sync updates back to etcd. It does not emit events. @@ -237,10 +254,6 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1alpha1.TaskRun) (*v1alph // and may not have had all of the assumed default specified. tr.SetDefaults(contexts.WithUpgradeViaDefaulting(ctx)) - if tr.Spec.Timeout == nil { - tr.Spec.Timeout = &metav1.Duration{Duration: config.DefaultTimeoutMinutes * time.Minute} - } - if err := tr.ConvertTo(ctx, &v1beta1.TaskRun{}); err != nil { if ce, ok := err.(*v1beta1.CannotConvertError); ok { tr.Status.MarkResourceNotConvertible(ce) @@ -267,7 +280,7 @@ func (c *Reconciler) prepare(ctx context.Context, tr *v1alpha1.TaskRun) (*v1alph // Propagate labels from Task to TaskRun. if tr.ObjectMeta.Labels == nil { - tr.ObjectMeta.Labels = make(map[string]string, len(taskMeta.Labels)+1) + tr.ObjectMeta.Labels = make(map[st ring]string, len(taskMeta.Labels)+1) } for key, value := range taskMeta.Labels { tr.ObjectMeta.Labels[key] = value @@ -406,7 +419,6 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1alpha1.TaskRun, } c.Logger.Infof("Successfully reconciled taskrun %s/%s with status: %#v", tr.Name, tr.Namespace, tr.Status.GetCondition(apis.ConditionSucceeded)) - return nil }