From 873edd2223afe2da11b4a04fc364d913fb8724b5 Mon Sep 17 00:00:00 2001 From: Jason Hall Date: Mon, 15 Nov 2021 09:14:21 -0500 Subject: [PATCH] Avoid API server call to get Pod when sidecars are stopped If the TaskRun status indicates that all sidecars are already stopped, there's no need to hit the API server to get the Pod's status to attempt to stop the sidecars. This can avoid a lot of API calls, especially during global resync, when TaskRuns' sidecars are already stopped. This moves metrics.RecordPodLatency to only be called when sidecars are detected to be ready, before starting the first step, instead of any time a completed taskrun is observed (incl during a global resync) --- pkg/reconciler/taskrun/taskrun.go | 36 +++--- pkg/reconciler/taskrun/taskrun_test.go | 170 +++++++++++-------------- 2 files changed, 93 insertions(+), 113 deletions(-) diff --git a/pkg/reconciler/taskrun/taskrun.go b/pkg/reconciler/taskrun/taskrun.go index d053e5b4255..65bf2339601 100644 --- a/pkg/reconciler/taskrun/taskrun.go +++ b/pkg/reconciler/taskrun/taskrun.go @@ -132,24 +132,15 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg return cloudEventErr } - pod, err := c.stopSidecars(ctx, tr) - if err != nil { + if err := c.stopSidecars(ctx, tr); err != nil { return err } go func(metrics *taskrunmetrics.Recorder) { - err := metrics.DurationAndCount(tr) - if err != nil { + if err := metrics.DurationAndCount(tr); err != nil { logger.Warnf("Failed to log the metrics : %v", err) } - if pod != nil { - err = metrics.RecordPodLatency(pod, tr) - if err != nil { - logger.Warnf("Failed to log the metrics : %v", err) - } - } - err = metrics.CloudEvents(tr) - if err != nil { + if err := metrics.CloudEvents(tr); err != nil { logger.Warnf("Failed to log the metrics : %v", err) } }(c.metrics) @@ -203,16 +194,16 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg } return nil } -func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) (*corev1.Pod, error) { +func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) error { logger := logging.FromContext(ctx) // do not continue without knowing the associated pod if tr.Status.PodName == "" { - return nil, nil + return nil } // do not continue if the TaskSpec had no sidecars if tr.Status.TaskSpec != nil && len(tr.Status.TaskSpec.Sidecars) == 0 { - return nil, nil + return nil } // do not continue if the TaskRun was canceled or timed out as this caused the pod to be deleted in failTaskRun @@ -220,10 +211,15 @@ func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) (*co if condition != nil { reason := v1beta1.TaskRunReason(condition.Reason) if reason == v1beta1.TaskRunReasonCancelled || reason == v1beta1.TaskRunReasonTimedOut { - return nil, nil + return nil } } + // do not continue if there are no Running sidecars. + if !podconvert.IsSidecarStatusRunning(tr) { + return nil + } + pod, err := podconvert.StopSidecars(ctx, c.Images.NopImage, c.KubeClientSet, tr.Namespace, tr.Status.PodName) if err == nil { // Check if any SidecarStatuses are still shown as Running after stopping @@ -232,16 +228,15 @@ func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) (*co err = updateStoppedSidecarStatus(ctx, pod, tr, c) } } - if k8serrors.IsNotFound(err) { // At this stage the TaskRun has been completed if the pod is not found, it won't come back, // it has probably evicted. We can return the error, but we consider it a permanent one. - return nil, controller.NewPermanentError(err) + return controller.NewPermanentError(err) } else if err != nil { logger.Errorf("Error stopping sidecars for TaskRun %q: %v", tr.Name, err) tr.Status.MarkResourceFailed(podconvert.ReasonFailedResolution, err) } - return pod, nil + return nil } func (c *Reconciler) finishReconcileUpdateEmitEvents(ctx context.Context, tr *v1beta1.TaskRun, beforeCondition *apis.Condition, previousError error) error { @@ -459,6 +454,9 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun, rtr *re if err := podconvert.UpdateReady(ctx, c.KubeClientSet, *pod); err != nil { return err } + if err := c.metrics.RecordPodLatency(pod, tr); err != nil { + logger.Warnf("Failed to log the metrics : %v", err) + } } // Convert the Pod's status to the equivalent TaskRun Status. diff --git a/pkg/reconciler/taskrun/taskrun_test.go b/pkg/reconciler/taskrun/taskrun_test.go index ed24b0a599b..fee80512052 100644 --- a/pkg/reconciler/taskrun/taskrun_test.go +++ b/pkg/reconciler/taskrun/taskrun_test.go @@ -4309,54 +4309,25 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) { Name: "test-taskrun", Namespace: "foo", }, - Spec: v1beta1.TaskRunSpec{ - TaskSpec: &v1beta1.TaskSpec{ - Steps: []v1beta1.Step{{ - Container: corev1.Container{ - Image: "myimage", - Name: "mycontainer", - Command: []string{"/mycmd"}, - }, - }}, - Sidecars: []v1beta1.Sidecar{{ - Container: corev1.Container{ - Image: "dummy", - Name: "sidecar", - Command: []string{"some-command"}, - }, - }}, - }, - }, Status: v1beta1.TaskRunStatus{ Status: duckv1beta1.Status{ Conditions: duckv1beta1.Conditions{ apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionTrue, - Reason: v1beta1.TaskRunReasonSuccessful.String(), - Message: "", + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, }, }, }, TaskRunStatusFields: v1beta1.TaskRunStatusFields{ PodName: "test-taskrun-pod", StartTime: &metav1.Time{Time: startTime}, - TaskSpec: &v1beta1.TaskSpec{ - Steps: []v1beta1.Step{{ - Container: corev1.Container{ - Image: "myimage", - Name: "mycontainer", - Command: []string{"/mycmd"}, - }, - }}, - Sidecars: []v1beta1.Sidecar{{ - Container: corev1.Container{ - Image: "dummy", - Name: "sidecar", - Command: []string{"some-command"}, + Sidecars: []v1beta1.SidecarState{{ + ContainerState: corev1.ContainerState{ + Running: &corev1.ContainerStateRunning{ + StartedAt: metav1.Time{startTime}, }, - }}, - }, + }, + }}, }, }, } @@ -4384,7 +4355,7 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) { t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr) } - // Verify that the pod was retrieved + // Verify that the pod was retrieved. getPodFound := false for _, action := range clients.Kube.Actions() { if action.Matches("get", "pods") { @@ -4397,74 +4368,85 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) { } } -func TestStopSidecars_NoClientGetPodForTaskSpecWithoutSidecars(t *testing.T) { +func TestStopSidecars_NoClientGetPodForTaskSpecWithoutRunningSidecars(t *testing.T) { startTime := time.Date(2000, 1, 1, 1, 1, 1, 1, time.UTC) - tr := &v1beta1.TaskRun{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-taskrun", - Namespace: "foo", - Labels: map[string]string{"mylabel": "myvalue"}, - }, - Spec: v1beta1.TaskRunSpec{ - TaskSpec: &v1beta1.TaskSpec{ - Steps: []v1beta1.Step{{ - Container: corev1.Container{ - Image: "myimage", - Name: "mycontainer", - Command: []string{"/mycmd"}, - }, - }}, + + for _, tc := range []struct { + desc string + tr *v1beta1.TaskRun + }{{ + desc: "no sidecars", + tr: &v1beta1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun", + Namespace: "foo", }, - }, - Status: v1beta1.TaskRunStatus{ - Status: duckv1beta1.Status{ - Conditions: duckv1beta1.Conditions{ - apis.Condition{ - Type: apis.ConditionSucceeded, - Status: corev1.ConditionTrue, - Reason: v1beta1.TaskRunReasonSuccessful.String(), - Message: "", - }, + Status: v1beta1.TaskRunStatus{ + Status: duckv1beta1.Status{ + Conditions: duckv1beta1.Conditions{apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, + }}, + }, + TaskRunStatusFields: v1beta1.TaskRunStatusFields{ + PodName: "test-taskrun-pod", + StartTime: &metav1.Time{Time: startTime}, + Sidecars: []v1beta1.SidecarState{}, }, }, - TaskRunStatusFields: v1beta1.TaskRunStatusFields{ - PodName: "test-taskrun-pod", - StartTime: &metav1.Time{Time: startTime}, - TaskSpec: &v1beta1.TaskSpec{ - Steps: []v1beta1.Step{{ - Container: corev1.Container{ - Image: "myimage", - Name: "mycontainer", - Command: []string{"/mycmd"}, + }, + }, { + desc: "sidecars are terminated", + tr: &v1beta1.TaskRun{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-taskrun", + Namespace: "foo", + }, + Status: v1beta1.TaskRunStatus{ + Status: duckv1beta1.Status{ + Conditions: duckv1beta1.Conditions{apis.Condition{ + Type: apis.ConditionSucceeded, + Status: corev1.ConditionTrue, + }}, + }, + TaskRunStatusFields: v1beta1.TaskRunStatusFields{ + PodName: "test-taskrun-pod", + StartTime: &metav1.Time{Time: startTime}, + Sidecars: []v1beta1.SidecarState{{ + ContainerState: corev1.ContainerState{ + Terminated: &corev1.ContainerStateTerminated{ + StartedAt: metav1.Time{startTime}, + FinishedAt: metav1.Time{startTime}, + }, }, }}, }, }, }, - } - - taskRuns := []*v1beta1.TaskRun{tr} - - d := test.Data{ - TaskRuns: taskRuns, - } + }} { + t.Run(tc.desc, func(t *testing.T) { + d := test.Data{ + TaskRuns: []*v1beta1.TaskRun{tc.tr}, + } - testAssets, cancel := getTaskRunController(t, d) - defer cancel() - c := testAssets.Controller - clients := testAssets.Clients - reconcileErr := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tr)) + testAssets, cancel := getTaskRunController(t, d) + defer cancel() + c := testAssets.Controller + clients := testAssets.Clients + reconcileErr := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.tr)) - // We do not expect an error - if reconcileErr != nil { - t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr) - } + // We do not expect an error + if reconcileErr != nil { + t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr) + } - // Verify that the pod was not retrieved - for _, action := range clients.Kube.Actions() { - if action.Matches("get", "pods") { - t.Errorf("expected the pod not to be retrieved because the TaskRun has no sidecars") - } + // Verify that the pod was not retrieved + for _, action := range clients.Kube.Actions() { + if action.Matches("get", "pods") { + t.Errorf("expected the pod not to be retrieved because the TaskRun has no sidecars") + } + } + }) } }