Skip to content

Commit

Permalink
Avoid API server call to get Pod when sidecars are stopped
Browse files Browse the repository at this point in the history
If the TaskRun status indicates that all sidecars are already stopped,
there's no need to hit the API server to get the Pod's status to attempt
to stop the sidecars.

This can avoid a lot of API calls, especially during global resync, when
TaskRuns' sidecars are already stopped.

This moves metrics.RecordPodLatency to only be called when sidecars are
detected to be ready, before starting the first step, instead of any
time a completed taskrun is observed (incl during a global resync)
  • Loading branch information
imjasonh authored and tekton-robot committed Jan 4, 2022
1 parent 2ee1383 commit 873edd2
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 113 deletions.
36 changes: 17 additions & 19 deletions pkg/reconciler/taskrun/taskrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,15 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg
return cloudEventErr
}

pod, err := c.stopSidecars(ctx, tr)
if err != nil {
if err := c.stopSidecars(ctx, tr); err != nil {
return err
}

go func(metrics *taskrunmetrics.Recorder) {
err := metrics.DurationAndCount(tr)
if err != nil {
if err := metrics.DurationAndCount(tr); err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
if pod != nil {
err = metrics.RecordPodLatency(pod, tr)
if err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
}
err = metrics.CloudEvents(tr)
if err != nil {
if err := metrics.CloudEvents(tr); err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
}(c.metrics)
Expand Down Expand Up @@ -203,27 +194,32 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, tr *v1beta1.TaskRun) pkg
}
return nil
}
func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) (*corev1.Pod, error) {
func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) error {
logger := logging.FromContext(ctx)
// do not continue without knowing the associated pod
if tr.Status.PodName == "" {
return nil, nil
return nil
}

// do not continue if the TaskSpec had no sidecars
if tr.Status.TaskSpec != nil && len(tr.Status.TaskSpec.Sidecars) == 0 {
return nil, nil
return nil
}

// do not continue if the TaskRun was canceled or timed out as this caused the pod to be deleted in failTaskRun
condition := tr.Status.GetCondition(apis.ConditionSucceeded)
if condition != nil {
reason := v1beta1.TaskRunReason(condition.Reason)
if reason == v1beta1.TaskRunReasonCancelled || reason == v1beta1.TaskRunReasonTimedOut {
return nil, nil
return nil
}
}

// do not continue if there are no Running sidecars.
if !podconvert.IsSidecarStatusRunning(tr) {
return nil
}

pod, err := podconvert.StopSidecars(ctx, c.Images.NopImage, c.KubeClientSet, tr.Namespace, tr.Status.PodName)
if err == nil {
// Check if any SidecarStatuses are still shown as Running after stopping
Expand All @@ -232,16 +228,15 @@ func (c *Reconciler) stopSidecars(ctx context.Context, tr *v1beta1.TaskRun) (*co
err = updateStoppedSidecarStatus(ctx, pod, tr, c)
}
}

if k8serrors.IsNotFound(err) {
// At this stage the TaskRun has been completed if the pod is not found, it won't come back,
// it has probably evicted. We can return the error, but we consider it a permanent one.
return nil, controller.NewPermanentError(err)
return controller.NewPermanentError(err)
} else if err != nil {
logger.Errorf("Error stopping sidecars for TaskRun %q: %v", tr.Name, err)
tr.Status.MarkResourceFailed(podconvert.ReasonFailedResolution, err)
}
return pod, nil
return nil
}

func (c *Reconciler) finishReconcileUpdateEmitEvents(ctx context.Context, tr *v1beta1.TaskRun, beforeCondition *apis.Condition, previousError error) error {
Expand Down Expand Up @@ -459,6 +454,9 @@ func (c *Reconciler) reconcile(ctx context.Context, tr *v1beta1.TaskRun, rtr *re
if err := podconvert.UpdateReady(ctx, c.KubeClientSet, *pod); err != nil {
return err
}
if err := c.metrics.RecordPodLatency(pod, tr); err != nil {
logger.Warnf("Failed to log the metrics : %v", err)
}
}

// Convert the Pod's status to the equivalent TaskRun Status.
Expand Down
170 changes: 76 additions & 94 deletions pkg/reconciler/taskrun/taskrun_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4309,54 +4309,25 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) {
Name: "test-taskrun",
Namespace: "foo",
},
Spec: v1beta1.TaskRunSpec{
TaskSpec: &v1beta1.TaskSpec{
Steps: []v1beta1.Step{{
Container: corev1.Container{
Image: "myimage",
Name: "mycontainer",
Command: []string{"/mycmd"},
},
}},
Sidecars: []v1beta1.Sidecar{{
Container: corev1.Container{
Image: "dummy",
Name: "sidecar",
Command: []string{"some-command"},
},
}},
},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{
apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
Reason: v1beta1.TaskRunReasonSuccessful.String(),
Message: "",
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
},
},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "test-taskrun-pod",
StartTime: &metav1.Time{Time: startTime},
TaskSpec: &v1beta1.TaskSpec{
Steps: []v1beta1.Step{{
Container: corev1.Container{
Image: "myimage",
Name: "mycontainer",
Command: []string{"/mycmd"},
},
}},
Sidecars: []v1beta1.Sidecar{{
Container: corev1.Container{
Image: "dummy",
Name: "sidecar",
Command: []string{"some-command"},
Sidecars: []v1beta1.SidecarState{{
ContainerState: corev1.ContainerState{
Running: &corev1.ContainerStateRunning{
StartedAt: metav1.Time{startTime},
},
}},
},
},
}},
},
},
}
Expand Down Expand Up @@ -4384,7 +4355,7 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) {
t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr)
}

// Verify that the pod was retrieved
// Verify that the pod was retrieved.
getPodFound := false
for _, action := range clients.Kube.Actions() {
if action.Matches("get", "pods") {
Expand All @@ -4397,74 +4368,85 @@ func TestStopSidecars_ClientGetPodForTaskSpecWithSidecars(t *testing.T) {
}
}

func TestStopSidecars_NoClientGetPodForTaskSpecWithoutSidecars(t *testing.T) {
func TestStopSidecars_NoClientGetPodForTaskSpecWithoutRunningSidecars(t *testing.T) {
startTime := time.Date(2000, 1, 1, 1, 1, 1, 1, time.UTC)
tr := &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: "test-taskrun",
Namespace: "foo",
Labels: map[string]string{"mylabel": "myvalue"},
},
Spec: v1beta1.TaskRunSpec{
TaskSpec: &v1beta1.TaskSpec{
Steps: []v1beta1.Step{{
Container: corev1.Container{
Image: "myimage",
Name: "mycontainer",
Command: []string{"/mycmd"},
},
}},

for _, tc := range []struct {
desc string
tr *v1beta1.TaskRun
}{{
desc: "no sidecars",
tr: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: "test-taskrun",
Namespace: "foo",
},
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{
apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
Reason: v1beta1.TaskRunReasonSuccessful.String(),
Message: "",
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "test-taskrun-pod",
StartTime: &metav1.Time{Time: startTime},
Sidecars: []v1beta1.SidecarState{},
},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "test-taskrun-pod",
StartTime: &metav1.Time{Time: startTime},
TaskSpec: &v1beta1.TaskSpec{
Steps: []v1beta1.Step{{
Container: corev1.Container{
Image: "myimage",
Name: "mycontainer",
Command: []string{"/mycmd"},
},
}, {
desc: "sidecars are terminated",
tr: &v1beta1.TaskRun{
ObjectMeta: metav1.ObjectMeta{
Name: "test-taskrun",
Namespace: "foo",
},
Status: v1beta1.TaskRunStatus{
Status: duckv1beta1.Status{
Conditions: duckv1beta1.Conditions{apis.Condition{
Type: apis.ConditionSucceeded,
Status: corev1.ConditionTrue,
}},
},
TaskRunStatusFields: v1beta1.TaskRunStatusFields{
PodName: "test-taskrun-pod",
StartTime: &metav1.Time{Time: startTime},
Sidecars: []v1beta1.SidecarState{{
ContainerState: corev1.ContainerState{
Terminated: &corev1.ContainerStateTerminated{
StartedAt: metav1.Time{startTime},
FinishedAt: metav1.Time{startTime},
},
},
}},
},
},
},
}

taskRuns := []*v1beta1.TaskRun{tr}

d := test.Data{
TaskRuns: taskRuns,
}
}} {
t.Run(tc.desc, func(t *testing.T) {
d := test.Data{
TaskRuns: []*v1beta1.TaskRun{tc.tr},
}

testAssets, cancel := getTaskRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients
reconcileErr := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tr))
testAssets, cancel := getTaskRunController(t, d)
defer cancel()
c := testAssets.Controller
clients := testAssets.Clients
reconcileErr := c.Reconciler.Reconcile(testAssets.Ctx, getRunName(tc.tr))

// We do not expect an error
if reconcileErr != nil {
t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr)
}
// We do not expect an error
if reconcileErr != nil {
t.Errorf("Expected no error to be returned by reconciler: %v", reconcileErr)
}

// Verify that the pod was not retrieved
for _, action := range clients.Kube.Actions() {
if action.Matches("get", "pods") {
t.Errorf("expected the pod not to be retrieved because the TaskRun has no sidecars")
}
// Verify that the pod was not retrieved
for _, action := range clients.Kube.Actions() {
if action.Matches("get", "pods") {
t.Errorf("expected the pod not to be retrieved because the TaskRun has no sidecars")
}
}
})
}
}

Expand Down

0 comments on commit 873edd2

Please sign in to comment.