Skip to content

Commit

Permalink
Use FilteredPodInformer to watch Tekton pods
Browse files Browse the repository at this point in the history
There was an outage in our environment, where the Tekton controller were
impacted. Most of the controllers were killed by OOM, and restarted
continuously. The root cause is one end-user ran a large number of pods,
as a result there were ~8k completed pods in one namespace. And one pod
had 100K size as there was a big env variable in the pod spec. Most of
the controllers loaded all pods in the cluster into controller informer,
which resulted in the aforementioned OOM scenario.

Switch to filtered pod informers and use Tekton specific label to only
load Tekton managed pods into the informer cache.

Signed-off-by: Matthias Diester <matthias.diester@de.ibm.com>
  • Loading branch information
zhangtbj authored and tekton-robot committed Feb 4, 2021
1 parent 9850785 commit fbceaae
Show file tree
Hide file tree
Showing 16 changed files with 262 additions and 74 deletions.
3 changes: 3 additions & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@ import (
"os"

"github.com/tektoncd/pipeline/pkg/apis/pipeline"
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
"github.com/tektoncd/pipeline/pkg/reconciler/pipelinerun"
"github.com/tektoncd/pipeline/pkg/reconciler/taskrun"
"github.com/tektoncd/pipeline/pkg/version"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/rest"
filteredinformerfactory "knative.dev/pkg/client/injection/kube/informers/factory/filtered"
"knative.dev/pkg/controller"
"knative.dev/pkg/injection"
"knative.dev/pkg/injection/sharedmain"
Expand Down Expand Up @@ -109,6 +111,7 @@ func main() {
log.Fatal(http.ListenAndServe(":"+port, mux))
}()

ctx = filteredinformerfactory.WithSelectors(ctx, v1beta1.ManagedByLabelKey)
sharedmain.MainWithConfig(ctx, ControllerLogKey, cfg,
taskrun.NewController(*namespace, images),
pipelinerun.NewController(*namespace, images),
Expand Down
6 changes: 3 additions & 3 deletions pkg/apis/pipeline/v1alpha1/taskrun_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (

var _ apis.Defaultable = (*TaskRun)(nil)

const managedByLabelKey = "app.kubernetes.io/managed-by"
const ManagedByLabelKey = "app.kubernetes.io/managed-by"

func (tr *TaskRun) SetDefaults(ctx context.Context) {
ctx = apis.WithinParent(ctx, tr.ObjectMeta)
Expand All @@ -41,8 +41,8 @@ func (tr *TaskRun) SetDefaults(ctx context.Context) {
if tr.ObjectMeta.Labels == nil {
tr.ObjectMeta.Labels = map[string]string{}
}
if _, found := tr.ObjectMeta.Labels[managedByLabelKey]; !found {
tr.ObjectMeta.Labels[managedByLabelKey] = cfg.Defaults.DefaultManagedByLabelValue
if _, found := tr.ObjectMeta.Labels[ManagedByLabelKey]; !found {
tr.ObjectMeta.Labels[ManagedByLabelKey] = cfg.Defaults.DefaultManagedByLabelValue
}
}

Expand Down
6 changes: 3 additions & 3 deletions pkg/apis/pipeline/v1beta1/taskrun_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (

var _ apis.Defaultable = (*TaskRun)(nil)

const managedByLabelKey = "app.kubernetes.io/managed-by"
const ManagedByLabelKey = "app.kubernetes.io/managed-by"

func (tr *TaskRun) SetDefaults(ctx context.Context) {
ctx = apis.WithinParent(ctx, tr.ObjectMeta)
Expand All @@ -39,8 +39,8 @@ func (tr *TaskRun) SetDefaults(ctx context.Context) {
if tr.ObjectMeta.Labels == nil {
tr.ObjectMeta.Labels = map[string]string{}
}
if _, found := tr.ObjectMeta.Labels[managedByLabelKey]; !found {
tr.ObjectMeta.Labels[managedByLabelKey] = cfg.Defaults.DefaultManagedByLabelValue
if _, found := tr.ObjectMeta.Labels[ManagedByLabelKey]; !found {
tr.ObjectMeta.Labels[ManagedByLabelKey] = cfg.Defaults.DefaultManagedByLabelValue
}
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/reconciler/pipelinerun/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ import (
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
fakepipelineruninformer "github.com/tektoncd/pipeline/pkg/client/injection/informers/pipeline/v1beta1/pipelinerun/fake"
"github.com/tektoncd/pipeline/pkg/names"
ttesting "github.com/tektoncd/pipeline/pkg/reconciler/testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
"knative.dev/pkg/metrics/metricstest" // Required to setup metrics env for testing
_ "knative.dev/pkg/metrics/testing"
rtesting "knative.dev/pkg/reconciler/testing"
)

var (
Expand Down Expand Up @@ -154,7 +154,7 @@ func TestRecordRunningPipelineRunsCount(t *testing.T) {
}
}

ctx, _ := rtesting.SetupFakeContext(t)
ctx, _ := ttesting.SetupFakeContext(t)
informer := fakepipelineruninformer.Get(ctx)
// Add N randomly-named PipelineRuns with differently-succeeded statuses.
for _, tr := range []*v1beta1.PipelineRun{
Expand Down
4 changes: 2 additions & 2 deletions pkg/reconciler/taskrun/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/cache"
kubeclient "knative.dev/pkg/client/injection/kube/client"
podinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod"
filteredpodinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod/filtered"
"knative.dev/pkg/configmap"
"knative.dev/pkg/controller"
"knative.dev/pkg/kmeta"
Expand All @@ -52,7 +52,7 @@ func NewController(namespace string, images pipeline.Images) func(context.Contex
taskRunInformer := taskruninformer.Get(ctx)
taskInformer := taskinformer.Get(ctx)
clusterTaskInformer := clustertaskinformer.Get(ctx)
podInformer := podinformer.Get(ctx)
podInformer := filteredpodinformer.Get(ctx, v1beta1.ManagedByLabelKey)
resourceInformer := resourceinformer.Get(ctx)
metrics, err := NewRecorder()
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/reconciler/taskrun/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ import (
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
faketaskruninformer "github.com/tektoncd/pipeline/pkg/client/injection/informers/pipeline/v1beta1/taskrun/fake"
"github.com/tektoncd/pipeline/pkg/names"
ttesting "github.com/tektoncd/pipeline/pkg/reconciler/testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
"knative.dev/pkg/metrics/metricstest"
_ "knative.dev/pkg/metrics/testing"
rtesting "knative.dev/pkg/reconciler/testing"
)

var (
Expand Down Expand Up @@ -243,7 +243,7 @@ func TestRecordRunningTaskRunsCount(t *testing.T) {
}
}

ctx, _ := rtesting.SetupFakeContext(t)
ctx, _ := ttesting.SetupFakeContext(t)
informer := faketaskruninformer.Get(ctx)
// Add N randomly-named TaskRuns with differently-succeeded statuses.
for _, tr := range []*v1beta1.TaskRun{
Expand Down
26 changes: 24 additions & 2 deletions pkg/reconciler/testing/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,50 @@ import (
"context"
"testing"

"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/record"
filteredinformerfactory "knative.dev/pkg/client/injection/kube/informers/factory/filtered"
"knative.dev/pkg/injection"
logtesting "knative.dev/pkg/logging/testing"

"github.com/tektoncd/pipeline/pkg/reconciler/events/cloudevent"
"go.uber.org/zap"
"go.uber.org/zap/zaptest"
"knative.dev/pkg/controller"
"knative.dev/pkg/logging"
rtesting "knative.dev/pkg/reconciler/testing"

// Import for creating fake filtered factory in the test
_ "knative.dev/pkg/client/injection/kube/informers/factory/filtered/fake"
)

// SetupFakeContext sets up the the Context and the fake filtered informers for the tests.
func SetupFakeContext(t *testing.T) (context.Context, []controller.Informer) {
ctx, informer := rtesting.SetupFakeContext(t)
ctx, _, informer := SetupFakeContextWithLabelKey(t)
cloudEventClientBehaviour := cloudevent.FakeClientBehaviour{
SendSuccessfully: true,
}
ctx = cloudevent.WithClient(ctx, &cloudEventClientBehaviour)
return WithLogger(ctx, t), informer
}

// WithLogger returns the the Logger
func WithLogger(ctx context.Context, t *testing.T) context.Context {
return logging.WithLogger(ctx, TestLogger(t))
}

// TestLogger sets up the the Logger
func TestLogger(t *testing.T) *zap.SugaredLogger {
logger := zaptest.NewLogger(t, zaptest.WrapOptions(zap.AddCaller()))
return logger.Sugar().Named(t.Name())
}

// SetupFakeContextWithLabelKey sets up the the Context and the fake informers for the tests
// The provided context includes the FilteredInformerFactory LabelKey.
func SetupFakeContextWithLabelKey(t zaptest.TestingT) (context.Context, context.CancelFunc, []controller.Informer) {
ctx, c := context.WithCancel(logtesting.TestContextWithLogger(t))
ctx = controller.WithEventRecorder(ctx, record.NewFakeRecorder(1000))
ctx = filteredinformerfactory.WithSelectors(ctx, v1beta1.ManagedByLabelKey)
ctx, is := injection.Fake.SetupInformers(ctx, &rest.Config{})
return ctx, c, is
}
4 changes: 2 additions & 2 deletions test/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ import (
"k8s.io/client-go/tools/record"
fakekubeclient "knative.dev/pkg/client/injection/kube/client/fake"
fakeconfigmapinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/configmap/fake"
fakepodinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod/fake"
fakefilteredpodinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod/filtered/fake"
fakeserviceaccountinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/serviceaccount/fake"
"knative.dev/pkg/controller"
)
Expand Down Expand Up @@ -174,7 +174,7 @@ func SeedTestData(t *testing.T, ctx context.Context, d Data) (Clients, Informers
ClusterTask: fakeclustertaskinformer.Get(ctx),
PipelineResource: fakeresourceinformer.Get(ctx),
Condition: fakeconditioninformer.Get(ctx),
Pod: fakepodinformer.Get(ctx),
Pod: fakefilteredpodinformer.Get(ctx, v1beta1.ManagedByLabelKey),
ConfigMap: fakeconfigmapinformer.Get(ctx),
ServiceAccount: fakeserviceaccountinformer.Get(ctx),
}
Expand Down
4 changes: 2 additions & 2 deletions test/v1alpha1/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import (
coreinformers "k8s.io/client-go/informers/core/v1"
fakekubeclientset "k8s.io/client-go/kubernetes/fake"
fakekubeclient "knative.dev/pkg/client/injection/kube/client/fake"
fakepodinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod/fake"
fakefilteredpodinformer "knative.dev/pkg/client/injection/kube/informers/core/v1/pod/filtered/fake"
"knative.dev/pkg/controller"
)

Expand Down Expand Up @@ -101,7 +101,7 @@ func SeedTestData(t *testing.T, ctx context.Context, d Data) (Clients, Informers
ClusterTask: fakeclustertaskinformer.Get(ctx),
PipelineResource: fakeresourceinformer.Get(ctx),
Condition: fakeconditioninformer.Get(ctx),
Pod: fakepodinformer.Get(ctx),
Pod: fakefilteredpodinformer.Get(ctx, v1alpha1.ManagedByLabelKey),
}

for _, pr := range d.PipelineRuns {
Expand Down
4 changes: 2 additions & 2 deletions test/v1alpha1/wait_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ import (
"time"

"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1alpha1"
ttesting "github.com/tektoncd/pipeline/pkg/reconciler/testing"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
duckv1beta1 "knative.dev/pkg/apis/duck/v1beta1"
rtesting "knative.dev/pkg/reconciler/testing"
)

var (
Expand Down Expand Up @@ -101,7 +101,7 @@ func TestWaitForPipelineRunStateFailed(t *testing.T) {
}

func fakeClients(t *testing.T, d Data) (*clients, context.Context, func()) {
ctx, _ := rtesting.SetupFakeContext(t)
ctx, _ := ttesting.SetupFakeContext(t)
ctx, cancel := context.WithCancel(ctx)
fakeClients, _ := SeedTestData(t, ctx, d)
return &clients{
Expand Down

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit fbceaae

Please sign in to comment.