Skip to content

Commit

Permalink
feat: Centralize leaked ENI cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
sushrk committed Feb 23, 2024
1 parent a177073 commit 5749115
Show file tree
Hide file tree
Showing 25 changed files with 707 additions and 276 deletions.
1 change: 1 addition & 0 deletions PROJECT
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ resources:
version: v1beta1
- api:
crdVersion: v1
controller: true
domain: k8s.aws
group: vpcresources
kind: CNINode
Expand Down
2 changes: 2 additions & 0 deletions apis/vpcresources/v1alpha1/cninode_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ type Feature struct {
// CNINodeSpec defines the desired state of CNINode
type CNINodeSpec struct {
Features []Feature `json:"features,omitempty"`
// Additional tag key/value added to all network interfaces provisioned by the vpc-resource-controller and VPC-CNI
Tags map[string]string `json:"tags,omitempty"`
}

// CNINodeStatus defines the managed VPC resources.
Expand Down
7 changes: 7 additions & 0 deletions apis/vpcresources/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions config/crd/bases/vpcresources.k8s.aws_cninodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ spec:
type: string
type: object
type: array
tags:
additionalProperties:
type: string
description: Additional tag key/value added to all network interfaces
provisioned by the vpc-resource-controller and VPC-CNI
type: object
type: object
status:
description: CNINodeStatus defines the managed VPC resources.
Expand Down
2 changes: 2 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ rules:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- vpcresources.k8s.aws
Expand Down
128 changes: 128 additions & 0 deletions controllers/vpcresources/cninode_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package vpcresources

import (
"context"
stderrors "errors"
"fmt"

"github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1"
ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
"github.com/go-logr/logr"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/finalizer"
)

// CNINodeReconciler reconciles a CNINode object
type CNINodeReconciler struct {
client.Client
Scheme *runtime.Scheme
Context context.Context
Log logr.Logger
EC2Wrapper ec2API.EC2Wrapper
ClusterName string
finalizers finalizer.Finalizers
}

// nodeTerminationFinalizer will run the finalizer routine to clean up leaked resources on the node if CNINode is being deleted
// CNINode is deleted when the parent node object is delete ie when the EC2 instance is terminated
type nodeTerminationFinalizer struct {
EC2Wrapper ec2API.EC2Wrapper
}

var _ finalizer.Finalizer = (*nodeTerminationFinalizer)(nil)

//+kubebuilder:rbac:groups=vpcresources.k8s.aws,resources=cninodes,verbs=get;list;watch;create;update;patch;

// Reconcile handles CNINode create/update/delete events
// Reconciler will add the finalizer and cluster name tag if it does not exist and finalize on CNINode on deletion to clean up leaked resource on node
func (r *CNINodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
cniNode := &v1alpha1.CNINode{}
if err := r.Client.Get(ctx, req.NamespacedName, cniNode); err != nil {
if errors.IsNotFound(err) {
r.Log.Info("CNINode is deleted, will be re-created if the node exists", "CNINode", req.NamespacedName)
}
// Ignore not found error; CNINode will be re-created on node reconcile if the node is not terminated
return ctrl.Result{}, client.IgnoreNotFound(err)
}

// Add cluster name tag if it does not exist
clusterNameTagKey := fmt.Sprintf(config.ClusterNameTagKeyFormat, r.ClusterName)
val, ok := cniNode.Spec.Tags[clusterNameTagKey]
if !ok || val != config.ClusterNameTagValue {
newCNINode := cniNode.DeepCopy()
if len(newCNINode.Spec.Tags) != 0 {
newCNINode.Spec.Tags[clusterNameTagKey] = config.ClusterNameTagValue
} else {
newCNINode.Spec.Tags = map[string]string{
clusterNameTagKey: config.ClusterNameTagValue,
}
}

if err := r.Client.Patch(ctx, newCNINode, client.MergeFrom(cniNode)); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to patch CNINode %v", err)
}
}

// Finalize will add the finalizer if it does not exist and object is not being delete, and will remove the finalizer if it exists
// and object is being deleted. Also sets result.Updated to true so we need to update the object
finalizationRes, err := r.finalizers.Finalize(ctx, cniNode)
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to finalize CNINode: %v", err)
}
if finalizationRes.Updated {
if err = r.Client.Update(ctx, cniNode); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update CNINode based on finalization result: %w", err)
}
}
return ctrl.Result{}, nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *CNINodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
r.finalizers = finalizer.NewFinalizers()
err := r.finalizers.Register(config.NodeTerminationFinalizer, &nodeTerminationFinalizer{
EC2Wrapper: r.EC2Wrapper,
})
if err != nil {
return fmt.Errorf("failed to register the node termination finalizer on CNINode: %v", err)
}
return ctrl.NewControllerManagedBy(mgr).
For(&v1alpha1.CNINode{}).
WithOptions(controller.Options{MaxConcurrentReconciles: 1}).
Complete(r)
}

// Node termination finalize function will attempt to delete leaked resources (currently only leaked ENIs) at node deletion
func (f *nodeTerminationFinalizer) Finalize(ctx context.Context, obj client.Object) (finalizer.Result, error) {
cniNode, ok := obj.(*v1alpha1.CNINode)
if !ok {
return finalizer.Result{}, stderrors.New("unexpected object type")
}
cleaner := &cleanup.NodeTerminationCleaner{
NodeName: cniNode.Name,
EC2Wrapper: f.EC2Wrapper,
Log: ctrl.Log.WithName("node eni cleaner"),
}

// Returns err if failed to delete leaked ENIs on node so it can be retried
return finalizer.Result{}, cleaner.DeleteLeakedResources()
}
79 changes: 79 additions & 0 deletions controllers/vpcresources/suite_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

package vpcresources

import (
"path/filepath"
"testing"

"github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/envtest"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
//+kubebuilder:scaffold:imports
)

// These tests use Ginkgo (BDD-style Go testing framework). Refer to
// http://onsi.github.io/ginkgo/ to learn more about Ginkgo.

var cfg *rest.Config
var k8sClient client.Client
var testEnv *envtest.Environment

func TestAPIs(t *testing.T) {
RegisterFailHandler(Fail)

RunSpecs(t, "Controller Suite")
}

var _ = BeforeSuite(func() {
done := make(chan interface{})
go func() {
logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))

By("bootstrapping test environment")
testEnv = &envtest.Environment{
CRDDirectoryPaths: []string{filepath.Join("..", "..", "config", "crd", "bases")},
ErrorIfCRDPathMissing: false,
}

var err error
// cfg is defined in this file globally.
cfg, err = testEnv.Start()
Expect(err).NotTo(HaveOccurred())
Expect(cfg).NotTo(BeNil())

err = v1alpha1.AddToScheme(scheme.Scheme)
Expect(err).NotTo(HaveOccurred())
//+kubebuilder:scaffold:scheme

k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
Expect(err).NotTo(HaveOccurred())
Expect(k8sClient).NotTo(BeNil())
}()
Eventually(done, 60).Should(BeClosed())

})

var _ = AfterSuite(func() {
By("tearing down the test environment")
err := testEnv.Stop()
Expect(err).NotTo(HaveOccurred())
})
23 changes: 19 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ import (
vpcresourcesv1beta1 "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1beta1"
"github.com/aws/amazon-vpc-resource-controller-k8s/controllers/apps"
corecontroller "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/core"
vpcresourcescontrollers "github.com/aws/amazon-vpc-resource-controller-k8s/controllers/vpcresources"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/api"
ec2API "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api"
eniCleaner "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/aws/ec2/api/cleanup/eni"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/condition"
"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
Expand Down Expand Up @@ -311,7 +313,7 @@ func main() {
nodeManagerWorkers := asyncWorkers.NewDefaultWorkerPool("node async workers",
10, 1, ctrl.Log.WithName("node async workers"), ctx)
nodeManager, err := manager.NewNodeManager(ctrl.Log.WithName("node manager"), resourceManager,
apiWrapper, nodeManagerWorkers, controllerConditions, version.GitVersion, healthzHandler)
apiWrapper, nodeManagerWorkers, controllerConditions, clusterName, version.GitVersion, healthzHandler)

if err != nil {
ctrl.Log.Error(err, "failed to init node manager")
Expand All @@ -332,11 +334,13 @@ func main() {
os.Exit(1)
}

if err := (&ec2API.ENICleaner{
cleaner := &eniCleaner.ClusterENICleaner{
EC2Wrapper: ec2Wrapper,
ClusterName: clusterName,
Log: ctrl.Log.WithName("eni cleaner"),
}).SetupWithManager(ctx, mgr, healthzHandler); err != nil {
Log: ctrl.Log.WithName("cleaners").WithName("eni"),
}

if err := cleaner.SetupWithManager(ctx, mgr, healthzHandler); err != nil {
setupLog.Error(err, "unable to start eni cleaner")
os.Exit(1)
}
Expand Down Expand Up @@ -386,6 +390,17 @@ func main() {
os.Exit(1)
}

if err = (&vpcresourcescontrollers.CNINodeReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Context: ctx,
Log: ctrl.Log.WithName("controllers").WithName("CNINode"),
EC2Wrapper: ec2Wrapper,
ClusterName: clusterName,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CNINode")
os.Exit(1)
}
// +kubebuilder:scaffold:builder
setupLog.Info("setting up webhook server")
webhookServer := mgr.GetWebhookServer()
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5749115

Please sign in to comment.