Skip to content

Commit

Permalink
chore: Backport removal registration enforcement 1.2.x (#2036)
Browse files Browse the repository at this point in the history
  • Loading branch information
rschalo authored Feb 27, 2025
1 parent c380935 commit df0b2db
Show file tree
Hide file tree
Showing 14 changed files with 69 additions and 52 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module sigs.k8s.io/karpenter

go 1.23.2
go 1.23.6

require (
github.com/Pallinder/go-randomdata v1.2.0
Expand Down
2 changes: 1 addition & 1 deletion kwok/apis/crds/karpenter.kwok.sh_kwoknodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: kwoknodeclasses.karpenter.kwok.sh
spec:
group: karpenter.kwok.sh
Expand Down
2 changes: 1 addition & 1 deletion kwok/charts/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: nodeclaims.karpenter.sh
spec:
group: karpenter.sh
Expand Down
2 changes: 1 addition & 1 deletion kwok/charts/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: nodepools.karpenter.sh
spec:
group: karpenter.sh
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: nodeclaims.karpenter.sh
spec:
group: karpenter.sh
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: nodepools.karpenter.sh
spec:
group: karpenter.sh
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/nodeclaim/lifecycle/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider clou
recorder: recorder,

launch: &Launch{kubeClient: kubeClient, cloudProvider: cloudProvider, cache: cache.New(time.Minute, time.Second*10), recorder: recorder},
registration: &Registration{kubeClient: kubeClient},
registration: &Registration{kubeClient: kubeClient, recorder: recorder},
initialization: &Initialization{kubeClient: kubeClient},
liveness: &Liveness{clock: clk, kubeClient: kubeClient},
}
Expand Down
10 changes: 10 additions & 0 deletions pkg/controllers/nodeclaim/lifecycle/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,13 @@ func NodeClassNotReadyEvent(nodeClaim *v1.NodeClaim, err error) events.Event {
DedupeValues: []string{string(nodeClaim.UID)},
}
}

func UnregisteredTaintMissingEvent(nodeClaim *v1.NodeClaim) events.Event {
return events.Event{
InvolvedObject: nodeClaim,
Type: corev1.EventTypeWarning,
Reason: "UnregisteredTaintMissing",
Message: fmt.Sprintf("Missing %s taint which prevents registration related race conditions on Karpenter-managed nodes", v1.UnregisteredTaintKey),
DedupeValues: []string{string(nodeClaim.UID)},
}
}
48 changes: 24 additions & 24 deletions pkg/controllers/nodeclaim/lifecycle/initialization_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,50 +113,50 @@ var _ = Describe("Initialization", func() {
Entry("should ignore NodeClaims which aren't managed by this Karpenter instance", false),
)
It("shouldn't consider the nodeClaim initialized when it has not registered", func() {
nodeClaim := test.NodeClaim(v1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
v1.NodePoolLabelKey: nodePool.Name,
},
},
Spec: v1.NodeClaimSpec{
Resources: v1.ResourceRequirements{
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("2"),
corev1.ResourceMemory: resource.MustParse("50Mi"),
corev1.ResourcePods: resource.MustParse("5"),
},
},
},
})
nodeClaim := test.NodeClaim()
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)

node := test.Node(test.NodeOptions{
node1 := test.Node(test.NodeOptions{
ProviderID: nodeClaim.Status.ProviderID,
})
ExpectApplied(ctx, env.Client, node)
node2 := test.Node(test.NodeOptions{
ProviderID: nodeClaim.Status.ProviderID,
})
ExpectApplied(ctx, env.Client, node1, node2)

_ = ExpectObjectReconcileFailed(ctx, env.Client, nodeClaimController, nodeClaim)
ExpectMakeNodesReady(ctx, env.Client, node) // Remove the not-ready taint
// does not error but will not be registered because this reconcile returned multiple nodes
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
ExpectMakeNodesReady(ctx, env.Client, node1, node2) // Remove the not-ready taint

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(ExpectStatusConditionExists(nodeClaim, v1.ConditionTypeRegistered).Status).To(Equal(metav1.ConditionFalse))
Expect(ExpectStatusConditionExists(nodeClaim, v1.ConditionTypeInitialized).Status).To(Equal(metav1.ConditionUnknown))

node = ExpectExists(ctx, env.Client, node)
node.Status.Capacity = corev1.ResourceList{
node1 = ExpectExists(ctx, env.Client, node1)
node1.Status.Capacity = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
corev1.ResourcePods: resource.MustParse("110"),
}
node.Status.Allocatable = corev1.ResourceList{
node1.Status.Allocatable = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("8"),
corev1.ResourceMemory: resource.MustParse("80Mi"),
corev1.ResourcePods: resource.MustParse("110"),
}
ExpectApplied(ctx, env.Client, node)
node2 = ExpectExists(ctx, env.Client, node2)
node2.Status.Capacity = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("10"),
corev1.ResourceMemory: resource.MustParse("100Mi"),
corev1.ResourcePods: resource.MustParse("110"),
}
node2.Status.Allocatable = corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("8"),
corev1.ResourceMemory: resource.MustParse("80Mi"),
corev1.ResourcePods: resource.MustParse("110"),
}
ExpectApplied(ctx, env.Client, node1, node2)
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expand Down
10 changes: 6 additions & 4 deletions pkg/controllers/nodeclaim/lifecycle/registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/metrics"
"sigs.k8s.io/karpenter/pkg/scheduling"
nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim"
)

type Registration struct {
kubeClient client.Client
recorder events.Recorder
}

func (r *Registration) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (reconcile.Result, error) {
Expand All @@ -62,11 +64,11 @@ func (r *Registration) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (
_, hasStartupTaint := lo.Find(node.Spec.Taints, func(t corev1.Taint) bool {
return t.MatchTaint(&v1.UnregisteredNoExecuteTaint)
})
// check if sync succeeded but setting the registered status condition failed
// if sync succeeded, then the label will be present and the taint will be gone
// if the sync hasn't happened yet and the race protecting startup taint isn't present then log it as missing and proceed
// if the sync has happened then the startup taint has been removed if it was present
if _, ok := node.Labels[v1.NodeRegisteredLabelKey]; !ok && !hasStartupTaint {
nodeClaim.StatusConditions().SetFalse(v1.ConditionTypeRegistered, "UnregisteredTaintNotFound", fmt.Sprintf("Invariant violated, %s taint must be present on Karpenter-managed nodes", v1.UnregisteredTaintKey))
return reconcile.Result{}, fmt.Errorf("missing required startup taint, %s", v1.UnregisteredTaintKey)
log.FromContext(ctx).Error(fmt.Errorf("missing %s taint which prevents registration related race conditions on Karpenter-managed nodes", v1.UnregisteredTaintKey), "node claim registration error")
r.recorder.Publish(UnregisteredTaintMissingEvent(nodeClaim))
}
ctx = log.IntoContext(ctx, log.FromContext(ctx).WithValues("Node", klog.KRef("", node.Name)))
if err = r.syncNode(ctx, nodeClaim, node); err != nil {
Expand Down
17 changes: 14 additions & 3 deletions pkg/controllers/nodeclaim/lifecycle/registration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ var _ = Describe("Registration", func() {
Expect(node.Labels).To(HaveKeyWithValue(v1.NodeRegisteredLabelKey, "true"))
Expect(node.Spec.Taints).To(Not(ContainElement(v1.UnregisteredNoExecuteTaint)))
})
It("should fail registration if the karpenter.sh/unregistered taint is not present on the node and the node isn't labeled as registered", func() {
It("should succeed registration if the karpenter.sh/unregistered taint is not present and emit an event", func() {
nodeClaim := test.NodeClaim(v1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
Expand All @@ -124,12 +124,23 @@ var _ = Describe("Registration", func() {
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)

// Create a node without the unregistered taint
node := test.Node(test.NodeOptions{ProviderID: nodeClaim.Status.ProviderID})
ExpectApplied(ctx, env.Client, node)
_ = ExpectObjectReconcileFailed(ctx, env.Client, nodeClaimController, nodeClaim)
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)

// Verify the NodeClaim is registered
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(ExpectStatusConditionExists(nodeClaim, v1.ConditionTypeRegistered).Status).To(Equal(metav1.ConditionFalse))
Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeRegistered).IsTrue()).To(BeTrue())
Expect(nodeClaim.Status.NodeName).To(Equal(node.Name))

// Verify the node is registered
node = ExpectExists(ctx, env.Client, node)
Expect(node.Labels).To(HaveKeyWithValue(v1.NodeRegisteredLabelKey, "true"))

Expect(recorder.Calls("UnregisteredTaintMissing")).To(Equal(1))
})

It("should sync the labels to the Node when the Node comes online", func() {
nodeClaim := test.NodeClaim(v1.NodeClaim{
ObjectMeta: metav1.ObjectMeta{
Expand Down
7 changes: 4 additions & 3 deletions pkg/controllers/nodeclaim/lifecycle/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,12 @@ import (
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/tools/record"
clock "k8s.io/utils/clock/testing"

"sigs.k8s.io/karpenter/pkg/apis"
v1 "sigs.k8s.io/karpenter/pkg/apis/v1"
"sigs.k8s.io/karpenter/pkg/cloudprovider/fake"
nodeclaimlifecycle "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/lifecycle"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/test"
. "sigs.k8s.io/karpenter/pkg/test/expectations"
Expand All @@ -49,6 +47,7 @@ var nodeClaimController *nodeclaimlifecycle.Controller
var env *test.Environment
var fakeClock *clock.FakeClock
var cloudProvider *fake.CloudProvider
var recorder *test.EventRecorder

func TestAPIs(t *testing.T) {
ctx = TestContextWithLogger(t)
Expand All @@ -70,11 +69,12 @@ func removeNodeClaimImmutabilityValidation(crds ...*apiextensionsv1.CustomResour

var _ = BeforeSuite(func() {
fakeClock = clock.NewFakeClock(time.Now())
recorder = test.NewEventRecorder()
env = test.NewEnvironment(test.WithCRDs(removeNodeClaimImmutabilityValidation(apis.CRDs...)...), test.WithCRDs(v1alpha1.CRDs...), test.WithFieldIndexers(test.NodeProviderIDFieldIndexer(ctx)))
ctx = options.ToContext(ctx, test.Options())

cloudProvider = fake.NewCloudProvider()
nodeClaimController = nodeclaimlifecycle.NewController(fakeClock, env.Client, cloudProvider, events.NewRecorder(&record.FakeRecorder{}))
nodeClaimController = nodeclaimlifecycle.NewController(fakeClock, env.Client, cloudProvider, recorder)
})

var _ = AfterSuite(func() {
Expand All @@ -91,6 +91,7 @@ var _ = Describe("Finalizer", func() {
var nodePool *v1.NodePool

BeforeEach(func() {
recorder.Reset() // Reset the events that we captured during the run
nodePool = test.NodePool()
})
Context("TerminationFinalizer", func() {
Expand Down
13 changes: 3 additions & 10 deletions pkg/controllers/nodeclaim/lifecycle/termination_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,20 +353,13 @@ var _ = Describe("Termination", func() {
}))
})
It("should not delete Nodes if the NodeClaim is not registered", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClaim)
node := test.NodeClaimLinkedNode(nodeClaim)
ExpectApplied(ctx, env.Client, nodePool, nodeClaim, node)
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)

nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
_, err := cloudProvider.Get(ctx, nodeClaim.Status.ProviderID)
Expect(err).ToNot(HaveOccurred())

node := test.NodeClaimLinkedNode(nodeClaim)
// Remove the unregistered taint to ensure the NodeClaim can't be marked as registered
node.Spec.Taints = nil
ExpectApplied(ctx, env.Client, node)
_ = ExpectObjectReconcileFailed(ctx, env.Client, nodeClaimController, nodeClaim)
nodeClaim = ExpectExists(ctx, env.Client, nodeClaim)
Expect(nodeClaim.StatusConditions().Get(v1.ConditionTypeRegistered).IsFalse()).To(BeTrue())
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)

Expect(env.Client.Delete(ctx, nodeClaim)).To(Succeed())
ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
controller-gen.kubebuilder.io/version: v0.17.2
name: testnodeclasses.karpenter.test.sh
spec:
group: karpenter.test.sh
Expand Down

0 comments on commit df0b2db

Please sign in to comment.