From e0b3e417154d6848f6688cd5bd5c4e6de38c50d4 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Sat, 30 Sep 2023 02:10:40 +0000 Subject: [PATCH] Convert replicated, system, not-safe-to-evict, and local storage pods to drainability rules --- cluster-autoscaler/simulator/cluster_test.go | 9 +- cluster-autoscaler/simulator/drain.go | 10 +- cluster-autoscaler/simulator/drain_test.go | 5 +- .../simulator/drainability/context.go | 5 + .../drainability/rules/localstorage/rule.go | 47 ++ .../rules/localstorage/rule_test.go | 458 +++++++++++++++++ .../drainability/rules/notsafetoevict/rule.go | 47 ++ .../rules/notsafetoevict/rule_test.go | 279 ++++++++++ .../drainability/rules/replicated/rule.go | 137 +++++ .../rules/replicated/rule_test.go | 421 +++++++++++++++ .../simulator/drainability/rules/rules.go | 8 + .../drainability/rules/system/rule.go | 47 ++ .../drainability/rules/system/rule_test.go | 308 +++++++++++ .../simulator/options/nodedelete.go | 2 +- cluster-autoscaler/utils/drain/drain.go | 197 +------ cluster-autoscaler/utils/drain/drain_test.go | 481 +----------------- cluster-autoscaler/utils/pod/pod.go | 6 +- 17 files changed, 1803 insertions(+), 664 deletions(-) create mode 100644 cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/replicated/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/system/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/system/rule_test.go diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index 136e0d47d2a9..c53aec5cbc01 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -152,7 +152,6 @@ func TestFindNodesToRemove(t *testing.T) { tracker := NewUsageTracker() tests := []findNodesToRemoveTestConfig{ - // just an empty node, should be removed { name: "just an empty node, should be removed", pods: []*apiv1.Pod{}, @@ -161,7 +160,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{emptyNodeToRemove}, unremovable: []*UnremovableNode{}, }, - // just a drainable node, but nowhere for pods to go to { name: "just a drainable node, but nowhere for pods to go to", pods: []*apiv1.Pod{pod1, pod2}, @@ -170,7 +168,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, - // drainable node, and a mostly empty node that can take its pods { name: "drainable node, and a mostly empty node that can take its pods", pods: []*apiv1.Pod{pod1, pod2, pod3}, @@ -179,7 +176,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{drainableNodeToRemove}, unremovable: []*UnremovableNode{{Node: nonDrainableNode, Reason: BlockedByPod, BlockingPod: &drain.BlockingPod{Pod: pod3, Reason: drain.NotReplicated}}}, }, - // drainable node, and a full node that cannot fit anymore pods { name: "drainable node, and a full node that cannot fit anymore pods", pods: []*apiv1.Pod{pod1, pod2, pod4}, @@ -188,7 +184,6 @@ func TestFindNodesToRemove(t 
*testing.T) { toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, - // 4 nodes, 1 empty, 1 drainable { name: "4 nodes, 1 empty, 1 drainable", pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, @@ -209,8 +204,8 @@ func TestFindNodesToRemove(t *testing.T) { r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker, testDeleteOptions(), nil, false) toRemove, unremovable := r.FindNodesToRemove(test.candidates, destinations, time.Now(), nil) fmt.Printf("Test scenario: %s, found len(toRemove)=%v, expected len(test.toRemove)=%v\n", test.name, len(toRemove), len(test.toRemove)) - assert.Equal(t, toRemove, test.toRemove) - assert.Equal(t, unremovable, test.unremovable) + assert.Equal(t, test.toRemove, toRemove) + assert.Equal(t, test.unremovable, unremovable) }) } } diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 7d8e5c449655..101844ad65d3 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -50,6 +50,8 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. drainCtx := &drainability.DrainContext{ RemainingPdbTracker: remainingPdbTracker, DeleteOptions: deleteOptions, + Listers: listers, + Timestamp: timestamp, } for _, podInfo := range nodeInfo.Pods { pod := podInfo.Pod @@ -73,20 +75,16 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. } } - pods, daemonSetPods, blockingPod, err = drain.GetPodsForDeletionOnNodeDrain( + pods, daemonSetPods = drain.GetPodsForDeletionOnNodeDrain( pods, remainingPdbTracker.GetPdbs(), deleteOptions.SkipNodesWithSystemPods, deleteOptions.SkipNodesWithLocalStorage, deleteOptions.SkipNodesWithCustomControllerPods, - listers, - int32(deleteOptions.MinReplicaCount), timestamp) pods = append(pods, drainPods...) daemonSetPods = append(daemonSetPods, drainDs...) - if err != nil { - return pods, daemonSetPods, blockingPod, err - } + if canRemove, _, blockingPodInfo := remainingPdbTracker.CanRemovePods(pods); !canRemove { pod := blockingPodInfo.Pod return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPodInfo, fmt.Errorf("not enough pod disruption budget to move %s/%s", pod.Namespace, pod.Name) diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 5e4611a9e104..1671fdfb9510 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -182,7 +182,7 @@ func TestGetPodsToMove(t *testing.T) { desc string pods []*apiv1.Pod pdbs []*policyv1.PodDisruptionBudget - rules []rules.Rule + rules rules.Rules wantPods []*apiv1.Pod wantDs []*apiv1.Pod wantBlocking *drain.BlockingPod @@ -312,9 +312,10 @@ func TestGetPodsToMove(t *testing.T) { SkipNodesWithLocalStorage: true, SkipNodesWithCustomControllerPods: true, } + rules := append(tc.rules, rules.Default()...) 
tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) - p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, tc.rules, nil, tracker, testTime) + p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, nil, tracker, testTime) if tc.wantErr { assert.Error(t, err) } else { diff --git a/cluster-autoscaler/simulator/drainability/context.go b/cluster-autoscaler/simulator/drainability/context.go index 84a4ec4c454f..d6fd7116ca02 100644 --- a/cluster-autoscaler/simulator/drainability/context.go +++ b/cluster-autoscaler/simulator/drainability/context.go @@ -17,12 +17,17 @@ limitations under the License. package drainability import ( + "time" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" ) // DrainContext contains parameters for drainability rules. type DrainContext struct { RemainingPdbTracker pdb.RemainingPdbTracker DeleteOptions options.NodeDeleteOptions + Listers kube_util.ListerRegistry + Timestamp time.Time } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go new file mode 100644 index 000000000000..6a58da1dd32d --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package localstorage + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle local storage pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with local storage pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drainCtx.DeleteOptions.SkipNodesWithLocalStorage && drain.HasBlockingLocalStorage(pod) { + return drainability.NewBlockedStatus(drain.LocalStorageRequested, fmt.Errorf("pod with local storage present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go new file mode 100644 index 000000000000..222eb8fd7b6e --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go @@ -0,0 +1,458 @@ +/* +Copyright 2023 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package localstorage + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + emptydirPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictVolumeSingleVal = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValEmpty = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValNonMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValAllMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + 
Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValNonMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValEmpty = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: ",", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + emptyDirTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: 
apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + emptyDirEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + emptyDirSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + emptyDirLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + emptyDirLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "pod with EmptyDir", + pod: emptydirPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictVolumeSingleVal, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValAllMatching, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: 
drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", + pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + + { + desc: "EmptyDir failed pod", + pod: emptyDirFailedPod, + }, + { + desc: "EmptyDir terminal pod", + pod: emptyDirTerminalPod, + }, + { + desc: "EmptyDir evicted pod", + pod: emptyDirEvictedPod, + }, + { + desc: "EmptyDir pod with PodSafeToEvict annotation", + pod: emptyDirSafePod, + }, + { + desc: "EmptyDir long terminating pod with 0 grace period", + pod: emptyDirLongTerminatingPod, + }, + { + desc: "EmptyDir long terminating pod with extended grace period", + pod: emptyDirLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithLocalStorage: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go new file mode 100644 index 000000000000..224a1fbd6a3f --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package notsafetoevict + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle not safe to evict pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with not safe to evict pods on node drain. 
+func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drain.HasNotSafeToEvictAnnotation(pod) { + return drainability.NewBlockedStatus(drain.NotSafeToEvictAnnotation, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go new file mode 100644 index 000000000000..42b3df58c9a4 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go @@ -0,0 +1,279 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package notsafetoevict + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + + safePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + unsafeSystemFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + unsafeSystemTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + unsafeSystemEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + zeroGracePeriod = int64(0) + unsafeLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + unsafeLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + unsafeRcPod = &apiv1.Pod{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + unsafeJobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "pod with PodSafeToEvict annotation", + pod: safePod, + }, + { + desc: "RC-managed pod with no annotation", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "RC-managed pod with PodSafeToEvict=false annotation", + pod: unsafeRcPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.NotSafeToEvictAnnotation, + wantError: true, + }, + { + desc: "Job-managed pod with no annotation", + pod: jobPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "Job-managed pod with PodSafeToEvict=false annotation", + pod: unsafeJobPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.NotSafeToEvictAnnotation, + wantError: true, + }, + + { + desc: "unsafe failed pod", + pod: unsafeSystemFailedPod, + }, + { + desc: "unsafe terminal pod", + pod: unsafeSystemTerminalPod, + }, + { + desc: "unsafe evicted pod", + pod: unsafeSystemEvictedPod, + }, + { + desc: "unsafe long terminating pod with 0 grace period", + pod: unsafeLongTerminatingPod, + }, + { + desc: "unsafe long terminating pod with extended grace period", + pod: unsafeLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithSystemPods: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go new file mode 100644 index 000000000000..ef98a5da8803 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -0,0 +1,137 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package replicated + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle replicated pods. 
+type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with replicated pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { + return drainability.NewUndefinedStatus() + } + + controllerRef := drain.ControllerRef(pod) + replicated := controllerRef != nil + + if drainCtx.DeleteOptions.SkipNodesWithCustomControllerPods { + // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods + if status := legacyCheck(drainCtx, pod); status.Outcome != drainability.UndefinedOutcome { + return status + } + replicated = replicated && replicatedKind[controllerRef.Kind] + } + + if pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) || replicated { + return drainability.NewUndefinedStatus() + } + + return drainability.NewBlockedStatus(drain.NotReplicated, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name)) +} + +// replicatedKind returns true if this kind has replicated pods. +var replicatedKind = map[string]bool{ + "ReplicationController": true, + "Job": true, + "ReplicaSet": true, + "StatefulSet": true, +} + +func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drainCtx.Listers == nil { + return drainability.NewUndefinedStatus() + } + + // For now, owner controller must be in the same namespace as the pod + // so OwnerReference doesn't have its own Namespace field. + controllerNamespace := pod.Namespace + + controllerRef := drain.ControllerRef(pod) + if controllerRef == nil { + return drainability.NewUndefinedStatus() + } + refKind := controllerRef.Kind + + if refKind == "ReplicationController" { + rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) + // Assume RC is either gone/missing or has too few replicas configured. + if err != nil || rc == nil { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + + // TODO: Replace the minReplica check with PDB. + if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + } + } else if pod_util.IsDaemonSetPod(pod) { + if refKind == "DaemonSet" { + // We don't have a listener for the other DaemonSet kind. + // TODO: Use a generic client for checking the reference.
+ return drainability.NewUndefinedStatus() + } + + _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) + if err != nil { + if apierrors.IsNotFound(err) { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) + } + return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "Job" { + job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) + + if err != nil || job == nil { + // Assume the only reason for an error is because the Job is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "ReplicaSet" { + rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) + + if err == nil && rs != nil { + // Assume the only reason for an error is because the RS is gone/missing. + if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + } + } else { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "StatefulSet" { + ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) + + if err != nil && ss == nil { + // Assume the only reason for an error is because the SS is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go new file mode 100644 index 000000000000..905c63780b59 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -0,0 +1,421 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package replicated + +import ( + "fmt" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + v1appslister "k8s.io/client-go/listers/apps/v1" + v1lister "k8s.io/client-go/listers/core/v1" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + ds = appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ds", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", + }, + } + + dsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + cdsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + + statefulset = appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ss", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", + }, + } + + ssPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + } + + rs = appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rs", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicasets/rs", + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + + rsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + rsPodDeleted = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + customControllerPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + // Using names like FooController is discouraged + // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#naming-conventions + // vadasambar: I am using it here just because `FooController`` + // is easier to understand than say `FooSet` + OwnerReferences: GenerateOwnerReferences("Foo", "FooController", "apps/v1", 
""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + nakedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + nakedFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + nakedTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + nakedEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + nakedSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + nakedLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + nakedLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + type testCase struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods + skipNodesWithCustomControllerPods bool + + wantReason drain.BlockingPodReason + wantError bool + } + + sharedTests := []testCase{ + { + desc: "RC-managed pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "DS-managed pod", + pod: dsPod, + }, + { + desc: "DS-managed pod by a custom Daemonset", + pod: cdsPod, + }, + { + desc: "Job-managed pod", + pod: jobPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "SS-managed pod", + pod: ssPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "RS-managed pod", + pod: rsPod, + rss: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "RS-managed pod that is being deleted", + pod: rsPodDeleted, + rss: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "naked pod", + pod: nakedPod, + wantReason: drain.NotReplicated, + wantError: true, + }, + { + desc: "naked failed pod", + pod: nakedFailedPod, + }, + { + desc: "naked terminal pod", + pod: nakedTerminalPod, + }, + { + desc: "naked evicted pod", + pod: nakedEvictedPod, + }, + { + desc: "naked pod with PodSafeToEvict annotation", + pod: nakedSafePod, + }, + { + 
desc: "naked long terminating pod with 0 grace period", + pod: nakedLongTerminatingPod, + }, + { + desc: "naked long terminating pod with extended grace period", + pod: nakedLongTerminatingPodWithExtendedGracePeriod, + }, + } + + var tests []testCase + + // Note: do not modify the underlying reference values for sharedTests. + for _, test := range sharedTests { + for _, skipNodesWithCustomControllerPods := range []bool{true, false} { + // Copy test to prevent side effects. + test := test + test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods + test.desc = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.desc, skipNodesWithCustomControllerPods) + tests = append(tests, test) + } + } + + customControllerTests := []testCase{ + { + desc: "Custom-controller-managed blocking pod", + pod: customControllerPod, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, + }, + { + desc: "Custom-controller-managed non-blocking pod", + pod: customControllerPod, + }, + } + tests = append(tests, customControllerTests...) + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + var err error + var rcLister v1lister.ReplicationControllerLister + if len(test.rcs) > 0 { + rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) + assert.NoError(t, err) + } + var rsLister v1appslister.ReplicaSetLister + if len(test.rss) > 0 { + rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) + assert.NoError(t, err) + } + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) + + registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithCustomControllerPods: test.skipNodesWithCustomControllerPods, + }, + Listers: registry, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 1733a2b8dac5..1dd1d5e4e24c 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -20,7 +20,11 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/localstorage" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" ) // Rule determines whether a given pod can be drained or not. 
@@ -36,6 +40,10 @@ type Rule interface { func Default() Rules { return []Rule{ mirror.New(), + replicated.New(), + system.New(), + notsafetoevict.New(), + localstorage.New(), } } diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go new file mode 100644 index 000000000000..14eef8585510 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle system pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with system pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drainCtx.DeleteOptions.SkipNodesWithSystemPods && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { + return drainability.NewBlockedStatus(drain.UnmovableKubeSystemPod, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go new file mode 100644 index 000000000000..5be7f835b301 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go @@ -0,0 +1,308 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + kubeSystemRc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "kube-system", + SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + kubeSystemRcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), + Labels: map[string]string{ + "k8s-app": "bar", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + emptyPDB = &policyv1.PodDisruptionBudget{} + + kubeSystemPDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "bar", + }, + }, + }, + } + + kubeSystemFakePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "foo", + }, + }, + }, + } + + defaultNamespacePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "PDB-managed pod", + }, + }, + }, + } + + kubeSystemFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + kubeSystemTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + kubeSystemEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + kubeSystemSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + kubeSystemLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: 
"node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + kubeSystemLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + pdbs []*policyv1.PodDisruptionBudget + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "kube-system pod with PodSafeToEvict annotation", + pod: kubeSystemSafePod, + }, + { + desc: "empty PDB with RC-managed pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, + }, + { + desc: "kube-system PDB with matching kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + }, + { + desc: "kube-system PDB with non-matching kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, + wantReason: drain.UnmovableKubeSystemPod, + wantError: true, + }, + { + desc: "kube-system PDB with default namespace pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + }, + { + desc: "default namespace PDB with matching labels kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, + wantReason: drain.UnmovableKubeSystemPod, + wantError: true, + }, + { + desc: "kube-system failed pod", + pod: kubeSystemFailedPod, + }, + { + desc: "kube-system terminal pod", + pod: kubeSystemTerminalPod, + }, + { + desc: "kube-system evicted pod", + pod: kubeSystemEvictedPod, + }, + { + desc: "kube-system pod with PodSafeToEvict annotation", + pod: kubeSystemSafePod, + }, + { + desc: "kube-system long terminating pod with 0 grace period", + pod: kubeSystemLongTerminatingPod, + }, + { + desc: "kube-system long terminating pod with extended grace period", + pod: kubeSystemLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + tracker := pdb.NewBasicRemainingPdbTracker() + tracker.SetPdbs(test.pdbs) + + drainCtx := &drainability.DrainContext{ + RemainingPdbTracker: tracker, + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithSystemPods: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/options/nodedelete.go b/cluster-autoscaler/simulator/options/nodedelete.go index 947095d6eb78..6b6e17a1b7e9 100644 --- a/cluster-autoscaler/simulator/options/nodedelete.go +++ b/cluster-autoscaler/simulator/options/nodedelete.go @@ -42,7 +42,7 @@ func NewNodeDeleteOptions(opts config.AutoscalingOptions) NodeDeleteOptions { return NodeDeleteOptions{ 
SkipNodesWithSystemPods: opts.SkipNodesWithSystemPods, SkipNodesWithLocalStorage: opts.SkipNodesWithLocalStorage, - MinReplicaCount: opts.MinReplicaCount, SkipNodesWithCustomControllerPods: opts.SkipNodesWithCustomControllerPods, + MinReplicaCount: opts.MinReplicaCount, } } diff --git a/cluster-autoscaler/utils/drain/drain.go b/cluster-autoscaler/utils/drain/drain.go index 46b9537af06b..45a28a384185 100644 --- a/cluster-autoscaler/utils/drain/drain.go +++ b/cluster-autoscaler/utils/drain/drain.go @@ -17,16 +17,12 @@ limitations under the License. package drain import ( - "fmt" "strings" "time" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) @@ -74,179 +70,35 @@ const ( UnexpectedError ) -// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node drain as well as some extra information -// about possibly problematic pods (unreplicated and DaemonSets). +// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node +// drain as well as some extra information about possibly problematic pods +// (unreplicated and DaemonSets). +// +// This function assumes that default drainability rules have already been run +// to verify pod drainability. func GetPodsForDeletionOnNodeDrain( podList []*apiv1.Pod, pdbs []*policyv1.PodDisruptionBudget, skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool, skipNodesWithCustomControllerPods bool, - listers kube_util.ListerRegistry, - minReplica int32, - currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *BlockingPod, err error) { + currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod) { pods = []*apiv1.Pod{} daemonSetPods = []*apiv1.Pod{} - // filter kube-system PDBs to avoid doing it for every kube-system pod - kubeSystemPDBs := make([]*policyv1.PodDisruptionBudget, 0) - for _, pdb := range pdbs { - if pdb.Namespace == "kube-system" { - kubeSystemPDBs = append(kubeSystemPDBs, pdb) - } - } for _, pod := range podList { - // Possibly skip a pod under deletion but only if it was being deleted for long enough - // to avoid a situation when we delete the empty node immediately after the pod was marked for - // deletion without respecting any graceful termination. if IsPodLongTerminating(pod, currentTime) { - // pod is being deleted for long enough - no need to care about it. 
continue } - isDaemonSetPod := false - replicated := false - safeToEvict := hasSafeToEvictAnnotation(pod) - terminal := isPodTerminal(pod) - - if skipNodesWithCustomControllerPods { - // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods - replicated, isDaemonSetPod, blockingPod, err = legacyCheckForReplicatedPods(listers, pod, minReplica) - if err != nil { - return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPod, err - } - } else { - replicated = ControllerRef(pod) != nil - isDaemonSetPod = pod_util.IsDaemonSetPod(pod) - } - - if isDaemonSetPod { + if pod_util.IsDaemonSetPod(pod) { daemonSetPods = append(daemonSetPods, pod) - continue - } - - if !safeToEvict && !terminal { - if hasNotSafeToEvictAnnotation(pod) { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotSafeToEvictAnnotation}, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name) - } - if !replicated { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotReplicated}, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name) - } - if pod.Namespace == "kube-system" && skipNodesWithSystemPods { - hasPDB, err := checkKubeSystemPDBs(pod, kubeSystemPDBs) - if err != nil { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error matching pods to pdbs: %v", err) - } - if !hasPDB { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name) - } - } - if HasBlockingLocalStorage(pod) && skipNodesWithLocalStorage { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: LocalStorageRequested}, fmt.Errorf("pod with local storage present: %s", pod.Name) - } - } - pods = append(pods, pod) - } - return pods, daemonSetPods, nil, nil -} - -func legacyCheckForReplicatedPods(listers kube_util.ListerRegistry, pod *apiv1.Pod, minReplica int32) (replicated bool, isDaemonSetPod bool, blockingPod *BlockingPod, err error) { - replicated = false - refKind := "" - checkReferences := listers != nil - isDaemonSetPod = false - - controllerRef := ControllerRef(pod) - if controllerRef != nil { - refKind = controllerRef.Kind - } - - // For now, owner controller must be in the same namespace as the pod - // so OwnerReference doesn't have its own Namespace field - controllerNamespace := pod.Namespace - if refKind == "ReplicationController" { - if checkReferences { - rc, err := listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) - // Assume a reason for an error is because the RC is either - // gone/missing or that the rc has too few replicas configured. - // TODO: replace the minReplica check with pod disruption budget. 
- if err == nil && rc != nil { - if rc.Spec.Replicas != nil && *rc.Spec.Replicas < minReplica { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", - pod.Namespace, pod.Name, rc.Spec.Replicas, minReplica) - } - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err) - } } else { - replicated = true - } - } else if pod_util.IsDaemonSetPod(pod) { - isDaemonSetPod = true - // don't have listener for other DaemonSet kind - // TODO: we should use a generic client for checking the reference. - if checkReferences && refKind == "DaemonSet" { - _, err := listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) - if apierrors.IsNotFound(err) { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err) - } else if err != nil { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err) - } - } - } else if refKind == "Job" { - if checkReferences { - job, err := listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the Job is - // gone/missing, not for any other cause. TODO(mml): something more - // sophisticated than this - if err == nil && job != nil { - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true - } - } else if refKind == "ReplicaSet" { - if checkReferences { - rs, err := listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the RS is - // gone/missing, not for any other cause. TODO(mml): something more - // sophisticated than this - if err == nil && rs != nil { - if rs.Spec.Replicas != nil && *rs.Spec.Replicas < minReplica { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", - pod.Namespace, pod.Name, rs.Spec.Replicas, minReplica) - } - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true - } - } else if refKind == "StatefulSet" { - if checkReferences { - ss, err := listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the StatefulSet is - // gone/missing, not for any other cause. 
TODO(mml): something more - // sophisticated than this - if err == nil && ss != nil { - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true + pods = append(pods, pod) } } - - return replicated, isDaemonSetPod, &BlockingPod{}, nil + return pods, daemonSetPods } // ControllerRef returns the OwnerReference to pod's controller. @@ -254,8 +106,8 @@ func ControllerRef(pod *apiv1.Pod) *metav1.OwnerReference { return metav1.GetControllerOf(pod) } -// isPodTerminal checks whether the pod is in a terminal state. -func isPodTerminal(pod *apiv1.Pod) bool { +// IsPodTerminal checks whether the pod is in a terminal state. +func IsPodTerminal(pod *apiv1.Pod) bool { // pod will never be restarted if pod.Spec.RestartPolicy == apiv1.RestartPolicyNever && (pod.Status.Phase == apiv1.PodSucceeded || pod.Status.Phase == apiv1.PodFailed) { return true @@ -296,29 +148,14 @@ func isLocalVolume(volume *apiv1.Volume) bool { return volume.HostPath != nil || (volume.EmptyDir != nil && volume.EmptyDir.Medium != apiv1.StorageMediumMemory) } -// This only checks if a matching PDB exist and therefore if it makes sense to attempt drain simulation, -// as we check for allowed-disruptions later anyway (for all pods with PDB, not just in kube-system) -func checkKubeSystemPDBs(pod *apiv1.Pod, pdbs []*policyv1.PodDisruptionBudget) (bool, error) { - for _, pdb := range pdbs { - selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector) - if err != nil { - return false, err - } - if selector.Matches(labels.Set(pod.Labels)) { - return true, nil - } - } - - return false, nil -} - -// This checks if pod has PodSafeToEvictKey annotation -func hasSafeToEvictAnnotation(pod *apiv1.Pod) bool { +// HasSafeToEvictAnnotation checks if pod has PodSafeToEvictKey annotation. +func HasSafeToEvictAnnotation(pod *apiv1.Pod) bool { return pod.GetAnnotations()[PodSafeToEvictKey] == "true" } -// This checks if pod has PodSafeToEvictKey annotation set to false -func hasNotSafeToEvictAnnotation(pod *apiv1.Pod) bool { +// HasNotSafeToEvictAnnotation checks if pod has PodSafeToEvictKey annotation +// set to false. +func HasNotSafeToEvictAnnotation(pod *apiv1.Pod) bool { return pod.GetAnnotations()[PodSafeToEvictKey] == "false" } diff --git a/cluster-autoscaler/utils/drain/drain_test.go b/cluster-autoscaler/utils/drain/drain_test.go index 52ce4949a54e..0a20b781a9ac 100644 --- a/cluster-autoscaler/utils/drain/drain_test.go +++ b/cluster-autoscaler/utils/drain/drain_test.go @@ -26,10 +26,7 @@ import ( apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - v1appslister "k8s.io/client-go/listers/apps/v1" - v1lister "k8s.io/client-go/listers/core/v1" "github.com/stretchr/testify/assert" ) @@ -41,10 +38,8 @@ type testOpts struct { pdbs []*policyv1.PodDisruptionBudget rcs []*apiv1.ReplicationController replicaSets []*appsv1.ReplicaSet - expectFatal bool expectPods []*apiv1.Pod expectDaemonSetPods []*apiv1.Pod - expectBlockingPod *BlockingPod // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods skipNodesWithCustomControllerPods bool } @@ -214,93 +209,6 @@ func TestDrain(t *testing.T) { }, } - nakedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptydirPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictVolumeSingleVal := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeSingleValEmpty := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeSingleValNonMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -329,90 +237,6 @@ func TestDrain(t *testing.T) { }, } - emptyDirSafeToEvictLocalVolumeMultiValNonMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - 
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValEmpty := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: ",", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - terminalPod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -503,44 +327,6 @@ func TestDrain(t *testing.T) { }, } - unsafeRcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - unsafeJobPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - } - - unsafeNakedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - kubeSystemSafePod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -588,39 +374,12 @@ func TestDrain(t *testing.T) { }, } - kubeSystemFakePDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "kube-system", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "foo", - }, - }, - }, - } - - defaultNamespacePDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "default", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "PDB-managed pod", - }, - }, - }, - } - sharedTests := []testOpts{ { description: "RC-managed pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -628,7 +387,6 @@ func TestDrain(t 
*testing.T) { description: "DS-managed pod", pods: []*apiv1.Pod{dsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{dsPod}, }, @@ -636,7 +394,6 @@ func TestDrain(t *testing.T) { description: "DS-managed pod by a custom Daemonset", pods: []*apiv1.Pod{cdsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{cdsPod}, }, @@ -645,7 +402,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{jobPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{jobPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -654,7 +410,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{ssPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{ssPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -663,7 +418,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{rsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, replicaSets: []*appsv1.ReplicaSet{&rs}, - expectFatal: false, expectPods: []*apiv1.Pod{rsPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -672,57 +426,7 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{rsPodDeleted}, pdbs: []*policyv1.PodDisruptionBudget{}, replicaSets: []*appsv1.ReplicaSet{&rs}, - expectFatal: false, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "naked pod", - pods: []*apiv1.Pod{nakedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: nakedPod, Reason: NotReplicated}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir", - pods: []*apiv1.Pod{emptydirPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptydirPod, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, - expectPods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValEmpty}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValNonMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, Reason: LocalStorageRequested}, expectDaemonSetPods: []*apiv1.Pod{}, }, { @@ -730,46 +434,13 @@ func TestDrain(t *testing.T) { pods: 
[]*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValNonMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValEmpty}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, Reason: LocalStorageRequested}, expectDaemonSetPods: []*apiv1.Pod{}, }, { description: "failed pod", pods: []*apiv1.Pod{failedPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{failedPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -778,7 +449,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{longTerminatingPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -787,7 +457,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -795,7 +464,6 @@ func TestDrain(t *testing.T) { description: "evicted pod", pods: []*apiv1.Pod{evictedPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{evictedPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -803,7 +471,6 @@ func TestDrain(t *testing.T) { description: "pod in terminal state", pods: []*apiv1.Pod{terminalPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{terminalPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -811,7 +478,6 @@ func TestDrain(t *testing.T) { description: "pod with PodSafeToEvict annotation", pods: []*apiv1.Pod{safePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{safePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -819,7 +485,6 @@ func TestDrain(t *testing.T) { description: "kube-system pod with PodSafeToEvict annotation", pods: []*apiv1.Pod{kubeSystemSafePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: 
false, expectPods: []*apiv1.Pod{kubeSystemSafePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -827,45 +492,14 @@ func TestDrain(t *testing.T) { description: "pod with EmptyDir and PodSafeToEvict annotation", pods: []*apiv1.Pod{emptydirSafePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{emptydirSafePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "RC-managed pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeRcPod}, - rcs: []*apiv1.ReplicationController{&rc}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeRcPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "Job-managed pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeJobPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeJobPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "naked pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeNakedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeNakedPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, { description: "empty PDB with RC-managed pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -874,129 +508,50 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{kubeSystemRcPod}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: false, expectPods: []*apiv1.Pod{kubeSystemRcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "kube-system PDB with non-matching kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: kubeSystemRcPod, Reason: UnmovableKubeSystemPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, { description: "kube-system PDB with default namespace pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "default namespace PDB with matching labels kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: kubeSystemRcPod, Reason: UnmovableKubeSystemPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, } allTests := []testOpts{} // Note: be careful about modifying the underlying reference values for sharedTest // since they are shared (changing it once will change it for all shallow copies of sharedTest) for _, sharedTest := range sharedTests { - // make sure you shallow copy the test like this - // before you modify it - // (so that modifying one test doesn't affect another) - enabledTest := 
sharedTest - disabledTest := sharedTest - - // to execute the same shared tests for when the skipNodesWithCustomControllerPods flag is true - // and when the flag is false - enabledTest.skipNodesWithCustomControllerPods = true - enabledTest.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%v", - enabledTest.description, enabledTest.skipNodesWithCustomControllerPods) - allTests = append(allTests, enabledTest) - - disabledTest.skipNodesWithCustomControllerPods = false - disabledTest.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%v", - disabledTest.description, disabledTest.skipNodesWithCustomControllerPods) - allTests = append(allTests, disabledTest) + for _, skipNodesWithCustomControllerPods := range []bool{true, false} { + // Copy test to prevent side effects. + test := sharedTest + test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods + test.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.description, skipNodesWithCustomControllerPods) + allTests = append(allTests, test) + } } allTests = append(allTests, testOpts{ - description: "Custom-controller-managed blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: customControllerPod, Reason: NotReplicated}, - expectDaemonSetPods: []*apiv1.Pod{}, - skipNodesWithCustomControllerPods: true, - }) - - allTests = append(allTests, testOpts{ - description: "Custom-controller-managed non-blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, - expectPods: []*apiv1.Pod{customControllerPod}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - skipNodesWithCustomControllerPods: false, + description: "Custom-controller-managed non-blocking pod", + pods: []*apiv1.Pod{customControllerPod}, + pdbs: []*policyv1.PodDisruptionBudget{}, + expectPods: []*apiv1.Pod{customControllerPod}, + expectDaemonSetPods: []*apiv1.Pod{}, }) for _, test := range allTests { - var err error - var rcLister v1lister.ReplicationControllerLister - if len(test.rcs) > 0 { - rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) - assert.NoError(t, err) - } - var rsLister v1appslister.ReplicaSetLister - if len(test.replicaSets) > 0 { - rsLister, err = kube_util.NewTestReplicaSetLister(test.replicaSets) - assert.NoError(t, err) - } - - dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) - assert.NoError(t, err) - jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) - assert.NoError(t, err) - ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) - assert.NoError(t, err) + t.Run(test.description, func(t *testing.T) { + pods, daemonSetPods := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, testTime) - registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) - - pods, daemonSetPods, blockingPod, err := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, registry, 0, testTime) - - if test.expectFatal { - assert.Equal(t, test.expectBlockingPod, blockingPod) - if err == nil { - t.Fatalf("%s: unexpected non-error", test.description) + if len(pods) != len(test.expectPods) { + t.Fatal("wrong pod list content") } - } - if 
!test.expectFatal { - assert.Nil(t, blockingPod) - if err != nil { - t.Fatalf("%s: error occurred: %v", test.description, err) - } - } - - if len(pods) != len(test.expectPods) { - t.Fatalf("Wrong pod list content: %v", test.description) - } - - assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) + assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) + }) } } diff --git a/cluster-autoscaler/utils/pod/pod.go b/cluster-autoscaler/utils/pod/pod.go index 89dd04b5f9d1..b85b14ac32a2 100644 --- a/cluster-autoscaler/utils/pod/pod.go +++ b/cluster-autoscaler/utils/pod/pod.go @@ -35,11 +35,7 @@ func IsDaemonSetPod(pod *apiv1.Pod) bool { return true } - if val, ok := pod.Annotations[DaemonSetPodAnnotationKey]; ok && val == "true" { - return true - } - - return false + return pod.Annotations[DaemonSetPodAnnotationKey] == "true" } // IsMirrorPod checks whether the pod is a mirror pod.
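
As context for the refactor above, here is a minimal, hypothetical sketch of calling the slimmed-down GetPodsForDeletionOnNodeDrain: after this patch the caller passes only the pod list, the PDBs, the three skip flags, and a timestamp (the lister registry and MinReplicaCount arguments are gone), and the function assumes the default drainability rules have already vetted the pods, as its new doc comment states. The pod fixture, names, and main wrapper below are invented for illustration, not part of the patch.

package main

import (
	"fmt"
	"time"

	apiv1 "k8s.io/api/core/v1"
	policyv1 "k8s.io/api/policy/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
)

func main() {
	// Hypothetical ReplicaSet-owned pod; in the real flow the drainability
	// rules have already decided whether this pod blocks the drain.
	isController := true
	pod := &apiv1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "web-0",
			Namespace: "default",
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "apps/v1",
				Kind:       "ReplicaSet",
				Name:       "web",
				Controller: &isController,
			}},
		},
		Spec: apiv1.PodSpec{NodeName: "node-1"},
	}

	pods, daemonSetPods := drain.GetPodsForDeletionOnNodeDrain(
		[]*apiv1.Pod{pod},
		[]*policyv1.PodDisruptionBudget{}, // still accepted, but the PDB-based checks were moved out of this function in this patch
		true, // skipNodesWithSystemPods
		true, // skipNodesWithLocalStorage
		true, // skipNodesWithCustomControllerPods
		time.Now(),
	)
	fmt.Printf("pods to move: %d, DaemonSet pods: %d\n", len(pods), len(daemonSetPods))
}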