From 7cb08df8b2eb6f19e0d9bd9b0728f1c68279426f Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Sat, 30 Sep 2023 00:50:13 +0000 Subject: [PATCH 1/9] Add pdb filtering to remainingPdbTracker --- cluster-autoscaler/core/scaledown/pdb/basic.go | 10 ++++++++++ cluster-autoscaler/core/scaledown/pdb/pdb.go | 2 ++ 2 files changed, 12 insertions(+) diff --git a/cluster-autoscaler/core/scaledown/pdb/basic.go b/cluster-autoscaler/core/scaledown/pdb/basic.go index 4bb248939b36..dd08b644cd11 100644 --- a/cluster-autoscaler/core/scaledown/pdb/basic.go +++ b/cluster-autoscaler/core/scaledown/pdb/basic.go @@ -63,6 +63,16 @@ func (t *basicRemainingPdbTracker) GetPdbs() []*policyv1.PodDisruptionBudget { return pdbs } +func (t *basicRemainingPdbTracker) MatchingPdbs(pod *apiv1.Pod) []*policyv1.PodDisruptionBudget { + var pdbs []*policyv1.PodDisruptionBudget + for _, pdbInfo := range t.pdbInfos { + if pod.Namespace == pdbInfo.pdb.Namespace && pdbInfo.selector.Matches(labels.Set(pod.Labels)) { + pdbs = append(pdbs, pdbInfo.pdb) + } + } + return pdbs +} + func (t *basicRemainingPdbTracker) CanRemovePods(pods []*apiv1.Pod) (canRemove, inParallel bool, blockingPod *drain.BlockingPod) { inParallel = true for _, pdbInfo := range t.pdbInfos { diff --git a/cluster-autoscaler/core/scaledown/pdb/pdb.go b/cluster-autoscaler/core/scaledown/pdb/pdb.go index 77d977dcecd6..5d21dc7e646f 100644 --- a/cluster-autoscaler/core/scaledown/pdb/pdb.go +++ b/cluster-autoscaler/core/scaledown/pdb/pdb.go @@ -28,6 +28,8 @@ type RemainingPdbTracker interface { SetPdbs(pdbs []*policyv1.PodDisruptionBudget) error // GetPdbs returns the current remaining PDBs. GetPdbs() []*policyv1.PodDisruptionBudget + // MatchingPdbs returns all PDBs matching the pod. + MatchingPdbs(pod *apiv1.Pod) []*policyv1.PodDisruptionBudget // CanRemovePods checks if the set of pods can be removed. // inParallel indicates if the pods can be removed in parallel. 
If it is false From 125e9c10dcaaad55de968aa12ac91012592e8d02 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Sat, 30 Sep 2023 02:10:40 +0000 Subject: [PATCH 2/9] Convert replicated, system, not-safe-to-evict, and local storage pods to drainability rules --- cluster-autoscaler/simulator/cluster_test.go | 9 +- cluster-autoscaler/simulator/drain.go | 10 +- cluster-autoscaler/simulator/drain_test.go | 5 +- .../simulator/drainability/context.go | 5 + .../drainability/rules/localstorage/rule.go | 47 ++ .../rules/localstorage/rule_test.go | 458 +++++++++++++++++ .../drainability/rules/notsafetoevict/rule.go | 47 ++ .../rules/notsafetoevict/rule_test.go | 279 ++++++++++ .../drainability/rules/replicated/rule.go | 137 +++++ .../rules/replicated/rule_test.go | 421 +++++++++++++++ .../simulator/drainability/rules/rules.go | 8 + .../drainability/rules/system/rule.go | 47 ++ .../drainability/rules/system/rule_test.go | 308 +++++++++++ .../simulator/options/nodedelete.go | 2 +- cluster-autoscaler/utils/drain/drain.go | 197 +------ cluster-autoscaler/utils/drain/drain_test.go | 481 +----------------- cluster-autoscaler/utils/pod/pod.go | 6 +- 17 files changed, 1803 insertions(+), 664 deletions(-) create mode 100644 cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/replicated/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/system/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/system/rule_test.go diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index 136e0d47d2a9..c53aec5cbc01 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -152,7 +152,6 @@ func TestFindNodesToRemove(t *testing.T) { tracker := NewUsageTracker() tests := []findNodesToRemoveTestConfig{ - // just an empty node, should be removed { name: "just an empty node, should be removed", pods: []*apiv1.Pod{}, @@ -161,7 +160,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{emptyNodeToRemove}, unremovable: []*UnremovableNode{}, }, - // just a drainable node, but nowhere for pods to go to { name: "just a drainable node, but nowhere for pods to go to", pods: []*apiv1.Pod{pod1, pod2}, @@ -170,7 +168,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, - // drainable node, and a mostly empty node that can take its pods { name: "drainable node, and a mostly empty node that can take its pods", pods: []*apiv1.Pod{pod1, pod2, pod3}, @@ -179,7 +176,6 @@ func TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{drainableNodeToRemove}, unremovable: []*UnremovableNode{{Node: nonDrainableNode, Reason: BlockedByPod, BlockingPod: &drain.BlockingPod{Pod: pod3, Reason: drain.NotReplicated}}}, }, - // drainable node, and a full node that cannot fit anymore pods { name: "drainable node, and a full node that cannot fit anymore pods", pods: []*apiv1.Pod{pod1, pod2, pod4}, @@ -188,7 +184,6 @@ func 
TestFindNodesToRemove(t *testing.T) { toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, - // 4 nodes, 1 empty, 1 drainable { name: "4 nodes, 1 empty, 1 drainable", pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, @@ -209,8 +204,8 @@ func TestFindNodesToRemove(t *testing.T) { r := NewRemovalSimulator(registry, clusterSnapshot, predicateChecker, tracker, testDeleteOptions(), nil, false) toRemove, unremovable := r.FindNodesToRemove(test.candidates, destinations, time.Now(), nil) fmt.Printf("Test scenario: %s, found len(toRemove)=%v, expected len(test.toRemove)=%v\n", test.name, len(toRemove), len(test.toRemove)) - assert.Equal(t, toRemove, test.toRemove) - assert.Equal(t, unremovable, test.unremovable) + assert.Equal(t, test.toRemove, toRemove) + assert.Equal(t, test.unremovable, unremovable) }) } } diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 7d8e5c449655..101844ad65d3 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -50,6 +50,8 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. drainCtx := &drainability.DrainContext{ RemainingPdbTracker: remainingPdbTracker, DeleteOptions: deleteOptions, + Listers: listers, + Timestamp: timestamp, } for _, podInfo := range nodeInfo.Pods { pod := podInfo.Pod @@ -73,20 +75,16 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. } } - pods, daemonSetPods, blockingPod, err = drain.GetPodsForDeletionOnNodeDrain( + pods, daemonSetPods = drain.GetPodsForDeletionOnNodeDrain( pods, remainingPdbTracker.GetPdbs(), deleteOptions.SkipNodesWithSystemPods, deleteOptions.SkipNodesWithLocalStorage, deleteOptions.SkipNodesWithCustomControllerPods, - listers, - int32(deleteOptions.MinReplicaCount), timestamp) pods = append(pods, drainPods...) daemonSetPods = append(daemonSetPods, drainDs...) - if err != nil { - return pods, daemonSetPods, blockingPod, err - } + if canRemove, _, blockingPodInfo := remainingPdbTracker.CanRemovePods(pods); !canRemove { pod := blockingPodInfo.Pod return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPodInfo, fmt.Errorf("not enough pod disruption budget to move %s/%s", pod.Namespace, pod.Name) diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 5e4611a9e104..1671fdfb9510 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -182,7 +182,7 @@ func TestGetPodsToMove(t *testing.T) { desc string pods []*apiv1.Pod pdbs []*policyv1.PodDisruptionBudget - rules []rules.Rule + rules rules.Rules wantPods []*apiv1.Pod wantDs []*apiv1.Pod wantBlocking *drain.BlockingPod @@ -312,9 +312,10 @@ func TestGetPodsToMove(t *testing.T) { SkipNodesWithLocalStorage: true, SkipNodesWithCustomControllerPods: true, } + rules := append(tc.rules, rules.Default()...) 
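// Sketch (not part of the patch): after this change GetPodsToMove consults the
// drainability rules once per pod before the trimmed-down legacy helper, and the
// final PDB check is delegated to the RemainingPdbTracker. Variable names mirror
// simulator/drain.go in this patch; the first-non-undefined aggregation and the
// handling of blocked/drainable outcomes are not shown in this diff and are
// assumptions here.
drainCtx := &drainability.DrainContext{
	RemainingPdbTracker: remainingPdbTracker,
	DeleteOptions:       deleteOptions,
	Listers:             listers,
	Timestamp:           timestamp,
}
for _, podInfo := range nodeInfo.Pods {
	status := drainability.NewUndefinedStatus()
	for _, rule := range drainabilityRules {
		if status = rule.Drainable(drainCtx, podInfo.Pod); status.Outcome != drainability.UndefinedOutcome {
			break
		}
	}
	if status.Outcome == drainability.UndefinedOutcome {
		// No rule decided; the legacy path below makes the call.
		pods = append(pods, podInfo.Pod)
	}
	// Blocked and explicitly drainable outcomes are handled here (elided).
}
// Listers and MinReplicaCount are no longer passed in: the replicated rule reads
// them from DrainContext instead.
pods, daemonSetPods = drain.GetPodsForDeletionOnNodeDrain(
	pods, remainingPdbTracker.GetPdbs(),
	deleteOptions.SkipNodesWithSystemPods,
	deleteOptions.SkipNodesWithLocalStorage,
	deleteOptions.SkipNodesWithCustomControllerPods,
	timestamp)
if canRemove, _, blockingPodInfo := remainingPdbTracker.CanRemovePods(pods); !canRemove {
	pod := blockingPodInfo.Pod
	return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPodInfo, fmt.Errorf("not enough pod disruption budget to move %s/%s", pod.Namespace, pod.Name)
}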
tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) - p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, tc.rules, nil, tracker, testTime) + p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, nil, tracker, testTime) if tc.wantErr { assert.Error(t, err) } else { diff --git a/cluster-autoscaler/simulator/drainability/context.go b/cluster-autoscaler/simulator/drainability/context.go index 84a4ec4c454f..d6fd7116ca02 100644 --- a/cluster-autoscaler/simulator/drainability/context.go +++ b/cluster-autoscaler/simulator/drainability/context.go @@ -17,12 +17,17 @@ limitations under the License. package drainability import ( + "time" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" ) // DrainContext contains parameters for drainability rules. type DrainContext struct { RemainingPdbTracker pdb.RemainingPdbTracker DeleteOptions options.NodeDeleteOptions + Listers kube_util.ListerRegistry + Timestamp time.Time } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go new file mode 100644 index 000000000000..6a58da1dd32d --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package localstorage + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle local storage pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with local storage pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drainCtx.DeleteOptions.SkipNodesWithLocalStorage && drain.HasBlockingLocalStorage(pod) { + return drainability.NewBlockedStatus(drain.LocalStorageRequested, fmt.Errorf("pod with local storage present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go new file mode 100644 index 000000000000..222eb8fd7b6e --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go @@ -0,0 +1,458 @@ +/* +Copyright 2023 The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package localstorage + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + emptydirPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictVolumeSingleVal = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValEmpty = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeSingleValNonMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValAllMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + 
Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValNonMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirSafeToEvictLocalVolumeMultiValEmpty = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: ",", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + + emptyDirFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + emptyDirTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: 
apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + emptyDirEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + emptyDirSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + emptyDirLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + emptyDirLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "pod with EmptyDir", + pod: emptydirPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictVolumeSingleVal, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", + pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValAllMatching, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: 
drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", + pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", + pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + + { + desc: "EmptyDir failed pod", + pod: emptyDirFailedPod, + }, + { + desc: "EmptyDir terminal pod", + pod: emptyDirTerminalPod, + }, + { + desc: "EmptyDir evicted pod", + pod: emptyDirEvictedPod, + }, + { + desc: "EmptyDir pod with PodSafeToEvict annotation", + pod: emptyDirSafePod, + }, + { + desc: "EmptyDir long terminating pod with 0 grace period", + pod: emptyDirLongTerminatingPod, + }, + { + desc: "EmptyDir long terminating pod with extended grace period", + pod: emptyDirLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithLocalStorage: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go new file mode 100644 index 000000000000..224a1fbd6a3f --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package notsafetoevict + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle not safe to evict pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with not safe to evict pods on node drain. 
+func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drain.HasNotSafeToEvictAnnotation(pod) { + return drainability.NewBlockedStatus(drain.NotSafeToEvictAnnotation, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go new file mode 100644 index 000000000000..42b3df58c9a4 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go @@ -0,0 +1,279 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package notsafetoevict + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + + safePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + unsafeSystemFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + unsafeSystemTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + unsafeSystemEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + zeroGracePeriod = int64(0) + unsafeLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + unsafeLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + unsafeRcPod = &apiv1.Pod{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + unsafeJobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "pod with PodSafeToEvict annotation", + pod: safePod, + }, + { + desc: "RC-managed pod with no annotation", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "RC-managed pod with PodSafeToEvict=false annotation", + pod: unsafeRcPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.NotSafeToEvictAnnotation, + wantError: true, + }, + { + desc: "Job-managed pod with no annotation", + pod: jobPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "Job-managed pod with PodSafeToEvict=false annotation", + pod: unsafeJobPod, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.NotSafeToEvictAnnotation, + wantError: true, + }, + + { + desc: "unsafe failed pod", + pod: unsafeSystemFailedPod, + }, + { + desc: "unsafe terminal pod", + pod: unsafeSystemTerminalPod, + }, + { + desc: "unsafe evicted pod", + pod: unsafeSystemEvictedPod, + }, + { + desc: "unsafe long terminating pod with 0 grace period", + pod: unsafeLongTerminatingPod, + }, + { + desc: "unsafe long terminating pod with extended grace period", + pod: unsafeLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithSystemPods: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go new file mode 100644 index 000000000000..2eb3f492c6ef --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -0,0 +1,137 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package replicated + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle replicated pods. 
+type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with replicated pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { + return drainability.NewUndefinedStatus() + } + + controllerRef := drain.ControllerRef(pod) + replicated := controllerRef != nil + + if drainCtx.DeleteOptions.SkipNodesWithCustomControllerPods { + // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods + if status := legacyCheck(drainCtx, pod); status.Outcome != drainability.UndefinedOutcome { + return status + } + replicated = replicated && replicatedKind[controllerRef.Kind] + } + + if pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) || replicated { + return drainability.NewUndefinedStatus() + } + + return drainability.NewBlockedStatus(drain.NotReplicated, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name)) +} + +// replicatedKind returns true if this kind has replicates pods. +var replicatedKind = map[string]bool{ + "ReplicationController": true, + "Job": true, + "ReplicaSet": true, + "StatefulSet": true, +} + +func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drainCtx.Listers == nil { + return drainability.NewUndefinedStatus() + } + + // For now, owner controller must be in the same namespace as the pod + // so OwnerReference doesn't have its own Namespace field. + controllerNamespace := pod.Namespace + + controllerRef := drain.ControllerRef(pod) + if controllerRef == nil { + return drainability.NewUndefinedStatus() + } + refKind := controllerRef.Kind + + if refKind == "ReplicationController" { + rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) + // Assume RC is either gone/missing or has too few replicas configured. + if err != nil || rc == nil { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + + // TODO: Replace the minReplica check with PDB. + if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + } + } else if pod_util.IsDaemonSetPod(pod) { + if refKind == "DaemonSet" { + // We don't have a listener for the other DaemonSet kind. + // TODO: Use a generic client for checking the reference. 
+ return drainability.NewUndefinedStatus() + } + + _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) + if err != nil { + if apierrors.IsNotFound(err) { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) + } + return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "Job" { + job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) + + if err != nil || job == nil { + // Assume the only reason for an error is because the Job is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "ReplicaSet" { + rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) + + if err == nil && rs != nil { + // Assume the only reason for an error is because the RS is gone/missing. + if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + } + } else { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "StatefulSet" { + ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) + + if err != nil && ss == nil { + // Assume the only reason for an error is because the SS is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go new file mode 100644 index 000000000000..905c63780b59 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -0,0 +1,421 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package replicated + +import ( + "fmt" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + v1appslister "k8s.io/client-go/listers/apps/v1" + v1lister "k8s.io/client-go/listers/core/v1" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + ds = appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ds", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", + }, + } + + dsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + cdsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + + statefulset = appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ss", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", + }, + } + + ssPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + } + + rs = appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rs", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicasets/rs", + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + + rsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + rsPodDeleted = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + customControllerPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + // Using names like FooController is discouraged + // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#naming-conventions + // vadasambar: I am using it here just because `FooController`` + // is easier to understand than say `FooSet` + OwnerReferences: GenerateOwnerReferences("Foo", "FooController", "apps/v1", ""), + 
}, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + nakedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + nakedFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + nakedTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + nakedEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + nakedSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + nakedLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + nakedLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + type testCase struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + + // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods + skipNodesWithCustomControllerPods bool + + wantReason drain.BlockingPodReason + wantError bool + } + + sharedTests := []testCase{ + { + desc: "RC-managed pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "DS-managed pod", + pod: dsPod, + }, + { + desc: "DS-managed pod by a custom Daemonset", + pod: cdsPod, + }, + { + desc: "Job-managed pod", + pod: jobPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "SS-managed pod", + pod: ssPod, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "RS-managed pod", + pod: rsPod, + rss: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "RS-managed pod that is being deleted", + pod: rsPodDeleted, + rss: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "naked pod", + pod: nakedPod, + wantReason: drain.NotReplicated, + wantError: true, + }, + { + desc: "naked failed pod", + pod: nakedFailedPod, + }, + { + desc: "naked terminal pod", + pod: nakedTerminalPod, + }, + { + desc: "naked evicted pod", + pod: nakedEvictedPod, + }, + { + desc: "naked pod with PodSafeToEvict annotation", + pod: nakedSafePod, + }, + { + desc: 
"naked long terminating pod with 0 grace period", + pod: nakedLongTerminatingPod, + }, + { + desc: "naked long terminating pod with extended grace period", + pod: nakedLongTerminatingPodWithExtendedGracePeriod, + }, + } + + var tests []testCase + + // Note: do not modify the underlying reference values for sharedTests. + for _, test := range sharedTests { + for _, skipNodesWithCustomControllerPods := range []bool{true, false} { + // Copy test to prevent side effects. + test := test + test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods + test.desc = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.desc, skipNodesWithCustomControllerPods) + tests = append(tests, test) + } + } + + customControllerTests := []testCase{ + { + desc: "Custom-controller-managed blocking pod", + pod: customControllerPod, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, + }, + { + desc: "Custom-controller-managed non-blocking pod", + pod: customControllerPod, + }, + } + tests = append(tests, customControllerTests...) + + for _, test := range tests { + t.Run(test.desc, func(t *testing.T) { + var err error + var rcLister v1lister.ReplicationControllerLister + if len(test.rcs) > 0 { + rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) + assert.NoError(t, err) + } + var rsLister v1appslister.ReplicaSetLister + if len(test.rss) > 0 { + rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) + assert.NoError(t, err) + } + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) + + registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + + drainCtx := &drainability.DrainContext{ + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithCustomControllerPods: test.skipNodesWithCustomControllerPods, + }, + Listers: registry, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 1733a2b8dac5..1dd1d5e4e24c 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -20,7 +20,11 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/localstorage" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" ) // Rule determines whether a given pod can be drained or not. 
@@ -36,6 +40,10 @@ type Rule interface { func Default() Rules { return []Rule{ mirror.New(), + replicated.New(), + system.New(), + notsafetoevict.New(), + localstorage.New(), } } diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go new file mode 100644 index 000000000000..14eef8585510 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -0,0 +1,47 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle system pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with system pods on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus() + } + + if drainCtx.DeleteOptions.SkipNodesWithSystemPods && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { + return drainability.NewBlockedStatus(drain.UnmovableKubeSystemPod, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)) + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go new file mode 100644 index 000000000000..5be7f835b301 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go @@ -0,0 +1,308 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package system + +import ( + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + + "github.com/stretchr/testify/assert" +) + +func TestDrain(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + kubeSystemRc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "kube-system", + SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + + kubeSystemRcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), + Labels: map[string]string{ + "k8s-app": "bar", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + emptyPDB = &policyv1.PodDisruptionBudget{} + + kubeSystemPDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "bar", + }, + }, + }, + } + + kubeSystemFakePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "foo", + }, + }, + }, + } + + defaultNamespacePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "PDB-managed pod", + }, + }, + }, + } + + kubeSystemFailedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + kubeSystemTerminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + + kubeSystemEvictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + + kubeSystemSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + + zeroGracePeriod = int64(0) + kubeSystemLongTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + NodeName: 
"node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + + extendedGracePeriod = int64(6 * 60) // 6 minutes + kubeSystemLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + ) + + for _, test := range []struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + pdbs []*policyv1.PodDisruptionBudget + + wantReason drain.BlockingPodReason + wantError bool + }{ + { + desc: "kube-system pod with PodSafeToEvict annotation", + pod: kubeSystemSafePod, + }, + { + desc: "empty PDB with RC-managed pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, + }, + { + desc: "kube-system PDB with matching kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + }, + { + desc: "kube-system PDB with non-matching kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, + wantReason: drain.UnmovableKubeSystemPod, + wantError: true, + }, + { + desc: "kube-system PDB with default namespace pod", + pod: rcPod, + rcs: []*apiv1.ReplicationController{&rc}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + }, + { + desc: "default namespace PDB with matching labels kube-system pod", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, + wantReason: drain.UnmovableKubeSystemPod, + wantError: true, + }, + { + desc: "kube-system failed pod", + pod: kubeSystemFailedPod, + }, + { + desc: "kube-system terminal pod", + pod: kubeSystemTerminalPod, + }, + { + desc: "kube-system evicted pod", + pod: kubeSystemEvictedPod, + }, + { + desc: "kube-system pod with PodSafeToEvict annotation", + pod: kubeSystemSafePod, + }, + { + desc: "kube-system long terminating pod with 0 grace period", + pod: kubeSystemLongTerminatingPod, + }, + { + desc: "kube-system long terminating pod with extended grace period", + pod: kubeSystemLongTerminatingPodWithExtendedGracePeriod, + }, + } { + t.Run(test.desc, func(t *testing.T) { + tracker := pdb.NewBasicRemainingPdbTracker() + tracker.SetPdbs(test.pdbs) + + drainCtx := &drainability.DrainContext{ + RemainingPdbTracker: tracker, + DeleteOptions: options.NodeDeleteOptions{ + SkipNodesWithSystemPods: true, + }, + Timestamp: testTime, + } + status := New().Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } +} diff --git a/cluster-autoscaler/simulator/options/nodedelete.go b/cluster-autoscaler/simulator/options/nodedelete.go index 947095d6eb78..6b6e17a1b7e9 100644 --- a/cluster-autoscaler/simulator/options/nodedelete.go +++ b/cluster-autoscaler/simulator/options/nodedelete.go @@ -42,7 +42,7 @@ func NewNodeDeleteOptions(opts config.AutoscalingOptions) NodeDeleteOptions { return NodeDeleteOptions{ 
SkipNodesWithSystemPods: opts.SkipNodesWithSystemPods, SkipNodesWithLocalStorage: opts.SkipNodesWithLocalStorage, - MinReplicaCount: opts.MinReplicaCount, SkipNodesWithCustomControllerPods: opts.SkipNodesWithCustomControllerPods, + MinReplicaCount: opts.MinReplicaCount, } } diff --git a/cluster-autoscaler/utils/drain/drain.go b/cluster-autoscaler/utils/drain/drain.go index 46b9537af06b..45a28a384185 100644 --- a/cluster-autoscaler/utils/drain/drain.go +++ b/cluster-autoscaler/utils/drain/drain.go @@ -17,16 +17,12 @@ limitations under the License. package drain import ( - "fmt" "strings" "time" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/labels" - kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) @@ -74,179 +70,35 @@ const ( UnexpectedError ) -// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node drain as well as some extra information -// about possibly problematic pods (unreplicated and DaemonSets). +// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node +// drain as well as some extra information about possibly problematic pods +// (unreplicated and DaemonSets). +// +// This function assumes that default drainability rules have already been run +// to verify pod drainability. func GetPodsForDeletionOnNodeDrain( podList []*apiv1.Pod, pdbs []*policyv1.PodDisruptionBudget, skipNodesWithSystemPods bool, skipNodesWithLocalStorage bool, skipNodesWithCustomControllerPods bool, - listers kube_util.ListerRegistry, - minReplica int32, - currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *BlockingPod, err error) { + currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod) { pods = []*apiv1.Pod{} daemonSetPods = []*apiv1.Pod{} - // filter kube-system PDBs to avoid doing it for every kube-system pod - kubeSystemPDBs := make([]*policyv1.PodDisruptionBudget, 0) - for _, pdb := range pdbs { - if pdb.Namespace == "kube-system" { - kubeSystemPDBs = append(kubeSystemPDBs, pdb) - } - } for _, pod := range podList { - // Possibly skip a pod under deletion but only if it was being deleted for long enough - // to avoid a situation when we delete the empty node immediately after the pod was marked for - // deletion without respecting any graceful termination. if IsPodLongTerminating(pod, currentTime) { - // pod is being deleted for long enough - no need to care about it. 
continue } - isDaemonSetPod := false - replicated := false - safeToEvict := hasSafeToEvictAnnotation(pod) - terminal := isPodTerminal(pod) - - if skipNodesWithCustomControllerPods { - // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods - replicated, isDaemonSetPod, blockingPod, err = legacyCheckForReplicatedPods(listers, pod, minReplica) - if err != nil { - return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPod, err - } - } else { - replicated = ControllerRef(pod) != nil - isDaemonSetPod = pod_util.IsDaemonSetPod(pod) - } - - if isDaemonSetPod { + if pod_util.IsDaemonSetPod(pod) { daemonSetPods = append(daemonSetPods, pod) - continue - } - - if !safeToEvict && !terminal { - if hasNotSafeToEvictAnnotation(pod) { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotSafeToEvictAnnotation}, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name) - } - if !replicated { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: NotReplicated}, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name) - } - if pod.Namespace == "kube-system" && skipNodesWithSystemPods { - hasPDB, err := checkKubeSystemPDBs(pod, kubeSystemPDBs) - if err != nil { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error matching pods to pdbs: %v", err) - } - if !hasPDB { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: UnmovableKubeSystemPod}, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name) - } - } - if HasBlockingLocalStorage(pod) && skipNodesWithLocalStorage { - return []*apiv1.Pod{}, []*apiv1.Pod{}, &BlockingPod{Pod: pod, Reason: LocalStorageRequested}, fmt.Errorf("pod with local storage present: %s", pod.Name) - } - } - pods = append(pods, pod) - } - return pods, daemonSetPods, nil, nil -} - -func legacyCheckForReplicatedPods(listers kube_util.ListerRegistry, pod *apiv1.Pod, minReplica int32) (replicated bool, isDaemonSetPod bool, blockingPod *BlockingPod, err error) { - replicated = false - refKind := "" - checkReferences := listers != nil - isDaemonSetPod = false - - controllerRef := ControllerRef(pod) - if controllerRef != nil { - refKind = controllerRef.Kind - } - - // For now, owner controller must be in the same namespace as the pod - // so OwnerReference doesn't have its own Namespace field - controllerNamespace := pod.Namespace - if refKind == "ReplicationController" { - if checkReferences { - rc, err := listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) - // Assume a reason for an error is because the RC is either - // gone/missing or that the rc has too few replicas configured. - // TODO: replace the minReplica check with pod disruption budget. 
- if err == nil && rc != nil { - if rc.Spec.Replicas != nil && *rc.Spec.Replicas < minReplica { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", - pod.Namespace, pod.Name, rc.Spec.Replicas, minReplica) - } - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err) - } } else { - replicated = true - } - } else if pod_util.IsDaemonSetPod(pod) { - isDaemonSetPod = true - // don't have listener for other DaemonSet kind - // TODO: we should use a generic client for checking the reference. - if checkReferences && refKind == "DaemonSet" { - _, err := listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) - if apierrors.IsNotFound(err) { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err) - } else if err != nil { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: UnexpectedError}, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err) - } - } - } else if refKind == "Job" { - if checkReferences { - job, err := listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the Job is - // gone/missing, not for any other cause. TODO(mml): something more - // sophisticated than this - if err == nil && job != nil { - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true - } - } else if refKind == "ReplicaSet" { - if checkReferences { - rs, err := listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the RS is - // gone/missing, not for any other cause. TODO(mml): something more - // sophisticated than this - if err == nil && rs != nil { - if rs.Spec.Replicas != nil && *rs.Spec.Replicas < minReplica { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: MinReplicasReached}, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", - pod.Namespace, pod.Name, rs.Spec.Replicas, minReplica) - } - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true - } - } else if refKind == "StatefulSet" { - if checkReferences { - ss, err := listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) - - // Assume the only reason for an error is because the StatefulSet is - // gone/missing, not for any other cause. 
TODO(mml): something more - // sophisticated than this - if err == nil && ss != nil { - replicated = true - } else { - return replicated, isDaemonSetPod, &BlockingPod{Pod: pod, Reason: ControllerNotFound}, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err) - } - } else { - replicated = true + pods = append(pods, pod) } } - - return replicated, isDaemonSetPod, &BlockingPod{}, nil + return pods, daemonSetPods } // ControllerRef returns the OwnerReference to pod's controller. @@ -254,8 +106,8 @@ func ControllerRef(pod *apiv1.Pod) *metav1.OwnerReference { return metav1.GetControllerOf(pod) } -// isPodTerminal checks whether the pod is in a terminal state. -func isPodTerminal(pod *apiv1.Pod) bool { +// IsPodTerminal checks whether the pod is in a terminal state. +func IsPodTerminal(pod *apiv1.Pod) bool { // pod will never be restarted if pod.Spec.RestartPolicy == apiv1.RestartPolicyNever && (pod.Status.Phase == apiv1.PodSucceeded || pod.Status.Phase == apiv1.PodFailed) { return true @@ -296,29 +148,14 @@ func isLocalVolume(volume *apiv1.Volume) bool { return volume.HostPath != nil || (volume.EmptyDir != nil && volume.EmptyDir.Medium != apiv1.StorageMediumMemory) } -// This only checks if a matching PDB exist and therefore if it makes sense to attempt drain simulation, -// as we check for allowed-disruptions later anyway (for all pods with PDB, not just in kube-system) -func checkKubeSystemPDBs(pod *apiv1.Pod, pdbs []*policyv1.PodDisruptionBudget) (bool, error) { - for _, pdb := range pdbs { - selector, err := metav1.LabelSelectorAsSelector(pdb.Spec.Selector) - if err != nil { - return false, err - } - if selector.Matches(labels.Set(pod.Labels)) { - return true, nil - } - } - - return false, nil -} - -// This checks if pod has PodSafeToEvictKey annotation -func hasSafeToEvictAnnotation(pod *apiv1.Pod) bool { +// HasSafeToEvictAnnotation checks if pod has PodSafeToEvictKey annotation. +func HasSafeToEvictAnnotation(pod *apiv1.Pod) bool { return pod.GetAnnotations()[PodSafeToEvictKey] == "true" } -// This checks if pod has PodSafeToEvictKey annotation set to false -func hasNotSafeToEvictAnnotation(pod *apiv1.Pod) bool { +// HasNotSafeToEvictAnnotation checks if pod has PodSafeToEvictKey annotation +// set to false. +func HasNotSafeToEvictAnnotation(pod *apiv1.Pod) bool { return pod.GetAnnotations()[PodSafeToEvictKey] == "false" } diff --git a/cluster-autoscaler/utils/drain/drain_test.go b/cluster-autoscaler/utils/drain/drain_test.go index 52ce4949a54e..0a20b781a9ac 100644 --- a/cluster-autoscaler/utils/drain/drain_test.go +++ b/cluster-autoscaler/utils/drain/drain_test.go @@ -26,10 +26,7 @@ import ( apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - v1appslister "k8s.io/client-go/listers/apps/v1" - v1lister "k8s.io/client-go/listers/core/v1" "github.com/stretchr/testify/assert" ) @@ -41,10 +38,8 @@ type testOpts struct { pdbs []*policyv1.PodDisruptionBudget rcs []*apiv1.ReplicationController replicaSets []*appsv1.ReplicaSet - expectFatal bool expectPods []*apiv1.Pod expectDaemonSetPods []*apiv1.Pod - expectBlockingPod *BlockingPod // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods skipNodesWithCustomControllerPods bool } @@ -214,93 +209,6 @@ func TestDrain(t *testing.T) { }, } - nakedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptydirPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictVolumeSingleVal := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeSingleValEmpty := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeSingleValNonMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -329,90 +237,6 @@ func TestDrain(t *testing.T) { }, } - emptyDirSafeToEvictLocalVolumeMultiValNonMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - 
VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValEmpty := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: ",", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - terminalPod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -503,44 +327,6 @@ func TestDrain(t *testing.T) { }, } - unsafeRcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - unsafeJobPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - } - - unsafeNakedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "false", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - kubeSystemSafePod := &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -588,39 +374,12 @@ func TestDrain(t *testing.T) { }, } - kubeSystemFakePDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "kube-system", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "foo", - }, - }, - }, - } - - defaultNamespacePDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "default", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "PDB-managed pod", - }, - }, - }, - } - sharedTests := []testOpts{ { description: "RC-managed pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -628,7 +387,6 @@ func TestDrain(t 
*testing.T) { description: "DS-managed pod", pods: []*apiv1.Pod{dsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{dsPod}, }, @@ -636,7 +394,6 @@ func TestDrain(t *testing.T) { description: "DS-managed pod by a custom Daemonset", pods: []*apiv1.Pod{cdsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{cdsPod}, }, @@ -645,7 +402,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{jobPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{jobPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -654,7 +410,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{ssPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{ssPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -663,7 +418,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{rsPod}, pdbs: []*policyv1.PodDisruptionBudget{}, replicaSets: []*appsv1.ReplicaSet{&rs}, - expectFatal: false, expectPods: []*apiv1.Pod{rsPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -672,57 +426,7 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{rsPodDeleted}, pdbs: []*policyv1.PodDisruptionBudget{}, replicaSets: []*appsv1.ReplicaSet{&rs}, - expectFatal: false, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "naked pod", - pods: []*apiv1.Pod{nakedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: nakedPod, Reason: NotReplicated}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir", - pods: []*apiv1.Pod{emptydirPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptydirPod, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, - expectPods: []*apiv1.Pod{emptyDirSafeToEvictVolumeSingleVal}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValEmpty}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeSingleValNonMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, Reason: LocalStorageRequested}, expectDaemonSetPods: []*apiv1.Pod{}, }, { @@ -730,46 +434,13 @@ func TestDrain(t *testing.T) { pods: 
[]*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValNonMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, Reason: LocalStorageRequested}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValEmpty}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, Reason: LocalStorageRequested}, expectDaemonSetPods: []*apiv1.Pod{}, }, { description: "failed pod", pods: []*apiv1.Pod{failedPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{failedPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -778,7 +449,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{longTerminatingPod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -787,7 +457,6 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, pdbs: []*policyv1.PodDisruptionBudget{}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -795,7 +464,6 @@ func TestDrain(t *testing.T) { description: "evicted pod", pods: []*apiv1.Pod{evictedPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{evictedPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -803,7 +471,6 @@ func TestDrain(t *testing.T) { description: "pod in terminal state", pods: []*apiv1.Pod{terminalPod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{terminalPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -811,7 +478,6 @@ func TestDrain(t *testing.T) { description: "pod with PodSafeToEvict annotation", pods: []*apiv1.Pod{safePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{safePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -819,7 +485,6 @@ func TestDrain(t *testing.T) { description: "kube-system pod with PodSafeToEvict annotation", pods: []*apiv1.Pod{kubeSystemSafePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: 
false, expectPods: []*apiv1.Pod{kubeSystemSafePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -827,45 +492,14 @@ func TestDrain(t *testing.T) { description: "pod with EmptyDir and PodSafeToEvict annotation", pods: []*apiv1.Pod{emptydirSafePod}, pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, expectPods: []*apiv1.Pod{emptydirSafePod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "RC-managed pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeRcPod}, - rcs: []*apiv1.ReplicationController{&rc}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeRcPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "Job-managed pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeJobPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeJobPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "naked pod with PodSafeToEvict=false annotation", - pods: []*apiv1.Pod{unsafeNakedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: unsafeNakedPod, Reason: NotSafeToEvictAnnotation}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, { description: "empty PDB with RC-managed pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, @@ -874,129 +508,50 @@ func TestDrain(t *testing.T) { pods: []*apiv1.Pod{kubeSystemRcPod}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: false, expectPods: []*apiv1.Pod{kubeSystemRcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "kube-system PDB with non-matching kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: kubeSystemRcPod, Reason: UnmovableKubeSystemPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, { description: "kube-system PDB with default namespace pod", pods: []*apiv1.Pod{rcPod}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, rcs: []*apiv1.ReplicationController{&rc}, - expectFatal: false, expectPods: []*apiv1.Pod{rcPod}, expectDaemonSetPods: []*apiv1.Pod{}, }, - { - description: "default namespace PDB with matching labels kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: kubeSystemRcPod, Reason: UnmovableKubeSystemPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, } allTests := []testOpts{} // Note: be careful about modifying the underlying reference values for sharedTest // since they are shared (changing it once will change it for all shallow copies of sharedTest) for _, sharedTest := range sharedTests { - // make sure you shallow copy the test like this - // before you modify it - // (so that modifying one test doesn't affect another) - enabledTest := 
sharedTest - disabledTest := sharedTest - - // to execute the same shared tests for when the skipNodesWithCustomControllerPods flag is true - // and when the flag is false - enabledTest.skipNodesWithCustomControllerPods = true - enabledTest.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%v", - enabledTest.description, enabledTest.skipNodesWithCustomControllerPods) - allTests = append(allTests, enabledTest) - - disabledTest.skipNodesWithCustomControllerPods = false - disabledTest.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%v", - disabledTest.description, disabledTest.skipNodesWithCustomControllerPods) - allTests = append(allTests, disabledTest) + for _, skipNodesWithCustomControllerPods := range []bool{true, false} { + // Copy test to prevent side effects. + test := sharedTest + test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods + test.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.description, skipNodesWithCustomControllerPods) + allTests = append(allTests, test) + } } allTests = append(allTests, testOpts{ - description: "Custom-controller-managed blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: true, - expectPods: []*apiv1.Pod{}, - expectBlockingPod: &BlockingPod{Pod: customControllerPod, Reason: NotReplicated}, - expectDaemonSetPods: []*apiv1.Pod{}, - skipNodesWithCustomControllerPods: true, - }) - - allTests = append(allTests, testOpts{ - description: "Custom-controller-managed non-blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectFatal: false, - expectPods: []*apiv1.Pod{customControllerPod}, - expectBlockingPod: &BlockingPod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - skipNodesWithCustomControllerPods: false, + description: "Custom-controller-managed non-blocking pod", + pods: []*apiv1.Pod{customControllerPod}, + pdbs: []*policyv1.PodDisruptionBudget{}, + expectPods: []*apiv1.Pod{customControllerPod}, + expectDaemonSetPods: []*apiv1.Pod{}, }) for _, test := range allTests { - var err error - var rcLister v1lister.ReplicationControllerLister - if len(test.rcs) > 0 { - rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) - assert.NoError(t, err) - } - var rsLister v1appslister.ReplicaSetLister - if len(test.replicaSets) > 0 { - rsLister, err = kube_util.NewTestReplicaSetLister(test.replicaSets) - assert.NoError(t, err) - } - - dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) - assert.NoError(t, err) - jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) - assert.NoError(t, err) - ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) - assert.NoError(t, err) + t.Run(test.description, func(t *testing.T) { + pods, daemonSetPods := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, testTime) - registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) - - pods, daemonSetPods, blockingPod, err := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, registry, 0, testTime) - - if test.expectFatal { - assert.Equal(t, test.expectBlockingPod, blockingPod) - if err == nil { - t.Fatalf("%s: unexpected non-error", test.description) + if len(pods) != len(test.expectPods) { + t.Fatal("wrong pod list content") } - } - if 
!test.expectFatal { - assert.Nil(t, blockingPod) - if err != nil { - t.Fatalf("%s: error occurred: %v", test.description, err) - } - } - - if len(pods) != len(test.expectPods) { - t.Fatalf("Wrong pod list content: %v", test.description) - } - - assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) + assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) + }) } } diff --git a/cluster-autoscaler/utils/pod/pod.go b/cluster-autoscaler/utils/pod/pod.go index 89dd04b5f9d1..b85b14ac32a2 100644 --- a/cluster-autoscaler/utils/pod/pod.go +++ b/cluster-autoscaler/utils/pod/pod.go @@ -35,11 +35,7 @@ func IsDaemonSetPod(pod *apiv1.Pod) bool { return true } - if val, ok := pod.Annotations[DaemonSetPodAnnotationKey]; ok && val == "true" { - return true - } - - return false + return pod.Annotations[DaemonSetPodAnnotationKey] == "true" } // IsMirrorPod checks whether the pod is a mirror pod. From d6ee96db194898004c4fb7ba56474aa76d7fb58d Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Sat, 30 Sep 2023 07:20:22 +0000 Subject: [PATCH 3/9] Convert scale-down pdb check to drainability rule --- cluster-autoscaler/simulator/drain.go | 26 +-- .../simulator/drainability/rules/pdb/rule.go | 43 +++++ .../drainability/rules/pdb/rule_test.go | 155 ++++++++++++++++++ .../simulator/drainability/rules/rules.go | 2 + 4 files changed, 209 insertions(+), 17 deletions(-) create mode 100644 cluster-autoscaler/simulator/drainability/rules/pdb/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 101844ad65d3..daf8407cac7d 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -17,7 +17,6 @@ limitations under the License. package simulator import ( - "fmt" "time" apiv1 "k8s.io/api/core/v1" @@ -31,14 +30,14 @@ import ( schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" ) -// GetPodsToMove returns a list of pods that should be moved elsewhere -// and a list of DaemonSet pods that should be evicted if the node -// is drained. Raises error if there is an unreplicated pod. -// Based on kubectl drain code. If listers is nil it makes an assumption that RC, DS, Jobs and RS were deleted -// along with their pods (no abandoned pods with dangling created-by annotation). -// If listers is not nil it checks whether RC, DS, Jobs and RS that created these pods -// still exist. -// TODO(x13n): Rewrite GetPodsForDeletionOnNodeDrain into a set of DrainabilityRules. +// GetPodsToMove returns a list of pods that should be moved elsewhere and a +// list of DaemonSet pods that should be evicted if the node is drained. +// Raises error if there is an unreplicated pod. +// Based on kubectl drain code. If listers is nil it makes an assumption that +// RC, DS, Jobs and RS were deleted along with their pods (no abandoned pods +// with dangling created-by annotation). +// If listers is not nil it checks whether RC, DS, Jobs and RS that created +// these pods still exist. 
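+//
+// A minimal sketch of a typical call, assuming the caller already holds a
+// NodeInfo, listers and delete options; nil rules and a nil tracker fall
+// back to rules.Default and a fresh BasicRemainingPdbTracker:
+//
+//	pods, dsPods, blocking, err := GetPodsToMove(
+//		nodeInfo, deleteOptions, nil, listers, nil, time.Now())
+//	if err != nil {
+//		// blocking identifies the pod that prevents draining this node.
+//		_ = blocking
+//	}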
func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, listers kube_util.ListerRegistry, remainingPdbTracker pdb.RemainingPdbTracker, timestamp time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *drain.BlockingPod, err error) { var drainPods, drainDs []*apiv1.Pod if drainabilityRules == nil { @@ -82,13 +81,6 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. deleteOptions.SkipNodesWithLocalStorage, deleteOptions.SkipNodesWithCustomControllerPods, timestamp) - pods = append(pods, drainPods...) - daemonSetPods = append(daemonSetPods, drainDs...) - if canRemove, _, blockingPodInfo := remainingPdbTracker.CanRemovePods(pods); !canRemove { - pod := blockingPodInfo.Pod - return []*apiv1.Pod{}, []*apiv1.Pod{}, blockingPodInfo, fmt.Errorf("not enough pod disruption budget to move %s/%s", pod.Namespace, pod.Name) - } - - return pods, daemonSetPods, nil, nil + return append(pods, drainPods...), append(daemonSetPods, drainDs...), nil, nil } diff --git a/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go b/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go new file mode 100644 index 000000000000..a0d315fc7c0f --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/pdb/rule.go @@ -0,0 +1,43 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pdb + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +// Rule is a drainability rule on how to handle pods with pdbs. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides how to handle pods with pdbs on node drain. +func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + for _, pdb := range drainCtx.RemainingPdbTracker.MatchingPdbs(pod) { + if pdb.Status.DisruptionsAllowed < 1 { + return drainability.NewBlockedStatus(drain.NotEnoughPdb, fmt.Errorf("not enough pod disruption budget to move %s/%s", pod.Namespace, pod.Name)) + } + } + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go new file mode 100644 index 000000000000..741cf1fd2f3f --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go @@ -0,0 +1,155 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pdb + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +func TestRule(t *testing.T) { + one := intstr.FromInt(1) + + testCases := []struct { + desc string + pod *apiv1.Pod + pdbs []*policyv1.PodDisruptionBudget + wantOutcome drainability.OutcomeType + wantReason drain.BlockingPodReason + }{ + { + desc: "no pdbs", + pod: &apiv1.Pod{}, + }, + { + desc: "no matching pdbs", + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "happy", + Namespace: "good", + Labels: map[string]string{ + "label": "true", + }, + }, + }, + pdbs: []*policyv1.PodDisruptionBudget{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "bad", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "label": "true", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "good", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "label": "false", + }, + }, + }, + }, + }, + }, + { + desc: "pdb prevents scale-down", + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sad", + Namespace: "good", + Labels: map[string]string{ + "label": "true", + }, + }, + }, + pdbs: []*policyv1.PodDisruptionBudget{ + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "bad", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "label": "true", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "good", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "label": "true", + }, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Namespace: "good", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "label": "false", + }, + }, + }, + }, + }, + wantOutcome: drainability.BlockDrain, + wantReason: drain.NotEnoughPdb, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + tracker := pdb.NewBasicRemainingPdbTracker() + tracker.SetPdbs(tc.pdbs) + drainCtx := &drainability.DrainContext{ + RemainingPdbTracker: tracker, + } + + got := New().Drainable(drainCtx, tc.pod) + if got.Outcome != tc.wantOutcome || got.BlockingReason != tc.wantReason { + t.Errorf("Rule.Drainable(%s) = (outcome: %v, reason: %v), want (outcome: %v, reason: %v)", tc.pod.Name, got.Outcome, got.BlockingReason, tc.wantOutcome, tc.wantReason) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 1dd1d5e4e24c..4452dbbc4ec5 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -23,6 +23,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/localstorage" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" 
"k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" + pdbrule "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" ) @@ -44,6 +45,7 @@ func Default() Rules { system.New(), notsafetoevict.New(), localstorage.New(), + pdbrule.New(), } } From 324a33ede8ba67c961a67c43553aeac78ad00919 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Mon, 2 Oct 2023 18:37:09 +0000 Subject: [PATCH 4/9] Pass DeleteOptions once during default rule creation --- .../core/static_autoscaler_test.go | 2 +- cluster-autoscaler/main.go | 2 +- cluster-autoscaler/simulator/drain.go | 3 +- cluster-autoscaler/simulator/drain_test.go | 2 +- .../simulator/drainability/context.go | 2 -- .../drainability/rules/localstorage/rule.go | 19 ++++++------- .../rules/localstorage/rule_test.go | 22 ++++++++------- .../drainability/rules/notsafetoevict/rule.go | 7 +---- .../rules/notsafetoevict/rule_test.go | 4 --- .../drainability/rules/replicated/rule.go | 28 +++++++++++-------- .../rules/replicated/rule_test.go | 6 +--- .../simulator/drainability/rules/rules.go | 9 +++--- .../drainability/rules/system/rule.go | 17 +++++------ .../drainability/rules/system/rule_test.go | 26 +++++++++-------- 14 files changed, 71 insertions(+), 78 deletions(-) diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index 1a6fc141ec46..75b0db264d50 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -153,7 +153,7 @@ func (m *onNodeGroupDeleteMock) Delete(id string) error { func setUpScaleDownActuator(ctx *context.AutoscalingContext, autoscalingOptions config.AutoscalingOptions) { deleteOptions := options.NewNodeDeleteOptions(autoscalingOptions) - ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, rules.Default(), NewTestProcessors(ctx).NodeGroupConfigProcessor) + ctx.ScaleDownActuator = actuation.NewActuator(ctx, nil, deletiontracker.NewNodeDeletionTracker(0*time.Second), deleteOptions, rules.Default(deleteOptions), NewTestProcessors(ctx).NodeGroupConfigProcessor) } func TestStaticAutoscalerRunOnce(t *testing.T) { diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 6855ca1c6c49..5f879522670a 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -481,7 +481,7 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter if autoscalingOptions.ParallelDrain { sdCandidatesSorting := previouscandidates.NewPreviousCandidates() scaleDownCandidatesComparers = []scaledowncandidates.CandidatesComparer{ - emptycandidates.NewEmptySortingProcessor(emptycandidates.NewNodeInfoGetter(opts.ClusterSnapshot), deleteOptions, rules.Default()), + emptycandidates.NewEmptySortingProcessor(emptycandidates.NewNodeInfoGetter(opts.ClusterSnapshot), deleteOptions, rules.Default(deleteOptions)), sdCandidatesSorting, } opts.Processors.ScaleDownCandidatesNotifier.Register(sdCandidatesSorting) diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index daf8407cac7d..5df8585e1f3b 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -41,14 +41,13 @@ import ( func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions 
options.NodeDeleteOptions, drainabilityRules rules.Rules, listers kube_util.ListerRegistry, remainingPdbTracker pdb.RemainingPdbTracker, timestamp time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *drain.BlockingPod, err error) { var drainPods, drainDs []*apiv1.Pod if drainabilityRules == nil { - drainabilityRules = rules.Default() + drainabilityRules = rules.Default(deleteOptions) } if remainingPdbTracker == nil { remainingPdbTracker = pdb.NewBasicRemainingPdbTracker() } drainCtx := &drainability.DrainContext{ RemainingPdbTracker: remainingPdbTracker, - DeleteOptions: deleteOptions, Listers: listers, Timestamp: timestamp, } diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 1671fdfb9510..1a7326be9a52 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -312,7 +312,7 @@ func TestGetPodsToMove(t *testing.T) { SkipNodesWithLocalStorage: true, SkipNodesWithCustomControllerPods: true, } - rules := append(tc.rules, rules.Default()...) + rules := append(tc.rules, rules.Default(deleteOptions)...) tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, nil, tracker, testTime) diff --git a/cluster-autoscaler/simulator/drainability/context.go b/cluster-autoscaler/simulator/drainability/context.go index d6fd7116ca02..ffaa44b4b29c 100644 --- a/cluster-autoscaler/simulator/drainability/context.go +++ b/cluster-autoscaler/simulator/drainability/context.go @@ -20,14 +20,12 @@ import ( "time" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" - "k8s.io/autoscaler/cluster-autoscaler/simulator/options" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" ) // DrainContext contains parameters for drainability rules. type DrainContext struct { RemainingPdbTracker pdb.RemainingPdbTracker - DeleteOptions options.NodeDeleteOptions Listers kube_util.ListerRegistry Timestamp time.Time } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go index 6a58da1dd32d..dd01c52ea764 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -26,22 +26,21 @@ import ( ) // Rule is a drainability rule on how to handle local storage pods. -type Rule struct{} +type Rule struct { + enabled bool +} // New creates a new Rule. -func New() *Rule { - return &Rule{} +func New(enabled bool) *Rule { + return &Rule{ + enabled: enabled, + } } // Drainable decides what to do with local storage pods on node drain. 
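//
// A sketch of the assumed wiring after this change, with the enabled flag
// taking over from the former DrainContext.DeleteOptions lookup:
//
//	rule := localstorage.New(deleteOptions.SkipNodesWithLocalStorage)
//	status := rule.Drainable(drainCtx, pod)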
-func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { - return drainability.NewUndefinedStatus() - } - - if drainCtx.DeleteOptions.SkipNodesWithLocalStorage && drain.HasBlockingLocalStorage(pod) { +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if r.enabled && !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && drain.HasBlockingLocalStorage(pod) { return drainability.NewBlockedStatus(drain.LocalStorageRequested, fmt.Errorf("pod with local storage present: %s", pod.Name)) } - return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go index 222eb8fd7b6e..fb98c35e8564 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go @@ -24,7 +24,6 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -357,10 +356,11 @@ func TestDrain(t *testing.T) { ) for _, test := range []struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + disabledRule bool wantReason drain.BlockingPodReason wantError bool @@ -417,7 +417,12 @@ func TestDrain(t *testing.T) { wantReason: drain.LocalStorageRequested, wantError: true, }, - + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values and rule disabled", + pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, + rcs: []*apiv1.ReplicationController{&rc}, + disabledRule: true, + }, { desc: "EmptyDir failed pod", pod: emptyDirFailedPod, @@ -445,12 +450,9 @@ func TestDrain(t *testing.T) { } { t.Run(test.desc, func(t *testing.T) { drainCtx := &drainability.DrainContext{ - DeleteOptions: options.NodeDeleteOptions{ - SkipNodesWithLocalStorage: true, - }, Timestamp: testTime, } - status := New().Drainable(drainCtx, test.pod) + status := New(!test.disabledRule).Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go index 224a1fbd6a3f..e0d6da61cd71 100644 --- a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go @@ -35,13 +35,8 @@ func New() *Rule { // Drainable decides what to do with not safe to evict pods on node drain. 
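//
// For illustration, a pod annotated like this (hypothetical example):
//
//	pod.Annotations = map[string]string{drain.PodSafeToEvictKey: "false"}
//
// is blocked with NotSafeToEvictAnnotation, unless it is long-terminating,
// DaemonSet-owned, terminal, or explicitly marked safe to evict.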
func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { - return drainability.NewUndefinedStatus() - } - - if drain.HasNotSafeToEvictAnnotation(pod) { + if !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && drain.HasNotSafeToEvictAnnotation(pod) { return drainability.NewBlockedStatus(drain.NotSafeToEvictAnnotation, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name)) } - return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go index 42b3df58c9a4..92e9ad1ac4aa 100644 --- a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go @@ -25,7 +25,6 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -266,9 +265,6 @@ func TestDrain(t *testing.T) { } { t.Run(test.desc, func(t *testing.T) { drainCtx := &drainability.DrainContext{ - DeleteOptions: options.NodeDeleteOptions{ - SkipNodesWithSystemPods: true, - }, Timestamp: testTime, } status := New().Drainable(drainCtx, test.pod) diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go index 2eb3f492c6ef..86b921cec59b 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -27,15 +27,21 @@ import ( ) // Rule is a drainability rule on how to handle replicated pods. -type Rule struct{} +type Rule struct { + skipNodesWithCustomControllerPods bool + minReplicaCount int +} // New creates a new Rule. -func New() *Rule { - return &Rule{} +func New(skipNodesWithCustomControllerPods bool, minReplicaCount int) *Rule { + return &Rule{ + skipNodesWithCustomControllerPods: skipNodesWithCustomControllerPods, + minReplicaCount: minReplicaCount, + } } // Drainable decides what to do with replicated pods on node drain. 
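//
// Assumed construction from the default rule set, threading the two options
// that previously came from DrainContext.DeleteOptions:
//
//	replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount)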
-func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { return drainability.NewUndefinedStatus() } @@ -43,9 +49,9 @@ func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drain controllerRef := drain.ControllerRef(pod) replicated := controllerRef != nil - if drainCtx.DeleteOptions.SkipNodesWithCustomControllerPods { + if r.skipNodesWithCustomControllerPods { // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods - if status := legacyCheck(drainCtx, pod); status.Outcome != drainability.UndefinedOutcome { + if status := legacyCheck(drainCtx, pod, r.minReplicaCount); status.Outcome != drainability.UndefinedOutcome { return status } replicated = replicated && replicatedKind[controllerRef.Kind] @@ -66,7 +72,7 @@ var replicatedKind = map[string]bool{ "StatefulSet": true, } -func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { +func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod, minReplicaCount int) drainability.Status { if drainCtx.Listers == nil { return drainability.NewUndefinedStatus() } @@ -89,8 +95,8 @@ func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainabili } // TODO: Replace the minReplica check with PDB. - if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, minReplicaCount)) } } else if pod_util.IsDaemonSetPod(pod) { if refKind == "DaemonSet" { @@ -118,8 +124,8 @@ func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainabili if err == nil && rs != nil { // Assume the only reason for an error is because the RS is gone/missing. 
- if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < drainCtx.DeleteOptions.MinReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, drainCtx.DeleteOptions.MinReplicaCount)) + if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, minReplicaCount)) } } else { return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go index 905c63780b59..175791234b54 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -26,7 +26,6 @@ import ( apiv1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -407,13 +406,10 @@ func TestDrain(t *testing.T) { registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) drainCtx := &drainability.DrainContext{ - DeleteOptions: options.NodeDeleteOptions{ - SkipNodesWithCustomControllerPods: test.skipNodesWithCustomControllerPods, - }, Listers: registry, Timestamp: testTime, } - status := New().Drainable(drainCtx, test.pod) + status := New(test.skipNodesWithCustomControllerPods, 0).Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 4452dbbc4ec5..c155b13688b6 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -26,6 +26,7 @@ import ( pdbrule "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" + "k8s.io/autoscaler/cluster-autoscaler/simulator/options" ) // Rule determines whether a given pod can be drained or not. @@ -38,13 +39,13 @@ type Rule interface { } // Default returns the default list of Rules. 
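// For example, a (hypothetical) caller building the default rule set would pass the node
// deletion options explicitly, roughly:
//
//	Default(options.NodeDeleteOptions{
//		SkipNodesWithSystemPods:           true,
//		SkipNodesWithLocalStorage:         true,
//		SkipNodesWithCustomControllerPods: true,
//		MinReplicaCount:                   1,
//	})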
-func Default() Rules { +func Default(deleteOptions options.NodeDeleteOptions) Rules { return []Rule{ mirror.New(), - replicated.New(), - system.New(), + replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount), + system.New(deleteOptions.SkipNodesWithSystemPods), notsafetoevict.New(), - localstorage.New(), + localstorage.New(deleteOptions.SkipNodesWithLocalStorage), pdbrule.New(), } } diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go index 14eef8585510..419ba3e28795 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -26,22 +26,19 @@ import ( ) // Rule is a drainability rule on how to handle system pods. -type Rule struct{} +type Rule struct { + enabled bool +} // New creates a new Rule. -func New() *Rule { - return &Rule{} +func New(enabled bool) *Rule { + return &Rule{enabled: enabled} } // Drainable decides what to do with system pods on node drain. -func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) || pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) { - return drainability.NewUndefinedStatus() - } - - if drainCtx.DeleteOptions.SkipNodesWithSystemPods && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if r.enabled && !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { return drainability.NewBlockedStatus(drain.UnmovableKubeSystemPod, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)) } - return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go index 5be7f835b301..3c85d8c06663 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go @@ -26,7 +26,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" @@ -217,11 +216,12 @@ func TestDrain(t *testing.T) { ) for _, test := range []struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - pdbs []*policyv1.PodDisruptionBudget + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + pdbs []*policyv1.PodDisruptionBudget + disableRule bool wantReason drain.BlockingPodReason wantError bool @@ -264,6 +264,13 @@ func TestDrain(t *testing.T) { wantReason: drain.UnmovableKubeSystemPod, wantError: true, }, + { + desc: "default namespace PDB with matching labels kube-system pod and rule disabled", + pod: kubeSystemRcPod, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, + disableRule: true, + }, { desc: "kube-system failed pod", pod: kubeSystemFailedPod, @@ -295,12 +302,9 @@ func TestDrain(t *testing.T) { drainCtx := &drainability.DrainContext{ RemainingPdbTracker: tracker, - DeleteOptions: options.NodeDeleteOptions{ - SkipNodesWithSystemPods: true, - }, - Timestamp: testTime, + Timestamp: testTime, } - status := New().Drainable(drainCtx, test.pod) + status := New(!test.disableRule).Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) From 2fa6c2677b882b7dd97cc316fb48d2a4360bfa90 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Wed, 4 Oct 2023 01:06:39 +0000 Subject: [PATCH 5/9] Split out custom controller and common checks into separate drainability rules --- .../rules/customcontroller/rule.go | 112 ++++ .../rules/customcontroller/rule_test.go | 218 +++++++ .../drainability/rules/daemonset/rule.go | 39 ++ .../drainability/rules/daemonset/rule_test.go | 60 ++ .../drainability/rules/localstorage/rule.go | 3 +- .../rules/localstorage/rule_test.go | 575 +++++++----------- .../rules/longterminating/rule.go | 39 ++ .../rules/longterminating/rule_test.go | 94 +++ .../drainability/rules/mirror/rule_test.go | 16 +- .../drainability/rules/notsafetoevict/rule.go | 3 +- .../rules/notsafetoevict/rule_test.go | 255 ++------ .../drainability/rules/pdb/rule_test.go | 21 +- .../drainability/rules/replicated/rule.go | 86 +-- .../rules/replicated/rule_test.go | 350 +++-------- .../simulator/drainability/rules/rules.go | 17 +- .../drainability/rules/safetoevict/rule.go | 39 ++ .../rules/safetoevict/rule_test.go | 62 ++ .../drainability/rules/system/rule.go | 3 +- .../drainability/rules/system/rule_test.go | 148 +---- .../drainability/rules/terminal/rule.go | 39 ++ .../drainability/rules/terminal/rule_test.go | 79 +++ .../simulator/drainability/status.go | 23 +- 22 files changed, 1215 insertions(+), 1066 deletions(-) create mode 100644 cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go create mode 100644 
cluster-autoscaler/simulator/drainability/rules/terminal/rule.go create mode 100644 cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go new file mode 100644 index 000000000000..55caed24c15c --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go @@ -0,0 +1,112 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package customcontroller + +import ( + "fmt" + + apiv1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle replicated pods. +type Rule struct { + enabled bool + minReplicaCount int +} + +// New creates a new Rule. +func New(enabled bool, minReplicaCount int) *Rule { + return &Rule{ + enabled: enabled, + minReplicaCount: minReplicaCount, + } +} + +// Drainable decides what to do with replicated pods on node drain. +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if !r.enabled || drainCtx.Listers == nil { + return drainability.NewUndefinedStatus() + } + + // For now, owner controller must be in the same namespace as the pod + // so OwnerReference doesn't have its own Namespace field. + controllerNamespace := pod.Namespace + + controllerRef := drain.ControllerRef(pod) + if controllerRef == nil { + return drainability.NewUndefinedStatus() + } + refKind := controllerRef.Kind + + if refKind == "ReplicationController" { + rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) + // Assume RC is either gone/missing or has too few replicas configured. + if err != nil || rc == nil { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + + // TODO: Replace the minReplica check with PDB. + if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < r.minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, r.minReplicaCount)) + } + } else if pod_util.IsDaemonSetPod(pod) { + if refKind == "DaemonSet" { + // We don't have a listener for the other DaemonSet kind. + // TODO: Use a generic client for checking the reference. 
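+			// A standard DaemonSet owner needs no lister lookup here.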
+ return drainability.NewUndefinedStatus() + } + + _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) + if err != nil { + if apierrors.IsNotFound(err) { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) + } + return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "Job" { + job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) + + if err != nil || job == nil { + // Assume the only reason for an error is because the Job is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "ReplicaSet" { + rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) + + if err == nil && rs != nil { + // Assume the only reason for an error is because the RS is gone/missing. + if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < r.minReplicaCount { + return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, r.minReplicaCount)) + } + } else { + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) + } + } else if refKind == "StatefulSet" { + ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) + + if err != nil && ss == nil { + // Assume the only reason for an error is because the SS is gone/missing. + return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) + } + } + + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go new file mode 100644 index 000000000000..2c6909694f01 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go @@ -0,0 +1,218 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package customcontroller + +import ( + "fmt" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" + v1appslister "k8s.io/client-go/listers/apps/v1" + v1lister "k8s.io/client-go/listers/core/v1" + + "github.com/stretchr/testify/assert" +) + +func TestDrainable(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + ds = appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ds", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", + }, + } + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + statefulset = appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ss", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", + }, + } + rs = appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rs", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicasets/rs", + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + ) + + for desc, test := range map[string]struct { + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + enabled bool + + wantReason drain.BlockingPodReason + wantError bool + }{ + "RC-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + }, + "DS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + }, + "DS-managed pod by a custom Daemonset": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + }, + "Job-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + }, + "SS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + }, + "RS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + 
OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, + }, + "RS-managed pod that is being deleted": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, + }, + } { + for _, enabled := range []bool{true, false} { + desc = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.desc, enabled) + + t.Run(desc, func(t *testing.T) { + var err error + var rcLister v1lister.ReplicationControllerLister + if len(test.rcs) > 0 { + rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) + assert.NoError(t, err) + } + var rsLister v1appslister.ReplicaSetLister + if len(test.rss) > 0 { + rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) + assert.NoError(t, err) + } + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) + + registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + + drainCtx := &drainability.DrainContext{ + Listers: registry, + Timestamp: testTime, + } + status := New(enabled, 0).Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) + } + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go new file mode 100644 index 000000000000..05f93d288a1d --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go @@ -0,0 +1,39 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package daemonset + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" +) + +// Rule is a drainability rule on how to handle daemon set pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with daemon set pods on node drain. 
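+// A DaemonSet pod yields an undefined status with the Interrupt option, which is intended to
+// cut short evaluation of the remaining drainability rules for that pod.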
+func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if pod_util.IsDaemonSetPod(pod) { + return drainability.NewUndefinedStatus(drainability.Interrupt) + } + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go new file mode 100644 index 000000000000..d444d7079999 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go @@ -0,0 +1,60 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package daemonset + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" +) + +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { + pod *apiv1.Pod + want drainability.Status + }{ + "regular pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "ns", + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "daemonset pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "ns", + OwnerReferences: test.GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", ""), + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + } { + t.Run(desc, func(t *testing.T) { + got := New().Drainable(nil, tc.pod) + if tc.want != got { + t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go index dd01c52ea764..68533fad5e1b 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -22,7 +22,6 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) // Rule is a drainability rule on how to handle local storage pods. @@ -39,7 +38,7 @@ func New(enabled bool) *Rule { // Drainable decides what to do with local storage pods on node drain. 
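// Only the blocking-local-storage check remains in this rule; long-terminating, DaemonSet,
// safe-to-evict and terminal pods are handled by the dedicated longterminating, daemonset,
// safetoevict and terminal rules.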
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if r.enabled && !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && drain.HasBlockingLocalStorage(pod) { + if r.enabled && drain.HasBlockingLocalStorage(pod) { return drainability.NewBlockedStatus(drain.LocalStorageRequested, fmt.Errorf("pod with local storage present: %s", pod.Name)) } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go index fb98c35e8564..cb96b568678f 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go @@ -25,12 +25,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" "github.com/stretchr/testify/assert" ) -func TestDrain(t *testing.T) { +func TestDrainable(t *testing.T) { var ( testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) replicas = int32(5) @@ -45,410 +45,267 @@ func TestDrain(t *testing.T) { Replicas: &replicas, }, } + ) - emptydirPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } + for desc, test := range map[string]struct { + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + disabledRule bool - emptyDirSafeToEvictVolumeSingleVal = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "scratch", + wantReason drain.BlockingPodReason + wantError bool + }{ + "pod with EmptyDir": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - } - - emptyDirSafeToEvictLocalVolumeSingleValEmpty = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: 
drain.LocalStorageRequested, + wantError: true, + }, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch", }, }, - }, - } - - emptyDirSafeToEvictLocalVolumeSingleValNonMatching = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - } - - emptyDirSafeToEvictLocalVolumeMultiValAllMatching = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValNonMatching = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + rcs: []*apiv1.ReplicationController{&rc}, + }, + "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "", }, }, - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: 
&apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - } - - emptyDirSafeToEvictLocalVolumeMultiValEmpty = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: ",", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-2", }, }, - }, - } - - emptyDirFailedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - emptyDirTerminalPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.LocalStorageRequested, + wantError: true, + }, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", }, }, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - emptyDirEvictedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ 
+ NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - emptyDirSafePod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - zeroGracePeriod = int64(0) - emptyDirLongTerminatingPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + rcs: []*apiv1.ReplicationController{&rc}, + }, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-5", }, }, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - extendedGracePeriod = int64(6 * 60) // 6 minutes - emptyDirLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, }, }, }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - ) - - for _, test := range []struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - disabledRule bool - - wantReason drain.BlockingPodReason - wantError bool - }{ - { - desc: "pod with EmptyDir", - pod: emptydirPod, - rcs: []*apiv1.ReplicationController{&rc}, - wantReason: drain.LocalStorageRequested, - wantError: true, - }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation", - pod: emptyDirSafeToEvictVolumeSingleVal, - rcs: []*apiv1.ReplicationController{&rc}, - }, - { - desc: "pod with EmptyDir and empty value for SafeToEvictLocalVolumesKey annotation", - pod: emptyDirSafeToEvictLocalVolumeSingleValEmpty, - rcs: 
[]*apiv1.ReplicationController{&rc}, - wantReason: drain.LocalStorageRequested, - wantError: true, - }, - { - desc: "pod with EmptyDir and non-matching value for SafeToEvictLocalVolumesKey annotation", - pod: emptyDirSafeToEvictLocalVolumeSingleValNonMatching, - rcs: []*apiv1.ReplicationController{&rc}, - wantReason: drain.LocalStorageRequested, - wantError: true, - }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", - pod: emptyDirSafeToEvictLocalVolumeMultiValAllMatching, - rcs: []*apiv1.ReplicationController{&rc}, - }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with non-matching values", - pod: emptyDirSafeToEvictLocalVolumeMultiValNonMatching, rcs: []*apiv1.ReplicationController{&rc}, wantReason: drain.LocalStorageRequested, wantError: true, }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values", - pod: emptyDirSafeToEvictLocalVolumeMultiValSomeMatchingVals, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with some matching values": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + }, rcs: []*apiv1.ReplicationController{&rc}, wantReason: drain.LocalStorageRequested, wantError: true, }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values", - pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: ",", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + }, rcs: []*apiv1.ReplicationController{&rc}, wantReason: drain.LocalStorageRequested, wantError: true, }, - { - desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values and rule disabled", - pod: emptyDirSafeToEvictLocalVolumeMultiValEmpty, + "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values and rule disabled": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: ",", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: 
[]apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + }, rcs: []*apiv1.ReplicationController{&rc}, disabledRule: true, }, - { - desc: "EmptyDir failed pod", - pod: emptyDirFailedPod, - }, - { - desc: "EmptyDir terminal pod", - pod: emptyDirTerminalPod, - }, - { - desc: "EmptyDir evicted pod", - pod: emptyDirEvictedPod, - }, - { - desc: "EmptyDir pod with PodSafeToEvict annotation", - pod: emptyDirSafePod, - }, - { - desc: "EmptyDir long terminating pod with 0 grace period", - pod: emptyDirLongTerminatingPod, - }, - { - desc: "EmptyDir long terminating pod with extended grace period", - pod: emptyDirLongTerminatingPodWithExtendedGracePeriod, - }, } { - t.Run(test.desc, func(t *testing.T) { + t.Run(desc, func(t *testing.T) { drainCtx := &drainability.DrainContext{ Timestamp: testTime, } diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go new file mode 100644 index 000000000000..5edc6b70f603 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go @@ -0,0 +1,39 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package longterminating + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +// Rule is a drainability rule on how to handle long terminating pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with long terminating pods on node drain. +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { + return drainability.NewUndefinedStatus(drainability.Interrupt) + } + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go new file mode 100644 index 000000000000..934453429d1e --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -0,0 +1,94 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package longterminating + +import ( + "testing" + "time" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +func TestDrainable(t *testing.T) { + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + zeroGracePeriod = int64(0) + extendedGracePeriod = int64(6 * 60) // 6 minutes + ) + + for desc, tc := range map[string]struct { + pod *apiv1.Pod + want drainability.Status + }{ + "regular pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "ns", + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "long terminating pod with 0 grace period": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + "long terminating pod with extended grace period": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + } { + t.Run(desc, func(t *testing.T) { + drainCtx := &drainability.DrainContext{ + Timestamp: testTime, + } + got := New().Drainable(drainCtx, tc.pod) + if tc.want != got { + t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go index e05613daaedd..d95cf704c9f5 100644 --- a/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/mirror/rule_test.go @@ -25,14 +25,12 @@ import ( "k8s.io/kubernetes/pkg/kubelet/types" ) -func TestRule(t *testing.T) { - testCases := []struct { - desc string +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { pod *apiv1.Pod want drainability.Status }{ - { - desc: "non mirror pod", + "regular pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "regularPod", @@ -41,8 +39,7 @@ func TestRule(t *testing.T) { }, want: drainability.NewUndefinedStatus(), }, - { - desc: "mirror pod", + "mirror pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "manifestPod", @@ -54,9 +51,8 @@ func TestRule(t *testing.T) { }, want: drainability.NewSkipStatus(), }, - } - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { + } { + t.Run(desc, func(t *testing.T) { got := New().Drainable(nil, tc.pod) if tc.want != got { t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go index 
e0d6da61cd71..4a8147a9ac34 100644 --- a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule.go @@ -22,7 +22,6 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) // Rule is a drainability rule on how to handle not safe to evict pods. @@ -35,7 +34,7 @@ func New() *Rule { // Drainable decides what to do with not safe to evict pods on node drain. func (Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && drain.HasNotSafeToEvictAnnotation(pod) { + if drain.HasNotSafeToEvictAnnotation(pod) { return drainability.NewBlockedStatus(drain.NotSafeToEvictAnnotation, fmt.Errorf("pod annotated as not safe to evict present: %s", pod.Name)) } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go index 92e9ad1ac4aa..8d96437eb74b 100644 --- a/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/notsafetoevict/rule_test.go @@ -26,12 +26,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" "github.com/stretchr/testify/assert" ) -func TestDrain(t *testing.T) { +func TestDrainable(t *testing.T) { var ( testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) replicas = int32(5) @@ -46,18 +46,6 @@ func TestDrain(t *testing.T) { Replicas: &replicas, }, } - - rcPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - job = batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: "job", @@ -65,205 +53,88 @@ func TestDrain(t *testing.T) { SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", }, } + ) - jobPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - }, - } - - safePod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - unsafeSystemFailedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } + for desc, test := range map[string]struct { + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet - unsafeSystemTerminalPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - 
drain.PodSafeToEvictKey: "false", + wantReason drain.BlockingPodReason + wantError bool + }{ + "pod with PodSafeToEvict annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - unsafeSystemEvictedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", + Spec: apiv1.PodSpec{ + NodeName: "node", }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - zeroGracePeriod = int64(0) - unsafeLongTerminatingPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", + }, + "RC-managed pod with no annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - extendedGracePeriod = int64(6 * 60) // 6 minutes - unsafeLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", + Spec: apiv1.PodSpec{ + NodeName: "node", }, }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - unsafeRcPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", + rcs: []*apiv1.ReplicationController{&rc}, + }, + "RC-managed pod with PodSafeToEvict=false annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - unsafeJobPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "false", + Spec: apiv1.PodSpec{ + NodeName: "node", }, }, - } - ) - - for _, test := range []struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - - wantReason drain.BlockingPodReason - wantError bool - }{ - { - desc: "pod with PodSafeToEvict annotation", - pod: safePod, - }, - { - desc: 
"RC-managed pod with no annotation", - pod: rcPod, - rcs: []*apiv1.ReplicationController{&rc}, - }, - { - desc: "RC-managed pod with PodSafeToEvict=false annotation", - pod: unsafeRcPod, rcs: []*apiv1.ReplicationController{&rc}, wantReason: drain.NotSafeToEvictAnnotation, wantError: true, }, - { - desc: "Job-managed pod with no annotation", - pod: jobPod, - rcs: []*apiv1.ReplicationController{&rc}, + "job-managed pod with no annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, }, - { - desc: "Job-managed pod with PodSafeToEvict=false annotation", - pod: unsafeJobPod, + "job-managed pod with PodSafeToEvict=false annotation": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "false", + }, + }, + }, rcs: []*apiv1.ReplicationController{&rc}, wantReason: drain.NotSafeToEvictAnnotation, wantError: true, }, - - { - desc: "unsafe failed pod", - pod: unsafeSystemFailedPod, - }, - { - desc: "unsafe terminal pod", - pod: unsafeSystemTerminalPod, - }, - { - desc: "unsafe evicted pod", - pod: unsafeSystemEvictedPod, - }, - { - desc: "unsafe long terminating pod with 0 grace period", - pod: unsafeLongTerminatingPod, - }, - { - desc: "unsafe long terminating pod with extended grace period", - pod: unsafeLongTerminatingPodWithExtendedGracePeriod, - }, } { - t.Run(test.desc, func(t *testing.T) { + t.Run(desc, func(t *testing.T) { drainCtx := &drainability.DrainContext{ Timestamp: testTime, } diff --git a/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go index 741cf1fd2f3f..73faf1100a07 100644 --- a/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/pdb/rule_test.go @@ -28,22 +28,19 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/utils/drain" ) -func TestRule(t *testing.T) { +func TestDrainable(t *testing.T) { one := intstr.FromInt(1) - testCases := []struct { - desc string + for desc, tc := range map[string]struct { pod *apiv1.Pod pdbs []*policyv1.PodDisruptionBudget wantOutcome drainability.OutcomeType wantReason drain.BlockingPodReason }{ - { - desc: "no pdbs", - pod: &apiv1.Pod{}, + "no pdbs": { + pod: &apiv1.Pod{}, }, - { - desc: "no matching pdbs", + "no matching pdbs": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "happy", @@ -82,8 +79,7 @@ func TestRule(t *testing.T) { }, }, }, - { - desc: "pdb prevents scale-down", + "pdb prevents scale-down": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "sad", @@ -137,9 +133,8 @@ func TestRule(t *testing.T) { wantOutcome: drainability.BlockDrain, wantReason: drain.NotEnoughPdb, }, - } - for _, tc := range testCases { - t.Run(tc.desc, func(t *testing.T) { + } { + t.Run(desc, func(t *testing.T) { tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) drainCtx := &drainability.DrainContext{ diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go index 86b921cec59b..cca20d11422d 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go +++ 
b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -20,10 +20,8 @@ import ( "fmt" apiv1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) // Rule is a drainability rule on how to handle replicated pods. @@ -42,26 +40,18 @@ func New(skipNodesWithCustomControllerPods bool, minReplicaCount int) *Rule { // Drainable decides what to do with replicated pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { - return drainability.NewUndefinedStatus() - } - controllerRef := drain.ControllerRef(pod) replicated := controllerRef != nil if r.skipNodesWithCustomControllerPods { // TODO(vadasambar): remove this when we get rid of skipNodesWithCustomControllerPods - if status := legacyCheck(drainCtx, pod, r.minReplicaCount); status.Outcome != drainability.UndefinedOutcome { - return status - } replicated = replicated && replicatedKind[controllerRef.Kind] } - if pod_util.IsDaemonSetPod(pod) || drain.HasSafeToEvictAnnotation(pod) || drain.IsPodTerminal(pod) || replicated { - return drainability.NewUndefinedStatus() + if !replicated { + return drainability.NewBlockedStatus(drain.NotReplicated, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name)) } - - return drainability.NewBlockedStatus(drain.NotReplicated, fmt.Errorf("%s/%s is not replicated", pod.Namespace, pod.Name)) + return drainability.NewUndefinedStatus() } // replicatedKind returns true if this kind has replicates pods. @@ -71,73 +61,3 @@ var replicatedKind = map[string]bool{ "ReplicaSet": true, "StatefulSet": true, } - -func legacyCheck(drainCtx *drainability.DrainContext, pod *apiv1.Pod, minReplicaCount int) drainability.Status { - if drainCtx.Listers == nil { - return drainability.NewUndefinedStatus() - } - - // For now, owner controller must be in the same namespace as the pod - // so OwnerReference doesn't have its own Namespace field. - controllerNamespace := pod.Namespace - - controllerRef := drain.ControllerRef(pod) - if controllerRef == nil { - return drainability.NewUndefinedStatus() - } - refKind := controllerRef.Kind - - if refKind == "ReplicationController" { - rc, err := drainCtx.Listers.ReplicationControllerLister().ReplicationControllers(controllerNamespace).Get(controllerRef.Name) - // Assume RC is either gone/missing or has too few replicas configured. - if err != nil || rc == nil { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) - } - - // TODO: Replace the minReplica check with PDB. - if rc.Spec.Replicas != nil && int(*rc.Spec.Replicas) < minReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, minReplicaCount)) - } - } else if pod_util.IsDaemonSetPod(pod) { - if refKind == "DaemonSet" { - // We don't have a listener for the other DaemonSet kind. - // TODO: Use a generic client for checking the reference. 
- return drainability.NewUndefinedStatus() - } - - _, err := drainCtx.Listers.DaemonSetLister().DaemonSets(controllerNamespace).Get(controllerRef.Name) - if err != nil { - if apierrors.IsNotFound(err) { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("daemonset for %s/%s is not present, err: %v", pod.Namespace, pod.Name, err)) - } - return drainability.NewBlockedStatus(drain.UnexpectedError, fmt.Errorf("error when trying to get daemonset for %s/%s , err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "Job" { - job, err := drainCtx.Listers.JobLister().Jobs(controllerNamespace).Get(controllerRef.Name) - - if err != nil || job == nil { - // Assume the only reason for an error is because the Job is gone/missing. - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("job for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "ReplicaSet" { - rs, err := drainCtx.Listers.ReplicaSetLister().ReplicaSets(controllerNamespace).Get(controllerRef.Name) - - if err == nil && rs != nil { - // Assume the only reason for an error is because the RS is gone/missing. - if rs.Spec.Replicas != nil && int(*rs.Spec.Replicas) < minReplicaCount { - return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rs.Spec.Replicas, minReplicaCount)) - } - } else { - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("replication controller for %s/%s is not available, err: %v", pod.Namespace, pod.Name, err)) - } - } else if refKind == "StatefulSet" { - ss, err := drainCtx.Listers.StatefulSetLister().StatefulSets(controllerNamespace).Get(controllerRef.Name) - - if err != nil && ss == nil { - // Assume the only reason for an error is because the SS is gone/missing. - return drainability.NewBlockedStatus(drain.ControllerNotFound, fmt.Errorf("statefulset for %s/%s is not available: err: %v", pod.Namespace, pod.Name, err)) - } - } - - return drainability.NewUndefinedStatus() -} diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go index 175791234b54..cee50dec100d 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -28,14 +28,14 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" - . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" v1appslister "k8s.io/client-go/listers/apps/v1" v1lister "k8s.io/client-go/listers/core/v1" "github.com/stretchr/testify/assert" ) -func TestDrain(t *testing.T) { +func TestDrainable(t *testing.T) { var ( testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) replicas = int32(5) @@ -50,18 +50,6 @@ func TestDrain(t *testing.T) { Replicas: &replicas, }, } - - rcPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - ds = appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ Name: "ds", @@ -69,32 +57,6 @@ func TestDrain(t *testing.T) { SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", }, } - - dsPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - cdsPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), - Annotations: map[string]string{ - "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - job = batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ Name: "job", @@ -102,15 +64,6 @@ func TestDrain(t *testing.T) { SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", }, } - - jobPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - }, - } - statefulset = appsv1.StatefulSet{ ObjectMeta: metav1.ObjectMeta{ Name: "ss", @@ -118,15 +71,6 @@ func TestDrain(t *testing.T) { SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", }, } - - ssPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), - }, - } - rs = appsv1.ReplicaSet{ ObjectMeta: metav1.ObjectMeta{ Name: "rs", @@ -137,30 +81,6 @@ func TestDrain(t *testing.T) { Replicas: &replicas, }, } - - rsPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - rsPodDeleted = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - customControllerPod = &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -169,118 +89,18 @@ func TestDrain(t *testing.T) { // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#naming-conventions // vadasambar: I am using it here just because `FooController`` // is easier to understand than say `FooSet` - OwnerReferences: GenerateOwnerReferences("Foo", "FooController", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - nakedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: 
apiv1.PodSpec{ - NodeName: "node", - }, - } - - nakedFailedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - nakedTerminalPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - nakedEvictedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - nakedSafePod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "true", - }, + OwnerReferences: test.GenerateOwnerReferences("Foo", "FooController", "apps/v1", ""), }, Spec: apiv1.PodSpec{ NodeName: "node", }, } - - zeroGracePeriod = int64(0) - nakedLongTerminatingPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - extendedGracePeriod = int64(6 * 60) // 6 minutes - nakedLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } ) type testCase struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods skipNodesWithCustomControllerPods bool @@ -289,102 +109,104 @@ func TestDrain(t *testing.T) { wantError bool } - sharedTests := []testCase{ - { - desc: "RC-managed pod", - pod: rcPod, - rcs: []*apiv1.ReplicationController{&rc}, - }, - { - desc: "DS-managed pod", - pod: dsPod, - }, - { - desc: "DS-managed pod by a custom Daemonset", - pod: cdsPod, + sharedTests := map[string]testCase{ + "RC-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, }, - { - desc: "Job-managed pod", - pod: jobPod, - rcs: []*apiv1.ReplicationController{&rc}, + "Job-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, }, - { - desc: "SS-managed pod", - pod: ssPod, - rcs: []*apiv1.ReplicationController{&rc}, 
+ "SS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, }, - { - desc: "RS-managed pod", - pod: rsPod, - rss: []*appsv1.ReplicaSet{&rs}, + "RS-managed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, }, - { - desc: "RS-managed pod that is being deleted", - pod: rsPodDeleted, - rss: []*appsv1.ReplicaSet{&rs}, + "RS-managed pod that is being deleted": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, }, - { - desc: "naked pod", - pod: nakedPod, + "naked pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, wantReason: drain.NotReplicated, wantError: true, }, - { - desc: "naked failed pod", - pod: nakedFailedPod, - }, - { - desc: "naked terminal pod", - pod: nakedTerminalPod, - }, - { - desc: "naked evicted pod", - pod: nakedEvictedPod, - }, - { - desc: "naked pod with PodSafeToEvict annotation", - pod: nakedSafePod, - }, - { - desc: "naked long terminating pod with 0 grace period", - pod: nakedLongTerminatingPod, - }, - { - desc: "naked long terminating pod with extended grace period", - pod: nakedLongTerminatingPodWithExtendedGracePeriod, - }, } - var tests []testCase - - // Note: do not modify the underlying reference values for sharedTests. - for _, test := range sharedTests { + tests := make(map[string]testCase) + for desc, test := range sharedTests { for _, skipNodesWithCustomControllerPods := range []bool{true, false} { // Copy test to prevent side effects. test := test test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods - test.desc = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.desc, skipNodesWithCustomControllerPods) - tests = append(tests, test) + desc := fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", desc, skipNodesWithCustomControllerPods) + tests[desc] = test } } - - customControllerTests := []testCase{ - { - desc: "Custom-controller-managed blocking pod", - pod: customControllerPod, - skipNodesWithCustomControllerPods: true, - wantReason: drain.NotReplicated, - wantError: true, - }, - { - desc: "Custom-controller-managed non-blocking pod", - pod: customControllerPod, - }, + tests["custom-controller-managed non-blocking pod"] = testCase{ + pod: customControllerPod, + } + tests["custom-controller-managed blocking pod"] = testCase{ + pod: customControllerPod, + skipNodesWithCustomControllerPods: true, + wantReason: drain.NotReplicated, + wantError: true, } - tests = append(tests, customControllerTests...) 
- for _, test := range tests { - t.Run(test.desc, func(t *testing.T) { + for desc, test := range tests { + t.Run(desc, func(t *testing.T) { var err error var rcLister v1lister.ReplicationControllerLister if len(test.rcs) > 0 { diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index c155b13688b6..73c7d2b73cdb 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -20,12 +20,17 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/customcontroller" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/daemonset" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/localstorage" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/longterminating" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" pdbrule "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/safetoevict" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/terminal" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" ) @@ -42,6 +47,15 @@ type Rule interface { func Default(deleteOptions options.NodeDeleteOptions) Rules { return []Rule{ mirror.New(), + longterminating.New(), + customcontroller.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount), + + // Interrupting checks + daemonset.New(), + safetoevict.New(), + terminal.New(), + + // Blocking checks replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount), system.New(deleteOptions.SkipNodesWithSystemPods), notsafetoevict.New(), @@ -64,7 +78,8 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d } for _, r := range rs { - if d := r.Drainable(drainCtx, pod); d.Outcome != drainability.UndefinedOutcome { + d := r.Drainable(drainCtx, pod) + if d.Interrupted || d.Outcome != drainability.UndefinedOutcome { return d } } diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go new file mode 100644 index 000000000000..e9fd565d6d97 --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go @@ -0,0 +1,39 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package safetoevict + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +// Rule is a drainability rule on how to handle safe to evict pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with safe to evict pods on node drain. +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.HasSafeToEvictAnnotation(pod) { + return drainability.NewUndefinedStatus(drainability.Interrupt) + } + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go new file mode 100644 index 000000000000..d0560f2fea0d --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go @@ -0,0 +1,62 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package safetoevict + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { + pod *apiv1.Pod + want drainability.Status + }{ + "regular pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "ns", + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "safe to evict pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + } { + t.Run(desc, func(t *testing.T) { + got := New().Drainable(nil, tc.pod) + if tc.want != got { + t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go index 419ba3e28795..d8bfdf3443cc 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -22,7 +22,6 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) // Rule is a drainability rule on how to handle system pods. @@ -37,7 +36,7 @@ func New(enabled bool) *Rule { // Drainable decides what to do with system pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if r.enabled && !drain.IsPodLongTerminating(pod, drainCtx.Timestamp) && !pod_util.IsDaemonSetPod(pod) && !drain.HasSafeToEvictAnnotation(pod) && !drain.IsPodTerminal(pod) && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { + if r.enabled && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { return drainability.NewBlockedStatus(drain.UnmovableKubeSystemPod, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)) } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go index 3c85d8c06663..2e6b7464cbc7 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go @@ -27,12 +27,12 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" - . "k8s.io/autoscaler/cluster-autoscaler/utils/test" + "k8s.io/autoscaler/cluster-autoscaler/utils/test" "github.com/stretchr/testify/assert" ) -func TestDrain(t *testing.T) { +func TestDrainable(t *testing.T) { var ( testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) replicas = int32(5) @@ -52,7 +52,7 @@ func TestDrain(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "bar", Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), }, Spec: apiv1.PodSpec{ NodeName: "node", @@ -74,7 +74,7 @@ func TestDrain(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "bar", Namespace: "kube-system", - OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), + OwnerReferences: test.GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), Labels: map[string]string{ "k8s-app": "bar", }, @@ -124,99 +124,9 @@ func TestDrain(t *testing.T) { }, }, } - - kubeSystemFailedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - kubeSystemTerminalPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - kubeSystemEvictedPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - kubeSystemSafePod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - drain.PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - zeroGracePeriod = int64(0) - kubeSystemLongTerminatingPod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - 
DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - extendedGracePeriod = int64(6 * 60) // 6 minutes - kubeSystemLongTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * time.Duration(extendedGracePeriod) * time.Second)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } ) - for _, test := range []struct { - desc string + for desc, test := range map[string]struct { pod *apiv1.Pod rcs []*apiv1.ReplicationController rss []*appsv1.ReplicaSet @@ -226,77 +136,43 @@ func TestDrain(t *testing.T) { wantReason drain.BlockingPodReason wantError bool }{ - { - desc: "kube-system pod with PodSafeToEvict annotation", - pod: kubeSystemSafePod, - }, - { - desc: "empty PDB with RC-managed pod", + "empty PDB with RC-managed pod": { pod: rcPod, rcs: []*apiv1.ReplicationController{&rc}, pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, }, - { - desc: "kube-system PDB with matching kube-system pod", + "kube-system PDB with matching kube-system pod": { pod: kubeSystemRcPod, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, }, - { - desc: "kube-system PDB with non-matching kube-system pod", + "kube-system PDB with non-matching kube-system pod": { pod: kubeSystemRcPod, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, wantReason: drain.UnmovableKubeSystemPod, wantError: true, }, - { - desc: "kube-system PDB with default namespace pod", + "kube-system PDB with default namespace pod": { pod: rcPod, rcs: []*apiv1.ReplicationController{&rc}, pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, }, - { - desc: "default namespace PDB with matching labels kube-system pod", + "default namespace PDB with matching labels kube-system pod": { pod: kubeSystemRcPod, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, wantReason: drain.UnmovableKubeSystemPod, wantError: true, }, - { - desc: "default namespace PDB with matching labels kube-system pod and rule disabled", + "default namespace PDB with matching labels kube-system pod and rule disabled": { pod: kubeSystemRcPod, rcs: []*apiv1.ReplicationController{&kubeSystemRc}, pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, disableRule: true, }, - { - desc: "kube-system failed pod", - pod: kubeSystemFailedPod, - }, - { - desc: "kube-system terminal pod", - pod: kubeSystemTerminalPod, - }, - { - desc: "kube-system evicted pod", - pod: kubeSystemEvictedPod, - }, - { - desc: "kube-system pod with PodSafeToEvict annotation", - pod: kubeSystemSafePod, - }, - { - desc: "kube-system long terminating pod with 0 grace period", - pod: kubeSystemLongTerminatingPod, - }, - { - desc: "kube-system long terminating pod with extended grace period", - pod: kubeSystemLongTerminatingPodWithExtendedGracePeriod, - }, } { - t.Run(test.desc, func(t *testing.T) { + t.Run(desc, func(t *testing.T) { tracker := pdb.NewBasicRemainingPdbTracker() 
tracker.SetPdbs(test.pdbs) diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go new file mode 100644 index 000000000000..164c61f4677c --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go @@ -0,0 +1,39 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package terminal + +import ( + apiv1 "k8s.io/api/core/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" +) + +// Rule is a drainability rule on how to handle terminal pods. +type Rule struct{} + +// New creates a new Rule. +func New() *Rule { + return &Rule{} +} + +// Drainable decides what to do with terminal pods on node drain. +func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { + if drain.IsPodTerminal(pod) { + return drainability.NewUndefinedStatus(drainability.Interrupt) + } + return drainability.NewUndefinedStatus() +} diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go new file mode 100644 index 000000000000..9d8cfdc566ee --- /dev/null +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go @@ -0,0 +1,79 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package terminal + +import ( + "testing" + + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" +) + +func TestDrainable(t *testing.T) { + for desc, tc := range map[string]struct { + pod *apiv1.Pod + want drainability.Status + }{ + "regular pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pod", + Namespace: "ns", + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "terminal pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + "failed pod": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + }, + want: drainability.NewUndefinedStatus(drainability.Interrupt), + }, + } { + t.Run(desc, func(t *testing.T) { + got := New().Drainable(nil, tc.pod) + if tc.want != got { + t.Errorf("Rule.Drainable(%v) = %v, want %v", tc.pod.Name, got, tc.want) + } + }) + } +} diff --git a/cluster-autoscaler/simulator/drainability/status.go b/cluster-autoscaler/simulator/drainability/status.go index d73f346bead2..281d03c9c5b4 100644 --- a/cluster-autoscaler/simulator/drainability/status.go +++ b/cluster-autoscaler/simulator/drainability/status.go @@ -48,6 +48,23 @@ type Status struct { BlockingReason drain.BlockingPodReason // Error contains an optional error message. Error error + // Interrupted means that the Rule returning the status exited early and that + // additional Rules should not be run. + Interrupted bool +} + +// Option is used to modify a Status. +type Option func(*Status) + +// Interrupt implies no additional Rules should be run. +func Interrupt(s *Status) { + s.Interrupted = true +} + +func applyOptions(s *Status, opts []Option) { + for _, opt := range opts { + opt(s) + } } // NewDrainableStatus returns a new Status indicating that a pod can be drained. @@ -74,6 +91,8 @@ func NewSkipStatus() Status { } // NewUndefinedStatus returns a new Status that doesn't contain a decision. 
-func NewUndefinedStatus() Status { - return Status{} +func NewUndefinedStatus(opts ...Option) Status { + s := Status{} + applyOptions(&s, opts) + return s } From 96e67eef318a380bc985cd5ba8d9f09d5b48dc37 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Fri, 6 Oct 2023 23:53:30 +0000 Subject: [PATCH 6/9] Filter out disabled drainability rules during creation --- .../rules/customcontroller/rule.go | 6 +- .../rules/customcontroller/rule_test.go | 64 +++++++++---------- .../drainability/rules/localstorage/rule.go | 12 ++-- .../rules/localstorage/rule_test.go | 40 ++---------- .../simulator/drainability/rules/rules.go | 33 ++++++---- .../drainability/rules/system/rule.go | 10 ++- .../drainability/rules/system/rule_test.go | 17 ++--- 7 files changed, 69 insertions(+), 113 deletions(-) diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go index 55caed24c15c..4661f12dc588 100644 --- a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go @@ -28,21 +28,19 @@ import ( // Rule is a drainability rule on how to handle replicated pods. type Rule struct { - enabled bool minReplicaCount int } // New creates a new Rule. -func New(enabled bool, minReplicaCount int) *Rule { +func New(minReplicaCount int) *Rule { return &Rule{ - enabled: enabled, minReplicaCount: minReplicaCount, } } // Drainable decides what to do with replicated pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if !r.enabled || drainCtx.Listers == nil { + if drainCtx.Listers == nil { return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go index 2c6909694f01..59d001878dfb 100644 --- a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go @@ -17,7 +17,6 @@ limitations under the License. 
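// Illustrative sketch, not part of the patch: how a rule can use the
// Interrupt option added to drainability.Status above. Returning an
// undefined status marked as interrupted makes Rules.Drainable stop
// evaluating any remaining rules for that pod (see the Interrupted check
// added to rules.go earlier in this series). The rule and the
// alreadyHandled helper below are hypothetical:
//
//	func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status {
//		if alreadyHandled(pod) { // hypothetical helper, not a real API
//			// No opinion on drainability, but later rules should be skipped.
//			return drainability.NewUndefinedStatus(drainability.Interrupt)
//		}
//		return drainability.NewUndefinedStatus()
//	}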
package customcontroller import ( - "fmt" "testing" "time" @@ -84,11 +83,10 @@ func TestDrainable(t *testing.T) { ) for desc, test := range map[string]struct { - desc string - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - enabled bool + desc string + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet wantReason drain.BlockingPodReason wantError bool @@ -181,38 +179,34 @@ func TestDrainable(t *testing.T) { rss: []*appsv1.ReplicaSet{&rs}, }, } { - for _, enabled := range []bool{true, false} { - desc = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.desc, enabled) - - t.Run(desc, func(t *testing.T) { - var err error - var rcLister v1lister.ReplicationControllerLister - if len(test.rcs) > 0 { - rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) - assert.NoError(t, err) - } - var rsLister v1appslister.ReplicaSetLister - if len(test.rss) > 0 { - rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) - assert.NoError(t, err) - } - dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) - assert.NoError(t, err) - jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + t.Run(desc, func(t *testing.T) { + var err error + var rcLister v1lister.ReplicationControllerLister + if len(test.rcs) > 0 { + rcLister, err = kube_util.NewTestReplicationControllerLister(test.rcs) assert.NoError(t, err) - ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + } + var rsLister v1appslister.ReplicaSetLister + if len(test.rss) > 0 { + rsLister, err = kube_util.NewTestReplicaSetLister(test.rss) assert.NoError(t, err) + } + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) - registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + registry := kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) - drainCtx := &drainability.DrainContext{ - Listers: registry, - Timestamp: testTime, - } - status := New(enabled, 0).Drainable(drainCtx, test.pod) - assert.Equal(t, test.wantReason, status.BlockingReason) - assert.Equal(t, test.wantError, status.Error != nil) - }) - } + drainCtx := &drainability.DrainContext{ + Listers: registry, + Timestamp: testTime, + } + status := New(0).Drainable(drainCtx, test.pod) + assert.Equal(t, test.wantReason, status.BlockingReason) + assert.Equal(t, test.wantError, status.Error != nil) + }) } } diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go index 68533fad5e1b..0884781f7708 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule.go @@ -25,20 +25,16 @@ import ( ) // Rule is a drainability rule on how to handle local storage pods. -type Rule struct { - enabled bool -} +type Rule struct{} // New creates a new Rule. -func New(enabled bool) *Rule { - return &Rule{ - enabled: enabled, - } +func New() *Rule { + return &Rule{} } // Drainable decides what to do with local storage pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if r.enabled && drain.HasBlockingLocalStorage(pod) { + if drain.HasBlockingLocalStorage(pod) { return drainability.NewBlockedStatus(drain.LocalStorageRequested, fmt.Errorf("pod with local storage present: %s", pod.Name)) } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go index cb96b568678f..25713c7f5bc8 100644 --- a/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/localstorage/rule_test.go @@ -48,10 +48,9 @@ func TestDrainable(t *testing.T) { ) for desc, test := range map[string]struct { - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - disabledRule bool + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet wantReason drain.BlockingPodReason wantError bool @@ -273,43 +272,12 @@ func TestDrainable(t *testing.T) { wantReason: drain.LocalStorageRequested, wantError: true, }, - "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation empty values and rule disabled": { - pod: &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: test.GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - drain.SafeToEvictLocalVolumesKey: ",", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - }, - rcs: []*apiv1.ReplicationController{&rc}, - disabledRule: true, - }, } { t.Run(desc, func(t *testing.T) { drainCtx := &drainability.DrainContext{ Timestamp: testTime, } - status := New(!test.disabledRule).Drainable(drainCtx, test.pod) + status := New().Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 73c7d2b73cdb..6bd2f970a42f 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -45,23 +45,32 @@ type Rule interface { // Default returns the default list of Rules. 
func Default(deleteOptions options.NodeDeleteOptions) Rules { - return []Rule{ - mirror.New(), - longterminating.New(), - customcontroller.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount), + var rules Rules + for _, r := range []struct { + rule Rule + skip bool + }{ + {rule: mirror.New()}, + {rule: longterminating.New()}, + {rule: customcontroller.New(deleteOptions.MinReplicaCount), skip: !deleteOptions.SkipNodesWithCustomControllerPods}, // Interrupting checks - daemonset.New(), - safetoevict.New(), - terminal.New(), + {rule: daemonset.New()}, + {rule: safetoevict.New()}, + {rule: terminal.New()}, // Blocking checks - replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount), - system.New(deleteOptions.SkipNodesWithSystemPods), - notsafetoevict.New(), - localstorage.New(deleteOptions.SkipNodesWithLocalStorage), - pdbrule.New(), + {rule: replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount)}, + {rule: system.New(), skip: !deleteOptions.SkipNodesWithSystemPods}, + {rule: notsafetoevict.New()}, + {rule: localstorage.New(), skip: !deleteOptions.SkipNodesWithLocalStorage}, + {rule: pdbrule.New()}, + } { + if !r.skip { + rules = append(rules, r.rule) + } } + return rules } // Rules defines operations on a collections of rules. diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule.go b/cluster-autoscaler/simulator/drainability/rules/system/rule.go index d8bfdf3443cc..a0e9160189a5 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule.go @@ -25,18 +25,16 @@ import ( ) // Rule is a drainability rule on how to handle system pods. -type Rule struct { - enabled bool -} +type Rule struct{} // New creates a new Rule. -func New(enabled bool) *Rule { - return &Rule{enabled: enabled} +func New() *Rule { + return &Rule{} } // Drainable decides what to do with system pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { - if r.enabled && pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { + if pod.Namespace == "kube-system" && len(drainCtx.RemainingPdbTracker.MatchingPdbs(pod)) == 0 { return drainability.NewBlockedStatus(drain.UnmovableKubeSystemPod, fmt.Errorf("non-daemonset, non-mirrored, non-pdb-assigned kube-system pod present: %s", pod.Name)) } return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go index 2e6b7464cbc7..bc25337750c8 100644 --- a/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/system/rule_test.go @@ -127,11 +127,10 @@ func TestDrainable(t *testing.T) { ) for desc, test := range map[string]struct { - pod *apiv1.Pod - rcs []*apiv1.ReplicationController - rss []*appsv1.ReplicaSet - pdbs []*policyv1.PodDisruptionBudget - disableRule bool + pod *apiv1.Pod + rcs []*apiv1.ReplicationController + rss []*appsv1.ReplicaSet + pdbs []*policyv1.PodDisruptionBudget wantReason drain.BlockingPodReason wantError bool @@ -165,12 +164,6 @@ func TestDrainable(t *testing.T) { wantReason: drain.UnmovableKubeSystemPod, wantError: true, }, - "default namespace PDB with matching labels kube-system pod and rule disabled": { - pod: kubeSystemRcPod, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, - disableRule: true, - }, } { t.Run(desc, func(t *testing.T) { tracker := pdb.NewBasicRemainingPdbTracker() @@ -180,7 +173,7 @@ func TestDrainable(t *testing.T) { RemainingPdbTracker: tracker, Timestamp: testTime, } - status := New(!test.disableRule).Drainable(drainCtx, test.pod) + status := New().Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) From 084264c152c9fcf4cf06bc3204c51ff76253d914 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Fri, 6 Oct 2023 23:55:46 +0000 Subject: [PATCH 7/9] Refactor GetPodsForDeletion logic and tests into simulator --- cluster-autoscaler/simulator/cluster.go | 9 +- cluster-autoscaler/simulator/cluster_test.go | 28 +- cluster-autoscaler/simulator/drain.go | 29 +- cluster-autoscaler/simulator/drain_test.go | 710 +++++++++++++++--- .../drainability/rules/daemonset/rule.go | 2 +- .../drainability/rules/daemonset/rule_test.go | 2 +- .../rules/longterminating/rule.go | 2 +- .../rules/longterminating/rule_test.go | 4 +- .../simulator/drainability/rules/rules.go | 2 +- .../drainability/rules/safetoevict/rule.go | 2 +- .../rules/safetoevict/rule_test.go | 2 +- .../drainability/rules/terminal/rule.go | 2 +- .../drainability/rules/terminal/rule_test.go | 4 +- .../simulator/drainability/status.go | 23 +- cluster-autoscaler/utils/drain/drain.go | 33 - cluster-autoscaler/utils/drain/drain_test.go | 531 ------------- 16 files changed, 620 insertions(+), 765 deletions(-) diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index 1c3e5bf10a3a..6bd27ad44c69 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -123,9 +123,6 @@ func (r *RemovalSimulator) FindNodesToRemove( timestamp time.Time, remainingPdbTracker pdb.RemainingPdbTracker, ) (nodesToRemove []NodeToBeRemoved, unremovableNodes []*UnremovableNode) { - 
result := make([]NodeToBeRemoved, 0) - unremovable := make([]*UnremovableNode, 0) - destinationMap := make(map[string]bool, len(destinations)) for _, destination := range destinations { destinationMap[destination] = true @@ -134,12 +131,12 @@ func (r *RemovalSimulator) FindNodesToRemove( for _, nodeName := range candidates { rn, urn := r.SimulateNodeRemoval(nodeName, destinationMap, timestamp, remainingPdbTracker) if rn != nil { - result = append(result, *rn) + nodesToRemove = append(nodesToRemove, *rn) } else if urn != nil { - unremovable = append(unremovable, urn) + unremovableNodes = append(unremovableNodes, urn) } } - return result, unremovable + return nodesToRemove, unremovableNodes } // SimulateNodeRemoval simulates removing a node from the cluster to check diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go index c53aec5cbc01..e08c605c7cf9 100644 --- a/cluster-autoscaler/simulator/cluster_test.go +++ b/cluster-autoscaler/simulator/cluster_test.go @@ -136,14 +136,11 @@ func TestFindNodesToRemove(t *testing.T) { fullNodeInfo.AddPod(pod4) emptyNodeToRemove := NodeToBeRemoved{ - Node: emptyNode, - PodsToReschedule: []*apiv1.Pod{}, - DaemonSetPods: []*apiv1.Pod{}, + Node: emptyNode, } drainableNodeToRemove := NodeToBeRemoved{ Node: drainableNode, PodsToReschedule: []*apiv1.Pod{pod1, pod2}, - DaemonSetPods: []*apiv1.Pod{}, } clusterSnapshot := clustersnapshot.NewBasicClusterSnapshot() @@ -153,19 +150,16 @@ func TestFindNodesToRemove(t *testing.T) { tests := []findNodesToRemoveTestConfig{ { - name: "just an empty node, should be removed", - pods: []*apiv1.Pod{}, - candidates: []string{emptyNode.Name}, - allNodes: []*apiv1.Node{emptyNode}, - toRemove: []NodeToBeRemoved{emptyNodeToRemove}, - unremovable: []*UnremovableNode{}, + name: "just an empty node, should be removed", + candidates: []string{emptyNode.Name}, + allNodes: []*apiv1.Node{emptyNode}, + toRemove: []NodeToBeRemoved{emptyNodeToRemove}, }, { name: "just a drainable node, but nowhere for pods to go to", pods: []*apiv1.Pod{pod1, pod2}, candidates: []string{drainableNode.Name}, allNodes: []*apiv1.Node{drainableNode}, - toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, { @@ -181,16 +175,14 @@ func TestFindNodesToRemove(t *testing.T) { pods: []*apiv1.Pod{pod1, pod2, pod4}, candidates: []string{drainableNode.Name}, allNodes: []*apiv1.Node{drainableNode, fullNode}, - toRemove: []NodeToBeRemoved{}, unremovable: []*UnremovableNode{{Node: drainableNode, Reason: NoPlaceToMovePods}}, }, { - name: "4 nodes, 1 empty, 1 drainable", - pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, - candidates: []string{emptyNode.Name, drainableNode.Name}, - allNodes: []*apiv1.Node{emptyNode, drainableNode, fullNode, nonDrainableNode}, - toRemove: []NodeToBeRemoved{emptyNodeToRemove, drainableNodeToRemove}, - unremovable: []*UnremovableNode{}, + name: "4 nodes, 1 empty, 1 drainable", + pods: []*apiv1.Pod{pod1, pod2, pod3, pod4}, + candidates: []string{emptyNode.Name, drainableNode.Name}, + allNodes: []*apiv1.Node{emptyNode, drainableNode, fullNode, nonDrainableNode}, + toRemove: []NodeToBeRemoved{emptyNodeToRemove, drainableNodeToRemove}, }, } diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 5df8585e1f3b..8a7ae28b9e33 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -39,7 +39,6 @@ import ( // If listers is not nil it checks whether RC, DS, Jobs and 
RS that created // these pods still exist. func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options.NodeDeleteOptions, drainabilityRules rules.Rules, listers kube_util.ListerRegistry, remainingPdbTracker pdb.RemainingPdbTracker, timestamp time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod, blockingPod *drain.BlockingPod, err error) { - var drainPods, drainDs []*apiv1.Pod if drainabilityRules == nil { drainabilityRules = rules.Default(deleteOptions) } @@ -55,31 +54,21 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. pod := podInfo.Pod status := drainabilityRules.Drainable(drainCtx, pod) switch status.Outcome { - case drainability.UndefinedOutcome: - pods = append(pods, podInfo.Pod) - case drainability.DrainOk: + case drainability.UndefinedOutcome, drainability.DrainOk: + if drain.IsPodLongTerminating(pod, timestamp) { + continue + } if pod_util.IsDaemonSetPod(pod) { - drainDs = append(drainDs, pod) + daemonSetPods = append(daemonSetPods, pod) } else { - drainPods = append(drainPods, pod) + pods = append(pods, pod) } case drainability.BlockDrain: - blockingPod = &drain.BlockingPod{ + return nil, nil, &drain.BlockingPod{ Pod: pod, Reason: status.BlockingReason, - } - err = status.Error - return + }, status.Error } } - - pods, daemonSetPods = drain.GetPodsForDeletionOnNodeDrain( - pods, - remainingPdbTracker.GetPdbs(), - deleteOptions.SkipNodesWithSystemPods, - deleteOptions.SkipNodesWithLocalStorage, - deleteOptions.SkipNodesWithCustomControllerPods, - timestamp) - - return append(pods, drainPods...), append(daemonSetPods, drainDs...), nil, nil + return pods, daemonSetPods, nil, nil } diff --git a/cluster-autoscaler/simulator/drain_test.go b/cluster-autoscaler/simulator/drain_test.go index 1a7326be9a52..ca0d6a05548f 100644 --- a/cluster-autoscaler/simulator/drain_test.go +++ b/cluster-autoscaler/simulator/drain_test.go @@ -21,6 +21,8 @@ import ( "testing" "time" + appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -30,6 +32,8 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules" "k8s.io/autoscaler/cluster-autoscaler/simulator/options" "k8s.io/autoscaler/cluster-autoscaler/utils/drain" + "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" + kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/kubernetes/pkg/kubelet/types" schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework" @@ -38,150 +42,466 @@ import ( ) func TestGetPodsToMove(t *testing.T) { - testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - unreplicatedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "unreplicatedPod", - Namespace: "ns", - }, - } - rsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rsPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - } - manifestPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "manifestPod", - Namespace: "kube-system", - Annotations: map[string]string{ - types.ConfigMirrorAnnotationKey: "something", + var ( + testTime = time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) + replicas = int32(5) + + unreplicatedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "unreplicatedPod", + Namespace: "ns", }, - }, - } - dsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "dsPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("ds", "DaemonSet", "extensions/v1beta1", ""), - }, - } - systemPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "systemPod", - Namespace: "kube-system", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - } - localStoragePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "localStoragePod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - Spec: apiv1.PodSpec{ - Volumes: []apiv1.Volume{ - { - Name: "empty-vol", - VolumeSource: apiv1.VolumeSource{ - EmptyDir: &apiv1.EmptyDirVolumeSource{}, - }, + } + manifestPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "manifestPod", + Namespace: "kube-system", + Annotations: map[string]string{ + types.ConfigMirrorAnnotationKey: "something", }, }, - }, - } - nonLocalStoragePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nonLocalStoragePod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - }, - Spec: apiv1.PodSpec{ - Volumes: []apiv1.Volume{ - { - Name: "my-repo", - VolumeSource: apiv1.VolumeSource{ - GitRepo: &apiv1.GitRepoVolumeSource{ - Repository: "my-repo", + } + systemPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "systemPod", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + }, + } + localStoragePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "localStoragePod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + }, + Spec: apiv1.PodSpec{ + Volumes: []apiv1.Volume{ + { + Name: "empty-vol", + VolumeSource: apiv1.VolumeSource{ + EmptyDir: &apiv1.EmptyDirVolumeSource{}, }, }, }, }, - }, - } - pdbPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pdbPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - Labels: map[string]string{ - "critical": "true", + } + nonLocalStoragePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nonLocalStoragePod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), }, - }, - Spec: apiv1.PodSpec{}, - } - one := intstr.FromInt(1) - restrictivePdb := &policyv1.PodDisruptionBudget{ - 
ObjectMeta: metav1.ObjectMeta{ - Name: "foobar", - Namespace: "ns", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - MinAvailable: &one, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "critical": "true", + Spec: apiv1.PodSpec{ + Volumes: []apiv1.Volume{ + { + Name: "my-repo", + VolumeSource: apiv1.VolumeSource{ + GitRepo: &apiv1.GitRepoVolumeSource{ + Repository: "my-repo", + }, + }, + }, }, }, - }, - Status: policyv1.PodDisruptionBudgetStatus{ - DisruptionsAllowed: 0, - }, - } - permissivePdb := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Name: "foobar", - Namespace: "ns", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - MinAvailable: &one, - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ + } + pdbPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "pdbPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + Labels: map[string]string{ "critical": "true", }, }, - }, - Status: policyv1.PodDisruptionBudgetStatus{ - DisruptionsAllowed: 1, - }, - } - terminatedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "terminatedPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - DeletionTimestamp: &metav1.Time{ - Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold - time.Minute), // more than PodLongTerminatingExtraThreshold + Spec: apiv1.PodSpec{}, + } + one = intstr.FromInt(1) + restrictivePdb = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foobar", + Namespace: "ns", }, - }, - } - terminatingPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "terminatingPod", - Namespace: "ns", - OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), - DeletionTimestamp: &metav1.Time{ - Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold + time.Minute), // still terminating, below the default TerminatingGracePeriode + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "critical": "true", + }, + }, }, - }, - } + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 0, + }, + } + permissivePdb = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foobar", + Namespace: "ns", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "critical": "true", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + terminatedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "terminatedPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + DeletionTimestamp: &metav1.Time{ + Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold - time.Minute), // more than PodLongTerminatingExtraThreshold + }, + }, + } + terminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "terminatingPod", + Namespace: "ns", + OwnerReferences: GenerateOwnerReferences("rs", "ReplicaSet", "extensions/v1beta1", ""), + DeletionTimestamp: &metav1.Time{ + Time: testTime.Add(-1*drain.PodLongTerminatingExtraThreshold + time.Minute), // still terminating, below the default TerminatingGracePeriod + }, + }, + } + + rc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "default", + SelfLink: 
"api/v1/namespaces/default/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + rcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + kubeSystemRc = apiv1.ReplicationController{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rc", + Namespace: "kube-system", + SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", + }, + Spec: apiv1.ReplicationControllerSpec{ + Replicas: &replicas, + }, + } + kubeSystemRcPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), + Labels: map[string]string{ + "k8s-app": "bar", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + ds = appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ds", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", + }, + } + dsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + cdsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + job = batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Name: "job", + Namespace: "default", + SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", + }, + } + jobPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), + }, + } + statefulset = appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ss", + Namespace: "default", + SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", + }, + } + ssPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), + }, + } + rs = appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rs", + Namespace: "default", + SelfLink: "api/v1/namespaces/default/replicasets/rs", + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: &replicas, + }, + } + rsPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + rsPodDeleted = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + emptyDirSafeToEvictLocalVolumeMultiValAllMatching = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + Annotations: map[string]string{ + drain.SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", + }, + }, + Spec: apiv1.PodSpec{ 
+ NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch-1", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-2", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + { + Name: "scratch-3", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + terminalPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodSucceeded, + }, + } + zeroGracePeriod = int64(0) + longTerminatingPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * drain.PodLongTerminatingExtraThreshold)}, + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + extendedGracePeriod = int64(6 * 60) // 6 minutes + longTerminatingPodWithExtendedGracePeriod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Duration(extendedGracePeriod/2) * time.Second)}, + OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + } + failedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyNever, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + evictedPod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + RestartPolicy: apiv1.RestartPolicyAlways, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodFailed, + }, + } + safePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + kubeSystemSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "kube-system", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + } + emptydirSafePod = &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + Annotations: map[string]string{ + drain.PodSafeToEvictKey: "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + Volumes: []apiv1.Volume{ + { + Name: "scratch", + VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, + }, + }, + }, + } + emptyPDB = &policyv1.PodDisruptionBudget{} + kubeSystemPDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "bar", + }, + }, + }, + Status: 
policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + kubeSystemFakePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "foo", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + defaultNamespacePDB = &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &one, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "k8s-app": "PDB-managed pod", + }, + }, + }, + Status: policyv1.PodDisruptionBudgetStatus{ + DisruptionsAllowed: 1, + }, + } + ) testCases := []struct { desc string pods []*apiv1.Pod pdbs []*policyv1.PodDisruptionBudget + rcs []*apiv1.ReplicationController + replicaSets []*appsv1.ReplicaSet rules rules.Rules wantPods []*apiv1.Pod wantDs []*apiv1.Pod @@ -304,9 +624,149 @@ func TestGetPodsToMove(t *testing.T) { rules: []rules.Rule{cantDecide{}}, wantPods: []*apiv1.Pod{rsPod}, }, + + { + desc: "RC-managed pod", + pods: []*apiv1.Pod{rcPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "DS-managed pod", + pods: []*apiv1.Pod{dsPod}, + wantDs: []*apiv1.Pod{dsPod}, + }, + { + desc: "DS-managed pod by a custom Daemonset", + pods: []*apiv1.Pod{cdsPod}, + wantDs: []*apiv1.Pod{cdsPod}, + }, + { + desc: "Job-managed pod", + pods: []*apiv1.Pod{jobPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{jobPod}, + }, + { + desc: "SS-managed pod", + pods: []*apiv1.Pod{ssPod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{ssPod}, + }, + { + desc: "RS-managed pod", + pods: []*apiv1.Pod{rsPod}, + replicaSets: []*appsv1.ReplicaSet{&rs}, + wantPods: []*apiv1.Pod{rsPod}, + }, + { + desc: "RS-managed pod that is being deleted", + pods: []*apiv1.Pod{rsPodDeleted}, + replicaSets: []*appsv1.ReplicaSet{&rs}, + }, + { + desc: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", + pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, + }, + { + desc: "failed pod", + pods: []*apiv1.Pod{failedPod}, + wantPods: []*apiv1.Pod{failedPod}, + }, + { + desc: "long terminating pod with 0 grace period", + pods: []*apiv1.Pod{longTerminatingPod}, + rcs: []*apiv1.ReplicationController{&rc}, + }, + { + desc: "long terminating pod with extended grace period", + pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, + }, + { + desc: "evicted pod", + pods: []*apiv1.Pod{evictedPod}, + wantPods: []*apiv1.Pod{evictedPod}, + }, + { + desc: "pod in terminal state", + pods: []*apiv1.Pod{terminalPod}, + wantPods: []*apiv1.Pod{terminalPod}, + }, + { + desc: "pod with PodSafeToEvict annotation", + pods: []*apiv1.Pod{safePod}, + wantPods: []*apiv1.Pod{safePod}, + }, + { + desc: "kube-system pod with PodSafeToEvict annotation", + pods: []*apiv1.Pod{kubeSystemSafePod}, + wantPods: []*apiv1.Pod{kubeSystemSafePod}, + }, + { + desc: "pod with EmptyDir and PodSafeToEvict annotation", + pods: []*apiv1.Pod{emptydirSafePod}, + wantPods: []*apiv1.Pod{emptydirSafePod}, + }, + { + desc: "empty PDB with 
RC-managed pod", + pods: []*apiv1.Pod{rcPod}, + pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "kube-system PDB with matching kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantPods: []*apiv1.Pod{kubeSystemRcPod}, + }, + { + desc: "kube-system PDB with non-matching kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemFakePDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantErr: true, + wantBlocking: &drain.BlockingPod{Pod: kubeSystemRcPod, Reason: drain.UnmovableKubeSystemPod}, + }, + { + desc: "kube-system PDB with default namespace pod", + pods: []*apiv1.Pod{rcPod}, + pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, + rcs: []*apiv1.ReplicationController{&rc}, + wantPods: []*apiv1.Pod{rcPod}, + }, + { + desc: "default namespace PDB with matching labels kube-system pod", + pods: []*apiv1.Pod{kubeSystemRcPod}, + pdbs: []*policyv1.PodDisruptionBudget{defaultNamespacePDB}, + rcs: []*apiv1.ReplicationController{&kubeSystemRc}, + wantErr: true, + wantBlocking: &drain.BlockingPod{Pod: kubeSystemRcPod, Reason: drain.UnmovableKubeSystemPod}, + }, } for _, tc := range testCases { t.Run(tc.desc, func(t *testing.T) { + var registry kubernetes.ListerRegistry + if tc.rcs != nil || tc.replicaSets != nil { + rcLister, err := kube_util.NewTestReplicationControllerLister(tc.rcs) + assert.NoError(t, err) + rsLister, err := kube_util.NewTestReplicaSetLister(tc.replicaSets) + assert.NoError(t, err) + dsLister, err := kube_util.NewTestDaemonSetLister([]*appsv1.DaemonSet{&ds}) + assert.NoError(t, err) + jobLister, err := kube_util.NewTestJobLister([]*batchv1.Job{&job}) + assert.NoError(t, err) + ssLister, err := kube_util.NewTestStatefulSetLister([]*appsv1.StatefulSet{&statefulset}) + assert.NoError(t, err) + + registry = kube_util.NewListerRegistry(nil, nil, nil, nil, dsLister, rcLister, jobLister, rsLister, ssLister) + } + deleteOptions := options.NodeDeleteOptions{ SkipNodesWithSystemPods: true, SkipNodesWithLocalStorage: true, @@ -315,7 +775,7 @@ func TestGetPodsToMove(t *testing.T) { rules := append(tc.rules, rules.Default(deleteOptions)...) tracker := pdb.NewBasicRemainingPdbTracker() tracker.SetPdbs(tc.pdbs) - p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, nil, tracker, testTime) + p, d, b, err := GetPodsToMove(schedulerframework.NewNodeInfo(tc.pods...), deleteOptions, rules, registry, tracker, testTime) if tc.wantErr { assert.Error(t, err) } else { diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go index 05f93d288a1d..894065a3de22 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with daemon set pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if pod_util.IsDaemonSetPod(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go index d444d7079999..1bd05e7d35e4 100644 --- a/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/daemonset/rule_test.go @@ -47,7 +47,7 @@ func TestDrainable(t *testing.T) { OwnerReferences: test.GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", ""), }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go index 5edc6b70f603..8b7c6aa64ca3 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with long terminating pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go index 934453429d1e..e9660aa34f6e 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -61,7 +61,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, "long terminating pod with extended grace period": { pod: &apiv1.Pod{ @@ -78,7 +78,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index 6bd2f970a42f..b02fb9aba0f0 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -88,7 +88,7 @@ func (rs Rules) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) d for _, r := range rs { d := r.Drainable(drainCtx, pod) - if d.Interrupted || d.Outcome != drainability.UndefinedOutcome { + if d.Outcome != drainability.UndefinedOutcome { return d } } diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go index e9fd565d6d97..396e982c0213 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what 
to do with safe to evict pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.HasSafeToEvictAnnotation(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go index d0560f2fea0d..3052183ffc79 100644 --- a/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/safetoevict/rule_test.go @@ -49,7 +49,7 @@ func TestDrainable(t *testing.T) { }, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go index 164c61f4677c..addae1733762 100644 --- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with terminal pods on node drain. func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodTerminal(pod) { - return drainability.NewUndefinedStatus(drainability.Interrupt) + return drainability.NewDrainableStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go index 9d8cfdc566ee..f63d9b660b79 100644 --- a/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/terminal/rule_test.go @@ -51,7 +51,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodSucceeded, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, "failed pod": { pod: &apiv1.Pod{ @@ -66,7 +66,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodFailed, }, }, - want: drainability.NewUndefinedStatus(drainability.Interrupt), + want: drainability.NewDrainableStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/status.go b/cluster-autoscaler/simulator/drainability/status.go index 281d03c9c5b4..d73f346bead2 100644 --- a/cluster-autoscaler/simulator/drainability/status.go +++ b/cluster-autoscaler/simulator/drainability/status.go @@ -48,23 +48,6 @@ type Status struct { BlockingReason drain.BlockingPodReason // Error contains an optional error message. Error error - // Interrupted means that the Rule returning the status exited early and that - // additional Rules should not be run. - Interrupted bool -} - -// Option is used to modify a Status. -type Option func(*Status) - -// Interrupt implies no additional Rules should be run. -func Interrupt(s *Status) { - s.Interrupted = true -} - -func applyOptions(s *Status, opts []Option) { - for _, opt := range opts { - opt(s) - } } // NewDrainableStatus returns a new Status indicating that a pod can be drained. @@ -91,8 +74,6 @@ func NewSkipStatus() Status { } // NewUndefinedStatus returns a new Status that doesn't contain a decision. 
-func NewUndefinedStatus(opts ...Option) Status { - s := Status{} - applyOptions(&s, opts) - return s +func NewUndefinedStatus() Status { + return Status{} } diff --git a/cluster-autoscaler/utils/drain/drain.go b/cluster-autoscaler/utils/drain/drain.go index 45a28a384185..81eef9ad858a 100644 --- a/cluster-autoscaler/utils/drain/drain.go +++ b/cluster-autoscaler/utils/drain/drain.go @@ -21,9 +21,7 @@ import ( "time" apiv1 "k8s.io/api/core/v1" - policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - pod_util "k8s.io/autoscaler/cluster-autoscaler/utils/pod" ) const ( @@ -70,37 +68,6 @@ const ( UnexpectedError ) -// GetPodsForDeletionOnNodeDrain returns pods that should be deleted on node -// drain as well as some extra information about possibly problematic pods -// (unreplicated and DaemonSets). -// -// This function assumes that default drainability rules have already been run -// to verify pod drainability. -func GetPodsForDeletionOnNodeDrain( - podList []*apiv1.Pod, - pdbs []*policyv1.PodDisruptionBudget, - skipNodesWithSystemPods bool, - skipNodesWithLocalStorage bool, - skipNodesWithCustomControllerPods bool, - currentTime time.Time) (pods []*apiv1.Pod, daemonSetPods []*apiv1.Pod) { - - pods = []*apiv1.Pod{} - daemonSetPods = []*apiv1.Pod{} - - for _, pod := range podList { - if IsPodLongTerminating(pod, currentTime) { - continue - } - - if pod_util.IsDaemonSetPod(pod) { - daemonSetPods = append(daemonSetPods, pod) - } else { - pods = append(pods, pod) - } - } - return pods, daemonSetPods -} - // ControllerRef returns the OwnerReference to pod's controller. func ControllerRef(pod *apiv1.Pod) *metav1.OwnerReference { return metav1.GetControllerOf(pod) diff --git a/cluster-autoscaler/utils/drain/drain_test.go b/cluster-autoscaler/utils/drain/drain_test.go index 0a20b781a9ac..95176fdf55d0 100644 --- a/cluster-autoscaler/utils/drain/drain_test.go +++ b/cluster-autoscaler/utils/drain/drain_test.go @@ -17,544 +17,13 @@ limitations under the License. package drain import ( - "fmt" "testing" "time" - appsv1 "k8s.io/api/apps/v1" - batchv1 "k8s.io/api/batch/v1" apiv1 "k8s.io/api/core/v1" - policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - . 
"k8s.io/autoscaler/cluster-autoscaler/utils/test" - - "github.com/stretchr/testify/assert" ) -// testOpts represents parameters required for a single unit test -type testOpts struct { - description string - pods []*apiv1.Pod - pdbs []*policyv1.PodDisruptionBudget - rcs []*apiv1.ReplicationController - replicaSets []*appsv1.ReplicaSet - expectPods []*apiv1.Pod - expectDaemonSetPods []*apiv1.Pod - // TODO(vadasambar): remove this when we get rid of scaleDownNodesWithCustomControllerPods - skipNodesWithCustomControllerPods bool -} - -func TestDrain(t *testing.T) { - testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) - replicas := int32(5) - - rc := apiv1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rc", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicationcontrollers/rc", - }, - Spec: apiv1.ReplicationControllerSpec{ - Replicas: &replicas, - }, - } - - rcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - kubeSystemRc := apiv1.ReplicationController{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rc", - Namespace: "kube-system", - SelfLink: "api/v1/namespaces/kube-system/replicationcontrollers/rc", - }, - Spec: apiv1.ReplicationControllerSpec{ - Replicas: &replicas, - }, - } - - kubeSystemRcPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - OwnerReferences: GenerateOwnerReferences(kubeSystemRc.Name, "ReplicationController", "core/v1", ""), - Labels: map[string]string{ - "k8s-app": "bar", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - ds := appsv1.DaemonSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ds", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/daemonsets/ds", - }, - } - - dsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "DaemonSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - cdsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(ds.Name, "CustomDaemonSet", "crd/v1", ""), - Annotations: map[string]string{ - "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - job := batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Name: "job", - Namespace: "default", - SelfLink: "/apiv1s/batch/v1/namespaces/default/jobs/job", - }, - } - - jobPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(job.Name, "Job", "batch/v1", ""), - }, - } - - statefulset := appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "ss", - Namespace: "default", - SelfLink: "/apiv1s/apps/v1/namespaces/default/statefulsets/ss", - }, - } - - ssPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(statefulset.Name, "StatefulSet", "apps/v1", ""), - }, - } - - rs := appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rs", - Namespace: "default", - SelfLink: "api/v1/namespaces/default/replicasets/rs", - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: &replicas, - }, - } - - rsPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: 
"default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - rsPodDeleted := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rs.Name, "ReplicaSet", "apps/v1", ""), - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Hour)}, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - customControllerPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - // Using names like FooController is discouraged - // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#naming-conventions - // vadasambar: I am using it here just because `FooController`` - // is easier to understand than say `FooSet` - OwnerReferences: GenerateOwnerReferences("Foo", "FooController", "apps/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptyDirSafeToEvictLocalVolumeMultiValAllMatching := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - Annotations: map[string]string{ - SafeToEvictLocalVolumesKey: "scratch-1,scratch-2,scratch-3", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch-1", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-2", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - { - Name: "scratch-3", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - terminalPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodSucceeded, - }, - } - - zeroGracePeriod := int64(0) - longTerminatingPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-2 * PodLongTerminatingExtraThreshold)}, - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &zeroGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - extendedGracePeriod := int64(6 * 60) // 6 minutes - longTerminatingPodWithExtendedGracePeriod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - DeletionTimestamp: &metav1.Time{Time: testTime.Add(-time.Duration(extendedGracePeriod/2) * time.Second)}, - OwnerReferences: GenerateOwnerReferences(rc.Name, "ReplicationController", "core/v1", ""), - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyOnFailure, - TerminationGracePeriodSeconds: &extendedGracePeriod, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodUnknown, - }, - } - - failedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyNever, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - evictedPod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", 
- }, - Spec: apiv1.PodSpec{ - NodeName: "node", - RestartPolicy: apiv1.RestartPolicyAlways, - }, - Status: apiv1.PodStatus{ - Phase: apiv1.PodFailed, - }, - } - - safePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - kubeSystemSafePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "kube-system", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - }, - } - - emptydirSafePod := &apiv1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "bar", - Namespace: "default", - Annotations: map[string]string{ - PodSafeToEvictKey: "true", - }, - }, - Spec: apiv1.PodSpec{ - NodeName: "node", - Volumes: []apiv1.Volume{ - { - Name: "scratch", - VolumeSource: apiv1.VolumeSource{EmptyDir: &apiv1.EmptyDirVolumeSource{Medium: ""}}, - }, - }, - }, - } - - emptyPDB := &policyv1.PodDisruptionBudget{} - - kubeSystemPDB := &policyv1.PodDisruptionBudget{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: "kube-system", - }, - Spec: policyv1.PodDisruptionBudgetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{ - "k8s-app": "bar", - }, - }, - }, - } - - sharedTests := []testOpts{ - { - description: "RC-managed pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "DS-managed pod", - pods: []*apiv1.Pod{dsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{dsPod}, - }, - { - description: "DS-managed pod by a custom Daemonset", - pods: []*apiv1.Pod{cdsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{cdsPod}, - }, - { - description: "Job-managed pod", - pods: []*apiv1.Pod{jobPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{jobPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "SS-managed pod", - pods: []*apiv1.Pod{ssPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{ssPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "RS-managed pod", - pods: []*apiv1.Pod{rsPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - replicaSets: []*appsv1.ReplicaSet{&rs}, - expectPods: []*apiv1.Pod{rsPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "RS-managed pod that is being deleted", - pods: []*apiv1.Pod{rsPodDeleted}, - pdbs: []*policyv1.PodDisruptionBudget{}, - replicaSets: []*appsv1.ReplicaSet{&rs}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and SafeToEvictLocalVolumesKey annotation with matching values", - pods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{emptyDirSafeToEvictLocalVolumeMultiValAllMatching}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "failed pod", - pods: []*apiv1.Pod{failedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{failedPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "long terminating pod with 
0 grace period", - pods: []*apiv1.Pod{longTerminatingPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "long terminating pod with extended grace period", - pods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{longTerminatingPodWithExtendedGracePeriod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "evicted pod", - pods: []*apiv1.Pod{evictedPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{evictedPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod in terminal state", - pods: []*apiv1.Pod{terminalPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{terminalPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with PodSafeToEvict annotation", - pods: []*apiv1.Pod{safePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{safePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system pod with PodSafeToEvict annotation", - pods: []*apiv1.Pod{kubeSystemSafePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{kubeSystemSafePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "pod with EmptyDir and PodSafeToEvict annotation", - pods: []*apiv1.Pod{emptydirSafePod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{emptydirSafePod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "empty PDB with RC-managed pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{emptyPDB}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system PDB with matching kube-system pod", - pods: []*apiv1.Pod{kubeSystemRcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, - rcs: []*apiv1.ReplicationController{&kubeSystemRc}, - expectPods: []*apiv1.Pod{kubeSystemRcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - { - description: "kube-system PDB with default namespace pod", - pods: []*apiv1.Pod{rcPod}, - pdbs: []*policyv1.PodDisruptionBudget{kubeSystemPDB}, - rcs: []*apiv1.ReplicationController{&rc}, - expectPods: []*apiv1.Pod{rcPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }, - } - - allTests := []testOpts{} - // Note: be careful about modifying the underlying reference values for sharedTest - // since they are shared (changing it once will change it for all shallow copies of sharedTest) - for _, sharedTest := range sharedTests { - for _, skipNodesWithCustomControllerPods := range []bool{true, false} { - // Copy test to prevent side effects. 
- test := sharedTest - test.skipNodesWithCustomControllerPods = skipNodesWithCustomControllerPods - test.description = fmt.Sprintf("%s with skipNodesWithCustomControllerPods:%t", test.description, skipNodesWithCustomControllerPods) - allTests = append(allTests, test) - } - } - - allTests = append(allTests, testOpts{ - description: "Custom-controller-managed non-blocking pod", - pods: []*apiv1.Pod{customControllerPod}, - pdbs: []*policyv1.PodDisruptionBudget{}, - expectPods: []*apiv1.Pod{customControllerPod}, - expectDaemonSetPods: []*apiv1.Pod{}, - }) - - for _, test := range allTests { - t.Run(test.description, func(t *testing.T) { - pods, daemonSetPods := GetPodsForDeletionOnNodeDrain(test.pods, test.pdbs, true, true, test.skipNodesWithCustomControllerPods, testTime) - - if len(pods) != len(test.expectPods) { - t.Fatal("wrong pod list content") - } - - assert.ElementsMatch(t, test.expectDaemonSetPods, daemonSetPods) - }) - } -} - func TestIsPodLongTerminating(t *testing.T) { testTime := time.Date(2020, time.December, 18, 17, 0, 0, 0, time.UTC) twoMinGracePeriod := int64(2 * 60) From 9a7459c88fcd8ba6ea3e7069a3af722076720401 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Mon, 9 Oct 2023 16:48:55 +0000 Subject: [PATCH 8/9] Fix custom controller drainability rule and add test coverage --- .../rules/longterminating/rule.go | 2 +- .../rules/longterminating/rule_test.go | 4 +- .../rule.go | 4 +- .../rule_test.go | 85 ++++++++++++++++++- .../drainability/rules/replicated/rule.go | 4 +- .../rules/replicated/rule_test.go | 2 +- .../simulator/drainability/rules/rules.go | 6 +- 7 files changed, 94 insertions(+), 13 deletions(-) rename cluster-autoscaler/simulator/drainability/rules/{customcontroller => replicacount}/rule.go (98%) rename cluster-autoscaler/simulator/drainability/rules/{customcontroller => replicacount}/rule_test.go (70%) diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go index 8b7c6aa64ca3..e5c50cf659b6 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule.go @@ -33,7 +33,7 @@ func New() *Rule { // Drainable decides what to do with long terminating pods on node drain. 
func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) drainability.Status { if drain.IsPodLongTerminating(pod, drainCtx.Timestamp) { - return drainability.NewDrainableStatus() + return drainability.NewSkipStatus() } return drainability.NewUndefinedStatus() } diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go index e9660aa34f6e..be0d87a9da80 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -61,7 +61,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewDrainableStatus(), + want: drainability.NewSkipStatus(), }, "long terminating pod with extended grace period": { pod: &apiv1.Pod{ @@ -78,7 +78,7 @@ func TestDrainable(t *testing.T) { Phase: apiv1.PodUnknown, }, }, - want: drainability.NewDrainableStatus(), + want: drainability.NewSkipStatus(), }, } { t.Run(desc, func(t *testing.T) { diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go similarity index 98% rename from cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go rename to cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go index 4661f12dc588..0612e11ea654 100644 --- a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -package customcontroller +package replicacount import ( "fmt" @@ -66,7 +66,7 @@ func (r *Rule) Drainable(drainCtx *drainability.DrainContext, pod *apiv1.Pod) dr return drainability.NewBlockedStatus(drain.MinReplicasReached, fmt.Errorf("replication controller for %s/%s has too few replicas spec: %d min: %d", pod.Namespace, pod.Name, rc.Spec.Replicas, r.minReplicaCount)) } } else if pod_util.IsDaemonSetPod(pod) { - if refKind == "DaemonSet" { + if refKind != "DaemonSet" { // We don't have a listener for the other DaemonSet kind. // TODO: Use a generic client for checking the reference. return drainability.NewUndefinedStatus() diff --git a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go similarity index 70% rename from cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go rename to cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go index 59d001878dfb..342a69738bb5 100644 --- a/cluster-autoscaler/simulator/drainability/rules/customcontroller/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicacount/rule_test.go @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package customcontroller +package replicacount import ( "testing" @@ -104,6 +104,21 @@ func TestDrainable(t *testing.T) { }, rcs: []*apiv1.ReplicationController{&rc}, }, + "RC-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicationController", "core/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.ControllerNotFound, + wantError: true, + }, "DS-managed pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -116,6 +131,20 @@ func TestDrainable(t *testing.T) { }, }, }, + "DS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "DaemonSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + wantReason: drain.ControllerNotFound, + wantError: true, + }, "DS-managed pod by a custom Daemonset": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -131,6 +160,21 @@ func TestDrainable(t *testing.T) { }, }, }, + "DS-managed pod by a custom Daemonset with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "CustomDaemonSet", "crd/v1", ""), + Annotations: map[string]string{ + "cluster-autoscaler.kubernetes.io/daemonset-pod": "true", + }, + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + }, "Job-managed pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -141,6 +185,18 @@ func TestDrainable(t *testing.T) { }, rcs: []*apiv1.ReplicationController{&rc}, }, + "Job-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "Job", "batch/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.ControllerNotFound, + wantError: true, + }, "SS-managed pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -151,6 +207,18 @@ func TestDrainable(t *testing.T) { }, rcs: []*apiv1.ReplicationController{&rc}, }, + "SS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "StatefulSet", "apps/v1", ""), + }, + }, + rcs: []*apiv1.ReplicationController{&rc}, + wantReason: drain.ControllerNotFound, + wantError: true, + }, "RS-managed pod": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ @@ -178,6 +246,21 @@ func TestDrainable(t *testing.T) { }, rss: []*appsv1.ReplicaSet{&rs}, }, + "RS-managed pod with missing reference": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + OwnerReferences: test.GenerateOwnerReferences("missing", "ReplicaSet", "apps/v1", ""), + }, + Spec: apiv1.PodSpec{ + NodeName: "node", + }, + }, + rss: []*appsv1.ReplicaSet{&rs}, + wantReason: drain.ControllerNotFound, + wantError: true, + }, } { t.Run(desc, func(t *testing.T) { var err error diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go index cca20d11422d..8b652e18d3c1 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go +++ 
b/cluster-autoscaler/simulator/drainability/rules/replicated/rule.go @@ -27,14 +27,12 @@ import ( // Rule is a drainability rule on how to handle replicated pods. type Rule struct { skipNodesWithCustomControllerPods bool - minReplicaCount int } // New creates a new Rule. -func New(skipNodesWithCustomControllerPods bool, minReplicaCount int) *Rule { +func New(skipNodesWithCustomControllerPods bool) *Rule { return &Rule{ skipNodesWithCustomControllerPods: skipNodesWithCustomControllerPods, - minReplicaCount: minReplicaCount, } } diff --git a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go index cee50dec100d..ce2943eb42c7 100644 --- a/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/replicated/rule_test.go @@ -231,7 +231,7 @@ func TestDrainable(t *testing.T) { Listers: registry, Timestamp: testTime, } - status := New(test.skipNodesWithCustomControllerPods, 0).Drainable(drainCtx, test.pod) + status := New(test.skipNodesWithCustomControllerPods).Drainable(drainCtx, test.pod) assert.Equal(t, test.wantReason, status.BlockingReason) assert.Equal(t, test.wantError, status.Error != nil) }) diff --git a/cluster-autoscaler/simulator/drainability/rules/rules.go b/cluster-autoscaler/simulator/drainability/rules/rules.go index b02fb9aba0f0..facd618304b7 100644 --- a/cluster-autoscaler/simulator/drainability/rules/rules.go +++ b/cluster-autoscaler/simulator/drainability/rules/rules.go @@ -20,13 +20,13 @@ import ( apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/core/scaledown/pdb" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability" - "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/customcontroller" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/daemonset" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/localstorage" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/longterminating" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/mirror" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/notsafetoevict" pdbrule "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/pdb" + "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicacount" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/replicated" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/safetoevict" "k8s.io/autoscaler/cluster-autoscaler/simulator/drainability/rules/system" @@ -52,7 +52,7 @@ func Default(deleteOptions options.NodeDeleteOptions) Rules { }{ {rule: mirror.New()}, {rule: longterminating.New()}, - {rule: customcontroller.New(deleteOptions.MinReplicaCount), skip: !deleteOptions.SkipNodesWithCustomControllerPods}, + {rule: replicacount.New(deleteOptions.MinReplicaCount), skip: !deleteOptions.SkipNodesWithCustomControllerPods}, // Interrupting checks {rule: daemonset.New()}, @@ -60,7 +60,7 @@ func Default(deleteOptions options.NodeDeleteOptions) Rules { {rule: terminal.New()}, // Blocking checks - {rule: replicated.New(deleteOptions.SkipNodesWithCustomControllerPods, deleteOptions.MinReplicaCount)}, + {rule: replicated.New(deleteOptions.SkipNodesWithCustomControllerPods)}, {rule: system.New(), skip: !deleteOptions.SkipNodesWithSystemPods}, {rule: notsafetoevict.New()}, {rule: localstorage.New(), skip: !deleteOptions.SkipNodesWithLocalStorage}, From 
33e300f4356f6b8251d42a77273e2d2cbe342179 Mon Sep 17 00:00:00 2001 From: Artem Minyaylov Date: Mon, 9 Oct 2023 20:17:55 +0000 Subject: [PATCH 9/9] Add unit test for long-terminating pod past grace period --- cluster-autoscaler/simulator/drain.go | 3 -- .../rules/longterminating/rule_test.go | 34 +++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/cluster-autoscaler/simulator/drain.go b/cluster-autoscaler/simulator/drain.go index 8a7ae28b9e33..1c97ca09827c 100644 --- a/cluster-autoscaler/simulator/drain.go +++ b/cluster-autoscaler/simulator/drain.go @@ -55,9 +55,6 @@ func GetPodsToMove(nodeInfo *schedulerframework.NodeInfo, deleteOptions options. status := drainabilityRules.Drainable(drainCtx, pod) switch status.Outcome { case drainability.UndefinedOutcome, drainability.DrainOk: - if drain.IsPodLongTerminating(pod, timestamp) { - continue - } if pod_util.IsDaemonSetPod(pod) { daemonSetPods = append(daemonSetPods, pod) } else { diff --git a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go index be0d87a9da80..713812bce85f 100644 --- a/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go +++ b/cluster-autoscaler/simulator/drainability/rules/longterminating/rule_test.go @@ -47,6 +47,23 @@ func TestDrainable(t *testing.T) { want: drainability.NewUndefinedStatus(), }, "long terminating pod with 0 grace period": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(drain.PodLongTerminatingExtraThreshold / 2)}, + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &zeroGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "expired long terminating pod with 0 grace period": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar", @@ -64,6 +81,23 @@ func TestDrainable(t *testing.T) { want: drainability.NewSkipStatus(), }, "long terminating pod with extended grace period": { + pod: &apiv1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "bar", + Namespace: "default", + DeletionTimestamp: &metav1.Time{Time: testTime.Add(time.Duration(extendedGracePeriod) / 2 * time.Second)}, + }, + Spec: apiv1.PodSpec{ + RestartPolicy: apiv1.RestartPolicyOnFailure, + TerminationGracePeriodSeconds: &extendedGracePeriod, + }, + Status: apiv1.PodStatus{ + Phase: apiv1.PodUnknown, + }, + }, + want: drainability.NewUndefinedStatus(), + }, + "expired long terminating pod with extended grace period": { pod: &apiv1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "bar",