From 13fe3d0344bfeb84701ea6c464abdcb1b2717979 Mon Sep 17 00:00:00 2001 From: Cyrill Troxler Date: Tue, 19 Mar 2024 17:18:33 +0100 Subject: [PATCH] fix: deletion priority to avoid deleting too many machines This introduces an additional deletePriority between betterDelete and mustDelete to avoid a race where multiple machines might be deleted at the same time. --- .../machineset/machineset_delete_policy.go | 13 ++++---- .../machineset_delete_policy_test.go | 30 +++++++++++++------ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/internal/controllers/machineset/machineset_delete_policy.go b/internal/controllers/machineset/machineset_delete_policy.go index 280ff7ea43e3..4e029659f27f 100644 --- a/internal/controllers/machineset/machineset_delete_policy.go +++ b/internal/controllers/machineset/machineset_delete_policy.go @@ -35,6 +35,7 @@ type ( const ( mustDelete deletePriority = 100.0 + shouldDelete deletePriority = 75.0 betterDelete deletePriority = 50.0 couldDelete deletePriority = 20.0 mustNotDelete deletePriority = 0.0 @@ -48,10 +49,10 @@ func oldestDeletePriority(machine *clusterv1.Machine) deletePriority { return mustDelete } if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { - return mustDelete + return shouldDelete } if !isMachineHealthy(machine) { - return mustDelete + return betterDelete } if machine.ObjectMeta.CreationTimestamp.Time.IsZero() { return mustNotDelete @@ -60,7 +61,7 @@ func oldestDeletePriority(machine *clusterv1.Machine) deletePriority { if d.Seconds() < 0 { return mustNotDelete } - return deletePriority(float64(mustDelete) * (1.0 - math.Exp(-d.Seconds()/secondsPerTenDays))) + return deletePriority(float64(betterDelete) * (1.0 - math.Exp(-d.Seconds()/secondsPerTenDays))) } func newestDeletePriority(machine *clusterv1.Machine) deletePriority { @@ -68,12 +69,12 @@ func newestDeletePriority(machine *clusterv1.Machine) deletePriority { return mustDelete } if _, ok := machine.ObjectMeta.Annotations[clusterv1.DeleteMachineAnnotation]; ok { - return mustDelete + return shouldDelete } if !isMachineHealthy(machine) { - return mustDelete + return betterDelete } - return mustDelete - oldestDeletePriority(machine) + return betterDelete - oldestDeletePriority(machine) } func randomDeletePolicy(machine *clusterv1.Machine) deletePriority { diff --git a/internal/controllers/machineset/machineset_delete_policy_test.go b/internal/controllers/machineset/machineset_delete_policy_test.go index 6764c8aa540b..ca78da6b5ece 100644 --- a/internal/controllers/machineset/machineset_delete_policy_test.go +++ b/internal/controllers/machineset/machineset_delete_policy_test.go @@ -34,7 +34,7 @@ func TestMachineToDelete(t *testing.T) { now := metav1.Now() nodeRef := &corev1.ObjectReference{Name: "some-node"} healthyMachine := &clusterv1.Machine{Status: clusterv1.MachineStatus{NodeRef: nodeRef}} - mustDeleteMachine := &clusterv1.Machine{ + deletingMachine := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } @@ -125,13 +125,13 @@ func TestMachineToDelete(t *testing.T) { diff: 2, machines: []*clusterv1.Machine{ healthyMachine, - mustDeleteMachine, + deletingMachine, betterDeleteMachine, - mustDeleteMachine, + deletingMachine, }, expect: []*clusterv1.Machine{ - mustDeleteMachine, - mustDeleteMachine, + deletingMachine, + deletingMachine, }, }, { @@ -139,12 +139,12 @@ func TestMachineToDelete(t *testing.T) { diff: 2, machines: []*clusterv1.Machine{ healthyMachine, - mustDeleteMachine, + deletingMachine, healthyMachine, betterDeleteMachine, }, expect: []*clusterv1.Machine{ - mustDeleteMachine, + deletingMachine, betterDeleteMachine, }, }, @@ -153,11 +153,11 @@ func TestMachineToDelete(t *testing.T) { diff: 2, machines: []*clusterv1.Machine{ healthyMachine, - mustDeleteMachine, + deletingMachine, healthyMachine, }, expect: []*clusterv1.Machine{ - mustDeleteMachine, + deletingMachine, healthyMachine, }, }, @@ -383,6 +383,10 @@ func TestMachineOldestDelete(t *testing.T) { empty := &clusterv1.Machine{ Status: clusterv1.MachineStatus{NodeRef: nodeRef}, } + mustDeleteMachine := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: ¤tTime}, + Status: clusterv1.MachineStatus{NodeRef: nodeRef}, + } newest := &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(currentTime.Time.AddDate(0, 0, -1))}, Status: clusterv1.MachineStatus{NodeRef: nodeRef}, @@ -513,6 +517,14 @@ func TestMachineOldestDelete(t *testing.T) { }, expect: []*clusterv1.Machine{nodeHealthyConditionUnknownMachine}, }, + { + desc: "func=oldestDeletePriority, diff=1 (deletionTimestamp)", + diff: 1, + machines: []*clusterv1.Machine{ + empty, secondNewest, oldest, secondOldest, newest, nodeHealthyConditionUnknownMachine, mustDeleteMachine, + }, + expect: []*clusterv1.Machine{mustDeleteMachine}, + }, } for _, test := range tests {