Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Advanced deployment scale down old unhealthy pods firstly #150

Merged
merged 2 commits into from
Jul 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions pkg/controller/deployment/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,30 @@ func (dc *DeploymentController) scale(ctx context.Context, deployment *apps.Depl
// replica sets.
deploymentReplicasToAdd := allowedSize - allRSsReplicas

// Scale down the unhealthy replicas in old replica sets firstly to avoid some bad cases.
// For example:
// _______________________________________________________________________________________
// | ReplicaSet | oldRS-1 | oldRS-2 | newRS |
// | --------------| -------------------|----------------------|--------------------------|
// | Replicas | 4 healthy Pods | 2 unhealthy Pods | 4 unhealthy Pods |
// ---------------------------------------------------------------------------------------
// If we want to scale down these replica sets from 10 to 6, we expect to scale down the oldRS-2
// from 2 to 0 firstly, then scale down oldRS-1 1 Pod and newRS 1 Pod based on proportion.
//
// We do not scale down the newRS unhealthy Pods with higher priority, because these new revision
// Pods may be just created, not the one with the crash or other problems.
var err error
var cleanupCount int32
if deploymentReplicasToAdd < 0 {
oldRSs, cleanupCount, err = dc.cleanupUnhealthyReplicas(ctx, oldRSs, deployment, -deploymentReplicasToAdd)
if err != nil {
return err
}
klog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d during scaling", cleanupCount)
deploymentReplicasToAdd += cleanupCount
allRSs = deploymentutil.FilterActiveReplicaSets(append(oldRSs, newRS))
}

// The additional replicas should be distributed proportionally amongst the active
// replica sets from the larger to the smaller in size replica set. Scaling direction
// drives what happens in case we are trying to scale replica sets of the same size.
Expand Down
21 changes: 19 additions & 2 deletions test/e2e/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ var _ = SIGDescribe("Advanced Deployment", func() {
return k8sClient.Get(context.TODO(), key, object)
}

UpdateDeployment := func(deployment *apps.Deployment, version string) *apps.Deployment {
UpdateDeployment := func(deployment *apps.Deployment, version string, images ...string) *apps.Deployment {
By(fmt.Sprintf("update deployment %v to version: %v", client.ObjectKeyFromObject(deployment), version))
var clone *apps.Deployment
Expect(retry.RetryOnConflict(defaultRetry, func() error {
Expand All @@ -53,7 +53,11 @@ var _ = SIGDescribe("Advanced Deployment", func() {
if err != nil {
return err
}
clone.Spec.Template.Spec.Containers[0].Image = deployment.Spec.Template.Spec.Containers[0].Image
if len(images) == 0 {
clone.Spec.Template.Spec.Containers[0].Image = deployment.Spec.Template.Spec.Containers[0].Image
} else {
clone.Spec.Template.Spec.Containers[0].Image = images[0]
}
clone.Spec.Template.Spec.Containers[0].Env[0].Value = version
strategy := unmarshal(clone.Annotations[rolloutsv1alpha1.DeploymentStrategyAnnotation])
strategy.Paused = true
Expand Down Expand Up @@ -324,6 +328,19 @@ var _ = SIGDescribe("Advanced Deployment", func() {
UpdatePartitionWithCheck(deployment, intstr.FromInt(3))
UpdatePartitionWithCheck(deployment, intstr.FromInt(5))
})

It("scale down old unhealthy first", func() {
deployment := &apps.Deployment{}
deployment.Namespace = namespace
Expect(ReadYamlToObject("./test_data/deployment/deployment.yaml", deployment)).ToNot(HaveOccurred())
deployment.Annotations["rollouts.kruise.io/deployment-strategy"] = `{"rollingUpdate":{"maxUnavailable":0,"maxSurge":1}}`
CreateObject(deployment)
UpdateDeployment(deployment, "version2", "busybox:not-exists")
UpdatePartitionWithoutCheck(deployment, intstr.FromInt(1))
CheckReplicas(deployment, 6, 5, 1)
UpdateDeployment(deployment, "version3", "busybox:1.32")
CheckReplicas(deployment, 5, 5, 0)
})
})
})

Expand Down