From 6f8b1b2fc9304d71888247b93a8418cc37aeeb5a Mon Sep 17 00:00:00 2001 From: yunbo Date: Fri, 22 Nov 2024 17:09:24 +0800 Subject: [PATCH] support bluegreen release: restore minReadySeconds Signed-off-by: yunbo --- .../bluegreenstyle/cloneset/control.go | 20 +++++--------- .../bluegreenstyle/deployment/control.go | 27 +++++++------------ 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/pkg/controller/batchrelease/control/bluegreenstyle/cloneset/control.go b/pkg/controller/batchrelease/control/bluegreenstyle/cloneset/control.go index 2bc7cd19..1a34a8c4 100644 --- a/pkg/controller/batchrelease/control/bluegreenstyle/cloneset/control.go +++ b/pkg/controller/batchrelease/control/bluegreenstyle/cloneset/control.go @@ -148,11 +148,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { c := util.GetEmptyObjectWithKey(rc.object) setting := control.GetOriginalSetting(rc.object) patchData := patch.NewClonesetPatch() - // 1. why should we set minReadySeconds to 0 firstly? - // if we set minReadySeconds to original value directly which is not 0, - // it will takes a while to wait all pods updated and ready. However, by setting minReadySeconds to 0 and - // then set minReadySeconds to original value, we can save this wait. - // 2. why we need a simple MinReadySeconds-based status machine? (ie. the if-else block) + // why we need a simple MinReadySeconds-based status machine? (ie. the if-else block) // It's possible for Finalize to be called multiple times, if error returned is not nil. // if we do all needed operations in a single code block, like, A->B->C, when C need retry, // both A and B will be executed as well, however, operations like restoreHPA cost a lot(which calls LIST API) @@ -161,8 +157,8 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { if err := hpa.RestoreHPA(rc.client, rc.object); err != nil { return err } - // restore the original setting, however set minReadySeconds to 0 - patchData.UpdateMinReadySeconds(0) + // restore the original setting + patchData.UpdateMinReadySeconds(setting.MinReadySeconds) patchData.UpdateMaxSurge(setting.MaxSurge) patchData.UpdateMaxUnavailable(setting.MaxUnavailable) if err := rc.client.Patch(context.TODO(), c, patchData); err != nil { @@ -170,21 +166,19 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } // we should return an error to trigger re-enqueue, so that we can go to the next if-else branch in the next reconcile return errors.NewBenignError(fmt.Errorf("cloneset bluegreen: we should wait all pods updated and available")) - } else if rc.object.Spec.MinReadySeconds == 0 { - klog.InfoS("restore the MinReadySeconds from 0 to original value", "cloneset", klog.KObj(rc.object), "value", setting.MinReadySeconds) + } else { + klog.InfoS("Finalize: cloneset bluegreen release: wait all pods updated and ready", "cloneset", klog.KObj(rc.object)) // wait all pods updated and ready if rc.object.Status.ReadyReplicas != rc.object.Status.UpdatedReadyReplicas { return errors.NewBenignError(fmt.Errorf("cloneset %v finalize not done, readyReplicas %d != updatedReadyReplicas %d, current policy %s", klog.KObj(rc.object), rc.object.Status.ReadyReplicas, rc.object.Status.UpdatedReadyReplicas, release.Spec.ReleasePlan.FinalizingPolicy)) } - klog.InfoS("cloneset bluegreen release: all pods updated and ready") - // restore the original setting and delete the annotation - patchData.UpdateMinReadySeconds(setting.MinReadySeconds) + klog.InfoS("Finalize: cloneset bluegreen release: all pods updated and ready") + // restore annotation patchData.DeleteAnnotation(v1beta1.OriginalDeploymentStrategyAnnotation) patchData.DeleteAnnotation(util.BatchReleaseControlAnnotation) return rc.client.Patch(context.TODO(), c, patchData) } - return nil } func (rc *realController) finalized() bool { diff --git a/pkg/controller/batchrelease/control/bluegreenstyle/deployment/control.go b/pkg/controller/batchrelease/control/bluegreenstyle/deployment/control.go index 856d7491..5c20f90a 100644 --- a/pkg/controller/batchrelease/control/bluegreenstyle/deployment/control.go +++ b/pkg/controller/batchrelease/control/bluegreenstyle/deployment/control.go @@ -160,11 +160,7 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { d := util.GetEmptyObjectWithKey(rc.object) setting := control.GetOriginalSetting(rc.object) patchData := patch.NewDeploymentPatch() - // 1. why should we set minReadySeconds to 0 firstly? - // if we set minReadySeconds to original value directly which is not 0, - // it will takes a while to wait all pods updated and ready. However, by setting minReadySeconds to 0 and - // then set minReadySeconds to original value, we can save this wait. - // 2. why we need a simple MinReadySeconds-based status machine? (ie. the if-else block) + // why we need a simple MinReadySeconds-based status machine? (ie. the if-else block) // It's possible for Finalize to be called multiple times, if error returned is not nil. // if we do all needed operations in a single code block, like, A->B->C, when C need retry, // both A and B will be executed as well, however, operations like restoreHPA cost a lot(which calls LIST API) @@ -173,9 +169,9 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { if err := hpa.RestoreHPA(rc.client, rc.object); err != nil { return err } - // restore the original setting, however set minReadySeconds to 0 + // restore the original setting patchData.UpdatePaused(false) - patchData.UpdateMinReadySeconds(0) + patchData.UpdateMinReadySeconds(setting.MinReadySeconds) patchData.UpdateProgressDeadlineSeconds(setting.ProgressDeadlineSeconds) patchData.UpdateMaxSurge(setting.MaxSurge) patchData.UpdateMaxUnavailable(setting.MaxUnavailable) @@ -184,21 +180,19 @@ func (rc *realController) Finalize(release *v1beta1.BatchRelease) error { } // we should return an error to trigger re-enqueue, so that we can go to the next if-else branch in the next reconcile return errors.NewBenignError(fmt.Errorf("deployment bluegreen: we should wait all pods updated and available")) - } else if rc.object.Spec.MinReadySeconds == 0 { - klog.InfoS("restore the MinReadySeconds from 0 to original value", "deployment", klog.KObj(rc.object), "value", setting.MinReadySeconds) + } else { + klog.InfoS("Finalize: deployment bluegreen release: wait all pods updated and ready", "cloneset", klog.KObj(rc.object)) // wait all pods updated and ready if err := waitAllUpdatedAndReady(d.(*apps.Deployment)); err != nil { return errors.NewBenignError(err) } klog.InfoS("Finalize: deployment is ready to resume, restore the original setting", "deployment", klog.KObj(rc.object)) - // restore the original setting and delete the annotation - patchData.UpdateMinReadySeconds(setting.MinReadySeconds) + // restore label and annotation patchData.DeleteAnnotation(v1beta1.OriginalDeploymentStrategyAnnotation) patchData.DeleteLabel(v1alpha1.DeploymentStableRevisionLabel) patchData.DeleteAnnotation(util.BatchReleaseControlAnnotation) return rc.client.Patch(context.TODO(), d, patchData) } - return nil } func (rc *realController) finalized() bool { @@ -289,15 +283,14 @@ func waitAllUpdatedAndReady(deployment *apps.Deployment) error { return fmt.Errorf("deployment should not be paused") } - createdReplicas := deployment.Status.Replicas - updatedReplicas := deployment.Status.UpdatedReplicas - if createdReplicas != updatedReplicas { - return fmt.Errorf("all replicas should be upgraded") + // ALL pods updated AND ready + if deployment.Status.ReadyReplicas != deployment.Status.UpdatedReplicas { + return fmt.Errorf("all ready replicas should be updated, and all updated replicas should be ready") } availableReplicas := deployment.Status.AvailableReplicas allowedUnavailable := util.DeploymentMaxUnavailable(deployment) - if allowedUnavailable+availableReplicas < createdReplicas { + if allowedUnavailable+availableReplicas < deployment.Status.Replicas { return fmt.Errorf("ready replicas should satisfy maxUnavailable") } return nil