diff --git a/internal/controllers/topology/cluster/reconcile_state.go b/internal/controllers/topology/cluster/reconcile_state.go index eafbe18a9054..478646d30f63 100644 --- a/internal/controllers/topology/cluster/reconcile_state.go +++ b/internal/controllers/topology/cluster/reconcile_state.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/cluster-api/internal/hooks" tlog "sigs.k8s.io/cluster-api/internal/log" "sigs.k8s.io/cluster-api/internal/topology/check" + "sigs.k8s.io/cluster-api/util" ) const ( @@ -89,7 +90,7 @@ func (r *Reconciler) reconcileState(ctx context.Context, s *scope.Scope) error { return errControlPlane } - // In this case (reconcileInfrastructureCluster passed reporting creation of the infrastructure cluster object, reconcileControlPlane - which is expected to create the control plane object - failed), + // In this case (reconcileInfrastructureCluster reported creation of the infrastructure cluster object, reconcileControlPlane - which is expected to create the control plane object - failed), // if the creation of the control plane actually did not happen, blank out ControlPlaneRef from desired cluster. if s.Current.Cluster.Spec.ControlPlaneRef == nil && !createdControlPlane { s.Desired.Cluster.Spec.ControlPlaneRef = nil @@ -139,18 +140,18 @@ func (r *Reconciler) reconcileClusterShim(ctx context.Context, s *scope.Scope) e shim.APIVersion = corev1.SchemeGroupVersion.String() // Add the shim as a temporary owner for the InfrastructureCluster. - ownerRefs := s.Desired.InfrastructureCluster.GetOwnerReferences() - ownerRefs = append(ownerRefs, *ownerReferenceTo(shim, corev1.SchemeGroupVersion.WithKind("Secret"))) - if !hasOwnerReferenceFrom(s.Desired.InfrastructureCluster, shim) { - s.Desired.InfrastructureCluster.SetOwnerReferences(ownerRefs) - } + s.Desired.InfrastructureCluster.SetOwnerReferences( + util.EnsureOwnerRef(s.Desired.InfrastructureCluster.GetOwnerReferences(), + *ownerReferenceTo(shim, corev1.SchemeGroupVersion.WithKind("Secret")), + ), + ) // Add the shim as a temporary owner for the ControlPlane. - ownerRefs = s.Desired.ControlPlane.Object.GetOwnerReferences() - ownerRefs = append(ownerRefs, *ownerReferenceTo(shim, corev1.SchemeGroupVersion.WithKind("Secret"))) - if !hasOwnerReferenceFrom(s.Desired.ControlPlane.Object, shim) { - s.Desired.ControlPlane.Object.SetOwnerReferences(ownerRefs) - } + s.Desired.ControlPlane.Object.SetOwnerReferences( + util.EnsureOwnerRef(s.Desired.ControlPlane.Object.GetOwnerReferences(), + *ownerReferenceTo(shim, corev1.SchemeGroupVersion.WithKind("Secret")), + ), + ) } // If the InfrastructureCluster and the ControlPlane objects have been already created @@ -363,7 +364,7 @@ func (r *Reconciler) reconcileControlPlane(ctx context.Context, s *scope.Scope) infrastructureMachineCleanupFunc = func() { // Best effort cleanup of the InfrastructureMachineTemplate; // If this fails, the object will be garbage collected when the cluster is deleted. - if err = r.Client.Delete(ctx, s.Desired.ControlPlane.InfrastructureMachineTemplate); err != nil { + if err := r.Client.Delete(ctx, s.Desired.ControlPlane.InfrastructureMachineTemplate); err != nil { log := tlog.LoggerFrom(ctx). WithValues(s.Desired.ControlPlane.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, s.Desired.ControlPlane.InfrastructureMachineTemplate.GetName()). WithValues("err", err.Error()) @@ -616,7 +617,7 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope infrastructureMachineCleanupFunc = func() { // Best effort cleanup of the InfrastructureMachineTemplate; // If this fails, the object will be garbage collected when the cluster is deleted. - if err = r.Client.Delete(ctx, md.InfrastructureMachineTemplate); err != nil { + if err := r.Client.Delete(ctx, md.InfrastructureMachineTemplate); err != nil { log := tlog.LoggerFrom(ctx). WithValues(md.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, md.InfrastructureMachineTemplate.GetName()). WithValues("err", err.Error()) @@ -641,7 +642,7 @@ func (r *Reconciler) createMachineDeployment(ctx context.Context, s *scope.Scope bootstrapCleanupFunc = func() { // Best effort cleanup of the BootstrapTemplate; // If this fails, the object will be garbage collected when the cluster is deleted. - if err = r.Client.Delete(ctx, md.BootstrapTemplate); err != nil { + if err := r.Client.Delete(ctx, md.BootstrapTemplate); err != nil { log := tlog.LoggerFrom(ctx). WithValues(md.BootstrapTemplate.GetObjectKind().GroupVersionKind().Kind, md.BootstrapTemplate.GetName()). WithValues("err", err.Error()) @@ -715,33 +716,68 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope cluster := s.Current.Cluster infraCtx, _ := log.WithObject(desiredMD.InfrastructureMachineTemplate).Into(ctx) - if _, err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{ + infrastructureMachineCleanupFunc := func() {} + createdInfra, err := r.reconcileReferencedTemplate(infraCtx, reconcileReferencedTemplateInput{ cluster: cluster, ref: &desiredMD.Object.Spec.Template.Spec.InfrastructureRef, current: currentMD.InfrastructureMachineTemplate, desired: desiredMD.InfrastructureMachineTemplate, templateNamePrefix: infrastructureMachineTemplateNamePrefix(cluster.Name, mdTopologyName), compatibilityChecker: check.ObjectsAreCompatible, - }); err != nil { + }) + if err != nil { return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object}) } + if createdInfra { + infrastructureMachineCleanupFunc = func() { + // Best effort cleanup of the InfrastructureMachineTemplate; + // If this fails, the object will be garbage collected when the cluster is deleted. + if err := r.Client.Delete(ctx, desiredMD.InfrastructureMachineTemplate); err != nil { + log := tlog.LoggerFrom(ctx). + WithValues(desiredMD.InfrastructureMachineTemplate.GetObjectKind().GroupVersionKind().Kind, desiredMD.InfrastructureMachineTemplate.GetName()). + WithValues("err", err.Error()) + log.Infof("WARNING! Failed to cleanup InfrastructureMachineTemplate for MachineDeployment while handling update error. The object will be garbage collected when the cluster is deleted.") + } + } + } + bootstrapCtx, _ := log.WithObject(desiredMD.BootstrapTemplate).Into(ctx) - if _, err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{ + bootstrapCleanupFunc := func() {} + createdBootstrap, err := r.reconcileReferencedTemplate(bootstrapCtx, reconcileReferencedTemplateInput{ cluster: cluster, ref: desiredMD.Object.Spec.Template.Spec.Bootstrap.ConfigRef, current: currentMD.BootstrapTemplate, desired: desiredMD.BootstrapTemplate, templateNamePrefix: bootstrapTemplateNamePrefix(cluster.Name, mdTopologyName), compatibilityChecker: check.ObjectsAreInTheSameNamespace, - }); err != nil { + }) + if err != nil { + // Best effort cleanup of the InfrastructureMachineTemplate (only on template rotation). + infrastructureMachineCleanupFunc() return errors.Wrapf(err, "failed to reconcile %s", tlog.KObj{Obj: currentMD.Object}) } + if createdBootstrap { + bootstrapCleanupFunc = func() { + // Best effort cleanup of the BootstrapTemplate; + // If this fails, the object will be garbage collected when the cluster is deleted. + if err := r.Client.Delete(ctx, desiredMD.BootstrapTemplate); err != nil { + log := tlog.LoggerFrom(ctx). + WithValues(desiredMD.BootstrapTemplate.GetObjectKind().GroupVersionKind().Kind, desiredMD.BootstrapTemplate.GetName()). + WithValues("err", err.Error()) + log.Infof("WARNING! Failed to cleanup BootstrapTemplate for MachineDeployment while handling upgrade error. The object will be garbage collected when the cluster is deleted.") + } + } + } + // Check differences between current and desired MachineDeployment, and eventually patch the current object. log = log.WithObject(desiredMD.Object) patchHelper, err := r.patchHelperFactory(ctx, currentMD.Object, desiredMD.Object) if err != nil { + // Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on template rotation). + infrastructureMachineCleanupFunc() + bootstrapCleanupFunc() return errors.Wrapf(err, "failed to create patch helper for %s", tlog.KObj{Obj: currentMD.Object}) } if !patchHelper.HasChanges() { @@ -751,6 +787,9 @@ func (r *Reconciler) updateMachineDeployment(ctx context.Context, s *scope.Scope log.Infof("Patching %s", tlog.KObj{Obj: currentMD.Object}) if err := patchHelper.Patch(ctx); err != nil { + // Best effort cleanup of the InfrastructureMachineTemplate & BootstrapTemplate (only on creation). + infrastructureMachineCleanupFunc() + bootstrapCleanupFunc() return errors.Wrapf(err, "failed to patch %s", tlog.KObj{Obj: currentMD.Object}) } r.recorder.Eventf(cluster, corev1.EventTypeNormal, updateEventReason, "Updated %q%s", tlog.KObj{Obj: currentMD.Object}, logMachineDeploymentVersionChange(currentMD.Object, desiredMD.Object)) @@ -899,7 +938,7 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp * log := tlog.LoggerFrom(ctx).WithMachinePool(mp.Object) cluster := s.Current.Cluster infraCtx, _ := log.WithObject(mp.InfrastructureMachinePoolObject).Into(ctx) - infrastructureMachineMachinePool := func() {} + infrastructureMachineMachinePoolCleanupFunc := func() {} createdInfrastructureMachinePool, err := r.reconcileReferencedObject(infraCtx, reconcileReferencedObjectInput{ cluster: cluster, desired: mp.InfrastructureMachinePoolObject, @@ -909,16 +948,15 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp * } if createdInfrastructureMachinePool { - infrastructureMachineMachinePool = func() { + infrastructureMachineMachinePoolCleanupFunc = func() { // Best effort cleanup of the InfrastructureMachinePool; // If this fails, the object will be garbage collected when the cluster is deleted. - if err = r.Client.Delete(ctx, mp.InfrastructureMachinePoolObject); err != nil { + if err := r.Client.Delete(ctx, mp.InfrastructureMachinePoolObject); err != nil { log := tlog.LoggerFrom(ctx). WithValues(mp.InfrastructureMachinePoolObject.GetObjectKind().GroupVersionKind().Kind, mp.InfrastructureMachinePoolObject.GetName()). WithValues("err", err.Error()) log.Infof("WARNING! Failed to cleanup InfrastructureMachinePoolObject for MachinePool while handling creation error. The object will be garbage collected when the cluster is deleted.") } - } } @@ -930,7 +968,7 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp * }) if err != nil { // Best effort cleanup of the InfrastructureMachinePool (only on creation). - infrastructureMachineMachinePool() + infrastructureMachineMachinePoolCleanupFunc() return errors.Wrapf(err, "failed to create %s", mp.Object.Kind) } @@ -938,7 +976,7 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp * bootstrapCleanupFunc = func() { // Best effort cleanup of the BootstrapConfig; // If this fails, the object will be garbage collected when the cluster is deleted. - if err = r.Client.Delete(ctx, mp.BootstrapObject); err != nil { + if err := r.Client.Delete(ctx, mp.BootstrapObject); err != nil { log := tlog.LoggerFrom(ctx). WithValues(mp.BootstrapObject.GetObjectKind().GroupVersionKind().Kind, mp.BootstrapObject.GetName()). WithValues("err", err.Error()) @@ -952,13 +990,13 @@ func (r *Reconciler) createMachinePool(ctx context.Context, s *scope.Scope, mp * helper, err := r.patchHelperFactory(ctx, nil, mp.Object) if err != nil { // Best effort cleanup of the InfrastructureMachinePool & BootstrapConfig (only on creation). - infrastructureMachineMachinePool() + infrastructureMachineMachinePoolCleanupFunc() bootstrapCleanupFunc() return createErrorWithoutObjectName(ctx, err, mp.Object) } if err := helper.Patch(ctx); err != nil { // Best effort cleanup of the InfrastructureMachinePool & BootstrapConfig (only on creation). - infrastructureMachineMachinePool() + infrastructureMachineMachinePoolCleanupFunc() bootstrapCleanupFunc() return createErrorWithoutObjectName(ctx, err, mp.Object) } @@ -1287,7 +1325,7 @@ func (r *Reconciler) reconcileReferencedTemplate(ctx context.Context, in reconci // TODO: find a way to make side effect more explicit in.ref.Name = newName - return false, nil + return true, nil } // createErrorWithoutObjectName removes the name of the object from the error message. As each new Create call involves an diff --git a/internal/controllers/topology/cluster/reconcile_state_test.go b/internal/controllers/topology/cluster/reconcile_state_test.go index 70e2c1635933..5ba2585a00ca 100644 --- a/internal/controllers/topology/cluster/reconcile_state_test.go +++ b/internal/controllers/topology/cluster/reconcile_state_test.go @@ -2261,13 +2261,13 @@ func TestReconcileMachineDeployments(t *testing.T) { } func TestReconcileMachineDeploymentsCleanup(t *testing.T) { - infrastructureMachineTemplate1 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-1").Build() - bootstrapTemplate1 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-1").Build() - md1 := newFakeMachineDeploymentTopologyState("md-1", infrastructureMachineTemplate1, bootstrapTemplate1, nil) - - t.Run("cleanup InfrastructureMachineTemplate and BootstrapTemplate in case of errors", func(t *testing.T) { + t.Run("cleanup InfrastructureMachineTemplate and BootstrapTemplate in case of errors on creation", func(t *testing.T) { g := NewWithT(t) + infrastructureMachineTemplate1 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-1").Build() + bootstrapTemplate1 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-1").Build() + md1 := newFakeMachineDeploymentTopologyState("md-1", infrastructureMachineTemplate1, bootstrapTemplate1, nil) + // Create namespace and modify input to have correct namespace set namespace, err := env.CreateNamespace(ctx, "reconcile-machine-deployments") g.Expect(err).ToNot(HaveOccurred()) @@ -2315,6 +2315,71 @@ func TestReconcileMachineDeploymentsCleanup(t *testing.T) { Name: gotInfrastructureMachineTemplateRef.Name, }, &gotInfrastructureMachineTemplate) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + }) + t.Run("cleanup InfrastructureMachineTemplate and BootstrapTemplate in case of errors on upgrade", func(t *testing.T) { + g := NewWithT(t) + + infrastructureMachineTemplate2 := builder.TestInfrastructureMachineTemplate(metav1.NamespaceDefault, "infrastructure-machine-2").Build() + bootstrapTemplate2 := builder.TestBootstrapTemplate(metav1.NamespaceDefault, "bootstrap-config-2").Build() + md2 := newFakeMachineDeploymentTopologyState("md-2", infrastructureMachineTemplate2, bootstrapTemplate2, nil) + + bootstrapTemplate2WithChanges := bootstrapTemplate2.DeepCopy() + g.Expect(unstructured.SetNestedField(bootstrapTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed()) + infrastructureMachineTemplate2WithChanges := infrastructureMachineTemplate2.DeepCopy() + g.Expect(unstructured.SetNestedField(infrastructureMachineTemplate2WithChanges.Object, "foo", "spec", "template", "spec", "foo")).To(Succeed()) + md2WithTemplateChanges := newFakeMachineDeploymentTopologyState(md2.Object.Name, infrastructureMachineTemplate2WithChanges, bootstrapTemplate2WithChanges, nil) + + // Create namespace and modify input to have correct namespace set + namespace, err := env.CreateNamespace(ctx, "reconcile-machine-deployments") + g.Expect(err).ToNot(HaveOccurred()) + md2 = prepareMachineDeploymentState(md2, namespace.GetName()) + md2WithTemplateChanges = prepareMachineDeploymentState(md2WithTemplateChanges, namespace.GetName()) + + s := scope.New(builder.Cluster(namespace.GetName(), "cluster-1").Build()) + s.Current.MachineDeployments = map[string]*scope.MachineDeploymentState{ + md2.Object.Name: md2, + } + s.Desired = &scope.ClusterState{ + MachineDeployments: map[string]*scope.MachineDeploymentState{ + md2WithTemplateChanges.Object.Name: md2WithTemplateChanges, + }, + } + + // Force md upgrade to fail + s.Desired.MachineDeployments[md2WithTemplateChanges.Object.Name].Object.Namespace = "do-not-exist" + + r := Reconciler{ + Client: env.GetClient(), + APIReader: env.GetAPIReader(), + patchHelperFactory: serverSideApplyPatchHelperFactory(env, ssa.NewCache()), + recorder: env.GetEventRecorderFor("test"), + } + err = r.reconcileMachineDeployments(ctx, s) + g.Expect(err).To(HaveOccurred()) + + newBootstrapTemplateRef := md2WithTemplateChanges.Object.Spec.Template.Spec.Bootstrap.ConfigRef + newBootstrapTemplate := unstructured.Unstructured{} + newBootstrapTemplate.SetKind(newBootstrapTemplateRef.Kind) + newBootstrapTemplate.SetAPIVersion(newBootstrapTemplateRef.APIVersion) + + err = env.GetAPIReader().Get(ctx, client.ObjectKey{ + Namespace: newBootstrapTemplateRef.Namespace, + Name: newBootstrapTemplateRef.Name, + }, &newBootstrapTemplate) + + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) + + newInfrastructureMachineTemplateRef := md2WithTemplateChanges.Object.Spec.Template.Spec.InfrastructureRef + newInfrastructureMachineTemplate := unstructured.Unstructured{} + newInfrastructureMachineTemplate.SetKind(newInfrastructureMachineTemplateRef.Kind) + newInfrastructureMachineTemplate.SetAPIVersion(newInfrastructureMachineTemplateRef.APIVersion) + + err = env.GetAPIReader().Get(ctx, client.ObjectKey{ + Namespace: newInfrastructureMachineTemplateRef.Namespace, + Name: newInfrastructureMachineTemplateRef.Name, + }, &newInfrastructureMachineTemplate) + g.Expect(apierrors.IsNotFound(err)).To(BeTrue()) }) }