Skip to content

Commit

Permalink
fix(uninstall): deleting failed backups when uninstalling
Browse files Browse the repository at this point in the history
Deleting failed backup failed when uninstalling because failed
backups will not update the backup target name in the `Status`.

Clean up system backups when deleting the default backup target.
Add OwnerReferences with default backup target into system backups.

ref: longhorn/longhorn 10104

Signed-off-by: James Lu <james.lu@suse.com>
  • Loading branch information
mantissahz authored and mergify[bot] committed Jan 8, 2025
1 parent cd125e6 commit d17b4ee
Show file tree
Hide file tree
Showing 9 changed files with 159 additions and 30 deletions.
46 changes: 24 additions & 22 deletions controller/backup_target_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,14 +369,9 @@ func (btc *BackupTargetController) reconcile(name string) (err error) {
if !backupTarget.DeletionTimestamp.IsZero() {
stopTimer(backupTarget.Name)

if err := btc.cleanupBackupVolumes(backupTarget.Name); err != nil {
return errors.Wrap(err, "failed to clean up BackupVolumes")
}

if err := btc.cleanupBackupBackingImages(backupTarget.Name); err != nil {
return errors.Wrap(err, "failed to clean up BackupBackingImages")
if err := btc.cleanUpAllBackupRelatedResources(backupTarget.Name); err != nil {
return err
}

return btc.ds.RemoveFinalizerForBackupTarget(backupTarget)
}

Expand Down Expand Up @@ -437,20 +432,9 @@ func (btc *BackupTargetController) reconcile(name string) (err error) {
longhorn.BackupTargetConditionTypeUnavailable, longhorn.ConditionStatusTrue,
longhorn.BackupTargetConditionReasonUnavailable, "backup target URL is empty")

if err := btc.cleanupBackupVolumes(backupTarget.Name); err != nil {
return errors.Wrap(err, "failed to clean up BackupVolumes")
}

if err := btc.cleanupBackupBackingImages(backupTarget.Name); err != nil {
return errors.Wrap(err, "failed to clean up BackupBackingImages")
}

if backupTarget.Name == types.DefaultBackupTargetName {
if err := btc.cleanupSystemBackups(); err != nil {
return errors.Wrap(err, "failed to clean up SystemBackups")
}
if err := btc.cleanUpAllBackupRelatedResources(backupTarget.Name); err != nil {
return err
}

return nil
}

Expand Down Expand Up @@ -479,10 +463,27 @@ func (btc *BackupTargetController) reconcile(name string) (err error) {
}

if backupTarget.Name == types.DefaultBackupTargetName {
if err = btc.syncSystemBackup(info.backupStoreSystemBackups, log); err != nil {
if err = btc.syncSystemBackup(backupTarget, info.backupStoreSystemBackups, log); err != nil {
return err
}
}
return nil
}

func (btc *BackupTargetController) cleanUpAllBackupRelatedResources(backupTargetName string) error {
if err := btc.cleanupBackupVolumes(backupTargetName); err != nil {
return errors.Wrap(err, "failed to clean up BackupVolumes")
}

if err := btc.cleanupBackupBackingImages(backupTargetName); err != nil {
return errors.Wrap(err, "failed to clean up BackupBackingImages")
}

if backupTargetName == types.DefaultBackupTargetName {
if err := btc.cleanupSystemBackups(); err != nil {
return errors.Wrap(err, "failed to clean up SystemBackups")
}
}

return nil
}
Expand Down Expand Up @@ -700,7 +701,7 @@ func (btc *BackupTargetController) syncBackupBackingImage(backupTarget *longhorn
return nil
}

func (btc *BackupTargetController) syncSystemBackup(backupStoreSystemBackups systembackupstore.SystemBackups, log logrus.FieldLogger) error {
func (btc *BackupTargetController) syncSystemBackup(backupTarget *longhorn.BackupTarget, backupStoreSystemBackups systembackupstore.SystemBackups, log logrus.FieldLogger) error {
clusterSystemBackups, err := btc.ds.ListSystemBackups()
if err != nil {
return errors.Wrap(err, "failed to list SystemBackups")
Expand Down Expand Up @@ -734,6 +735,7 @@ func (btc *BackupTargetController) syncSystemBackup(backupStoreSystemBackups sys
// to get the config from the backup target.
types.GetVersionLabelKey(): longhornVersion,
},
OwnerReferences: datastore.GetOwnerReferencesForBackupTarget(backupTarget),
},
}
_, err = btc.ds.CreateSystemBackup(systemBackup)
Expand Down
29 changes: 25 additions & 4 deletions controller/system_backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ const (
SystemBackupErrGenerateYAML = "failed to generate resource YAMLs"
SystemBackupErrGetFmt = "failed to get %v"
SystemBackupErrGetConfig = "failed to get system backup config"
SystemBackupErrGetBackupTarget = "failed to get backup target"
SystemBackupErrMkdir = "failed to create system backup file directory"
SystemBackupErrRemoveAll = "failed to remove system backup directory"
SystemBackupErrRemove = "failed to remove system backup file"
Expand Down Expand Up @@ -268,13 +269,16 @@ func (c *SystemBackupController) syncSystemBackup(key string) (err error) {
}

backupTarget, err := c.ds.GetDefaultBackupTargetRO()
if err != nil {
if err != nil && !apierrors.IsNotFound(err) {
return err
}

backupTargetClient, err := newBackupTargetClientFromDefaultEngineImage(c.ds, backupTarget)
if err != nil {
return err
var backupTargetClient *engineapi.BackupTargetClient
if backupTarget != nil {
backupTargetClient, err = newBackupTargetClientFromDefaultEngineImage(c.ds, backupTarget)
if err != nil {
return err
}
}

return c.reconcile(name, backupTargetClient, backupTarget)
Expand Down Expand Up @@ -362,6 +366,15 @@ func (c *SystemBackupController) reconcile(name string, backupTargetClient engin
return
}

// If the system backup is in the final state, we don't need to do anything for the backup target not found.
if backupTarget == nil && !systemBackupInFinalState(systemBackup) {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
constant.EventReasonFailedStarting, SystemBackupErrGetBackupTarget,
)
return fmt.Errorf(string(SystemBackupErrGetBackupTarget)+": %v", types.DefaultBackupTargetName)
}

tempBackupArchivePath := filepath.Join(SystemBackupTempDir, systemBackup.Name+types.SystemBackupExtension)
tempBackupDir := filepath.Join(SystemBackupTempDir, systemBackup.Name)

Expand Down Expand Up @@ -472,6 +485,14 @@ func (c *SystemBackupController) reconcile(name string, backupTargetClient engin
return nil
}

func systemBackupInFinalState(sb *longhorn.SystemBackup) bool {
// The state SystemBackupStateDeleting, as a final state, will clean up the system backup files locally and remotely if necessary,
// and the system backup will be deleted once the finalizer is removed.
return sb.Status.State == longhorn.SystemBackupStateDeleting ||
sb.Status.State == longhorn.SystemBackupStateReady ||
sb.Status.State == longhorn.SystemBackupStateError
}

func getSystemBackupVersionExistInRemoteBackupTarget(systemBackup *longhorn.SystemBackup, backupTargetClient engineapi.SystemBackupOperationInterface) (string, error) {
systemBackupsInBackupstore, err := backupTargetClient.ListSystemBackup()
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion controller/system_backup_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,9 @@ func (s *TestSuite) TestReconcileSystemBackup(c *C) {
systemBackupController.UploadSystemBackup(systemBackup, archievePath, tempDir, backupTargetClient)

default:
err = systemBackupController.reconcile(tc.systemBackupName, backupTargetClient, nil)
defaultBackupTarget, err := lhClient.LonghornV1beta2().BackupTargets(TestNamespace).Get(context.TODO(), types.DefaultBackupTargetName, metav1.GetOptions{})
c.Assert(err, IsNil)
err = systemBackupController.reconcile(tc.systemBackupName, backupTargetClient, defaultBackupTarget)
if tc.expectError {
c.Assert(err, NotNil)
} else {
Expand Down
4 changes: 3 additions & 1 deletion controller/uninstall_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -771,13 +771,15 @@ func (c *UninstallController) deleteReplicas(replicas map[string]*longhorn.Repli
// deleteLeftBackups deletes the backup having no backup volume
func (c *UninstallController) deleteLeftBackups(backup *longhorn.Backup) (err error) {
volumeName, ok := backup.Labels[types.LonghornLabelBackupVolume]
if !ok {
if !ok || backup.Status.BackupTargetName == "" {
// directly delete it if there is even no backup volume label
// or backup status is not updated (backup state is not BackupStateCompleted)
if err = c.ds.DeleteBackup(backup.Name); err != nil {
if !apierrors.IsNotFound(err) {
return errors.Wrapf(err, "failed to delete backup %v", backup.Name)
}
}
return nil
}
_, err = c.ds.GetBackupVolumeByBackupTargetAndVolumeRO(backup.Status.BackupTargetName, volumeName)
if err != nil && apierrors.IsNotFound(err) {
Expand Down
2 changes: 1 addition & 1 deletion datastore/longhorn.go
Original file line number Diff line number Diff line change
Expand Up @@ -4485,7 +4485,7 @@ func (s *DataStore) GetBackupVolumeByBackupTargetAndVolumeRO(backupTargetName, v
return nil, err
}

if len(list) >= 2 {
if len(list) > 1 {
return nil, fmt.Errorf("datastore: found more than one backup volume with backup target %v and volume %v", backupTargetName, volumeName)
}

Expand Down
1 change: 1 addition & 0 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const (
LonghornKindRecurringJob = "RecurringJob"
LonghornKindSetting = "Setting"
LonghornKindSupportBundle = "SupportBundle"
LonghornKindSystemBackup = "SystemBackup"
LonghornKindSystemRestore = "SystemRestore"
LonghornKindOrphan = "Orphan"

Expand Down
42 changes: 42 additions & 0 deletions upgrade/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,26 @@ func ListAndUpdateVolumeAttachmentsInProvidedCache(namespace string, lhClient *l
return volumeAttachments, nil
}

// ListAndUpdateSystemBackupsInProvidedCache list all system backups and save them into the provided cached `resourceMap`. This method is not thread-safe.
func ListAndUpdateSystemBackupsInProvidedCache(namespace string, lhClient *lhclientset.Clientset, resourceMaps map[string]interface{}) (map[string]*longhorn.SystemBackup, error) {
if v, ok := resourceMaps[types.LonghornKindSystemBackup]; ok {
return v.(map[string]*longhorn.SystemBackup), nil
}

systemBackups := map[string]*longhorn.SystemBackup{}
systemBackupList, err := lhClient.LonghornV1beta2().SystemBackups(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
return nil, err
}
for i, sb := range systemBackupList.Items {
systemBackups[sb.Name] = &systemBackupList.Items[i]
}

resourceMaps[types.LonghornKindSystemBackup] = systemBackups

return systemBackups, nil
}

// CreateAndUpdateRecurringJobInProvidedCache creates a recurringJob and saves it into the provided cached `resourceMap`. This method is not thread-safe.
func CreateAndUpdateRecurringJobInProvidedCache(namespace string, lhClient *lhclientset.Clientset, resourceMaps map[string]interface{}, job *longhorn.RecurringJob) (*longhorn.RecurringJob, error) {
obj, err := lhClient.LonghornV1beta2().RecurringJobs(namespace).Create(context.TODO(), job, metav1.CreateOptions{})
Expand Down Expand Up @@ -834,6 +854,8 @@ func UpdateResources(namespace string, lhClient *lhclientset.Clientset, resource
err = updateSnapshots(namespace, lhClient, resourceMap.(map[string]*longhorn.Snapshot))
case types.LonghornKindOrphan:
err = updateOrphans(namespace, lhClient, resourceMap.(map[string]*longhorn.Orphan))
case types.LonghornKindSystemBackup:
err = updateSystemBackups(namespace, lhClient, resourceMap.(map[string]*longhorn.SystemBackup))
default:
return fmt.Errorf("resource kind %v is not able to updated", resourceKind)
}
Expand Down Expand Up @@ -1198,6 +1220,26 @@ func updateOrphans(namespace string, lhClient *lhclientset.Clientset, orphans ma
return nil
}

func updateSystemBackups(namespace string, lhClient *lhclientset.Clientset, systemBackups map[string]*longhorn.SystemBackup) error {
existingSystemBackupList, err := lhClient.LonghornV1beta2().SystemBackups(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
return err
}
for _, existingSystemBackup := range existingSystemBackupList.Items {
sb, ok := systemBackups[existingSystemBackup.Name]
if !ok {
continue
}
if !reflect.DeepEqual(existingSystemBackup.Spec, sb.Spec) ||
!reflect.DeepEqual(existingSystemBackup.ObjectMeta, sb.ObjectMeta) {
if _, err = lhClient.LonghornV1beta2().SystemBackups(namespace).Update(context.TODO(), sb, metav1.UpdateOptions{}); err != nil {
return err
}
}
}
return nil
}

func createOrUpdateVolumeAttachments(namespace string, lhClient *lhclientset.Clientset, volumeAttachments map[string]*longhorn.VolumeAttachment) error {
existingVolumeAttachmentList, err := lhClient.LonghornV1beta2().VolumeAttachments(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
Expand Down
32 changes: 32 additions & 0 deletions upgrade/v17xto180/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"

"github.com/longhorn/longhorn-manager/datastore"
"github.com/longhorn/longhorn-manager/types"

longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
Expand Down Expand Up @@ -44,6 +45,10 @@ func UpgradeResources(namespace string, lhClient *lhclientset.Clientset, kubeCli
return err
}

if err := upgradeSystemBackups(namespace, lhClient, resourceMaps); err != nil {
return err
}

return upgradeBackingImages(namespace, lhClient, resourceMaps)
}

Expand Down Expand Up @@ -227,6 +232,33 @@ func upgradeBackingImages(namespace string, lhClient *lhclientset.Clientset, res
return nil
}

func upgradeSystemBackups(namespace string, lhClient *lhclientset.Clientset, resourceMaps map[string]interface{}) (err error) {
defer func() {
err = errors.Wrapf(err, upgradeLogPrefix+"upgrade system backup failed")
}()

sbMap, err := upgradeutil.ListAndUpdateSystemBackupsInProvidedCache(namespace, lhClient, resourceMaps)
if err != nil {
if apierrors.IsNotFound(err) {
return nil
}
return errors.Wrapf(err, "failed to list all existing Longhorn system backups during the system backup upgrade")
}

defaultBackupTarget, err := lhClient.LonghornV1beta2().BackupTargets(namespace).Get(context.TODO(), types.DefaultBackupTargetName, metav1.GetOptions{})
if err != nil {
return errors.Wrapf(err, "failed to get default backup target")
}

for _, sb := range sbMap {
if sb.OwnerReferences == nil {
sb.OwnerReferences = datastore.GetOwnerReferencesForBackupTarget(defaultBackupTarget)
}
}

return nil
}

func UpgradeResourcesStatus(namespace string, lhClient *lhclientset.Clientset, kubeClient *clientset.Clientset, resourceMaps map[string]interface{}) error {
if err := upgradeBackingImageStatus(namespace, lhClient, resourceMaps); err != nil {
return err
Expand Down
29 changes: 28 additions & 1 deletion webhook/resources/systembackup/mutator.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package systembackup

import (
"encoding/json"
"fmt"

"github.com/pkg/errors"
Expand Down Expand Up @@ -41,7 +42,33 @@ func (m *systemBackupMutator) Resource() admission.Resource {
}

func (m *systemBackupMutator) Create(request *admission.Request, newObj runtime.Object) (admission.PatchOps, error) {
return mutate(newObj)
sb, ok := newObj.(*longhorn.SystemBackup)
if !ok {
return nil, werror.NewInvalidError(fmt.Sprintf("%v is not a *longhorn.SystemBackup", newObj), "")
}

var patchOps admission.PatchOps
var err error
if patchOps, err = mutate(newObj); err != nil {
return nil, err
}

if len(sb.OwnerReferences) == 0 {
// system backup is always created with the default backup target.
backupTarget, err := m.ds.GetDefaultBackupTargetRO()
if err != nil {
return nil, err
}
btRef := datastore.GetOwnerReferencesForBackupTarget(backupTarget)
bytes, err := json.Marshal(btRef)
if err != nil {
err = errors.Wrapf(err, "failed to get JSON encoding for system backup %v ownerReferences", sb.Name)
return nil, werror.NewInvalidError(err.Error(), "")
}
patchOps = append(patchOps, fmt.Sprintf(`{"op": "replace", "path": "/metadata/ownerReferences", "value": %v}`, string(bytes)))
}

return patchOps, nil
}

func (m *systemBackupMutator) Update(request *admission.Request, oldObj runtime.Object, newObj runtime.Object) (admission.PatchOps, error) {
Expand Down

0 comments on commit d17b4ee

Please sign in to comment.