Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(system-backup): handle snapshot of last backup removed #3478

Merged
merged 1 commit into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions controller/system_backup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,167 +324,167 @@
record.message = message
}

func (c *SystemBackupController) reconcile(name string, backupTargetClient engineapi.SystemBackupOperationInterface, backupTarget *longhorn.BackupTarget) (err error) {
systemBackup, err := c.ds.GetSystemBackup(name)
if err != nil {
if !apierrors.IsNotFound(err) {
return err
}
return nil
}

if !c.isResponsibleFor(systemBackup) {
return nil
}

log := getLoggerForSystemBackup(c.logger, systemBackup)

if systemBackup.Status.OwnerID != c.controllerID {
systemBackup.Status.OwnerID = c.controllerID
systemBackup, err = c.ds.UpdateSystemBackupStatus(systemBackup)
if err != nil {
// we don't mind others coming first
if apierrors.IsConflict(errors.Cause(err)) {
return nil
}
return err
}

log.Infof("System backup got new owner %v", c.controllerID)
}

record := &systemBackupRecord{}
existingSystemBackup := systemBackup.DeepCopy()
defer c.handleStatusUpdate(record, systemBackup, existingSystemBackup, err, log)

if !systemBackup.DeletionTimestamp.IsZero() &&
systemBackup.Status.State != longhorn.SystemBackupStateDeleting {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeNormal, longhorn.SystemBackupStateDeleting,
constant.EventReasonDeleting, SystemBackupMsgDeletingRemote,
)
return
}

// If the system backup is in the final state, we don't need to do anything for the backup target not found.
if backupTarget == nil && !systemBackupInFinalState(systemBackup) {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
constant.EventReasonFailedStarting, SystemBackupErrGetBackupTarget,
)
return fmt.Errorf(string(SystemBackupErrGetBackupTarget)+": %v", types.DefaultBackupTargetName)
}

tempBackupArchivePath := filepath.Join(SystemBackupTempDir, systemBackup.Name+types.SystemBackupExtension)
tempBackupDir := filepath.Join(SystemBackupTempDir, systemBackup.Name)

switch systemBackup.Status.State {
case longhorn.SystemBackupStateSyncing:
err = syncSystemBackupFromBackupTarget(systemBackup, backupTargetClient)
if err != nil {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
longhorn.SystemBackupConditionReasonSync, SystemBackupErrSync,
)
return err
}

c.updateSystemBackupRecord(record,
systemBackupRecordTypeNormal, longhorn.SystemBackupStateReady,
constant.EventReasonSynced, SystemBackupMsgSyncedBackupTarget,
)

case longhorn.SystemBackupStateNone:
// Sync the ready SystemBackups that are not in the current cluster.
if isSystemBackupFromRemoteBackupTarget(systemBackup) {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeNormal, longhorn.SystemBackupStateSyncing,
constant.EventReasonSyncing, SystemBackupMsgSyncingBackupTarget,
)

return
}

var longhornVersion string
longhornVersion, err = getSystemBackupVersionExistInRemoteBackupTarget(systemBackup, backupTargetClient)
if err != nil {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
constant.EventReasonStart, err.Error(),
)
return nil
}

if longhornVersion != "" {
if err := datastore.LabelSystemBackupVersion(longhornVersion, systemBackup); err != nil {
return err
}

_, err = c.ds.UpdateSystemBackup(systemBackup)
if err != nil {
return err
}
return
}

err = c.InitSystemBackup(systemBackup, log)
if err != nil {
return err
}

c.updateSystemBackupRecord(record,
systemBackupRecordTypeNormal, longhorn.SystemBackupStateVolumeBackup,
constant.EventReasonStart, SystemBackupMsgStarting,
)

case longhorn.SystemBackupStateVolumeBackup:
backups, err := c.BackupVolumes(systemBackup)
if err != nil {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
constant.EventReasonStart, err.Error(),
)
return nil
}

// TODO: handle error check
go c.WaitForVolumeBackupToComplete(backups, systemBackup) // nolint: errcheck

case longhorn.SystemBackupStateBackingImageBackup:
backupBackingImages, err := c.BackupBackingImage()
if err != nil {
c.updateSystemBackupRecord(record,
systemBackupRecordTypeError, longhorn.SystemBackupStateError,
constant.EventReasonStart, err.Error(),
)
return nil
}

go c.WaitForBackingImageBackupToComplete(backupBackingImages, systemBackup)

case longhorn.SystemBackupStateGenerating:
go c.GenerateSystemBackup(systemBackup, tempBackupArchivePath, tempBackupDir)

case longhorn.SystemBackupStateUploading:
go c.UploadSystemBackup(systemBackup, tempBackupArchivePath, tempBackupDir, backupTargetClient)

case longhorn.SystemBackupStateReady, longhorn.SystemBackupStateError:
cleanupLocalSystemBackupFiles(tempBackupArchivePath, tempBackupDir, log)

case longhorn.SystemBackupStateDeleting:
cleanupRemoteSystemBackupFiles(systemBackup, backupTargetClient, backupTarget, log)

cleanupLocalSystemBackupFiles(tempBackupArchivePath, tempBackupDir, log)

err = c.ds.RemoveFinalizerForSystemBackup(systemBackup)
if err != nil {
return err
}
}

return nil
}

Check notice on line 487 in controller/system_backup_controller.go

View check run for this annotation

codefactor.io / CodeFactor

controller/system_backup_controller.go#L327-L487

Complex Method
func systemBackupInFinalState(sb *longhorn.SystemBackup) bool {
// The state SystemBackupStateDeleting, as a final state, will clean up the system backup files locally and remotely if necessary,
// and the system backup will be deleted once the finalizer is removed.
Expand Down Expand Up @@ -915,11 +915,19 @@
// Retrieve last backup and its snapshot.
lastBackup, err := c.ds.GetBackup(volume.Status.LastBackup)
if err != nil {
if apierrors.IsNotFound(err) {
log.Warnf("Last Backup %v not found for volume %v", volume.Status.LastBackup, volume.Name)
return false, nil
}
return false, err
}

lastBackupSnapshot, err := c.ds.GetSnapshot(lastBackup.Status.SnapshotName)
if err != nil {
if apierrors.IsNotFound(err) {
log.Warnf("Snapshot %v not found for backup %v", lastBackup.Status.SnapshotName, lastBackup.Name)
return false, nil
}
return false, err
}

Expand Down
31 changes: 31 additions & 0 deletions controller/system_backup_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
existPersistentVolumes map[SystemRolloutCRName]*corev1.PersistentVolume
existVolumes map[SystemRolloutCRName]*longhorn.Volume
existBackingImages map[SystemRolloutCRName]*longhorn.BackingImage
existBackups map[string]*longhorn.Backup

expectError bool
expectErrorConditionMessage string
Expand All @@ -54,274 +55,304 @@
expectNewVolumBackupCount int
}

func (s *TestSuite) TestReconcileSystemBackup(c *C) {
datastore.SkipListerCheck = true
datastore.SystemBackupTimeout = 10 * time.Second
datastore.VolumeBackupTimeout = 10 * time.Second

rolloutOwnerID := TestNode1

tempDirs := []string{}
defer func() {
for _, dir := range tempDirs {
err := os.RemoveAll(dir)
c.Assert(err, IsNil)
}
}()

testCases := map[string]SystemBackupTestCase{
"system backup create": {
state: longhorn.SystemBackupStateNone,
expectState: longhorn.SystemBackupStateVolumeBackup,
},
"system backup create list backup failed": {
systemBackupName: TestSystemBackupNameListFailed,
state: longhorn.SystemBackupStateNone,
expectState: longhorn.SystemBackupStateError,
},
"system backup create volume backup if-not-present": {
state: longhorn.SystemBackupStateVolumeBackup,
volumeBackupPolicy: longhorn.SystemBackupCreateVolumeBackupPolicyIfNotPresent,
existVolumes: map[SystemRolloutCRName]*longhorn.Volume{
SystemRolloutCRName(TestVolumeName): {
Status: longhorn.VolumeStatus{
LastBackup: "",
},
},
},
expectState: longhorn.SystemBackupStateBackingImageBackup,
expectNewVolumBackupCount: 1,
},
"system backup create volume backup if-not-present when backup exists": {
state: longhorn.SystemBackupStateVolumeBackup,
volumeBackupPolicy: longhorn.SystemBackupCreateVolumeBackupPolicyIfNotPresent,
existVolumes: map[SystemRolloutCRName]*longhorn.Volume{
SystemRolloutCRName(TestVolumeName): {
Status: longhorn.VolumeStatus{
LastBackup: "exists",
},
},
},
existBackups: map[string]*longhorn.Backup{
"exists": {
Status: longhorn.BackupStatus{
State: longhorn.BackupStateCompleted,
SnapshotName: "exists",
VolumeName: TestVolumeName,
},
},
},
expectState: longhorn.SystemBackupStateBackingImageBackup,
expectNewVolumBackupCount: 0,
},
"system backup create volume backup always": {
state: longhorn.SystemBackupStateVolumeBackup,
volumeBackupPolicy: longhorn.SystemBackupCreateVolumeBackupPolicyAlways,
existVolumes: map[SystemRolloutCRName]*longhorn.Volume{
SystemRolloutCRName(TestVolumeName): {
Status: longhorn.VolumeStatus{
LastBackup: "exists",
},
},
},
existBackups: map[string]*longhorn.Backup{
"exists": {
Status: longhorn.BackupStatus{
State: longhorn.BackupStateCompleted,
SnapshotName: "exists",
VolumeName: TestVolumeName,
},
},
},
expectState: longhorn.SystemBackupStateBackingImageBackup,
expectNewVolumBackupCount: 1,
},
"system backup create backingimage backup": {
state: longhorn.SystemBackupStateBackingImageBackup,
expectState: longhorn.SystemBackupStateGenerating,
},
"system backup generate": {
state: longhorn.SystemBackupStateGenerating,
expectState: longhorn.SystemBackupStateUploading,
},
"system backup upload": {
state: longhorn.SystemBackupStateUploading,
expectState: longhorn.SystemBackupStateReady,
},
"system backup upload file exceed timeout": {
systemBackupName: TestSystemBackupNameUploadExceedTimeout,
state: longhorn.SystemBackupStateUploading,
expectState: longhorn.SystemBackupStateReady,
},
"system backup upload file failed": {
systemBackupName: TestSystemBackupNameUploadFailed,
state: longhorn.SystemBackupStateUploading,
expectState: longhorn.SystemBackupStateError,
expectErrorConditionMessage: fmt.Sprintf("%v:", SystemBackupErrTimeoutUpload),
},
"system backup upload get config failed": {
systemBackupName: TestSystemBackupNameGetConfigFailed,
state: longhorn.SystemBackupStateUploading,
expectState: longhorn.SystemBackupStateError,
expectErrorConditionMessage: fmt.Sprintf("%v: %v:", SystemBackupErrTimeoutUpload, SystemBackupErrGetConfig),
},
"system backup ready": {
state: longhorn.SystemBackupStateReady,
},
"system backup error": {
state: longhorn.SystemBackupStateError,
},
"system backup with deletion timestamp": {
state: longhorn.SystemBackupStateReady,
isDeleting: true,
expectState: longhorn.SystemBackupStateDeleting,
},
"system backup delete": {
state: longhorn.SystemBackupStateDeleting,
isDeleting: true,
expectRemove: true,
},
"system backup from backup target": {
state: longhorn.SystemBackupStateNone,
expectState: longhorn.SystemBackupStateSyncing,
systemBackupVersion: TestSystemBackupLonghornVersion,
},
"system backup syncing": {
state: longhorn.SystemBackupStateSyncing,
expectState: longhorn.SystemBackupStateReady,
systemBackupVersion: TestSystemBackupLonghornVersion,
},
"system backup exist in remote": {
state: longhorn.SystemBackupStateNone,
expectState: longhorn.SystemBackupStateNone,
isExistInRemote: true,
},
"system backup PersistentVolume source not from CSI": {
state: longhorn.SystemBackupStateGenerating,
expectState: longhorn.SystemBackupStateUploading,

existPersistentVolumes: map[SystemRolloutCRName]*corev1.PersistentVolume{
SystemRolloutCRName(TestPVName): {
Spec: corev1.PersistentVolumeSpec{
ClaimRef: &corev1.ObjectReference{
Name: TestPVCName,
Namespace: TestNamespace,
},
StorageClassName: TestStorageClassName,
PersistentVolumeSource: corev1.PersistentVolumeSource{
HostPath: &corev1.HostPathVolumeSource{
Path: "/fake",
},
},
},
},
},
},
}

for name, tc := range testCases {
if tc.systemBackupName == "" {
tc.systemBackupName = TestSystemBackupName
}

if tc.expectState == "" {
tc.expectState = tc.state
}

if tc.controllerID == "" {
tc.controllerID = rolloutOwnerID
}

backupTargetClient := &FakeSystemBackupTargetClient{
name: tc.systemBackupName,
}
if tc.isExistInRemote {
backupTargetClient.version = TestSystemBackupLonghornVersion
}

fmt.Printf("testing %v\n", name)

kubeClient := fake.NewSimpleClientset()
lhClient := lhfake.NewSimpleClientset()
extensionsClient := apiextensionsfake.NewSimpleClientset()

informerFactories := util.NewInformerFactories(TestNamespace, kubeClient, lhClient, controller.NoResyncPeriodFunc())

fakeSystemRolloutNamespace(c, informerFactories.KubeInformerFactory, kubeClient)
fakeSystemRolloutSettingDefaultEngineImage(c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutBackupTargetDefault(c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutStorageClassesDefault(c, informerFactories.KubeInformerFactory, kubeClient)

fakeSystemRolloutVolumes(tc.existVolumes, c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutBackups(tc.existBackups, c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutBackingImages(tc.existBackingImages, c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutPersistentVolumes(tc.existPersistentVolumes, c, informerFactories.KubeInformerFactory, kubeClient)

systemBackupController, err := newFakeSystemBackupController(lhClient, kubeClient, extensionsClient, informerFactories, tc.controllerID)
c.Assert(err, IsNil)

systemBackup := fakeSystemBackup(tc.systemBackupName, rolloutOwnerID, tc.systemBackupVersion, tc.isDeleting, tc.volumeBackupPolicy, tc.state, c, informerFactories.LhInformerFactory, lhClient)
if tc.notExist {
systemBackup = fakeSystemBackup("none", rolloutOwnerID, tc.systemBackupVersion, tc.isDeleting, tc.volumeBackupPolicy, tc.state, c, informerFactories.LhInformerFactory, lhClient)
}

systemBackupTempDir, err := os.MkdirTemp(os.TempDir(), fmt.Sprintf("*-%v", TestSystemBackupName))
c.Assert(err, IsNil)
tempDirs = append(tempDirs, systemBackupTempDir)

archievePath := filepath.Join(systemBackupTempDir, tc.systemBackupName+".zip")
tempDir := filepath.Join(systemBackupTempDir, tc.systemBackupName)

switch systemBackup.Status.State {
case longhorn.SystemBackupStateVolumeBackup:
if tc.existBackups != nil {
existBackupSnap := &longhorn.Snapshot{
ObjectMeta: metav1.ObjectMeta{Name: "exists"},
Spec: longhorn.SnapshotSpec{Volume: TestVolumeName},
Status: longhorn.SnapshotStatus{
ReadyToUse: true,
CreationTime: metav1.Now().Time.Format(time.RFC3339),
},
}
fakeSystemRolloutSnapshot(existBackupSnap, c, informerFactories.LhInformerFactory, lhClient)
}
backups, _ := systemBackupController.BackupVolumes(systemBackup)

for _, backup := range backups {
backup.Status.State = longhorn.BackupStateCompleted
backup.Status.SnapshotName = backup.Spec.SnapshotName
snapshot, err := lhClient.LonghornV1beta2().Snapshots(TestNamespace).Get(context.TODO(), backup.Status.SnapshotName, metav1.GetOptions{})
c.Assert(err, IsNil)
tc.existVolumes[SystemRolloutCRName(snapshot.Spec.Volume)].Status.LastBackup = backup.Name
fakeSystemRolloutSnapshot(snapshot, c, informerFactories.LhInformerFactory, lhClient)
}
fakeSystemRolloutVolumes(tc.existVolumes, c, informerFactories.LhInformerFactory, lhClient)
fakeSystemRolloutBackups(backups, c, informerFactories.LhInformerFactory, lhClient)
err = systemBackupController.WaitForVolumeBackupToComplete(backups, systemBackup)
c.Assert(err, IsNil)

case longhorn.SystemBackupStateBackingImageBackup:
backupBackingImages, _ := systemBackupController.BackupBackingImage()
for _, backupBackingImage := range backupBackingImages {
backupBackingImage.Status.State = longhorn.BackupStateCompleted
}
fakeSystemRolloutBackupBackingImages(backupBackingImages, c, informerFactories.LhInformerFactory, lhClient)
systemBackupController.WaitForBackingImageBackupToComplete(backupBackingImages, systemBackup)

case longhorn.SystemBackupStateGenerating:
systemBackupController.GenerateSystemBackup(systemBackup, archievePath, tempDir)

case longhorn.SystemBackupStateUploading:
systemBackupController.UploadSystemBackup(systemBackup, archievePath, tempDir, backupTargetClient)

default:
defaultBackupTarget, err := lhClient.LonghornV1beta2().BackupTargets(TestNamespace).Get(context.TODO(), types.DefaultBackupTargetName, metav1.GetOptions{})
c.Assert(err, IsNil)
err = systemBackupController.reconcile(tc.systemBackupName, backupTargetClient, defaultBackupTarget)
if tc.expectError {
c.Assert(err, NotNil)
} else {
c.Assert(err, IsNil)
}
}

systemBackup, err = lhClient.LonghornV1beta2().SystemBackups(TestNamespace).Get(context.TODO(), tc.systemBackupName, metav1.GetOptions{})
if tc.notExist {
c.Assert(err, NotNil)
} else {
c.Assert(err, IsNil)
c.Assert(systemBackup.Status.State, Equals, tc.expectState)
}

checkFinalizer := !util.FinalizerExists(longhornFinalizerKey, systemBackup) == (tc.expectRemove || tc.notExist)
c.Assert(checkFinalizer, Equals, true)

if tc.expectErrorConditionMessage != "" {
errCondition := types.GetCondition(systemBackup.Status.Conditions, longhorn.SystemBackupConditionTypeError)
c.Assert(errCondition.Status, Equals, longhorn.ConditionStatusTrue)
c.Assert(strings.HasPrefix(errCondition.Message, tc.expectErrorConditionMessage), Equals, true)
}

if tc.isExistInRemote {
c.Assert(systemBackup.Labels[types.GetVersionLabelKey()], NotNil)
}

volumeBackups, err := lhClient.LonghornV1beta2().Backups(TestNamespace).List(context.TODO(), metav1.ListOptions{})
c.Assert(err, IsNil)
c.Assert(len(volumeBackups.Items), Equals, tc.expectNewVolumBackupCount)
}
}

Check notice on line 355 in controller/system_backup_controller_test.go

View check run for this annotation

codefactor.io / CodeFactor

controller/system_backup_controller_test.go#L58-L355

Complex Method
func newFakeSystemBackupController(lhClient *lhfake.Clientset, kubeClient *fake.Clientset, extensionsClient *apiextensionsfake.Clientset,
informerFactories *util.InformerFactories, controllerID string) (*SystemBackupController, error) {
ds := datastore.NewDataStore(TestNamespace, lhClient, kubeClient, extensionsClient, informerFactories)
Expand Down
Loading