Skip to content

Commit

Permalink
Fix: data lost during engine upgrade/migration
Browse files Browse the repository at this point in the history
Do not cleanup replica folder if there are more than 1 replicas CR
for it even if all replica CRs are in active.

More details at longhorn/longhorn#7396 (comment)

longhorn-7396

Signed-off-by: Phan Le <phan.le@suse.com>
  • Loading branch information
PhanLe1010 committed Dec 21, 2023
1 parent 4e602f0 commit 80b7916
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
7 changes: 3 additions & 4 deletions controller/replica_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ func (rc *ReplicaController) syncReplica(key string) (err error) {
if replica.Spec.BackendStoreDriver == longhorn.BackendStoreDriverTypeV1 {
// Clean up the data directory if this is the active replica or if this inactive replica is the only one
// using it.
if (replica.Spec.Active || !hasMatchingActiveReplica(replica, rs)) && dataPath != "" {
if (replica.Spec.Active || !hasMatchingReplica(replica, rs)) && dataPath != "" {
// prevent accidentally deletion
if !strings.Contains(filepath.Base(filepath.Clean(dataPath)), "-") {
return fmt.Errorf("%v doesn't look like a replica data path", dataPath)
Expand Down Expand Up @@ -882,10 +882,9 @@ func (rc *ReplicaController) isResponsibleFor(r *longhorn.Replica) bool {
return isControllerResponsibleFor(rc.controllerID, rc.ds, r.Name, r.Spec.NodeID, r.Status.OwnerID)
}

func hasMatchingActiveReplica(replica *longhorn.Replica, replicas map[string]*longhorn.Replica) bool {
func hasMatchingReplica(replica *longhorn.Replica, replicas map[string]*longhorn.Replica) bool {
for _, r := range replicas {
if r.Spec.Active &&
r.Name != replica.Name &&
if r.Name != replica.Name &&
r.Spec.NodeID == replica.Spec.NodeID &&
r.Spec.DiskID == replica.Spec.DiskID &&
r.Spec.DiskPath == replica.Spec.DiskPath &&
Expand Down
6 changes: 3 additions & 3 deletions controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -2693,7 +2693,7 @@ func (c *VolumeController) upgradeEngineForVolume(v *longhorn.Volume, es map[str
if v.Status.State != longhorn.VolumeStateAttached || v.Status.Robustness != longhorn.VolumeRobustnessDegraded {
return nil
}
// Clean up inactive replica corresponding to non-existing active replica.
// Clean up inactive replica which doesn't have any matching replica.
// This will allow volume controller to schedule and rebuild a new active replica
// and make the volume healthy again for the live upgrade to be able to continue.
// https://github.com/longhorn/longhorn/issues/7012
Expand All @@ -2708,13 +2708,13 @@ func (c *VolumeController) upgradeEngineForVolume(v *longhorn.Volume, es map[str
var inactiveReplicaToCleanup *longhorn.Replica
for _, rName := range sortedReplicaNames {
r := rs[rName]
if !r.Spec.Active && !hasMatchingActiveReplica(r, rs) {
if !r.Spec.Active && !hasMatchingReplica(r, rs) {
inactiveReplicaToCleanup = r
break
}
}
if inactiveReplicaToCleanup != nil {
log.Infof("Cleaning up inactive replica %v which doesn't have corresponding active replica", inactiveReplicaToCleanup.Name)
log.Infof("Cleaning up inactive replica %v which doesn't have any matching replica", inactiveReplicaToCleanup.Name)
if err := c.deleteReplica(inactiveReplicaToCleanup, rs); err != nil {
return errors.Wrapf(err, "failed to cleanup inactive replica %v", inactiveReplicaToCleanup.Name)
}
Expand Down

0 comments on commit 80b7916

Please sign in to comment.