Skip to content

Commit

Permalink
rbd: issue resync only if the force flag is set
Browse files Browse the repository at this point in the history
During failover, we demote the volume on the primary cluster.
Because the image has not yet been promoted on the remote cluster,
RBD reports spurious split-brain errors. The Ceph-CSI resync will
then attempt to resync from the "known" secondary, and that will
cause data loss.

Signed-off-by: Madhu Rajanna <madhupr007@gmail.com>
(cherry picked from commit 3acaa01)
  • Loading branch information
Madhu-1 authored and mergify[bot] committed Jun 24, 2022
1 parent e706679 commit 471c134
Showing 1 changed file with 27 additions and 11 deletions.
38 changes: 27 additions & 11 deletions internal/rbd/replicationcontrollerserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -855,18 +855,11 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
ready = checkRemoteSiteStatus(ctx, mirrorStatus)
}

if resyncRequired(localStatus) {
err = rbdVol.resyncImage()
if err != nil {
log.ErrorLog(ctx, err.Error())

return nil, status.Error(codes.Internal, err.Error())
}
err = resyncVolume(localStatus, rbdVol, req.Force)
if err != nil {
log.ErrorLog(ctx, err.Error())

// If we issued a resync, return a non-final error as image needs to be recreated
// locally. Caller retries till RBD syncs an initial version of the image to
// report its status in the resync request.
return nil, status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
return nil, err
}

err = checkVolumeResyncStatus(localStatus)
Expand All @@ -886,6 +879,29 @@ func (rs *ReplicationServer) ResyncVolume(ctx context.Context,
return resp, nil
}

// resyncVolume issues a resync of the image when its local mirroring status
// calls for one, and only when the caller explicitly forces it.
//
// It returns nil when resyncRequired reports that no resync is needed. When a
// resync is needed, every path returns a gRPC status error:
//   - codes.FailedPrecondition if force is false; no resync is issued.
//   - codes.Internal if rbdVol.resyncImage fails.
//   - codes.Unavailable once the resync has been issued successfully.
func resyncVolume(localStatus librbd.SiteMirrorImageStatus, rbdVol *rbdVolume, force bool) error {
	if !resyncRequired(localStatus) {
		return nil
	}

	// Without the force option, do not resync; report the image state and ask
	// the caller to retry with Force.
	if !force {
		return status.Errorf(codes.FailedPrecondition,
			"image is in %q state, description (%s). Force resync to recover volume",
			localStatus.State, localStatus.Description)
	}

	if resyncErr := rbdVol.resyncImage(); resyncErr != nil {
		return status.Error(codes.Internal, resyncErr.Error())
	}

	// A resync was issued, so the image needs to be recreated locally. Return
	// a non-final error: the caller retries until RBD has synced an initial
	// version of the image and can report its status in the resync request.
	return status.Error(codes.Unavailable, "awaiting initial resync due to split brain")
}

func checkVolumeResyncStatus(localStatus librbd.SiteMirrorImageStatus) error {
// we are considering 2 states to check resync started and resync completed
// as below. all other states will be considered as an error state so that
Expand Down

0 comments on commit 471c134

Please sign in to comment.