diff --git a/bzfs/bzfs.py b/bzfs/bzfs.py index aa2c547f..f1a8402a 100755 --- a/bzfs/bzfs.py +++ b/bzfs/bzfs.py @@ -2189,7 +2189,12 @@ def latest_common_snapshot(snapshots_with_guids: List[str], intersect_guids: Set cmd = p.split_args( f"{dst.sudo} {p.zfs_program} rollback -r {p.force_unmount} {p.force_hard}", latest_common_dst_snapshot ) - self.run_ssh_command(dst, log_debug, is_dry=p.dry_run, print_stdout=True, cmd=cmd) + try: + self.run_ssh_command(dst, log_debug, is_dry=p.dry_run, print_stdout=True, cmd=cmd) + except (subprocess.CalledProcessError, subprocess.TimeoutExpired, UnicodeDecodeError) as e: + no_sleep = self.clear_resumable_recv_state_if_necessary(dst_dataset, e.stderr) + # op isn't idempotent so retries regather current state from the start + raise RetryableError("Subprocess failed", no_sleep=no_sleep) from e if latest_src_snapshot and latest_src_snapshot == latest_common_src_snapshot: log.info(f"{tid} Already up-to-date: %s", dst_dataset) @@ -2568,6 +2573,16 @@ def clear_resumable_recv_state() -> bool: if all(marker in stderr for marker in markers): return clear_resumable_recv_state() + # Same cause as above, except that this error can occur during 'zfs rollback' + markers = [ + "cannot rollback to", + "clones of previous snapshots exist", + "use '-R' to force deletion of the following clones and dependents", + f"\n{dst_dataset}/%recv\n", + ] + if all(marker in stderr for marker in markers): + return clear_resumable_recv_state() + return False def _recv_resume_token(self, dst_dataset: str, retry_count: int) -> Tuple[Optional[str], List[str], List[str]]: