diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d8d18693efcf6..3e81444ea3d0b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -164,6 +164,10 @@ README* @ceph/doc-writers /src/cls/rgw_gc @ceph/rgw /src/cls/user @ceph/rgw /src/cls/version @ceph/rgw +/src/mrgw.sh @ceph/rgw +/src/mrun @ceph/rgw +/src/mstart.sh @ceph/rgw +/src/mstop.sh @ceph/rgw /src/rgw @ceph/rgw /src/s3select @ceph/rgw /src/spawn @ceph/rgw diff --git a/.github/labeler.yml b/.github/labeler.yml index 1b50ff7c5a391..cc32be3850126 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -263,6 +263,19 @@ rbd: - systemd/rbdmap.service.in - udev/50-rbd.rules +nvmeof: + - qa/suites/nvmeof/** + - qa/tasks/nvmeof.py + - qa/workunits/nvmeof/** + - src/ceph_nvmeof_monitor_client.cc + - src/cephadm/cephadmlib/daemons/nvmeof.py + - src/messages/MNVMeofGw* + - src/mon/NVMeofGw* + - src/nvmeof/** + - src/pybind/mgr/cephadm/services/nvmeof.py + - src/pybind/mgr/cephadm/templates/services/nvmeof/** + - src/tools/ceph-dencoder/nvmeof* + rgw: - qa/suites/rgw/** - qa/tasks/rgw* @@ -275,6 +288,9 @@ rgw: - src/cls/rgw_gc/** - src/cls/timeindex/** - src/mrgw.sh + - src/mrun + - src/mstart.sh + - src/mstop.sh - src/rgw/** - src/test/cls_rgw/** - src/test/librgw_* diff --git a/.github/workflows/check-license.yml b/.github/workflows/check-license.yml new file mode 100644 index 0000000000000..d201ed7135439 --- /dev/null +++ b/.github/workflows/check-license.yml @@ -0,0 +1,13 @@ +--- +name: "Check Incompatible Licenses" +on: [pull_request] + +jobs: + check_pr: + runs-on: ubuntu-latest + steps: + - name: Check PR + uses: JJ/github-pr-contains-action@526dfe784d8604ea1c39b6c26609074de95b1ffd # releases/v14.1 + with: + github-token: ${{github.token}} + diffDoesNotContain: "GNU General Public License" diff --git a/.githubmap b/.githubmap index 724de9c002d4c..1b622f68cb080 100644 --- a/.githubmap +++ b/.githubmap @@ -12,6 +12,7 @@ aaSharma14 Aashish Sharma aclamk Adam Kupczyk adamemerson Adam C.
Emerson adk3798 Adam King +afreen23 Afreen Misbah ajarr Ramana Raja alfonsomthd Alfonso Martínez alfredodeza Alfredo Deza @@ -27,7 +28,7 @@ b-ranto Boris Ranto badone Brad Hubbard baruza Barbora Ančincová bassamtabbara Bassam Tabbara -batrick Patrick Donnelly +batrick Patrick Donnelly bigjust Justin Caratzas bk201 Kiefer Chang BlaineEXE Blaine Gardner @@ -47,6 +48,7 @@ Devp00l Stephan Müller dillaman Jason Dillaman djgalloway David Galloway dmick Dan Mick +dnyanee1997 Dnyaneshwari talwekar dragonylffly Li Wang dsavineau Dimitri Savineau dvanders Dan van der Ster @@ -96,6 +98,7 @@ mikechristie Mike Christie mogeb Mohamad Gebai MrFreezeex Arthur Outhenin-Chalandre myoungwon Myoungwon Oh +nmunet Naman Munet Naveenaidu Naveen Naidu neha-ojha Neha Ojha NitzanMordhai Nitzan Mordechai @@ -109,6 +112,8 @@ p-se Patrick Seidensal pcuzner Paul Cuzner Pegonzal Pedro Gonzalez Gomez pereman2 Pere Diaz Bou +prgoel-code Prachi prgoel@redhat.com +pujaoshahu Puja Shahu rchagam Anjaneya Chagam renhwztetecs huanwen ren ricardoasmarques Ricardo Marques diff --git a/.mailmap b/.mailmap index 1c4ac95340abd..75a1610b244aa 100644 --- a/.mailmap +++ b/.mailmap @@ -24,6 +24,7 @@ Adam Kupczyk Adam Kupczyk Adam Twardowski Adir Lev +Afreen Misbah Ahoussi Armand Ailing Zhang Aishwarya Mathuria amathuria @@ -168,6 +169,7 @@ Dhairya Parmar dparmar18 Dingdang Zhang Dmitry Smirnov Dmitry Yatsushkevich +Dnyaneshwari talwekar Dominik Hannen Dongdong Tao Dongdong Tao @@ -508,6 +510,7 @@ Myoungwon Oh Myoungwon Oh Na Xie Nag Pavan Chilakam <55574442+nagpavan-chilakam@users.noreply.github.com> +Naman Munet Nancy Su Nathan Cutler Nathan Cutler @@ -544,7 +547,8 @@ Pan Liu Parth Arora parth-gr Pascal de Bruijn Patience Warnick -Patrick Donnelly +Patrick Donnelly +Patrick Donnelly Patrick McGarry Patrick McGarry Patrick Seidensal @@ -572,6 +576,8 @@ Pooja Gautam Pritha Srivastava Pritha Srivastava Pritha Srivastava +Prachi prgoel@redhat.com +Puja Shahu Qi Liang Hong Qiankun Zheng Qinfei Liu <18138800392@163.com> diff --git a/.organizationmap b/.organizationmap index 3a601f4e2b2bf..4f3f7dfa7d945 100644 --- a/.organizationmap +++ b/.organizationmap @@ -346,21 +346,27 @@ Huayun Zheng Yin Huazhong University of Science and Technology Luo Runbing HXT Semiconductor Jiang Yutang IBM Adam Kupczyk +IBM Afreen Misbah IBM Aliaksei Makarau IBM Andrew Solomon +IBM Dnyaneshwari talwekar IBM Guillaume Abrioux IBM Jonas Pfefferle IBM Laura Flores IBM Martin Ohmacht IBM Michel Normand +IBM Naman Munet IBM Naveen Naidu IBM Neeraj Pratap Singh IBM Or Ozeri IBM Paul Cuzner +IBM Prachi Goel +IBM Puja Shahu IBM Samuel Matzek IBM Shraddha Agrawal IBM Kushal Deb IBM Shweta Bhosale +IBM Patrick Donnelly IBM Sunil Angadi IBM Teoman Onay IBM Ulrich Weigand @@ -584,6 +590,7 @@ Red Hat Adam King Red Hat Adam King Red Hat Adam Kupczyk Red Hat Ademar de Souza Reis Jr +Red Hat Afreen Misbah Red Hat Aishwarya Mathuria Red Hat Albin Antony Red Hat Alex Elder @@ -620,6 +627,7 @@ Red Hat Deepika Upadhyay Red Hat Dhairya Parmar Red Hat Dimitri Savineau Red Hat Divyansh Kamboj +Red Hat Dnyaneshwari talwekar Red Hat Douglas Fuller Red Hat Ernesto Puerta Red Hat Erwan Velu @@ -685,6 +693,7 @@ Red Hat Mike Hackett Red Hat Mike Perez Red Hat Milan Broz Red Hat Milind Changire +Red Hat Naman Munet Red Hat Nathan Weinberg Red Hat Neeraj Pratap Singh Red Hat Neha Ojha @@ -708,9 +717,11 @@ Red Hat Pere Diaz Bou Red Hat Pete Zaitcev Red Hat Petr Lautrbach Red Hat Petr Machata +Red Hat Prachi prgoel@redhat.com Red Hat Prasanna Kumar Kalever Red Hat Prashant D Red Hat Pritha 
Srivastava +Red Hat Puja Shahu Red Hat Radoslaw Zarzynski Red Hat Rafael Quintero Red Hat Ramakrishnan Periyasamy diff --git a/.peoplemap b/.peoplemap index 507f50edb43e8..418e8505fb49c 100644 --- a/.peoplemap +++ b/.peoplemap @@ -73,5 +73,5 @@ Yehuda Sadeh Yehuda Sadeh Yuri Weinstein Yuri Weinstein Zhi Zhang Zhi (David) Zhang Zheng Yin Zheng Yin -Patrick Donnelly Patrick Donnelly +Patrick Donnelly Patrick Donnelly Patrick Donnelly Myoungwon Oh Myoungwon Oh Myoungwon Oh diff --git a/CodingStyle b/CodingStyle index 659298f0e5ae4..019d23c7703dc 100644 --- a/CodingStyle +++ b/CodingStyle @@ -108,6 +108,12 @@ by section. portability since `#pragma once` is widely supported and is known to work on GCC and Clang. +* Header Files -> Forward declarations: + + Forward declarations of structs, unions, classes and enums can be + used to reduce header dependencies. This speeds up compile times + because the compiler has to process less code. + The following guidelines have not been followed in the legacy code, but are worth mentioning and should be followed strictly for new code: diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 1a4e26e747fb2..8af2a262dff91 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -26,6 +26,14 @@ - osd_op_num_threads_per_shard_hdd = 5 (was 1) For more details see https://tracker.ceph.com/issues/66289. +* CephFS: Modifying the FS setting variable "max_mds" when a cluster is + unhealthy now requires users to pass the confirmation flag + (--yes-i-really-mean-it). This has been added as a precaution to tell the + users that modifying "max_mds" may not help with troubleshooting or recovery + effort. Instead, it might further destabilize the cluster. + + + >=19.0.0 * cephx: key rotation is now possible using `ceph auth rotate`. Previously, diff --git a/SubmittingPatches-backports.rst b/SubmittingPatches-backports.rst index 0f96aec65c4f8..bb55088cb5fac 100644 --- a/SubmittingPatches-backports.rst +++ b/SubmittingPatches-backports.rst @@ -121,14 +121,11 @@ If you do not have sufficient permissions to modify any field of the tracker issue, just add a comment describing what changes you would like to make. Someone with permissions will make the necessary modifications on your behalf. -For straightforward backports, that's all that you (as the developer of the fix) -need to do. Volunteers from the `Stable Releases and Backports team`_ will -proceed to create Backport issues to track the necessary backports and stage the -backports by opening GitHub PRs with the cherry-picks. If you don't want to -wait, and provided you have sufficient permissions at https://tracker.ceph.com, -you can `create Backport tracker issues` and `stage backports`_ yourself. In -that case, read on. - +Authors of pull requests are responsible for creating associated backport pull +requests. As long as you have sufficient permissions at +https://tracker.ceph.com, you can `create Backport tracker issues` and `stage +backports`_ yourself. Read these linked sections to learn how to create +backport tracker issues and how to stage backports: .. _`create backport tracker issues`: .. 
_`backport tracker issue`: @@ -146,10 +143,7 @@ issues can be created in the backport tracker issue for tracking the backporting Under ordinary circumstances, the developer who merges the ``main`` PR will flag the ``main`` branch tracker issue for backport by changing the Status to "Pending -Backport", and volunteers from the `Stable Releases and Backports team`_ -periodically create backport tracker issues by running the -``backport-create-issue`` script. They also do the actual backporting. But that -does take time and you may not want to wait. +Backport". You might be tempted to forge ahead and create the backport issues yourself. Please don't do that - it is difficult (bordering on impossible) to get all the @@ -360,20 +354,11 @@ Once the backport PR is open, the first order of business is to set the Milestone tag to the stable release the backport PR is targeting. For example, if the PR is targeting "nautilus", set the Milestone tag to "nautilus". -If you don't have sufficient GitHub permissions to set the Milestone, don't -worry. Members of the `Stable Releases and Backports team`_ periodically run -a script (``ceph-backport.sh --milestones``) which scans all PRs targetting stable -branches and automatically adds the correct Milestone tag if it is missing. - Next, check which component label was applied to the ``main`` PR corresponding to this backport, and double-check that that label is applied to the backport PR as well. For example, if the ``main`` PR carries the component label "core", the backport PR should also get that label. -In general, it is the responsibility of the `Stable Releases and Backports -team`_ to ensure that backport PRs are properly labelled. If in doubt, just -leave the labelling to them. - .. _`backport PR reviewing`: .. _`backport PR testing`: .. _`backport PR merging`: @@ -381,9 +366,8 @@ leave the labelling to them. Reviewing, testing, and merging of backport PRs ----------------------------------------------- -Once your backport PR is open and the Milestone is set properly, the -`Stable Releases and Backports team` will take care of getting the PR -reviewed and tested. Once the PR is reviewed and tested, it will be merged. +Once your backport PR is open, it will be reviewed and tested. When the PR has +been reviewed and tested, it will be merged. If you would like to facilitate this process, you can solicit reviews and run integration tests on the PR. In this case, add comments to the PR describing the @@ -394,22 +378,3 @@ it will be merged. Even if you have sufficient GitHub permissions to merge the PR, please do *not* merge it yourself. (Uncontrolled merging to stable branches unnecessarily complicates the release preparation process, which is done by volunteers.) - - -Stable Releases and Backports team ----------------------------------- - -Ceph has a `Stable Releases and Backports`_ team, staffed by volunteers, -which is charged with maintaining the stable releases and backporting bugfixes -from the ``main`` branch to them. (That team maintains a wiki, accessible by -clicking the `Stable Releases and Backports`_ link, which describes various -workflows in the backporting lifecycle.) - -.. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki - -Ordinarily, it is enough to fill out the "Backport" field in the bug (tracker -issue). The volunteers from the Stable Releases and Backports team will -backport the fix, run regression tests on it, and include it in one or more -future point releases. 
- - diff --git a/container/build.sh b/container/build.sh index 7c97e2261c16f..5edf469d2d2e4 100755 --- a/container/build.sh +++ b/container/build.sh @@ -136,9 +136,9 @@ if [[ ${CI_CONTAINER} == "true" ]] ; then branch_repo_tag=$repopath/ceph:${BRANCH} sha1_repo_tag=$repopath/ceph:${CEPH_SHA1} - if [[ "${ARCH}" == "aarch64" ]] ; then - branch_repo_tag=${branch_repo_tag}-aarch64 - sha1_repo_tag=${sha1_repo_tag}-aarch64 + if [[ "${ARCH}" == "arm64" ]] ; then + branch_repo_tag=${branch_repo_tag}-arm64 + sha1_repo_tag=${sha1_repo_tag}-arm64 fi podman tag ${image_id} ${full_repo_tag} diff --git a/doc/cephadm/operations.rst b/doc/cephadm/operations.rst index 3b117c1bd6a60..420ee655ac8ba 100644 --- a/doc/cephadm/operations.rst +++ b/doc/cephadm/operations.rst @@ -734,3 +734,72 @@ Purge ceph daemons from all hosts in the cluster # For each host: cephadm rm-cluster --force --zap-osds --fsid + + +Replacing a device +================== + +The ``ceph orch device replace`` command automates the process of replacing the underlying device of an OSD. +Previously, this process required manual intervention at various stages. +With this new command, all necessary operations are performed automatically, streamlining the replacement process +and improving the overall user experience. + +.. note:: This only supports LVM-based deployed OSD(s) + +.. prompt:: bash # + + ceph orch device replace + +In the case the device being replaced is shared by multiple OSDs (eg: DB/WAL device shared by multiple OSDs), the orchestrator will warn you. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd + + Error EINVAL: /dev/vdd is a shared device. + Replacing /dev/vdd implies destroying OSD(s): ['0', '1']. + Please, *be very careful*, this can be a very dangerous operation. + If you know what you are doing, pass --yes-i-really-mean-it + +If you know what you are doing, you can go ahead and pass ``--yes-i-really-mean-it``. + +.. prompt:: bash # + + [ceph: root@ceph /]# ceph orch device replace osd-1 /dev/vdd --yes-i-really-mean-it + Scheduled to destroy osds: ['6', '7', '8'] and mark /dev/vdd as being replaced. + +``cephadm`` will make ``ceph-volume`` zap and destroy all related devices and mark the corresponding OSD as ``destroyed`` so the +different OSD(s) ID(s) will be preserved: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph osd tree + ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF + -1 0.97659 root default + -3 0.97659 host devel-1 + 0 hdd 0.29300 osd.0 destroyed 1.00000 1.00000 + 1 hdd 0.29300 osd.1 destroyed 1.00000 1.00000 + 2 hdd 0.19530 osd.2 up 1.00000 1.00000 + 3 hdd 0.19530 osd.3 up 1.00000 1.00000 + +The device being replaced is finally seen as ``being replaced`` preventing ``cephadm`` from redeploying the OSDs too fast: + +.. prompt:: bash # + + [ceph: root@ceph-1 /]# ceph orch device ls + HOST PATH TYPE DEVICE ID SIZE AVAILABLE REFRESHED REJECT REASONS + osd-1 /dev/vdb hdd 200G Yes 13s ago + osd-1 /dev/vdc hdd 200G Yes 13s ago + osd-1 /dev/vdd hdd 200G Yes 13s ago Is being replaced + osd-1 /dev/vde hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + osd-1 /dev/vdf hdd 200G No 13s ago Has a FileSystem, Insufficient space (<10 extents) on vgs, LVM detected + +If for any reason you need to clear the 'device replace header' on a device, then you can use ``ceph orch device replace --clear``: + +.. 
prompt:: bash # + + [ceph: root@devel-1 /]# ceph orch device replace devel-1 /dev/vdk --clear + Replacement header cleared on /dev/vdk + [ceph: root@devel-1 /]# + +After that, ``cephadm`` will redeploy the OSD service spec within a few minutes (unless the service is set to ``unmanaged``). diff --git a/doc/cephfs/administration.rst b/doc/cephfs/administration.rst index 5760e67f73e64..07646bff06786 100644 --- a/doc/cephfs/administration.rst +++ b/doc/cephfs/administration.rst @@ -61,10 +61,17 @@ is a subset of the same information from the ``ceph fs dump`` command. :: - ceph fs set + ceph fs set [--yes-i-really-mean-it] Change a setting on a file system. These settings are specific to the named -file system and do not affect other file systems. +file system and do not affect other file systems. The confirmation flag is only +needed when changing ``max_mds`` while the cluster is unhealthy. + +.. note:: It is mandatory to pass the confirmation flag (--yes-i-really-mean-it) + when modifying the FS setting variable ``max_mds`` while the cluster is unhealthy. + It has been added as a precaution to tell users that modifying ``max_mds`` + during troubleshooting or recovery might not help. Instead, it might + further destabilize the cluster. :: diff --git a/doc/cephfs/cephfs-journal-tool.rst b/doc/cephfs/cephfs-journal-tool.rst index 4ad7304481f7f..3ae1139ceac2c 100644 --- a/doc/cephfs/cephfs-journal-tool.rst +++ b/doc/cephfs/cephfs-journal-tool.rst @@ -105,12 +105,12 @@ Example: header get/set "write_pos": 4274947, "expire_pos": 4194304, "trimmed_pos": 4194303, + "stream_format": 1, "layout": { "stripe_unit": 4194304, - "stripe_count": 4194304, + "stripe_count": 1, "object_size": 4194304, - "cas_hash": 4194304, - "object_stripe_unit": 4194304, - "pg_pool": 4194304}} + "pool_id": 2, + "pool_ns": ""}} # cephfs-journal-tool header set trimmed_pos 4194303 Updating trimmed_pos 0x400000 -> 0x3fffff diff --git a/doc/cephfs/troubleshooting.rst b/doc/cephfs/troubleshooting.rst index 34de1b7501df9..78d0a8f54d336 100644 --- a/doc/cephfs/troubleshooting.rst +++ b/doc/cephfs/troubleshooting.rst @@ -128,6 +128,11 @@ things to do: That prevents any clients from establishing new sessions with the MDS. +* **Don't tweak max_mds** Modifying the FS setting variable ``max_mds`` is + sometimes perceived as a good step during a troubleshooting or recovery effort. + Instead, doing so might further destabilize the cluster. If ``max_mds`` must + be changed in such circumstances, run the command to change ``max_mds`` with + the confirmation flag (``--yes-i-really-mean-it``). Expediting MDS journal trim diff --git a/doc/dev/cephfs-mirroring.rst b/doc/dev/cephfs-mirroring.rst index a804a0075995f..e09fed213f230 100644 --- a/doc/dev/cephfs-mirroring.rst +++ b/doc/dev/cephfs-mirroring.rst @@ -17,12 +17,10 @@ Key Idea -------- For a given snapshot pair in a directory, `cephfs-mirror` daemon will rely on -readdir diff to identify changes in a directory tree. The diffs are applied to +`CephFS Snapdiff Feature` to identify changes in a directory tree. The diffs are applied to directory in the remote file system thereby only synchronizing files that have changed between two snapshots. -This feature is tracked here: https://tracker.ceph.com/issues/47034. - Currently, snapshot data is synchronized by bulk copying to the remote filesystem. @@ -407,3 +405,5 @@ Feature Status -------------- `cephfs-mirror` daemon is built by default (follows `WITH_CEPHFS` CMake rule). + +..
_CephFS Snapdiff Feature: https://croit.io/blog/cephfs-snapdiff-feature diff --git a/doc/dev/developer_guide/essentials.rst b/doc/dev/developer_guide/essentials.rst index cbde8779a66da..7cce4c6f898ff 100644 --- a/doc/dev/developer_guide/essentials.rst +++ b/doc/dev/developer_guide/essentials.rst @@ -287,16 +287,13 @@ See :ref:`kubernetes-dev` Backporting ----------- -All bugfixes should be merged to the ``main`` branch before being -backported. To flag a bugfix for backporting, make sure it has a -`tracker issue`_ associated with it and set the ``Backport`` field to a -comma-separated list of previous releases (e.g. "hammer,jewel") that you think -need the backport. -The rest (including the actual backporting) will be taken care of by the -`Stable Releases and Backports`_ team. +All bugfixes should be merged to the ``main`` branch before being backported. +To flag a bugfix for backporting, make sure it has a `tracker issue`_ +associated with it and set the ``Backport`` field to a comma-separated list of +previous releases (e.g. "hammer,jewel") that you think need the backport. You +are responsible for the backporting of pull requests that you raise. .. _`tracker issue`: http://tracker.ceph.com/ -.. _`Stable Releases and Backports`: http://tracker.ceph.com/projects/ceph-releases/wiki Dependabot ---------- diff --git a/doc/dev/radosgw/bucket_index.rst b/doc/dev/radosgw/bucket_index.rst index 6764641e0f50e..ceff57b58cfc8 100644 --- a/doc/dev/radosgw/bucket_index.rst +++ b/doc/dev/radosgw/bucket_index.rst @@ -32,7 +32,7 @@ For a given bucket, the index may be split into several rados objects, called bu The default shard count for new buckets is 11, but can be overridden in the zonegroup's ``bucket_index_max_shards`` or ceph.conf's ``rgw_override_bucket_index_max_shards``. As the number of objects in a bucket grows, its index shard count will also increase as a result of dynamic resharding. -Information about the bucket's index object layout is stored in ``RGWBucketInfo`` as ``struct rgw::BucketLayout`` from ``src/rgw/rgw_bucket_layout.h``. The resharding logic is in ``src/rgw/rgw_reshard.cc``. +Information about the bucket's index object layout is stored in ``RGWBucketInfo`` as ``struct rgw::BucketLayout`` from ``src/rgw/rgw_bucket_layout.h``. The resharding logic is in ``src/rgw/driver/rados/rgw_reshard.cc``. ----------------- Index Transaction @@ -46,7 +46,7 @@ To keep the bucket index consistent, all object writes or deletes must also upda Object writes and deletes may race with each other, so a given object may have more than one prepared transaction at a time. RGW considers an object entry to be 'pending' if there are any outstanding transactions, or 'completed' otherwise. -This transaction is implemented in ``src/rgw/rgw_rados.cc`` as ``RGWRados::Object::Write::write_meta()`` for object writes, and ``RGWRados::Object::Delete::delete_obj()`` for object deletes. The bucket index operations are implemented in ``src/cls/rgw/cls_rgw.cc`` as ``rgw_bucket_prepare_op()`` and ``rgw_bucket_complete_op()``. +This transaction is implemented in ``src/rgw/driver/rados/rgw_rados.cc`` as ``RGWRados::Object::Write::write_meta()`` for object writes, and ``RGWRados::Object::Delete::delete_obj()`` for object deletes. The bucket index operations are implemented in ``src/cls/rgw/cls_rgw.cc`` as ``rgw_bucket_prepare_op()`` and ``rgw_bucket_complete_op()``. 
------- Listing @@ -56,7 +56,7 @@ When listing objects, RGW will read all entries (pending and completed) from the If an RGW crashes in the middle of an `Index Transaction`_, an index entry may get stuck in this 'pending' state. When bucket listing encounters these pending entries, it also sends information from the head object back to the bucket index so it can update the entry and resolve its stale transactions. This message is called 'dir suggest', because the bucket index treats it as a hint or suggestion. -Bucket listing is implemented in ``src/rgw/rgw_rados.cc`` as ``RGWRados::Bucket::List::list_objects_ordered()`` and ``RGWRados::Bucket::List::list_objects_unordered()``. ``RGWRados::check_disk_state()`` is the part that reads the head object and encodes suggested changes. The corresponding bucket index operations are implemented in ``src/cls/rgw/cls_rgw.cc`` as ``rgw_bucket_list()`` and ``rgw_dir_suggest_changes()``. +Bucket listing is implemented in ``src/rgw/driver/rados/rgw_rados.cc`` as ``RGWRados::Bucket::List::list_objects_ordered()`` and ``RGWRados::Bucket::List::list_objects_unordered()``. ``RGWRados::check_disk_state()`` is the part that reads the head object and encodes suggested changes. The corresponding bucket index operations are implemented in ``src/cls/rgw/cls_rgw.cc`` as ``rgw_bucket_list()`` and ``rgw_dir_suggest_changes()``. -------------------- S3 Object Versioning @@ -66,9 +66,9 @@ For versioned buckets, the bucket index contains an entry for each object versio RGW stores a head object in the rgw.buckets.data pool for each object version. This rados object's oid is a combination of the object name and its version id. -In S3, a GET/HEAD request for an object name will give you that object's "current" version. To support this, RGW stores an extra 'object logical head' (olh) object whose oid includes the object name only, that acts as an indirection to the head object of its current version. This indirection logic is implemented in ``src/rgw/rgw_rados.cc`` as ``RGWRados::follow_olh()``. +In S3, a GET/HEAD request for an object name will give you that object's "current" version. To support this, RGW stores an extra 'object logical head' (olh) object whose oid includes the object name only, that acts as an indirection to the head object of its current version. This indirection logic is implemented in ``src/rgw/driver/rados/rgw_rados.cc`` as ``RGWRados::follow_olh()``. -To maintain the consistency between this olh object and the bucket index, the index keeps a separate 'olh' entry for each object name. This entry stores a log of all writes/deletes to its versions. In ``src/rgw/rgw_rados.cc``, ``RGWRados::apply_olh_log()`` replays this log to guarantee that this olh object converges on the same "current" version as the bucket index. +To maintain the consistency between this olh object and the bucket index, the index keeps a separate 'olh' entry for each object name. This entry stores a log of all writes/deletes to its versions. In ``src/rgw/driver/rados/rgw_rados.cc``, ``RGWRados::apply_olh_log()`` replays this log to guarantee that this olh object converges on the same "current" version as the bucket index. .. _ListObjectsV2: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjects.html .. 
_ListObjectVersions: https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectVersions.html diff --git a/doc/governance.rst b/doc/governance.rst index 3105a917f1b01..bc88560f18a8b 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -82,7 +82,7 @@ Current Members * Casey Bodley * Dan van der Ster * David Orman - * Ernesto Puerta + * Ernesto Puerta * Gregory Farnum * Haomai Wang * Ilya Dryomov @@ -96,7 +96,7 @@ Current Members * Mike Perez * Myoungwon Oh * Neha Ojha - * Patrick Donnelly + * Patrick Donnelly * Sam Just * Vikhyat Umrao * Xie Xingguo @@ -104,6 +104,17 @@ Current Members * Yingxin Cheng * Yuri Weinstein * Zac Dover + * Laura Flores + * Venky Shankar + * Guillaume Abrioux + * Anthony D'Atri + * Joseph Mundackal + * Gaurav Sitlani + * Afreen Misbah + * Radoslaw Zarzynski + * Matan Breizman + * Yaarit Hatuka + * Adam C. Emerson .. _ctl: diff --git a/doc/mgr/smb.rst b/doc/mgr/smb.rst index 05e6369ddf107..3252c485a9aa7 100644 --- a/doc/mgr/smb.rst +++ b/doc/mgr/smb.rst @@ -96,6 +96,11 @@ clustering enables clustering regardless of the placement count. A value of ``never`` disables clustering regardless of the placement count. If unspecified, ``default`` is assumed. +public_addrs + Optional. A string in the form of [%]. + Supported only when using Samba's clustering. Assign "virtual" IP + addresses that will be managed by the clustering subsystem and may automatically + move between nodes running Samba containers. Remove Cluster ++++++++++++++ diff --git a/doc/monitoring/index.rst b/doc/monitoring/index.rst index 794fdf8419505..afccd9ab16ac3 100644 --- a/doc/monitoring/index.rst +++ b/doc/monitoring/index.rst @@ -64,6 +64,30 @@ in: It is good to outline that the main tool allowing users to observe and monitor a Ceph cluster is the **Ceph dashboard**. It provides graphics where the most important cluster and service metrics are represented. Most of the examples in this document are extracted from the dashboard graphics or extrapolated from the metrics exposed by the Ceph dashboard. +Ceph daemon health metrics +========================== + +The Ceph exporter provides a metric called ``ceph_daemon_socket_up`` that reports the liveness status of each Ceph daemon that exposes an admin socket. + +The ``ceph_daemon_socket_up`` metric indicates the health status of a Ceph daemon based on its ability to respond via the admin socket, where a value of ``1`` means healthy, and ``0`` means unhealthy. Although a Ceph daemon might still be "alive" when it reports ``ceph_daemon_socket_up=0``, this situation highlights a significant issue in its functionality. As such, this metric serves as an excellent tool for detecting problems in any of the main Ceph daemons. + +Labels: +- **``ceph_daemon``**: Identifier of the Ceph daemon exposing an admin socket on the host. +- **``hostname``**: Name of the host where the Ceph daemon is running. + +Example: + +.. code-block:: bash + + ceph_daemon_socket_up{ceph_daemon="mds.a",hostname="testhost"} 1 + ceph_daemon_socket_up{ceph_daemon="osd.1",hostname="testhost"} 0 + +To identify any Ceph daemons that were not responsive at any point in the last 12 hours, you can use the following PromQL expression: + +.. 
code-block:: bash + + ceph_daemon_socket_up == 0 or min_over_time(ceph_daemon_socket_up[12h]) == 0 + Performance metrics =================== diff --git a/doc/radosgw/index.rst b/doc/radosgw/index.rst index da92692fa8b7b..3085e1a528f9f 100644 --- a/doc/radosgw/index.rst +++ b/doc/radosgw/index.rst @@ -88,4 +88,4 @@ Cluster with one API and then retrieve that data with the other API. D3N Data Cache Cloud Transition Metrics - + UADK Acceleration for Compression diff --git a/doc/radosgw/multisite.rst b/doc/radosgw/multisite.rst index 6a21b7479e6f6..d6925c8ed9c04 100644 --- a/doc/radosgw/multisite.rst +++ b/doc/radosgw/multisite.rst @@ -507,7 +507,7 @@ For example: Updating the Period ------------------- -After updating the master zone configuration, update the period: +After updating the secondary zone configuration, update the period: .. prompt:: bash # diff --git a/doc/radosgw/uadk-accel.rst b/doc/radosgw/uadk-accel.rst new file mode 100644 index 0000000000000..fdf99f891f0a7 --- /dev/null +++ b/doc/radosgw/uadk-accel.rst @@ -0,0 +1,132 @@ +=============================================== +UADK Acceleration for Compression +=============================================== + +UADK is a framework for applications to access hardware accelerators in a +unified, secure, and efficient way. UADK is comprised of UACCE, libwd and many +other algorithm libraries. + +See `Compressor UADK Support`_. + + +UADK in the Software Stack +========================== + +UADK is a general-purpose user space accelerator framework that uses shared +virtual addressing (SVA) to provide a unified programming interface for hardware +acceleration of cryptographic and compression algorithms. + +UADK includes Unified/User-space-access-intended Accelerator Framework (UACCE), +which enables hardware accelerators that support SVA to adapt to UADK. + +Currently, HiSilicon Kunpeng hardware accelerators have been registered with +UACCE. Through the UADK framework, users can run cryptographic and compression +algorithms using hardware accelerators instead of CPUs, freeing up CPU computing +power and improving computing performance. + +A user can access the hardware accelerators by performing user-mode operations on +the character devices, or the use of UADK can be done via frameworks that have +been enabled by others including UADK support (for example, OpenSSL* libcrypto*, +DPDK, and the Linux* Kernel Crypto Framework). + +See `OpenSSL UADK Engine`_. + +UADK Environment Setup +====================== +UADK consists of UACCE, vendors’ drivers, and an algorithm layer. UADK requires the +hardware accelerator to support SVA, and the operating system to support IOMMU and +SVA. Hardware accelerators from different vendors are registered as different character +devices with UACCE by using kernel-mode drivers of the vendors. 
+ +:: + + +----------------------------------+ + | apps | + +----+------------------------+----+ + | | + | | + +-------+--------+ +-------+-------+ + | scheduler | | alg libraries | + +-------+--------+ +-------+-------+ + | | + | | + | | + | +--------+------+ + | | vendor drivers| + | +-+-------------+ + | | + | | + +--+------------------+--+ + | libwd | + User +----+-------------+-----+ + -------------------------------------------------- + Kernel +--+-----+ +------+ + | uacce | | smmu | + +---+----+ +------+ + | + +---+------------------+ + | vendor kernel driver | + +----------------------+ + -------------------------------------------------- + +----------------------+ + | HW Accelerators | + +----------------------+ + +Configuration +============= + +#. Kernel Requirement + +User needs to make sure that UACCE is already supported in Linux kernel. The kernel version +should be at least v5.9 with SVA (Shared Virtual Addressing) enabled. + +UACCE may be built as a module or built into the kernel. Here's an example to build UACCE +with hardware accelerators for the HiSilicon Kunpeng platform. + + .. prompt:: bash $ + + CONFIG_IOMMU_SVA_LIB=y + CONFIG_ARM_SMMU=y + CONFIG_ARM_SMMU_V3=y + CONFIG_ARM_SMMU_V3_SVA=y + CONFIG_PCI_PASID=y + CONFIG_UACCE=y + CONFIG_CRYPTO_DEV_HISI_QM=y + CONFIG_CRYPTO_DEV_HISI_ZIP=y + +Make sure all these above kernel configurations are selected. + +#. UADK enablement +If the architecture is aarch64, it will automatically download the UADK source code to build +the static library. If it runs on other architecture, user can enable it with build parameters +`-DWITH_UADK=true` + +#. Manual Build UADK +As the above paragraph shows, the UADK is enabled automatically, no need to build manually. +For developer who is interested in UADK, you can refer to the below steps for building. + + .. prompt:: bash $ + + git clone https://github.com/Linaro/uadk.git + cd uadk + mkdir build + ./autogen.sh + ./configure --prefix=$PWD/build + make + make install + + .. note:: Without –prefix, UADK will be installed to /usr/local/lib by + default. If get error:"cannot find -lnuma", please install + the `libnuma-dev`. + +#. Configure + + Edit the Ceph configuration file (usually ``ceph.conf``) to enable UADK + support for *zlib* compression:: + + uadk_compressor_enabled=true + + The default value in `global.yaml.in` for `uadk_compressor_enabled` is false. + +.. _Compressor UADK Support: https://github.com/ceph/ceph/pull/58336 +.. _OpenSSL UADK Engine: https://github.com/Linaro/uadk_engine diff --git a/qa/README b/qa/README index f9b8988c6f9f6..a6a95c479bc9c 100644 --- a/qa/README +++ b/qa/README @@ -83,3 +83,8 @@ supported_distros as distros$ will be run just once: either on centos, rhel or ubuntu, chosen randomly. The teuthology code can be found in https://github.com/ceph/teuthology.git + +Note: The performance suites clone CBT from master here: https://github.com/ceph/cbt.git +CBT will not support cosbench beyond release tag v0.3, therefore no qa suite should use cosbench. +cosbench support has been removed from qa/tasks/cbt.py. 
+ diff --git a/qa/cephfs/overrides/ignorelist_health.yaml b/qa/cephfs/overrides/ignorelist_health.yaml index 678548fe2cc22..94b4257977759 100644 --- a/qa/cephfs/overrides/ignorelist_health.yaml +++ b/qa/cephfs/overrides/ignorelist_health.yaml @@ -21,3 +21,6 @@ overrides: - overall HEALTH_ - Replacing daemon - deprecated feature inline_data + - BLUESTORE_SLOW_OP_ALERT + - slow operation indications in BlueStore + - experiencing slow operations in BlueStore diff --git a/qa/cephfs/overrides/pg_health.yaml b/qa/cephfs/overrides/pg_health.yaml index 1740134a2e01b..07ca62e01fbec 100644 --- a/qa/cephfs/overrides/pg_health.yaml +++ b/qa/cephfs/overrides/pg_health.yaml @@ -9,3 +9,5 @@ overrides: - PG_DEGRADED - Reduced data availability - Degraded data redundancy + - pg .* is stuck inactive + - pg .* is .*degraded diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh index 4eac1106e8d3a..843e9b9901b9c 100755 --- a/qa/standalone/scrub/osd-recovery-scrub.sh +++ b/qa/standalone/scrub/osd-recovery-scrub.sh @@ -234,146 +234,6 @@ function wait_background_check() { return $return_code } -# osd_scrub_during_recovery=true make sure scrub happens -# update 26.8.24: the test should be redesigned. The current version is not -# reliable, and playing around with the timeouts and such won't fix the -# design issues. -function TEST_recovery_scrub_2() { - local dir=$1 - local poolname=test - return 0 - - TESTDATA="testdata.$$" - OSDS=8 - PGS=32 - OBJECTS=40 - - setup $dir || return 1 - run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1 - run_mgr $dir x --mgr_stats_period=1 || return 1 - local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0.1 " - ceph_osd_args+="--osd_scrub_backoff_ratio=0 " - ceph_osd_args+="--osd_stats_update_period_not_scrubbing=3 " - ceph_osd_args+="--osd_stats_update_period_scrubbing=2 " - ceph_osd_args+="--mgr_stats_period=1" - for osd in $(seq 0 $(expr $OSDS - 1)) - do - run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=1 \ - $ceph_osd_args || return 1 - done - - # Create a pool with $PGS pgs - create_pool $poolname $PGS $PGS - wait_for_clean || return 1 - poolid=$(ceph osd dump | grep "^pool.*[']test[']" | awk '{ print $2 }') - - dd if=/dev/urandom of=$TESTDATA bs=1M count=50 - for i in $(seq 1 $OBJECTS) - do - rados -p $poolname put obj${i} $TESTDATA - done - rm -f $TESTDATA - - ceph osd pool set $poolname size 3 - - ceph pg dump pgs - - # note that the following will be needed if the mclock scheduler is specified - ceph tell osd.* config get osd_mclock_override_recovery_settings - - # the '_max_active' is expected to be 0 - ceph tell osd.1 config get osd_recovery_max_active - # both next parameters are expected to be >=3 - ceph tell osd.1 config set osd_recovery_max_active_hdd 6 - ceph tell osd.1 config set osd_recovery_max_active_ssd 6 - ceph tell osd.1 config get osd_recovery_max_active_hdd - ceph tell osd.1 config get osd_recovery_max_active_ssd - - # Wait for recovery to start - count=0 - while(true) - do - #ceph --format json pg dump pgs | jq '.pg_stats | [.[].state]' - ceph pg dump pgs - if test $(ceph --format json pg dump pgs | - jq '.pg_stats | [.[].state]'| grep recovering | wc -l) -ge 2 - then - break - fi - sleep 2 - if test "$count" -eq "10" - then - echo "Not enough recovery started simultaneously" - return 1 - fi - count=$(expr $count + 1) - done - ceph pg dump pgs - - pids="" - recov_scrub_count=0 - for pg in $(seq 0 $(expr $PGS - 1)) - do - run_in_background pids 
pg_scrub_mod $poolid.$(printf "%x" $pg) - done - wait_background_check pids - return_code=$? - if [ $return_code -ne 0 ]; then return $return_code; fi - - ERRORS=0 - if test $recov_scrub_count -eq 0 - then - echo "No scrubs occurred while PG recovering" - ERRORS=$(expr $ERRORS + 1) - fi - - pidfile=$(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') - pid=$(cat $pidfile) - if ! kill -0 $pid - then - echo "OSD crash occurred" - #tail -100 $dir/osd.0.log - ERRORS=$(expr $ERRORS + 1) - fi - - # Work around for http://tracker.ceph.com/issues/38195 - kill_daemons $dir #|| return 1 - - declare -a err_strings - ## we do not expect a refusal to scrub - err_strings[0]="recovery in progress.*scrubs" - for osd in $(seq 0 $(expr $OSDS - 1)) - do - grep "recovery in progress.*scrubs" $dir/osd.${osd}.log - done - for err_string in "${err_strings[@]}" - do - found=false - for osd in $(seq 0 $(expr $OSDS - 1)) - do - if grep "$err_string" $dir/osd.${osd}.log > /dev/null; - then - found=true - fi - done - if [ "$found" = "true" ]; then - echo "Found log message not expected '$err_string'" - ERRORS=$(expr $ERRORS + 1) - fi - done - - teardown $dir || return 1 - - if [ $ERRORS != "0" ]; - then - echo "TEST FAILED WITH $ERRORS ERRORS" - return 1 - fi - - echo "TEST PASSED" - return 0 -} - main osd-recovery-scrub "$@" # Local Variables: diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 59564f7e37e28..491e46603f72e 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -442,7 +442,6 @@ function TEST_auto_repair_bluestore_basic() { ['pool_name']="testpool" ['extras']=" --osd_scrub_auto_repair=true" ) - local extr_dbg=3 standard_scrub_cluster $dir cluster_conf local poolid=${cluster_conf['pool_id']} local poolname=${cluster_conf['pool_name']} @@ -6252,6 +6251,254 @@ function TEST_request_scrub_priority() { grep "log_channel.*scrub ok" $dir/osd.${primary}.log | grep -v purged_snaps | head -1 | sed 's/.*[[]DBG[]]//' | grep -q $pg || return 1 } +# +# Testing the "split scrub store" feature: shallow scrubs do not +# purge deep errors from the store. +# +# Corrupt one copy of a replicated pool, creating both shallow and deep errors. +# Then shallow-scrub the pool and verify that the deep errors are still present. 
+# +function TEST_dual_store_replicated_cluster() { + local dir=$1 + local poolname=csr_pool + local total_objs=19 + local extr_dbg=1 # note: 3 and above leave some temp files around + + run_mon $dir a --osd_pool_default_size=2 || return 1 + run_mgr $dir x --mgr_stats_period=1 || return 1 + local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 --osd-deep-scrub-randomize-ratio=0 " + ceph_osd_args+="--osd_scrub_backoff_ratio=0 --osd_stats_update_period_not_scrubbing=3 " + ceph_osd_args+="--osd_stats_update_period_scrubbing=2 --osd_op_queue=wpq --osd_scrub_auto_repair=0 " + for osd in $(seq 0 1) + do + run_osd $dir $osd $ceph_osd_args || return 1 + done + + create_rbd_pool || return 1 + wait_for_clean || return 1 + + create_pool foo 1 || return 1 + create_pool $poolname 1 1 || return 1 + wait_for_clean || return 1 + + ceph osd pool set $poolname noscrub 1 + ceph osd pool set $poolname nodeep-scrub 1 + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + add_something $dir $poolname $objname || return 1 + + rados --pool $poolname setomapheader $objname hdr-$objname || return 1 + rados --pool $poolname setomapval $objname key-$objname val-$objname || return 1 + done + + # Increase file 1 MB + 1KB + dd if=/dev/zero of=$dir/new.ROBJ19 bs=1024 count=1025 + rados --pool $poolname put $objname $dir/new.ROBJ19 || return 1 + rm -f $dir/new.ROBJ19 + + local pg=$(get_pg $poolname ROBJ0) + local primary=$(get_primary $poolname ROBJ0) + + # Compute an old omap digest and save oi + CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) \ + config set osd_deep_scrub_update_digest_min_age 0 + CEPH_ARGS='' ceph daemon $(get_asok_path osd.1) \ + config set osd_deep_scrub_update_digest_min_age 0 + pg_deep_scrub $pg + + for i in $(seq 1 $total_objs) ; do + objname=ROBJ${i} + + # Alternate corruption between osd.0 and osd.1 + local osd=$(expr $i % 2) + + case $i in + 1) + # Size (deep scrub data_digest too) + local payload=UVWXYZZZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 2) + # digest (deep scrub only) + local payload=UVWXYZ + echo $payload > $dir/CORRUPT + objectstore_tool $dir $osd $objname set-bytes $dir/CORRUPT || return 1 + ;; + + 3) + # missing + objectstore_tool $dir $osd $objname remove || return 1 + ;; + + 4) + # Modify omap value (deep scrub only) + objectstore_tool $dir $osd $objname set-omap key-$objname $dir/CORRUPT || return 1 + ;; + + 5) + # Delete omap key (deep scrub only) + objectstore_tool $dir $osd $objname rm-omap key-$objname || return 1 + ;; + + 6) + # Add extra omap key (deep scrub only) + echo extra > $dir/extra-val + objectstore_tool $dir $osd $objname set-omap key2-$objname $dir/extra-val || return 1 + rm $dir/extra-val + ;; + + 7) + # Modify omap header (deep scrub only) + echo -n newheader > $dir/hdr + objectstore_tool $dir $osd $objname set-omaphdr $dir/hdr || return 1 + rm $dir/hdr + ;; + + 8) + rados --pool $poolname setxattr $objname key1-$objname val1-$objname || return 1 + rados --pool $poolname setxattr $objname key2-$objname val2-$objname || return 1 + + # Break xattrs + echo -n bad-val > $dir/bad-val + objectstore_tool $dir $osd $objname set-attr _key1-$objname $dir/bad-val || return 1 + objectstore_tool $dir $osd $objname rm-attr _key2-$objname || return 1 + echo -n val3-$objname > $dir/newval + objectstore_tool $dir $osd $objname set-attr _key3-$objname $dir/newval || return 1 + rm $dir/bad-val $dir/newval + ;; + + 9) + objectstore_tool $dir $osd $objname get-attr _ > $dir/robj9-oi + echo -n D > 
$dir/change + rados --pool $poolname put $objname $dir/change + objectstore_tool $dir $osd $objname set-attr _ $dir/robj9-oi + rm $dir/oi $dir/change + ;; + + # ROBJ10 must be handled after digests are re-computed by a deep scrub below + # ROBJ11 must be handled with config change before deep scrub + # ROBJ12 must be handled with config change before scrubs + # ROBJ13 must be handled before scrubs + + 14) + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 0 $objname set-attr _ $dir/bad-val || return 1 + objectstore_tool $dir 1 $objname rm-attr _ || return 1 + rm $dir/bad-val + ;; + + 15) + objectstore_tool $dir $osd $objname rm-attr _ || return 1 + ;; + + 16) + objectstore_tool $dir 0 $objname rm-attr snapset || return 1 + echo -n bad-val > $dir/bad-val + objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1 + ;; + + 17) + # Deep-scrub only (all replicas are diffent than the object info + local payload=ROBJ17 + echo $payload > $dir/new.ROBJ17 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1 + ;; + + 18) + # Deep-scrub only (all replicas are diffent than the object info + local payload=ROBJ18 + echo $payload > $dir/new.ROBJ18 + objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1 + objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1 + # Make one replica have a different object info, so a full repair must happen too + objectstore_tool $dir $osd $objname corrupt-info || return 1 + ;; + + 19) + # Set osd-max-object-size smaller than this object's size + + esac + done + + local pg=$(get_pg $poolname ROBJ0) + + ceph tell osd.\* injectargs -- --osd-max-object-size=1048576 + + inject_eio rep data $poolname ROBJ11 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + inject_eio rep mdata $poolname ROBJ12 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + inject_eio rep data $poolname ROBJ13 $dir 0 || return 1 # shard 0 of [1, 0], osd.1 + + # first sequence: the final shallow scrub should not override any of the deep errors + pg_scrub $pg + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1.json + pg_scrub $pg + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_1b.json + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh1_results.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_1b_s.json + + pg_deep_scrub $pg + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_2.json + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dp_results.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_2s.json + + pg_scrub $pg + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' 
> /tmp/WQR_3.json + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > $dir/sh2_results.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_3s.json + + diff -u $dir/dp_results.json $dir/sh2_results.json || return 1 + + # inject a read error, which is a special case: the scrub encountering the read error + # would override the previously collected shard info. + inject_eio rep mdata $poolname ROBJ13 $dir 1 || return 1 # shard 1 of [1, 0], osd.0 + + pg_deep_scrub $pg + + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_4.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_4s_w13.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \ + jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \ + jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_4s_wo13.json + + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > $dir/dpPart2_w13_results.json + # Remove the entry with "name":"ROBJ13" from the $dir/d*_results.json + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' | \ + jq '.inconsistents' | python3 -c "$sortkeys" > $dir/dpPart2_wo13_results.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_4s.json + + pg_scrub $pg + + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | python3 -c "$sortkeys" | jq '.' > /tmp/WQR_5.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | \ + python3 -c "$sortkeys" > /tmp/WQR_5s_w13.json + (( extr_dbg >= 3 )) && rados list-inconsistent-obj $pg | jq "$jqfilter" | \ + jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\ + jq '.inconsistents' | python3 -c "$sortkeys" > /tmp/WQR_5s_wo13.json + + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq '.inconsistents' | python3 -c "$sortkeys" > \ + $dir/sh2Part2_w13_results.json + rados list-inconsistent-obj $pg | jq "$jqfilter" | jq 'del(.inconsistents[] | select(.object.name == "ROBJ13"))' |\ + jq '.inconsistents' | python3 -c "$sortkeys" > $dir/shPart2_wo13_results.json + + # the shallow scrub results should differ from the results of the deep + # scrub preceding it, but the difference should be limited to ROBJ13 + diff -u $dir/dpPart2_w13_results.json $dir/sh2Part2_w13_results.json && return 1 + diff -u $dir/dpPart2_wo13_results.json $dir/shPart2_wo13_results.json || return 1 + + ceph osd pool rm $poolname $poolname --yes-i-really-really-mean-it + return 0 +} + main osd-scrub-repair "$@" diff --git a/qa/suites/crimson-rados/perf/deploy/ceph.yaml b/qa/suites/crimson-rados/perf/deploy/ceph.yaml index 0f6021975a4a2..50d170f502272 100644 --- a/qa/suites/crimson-rados/perf/deploy/ceph.yaml +++ b/qa/suites/crimson-rados/perf/deploy/ceph.yaml @@ -10,3 +10,4 @@ tasks: osd: debug monc: 20 flavor: crimson +- ssh_keys: diff --git a/qa/suites/fs/libcephfs/tasks/client.yaml b/qa/suites/fs/libcephfs/tasks/client.yaml index da84137322069..42ca9336c8e7d 100644 --- a/qa/suites/fs/libcephfs/tasks/client.yaml +++ b/qa/suites/fs/libcephfs/tasks/client.yaml @@ -12,3 +12,4 @@ tasks: clients: client.0: - client/test.sh + - client/test_oc_disabled.sh diff --git 
a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml index 713adb9628ab6..96e4353e99c7a 100644 --- a/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml +++ b/qa/suites/fs/upgrade/mds_upgrade_sequence/overrides/ignorelist_upgrade.yaml @@ -2,3 +2,4 @@ overrides: ceph: log-ignorelist: - OSD_DOWN + - osd.*is down diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml index 2e4741e814079..7c97edae552da 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_initiator.yaml @@ -1,14 +1,14 @@ tasks: - nvmeof: installer: host.a - gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" rbd: pool_name: mypool image_name_prefix: myimage gateway_config: subsystems_count: 3 namespaces_count: 20 - cli_image: quay.io/ceph/nvmeof-cli:1.2 + cli_image: quay.io/ceph/nvmeof-cli:latest - cephadm.wait_for_service: service: nvmeof.mypool.mygroup0 diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml index 2e873a04bab2a..9ef3700442717 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_namespaces.yaml @@ -1,14 +1,14 @@ tasks: - nvmeof: installer: host.a - gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" rbd: pool_name: mypool image_name_prefix: myimage gateway_config: subsystems_count: 3 namespaces_count: 20 - cli_image: quay.io/ceph/nvmeof-cli:1.2 + cli_image: quay.io/ceph/nvmeof-cli:latest - cephadm.wait_for_service: service: nvmeof.mypool.mygroup0 diff --git a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml index 83d16e4cb2c9a..12cb50b408d49 100644 --- a/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml +++ b/qa/suites/nvmeof/basic/workloads/nvmeof_scalability.yaml @@ -1,14 +1,14 @@ tasks: - nvmeof: installer: host.a - gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" rbd: pool_name: mypool image_name_prefix: myimage gateway_config: subsystems_count: 3 namespaces_count: 20 - cli_image: quay.io/ceph/nvmeof-cli:1.2 + cli_image: quay.io/ceph/nvmeof-cli:latest - cephadm.wait_for_service: service: nvmeof.mypool.mygroup0 diff --git a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml b/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml index 6db0c0d4e1829..b4755a6433b0a 100644 --- a/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml +++ b/qa/suites/nvmeof/thrash/gateway-initiator-setup/3-subsys-60-namespace.yaml @@ -1,14 +1,14 @@ tasks: - nvmeof: installer: host.a - gw_image: quay.io/ceph/nvmeof:1.2 # "default" is the image 
cephadm defaults to; change to test specific nvmeof images, example "latest" + gw_image: quay.io/ceph/nvmeof:latest # "default" is the image cephadm defaults to; change to test specific nvmeof images, example "latest" rbd: pool_name: mypool image_name_prefix: myimage gateway_config: subsystems_count: 3 namespaces_count: 20 # each subsystem - cli_image: quay.io/ceph/nvmeof-cli:1.2 + cli_image: quay.io/ceph/nvmeof-cli:latest - cephadm.wait_for_service: service: nvmeof.mypool.mygroup0 diff --git a/qa/suites/orch/cephadm/workunits/task/test_mgmt_gateway.yaml b/qa/suites/orch/cephadm/workunits/task/test_mgmt_gateway.yaml new file mode 100644 index 0000000000000..5207fd415b7e6 --- /dev/null +++ b/qa/suites/orch/cephadm/workunits/task/test_mgmt_gateway.yaml @@ -0,0 +1,77 @@ +overrides: + ceph: + log-ignorelist: + - CEPHADM_FAILED_DAEMON + log-only-match: + - CEPHADM_ +roles: +- - host.a + - mon.a + - mgr.a + - osd.0 +- - host.b + - mon.b + - mgr.b + - osd.1 +- - host.c + - mon.c + - osd.2 +tasks: +- install: +- cephadm: +- cephadm.shell: + host.c: + - | + set -ex + # Deploy monitoring stack + ceph orch apply node-exporter + ceph orch apply grafana + ceph orch apply alertmanager + ceph orch apply prometheus + sleep 240 + # generate SSL certificate + openssl req -x509 -newkey rsa:4096 -keyout /tmp/key.pem -out /tmp/cert.pem -sha256 -days 30 -nodes -subj "/CN=*" + # Generate a mgmt.spec template + cat << EOT > /tmp/mgmt.spec + service_type: mgmt-gateway + service_id: foo + placement: + hosts: + - ${HOSTNAME} + spec: + ssl_protocols: + - TLSv1.2 + - TLSv1.3 + ssl_ciphers: + - AES128-SHA + - AES256-SHA + enable_health_check_endpoint: True + EOT + # Add generated certificates to spec file + echo " ssl_certificate: |" >> /tmp/mgmt.spec + while read LINE; do echo $LINE | sed -e "s/^/ /"; done < /tmp/cert.pem >> /tmp/mgmt.spec + echo " ssl_certificate_key: |" >> /tmp/mgmt.spec + while read LINE; do echo $LINE | sed -e "s/^/ /"; done < /tmp/key.pem >> /tmp/mgmt.spec + # Apply spec + ceph orch apply -i /tmp/mgmt.spec +- cephadm.wait_for_service: + service: mgmt-gateway +- cephadm.shell: + host.a: + - | + set -ex + # retrieve mgmt hostname and ip + MGMT_GTW_HOST=$(ceph orch ps --daemon-type mgmt-gateway -f json | jq -e '.[]' | jq -r '.hostname') + MGMT_GTW_IP=$(ceph orch host ls -f json | jq -r --arg MGMT_GTW_HOST "$MGMT_GTW_HOST" '.[] | select(.hostname==$MGMT_GTW_HOST) | .addr') + # check mgmt-gateway health + curl -k -s https://${MGMT_GTW_IP}/health + curl -k -s https://${MGMT_GTW_IP}:29443/health + # wait for background services to be reconfigured following mgmt-gateway installation + sleep 180 + # check grafana endpoints are responsive and database health is okay + curl -k -s https://${MGMT_GTW_IP}/grafana/api/health | jq -e '.database == "ok"' + # check prometheus endpoints are responsive + curl -k -s -u admin:admin https://${MGMT_GTW_IP}/prometheus/api/v1/status/config | jq -e '.status == "success"' + # check alertmanager endpoints are responsive + curl -k -s -u admin:admin https://${MGMT_GTW_IP}/alertmanager/api/v2/status + diff --git a/qa/suites/upgrade/reef-x/parallel/0-start.yaml b/qa/suites/upgrade/reef-x/parallel/0-start.yaml index 3814ea3efdb50..146bd57960dad 100644 --- a/qa/suites/upgrade/reef-x/parallel/0-start.yaml +++ b/qa/suites/upgrade/reef-x/parallel/0-start.yaml @@ -31,3 +31,5 @@ overrides: conf: osd: osd shutdown pgref assert: true + log-ignorelist: + - PG_DEGRADED diff --git a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml index 
bf3005fad458f..ce4e0cc228bba 100644 --- a/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml +++ b/qa/suites/upgrade/reef-x/parallel/1-tasks.yaml @@ -6,6 +6,7 @@ overrides: - MON_DOWN - out of quorum - PG_AVAILABILITY + - PG_DEGRADED tasks: - install: branch: reef diff --git a/qa/tasks/cbt.py b/qa/tasks/cbt.py index 84e096520b40f..e6a9dc8223cf8 100644 --- a/qa/tasks/cbt.py +++ b/qa/tasks/cbt.py @@ -47,22 +47,11 @@ def generate_cbt_config(self): benchmark_config = self.config.get('benchmarks') benchmark_type = next(iter(benchmark_config.keys())) + if benchmark_type in ['librbdfio', 'fio']: testdir = misc.get_testdir(self.ctx) benchmark_config[benchmark_type]['cmd_path'] = os.path.join(testdir, 'fio/fio') - if benchmark_type == 'cosbench': - # create cosbench_dir and cosbench_xml_dir - testdir = misc.get_testdir(self.ctx) - benchmark_config['cosbench']['cosbench_dir'] = os.path.join(testdir, 'cos') - benchmark_config['cosbench']['cosbench_xml_dir'] = os.path.join(testdir, 'xml') - self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', benchmark_config['cosbench']['cosbench_xml_dir']]) - benchmark_config['cosbench']['controller'] = osd_hosts[0] - - # set auth details - remotes_and_roles = self.ctx.cluster.remotes.items() - ips = [host for (host, port) in - (remote.ssh.get_transport().getpeername() for (remote, role_list) in remotes_and_roles)] - benchmark_config['cosbench']['auth'] = "username=cosbench:operator;password=intel2012;url=http://%s:80/auth/v1.0;retry=9" %(ips[0]) + client_endpoints_config = self.config.get('client_endpoints', None) monitoring_profiles = self.config.get('monitoring_profiles', {}) @@ -117,77 +106,6 @@ def install_dependencies(self): ] ) - if benchmark_type == 'cosbench': - # install cosbench - self.log.info('install dependencies for cosbench') - if system_type == 'rpm': - cosbench_depends = ['wget', 'unzip', 'java-1.7.0-openjdk', 'curl'] - else: - cosbench_depends = ['wget', 'unzip', 'openjdk-8-jre', 'curl'] - self.first_mon.run(args=install_cmd + cosbench_depends) - testdir = misc.get_testdir(self.ctx) - cosbench_version = '0.4.2.c3' - cosbench_location = 'https://github.com/intel-cloud/cosbench/releases/download/v0.4.2.c3/0.4.2.c3.zip' - os_version = misc.get_system_type(self.first_mon, False, True) - - # additional requirements for bionic - if os_version == '18.04': - self.first_mon.run( - args=['sudo', 'apt-get', '-y', 'purge', 'openjdk-11*']) - # use our own version of cosbench - cosbench_version = 'cosbench-0.4.2.c3.1' - # contains additional parameter "-N" to nc - cosbench_location = 'http://drop.ceph.com/qa/cosbench-0.4.2.c3.1.zip' - cosbench_dir = os.path.join(testdir, cosbench_version) - self.ctx.cluster.run(args=['mkdir', '-p', '-m0755', '--', cosbench_dir]) - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'wget', - cosbench_location, run.Raw('&&'), - 'unzip', '{name}.zip'.format(name=cosbench_version), '-d', cosbench_version - ] - ) - else: - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'wget', - cosbench_location, run.Raw('&&'), - 'unzip', '{name}.zip'.format(name=cosbench_version) - ] - ) - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'ln', '-s', cosbench_version, 'cos', - ] - ) - self.first_mon.run( - args=[ - 'cd', os.path.join(testdir, 'cos'), run.Raw('&&'), - 'chmod', '+x', run.Raw('*.sh'), - ] - ) - - # start cosbench and check info - self.log.info('start cosbench') - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'cd', 'cos', run.Raw('&&'), - 'sh', 'start-all.sh' - ] - ) - 
self.log.info('check cosbench info') - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'cd', 'cos', run.Raw('&&'), - 'sh', 'cli.sh', 'info' - ] - ) - def checkout_cbt(self): testdir = misc.get_testdir(self.ctx) repo = self.config.get('repo', 'https://github.com/ceph/cbt.git') @@ -269,51 +187,6 @@ def end(self): ] ) - if benchmark_type == 'cosbench': - os_version = misc.get_system_type(self.first_mon, False, True) - if os_version == '18.04': - cosbench_version = 'cosbench-0.4.2.c3.1' - else: - cosbench_version = '0.4.2.c3' - # note: stop-all requires 'nc' - self.first_mon.run( - args=[ - 'cd', testdir, run.Raw('&&'), - 'cd', 'cos', run.Raw('&&'), - 'sh', 'stop-all.sh', - run.Raw('||'), 'true' - ] - ) - self.first_mon.run( - args=[ - 'sudo', 'killall', '-9', 'java', - run.Raw('||'), 'true' - ] - ) - self.first_mon.run( - args=[ - 'rm', '--one-file-system', '-rf', '--', - '{tdir}/cos'.format(tdir=testdir), - ] - ) - self.first_mon.run( - args=[ - 'rm', '--one-file-system', '-rf', '--', - '{tdir}/{version}'.format(tdir=testdir, version=cosbench_version), - ] - ) - self.first_mon.run( - args=[ - 'rm', '--one-file-system', '-rf', '--', - '{tdir}/{version}.zip'.format(tdir=testdir, version=cosbench_version), - ] - ) - self.first_mon.run( - args=[ - 'rm', '--one-file-system', '-rf', '--', - '{tdir}/xml'.format(tdir=testdir), - ] - ) # Collect cbt performance data cbt_performance = CBTperformance() cbt_performance.collect(self.ctx, self.config) diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py index 1c00a49077dff..2b7fd2ee56945 100644 --- a/qa/tasks/cephfs/filesystem.py +++ b/qa/tasks/cephfs/filesystem.py @@ -640,8 +640,11 @@ def set_down(self, down=True): def set_joinable(self, joinable=True): self.set_var("joinable", joinable) - def set_max_mds(self, max_mds): - self.set_var("max_mds", "%d" % max_mds) + def set_max_mds(self, max_mds, confirm=True): + if confirm: + self.set_var('max_mds', f'{max_mds}', '--yes-i-really-mean-it') + else: + self.set_var("max_mds", f"{max_mds}",) def set_session_timeout(self, timeout): self.set_var("session_timeout", "%d" % timeout) diff --git a/qa/tasks/cephfs/test_admin.py b/qa/tasks/cephfs/test_admin.py index 6a583cb4d0f2d..00a68dd0183fd 100644 --- a/qa/tasks/cephfs/test_admin.py +++ b/qa/tasks/cephfs/test_admin.py @@ -2683,3 +2683,60 @@ def test_with_health_warn_with_2_active_MDSs(self): errmsgs=health_warn) self.run_ceph_cmd(f'mds fail {mds1_id} --yes-i-really-mean-it') self.run_ceph_cmd(f'mds fail {mds2_id} --yes-i-really-mean-it') + + +class TestFSSetMaxMDS(TestAdminCommands): + + def test_when_unhealthy_without_confirm(self): + ''' + Test that command "ceph fs set max_mds " without the + confirmation flag (--yes-i-really-mean-it) fails when cluster is + unhealthy. + ''' + self.gen_health_warn_mds_cache_oversized() + + with self.assertRaises(CommandFailedError) as cfe: + self.fs.set_max_mds(2, confirm=False) + self.assertEqual(cfe.exception.exitstatus, errno.EPERM) + + def test_when_unhealthy_with_confirm(self): + ''' + Test that command "ceph fs set max_mds + --yes-i-really-mean-it" runs successfully when cluster is unhealthy. + ''' + self.gen_health_warn_mds_cache_oversized() + + self.fs.set_max_mds(2, confirm=True) + self.assertEqual(self.fs.get_var('max_mds'), 2) + + def test_when_mds_trim_without_confirm(self): + ''' + Test that command "ceph fs set max_mds " without the + confirmation flag (--yes-i-really-mean-it) fails when cluster has + MDS_TRIM health warning. 
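For readers following the new TestFSSetMaxMDS cases outside teuthology, a minimal sketch of how the confirm flag maps onto the CLI invocation; the local `ceph` binary and the filesystem name 'a' are assumptions for illustration, not part of the patch.

import subprocess

def set_max_mds(fs_name: str, max_mds: int, confirm: bool = True) -> None:
    # mirrors Filesystem.set_max_mds() above: the confirmation flag is only
    # appended when the caller opts in
    cmd = ['ceph', 'fs', 'set', fs_name, 'max_mds', str(max_mds)]
    if confirm:
        cmd.append('--yes-i-really-mean-it')
    subprocess.run(cmd, check=True)

# set_max_mds('a', 2)                 # expected to succeed even with MDS health warnings
# set_max_mds('a', 2, confirm=False)  # expected to fail (EPERM) while the cluster is unhealthy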
+ ''' + self.gen_health_warn_mds_trim() + + with self.assertRaises(CommandFailedError) as cfe: + self.fs.set_max_mds(2, confirm=False) + self.assertEqual(cfe.exception.exitstatus, errno.EPERM) + + def test_when_mds_trim_when_with_confirm(self): + ''' + Test that command "ceph fs set max_mds + --yes-i-really-mean-it" runs successfully when cluster has MDS_TRIM + health warning. + ''' + self.gen_health_warn_mds_trim() + + self.fs.set_max_mds(2, confirm=True) + self.assertEqual(self.fs.get_var('max_mds'), 2) + + def test_when_healthy_with_confirm(self): + ''' + Test that command "ceph fs set max_mds + --yes-i-really-mean-it" runs successfully also when cluster is + healthy. + ''' + self.fs.set_max_mds(2, confirm=True) + self.assertEqual(self.fs.get_var('max_mds'), 2) diff --git a/qa/tasks/cephfs/test_nfs.py b/qa/tasks/cephfs/test_nfs.py index 932d504d47f3e..19076ea44b3be 100644 --- a/qa/tasks/cephfs/test_nfs.py +++ b/qa/tasks/cephfs/test_nfs.py @@ -8,6 +8,7 @@ from tasks.mgr.mgr_test_case import MgrTestCase from teuthology import contextutil from teuthology.exceptions import CommandFailedError +from teuthology.orchestra.run import Raw log = logging.getLogger(__name__) @@ -319,7 +320,7 @@ def _get_port_ip_info(self): else: log.warning(f'{e}, retrying') - def _test_mnt(self, pseudo_path, port, ip, check=True): + def _test_mnt(self, pseudo_path, port, ip, check=True, datarw=False): ''' Test mounting of created exports :param pseudo_path: It is the pseudo root name @@ -347,12 +348,27 @@ def _test_mnt(self, pseudo_path, port, ip, check=True): self.ctx.cluster.run(args=['sudo', 'chmod', '1777', '/mnt']) try: + # Clean up volumes directory created by subvolume create by some tests + self.ctx.cluster.run(args=['sudo', 'rm', '-rf', '/mnt/volumes']) self.ctx.cluster.run(args=['touch', '/mnt/test']) out_mnt = self._sys_cmd(['ls', '/mnt']) self.assertEqual(out_mnt, b'test\n') + if datarw: + self.ctx.cluster.run(args=['echo', 'test data', Raw('|'), 'tee', '/mnt/test1']) + out_test1 = self._sys_cmd(['cat', '/mnt/test1']) + self.assertEqual(out_test1, b'test data\n') finally: self.ctx.cluster.run(args=['sudo', 'umount', '/mnt']) + def _test_data_read_write(self, pseudo_path, port, ip): + ''' + Check if read/write works fine + ''' + try: + self._test_mnt(pseudo_path, port, ip, True, True) + except CommandFailedError as e: + self.fail(f"expected read/write of a file to be successful but failed with {e.exitstatus}") + def _write_to_read_only_export(self, pseudo_path, port, ip): ''' Check if write to read only export fails @@ -599,6 +615,18 @@ def test_write_to_read_only_export(self): self._write_to_read_only_export(self.pseudo_path, port, ip) self._test_delete_cluster() + def test_data_read_write(self): + ''' + Test date read and write on export. 
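A rough standalone sketch of the read/write check the new NFS test performs against an export: mount, write a small payload, read it back, unmount. The ip, port and pseudo path are placeholders supplied by the caller, not values taken from the test run.

import subprocess

def check_export_read_write(ip: str, port: str, pseudo_path: str) -> None:
    subprocess.run(['sudo', 'mount', '-t', 'nfs', '-o', f'port={port}',
                    f'{ip}:{pseudo_path}', '/mnt'], check=True)
    try:
        # write a small payload, then read it back and compare
        subprocess.run('echo "test data" | sudo tee /mnt/test1 > /dev/null',
                       shell=True, check=True)
        out = subprocess.run(['cat', '/mnt/test1'],
                             capture_output=True, check=True)
        assert out.stdout == b'test data\n'
    finally:
        subprocess.run(['sudo', 'umount', '/mnt'], check=True)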
+ ''' + self._test_create_cluster() + self._create_export(export_id='1', create_fs=True, + extra_cmd=['--pseudo-path', self.pseudo_path]) + port, ip = self._get_port_ip_info() + self._check_nfs_cluster_status('running', 'NFS Ganesha cluster restart failed') + self._test_data_read_write(self.pseudo_path, port, ip) + self._test_delete_cluster() + def test_cluster_info(self): ''' Test cluster info outputs correct ip and hostname diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py index 2baefd72c3fbc..2ee3b6ac052e1 100644 --- a/qa/tasks/cephfs/test_volumes.py +++ b/qa/tasks/cephfs/test_volumes.py @@ -2388,7 +2388,7 @@ def test_subvolume_set_and_get_earmark(self): self._fs_cmd("subvolume", "create", self.volname, subvolume) # set earmark - earmark = "smb.test" + earmark = "smb" self._fs_cmd("subvolume", "earmark", "set", self.volname, subvolume, "--earmark", earmark) # get earmark @@ -2401,7 +2401,7 @@ def test_subvolume_clear_earmark(self): self._fs_cmd("subvolume", "create", self.volname, subvolume) # set earmark - earmark = "smb.test" + earmark = "smb" self._fs_cmd("subvolume", "earmark", "set", self.volname, subvolume, "--earmark", earmark) # remove earmark @@ -2559,7 +2559,7 @@ def test_subvolume_info(self): self.assertIn(feature, subvol_info["features"], msg="expected feature '{0}' in subvolume".format(feature)) # set earmark - earmark = "smb.test" + earmark = "smb" self._fs_cmd("subvolume", "earmark", "set", self.volname, subvolume, "--earmark", earmark) subvol_info = json.loads(self._get_subvolume_info(self.volname, subvolume)) @@ -7876,7 +7876,22 @@ def tearDown(self): self.run_ceph_cmd('fs subvolume snapshot rm --force ' f'--format json {v} {sv} {ss}') - self.run_ceph_cmd(f'fs subvolume rm {v} {sv}') + try: + self.run_ceph_cmd(f'fs subvolume rm {v} {sv}') + except CommandFailedError as e: + if e.exitstatus == errno.ENOENT: + log.info( + 'ignoring this error, perhaps subvolume was deleted ' + 'during the test and snapshot deleted above is a ' + 'retained snapshot. when a retained snapshot (which is ' + 'snapshot retained despite of subvolume deletion) is ' + 'deleted, the subvolume directory is also deleted ' + 'along. and before retained snapshot deletion, the ' + 'subvolume is reported by "subvolume ls" command, which' + 'is what probably caused confusion here') + pass + else: + raise # verify trash dir is clean self._wait_for_trash_empty() @@ -8090,6 +8105,58 @@ def test_clone_to_diff_group_and_less_than_cloner_threads(self): # and not cancelling these clone doesnt affect this test case. self.cancel_clones_and_ignore_if_finished(c) + def test_clone_after_subvol_is_removed(self): + ''' + Initiate cloning after source subvolume has been deleted but with + snapshots retained and then test that, when this clone is in progress, + one progress bar is printed in output of command "ceph status" that + shows progress of this clone. + ''' + v = self.volname + sv = 'sv1' + ss = 'ss1' + # XXX: "clone" must be part of clone name for sake of tearDown() + c = 'ss1clone1' + + # XXX: without setting mds_snap_rstat to true rstats are not updated on + # a subvolume snapshot and therefore clone progress bar will not show + # any progress. 
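The clone test below waits for exactly one clone progress bar to appear in "ceph status". A hedged sketch of that polling idea, reading the status JSON directly; the progress_events/message field names are assumed to match what the test helper inspects.

import json
import subprocess
import time

def wait_for_single_clone_progress_bar(tries: int = 15, sleep: int = 10) -> dict:
    for _ in range(tries):
        status = json.loads(subprocess.run(
            ['ceph', 'status', '--format', 'json'],
            capture_output=True, check=True).stdout)
        # keep only progress events whose message mentions clones
        events = {k: v for k, v in status.get('progress_events', {}).items()
                  if 'clone' in v.get('message', '')}
        if len(events) == 1:
            return events
        if len(events) > 1:
            raise RuntimeError(f'expected 1 clone progress bar, got {events}')
        time.sleep(sleep)
    raise RuntimeError('no clone progress bar appeared')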
+ self.config_set('mds', 'mds_snap_rstat', 'true') + + self.run_ceph_cmd(f'fs subvolume create {v} {sv} --mode=777') + size = self._do_subvolume_io(sv, None, None, 10, 1024) + + self.run_ceph_cmd(f'fs subvolume snapshot create {v} {sv} {ss}') + self.wait_till_rbytes_is_right(v, sv, size) + + self.run_ceph_cmd(f'fs subvolume rm {v} {sv} --retain-snapshots') + self.run_ceph_cmd(f'fs subvolume snapshot clone {v} {sv} {ss} {c}') + + with safe_while(tries=15, sleep=10) as proceed: + while proceed(): + pev = self.get_pevs_from_ceph_status(c) + + if len(pev) < 1: + continue + elif len(pev) > 1: + raise RuntimeError('For 1 clone "ceph status" output has 2 ' + 'progress bars, it should have only 1 ' + f'progress bar.\npev -\n{pev}') + + # ensure that exactly 1 progress bar for cloning is present in + # "ceph status" output + msg = ('"progress_events" dict in "ceph status" output must have ' + f'exactly one entry.\nprogress_event dict -\n{pev}') + self.assertEqual(len(pev), 1, msg) + + pev_msg = tuple(pev.values())[0]['message'] + self.assertIn('1 ongoing clones', pev_msg) + break + + # allowing clone jobs to finish will consume too much time and space + # and not cancelling these clone doesnt affect this test case. + self.cancel_clones_and_ignore_if_finished(c) + def test_clones_equal_to_cloner_threads(self): ''' Test that one progress bar is printed in output of "ceph status" output diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py index 07c69ddc47cb6..be7afccf33176 100644 --- a/qa/tasks/mgr/dashboard/test_osd.py +++ b/qa/tasks/mgr/dashboard/test_osd.py @@ -11,6 +11,7 @@ class OsdTest(DashboardTestCase): AUTH_ROLES = ['cluster-manager'] + _VERSION = '1.1' @classmethod def setUpClass(cls): @@ -24,7 +25,7 @@ def tearDown(self): @DashboardTestCase.RunAs('test', 'test', ['block-manager']) def test_access_permissions(self): - self._get('/api/osd') + self._get('/api/osd', version=self._VERSION) self.assertStatus(403) self._get('/api/osd/0') self.assertStatus(403) @@ -33,7 +34,7 @@ def assert_in_and_not_none(self, data, properties): self.assertSchema(data, JObj({p: JAny(none=False) for p in properties}, allow_unknown=True)) def test_list(self): - data = self._get('/api/osd') + data = self._get('/api/osd', version=self._VERSION) self.assertStatus(200) self.assertGreaterEqual(len(data), 1) diff --git a/qa/workunits/client/test_oc_disabled.sh b/qa/workunits/client/test_oc_disabled.sh new file mode 100755 index 0000000000000..88552aa50bdc5 --- /dev/null +++ b/qa/workunits/client/test_oc_disabled.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +set -ex + +ceph_test_client --client_oc=false diff --git a/qa/workunits/nvmeof/setup_subsystem.sh b/qa/workunits/nvmeof/setup_subsystem.sh index fb72e1d6402dd..cc4024323eb87 100755 --- a/qa/workunits/nvmeof/setup_subsystem.sh +++ b/qa/workunits/nvmeof/setup_subsystem.sh @@ -29,7 +29,7 @@ list_subsystems () { # add all subsystems for i in $(seq 1 $NVMEOF_SUBSYSTEMS_COUNT); do subsystem_nqn="${NVMEOF_SUBSYSTEMS_PREFIX}${i}" - sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn + sudo podman run -it $NVMEOF_CLI_IMAGE --server-address $NVMEOF_DEFAULT_GATEWAY_IP_ADDRESS --server-port $NVMEOF_SRPORT subsystem add --subsystem $subsystem_nqn --no-group-append done list_subsystems diff --git a/src/ceph-volume/ceph_volume/__init__.py b/src/ceph-volume/ceph_volume/__init__.py index b10100c02185a..814619cfdddb4 100644 --- 
a/src/ceph-volume/ceph_volume/__init__.py +++ b/src/ceph-volume/ceph_volume/__init__.py @@ -6,6 +6,7 @@ sys_info = namedtuple('sys_info', ['devices']) sys_info.devices = dict() logger = logging.getLogger(__name__) +BEING_REPLACED_HEADER: str = 'CEPH_DEVICE_BEING_REPLACED' class AllowLoopDevices: diff --git a/src/ceph-volume/ceph_volume/api/lvm.py b/src/ceph-volume/ceph_volume/api/lvm.py index 16cbc08b26254..fc376f891fd25 100644 --- a/src/ceph-volume/ceph_volume/api/lvm.py +++ b/src/ceph-volume/ceph_volume/api/lvm.py @@ -10,6 +10,8 @@ from math import floor from ceph_volume import process, util, conf from ceph_volume.exceptions import SizeAllocationError +from typing import Any, Dict + logger = logging.getLogger(__name__) @@ -807,13 +809,16 @@ def get_all_devices_vgs(name_prefix=''): '--units=b', '--nosuffix'] -class Volume(object): +class Volume: """ Represents a Logical Volume from LVM, with some top-level attributes like ``lv_name`` and parsed tags as a dictionary of key/value pairs. """ - def __init__(self, **kw): + def __init__(self, **kw: str) -> None: + self.lv_path: str = '' + self.lv_name: str = '' + self.lv_uuid: str = '' for k, v in kw.items(): setattr(self, k, v) self.lv_api = kw @@ -824,13 +829,13 @@ def __init__(self, **kw): self.encrypted = self.tags.get('ceph.encrypted', '0') == '1' self.used_by_ceph = 'ceph.osd_id' in self.tags - def __str__(self): + def __str__(self) -> str: return '<%s>' % self.lv_api['lv_path'] - def __repr__(self): + def __repr__(self) -> str: return self.__str__() - def as_dict(self): + def as_dict(self) -> Dict[str, Any]: obj = {} obj.update(self.lv_api) obj['tags'] = self.tags @@ -839,7 +844,7 @@ def as_dict(self): obj['path'] = self.lv_path return obj - def report(self): + def report(self) -> Dict[str, Any]: if not self.used_by_ceph: return { 'name': self.lv_name, diff --git a/src/ceph-volume/ceph_volume/devices/lvm/zap.py b/src/ceph-volume/ceph_volume/devices/lvm/zap.py index c1bef82c10975..388f6aeea2708 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/zap.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/zap.py @@ -5,12 +5,12 @@ from textwrap import dedent -from ceph_volume import decorators, terminal, process +from ceph_volume import decorators, terminal, process, BEING_REPLACED_HEADER from ceph_volume.api import lvm as api from ceph_volume.util import system, encryption, disk, arg_validators, str_to_int, merge_dict from ceph_volume.util.device import Device from ceph_volume.systemd import systemctl -from typing import List +from typing import Any, Dict, List logger = logging.getLogger(__name__) mlogger = terminal.MultiLogger(__name__) @@ -95,29 +95,29 @@ def zap_data(path): 'conv=fsync' ]) - -def find_associated_devices(osd_id=None, osd_fsid=None): +def find_associated_devices(osd_id: str = '', osd_fsid: str = '') -> List[api.Volume]: """ From an ``osd_id`` and/or an ``osd_fsid``, filter out all the LVs in the system that match those tag values, further detect if any partitions are part of the OSD, and then return the set of LVs and partitions (if any). 
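The tag-filter construction introduced in find_associated_devices() can be exercised on its own; a small standalone sketch of the same dict comprehension, only the identifiers actually supplied end up in the LVM tag query.

from typing import Dict

def build_lv_tag_filter(osd_id: str = '', osd_fsid: str = '') -> Dict[str, str]:
    # empty strings are dropped, so an unset osd_id/osd_fsid never filters
    return {key: value for key, value in {
        'ceph.osd_id': osd_id,
        'ceph.osd_fsid': osd_fsid,
    }.items() if value}

assert build_lv_tag_filter(osd_id='3') == {'ceph.osd_id': '3'}
assert build_lv_tag_filter() == {}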
""" lv_tags = {} - if osd_id: - lv_tags['ceph.osd_id'] = osd_id - if osd_fsid: - lv_tags['ceph.osd_fsid'] = osd_fsid + lv_tags = {key: value for key, value in { + 'ceph.osd_id': osd_id, + 'ceph.osd_fsid': osd_fsid + }.items() if value} lvs = api.get_lvs(tags=lv_tags) + if not lvs: raise RuntimeError('Unable to find any LV for zapping OSD: ' - '%s' % osd_id or osd_fsid) - + f'{osd_id or osd_fsid}') devices_to_zap = ensure_associated_lvs(lvs, lv_tags) - return [Device(path) for path in set(devices_to_zap) if path] + return [Device(path) for path in set(devices_to_zap) if path] -def ensure_associated_lvs(lvs, lv_tags={}): +def ensure_associated_lvs(lvs: List[api.Volume], + lv_tags: Dict[str, Any] = {}) -> List[str]: """ Go through each LV and ensure if backing devices (journal, wal, block) are LVs or partitions, so that they can be accurately reported. @@ -166,14 +166,14 @@ def ensure_associated_lvs(lvs, lv_tags={}): return list(set(verified_devices)) -class Zap(object): - +class Zap: help = 'Removes all data and filesystems from a logical volume or partition.' - def __init__(self, argv): + def __init__(self, argv: List[str]) -> None: self.argv = argv + self.osd_ids_to_zap: List[str] = [] - def unmount_lv(self, lv): + def unmount_lv(self, lv: api.Volume) -> None: if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'): lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id']) else: @@ -186,40 +186,95 @@ def unmount_lv(self, lv): if dmcrypt and dmcrypt_uuid: self.dmcrypt_close(dmcrypt_uuid) - def zap_lv(self, device): + def _write_replacement_header(self, device: str) -> None: + """Write a replacement header to a device. + + This method writes the string defined in `BEING_REPLACED_HEADER` + to the specified device. This header indicates that the device + is in the process of being replaced. + + Args: + device (str): The path to the device on which the replacement + header will be written. + """ + disk._dd_write(device, + BEING_REPLACED_HEADER) + + def clear_replace_header(self) -> bool: + """Safely erase the replacement header on a device if it is marked as being replaced. + + This method checks whether the given device is marked as being replaced + (`device.is_being_replaced`). If true, it proceeds to erase the replacement header + from the device using the `_erase_replacement_header` method. The method returns + a boolean indicating whether any action was taken. + + Args: + device (Device): The device object, which includes information about the device's + path and status (such as whether it is currently being replaced). + + Returns: + bool: True if the replacement header was successfully erased, False if the + device was not marked as being replaced or no action was necessary. + """ + result: bool = False + device: Device = self.args.clear_replace_header + if device.is_being_replaced: + self._erase_replacement_header(device.path) + result = True + return result + + def _erase_replacement_header(self, device: str) -> None: + """Erase the replacement header on a device. + + This method writes a sequence of null bytes (`0x00`) over the area of the device + where the replacement header is stored, effectively erasing it. + + Args: + device (str): The path to the device from which the replacement header will be erased. 
+ """ + disk._dd_write(device, + b'\x00' * len(BEING_REPLACED_HEADER)) + + def zap_lv(self, device: Device) -> None: """ Device examples: vg-name/lv-name, /dev/vg-name/lv-name Requirements: Must be a logical volume (LV) """ - lv = api.get_single_lv(filters={'lv_name': device.lv_name, 'vg_name': - device.vg_name}) + lv: api.Volume = device.lv_api self.unmount_lv(lv) - + self.parent_device: str = disk.get_parent_device_from_mapper(lv.lv_path) zap_device(device.path) if self.args.destroy: lvs = api.get_lvs(filters={'vg_name': device.vg_name}) - if lvs == []: - mlogger.info('No LVs left, exiting', device.vg_name) - return - elif len(lvs) <= 1: + if len(lvs) <= 1: mlogger.info('Only 1 LV left in VG, will proceed to destroy ' 'volume group %s', device.vg_name) pvs = api.get_pvs(filters={'lv_uuid': lv.lv_uuid}) api.remove_vg(device.vg_name) for pv in pvs: api.remove_pv(pv.pv_name) + replacement_args: Dict[str, bool] = { + 'block': self.args.replace_block, + 'db': self.args.replace_db, + 'wal': self.args.replace_wal + } + if replacement_args.get(lv.tags.get('ceph.type'), False): + mlogger.info(f'Marking {self.parent_device} as being replaced') + self._write_replacement_header(self.parent_device) else: mlogger.info('More than 1 LV left in VG, will proceed to ' 'destroy LV only') mlogger.info('Removing LV because --destroy was given: %s', device.path) + if self.args.replace_block: + mlogger.info(f'--replace-block passed but the device still has {str(len(lvs))} LV(s)') api.remove_lv(device.path) elif lv: # just remove all lvm metadata, leaving the LV around lv.clear_tags() - def zap_partition(self, device): + def zap_partition(self, device: Device) -> None: """ Device example: /dev/sda1 Requirements: Must be a partition @@ -247,7 +302,7 @@ def zap_partition(self, device): mlogger.info("Destroying partition since --destroy was used: %s" % device.path) disk.remove_partition(device) - def zap_lvm_member(self, device): + def zap_lvm_member(self, device: Device) -> None: """ An LVM member may have more than one LV and or VG, for example if it is a raw device with multiple partitions each belonging to a different LV @@ -267,7 +322,7 @@ def zap_lvm_member(self, device): - def zap_raw_device(self, device): + def zap_raw_device(self, device: Device) -> None: """ Any whole (raw) device passed in as input will be processed here, checking for LVM membership and partitions (if any). @@ -287,10 +342,19 @@ def zap_raw_device(self, device): self.zap_partition(Device('/dev/%s' % part_name)) zap_device(device.path) + # TODO(guits): I leave this commented out, this should be part of a separate patch in order to + # support device replacement with raw-based OSDs + # if self.args.replace_block: + # disk._dd_write(device.path, 'CEPH_DEVICE_BEING_REPLACED') @decorators.needs_root - def zap(self, devices=None): - devices = devices or self.args.devices + def zap(self) -> None: + """Zap a device. + + Raises: + SystemExit: When the device is a mapper and not a mpath device. 
+ """ + devices = self.args.devices for device in devices: mlogger.info("Zapping: %s", device.path) @@ -317,21 +381,21 @@ def zap(self, devices=None): ) @decorators.needs_root - def zap_osd(self): + def zap_osd(self) -> None: if self.args.osd_id and not self.args.no_systemd: osd_is_running = systemctl.osd_is_active(self.args.osd_id) if osd_is_running: mlogger.error("OSD ID %s is running, stop it with:" % self.args.osd_id) mlogger.error("systemctl stop ceph-osd@%s" % self.args.osd_id) raise SystemExit("Unable to zap devices associated with OSD ID: %s" % self.args.osd_id) - devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) - self.zap(devices) + self.args.devices = find_associated_devices(self.args.osd_id, self.args.osd_fsid) + self.zap() - def dmcrypt_close(self, dmcrypt_uuid): + def dmcrypt_close(self, dmcrypt_uuid: str) -> None: mlogger.info("Closing encrypted volume %s", dmcrypt_uuid) encryption.dmcrypt_close(mapping=dmcrypt_uuid, skip_path_check=True) - def main(self): + def main(self) -> None: sub_command_help = dedent(""" Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume. If given a path to a logical volume it must be in the format of vg/lv. Any @@ -419,12 +483,56 @@ def main(self): help='Skip systemd unit checks', ) + parser.add_argument( + '--replace-block', + dest='replace_block', + action='store_true', + help='Mark the block device as unavailable.' + ) + + parser.add_argument( + '--replace-db', + dest='replace_db', + action='store_true', + help='Mark the db device as unavailable.' + ) + + parser.add_argument( + '--replace-wal', + dest='replace_wal', + action='store_true', + help='Mark the wal device as unavailable.' + ) + + parser.add_argument( + '--clear-replace-header', + dest='clear_replace_header', + type=arg_validators.ValidClearReplaceHeaderDevice(), + help='clear the replace header on devices.' 
+ ) + if len(self.argv) == 0: print(sub_command_help) return self.args = parser.parse_args(self.argv) + if self.args.clear_replace_header: + rc: bool = False + try: + rc = self.clear_replace_header() + except Exception as e: + raise SystemExit(e) + if rc: + mlogger.info(f'Replacement header cleared on {self.args.clear_replace_header}') + else: + mlogger.info(f'No replacement header detected on {self.args.clear_replace_header}, nothing to do.') + raise SystemExit(not rc) + + if self.args.replace_block or self.args.replace_db or self.args.replace_wal: + self.args.destroy = True + mlogger.info('--replace-block|db|wal passed, enforcing --destroy.') + if self.args.osd_id or self.args.osd_fsid: self.zap_osd() else: diff --git a/src/ceph-volume/ceph_volume/tests/conftest.py b/src/ceph-volume/ceph_volume/tests/conftest.py index ee58081d97da1..e6bf31737b69c 100644 --- a/src/ceph-volume/ceph_volume/tests/conftest.py +++ b/src/ceph-volume/ceph_volume/tests/conftest.py @@ -360,7 +360,7 @@ def apply(devices=None, lsblk=None, lv=None, blkid=None, udevadm=None, has_bluestore_label=False): if devices: for dev in devices.keys(): - devices[dev]['device_nodes'] = os.path.basename(dev) + devices[dev]['device_nodes'] = [os.path.basename(dev)] else: devices = {} lsblk = lsblk if lsblk else {} diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py index d630a7a6bf887..efe52c053ffc3 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_zap.py @@ -7,11 +7,30 @@ from ceph_volume.devices.lvm import zap -class TestZap(object): - def test_invalid_osd_id_passed(self): +class TestZap: + def test_invalid_osd_id_passed(self) -> None: with pytest.raises(SystemExit): zap.Zap(argv=['--osd-id', 'foo']).main() + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = True + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 0 + + @patch('ceph_volume.util.disk._dd_write', Mock()) + @patch('ceph_volume.util.arg_validators.Device') + def test_clear_replace_header_is_not_being_replaced(self, m_device: Mock) -> None: + m_dev = m_device.return_value + m_dev.is_being_replaced = False + with pytest.raises(SystemExit) as e: + zap.Zap(argv=['--clear', '/dev/foo']).main() + assert e.value.code == 1 + + class TestFindAssociatedDevices(object): def test_no_lvs_found_that_match_id(self, monkeypatch, device_info): diff --git a/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py b/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py index f4f50b06f8a29..fd7c468037c5c 100644 --- a/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py +++ b/src/ceph-volume/ceph_volume/tests/objectstore/test_rawbluestore.py @@ -159,6 +159,7 @@ def test_activate_osd_id_and_fsid(self, @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.rename_mapper', Mock(return_value=MagicMock())) @patch('ceph_volume.util.disk.get_bluestore_header') + @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.luks_close', Mock(return_value=MagicMock())) @patch('ceph_volume.objectstore.rawbluestore.encryption_utils.luks_open', Mock(return_value=MagicMock())) def test_activate_dmcrypt_tpm(self, m_bs_header, rawbluestore, fake_lsblk_all, 
mock_raw_direct_report, is_root) -> None: m_bs_header.return_value = { diff --git a/src/ceph-volume/ceph_volume/tests/test_inventory.py b/src/ceph-volume/ceph_volume/tests/test_inventory.py index 785d8b56e86b6..832c083664212 100644 --- a/src/ceph-volume/ceph_volume/tests/test_inventory.py +++ b/src/ceph-volume/ceph_volume/tests/test_inventory.py @@ -126,6 +126,7 @@ class TestInventory(object): 'lvs', 'device_id', 'lsm_data', + 'being_replaced' ] expected_sys_api_keys = [ diff --git a/src/ceph-volume/ceph_volume/util/arg_validators.py b/src/ceph-volume/ceph_volume/util/arg_validators.py index 99e7d039e742b..e75b34e550e3c 100644 --- a/src/ceph-volume/ceph_volume/util/arg_validators.py +++ b/src/ceph-volume/ceph_volume/util/arg_validators.py @@ -7,6 +7,9 @@ from ceph_volume.util.encryption import set_dmcrypt_no_workqueue +mlogger = terminal.MultiLogger(__name__) + + def valid_osd_id(val): return str(int(val)) @@ -70,6 +73,17 @@ def _is_valid_device(self, raise_sys_exit=True): return self._device +class ValidClearReplaceHeaderDevice(ValidDevice): + def __call__(self, dev_path: str) -> str: + super().get_device(dev_path) + return self._format_device(self._is_valid_device()) + + def _is_valid_device(self) -> Device: + if not self._device.is_being_replaced: + mlogger.info(f'{self.dev_path} has no replacement header.') + return self._device + + class ValidDataDevice(ValidDevice): def __call__(self, dev_path): super().get_device(dev_path) diff --git a/src/ceph-volume/ceph_volume/util/device.py b/src/ceph-volume/ceph_volume/util/device.py index 9c2c11e7f316f..82ee3266e3f1f 100644 --- a/src/ceph-volume/ceph_volume/util/device.py +++ b/src/ceph-volume/ceph_volume/util/device.py @@ -1,13 +1,14 @@ # -*- coding: utf-8 -*- - +# type: ignore import logging import os from functools import total_ordering -from ceph_volume import sys_info, allow_loop_devices +from ceph_volume import sys_info, allow_loop_devices, BEING_REPLACED_HEADER from ceph_volume.api import lvm from ceph_volume.util import disk, system from ceph_volume.util.lsmdisk import LSMDisk from ceph_volume.util.constants import ceph_disk_guids +from typing import List, Tuple logger = logging.getLogger(__name__) @@ -92,6 +93,7 @@ class Device(object): 'sys_api', 'device_id', 'lsm_data', + 'being_replaced' ] pretty_report_sys_fields = [ 'actuators', @@ -136,6 +138,7 @@ def __init__(self, path, with_lsm=False, lvs=None, lsblk_all=None, all_devices_v self._exists = None self._is_lvm_member = None self.ceph_device = False + self.being_replaced: bool = self.is_being_replaced self._parse() if self.path in sys_info.devices.keys(): self.device_nodes = sys_info.devices[self.path]['device_nodes'] @@ -298,7 +301,7 @@ def report(self): rot=self.rotational, available=self.available, model=self.model, - device_nodes=self.device_nodes + device_nodes=','.join(self.device_nodes) ) def json_report(self): @@ -590,7 +593,7 @@ def vg_free(self): return [vg_free] @property - def has_partitions(self): + def has_partitions(self) -> bool: ''' Boolean to determine if a given device has partitions. ''' @@ -598,7 +601,14 @@ def has_partitions(self): return True return False - def _check_generic_reject_reasons(self): + @property + def is_being_replaced(self) -> bool: + ''' + Boolean to indicate if the device is being replaced. 
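A quick sanity check on the magic read length used by the is_being_replaced property: the 26 passed to disk._dd_read() is simply the length of the replacement header. The stand-in function below is illustrative only.

BEING_REPLACED_HEADER = 'CEPH_DEVICE_BEING_REPLACED'

assert len(BEING_REPLACED_HEADER) == 26

def is_being_replaced(read_back: str) -> bool:
    # read_back stands in for disk._dd_read(self.path, 26)
    return read_back == BEING_REPLACED_HEADER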
+ ''' + return disk._dd_read(self.path, 26) == BEING_REPLACED_HEADER + + def _check_generic_reject_reasons(self) -> List[str]: reasons = [ ('id_bus', 'usb', 'id_bus'), ('ro', '1', 'read-only'), @@ -639,9 +649,11 @@ def _check_generic_reject_reasons(self): rejected.append('Has partitions') if self.has_fs: rejected.append('Has a FileSystem') + if self.is_being_replaced: + rejected.append('Is being replaced') return rejected - def _check_lvm_reject_reasons(self): + def _check_lvm_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = [] if self.vgs: available_vgs = [vg for vg in self.vgs if int(vg.vg_free_count) > 10] @@ -654,7 +666,7 @@ def _check_lvm_reject_reasons(self): return len(rejected) == 0, rejected - def _check_raw_reject_reasons(self): + def _check_raw_reject_reasons(self) -> Tuple[bool, List[str]]: rejected = self._check_generic_reject_reasons() if len(self.vgs) > 0: rejected.append('LVM detected') diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py index 78c140597d653..30ee56808c762 100644 --- a/src/ceph-volume/ceph_volume/util/disk.py +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -7,7 +7,7 @@ from ceph_volume import process, allow_loop_devices from ceph_volume.api import lvm from ceph_volume.util.system import get_file_contents -from typing import Dict, List, Any +from typing import Dict, List, Any, Union logger = logging.getLogger(__name__) @@ -857,13 +857,14 @@ def get_devices(_sys_block_path='/sys/block', device=''): device_slaves = os.listdir(os.path.join(sysdir, 'slaves')) metadata['partitions'] = get_partitions_facts(sysdir) + metadata['device_nodes'] = [] if device_slaves: - metadata['device_nodes'] = ','.join(device_slaves) + metadata['device_nodes'].extend(device_slaves) else: if block[2] == 'part': - metadata['device_nodes'] = block[3] + metadata['device_nodes'].append(block[3]) else: - metadata['device_nodes'] = devname + metadata['device_nodes'].append(devname) metadata['actuators'] = None if os.path.isdir(sysdir + "/queue/independent_access_ranges/"): @@ -979,7 +980,7 @@ def _dd_read(device: str, count: int, skip: int = 0) -> str: return result -def _dd_write(device: str, data: str, skip: int = 0) -> None: +def _dd_write(device: str, data: Union[str, bytes], skip: int = 0) -> None: """Write bytes to a device Args: @@ -991,10 +992,14 @@ def _dd_write(device: str, data: str, skip: int = 0) -> None: OSError: If there is an error opening or writing to the device. Exception: If any other error occurs during the write operation. 
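A minimal reimplementation sketch of the str/bytes normalisation that _dd_write now performs, so both the text header and the null-byte erase payload go through the same path; the device path in the usage comments is a placeholder.

from typing import Union

def dd_write(device: str, data: Union[str, bytes], skip: int = 0) -> None:
    if isinstance(data, str):
        data = data.encode('utf-8')   # keep the old str behaviour
    with open(device, 'r+b') as b:
        b.seek(skip)
        b.write(data)                 # bytes (e.g. b'\x00' * 26) pass through unchanged

# dd_write('/dev/sdX', 'CEPH_DEVICE_BEING_REPLACED')   # write the marker
# dd_write('/dev/sdX', b'\x00' * 26)                   # erase it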
""" + + if isinstance(data, str): + data = data.encode('utf-8') + try: with open(device, 'r+b') as b: b.seek(skip) - b.write(data.encode('utf-8')) + b.write(data) except OSError: logger.warning(f"Can't write to {device}") raise @@ -1370,8 +1375,8 @@ def slashed_path(self) -> str: """ result: str = self.path if self.is_lvm: - vg: str = self.environment.get('DM_VG_NAME') - lv: str = self.environment.get('DM_LV_NAME') + vg: str = self.environment.get('DM_VG_NAME', '') + lv: str = self.environment.get('DM_LV_NAME', '') result = f'/dev/{vg}/{lv}' return result @@ -1385,6 +1390,6 @@ def dashed_path(self) -> str: """ result: str = self.path if self.is_lvm: - name: str = self.environment.get('DM_NAME') + name: str = self.environment.get('DM_NAME', '') result = f'/dev/mapper/{name}' return result diff --git a/src/cephadm/cephadm.py b/src/cephadm/cephadm.py index 1ab98a0ac4f1e..e32e2bc49f31a 100755 --- a/src/cephadm/cephadm.py +++ b/src/cephadm/cephadm.py @@ -29,6 +29,7 @@ from io import StringIO from threading import Thread, Event from pathlib import Path +from configparser import ConfigParser from cephadmlib.constants import ( # default images @@ -142,6 +143,7 @@ SidecarContainer, extract_uid_gid, is_container_running, + get_mgr_images, ) from cephadmlib.decorators import ( deprecated_command, @@ -2954,7 +2956,7 @@ def mgr_has_latest_epoch(): mounts = {} mounts[pathify(ctx.apply_spec)] = '/tmp/spec.yml:ro' try: - out = cli(['orch', 'apply', '-i', '/tmp/spec.yml'], extra_mounts=mounts) + out = cli(['orch', 'apply', '--continue-on-error', '-i', '/tmp/spec.yml'], extra_mounts=mounts) logger.info(out) except Exception: ctx.error_code = -errno.EINVAL @@ -4679,6 +4681,13 @@ def probe_hba(scan_path: str) -> None: return f'Ok. {len(all_scan_files)} adapters detected: {len(scan_files)} rescanned, {len(skipped)} skipped, {len(failures)} failed ({elapsed:.2f}s)' +def command_list_images(ctx: CephadmContext) -> None: + """this function will list the default images used by different services""" + cp_obj = ConfigParser() + cp_obj['mgr'] = get_mgr_images() + # print default images + cp_obj.write(sys.stdout) + ################################## @@ -5542,6 +5551,9 @@ def _get_parser(): 'disk-rescan', help='rescan all HBAs to detect new/removed devices') parser_disk_rescan.set_defaults(func=command_rescan_disks) + parser_list_images = subparsers.add_parser( + 'list-images', help='list all the default images') + parser_list_images.set_defaults(func=command_list_images) return parser diff --git a/src/cephadm/cephadmlib/constants.py b/src/cephadm/cephadmlib/constants.py index d25eb1391e0c0..354c378239802 100644 --- a/src/cephadm/cephadmlib/constants.py +++ b/src/cephadm/cephadmlib/constants.py @@ -5,15 +5,15 @@ DEFAULT_IMAGE_IS_MAIN = True DEFAULT_IMAGE_RELEASE = 'squid' DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.51.0' -DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:3.0.0' -DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:3.0.0' +DEFAULT_LOKI_IMAGE = 'quay.io/ceph/loki:3.0.0' +DEFAULT_PROMTAIL_IMAGE = 'quay.io/ceph/promtail:3.0.0' DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.7.0' DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.27.0' DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/grafana:10.4.8' DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4' DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.2.17' -DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' +DEFAULT_SNMP_GATEWAY_IMAGE = 
'quay.io/ceph/snmp-notifier:v1.2.1' DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' @@ -22,7 +22,7 @@ DEFAULT_SMBMETRICS_IMAGE = 'quay.io/samba.org/samba-metrics:latest' DEFAULT_NGINX_IMAGE = 'quay.io/ceph/nginx:sclorg-nginx-126' DEFAULT_OAUTH2_PROXY_IMAGE = 'quay.io/oauth2-proxy/oauth2-proxy:v7.6.0' -DEFAULT_REGISTRY = 'docker.io' # normalize unqualified digests to this +DEFAULT_REGISTRY = 'quay.io' # normalize unqualified digests to this # ------------------------------------------------------------------------------ LATEST_STABLE_RELEASE = 'squid' diff --git a/src/cephadm/cephadmlib/container_types.py b/src/cephadm/cephadmlib/container_types.py index 665c4d89652a6..791a545538a3c 100644 --- a/src/cephadm/cephadmlib/container_types.py +++ b/src/cephadm/cephadmlib/container_types.py @@ -8,7 +8,28 @@ from typing import Dict, List, Optional, Any, Union, Tuple, Iterable, cast from .call_wrappers import call, call_throws, CallVerbosity -from .constants import DEFAULT_TIMEOUT +from .constants import ( + DEFAULT_TIMEOUT, + # default container images + DEFAULT_ALERT_MANAGER_IMAGE, + DEFAULT_GRAFANA_IMAGE, + DEFAULT_LOKI_IMAGE, + DEFAULT_NODE_EXPORTER_IMAGE, + DEFAULT_PROMETHEUS_IMAGE, + DEFAULT_PROMTAIL_IMAGE, + DEFAULT_HAPROXY_IMAGE, + DEFAULT_KEEPALIVED_IMAGE, + DEFAULT_NVMEOF_IMAGE, + DEFAULT_SNMP_GATEWAY_IMAGE, + DEFAULT_ELASTICSEARCH_IMAGE, + DEFAULT_JAEGER_COLLECTOR_IMAGE, + DEFAULT_JAEGER_AGENT_IMAGE, + DEFAULT_JAEGER_QUERY_IMAGE, + DEFAULT_SMB_IMAGE, + DEFAULT_SMBMETRICS_IMAGE, + DEFAULT_NGINX_IMAGE, + DEFAULT_OAUTH2_PROXY_IMAGE, +) from .container_engines import Docker, Podman from .context import CephadmContext from .daemon_identity import DaemonIdentity, DaemonSubIdentity @@ -660,3 +681,30 @@ def enable_shared_namespaces( cc = f'container:{name}' for n in ns: _replace_container_arg(args, n.to_option(cc)) + + +def get_mgr_images() -> dict: + """Return dict of default mgr images""" + mgr_prefix = 'mgr/cephadm/container_image_' + mgr_images = {} + mgr_images[mgr_prefix + 'prometheus'] = DEFAULT_PROMETHEUS_IMAGE + mgr_images[mgr_prefix + 'alertmanager'] = DEFAULT_ALERT_MANAGER_IMAGE + mgr_images[mgr_prefix + 'graphana'] = DEFAULT_GRAFANA_IMAGE + mgr_images[mgr_prefix + 'loki'] = DEFAULT_LOKI_IMAGE + mgr_images[mgr_prefix + 'promtail'] = DEFAULT_PROMTAIL_IMAGE + mgr_images[mgr_prefix + 'node_exporter'] = DEFAULT_NODE_EXPORTER_IMAGE + mgr_images[mgr_prefix + 'haproxy'] = DEFAULT_HAPROXY_IMAGE + mgr_images[mgr_prefix + 'keepalived'] = DEFAULT_KEEPALIVED_IMAGE + mgr_images[mgr_prefix + 'nvmeof'] = DEFAULT_NVMEOF_IMAGE + mgr_images[mgr_prefix + 'snmp_gateway'] = DEFAULT_SNMP_GATEWAY_IMAGE + mgr_images[mgr_prefix + 'elasticsearch'] = DEFAULT_ELASTICSEARCH_IMAGE + mgr_images[ + mgr_prefix + 'jaeger_collector' + ] = DEFAULT_JAEGER_COLLECTOR_IMAGE + mgr_images[mgr_prefix + 'jaeger_agent'] = DEFAULT_JAEGER_AGENT_IMAGE + mgr_images[mgr_prefix + 'jaeger_query'] = DEFAULT_JAEGER_QUERY_IMAGE + mgr_images[mgr_prefix + 'smb'] = DEFAULT_SMB_IMAGE + mgr_images[mgr_prefix + 'smbmetrics'] = DEFAULT_SMBMETRICS_IMAGE + mgr_images[mgr_prefix + 'nginx'] = DEFAULT_NGINX_IMAGE + mgr_images[mgr_prefix + 'oauth2_proxy'] = DEFAULT_OAUTH2_PROXY_IMAGE + return mgr_images diff --git a/src/cephadm/cephadmlib/daemons/smb.py b/src/cephadm/cephadmlib/daemons/smb.py index 74cb13f4ab022..82f886e72ecd4 100644 --- a/src/cephadm/cephadmlib/daemons/smb.py +++ 
b/src/cephadm/cephadmlib/daemons/smb.py @@ -72,6 +72,7 @@ class Config: instance_id: str source_config: str samba_debug_level: int + ctdb_log_level: str debug_delay: int domain_member: bool clustered: bool @@ -98,6 +99,7 @@ def __init__( domain_member: bool, clustered: bool, samba_debug_level: int = 0, + ctdb_log_level: str = '', debug_delay: int = 0, join_sources: Optional[List[str]] = None, user_sources: Optional[List[str]] = None, @@ -119,6 +121,7 @@ def __init__( self.domain_member = domain_member self.clustered = clustered self.samba_debug_level = samba_debug_level + self.ctdb_log_level = ctdb_log_level self.debug_delay = debug_delay self.join_sources = join_sources or [] self.user_sources = user_sources or [] @@ -370,6 +373,8 @@ def container_args(self) -> List[str]: # make conditional? # CAP_NET_ADMIN is needed for event script to add public ips to iface cargs.append('--cap-add=NET_ADMIN') + # CAP_NET_RAW allows to send gratuitous ARPs/tickle ACKs via raw sockets + cargs.append('--cap-add=NET_RAW') return cargs @@ -756,7 +761,7 @@ def prepare_data_dir(self, data_dir: str, uid: int, gid: int) -> None: def _write_ctdb_stub_config(self, path: pathlib.Path) -> None: reclock_cmd = ' '.join(_MUTEX_SUBCMD + [self._cfg.cluster_lock_uri]) nodes_cmd = ' '.join(_NODES_SUBCMD) - stub_config = { + stub_config: Dict[str, Any] = { 'samba-container-config': 'v0', 'ctdb': { # recovery_lock is passed directly to ctdb: needs '!' prefix @@ -768,6 +773,8 @@ def _write_ctdb_stub_config(self, path: pathlib.Path) -> None: ), }, } + if self._cfg.ctdb_log_level: + stub_config['ctdb']['log_level'] = self._cfg.ctdb_log_level with file_utils.write_new(path) as fh: json.dump(stub_config, fh) diff --git a/src/cephadm/cephadmlib/data_utils.py b/src/cephadm/cephadmlib/data_utils.py index 2f4674752cc17..0ab8b38d2b518 100644 --- a/src/cephadm/cephadmlib/data_utils.py +++ b/src/cephadm/cephadmlib/data_utils.py @@ -165,17 +165,17 @@ def is_fsid(s): def normalize_image_digest(digest: str) -> str: """ Normal case: - >>> normalize_image_digest('ceph/ceph', 'docker.io') - 'docker.io/ceph/ceph' + >>> normalize_image_digest('ceph/ceph', 'quay.io') + 'quay.io/ceph/ceph' No change: - >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') + >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'quay.io') 'quay.ceph.io/ceph/ceph' - >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') - 'docker.io/ubuntu' + >>> normalize_image_digest('quay.io/ubuntu', 'quay.io') + 'quay.io/ubuntu' - >>> normalize_image_digest('localhost/ceph', 'docker.io') + >>> normalize_image_digest('localhost/ceph', 'quay.io') 'localhost/ceph' """ known_shortnames = [ diff --git a/src/cephadm/samples/custom_container.json b/src/cephadm/samples/custom_container.json index 194a44d2abbf1..210cf1e3e552a 100644 --- a/src/cephadm/samples/custom_container.json +++ b/src/cephadm/samples/custom_container.json @@ -1,5 +1,5 @@ { - "image": "docker.io/prom/alertmanager:v0.20.0", + "image": "quay.io/prometheus/alertmanager:v0.20.0", "ports": [9093, 9094], "args": [ "-p", "9093:9093", diff --git a/src/cephadm/tests/build/test_cephadm_build.py b/src/cephadm/tests/build/test_cephadm_build.py index 1465c2c5efea7..c2995a76d4b15 100644 --- a/src/cephadm/tests/build/test_cephadm_build.py +++ b/src/cephadm/tests/build/test_cephadm_build.py @@ -34,12 +34,12 @@ }, 'ubuntu-20.04': { 'name': 'cephadm-build-test:ubuntu-20-04-py3', - 'base_image': 'docker.io/library/ubuntu:20.04', + 'base_image': 'quay.io/library/ubuntu:20.04', 'script': 'apt update && apt install -y 
python3-venv', }, 'ubuntu-22.04': { 'name': 'cephadm-build-test:ubuntu-22-04-py3', - 'base_image': 'docker.io/library/ubuntu:22.04', + 'base_image': 'quay.io/library/ubuntu:22.04', 'script': 'apt update && apt install -y python3-venv', }, } diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index 928982de70b6f..f27b9bcd3625a 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -533,12 +533,12 @@ def test_registry_login(self, _logger, _get_parm, _call_throws): def test_get_image_info_from_inspect(self): # podman - out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]""" + out = """204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1,[quay.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992]""" r = _cephadm.get_image_info_from_inspect(out, 'registry/ceph/ceph:latest') print(r) assert r == { 'image_id': '204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1', - 'repo_digests': ['docker.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992'] + 'repo_digests': ['quay.io/ceph/ceph@sha256:1cc9b824e1b076cdff52a9aa3f0cc8557d879fb2fbbba0cafed970aca59a3992'] } # docker @@ -550,13 +550,13 @@ def test_get_image_info_from_inspect(self): } # multiple digests (podman) - out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]""" + out = """e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42,[quay.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4 quay.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a]""" r = _cephadm.get_image_info_from_inspect(out, 'registry/prom/prometheus:latest') assert r == { 'image_id': 'e935122ab143a64d92ed1fbb27d030cf6e2f0258207be1baf1b509c466aeeb42', 'repo_digests': [ - 'docker.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4', - 'docker.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a', + 'quay.io/prom/prometheus@sha256:e4ca62c0d62f3e886e684806dfe9d4e0cda60d54986898173c1083856cfda0f4', + 'quay.io/prom/prometheus@sha256:efd99a6be65885c07c559679a0df4ec709604bcdd8cd83f0d00a1a683b28fb6a', ] } @@ -604,7 +604,7 @@ def test_infer_local_ceph_image(self, _logger, _listdir): '') out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC - docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' with mock.patch('cephadm.call_throws', return_value=(out, '', '')): with mock.patch('cephadm.get_container_info', return_value=cinfo): image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine) @@ -613,7 +613,7 @@ def test_infer_local_ceph_image(self, _logger, 
_listdir): # make sure first valid image is used when no container_info is found out = '''quay.ceph.io/ceph-ci/ceph@sha256:87f200536bb887b36b959e887d5984dd7a3f008a23aa1f283ab55d48b22c6185|dad864ee21e9|main|2022-03-23 16:29:19 +0000 UTC quay.ceph.io/ceph-ci/ceph@sha256:b50b130fcda2a19f8507ddde3435bb4722266956e1858ac395c838bc1dcf1c0e|514e6a882f6e|pacific|2022-03-23 15:58:34 +0000 UTC - docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' with mock.patch('cephadm.call_throws', return_value=(out, '', '')): with mock.patch('cephadm.get_container_info', return_value=None): image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine) @@ -621,12 +621,12 @@ def test_infer_local_ceph_image(self, _logger, _listdir): # make sure images without digest are discarded (no container_info is found) out = '''quay.ceph.io/ceph-ci/ceph@||| - docker.io/ceph/ceph@||| - docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' + quay.io/ceph/ceph@||| + quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508|666bbfa87e8d|v15.2.5|2020-09-16 14:15:15 +0000 UTC''' with mock.patch('cephadm.call_throws', return_value=(out, '', '')): with mock.patch('cephadm.get_container_info', return_value=None): image = _cephadm.infer_local_ceph_image(ctx, ctx.container_engine) - assert image == 'docker.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508' + assert image == 'quay.io/ceph/ceph@sha256:939a46c06b334e094901560c8346de33c00309e3e3968a2db240eb4897c6a508' @@ -2409,7 +2409,7 @@ class TestSNMPGateway: def test_unit_run_V2c(self, cephadm_fs): fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' - with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx: import json ctx.config_json = json.dumps(self.V2c_config) ctx.fsid = fsid @@ -2434,11 +2434,11 @@ def test_unit_run_V2c(self, cephadm_fs): ) with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: run_cmd = f.readlines()[-1].rstrip() - assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl') + assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V2c --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl') def test_unit_run_V3_noPriv(self, cephadm_fs): fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' - with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx: import json ctx.config_json = json.dumps(self.V3_no_priv_config) ctx.fsid = fsid @@ -2463,11 +2463,11 @@ def test_unit_run_V3_noPriv(self, cephadm_fs): ) with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: run_cmd = f.readlines()[-1].rstrip() - assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 
--web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000') + assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9465 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000') def test_unit_run_V3_Priv(self, cephadm_fs): fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' - with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx: import json ctx.config_json = json.dumps(self.V3_priv_config) ctx.fsid = fsid @@ -2492,11 +2492,11 @@ def test_unit_run_V3_Priv(self, cephadm_fs): ) with open(f'/var/lib/ceph/{fsid}/snmp-gateway.daemon_id/unit.run', 'r') as f: run_cmd = f.readlines()[-1].rstrip() - assert run_cmd.endswith('docker.io/maxwo/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES') + assert run_cmd.endswith('quay.io/ceph/snmp-notifier:v1.2.1 --web.listen-address=:9464 --snmp.destination=192.168.1.10:162 --snmp.version=V3 --log.level=info --snmp.trap-description-template=/etc/snmp_notifier/description-template.tpl --snmp.authentication-enabled --snmp.authentication-protocol=SHA --snmp.security-engine-id=8000C53F00000000 --snmp.private-enabled --snmp.private-protocol=DES') def test_unit_run_no_dest(self, cephadm_fs): fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' - with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx: import json ctx.config_json = json.dumps(self.no_destination_config) ctx.fsid = fsid @@ -2512,7 +2512,7 @@ def test_unit_run_no_dest(self, cephadm_fs): def test_unit_run_bad_version(self, cephadm_fs): fsid = 'ca734440-3dc6-11ec-9b98-5254002537a6' - with with_cephadm_ctx(['--image=docker.io/maxwo/snmp-notifier:v1.2.1'], list_networks={}) as ctx: + with with_cephadm_ctx(['--image=quay.io/ceph/snmp-notifier:v1.2.1'], list_networks={}) as ctx: import json ctx.config_json = json.dumps(self.bad_version_config) ctx.fsid = fsid diff --git a/src/cephadm/tests/test_custom_container.py b/src/cephadm/tests/test_custom_container.py index c185b0908df6c..197ed38dca3be 100644 --- a/src/cephadm/tests/test_custom_container.py +++ b/src/cephadm/tests/test_custom_container.py @@ -47,7 +47,7 @@ def setUp(self): ] ] }, - image='docker.io/library/hello-world:latest' + image='quay.io/hello-world/hello-world:latest' ) def test_entrypoint(self): diff --git a/src/cephadm/tox.ini b/src/cephadm/tox.ini index 70e9a411238fb..20608c1681ce1 100644 --- a/src/cephadm/tox.ini +++ b/src/cephadm/tox.ini @@ -49,7 +49,8 @@ deps = flake8-quotes commands = flake8 --config=tox.ini {posargs:cephadm.py cephadmlib} - bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "docker.io" 
| wc -l) == 11' + bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "docker.io" | wc -l) == 1' + bash -c 'test $(git ls-files 'cephadm.py' 'cephadmlib/*.py' | sort -u | xargs grep "quay.io" | wc -l) == 25' # Downstream distributions may choose to alter this "docker.io" number, # to make sure no new references to docker.io are creeping in unnoticed. diff --git a/src/client/Client.cc b/src/client/Client.cc index e208cf7667577..f8373095b38c1 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -6125,6 +6125,10 @@ int Client::may_open(Inode *in, int flags, const UserPerm& perms) int r = 0; switch (in->mode & S_IFMT) { case S_IFLNK: +#if defined(__linux__) && defined(O_PATH) + if (flags & O_PATH) + break; +#endif r = -CEPHFS_ELOOP; goto out; case S_IFDIR: @@ -7953,6 +7957,12 @@ int Client::readlinkat(int dirfd, const char *relpath, char *buf, loff_t size, c return r; } + if (!strcmp(relpath, "")) { + if (!dirinode.get()->is_symlink()) + return -CEPHFS_ENOENT; + return _readlink(dirinode.get(), buf, size); + } + InodeRef in; filepath path(relpath); r = path_walk(path, &in, perms, false, 0, dirinode); @@ -10798,7 +10808,6 @@ void Client::C_Read_Sync_NonBlocking::finish(int r) goto success; } - clnt->put_cap_ref(in, CEPH_CAP_FILE_RD); // reverify size { r = clnt->_getattr(in, CEPH_STAT_CAP_SIZE, f->actor_perms); @@ -10810,14 +10819,6 @@ void Client::C_Read_Sync_NonBlocking::finish(int r) if ((uint64_t)pos >= in->size) goto success; - { - int have_caps2 = 0; - r = clnt->get_caps(f, CEPH_CAP_FILE_RD, have_caps, &have_caps2, -1); - if (r < 0) { - goto error; - } - } - wanted = left; retry(); clnt->client_lock.unlock(); @@ -10971,6 +10972,20 @@ int64_t Client::_read(Fh *f, int64_t offset, uint64_t size, bufferlist *bl, // branch below but in a non-blocking fashion. The code in _read_sync // is duplicated and modified and exists in // C_Read_Sync_NonBlocking::finish(). + + // trim read based on file size? 
+ if ((offset >= in->size) || (size == 0)) { + // read is requested at the EOF or the read len is zero, therefore just + // release managed pointers and complete the C_Read_Finisher immediately with 0 bytes + + Context *iof = iofinish.release(); + crf.release(); + iof->complete(0); + + // Signal async completion + return 0; + } + C_Read_Sync_NonBlocking *crsa = new C_Read_Sync_NonBlocking(this, iofinish.release(), f, in, f->pos, offset, size, bl, filer.get(), have); @@ -11399,10 +11414,18 @@ int64_t Client::_write_success(Fh *f, utime_t start, uint64_t fpos, return r; } +void Client::C_Lock_Client_Finisher::finish(int r) +{ + std::scoped_lock lock(clnt->client_lock); + onfinish->complete(r); +} + void Client::C_Write_Finisher::finish_io(int r) { bool fini; + ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock)); + clnt->put_cap_ref(in, CEPH_CAP_FILE_BUFFER); if (r >= 0) { @@ -11438,6 +11461,8 @@ void Client::C_Write_Finisher::finish_fsync(int r) bool fini; client_t const whoami = clnt->whoami; // For the benefit of ldout prefix + ceph_assert(ceph_mutex_is_locked_by_me(clnt->client_lock)); + ldout(clnt->cct, 3) << "finish_fsync r = " << r << dendl; fsync_finished = true; @@ -11598,6 +11623,7 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, std::unique_ptr iofinish = nullptr; std::unique_ptr cwf = nullptr; + std::unique_ptr filer_iofinish = nullptr; if (in->inline_version < CEPH_INLINE_NONE) { if (endoff > cct->_conf->client_max_inline_size || @@ -11709,7 +11735,10 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, if (onfinish == nullptr) { // We need a safer condition to wait on. cond_iofinish = new C_SaferCond(); - iofinish.reset(cond_iofinish); + filer_iofinish.reset(cond_iofinish); + } else { + //Register a wrapper callback for the C_Write_Finisher which takes 'client_lock' + filer_iofinish.reset(new C_Lock_Client_Finisher(this, iofinish.get())); } get_cap_ref(in, CEPH_CAP_FILE_BUFFER); @@ -11717,11 +11746,12 @@ int64_t Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf, filer->write_trunc(in->ino, &in->layout, in->snaprealm->get_snap_context(), offset, size, bl, ceph::real_clock::now(), 0, in->truncate_size, in->truncate_seq, - iofinish.get()); + filer_iofinish.get()); if (onfinish) { // handle non-blocking caller (onfinish != nullptr), we can now safely // release all the managed pointers + filer_iofinish.release(); iofinish.release(); onuninline.release(); cwf.release(); diff --git a/src/client/Client.h b/src/client/Client.h index 5a1e69394d02a..f8c39e2fdd6ab 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -1409,6 +1409,21 @@ class Client : public Dispatcher, public md_config_obs_t { void finish(int r) override; }; + // A wrapper callback which takes the 'client_lock' and finishes the context. + // One of the usecase is the filer->write_trunc which doesn't hold client_lock + // in the call back passed. So, use this wrapper in such cases. 
+ class C_Lock_Client_Finisher : public Context { + public: + C_Lock_Client_Finisher(Client *clnt, Context *onfinish) + : clnt(clnt), onfinish(onfinish) {} + + private: + Client *clnt; + Context *onfinish; + + void finish(int r) override; + }; + class C_Write_Finisher : public Context { public: void finish_io(int r); diff --git a/src/common/Thread.cc b/src/common/Thread.cc index 9a7a31923c1b7..3903e8c0ed721 100644 --- a/src/common/Thread.cc +++ b/src/common/Thread.cc @@ -83,7 +83,7 @@ void *Thread::entry_wrapper() if (pid && cpuid >= 0) _set_affinity(cpuid); - ceph_pthread_setname(pthread_self(), thread_name.c_str()); + ceph_pthread_setname(pthread_self(), Thread::thread_name.c_str()); return entry(); } @@ -154,7 +154,7 @@ int Thread::try_create(size_t stacksize) void Thread::create(const char *name, size_t stacksize) { ceph_assert(strlen(name) < 16); - thread_name = name; + Thread::thread_name = name; int ret = try_create(stacksize); if (ret != 0) { diff --git a/src/common/Thread.h b/src/common/Thread.h index 5242fb5f30758..d3892c1b36b71 100644 --- a/src/common/Thread.h +++ b/src/common/Thread.h @@ -20,11 +20,14 @@ #include #include #include +#include #include #include +#include "include/ceph_assert.h" #include "include/compat.h" +#include "include/spinlock.h" extern pid_t ceph_gettid(); @@ -33,7 +36,7 @@ class Thread { pthread_t thread_id; pid_t pid; int cpuid; - std::string thread_name; + static inline thread_local std::string thread_name; void *entry_wrapper(); @@ -61,6 +64,9 @@ class Thread { int join(void **prval = 0); int detach(); int set_affinity(int cpuid); + static const std::string get_thread_name() { + return Thread::thread_name; + } }; // Functions for with std::thread diff --git a/src/common/ceph_mutex.h b/src/common/ceph_mutex.h index 059d81f2ac39b..6ed8c56d5dad3 100644 --- a/src/common/ceph_mutex.h +++ b/src/common/ceph_mutex.h @@ -83,7 +83,6 @@ namespace ceph { return {}; } - static constexpr bool mutex_debugging = false; #define ceph_mutex_is_locked(m) true #define ceph_mutex_is_locked_by_me(m) true } @@ -131,8 +130,6 @@ namespace ceph { return {std::forward(args)...}; } - static constexpr bool mutex_debugging = true; - // debug methods #define ceph_mutex_is_locked(m) ((m).is_locked()) #define ceph_mutex_is_not_locked(m) (!(m).is_locked()) @@ -186,8 +183,6 @@ namespace ceph { return {}; } - static constexpr bool mutex_debugging = false; - // debug methods. Note that these can blindly return true // because any code that does anything other than assert these // are true is broken. 
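The Client.h hunk above introduces C_Lock_Client_Finisher: a wrapper callback that first takes client_lock and only then completes the wrapped context, so a completion delivered from Filer::write_trunc (which does not hold client_lock) still runs C_Write_Finisher under the lock, matching the new ceph_mutex_is_locked_by_me assertions in finish_io/finish_fsync. Below is a minimal standalone sketch of that pattern only; Context here is a simplified stand-in for Ceph's Context, and LockThenFinish/PrintResult are hypothetical names, not the Ceph types.

// Sketch only: a completion callback that acquires a mutex before
// forwarding the result to the wrapped callback (the pattern used by
// C_Lock_Client_Finisher above). Context is a simplified stand-in.
#include <iostream>
#include <mutex>

struct Context {
  virtual ~Context() = default;
  void complete(int r) { finish(r); delete this; }   // Ceph-style: finish, then self-delete
protected:
  virtual void finish(int r) = 0;
};

class LockThenFinish : public Context {
public:
  LockThenFinish(std::mutex& m, Context* onfinish)
    : lock(m), onfinish(onfinish) {}
private:
  std::mutex& lock;
  Context* onfinish;
  void finish(int r) override {
    std::scoped_lock l(lock);       // take the lock the inner callback expects to hold
    onfinish->complete(r);          // inner callback now runs under 'lock'
  }
};

struct PrintResult : Context {
  void finish(int r) override { std::cout << "finished, r=" << r << "\n"; }
};

int main() {
  std::mutex client_lock;           // stands in for Client::client_lock
  Context* cb = new LockThenFinish(client_lock, new PrintResult);
  cb->complete(0);                  // PrintResult::finish runs with client_lock held
}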
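The Thread.h/Thread.cc hunks above change thread_name from a per-object field into a static inline thread_local member, so each OS thread owns exactly one name and Thread::get_thread_name() can read it back without a Thread pointer in hand. A standalone sketch of the same idiom, assuming only the C++17 standard library (NamedThread is a hypothetical class, not Ceph's Thread):

// Sketch only: per-thread name stored in a static thread_local member.
#include <iostream>
#include <string>
#include <thread>

class NamedThread {
public:
  static void set_thread_name(const std::string& name) {
    thread_name = name;                       // writes this thread's copy only
  }
  static std::string get_thread_name() {
    return thread_name;                       // reads this thread's copy
  }
private:
  static inline thread_local std::string thread_name;
};

int main() {
  NamedThread::set_thread_name("main");
  std::thread worker([] {
    NamedThread::set_thread_name("worker");
    std::cout << "worker sees: " << NamedThread::get_thread_name() << "\n";  // "worker"
  });
  worker.join();
  std::cout << "main sees:   " << NamedThread::get_thread_name() << "\n";    // "main"
}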
diff --git a/src/common/cohort_lru.h b/src/common/cohort_lru.h index af2baaa5c67bf..86ced8d183c71 100644 --- a/src/common/cohort_lru.h +++ b/src/common/cohort_lru.h @@ -15,6 +15,12 @@ #include #include +#include +#include +#include +#include +#include +#include #ifdef __CEPH__ # include "include/ceph_assert.h" diff --git a/src/common/config_proxy.h b/src/common/config_proxy.h index b9b47d9cef472..12a273b8c84f7 100644 --- a/src/common/config_proxy.h +++ b/src/common/config_proxy.h @@ -31,7 +31,6 @@ class ConfigProxy { using rev_obs_map_t = ObsMgr::rev_obs_map; void _call_observers(rev_obs_map_t& rev_obs) { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); for (auto& [obs, keys] : rev_obs) { (*obs)->handle_conf_change(*this, keys); } diff --git a/src/common/io_exerciser/RadosIo.cc b/src/common/io_exerciser/RadosIo.cc index 3f907ccf47416..a28a1e2f488b4 100644 --- a/src/common/io_exerciser/RadosIo.cc +++ b/src/common/io_exerciser/RadosIo.cc @@ -81,7 +81,7 @@ RadosIo::AsyncOpInfo::AsyncOpInfo(uint64_t offset1, uint64_t length1, bool RadosIo::readyForIoOp(IoOp &op) { - ceph_assert(lock.is_locked_by_me()); //Must be called with lock held + ceph_assert(ceph_mutex_is_locked_by_me(lock)); //Must be called with lock held if (!om->readyForIoOp(op)) { return false; } @@ -118,7 +118,8 @@ void RadosIo::applyIoOp(IoOp &op) op_info = std::make_shared(0, op.length1); op_info->bl1 = db->generate_data(0, op.length1); op_info->wop.write_full(op_info->bl1); - auto create_cb = [this] (boost::system::error_code ec) { + auto create_cb = [this] (boost::system::error_code ec, + version_t ver) { ceph_assert(ec == boost::system::errc::success); finish_io(); }; @@ -132,7 +133,8 @@ void RadosIo::applyIoOp(IoOp &op) start_io(); op_info = std::make_shared(); op_info->wop.remove(); - auto remove_cb = [this] (boost::system::error_code ec) { + auto remove_cb = [this] (boost::system::error_code ec, + version_t ver) { ceph_assert(ec == boost::system::errc::success); finish_io(); }; @@ -148,7 +150,9 @@ void RadosIo::applyIoOp(IoOp &op) op_info->rop.read(op.offset1 * block_size, op.length1 * block_size, &op_info->bl1, nullptr); - auto read_cb = [this, op_info] (boost::system::error_code ec, bufferlist bl) { + auto read_cb = [this, op_info] (boost::system::error_code ec, + version_t ver, + bufferlist bl) { ceph_assert(ec == boost::system::errc::success); db->validate(op_info->bl1, op_info->offset1, op_info->length1); finish_io(); @@ -174,6 +178,7 @@ void RadosIo::applyIoOp(IoOp &op) op.length2 * block_size, &op_info->bl2, nullptr); auto read2_cb = [this, op_info] (boost::system::error_code ec, + version_t ver, bufferlist bl) { ceph_assert(ec == boost::system::errc::success); db->validate(op_info->bl1, op_info->offset1, op_info->length1); @@ -202,6 +207,7 @@ void RadosIo::applyIoOp(IoOp &op) op.length3 * block_size, &op_info->bl3, nullptr); auto read3_cb = [this, op_info] (boost::system::error_code ec, + version_t ver, bufferlist bl) { ceph_assert(ec == boost::system::errc::success); db->validate(op_info->bl1, op_info->offset1, op_info->length1); @@ -222,7 +228,8 @@ void RadosIo::applyIoOp(IoOp &op) op_info->bl1 = db->generate_data(op.offset1, op.length1); op_info->wop.write(op.offset1 * block_size, op_info->bl1); - auto write_cb = [this] (boost::system::error_code ec) { + auto write_cb = [this] (boost::system::error_code ec, + version_t ver) { ceph_assert(ec == boost::system::errc::success); finish_io(); }; @@ -241,7 +248,8 @@ void RadosIo::applyIoOp(IoOp &op) op_info->bl2 = db->generate_data(op.offset2, 
op.length2); op_info->wop.write(op.offset1 * block_size, op_info->bl1); op_info->wop.write(op.offset2 * block_size, op_info->bl2); - auto write2_cb = [this] (boost::system::error_code ec) { + auto write2_cb = [this] (boost::system::error_code ec, + version_t ver) { ceph_assert(ec == boost::system::errc::success); finish_io(); }; @@ -263,7 +271,8 @@ void RadosIo::applyIoOp(IoOp &op) op_info->wop.write(op.offset1 * block_size, op_info->bl1); op_info->wop.write(op.offset2 * block_size, op_info->bl2); op_info->wop.write(op.offset3 * block_size, op_info->bl3); - auto write3_cb = [this] (boost::system::error_code ec) { + auto write3_cb = [this] (boost::system::error_code ec, + version_t ver) { ceph_assert(ec == boost::system::errc::success); finish_io(); }; diff --git a/src/common/map_cacher.hpp b/src/common/map_cacher.hpp index 4d843be75dc64..95353425de9e6 100644 --- a/src/common/map_cacher.hpp +++ b/src/common/map_cacher.hpp @@ -16,6 +16,7 @@ #define MAPCACHER_H #include "include/Context.h" +#include "include/expected.hpp" #include "common/sharedptr_registry.hpp" namespace MapCacher { @@ -130,6 +131,50 @@ class MapCacher { return -EINVAL; } ///< @return error value, 0 on success, -ENOENT if no more entries + /// Fetch first key/value std::pair after specified key + struct PosAndData { + K last_key; + V data; + }; + using MaybePosAndData = tl::expected; + + MaybePosAndData get_1st_after_key( + K key ///< [in] key after which to get next + ) + { + ceph_assert(driver); + while (true) { + std::pair> cached; + bool got_cached = in_progress.get_next(key, &cached); + + ///\todo a driver->get_next() that returns an expected would be nice + bool got_store{false}; + std::pair store; + int r = driver->get_next(key, &store); + if (r < 0 && r != -ENOENT) { + return tl::unexpected(r); + } else if (r == 0) { + got_store = true; + } + + if (!got_cached && !got_store) { + return tl::unexpected(-ENOENT); + } else if (got_cached && (!got_store || store.first >= cached.first)) { + if (cached.second) { + return PosAndData{cached.first, *cached.second}; + } else { + key = cached.first; + continue; // value was cached as removed, recurse + } + } else { + return PosAndData{store.first, store.second}; + } + } + ceph_abort(); // not reachable + return tl::unexpected(-EINVAL); + } + + /// Adds operation setting keys to Transaction void set_keys( const std::map &keys, ///< [in] keys/values to std::set diff --git a/src/common/mutex_debug.h b/src/common/mutex_debug.h index c1a4ff2a43501..d56d0ebee9987 100644 --- a/src/common/mutex_debug.h +++ b/src/common/mutex_debug.h @@ -169,20 +169,16 @@ class mutex_debug_impl : public mutex_debugging_base } bool try_lock(bool no_lockdep = false) { - bool locked = try_lock_impl(); - if (locked) { - if (enable_lockdep(no_lockdep)) - _locked(); - _post_lock(); - } - return locked; + ceph_assert(recursive || !is_locked_by_me()); + return _try_lock(no_lockdep); } void lock(bool no_lockdep = false) { + ceph_assert(recursive || !is_locked_by_me()); if (enable_lockdep(no_lockdep)) _will_lock(recursive); - if (try_lock(no_lockdep)) + if (_try_lock(no_lockdep)) return; lock_impl(); @@ -198,6 +194,16 @@ class mutex_debug_impl : public mutex_debugging_base unlock_impl(); } +private: + bool _try_lock(bool no_lockdep) { + bool locked = try_lock_impl(); + if (locked) { + if (enable_lockdep(no_lockdep)) + _locked(); + _post_lock(); + } + return locked; + } }; diff --git a/src/common/scrub_types.cc b/src/common/scrub_types.cc index b03a3cab70c84..4b4d191e09c39 100644 --- a/src/common/scrub_types.cc +++ 
b/src/common/scrub_types.cc @@ -161,6 +161,13 @@ void inconsistent_obj_wrapper::encode(bufferlist& bl) const ENCODE_FINISH(bl); } +bufferlist inconsistent_obj_wrapper::encode() const +{ + bufferlist bl; + encode(bl); + return bl; +} + void inconsistent_obj_wrapper::decode(bufferlist::const_iterator& bp) { DECODE_START(2, bp); @@ -240,6 +247,13 @@ void inconsistent_snapset_wrapper::encode(bufferlist& bl) const ENCODE_FINISH(bl); } +bufferlist inconsistent_snapset_wrapper::encode() const +{ + bufferlist bl; + encode(bl); + return bl; +} + void inconsistent_snapset_wrapper::decode(bufferlist::const_iterator& bp) { DECODE_START(2, bp); diff --git a/src/common/scrub_types.h b/src/common/scrub_types.h index dd206f56f6035..d86fc12b6c8cf 100644 --- a/src/common/scrub_types.h +++ b/src/common/scrub_types.h @@ -152,6 +152,7 @@ struct inconsistent_obj_wrapper : librados::inconsistent_obj_t { const pg_shard_t &primary); void set_version(uint64_t ver) { version = ver; } void encode(ceph::buffer::list& bl) const; + ceph::buffer::list encode() const; void decode(ceph::buffer::list::const_iterator& bp); }; @@ -181,6 +182,7 @@ struct inconsistent_snapset_wrapper : public librados::inconsistent_snapset_t { void set_size_mismatch(); void encode(ceph::buffer::list& bl) const; + ceph::buffer::list encode() const; void decode(ceph::buffer::list::const_iterator& bp); }; diff --git a/src/crimson/common/log.h b/src/crimson/common/log.h index 4f564ac044d05..c38b225c94b4f 100644 --- a/src/crimson/common/log.h +++ b/src/crimson/common/log.h @@ -90,7 +90,7 @@ static inline seastar::log_level to_log_level(int level) { #define SUBLOGDPP(subname_, level_, MSG, dpp, ...) \ LOGGER(subname_).log(level_, "{} {}: " MSG, dpp, FNAME , ##__VA_ARGS__) #define SUBLOGDPPI(subname_, level_, MSG, dpp, ...) \ - LOGGER(subname_).log(level_, "{} {}: " MSG, \ + LOGGER(subname_).log(level_, "{} {} {}: " MSG, \ interruptor::get_interrupt_cond(), dpp, FNAME , ##__VA_ARGS__) #define SUBTRACEDPP(subname_, ...) SUBLOGDPP(subname_, seastar::log_level::trace, __VA_ARGS__) #define SUBTRACEDPPI(subname_, ...) SUBLOGDPPI(subname_, seastar::log_level::trace, __VA_ARGS__) @@ -106,7 +106,7 @@ static inline seastar::log_level to_log_level(int level) { #define LOGDPP(level_, MSG, dpp, ...) \ LOCAL_LOGGER.log(level_, "{} {}: " MSG, dpp, FNAME , ##__VA_ARGS__) #define LOGDPPI(level_, MSG, dpp, ...) \ - LOCAL_LOGGER.log(level_, "{} {}: " MSG, \ + LOCAL_LOGGER.log(level_, "{} {} {}: " MSG, \ interruptor::get_interrupt_cond(), dpp, FNAME , ##__VA_ARGS__) #define TRACEDPP(...) LOGDPP(seastar::log_level::trace, __VA_ARGS__) #define TRACEDPPI(...) 
LOGDPPI(seastar::log_level::trace, __VA_ARGS__) diff --git a/src/crimson/os/futurized_store.h b/src/crimson/os/futurized_store.h index fe09cc5451072..0dca695ba3a1e 100644 --- a/src/crimson/os/futurized_store.h +++ b/src/crimson/os/futurized_store.h @@ -75,14 +75,15 @@ class FuturizedStore { CollectionRef c, const ghobject_t& oid) = 0; - using omap_values_t = std::map>; + using omap_values_t = attrs_t; using omap_keys_t = std::set; virtual read_errorator::future omap_get_values( CollectionRef c, const ghobject_t& oid, const omap_keys_t& keys) = 0; - virtual read_errorator::future> omap_get_values( + using omap_values_paged_t = std::tuple; + virtual read_errorator::future omap_get_values( CollectionRef c, ///< [in] collection const ghobject_t &oid, ///< [in] oid const std::optional &start ///< [in] start, empty for begin @@ -147,7 +148,8 @@ class FuturizedStore { return seastar::now(); } - virtual read_errorator::future> fiemap( + using fiemap_ret_t = std::map; + virtual read_errorator::future fiemap( CollectionRef ch, const ghobject_t& oid, uint64_t off, diff --git a/src/crimson/os/seastore/cache.cc b/src/crimson/os/seastore/cache.cc index cf8d3c0891d7f..5dcb7514ee1ab 100644 --- a/src/crimson/os/seastore/cache.cc +++ b/src/crimson/os/seastore/cache.cc @@ -990,8 +990,12 @@ void Cache::mark_transaction_conflicted( } efforts.mutate_delta_bytes += delta_stat.bytes; - for (auto &i: t.pre_alloc_list) { - epm.mark_space_free(i->get_paddr(), i->get_length()); + if (t.get_pending_ool()) { + t.get_pending_ool()->is_conflicted = true; + } else { + for (auto &i: t.pre_alloc_list) { + epm.mark_space_free(i->get_paddr(), i->get_length()); + } } auto& ool_stats = t.get_ool_write_stats(); diff --git a/src/crimson/os/seastore/cached_extent.cc b/src/crimson/os/seastore/cached_extent.cc index cdad6dfb1b03d..76c18bde667a4 100644 --- a/src/crimson/os/seastore/cached_extent.cc +++ b/src/crimson/os/seastore/cached_extent.cc @@ -158,12 +158,14 @@ parent_tracker_t::~parent_tracker_t() { std::ostream &operator<<(std::ostream &out, const LBAMapping &rhs) { - out << "LBAMapping(" << rhs.get_key() << "~" << rhs.get_length() + out << "LBAMapping(" << rhs.get_key() + << "~0x" << std::hex << rhs.get_length() << std::dec << "->" << rhs.get_val(); if (rhs.is_indirect()) { - out << " indirect(" << rhs.get_intermediate_base() << "~" - << rhs.get_intermediate_key() << "~" - << rhs.get_intermediate_length() << ")"; + out << ",indirect(" << rhs.get_intermediate_base() + << "~0x" << std::hex << rhs.get_intermediate_length() + << "@0x" << rhs.get_intermediate_offset() << std::dec + << ")"; } out << ")"; return out; diff --git a/src/crimson/os/seastore/cached_extent.h b/src/crimson/os/seastore/cached_extent.h index 6c5c6c6fcc292..6025725aa337d 100644 --- a/src/crimson/os/seastore/cached_extent.h +++ b/src/crimson/os/seastore/cached_extent.h @@ -350,7 +350,7 @@ class CachedExtent << ", modify_time=" << sea_time_point_printer_t{modify_time} << ", paddr=" << get_paddr() << ", prior_paddr=" << prior_poffset_str - << ", length=" << get_length() + << std::hex << ", length=0x" << get_length() << std::dec << ", state=" << state << ", last_committed_crc=" << last_committed_crc << ", refcount=" << use_count() diff --git a/src/crimson/os/seastore/extent_placement_manager.cc b/src/crimson/os/seastore/extent_placement_manager.cc index 34ac199eed8dd..0458fbfed7480 100644 --- a/src/crimson/os/seastore/extent_placement_manager.cc +++ b/src/crimson/os/seastore/extent_placement_manager.cc @@ -987,7 +987,19 @@ 
RandomBlockOolWriter::alloc_write_ool_extents( return alloc_write_iertr::now(); } return seastar::with_gate(write_guard, [this, &t, &extents] { - return do_write(t, extents); + seastar::lw_shared_ptr ptr = + seastar::make_lw_shared(); + ptr->pending_extents = t.get_pre_alloc_list(); + assert(!t.is_conflicted()); + t.set_pending_ool(ptr); + return do_write(t, extents + ).finally([this, ptr=ptr] { + if (ptr->is_conflicted) { + for (auto &e : ptr->pending_extents) { + rb_cleaner->mark_space_free(e->get_paddr(), e->get_length()); + } + } + }); }); } diff --git a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h index 5d6fa3cb1b170..ef10ff9623b50 100644 --- a/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h +++ b/src/crimson/os/seastore/lba_manager/btree/btree_lba_manager.h @@ -173,16 +173,22 @@ class BtreeLBAMapping : public BtreeNodeMapping { if (!parent_modified()) { return; } + LOG_PREFIX(BtreeLBAMapping::maybe_fix_pos); auto &p = static_cast(*parent); p.maybe_fix_mapping_pos(*this); + SUBDEBUGT(seastore_lba, "fixed pin {}", + ctx.trans, static_cast(*this)); } LBAMappingRef refresh_with_pending_parent() final { + LOG_PREFIX(BtreeLBAMapping::refresh_with_pending_parent); assert(is_parent_valid() && !is_parent_viewable()); auto &p = static_cast(*parent); auto &viewable_p = static_cast( *p.find_pending_version(ctx.trans, get_key())); - return viewable_p.get_mapping(ctx, get_key()); + auto new_pin = viewable_p.get_mapping(ctx, get_key()); + SUBDEBUGT(seastore_lba, "new pin {}", ctx.trans, static_cast(*new_pin)); + return new_pin; } protected: std::unique_ptr> _duplicate( diff --git a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h index 960ea6ba41181..397a014a7c3d2 100644 --- a/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h +++ b/src/crimson/os/seastore/onode_manager/staged-fltree/node_layout.h @@ -925,7 +925,7 @@ class NodeLayoutT final : public InternalNodeImpl, public LeafNodeImpl { std::ostringstream sos; sos << "Node" << NODE_TYPE << FIELD_TYPE << "@" << extent.get_laddr() - << "+" << std::hex << extent.get_length() << std::dec + << "+0x" << std::hex << extent.get_length() << std::dec << "Lv" << (unsigned)level() << (is_level_tail() ? 
"$" : ""); name = sos.str(); diff --git a/src/crimson/os/seastore/seastore.cc b/src/crimson/os/seastore/seastore.cc index 1577433237351..d90edbb20dbe2 100644 --- a/src/crimson/os/seastore/seastore.cc +++ b/src/crimson/os/seastore/seastore.cc @@ -17,6 +17,7 @@ #include "common/safe_io.h" #include "include/stringify.h" #include "os/Transaction.h" +#include "osd/osd_types_fmt.h" #include "crimson/common/buffer_io.h" @@ -30,8 +31,6 @@ #include "crimson/os/seastore/onode_manager.h" #include "crimson/os/seastore/object_data_handler.h" - -using std::string; using crimson::common::local_conf; template <> struct fmt::formatter @@ -42,8 +41,8 @@ template <> struct fmt::formatter auto format(op_type_t op, FormatContext& ctx) const { std::string_view name = "unknown"; switch (op) { - case op_type_t::TRANSACTION: - name = "transaction"; + case op_type_t::DO_TRANSACTION: + name = "do_transaction"; break; case op_type_t::READ: name = "read"; @@ -63,8 +62,8 @@ template <> struct fmt::formatter case op_type_t::OMAP_GET_VALUES: name = "omap_get_values"; break; - case op_type_t::OMAP_LIST: - name = "omap_list"; + case op_type_t::OMAP_GET_VALUES2: + name = "omap_get_values2"; break; case op_type_t::MAX: name = "unknown"; @@ -143,14 +142,14 @@ void SeaStore::Shard::register_metrics() namespace sm = seastar::metrics; using op_type_t = crimson::os::seastore::op_type_t; std::pair labels_by_op_type[] = { - {op_type_t::TRANSACTION, sm::label_instance("latency", "TRANSACTION")}, - {op_type_t::READ, sm::label_instance("latency", "READ")}, - {op_type_t::WRITE, sm::label_instance("latency", "WRITE")}, - {op_type_t::GET_ATTR, sm::label_instance("latency", "GET_ATTR")}, - {op_type_t::GET_ATTRS, sm::label_instance("latency", "GET_ATTRS")}, - {op_type_t::STAT, sm::label_instance("latency", "STAT")}, - {op_type_t::OMAP_GET_VALUES, sm::label_instance("latency", "OMAP_GET_VALUES")}, - {op_type_t::OMAP_LIST, sm::label_instance("latency", "OMAP_LIST")}, + {op_type_t::DO_TRANSACTION, sm::label_instance("latency", "DO_TRANSACTION")}, + {op_type_t::READ, sm::label_instance("latency", "READ")}, + {op_type_t::WRITE, sm::label_instance("latency", "WRITE")}, + {op_type_t::GET_ATTR, sm::label_instance("latency", "GET_ATTR")}, + {op_type_t::GET_ATTRS, sm::label_instance("latency", "GET_ATTRS")}, + {op_type_t::STAT, sm::label_instance("latency", "STAT")}, + {op_type_t::OMAP_GET_VALUES, sm::label_instance("latency", "OMAP_GET_VALUES")}, + {op_type_t::OMAP_GET_VALUES2, sm::label_instance("latency", "OMAP_GET_VALUES2")}, }; for (auto& [op_type, label] : labels_by_op_type) { @@ -194,6 +193,9 @@ void SeaStore::Shard::register_metrics() seastar::future<> SeaStore::start() { + LOG_PREFIX(SeaStore::start); + INFO("..."); + ceph_assert(seastar::this_shard_id() == primary_core); #ifndef NDEBUG bool is_test = true; @@ -214,19 +216,30 @@ seastar::future<> SeaStore::start() }).then([this, is_test] { ceph_assert(device); return shard_stores.start(root, device.get(), is_test); + }).then([FNAME] { + INFO("done"); }); } seastar::future<> SeaStore::test_start(DeviceRef device_obj) { + LOG_PREFIX(SeaStore::test_start); + INFO("..."); + ceph_assert(device_obj); ceph_assert(root == ""); device = std::move(device_obj); - return shard_stores.start_single(root, device.get(), true); + return shard_stores.start_single(root, device.get(), true + ).then([FNAME] { + INFO("done"); + }); } seastar::future<> SeaStore::stop() { + LOG_PREFIX(SeaStore::stop); + INFO("..."); + ceph_assert(seastar::this_shard_id() == primary_core); return seastar::do_for_each(secondaries, 
[](auto& sec_dev) { return sec_dev->stop(); @@ -239,17 +252,28 @@ seastar::future<> SeaStore::stop() } }).then([this] { return shard_stores.stop(); + }).then([FNAME] { + INFO("done"); }); } SeaStore::mount_ertr::future<> SeaStore::test_mount() { + LOG_PREFIX(SeaStore::test_mount); + INFO("..."); + ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.local().mount_managers(); + return shard_stores.local().mount_managers( + ).then([FNAME] { + INFO("done"); + }); } SeaStore::mount_ertr::future<> SeaStore::mount() { + LOG_PREFIX(SeaStore::mount); + INFO("..."); + ceph_assert(seastar::this_shard_id() == primary_core); return device->mount( ).safe_then([this] { @@ -278,11 +302,13 @@ SeaStore::mount_ertr::future<> SeaStore::mount() return set_secondaries(); }); }); - }).safe_then([this] { - return shard_stores.invoke_on_all([](auto &local_store) { - return local_store.mount_managers(); - }); }); + }).safe_then([this] { + return shard_stores.invoke_on_all([](auto &local_store) { + return local_store.mount_managers(); + }); + }).safe_then([FNAME] { + INFO("done"); }).handle_error( crimson::ct_error::assert_all{ "Invalid error in SeaStore::mount" @@ -302,9 +328,14 @@ seastar::future<> SeaStore::Shard::mount_managers() seastar::future<> SeaStore::umount() { + LOG_PREFIX(SeaStore::umount); + INFO("..."); + ceph_assert(seastar::this_shard_id() == primary_core); return shard_stores.invoke_on_all([](auto &local_store) { return local_store.umount(); + }).then([FNAME] { + INFO("done"); }); } @@ -332,7 +363,7 @@ seastar::future<> SeaStore::Shard::umount() onode_manager.reset(); }).handle_error( crimson::ct_error::assert_all{ - "Invalid error in SeaStore::umount" + "Invalid error in SeaStoreS::umount" } ); } @@ -345,15 +376,15 @@ seastar::future<> SeaStore::write_fsid(uuid_d new_osd_fsid) auto [ret, fsid] = tuple; std::string str_fsid = stringify(new_osd_fsid); if (ret == -1) { - return write_meta("fsid", stringify(new_osd_fsid)); + return write_meta("fsid", stringify(new_osd_fsid)); } else if (ret == 0 && fsid != str_fsid) { - ERROR("on-disk fsid {} != provided {}", - fsid, stringify(new_osd_fsid)); - throw std::runtime_error("store fsid error"); - } else { + ERROR("on-disk fsid {} != provided {}", + fsid, stringify(new_osd_fsid)); + throw std::runtime_error("store fsid error"); + } else { return seastar::now(); - } - }); + } + }); } seastar::future<> @@ -379,6 +410,8 @@ SeaStore::Shard::mkfs_managers() "mkfs_seastore", [this](auto& t) { + LOG_PREFIX(SeaStoreS::mkfs_managers); + DEBUGT("...", t); return onode_manager->mkfs(t ).si_then([this, &t] { return collection_manager->mkfs(t); @@ -412,15 +445,22 @@ seastar::future<> SeaStore::set_secondaries() SeaStore::mkfs_ertr::future<> SeaStore::test_mkfs(uuid_d new_osd_fsid) { + LOG_PREFIX(SeaStore::test_mkfs); + INFO("uuid={} ...", new_osd_fsid); + ceph_assert(seastar::this_shard_id() == primary_core); - return read_meta("mkfs_done").then([this, new_osd_fsid] (auto tuple) { + return read_meta("mkfs_done" + ).then([this, new_osd_fsid, FNAME](auto tuple) { auto [done, value] = tuple; if (done == 0) { + ERROR("failed"); return seastar::now(); } return shard_stores.local().mkfs_managers( ).then([this, new_osd_fsid] { return prepare_meta(new_osd_fsid); + }).then([FNAME] { + INFO("done"); }); }); } @@ -448,27 +488,29 @@ seastar::future<> SeaStore::prepare_meta(uuid_d new_osd_fsid) SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) { + LOG_PREFIX(SeaStore::mkfs); + INFO("uuid={}, root={} ...", new_osd_fsid, root); + 
ceph_assert(seastar::this_shard_id() == primary_core); - return read_meta("mkfs_done").then([this, new_osd_fsid] (auto tuple) { + return read_meta("mkfs_done" + ).then([this, new_osd_fsid, FNAME](auto tuple) { auto [done, value] = tuple; if (done == 0) { + ERROR("failed"); return seastar::now(); } else { return seastar::do_with( secondary_device_set_t(), - [this, new_osd_fsid](auto& sds) { + [this, new_osd_fsid, FNAME](auto& sds) { auto fut = seastar::now(); - LOG_PREFIX(SeaStore::mkfs); - DEBUG("root: {}", root); if (!root.empty()) { fut = seastar::open_directory(root - ).then([this, &sds, new_osd_fsid](seastar::file rdir) mutable { + ).then([this, &sds, new_osd_fsid, FNAME](seastar::file rdir) mutable { std::unique_ptr root_f = std::make_unique(std::move(rdir)); auto sub = root_f->list_directory( - [this, &sds, new_osd_fsid](auto de) mutable -> seastar::future<> + [this, &sds, new_osd_fsid, FNAME](auto de) mutable -> seastar::future<> { - LOG_PREFIX(SeaStore::mkfs); DEBUG("found file: {}", de.name); if (de.name.find("block.") == 0 && de.name.length() > 6 /* 6 for "block." */) { @@ -533,6 +575,8 @@ SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) return prepare_meta(new_osd_fsid); }).safe_then([this] { return umount(); + }).safe_then([FNAME] { + INFO("done"); }).handle_error( crimson::ct_error::assert_all{ "Invalid error in SeaStore::mkfs" @@ -542,18 +586,22 @@ SeaStore::mkfs_ertr::future<> SeaStore::mkfs(uuid_d new_osd_fsid) }); } -using coll_core_t = FuturizedStore::coll_core_t; +using coll_core_t = SeaStore::coll_core_t; seastar::future> SeaStore::list_collections() { + LOG_PREFIX(SeaStore::list_collections); + DEBUG("..."); + ceph_assert(seastar::this_shard_id() == primary_core); return shard_stores.map([](auto &local_store) { return local_store.list_collections(); - }).then([](std::vector> results) { + }).then([FNAME](std::vector> results) { std::vector collections; for (auto& colls : results) { collections.insert(collections.end(), colls.begin(), colls.end()); } + DEBUG("got {} collections", collections.size()); return seastar::make_ready_future>( std::move(collections)); }); @@ -561,14 +609,18 @@ SeaStore::list_collections() store_statfs_t SeaStore::Shard::stat() const { - return transaction_manager->store_stat(); + LOG_PREFIX(SeaStoreS::stat); + auto ss = transaction_manager->store_stat(); + DEBUG("stat={}", ss); + return ss; } seastar::future SeaStore::stat() const { - ceph_assert(seastar::this_shard_id() == primary_core); LOG_PREFIX(SeaStore::stat); - DEBUG(""); + DEBUG("..."); + + ceph_assert(seastar::this_shard_id() == primary_core); return shard_stores.map_reduce0( [](const SeaStore::Shard &local_store) { return local_store.stat(); @@ -578,19 +630,30 @@ seastar::future SeaStore::stat() const ss.add(ret); return std::move(ss); } - ).then([](store_statfs_t ss) { + ).then([FNAME](store_statfs_t ss) { + DEBUG("done, stat={}", ss); return seastar::make_ready_future(std::move(ss)); }); } seastar::future SeaStore::pool_statfs(int64_t pool_id) const { - //TODO - return SeaStore::stat(); + LOG_PREFIX(SeaStore::pool_statfs); + DEBUG("pool_id={} ...", pool_id); + ceph_assert(seastar::this_shard_id() == primary_core); + //TODO + return SeaStore::stat( + ).then([FNAME, pool_id](store_statfs_t ss) { + DEBUG("done, pool_id={}, ret={}", pool_id, ss); + return seastar::make_ready_future(std::move(ss)); + }); } seastar::future<> SeaStore::report_stats() { + LOG_PREFIX(SeaStore::report_stats); + DEBUG("..."); + ceph_assert(seastar::this_shard_id() == primary_core); 
shard_device_stats.resize(seastar::smp::count); shard_io_stats.resize(seastar::smp::count); @@ -609,8 +672,7 @@ seastar::future<> SeaStore::report_stats() local_store.get_io_stats(report_detail, seconds); shard_cache_stats[seastar::this_shard_id()] = local_store.get_cache_stats(report_detail, seconds); - }).then([this] { - LOG_PREFIX(SeaStore); + }).then([this, FNAME] { auto now = seastar::lowres_clock::now(); if (last_tp == seastar::lowres_clock::time_point::min()) { last_tp = now; @@ -857,24 +919,26 @@ SeaStore::Shard::list_objects(CollectionRef ch, "list_objects", [this, ch, start, end, &limit, &ret](auto &t) { + LOG_PREFIX(SeaStoreS::list_objects); + DEBUGT("cid={} start={} end={} limit={} ...", + t, ch->get_cid(), start, end, limit); return get_coll_bits( ch, t - ).si_then([this, ch, &t, start, end, &limit, &ret](auto bits) { + ).si_then([FNAME, this, ch, &t, start, end, &limit, &ret](auto bits) { if (!bits) { + DEBUGT("no bits, return none", t); return list_iertr::make_ready_future< OnodeManager::list_onodes_bare_ret >(std::make_tuple( std::vector(), ghobject_t::get_max())); } else { - LOG_PREFIX(SeaStore::list_objects); - DEBUGT("start {}, end {}, limit {}, bits {}", - t, start, end, limit, *bits); + DEBUGT("bits={} ...", t, *bits); auto filter = SeaStore::get_objs_range(ch, *bits); using list_iertr = OnodeManager::list_onodes_iertr; using repeat_ret = list_iertr::future; return trans_intr::repeat( - [this, &t, &ret, &limit, end, + [this, FNAME, &t, &ret, &limit, end, filter, ranges = get_ranges(ch, start, end, filter) ]() mutable -> repeat_ret { if (limit == 0 || ranges.empty()) { @@ -886,11 +950,10 @@ SeaStore::Shard::list_objects(CollectionRef ch, auto pstart = ite->first; auto pend = ite->second; ranges.pop_front(); - LOG_PREFIX(SeaStore::list_objects); - DEBUGT("pstart {}, pend {}, limit {}", t, pstart, pend, limit); + DEBUGT("pstart {}, pend {}, limit {} ...", t, pstart, pend, limit); return onode_manager->list_onodes( t, pstart, pend, limit - ).si_then([&limit, &ret, pend, &t, last=ranges.empty(), end] + ).si_then([&limit, &ret, pend, &t, last=ranges.empty(), end, FNAME] (auto &&_ret) mutable { auto &next_objects = std::get<0>(_ret); auto &ret_objects = std::get<0>(ret); @@ -901,7 +964,6 @@ SeaStore::Shard::list_objects(CollectionRef ch, std::get<1>(ret) = std::get<1>(_ret); assert(limit >= next_objects.size()); limit -= next_objects.size(); - LOG_PREFIX(SeaStore::list_objects); DEBUGT("got {} objects, left limit {}", t, next_objects.size(), limit); assert(limit == 0 || @@ -914,10 +976,13 @@ SeaStore::Shard::list_objects(CollectionRef ch, seastar::stop_iteration >(seastar::stop_iteration::no); }); - }).si_then([&ret] { - return list_iertr::make_ready_future< - OnodeManager::list_onodes_bare_ret>(std::move(ret)); - }); + } + ).si_then([&ret, FNAME] { + DEBUG("got {} objects, next={}", + std::get<0>(ret).size(), std::get<1>(ret)); + return list_iertr::make_ready_future< + OnodeManager::list_onodes_bare_ret>(std::move(ret)); + }); } }); }).safe_then([&ret](auto&& _ret) { @@ -927,7 +992,7 @@ SeaStore::Shard::list_objects(CollectionRef ch, return std::move(ret); }).handle_error( crimson::ct_error::assert_all{ - "Invalid error in SeaStore::list_objects" + "Invalid error in SeaStoreS::list_objects" } ); }).finally([this] { @@ -939,23 +1004,26 @@ SeaStore::Shard::list_objects(CollectionRef ch, seastar::future SeaStore::Shard::create_new_collection(const coll_t& cid) { - LOG_PREFIX(SeaStore::create_new_collection); - DEBUG("{}", cid); + LOG_PREFIX(SeaStoreS::create_new_collection); + 
DEBUG("cid={}", cid); return seastar::make_ready_future(_get_collection(cid)); } seastar::future SeaStore::Shard::open_collection(const coll_t& cid) { - LOG_PREFIX(SeaStore::open_collection); - DEBUG("{}", cid); - return list_collections().then([cid, this] (auto colls_cores) { + LOG_PREFIX(SeaStoreS::open_collection); + DEBUG("cid={} ...", cid); + return list_collections( + ).then([cid, this, FNAME] (auto colls_cores) { if (auto found = std::find(colls_cores.begin(), colls_cores.end(), std::make_pair(cid, seastar::this_shard_id())); found != colls_cores.end()) { + DEBUG("cid={} exists", cid); return seastar::make_ready_future(_get_collection(cid)); } else { + DEBUG("cid={} not exists", cid); return seastar::make_ready_future(); } }); @@ -965,6 +1033,8 @@ seastar::future<> SeaStore::Shard::set_collection_opts(CollectionRef c, const pool_opts_t& opts) { + LOG_PREFIX(SeaStoreS::set_collection_opts); + DEBUG("cid={}, opts={} not implemented", c->get_cid(), opts); //TODO return seastar::now(); } @@ -986,6 +1056,8 @@ SeaStore::Shard::list_collections() "list_collections", [this, &ret](auto& t) { + LOG_PREFIX(SeaStoreS::list_collections); + DEBUGT("...", t); return transaction_manager->read_collection_root(t ).si_then([this, &t](auto coll_root) { return collection_manager->list(coll_root, t); @@ -1004,7 +1076,7 @@ SeaStore::Shard::list_collections() } ).handle_error( crimson::ct_error::assert_all{ - "Invalid error in SeaStore::list_collections" + "Invalid error in SeaStoreS::list_collections" } ).finally([this] { assert(shard_stats.pending_read_num); @@ -1012,6 +1084,42 @@ SeaStore::Shard::list_collections() }); } +SeaStore::base_iertr::future +SeaStore::Shard::_read( + Transaction& t, + Onode& onode, + uint64_t offset, + std::size_t len, + uint32_t op_flags) +{ + LOG_PREFIX(SeaStoreS::_read); + size_t size = onode.get_layout().size; + if (offset >= size) { + DEBUGT("0x{:x}~0x{:x} onode-size=0x{:x} flags=0x{:x}, got none", + t, offset, len, size, op_flags); + return seastar::make_ready_future(); + } + + DEBUGT("0x{:x}~0x{:x} onode-size=0x{:x} flags=0x{:x} ...", + t, offset, len, size, op_flags); + size_t corrected_len = (len == 0) ? + size - offset : + std::min(size - offset, len); + + return ObjectDataHandler(max_object_size).read( + ObjectDataHandler::context_t{ + *transaction_manager, + t, + onode, + }, + offset, + corrected_len + ).si_then([FNAME, &t](auto bl) { + DEBUGT("got bl length=0x{:x}", t, bl.length()); + return bl; + }); +} + SeaStore::Shard::read_errorator::future SeaStore::Shard::read( CollectionRef ch, @@ -1020,9 +1128,6 @@ SeaStore::Shard::read( size_t len, uint32_t op_flags) { - LOG_PREFIX(SeaStore::read); - DEBUG("oid {} offset {} len {}", oid, offset, len); - ++(shard_stats.read_num); ++(shard_stats.pending_read_num); @@ -1030,29 +1135,11 @@ SeaStore::Shard::read( ch, oid, Transaction::src_t::READ, - "read_obj", + "read", op_type_t::READ, - [=, this](auto &t, auto &onode) -> ObjectDataHandler::read_ret { - size_t size = onode.get_layout().size; - - if (offset >= size) { - return seastar::make_ready_future(); - } - - size_t corrected_len = (len == 0) ? 
- size - offset : - std::min(size - offset, len); - - return ObjectDataHandler(max_object_size).read( - ObjectDataHandler::context_t{ - *transaction_manager, - t, - onode, - }, - offset, - corrected_len); - } - ).finally([this] { + [this, offset, len, op_flags](auto &t, auto &onode) { + return _read(t, onode, offset, len, op_flags); + }).finally([this] { assert(shard_stats.pending_read_num); --(shard_stats.pending_read_num); }); @@ -1063,9 +1150,7 @@ SeaStore::Shard::exists( CollectionRef c, const ghobject_t& oid) { - LOG_PREFIX(SeaStore::exists); - DEBUG("oid {}", oid); - + LOG_PREFIX(SeaStoreS::exists); ++(shard_stats.read_num); ++(shard_stats.pending_read_num); @@ -1073,12 +1158,14 @@ SeaStore::Shard::exists( c, oid, Transaction::src_t::READ, - "oid_exists", + "exists", op_type_t::READ, - [](auto&, auto&) { + [FNAME](auto& t, auto&) { + DEBUGT("exists", t); return seastar::make_ready_future(true); }).handle_error( - crimson::ct_error::enoent::handle([] { + crimson::ct_error::enoent::handle([FNAME] { + DEBUG("not exists"); return seastar::make_ready_future(false); }), crimson::ct_error::assert_all{"unexpected error"} @@ -1095,66 +1182,78 @@ SeaStore::Shard::readv( interval_set& m, uint32_t op_flags) { + LOG_PREFIX(SeaStoreS::readv); + DEBUG("cid={} oid={} op_flags=0x{:x} {} intervals", + ch->get_cid(), _oid, op_flags, m.num_intervals()); + return seastar::do_with( _oid, ceph::bufferlist{}, - [=, this, &m](auto &oid, auto &ret) { + [ch, op_flags, this, FNAME, &m](auto &oid, auto &ret) { return crimson::do_for_each( m, - [=, this, &oid, &ret](auto &p) { + [ch, op_flags, this, &oid, &ret](auto &p) { return read( ch, oid, p.first, p.second, op_flags ).safe_then([&ret](auto bl) { ret.claim_append(bl); }); - }).safe_then([&ret] { + }).safe_then([&ret, FNAME] { + DEBUG("got bl length=0x{:x}", ret.length()); return read_errorator::make_ready_future (std::move(ret)); }); }); - return read_errorator::make_ready_future(); } using crimson::os::seastore::omap_manager::BtreeOMapManager; +SeaStore::Shard::_omap_get_value_ret +SeaStore::Shard::_get_attr( + Transaction& t, + Onode& onode, + std::string_view name) const +{ + LOG_PREFIX(SeaStoreS::_get_attr); + auto& layout = onode.get_layout(); + if (name == OI_ATTR && layout.oi_size) { + ceph::bufferlist bl; + bl.append(ceph::bufferptr(&layout.oi[0], layout.oi_size)); + DEBUGT("got OI_ATTR, value length=0x{:x}", t, bl.length()); + return seastar::make_ready_future(std::move(bl)); + } + if (name == SS_ATTR && layout.ss_size) { + ceph::bufferlist bl; + bl.append(ceph::bufferptr(&layout.ss[0], layout.ss_size)); + DEBUGT("got SS_ATTR, value length=0x{:x}", t, bl.length()); + return seastar::make_ready_future(std::move(bl)); + } + DEBUGT("name={} ...", t, name); + return _omap_get_value( + t, + layout.xattr_root.get( + onode.get_metadata_hint(device->get_block_size())), + name); +} + SeaStore::Shard::get_attr_errorator::future SeaStore::Shard::get_attr( CollectionRef ch, const ghobject_t& oid, std::string_view name) const { - auto c = static_cast(ch.get()); - LOG_PREFIX(SeaStore::get_attr); - DEBUG("{} {}", c->get_cid(), oid); - ++(shard_stats.read_num); ++(shard_stats.pending_read_num); return repeat_with_onode( - c, + ch, oid, Transaction::src_t::READ, "get_attr", op_type_t::GET_ATTR, - [=, this](auto &t, auto& onode) -> _omap_get_value_ret { - auto& layout = onode.get_layout(); - if (name == OI_ATTR && layout.oi_size) { - ceph::bufferlist bl; - bl.append(ceph::bufferptr(&layout.oi[0], layout.oi_size)); - return seastar::make_ready_future(std::move(bl)); - 
} - if (name == SS_ATTR && layout.ss_size) { - ceph::bufferlist bl; - bl.append(ceph::bufferptr(&layout.ss[0], layout.ss_size)); - return seastar::make_ready_future(std::move(bl)); - } - return _omap_get_value( - t, - layout.xattr_root.get( - onode.get_metadata_hint(device->get_block_size())), - name); - } - ).handle_error( + [this, name](auto &t, auto& onode) { + return _get_attr(t, onode, name); + }).handle_error( crimson::ct_error::input_output_error::assert_failure{ "EIO when getting attrs"}, crimson::ct_error::pass_further_all{} @@ -1164,48 +1263,53 @@ SeaStore::Shard::get_attr( }); } +SeaStore::base_iertr::future +SeaStore::Shard::_get_attrs( + Transaction& t, + Onode& onode) +{ + LOG_PREFIX(SeaStoreS::_get_attrs); + DEBUGT("...", t); + auto& layout = onode.get_layout(); + return omap_list(onode, layout.xattr_root, t, std::nullopt, + OMapManager::omap_list_config_t() + .with_inclusive(false, false) + .without_max() + ).si_then([&layout, &t, FNAME](auto p) { + auto& attrs = std::get<1>(p); + DEBUGT("got {} attrs, OI length=0x{:x}, SS length=0x{:x}", + t, attrs.size(), (uint32_t)layout.oi_size, (uint32_t)layout.ss_size); + ceph::bufferlist bl; + if (layout.oi_size) { + bl.append(ceph::bufferptr(&layout.oi[0], layout.oi_size)); + attrs.emplace(OI_ATTR, std::move(bl)); + } + if (layout.ss_size) { + bl.clear(); + bl.append(ceph::bufferptr(&layout.ss[0], layout.ss_size)); + attrs.emplace(SS_ATTR, std::move(bl)); + } + return seastar::make_ready_future(std::move(attrs)); + }); +} + SeaStore::Shard::get_attrs_ertr::future SeaStore::Shard::get_attrs( CollectionRef ch, const ghobject_t& oid) { - LOG_PREFIX(SeaStore::get_attrs); - auto c = static_cast(ch.get()); - DEBUG("{} {}", c->get_cid(), oid); - ++(shard_stats.read_num); ++(shard_stats.pending_read_num); return repeat_with_onode( - c, + ch, oid, Transaction::src_t::READ, - "get_addrs", + "get_attrs", op_type_t::GET_ATTRS, - [=, this](auto &t, auto& onode) { - auto& layout = onode.get_layout(); - return omap_list(onode, layout.xattr_root, t, std::nullopt, - OMapManager::omap_list_config_t() - .with_inclusive(false, false) - .without_max() - ).si_then([&layout, &t, FNAME](auto p) { - auto& attrs = std::get<1>(p); - ceph::bufferlist bl; - if (layout.oi_size) { - bl.append(ceph::bufferptr(&layout.oi[0], layout.oi_size)); - attrs.emplace(OI_ATTR, std::move(bl)); - DEBUGT("set oi from onode layout", t); - } - if (layout.ss_size) { - bl.clear(); - bl.append(ceph::bufferptr(&layout.ss[0], layout.ss_size)); - attrs.emplace(SS_ATTR, std::move(bl)); - DEBUGT("set ss from onode layout", t); - } - return seastar::make_ready_future(std::move(attrs)); - }); - } - ).handle_error( + [this](auto &t, auto& onode) { + return _get_attrs(t, onode); + }).handle_error( crimson::ct_error::input_output_error::assert_failure{ "EIO when getting attrs"}, crimson::ct_error::pass_further_all{} @@ -1215,6 +1319,23 @@ SeaStore::Shard::get_attrs( }); } +seastar::future SeaStore::Shard::_stat( + Transaction& t, + Onode& onode, + const ghobject_t& oid) +{ + LOG_PREFIX(SeaStoreS::_stat); + struct stat st; + auto &olayout = onode.get_layout(); + st.st_size = olayout.size; + st.st_blksize = device->get_block_size(); + st.st_blocks = (st.st_size + st.st_blksize - 1) / st.st_blksize; + st.st_nlink = 1; + DEBUGT("oid={}, size={}, blksize={}", + t, oid, st.st_size, st.st_blksize); + return seastar::make_ready_future(st); +} + seastar::future SeaStore::Shard::stat( CollectionRef c, const ghobject_t& oid) @@ -1222,26 +1343,17 @@ seastar::future SeaStore::Shard::stat( 
++(shard_stats.read_num); ++(shard_stats.pending_read_num); - LOG_PREFIX(SeaStore::stat); return repeat_with_onode( c, oid, Transaction::src_t::READ, "stat", op_type_t::STAT, - [=, this](auto &t, auto &onode) { - struct stat st; - auto &olayout = onode.get_layout(); - st.st_size = olayout.size; - st.st_blksize = device->get_block_size(); - st.st_blocks = (st.st_size + st.st_blksize - 1) / st.st_blksize; - st.st_nlink = 1; - DEBUGT("cid {}, oid {}, return size {}", t, c->get_cid(), oid, st.st_size); - return seastar::make_ready_future(st); - } - ).handle_error( + [this, oid](auto &t, auto &onode) { + return _stat(t, onode, oid); + }).handle_error( crimson::ct_error::assert_all{ - "Invalid error in SeaStore::stat" + "Invalid error in SeaStoreS::stat" } ).finally([this] { assert(shard_stats.pending_read_num); @@ -1257,6 +1369,22 @@ SeaStore::Shard::omap_get_header( return get_attr(ch, oid, OMAP_HEADER_XATTR_KEY); } +SeaStore::base_iertr::future +SeaStore::Shard::do_omap_get_values( + Transaction& t, + Onode& onode, + const omap_keys_t& keys) +{ + LOG_PREFIX(SeaStoreS::do_omap_get_values); + DEBUGT("{} keys ...", t, keys.size()); + omap_root_t omap_root = onode.get_layout().omap_root.get( + onode.get_metadata_hint(device->get_block_size())); + return _omap_get_values( + t, + std::move(omap_root), + keys); +} + SeaStore::Shard::read_errorator::future SeaStore::Shard::omap_get_values( CollectionRef ch, @@ -1266,22 +1394,15 @@ SeaStore::Shard::omap_get_values( ++(shard_stats.read_num); ++(shard_stats.pending_read_num); - auto c = static_cast(ch.get()); return repeat_with_onode( - c, + ch, oid, Transaction::src_t::READ, "omap_get_values", op_type_t::OMAP_GET_VALUES, [this, keys](auto &t, auto &onode) { - omap_root_t omap_root = onode.get_layout().omap_root.get( - onode.get_metadata_hint(device->get_block_size())); - return _omap_get_values( - t, - std::move(omap_root), - keys); - } - ).finally([this] { + return do_omap_get_values(t, onode, keys); + }).finally([this] { assert(shard_stats.pending_read_num); --(shard_stats.pending_read_num); }); @@ -1298,58 +1419,62 @@ SeaStore::Shard::_omap_get_value( std::move(root), std::string(key), [&t](auto &manager, auto& root, auto& key) -> _omap_get_value_ret { - if (root.is_null()) { + LOG_PREFIX(SeaStoreS::_omap_get_value); + if (root.is_null()) { + DEBUGT("key={} is absent because of null root", t, key); + return crimson::ct_error::enodata::make(); + } + return manager.omap_get_value(root, t, key + ).si_then([&key, &t, FNAME](auto opt) -> _omap_get_value_ret { + if (!opt) { + DEBUGT("key={} is absent", t, key); return crimson::ct_error::enodata::make(); } - return manager.omap_get_value(root, t, key - ).si_then([](auto opt) -> _omap_get_value_ret { - if (!opt) { - return crimson::ct_error::enodata::make(); - } - return seastar::make_ready_future(std::move(*opt)); - }); - } - ); + DEBUGT("key={}, value length=0x{:x}", t, key, opt->length()); + return seastar::make_ready_future(std::move(*opt)); + }); + }); } -SeaStore::Shard::_omap_get_values_ret +SeaStore::base_iertr::future SeaStore::Shard::_omap_get_values( Transaction &t, omap_root_t &&omap_root, const omap_keys_t &keys) const { + LOG_PREFIX(SeaStoreS::_omap_get_values); if (omap_root.is_null()) { + DEBUGT("{} keys are absent because of null root", t, keys.size()); return seastar::make_ready_future(); } return seastar::do_with( BtreeOMapManager(*transaction_manager), std::move(omap_root), omap_values_t(), - [&](auto &manager, auto &root, auto &ret) { - return trans_intr::do_for_each( - keys.begin(), - 
keys.end(), - [&](auto &key) { - return manager.omap_get_value( - root, - t, - key - ).si_then([&ret, &key](auto &&p) { - if (p) { - bufferlist bl; - bl.append(*p); - ret.emplace( - std::move(key), - std::move(bl)); - } - return seastar::now(); - }); + [&t, &keys, FNAME](auto &manager, auto &root, auto &ret) { + return trans_intr::do_for_each( + keys.begin(), + keys.end(), + [&t, &manager, &root, &ret](auto &key) { + return manager.omap_get_value( + root, + t, + key + ).si_then([&ret, &key](auto &&p) { + if (p) { + bufferlist bl; + bl.append(*p); + ret.emplace( + std::move(key), + std::move(bl)); } - ).si_then([&ret] { - return std::move(ret); + return seastar::now(); }); - } - ); + }).si_then([&t, &ret, &keys, FNAME] { + DEBUGT("{} keys got {} values", t, keys.size(), ret.size()); + return std::move(ret); + }); + }); } SeaStore::Shard::omap_list_ret @@ -1377,51 +1502,74 @@ SeaStore::Shard::omap_list( }); } -SeaStore::Shard::omap_get_values_ret_t +SeaStore::base_iertr::future +SeaStore::Shard::do_omap_get_values( + Transaction& t, + Onode& onode, + const std::optional& start) +{ + LOG_PREFIX(SeaStoreS::do_omap_get_values); + DEBUGT("start={} ...", t, start.has_value() ? *start : ""); + return omap_list( + onode, + onode.get_layout().omap_root, + t, + start, + OMapManager::omap_list_config_t() + .with_inclusive(false, false) + .without_max() + ).si_then([FNAME, &t](omap_values_paged_t ret) { + DEBUGT("got {} values, complete={}", + t, std::get<1>(ret).size(), std::get<0>(ret)); + return ret; + }); +} + +SeaStore::Shard::read_errorator::future SeaStore::Shard::omap_get_values( CollectionRef ch, const ghobject_t &oid, - const std::optional &start) + const std::optional &start) { - auto c = static_cast(ch.get()); - LOG_PREFIX(SeaStore::omap_get_values); - DEBUG("{} {}", c->get_cid(), oid); - ++(shard_stats.read_num); ++(shard_stats.pending_read_num); - using ret_bare_t = std::tuple; - return repeat_with_onode( - c, + return repeat_with_onode( + ch, oid, Transaction::src_t::READ, - "omap_list", - op_type_t::OMAP_LIST, + "omap_get_values2", + op_type_t::OMAP_GET_VALUES2, [this, start](auto &t, auto &onode) { - return omap_list( - onode, - onode.get_layout().omap_root, - t, - start, - OMapManager::omap_list_config_t() - .with_inclusive(false, false) - .without_max()); - } - ).finally([this] { + return do_omap_get_values(t, onode, start); + }).finally([this] { assert(shard_stats.pending_read_num); --(shard_stats.pending_read_num); }); } -SeaStore::Shard::_fiemap_ret SeaStore::Shard::_fiemap( +SeaStore::base_iertr::future +SeaStore::Shard::_fiemap( Transaction &t, Onode &onode, uint64_t off, uint64_t len) const { + LOG_PREFIX(SeaStoreS::_fiemap); + size_t size = onode.get_layout().size; + if (off >= size) { + DEBUGT("0x{:x}~0x{:x} onode-size=0x{:x}, got none", + t, off, len, size); + return seastar::make_ready_future>(); + } + DEBUGT("0x{:x}~0x{:x} onode-size=0x{:x} ...", + t, off, len, size); + size_t adjust_len = (len == 0) ? 
+ size - off: + std::min(size - off, len); return seastar::do_with( ObjectDataHandler(max_object_size), - [=, this, &t, &onode] (auto &objhandler) { + [this, off, adjust_len, &t, &onode](auto &objhandler) { return objhandler.fiemap( ObjectDataHandler::context_t{ *transaction_manager, @@ -1429,39 +1577,31 @@ SeaStore::Shard::_fiemap_ret SeaStore::Shard::_fiemap( onode, }, off, - len); + adjust_len); + }).si_then([FNAME, &t](auto ret) { + DEBUGT("got {} intervals", t, ret.size()); + return ret; }); } -SeaStore::Shard::read_errorator::future> +SeaStore::Shard::read_errorator::future SeaStore::Shard::fiemap( CollectionRef ch, const ghobject_t& oid, uint64_t off, uint64_t len) { - LOG_PREFIX(SeaStore::fiemap); - DEBUG("oid: {}, off: {}, len: {} ", oid, off, len); - ++(shard_stats.read_num); ++(shard_stats.pending_read_num); - return repeat_with_onode>( + return repeat_with_onode( ch, oid, Transaction::src_t::READ, - "fiemap_read", + "fiemap", op_type_t::READ, - [=, this](auto &t, auto &onode) -> _fiemap_ret { - size_t size = onode.get_layout().size; - if (off >= size) { - INFOT("fiemap offset is over onode size!", t); - return seastar::make_ready_future>(); - } - size_t adjust_len = (len == 0) ? - size - off: - std::min(size - off, len); - return _fiemap(t, onode, off, adjust_len); + [this, off, len](auto &t, auto &onode) { + return _fiemap(t, onode, off, len); }).finally([this] { assert(shard_stats.pending_read_num); --(shard_stats.pending_read_num); @@ -1469,7 +1609,7 @@ SeaStore::Shard::fiemap( } void SeaStore::Shard::on_error(ceph::os::Transaction &t) { - LOG_PREFIX(SeaStore::on_error); + LOG_PREFIX(SeaStoreS::on_error); ERROR(" transaction dump:\n"); JSONFormatter f(true); f.open_object_section("transaction"); @@ -1490,17 +1630,22 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks( ++(shard_stats.starting_io_num); // repeat_with_internal_context ensures ordering via collection lock + auto num_bytes = _t.get_num_bytes(); return repeat_with_internal_context( _ch, std::move(_t), Transaction::src_t::MUTATE, "do_transaction", - op_type_t::TRANSACTION, - [this](auto &ctx) { - return with_trans_intr(*ctx.transaction, [&, this](auto &t) { - LOG_PREFIX(SeaStore::Shard::do_transaction_no_callbacks); - SUBDEBUGT(seastore_t, "start with {} objects", - t, ctx.iter.objects.size()); + op_type_t::DO_TRANSACTION, + [this, num_bytes](auto &ctx) { + LOG_PREFIX(SeaStoreS::do_transaction_no_callbacks); + return with_trans_intr(*ctx.transaction, [&ctx, this, FNAME, num_bytes](auto &t) { + DEBUGT("cid={}, {} operations, {} bytes, {} colls, {} objects ...", + t, ctx.ch->get_cid(), + ctx.ext_transaction.get_num_ops(), + num_bytes, + ctx.iter.colls.size(), + ctx.iter.objects.size()); #ifndef NDEBUG TRACET(" transaction dump:\n", t); JSONFormatter f(true); @@ -1534,6 +1679,8 @@ seastar::future<> SeaStore::Shard::do_transaction_no_callbacks( }).si_then([this, &ctx] { return transaction_manager->submit_transaction(*ctx.transaction); }); + }).safe_then([FNAME, &ctx] { + DEBUGT("done", *ctx.transaction); }); } ).finally([this] { @@ -1573,27 +1720,31 @@ SeaStore::Shard::_do_transaction_step( std::vector &d_onodes, ceph::os::Transaction::iterator &i) { - LOG_PREFIX(SeaStore::Shard::_do_transaction_step); + LOG_PREFIX(SeaStoreS::_do_transaction_step); auto op = i.decode_op(); - SUBTRACET(seastore_t, "got op {}", *ctx.transaction, (uint32_t)op->op); using ceph::os::Transaction; - if (op->op == Transaction::OP_NOP) + if (op->op == Transaction::OP_NOP) { + DEBUGT("op NOP", *ctx.transaction); return 
tm_iertr::now(); + } switch (op->op) { case Transaction::OP_RMCOLL: { coll_t cid = i.get_cid(op->cid); + DEBUGT("op RMCOLL, cid={} ...", *ctx.transaction, cid); return _remove_collection(ctx, cid); } case Transaction::OP_MKCOLL: { coll_t cid = i.get_cid(op->cid); + DEBUGT("op MKCOLL, cid={} ...", *ctx.transaction, cid); return _create_collection(ctx, cid, op->split_bits); } case Transaction::OP_COLL_HINT: { + DEBUGT("op COLL_HINT", *ctx.transaction); ceph::bufferlist hint; i.decode_bl(hint); return tm_iertr::now(); @@ -1611,14 +1762,18 @@ SeaStore::Shard::_do_transaction_step( create = true; } if (!onodes[op->oid]) { + const ghobject_t& oid = i.get_oid(op->oid); if (!create) { - fut = onode_manager->get_onode(*ctx.transaction, i.get_oid(op->oid)); + DEBUGT("op {}, get oid={} ...", + *ctx.transaction, (uint32_t)op->op, oid); + fut = onode_manager->get_onode(*ctx.transaction, oid); } else { - fut = onode_manager->get_or_create_onode( - *ctx.transaction, i.get_oid(op->oid)); + DEBUGT("op {}, get_or_create oid={} ...", + *ctx.transaction, (uint32_t)op->op, oid); + fut = onode_manager->get_or_create_onode(*ctx.transaction, oid); } } - return fut.si_then([&, op](auto get_onode) { + return fut.si_then([&, op, this, FNAME](auto get_onode) { OnodeRef &o = onodes[op->oid]; if (!o) { assert(get_onode); @@ -1628,11 +1783,13 @@ SeaStore::Shard::_do_transaction_step( if ((op->op == Transaction::OP_CLONE || op->op == Transaction::OP_COLL_MOVE_RENAME) && !d_onodes[op->dest_oid]) { + const ghobject_t& dest_oid = i.get_oid(op->dest_oid); + DEBUGT("op {}, get_or_create dest oid={} ...", + *ctx.transaction, (uint32_t)op->op, dest_oid); //TODO: use when_all_succeed after making onode tree // support parallel extents loading - return onode_manager->get_or_create_onode( - *ctx.transaction, i.get_oid(op->dest_oid) - ).si_then([&, op](auto dest_onode) { + return onode_manager->get_or_create_onode(*ctx.transaction, dest_oid + ).si_then([&onodes, &d_onodes, op](auto dest_onode) { assert(dest_onode); auto &d_o = onodes[op->dest_oid]; assert(!d_o); @@ -1644,13 +1801,13 @@ SeaStore::Shard::_do_transaction_step( } else { return OnodeManager::get_or_create_onode_iertr::now(); } - }).si_then([&, op, this]() -> tm_ret { - LOG_PREFIX(SeaStore::_do_transaction_step); + }).si_then([&ctx, &i, &onodes, &d_onodes, op, this, FNAME]() -> tm_ret { + const ghobject_t& oid = i.get_oid(op->oid); try { switch (op->op) { case Transaction::OP_REMOVE: { - TRACET("removing {}", *ctx.transaction, i.get_oid(op->oid)); + DEBUGT("op REMOVE, oid={} ...", *ctx.transaction, oid); return _remove(ctx, onodes[op->oid] ).si_then([&onodes, &d_onodes, op] { onodes[op->oid].reset(); @@ -1660,6 +1817,7 @@ SeaStore::Shard::_do_transaction_step( case Transaction::OP_CREATE: case Transaction::OP_TOUCH: { + DEBUGT("op CREATE/TOUCH, oid={} ...", *ctx.transaction, oid); return _touch(ctx, onodes[op->oid]); } case Transaction::OP_WRITE: @@ -1669,6 +1827,8 @@ SeaStore::Shard::_do_transaction_step( uint32_t fadvise_flags = i.get_fadvise_flags(); ceph::bufferlist bl; i.decode_bl(bl); + DEBUGT("op WRITE, oid={}, 0x{:x}~0x{:x}, flags=0x{:x} ...", + *ctx.transaction, oid, off, len, fadvise_flags); return _write( ctx, onodes[op->oid], off, len, std::move(bl), fadvise_flags); @@ -1676,6 +1836,7 @@ SeaStore::Shard::_do_transaction_step( case Transaction::OP_TRUNCATE: { uint64_t off = op->off; + DEBUGT("op TRUNCATE, oid={}, 0x{:x} ...", *ctx.transaction, oid, off); return _truncate(ctx, onodes[op->oid], off); } case Transaction::OP_SETATTR: @@ -1684,80 +1845,96 @@ 
SeaStore::Shard::_do_transaction_step( std::map to_set; ceph::bufferlist& bl = to_set[name]; i.decode_bl(bl); + DEBUGT("op SETATTR, oid={}, attr name={}, value length=0x{:x} ...", + *ctx.transaction, oid, name, bl.length()); return _setattrs(ctx, onodes[op->oid], std::move(to_set)); } case Transaction::OP_SETATTRS: { std::map to_set; i.decode_attrset(to_set); + DEBUGT("op SETATTRS, oid={}, attrs size={} ...", + *ctx.transaction, oid, to_set.size()); return _setattrs(ctx, onodes[op->oid], std::move(to_set)); } case Transaction::OP_RMATTR: { std::string name = i.decode_string(); + DEBUGT("op RMATTR, oid={}, attr name={} ...", + *ctx.transaction, oid, name); return _rmattr(ctx, onodes[op->oid], name); } case Transaction::OP_RMATTRS: { + DEBUGT("op RMATTRS, oid={} ...", *ctx.transaction, oid); return _rmattrs(ctx, onodes[op->oid]); } case Transaction::OP_OMAP_SETKEYS: { std::map aset; i.decode_attrset(aset); + DEBUGT("op OMAP_SETKEYS, oid={}, omap size={} ...", + *ctx.transaction, oid, aset.size()); return _omap_set_values(ctx, onodes[op->oid], std::move(aset)); } case Transaction::OP_OMAP_SETHEADER: { ceph::bufferlist bl; i.decode_bl(bl); + DEBUGT("op OMAP_SETHEADER, oid={}, length=0x{:x} ...", + *ctx.transaction, oid, bl.length()); return _omap_set_header(ctx, onodes[op->oid], std::move(bl)); } case Transaction::OP_OMAP_RMKEYS: { omap_keys_t keys; i.decode_keyset(keys); + DEBUGT("op OMAP_RMKEYS, oid={}, omap size={} ...", + *ctx.transaction, oid, keys.size()); return _omap_rmkeys(ctx, onodes[op->oid], std::move(keys)); } case Transaction::OP_OMAP_RMKEYRANGE: { - string first, last; + std::string first, last; first = i.decode_string(); last = i.decode_string(); + DEBUGT("op OMAP_RMKEYRANGE, oid={}, first={}, last={} ...", + *ctx.transaction, oid, first, last); return _omap_rmkeyrange( ctx, onodes[op->oid], std::move(first), std::move(last)); } case Transaction::OP_OMAP_CLEAR: { + DEBUGT("op OMAP_CLEAR, oid={} ...", *ctx.transaction, oid); return _omap_clear(ctx, onodes[op->oid]); } case Transaction::OP_ZERO: { objaddr_t off = op->off; extent_len_t len = op->len; + DEBUGT("op ZERO, oid={}, 0x{:x}~0x{:x} ...", + *ctx.transaction, oid, off, len); return _zero(ctx, onodes[op->oid], off, len); } case Transaction::OP_SETALLOCHINT: { + DEBUGT("op SETALLOCHINT, oid={}, not implemented", + *ctx.transaction, oid); // TODO return tm_iertr::now(); } case Transaction::OP_CLONE: { - TRACET("cloning {} to {}", - *ctx.transaction, - i.get_oid(op->oid), - i.get_oid(op->dest_oid)); + DEBUGT("op CLONE, oid={}, dest oid={} ...", + *ctx.transaction, oid, i.get_oid(op->dest_oid)); return _clone(ctx, onodes[op->oid], d_onodes[op->dest_oid]); } case Transaction::OP_COLL_MOVE_RENAME: { + DEBUGT("op COLL_MOVE_RENAME, oid={}, dest oid={} ...", + *ctx.transaction, oid, i.get_oid(op->dest_oid)); ceph_assert(op->cid == op->dest_cid); - TRACET("renaming {} to {}", - *ctx.transaction, - i.get_oid(op->oid), - i.get_oid(op->dest_oid)); return _rename( ctx, onodes[op->oid], d_onodes[op->dest_oid] ).si_then([&onodes, &d_onodes, op] { @@ -1793,7 +1970,7 @@ SeaStore::Shard::_do_transaction_step( return seastar::now(); }), crimson::ct_error::assert_all{ - "Invalid error in SeaStore::do_transaction_step" + "Invalid error in SeaStoreS::do_transaction_step" } ); } @@ -1829,7 +2006,7 @@ SeaStore::Shard::_rename( ).handle_error_interruptible( crimson::ct_error::input_output_error::pass_further(), crimson::ct_error::assert_all{ - "Invalid error in SeaStore::_rename"} + "Invalid error in SeaStoreS::_rename"} ); } @@ -1850,7 +2027,7 @@ 
SeaStore::Shard::_remove_omaps( ).handle_error_interruptible( crimson::ct_error::input_output_error::pass_further(), crimson::ct_error::assert_all{ - "Invalid error in SeaStore::_remove" + "Invalid error in SeaStoreS::_remove_omaps" } ); }); @@ -1863,8 +2040,6 @@ SeaStore::Shard::_remove( internal_context_t &ctx, OnodeRef &onode) { - LOG_PREFIX(SeaStore::_remove); - DEBUGT("onode={}", *ctx.transaction, *onode); return _remove_omaps( ctx, onode, @@ -1892,7 +2067,7 @@ SeaStore::Shard::_remove( }).handle_error_interruptible( crimson::ct_error::input_output_error::pass_further(), crimson::ct_error::assert_all( - "Invalid error in SeaStore::_remove" + "Invalid error in SeaStoreS::_remove" ) ); } @@ -1902,8 +2077,6 @@ SeaStore::Shard::_touch( internal_context_t &ctx, OnodeRef &onode) { - LOG_PREFIX(SeaStore::_touch); - DEBUGT("onode={}", *ctx.transaction, *onode); return tm_iertr::now(); } @@ -1915,8 +2088,6 @@ SeaStore::Shard::_write( ceph::bufferlist &&_bl, uint32_t fadvise_flags) { - LOG_PREFIX(SeaStore::_write); - DEBUGT("onode={} {}~{}", *ctx.transaction, *onode, offset, len); const auto &object_size = onode->get_layout().size; if (offset + len > object_size) { onode->update_onode_size( @@ -2007,8 +2178,6 @@ SeaStore::Shard::_clone( OnodeRef &onode, OnodeRef &d_onode) { - LOG_PREFIX(SeaStore::_clone); - DEBUGT("onode={} d_onode={}", *ctx.transaction, *onode, *d_onode); return seastar::do_with( ObjectDataHandler(max_object_size), [this, &ctx, &onode, &d_onode](auto &objHandler) { @@ -2034,9 +2203,10 @@ SeaStore::Shard::_zero( objaddr_t offset, extent_len_t len) { - LOG_PREFIX(SeaStore::_zero); - DEBUGT("onode={} {}~{}", *ctx.transaction, *onode, offset, len); if (offset + len >= max_object_size) { + LOG_PREFIX(SeaStoreS::_zero); + ERRORT("0x{:x}~0x{:x} >= 0x{:x}", + *ctx.transaction, offset, len, max_object_size); return crimson::ct_error::input_output_error::make(); } const auto &object_size = onode->get_layout().size; @@ -2092,8 +2262,6 @@ SeaStore::Shard::_omap_set_values( OnodeRef &onode, std::map &&aset) { - LOG_PREFIX(SeaStore::_omap_set_values); - DEBUGT("{} {} keys", *ctx.transaction, *onode, aset.size()); return _omap_set_kvs( onode, onode->get_layout().omap_root, @@ -2112,8 +2280,6 @@ SeaStore::Shard::_omap_set_header( OnodeRef &onode, ceph::bufferlist &&header) { - LOG_PREFIX(SeaStore::_omap_set_header); - DEBUGT("{} {} bytes", *ctx.transaction, *onode, header.length()); std::map to_set; to_set[OMAP_HEADER_XATTR_KEY] = header; return _setattrs(ctx, onode,std::move(to_set)); @@ -2124,10 +2290,8 @@ SeaStore::Shard::_omap_clear( internal_context_t &ctx, OnodeRef &onode) { - LOG_PREFIX(SeaStore::_omap_clear); - DEBUGT("{} {} keys", *ctx.transaction, *onode); - return _xattr_rmattr(ctx, onode, std::string(OMAP_HEADER_XATTR_KEY)) - .si_then([this, &ctx, &onode]() -> tm_ret { + return _xattr_rmattr(ctx, onode, std::string(OMAP_HEADER_XATTR_KEY) + ).si_then([this, &ctx, &onode]() -> tm_ret { if (auto omap_root = onode->get_layout().omap_root.get( onode->get_metadata_hint(device->get_block_size())); omap_root.is_null()) { @@ -2142,8 +2306,8 @@ SeaStore::Shard::_omap_clear( auto &omap_root) { return omap_manager.omap_clear( omap_root, - *ctx.transaction) - .si_then([&] { + *ctx.transaction + ).si_then([&] { if (omap_root.must_update()) { onode->update_omap_root(*ctx.transaction, omap_root); } @@ -2159,8 +2323,6 @@ SeaStore::Shard::_omap_rmkeys( OnodeRef &onode, omap_keys_t &&keys) { - LOG_PREFIX(SeaStore::_omap_rmkeys); - DEBUGT("{} {} keys", *ctx.transaction, *onode, keys.size()); auto 
omap_root = onode->get_layout().omap_root.get( onode->get_metadata_hint(device->get_block_size())); if (omap_root.is_null()) { @@ -2201,10 +2363,9 @@ SeaStore::Shard::_omap_rmkeyrange( std::string first, std::string last) { - LOG_PREFIX(SeaStore::_omap_rmkeyrange); - DEBUGT("{} first={} last={}", *ctx.transaction, *onode, first, last); if (first > last) { - ERRORT("range error, first: {} > last:{}", *ctx.transaction, first, last); + LOG_PREFIX(SeaStoreS::_omap_rmkeyrange); + ERRORT("range error, first:{} > last:{}", *ctx.transaction, first, last); ceph_abort(); } auto omap_root = onode->get_layout().omap_root.get( @@ -2247,8 +2408,6 @@ SeaStore::Shard::_truncate( OnodeRef &onode, uint64_t size) { - LOG_PREFIX(SeaStore::_truncate); - DEBUGT("onode={} size={}", *ctx.transaction, *onode, size); onode->update_onode_size(*ctx.transaction, size); return seastar::do_with( ObjectDataHandler(max_object_size), @@ -2269,9 +2428,7 @@ SeaStore::Shard::_setattrs( OnodeRef &onode, std::map&& aset) { - LOG_PREFIX(SeaStore::_setattrs); - DEBUGT("onode={}", *ctx.transaction, *onode); - + LOG_PREFIX(SeaStoreS::_setattrs); auto fut = tm_iertr::now(); auto& layout = onode->get_layout(); if (auto it = aset.find(OI_ATTR); it != aset.end()) { @@ -2333,8 +2490,6 @@ SeaStore::Shard::_rmattr( OnodeRef &onode, std::string name) { - LOG_PREFIX(SeaStore::_rmattr); - DEBUGT("onode={}", *ctx.transaction, *onode); auto& layout = onode->get_layout(); if ((name == OI_ATTR) && (layout.oi_size > 0)) { onode->clear_object_info(*ctx.transaction); @@ -2356,7 +2511,7 @@ SeaStore::Shard::_xattr_rmattr( OnodeRef &onode, std::string &&name) { - LOG_PREFIX(SeaStore::_xattr_rmattr); + LOG_PREFIX(SeaStoreS::_xattr_rmattr); DEBUGT("onode={}", *ctx.transaction, *onode); auto xattr_root = onode->get_layout().xattr_root.get( onode->get_metadata_hint(device->get_block_size())); @@ -2384,8 +2539,6 @@ SeaStore::Shard::_rmattrs( internal_context_t &ctx, OnodeRef &onode) { - LOG_PREFIX(SeaStore::_rmattrs); - DEBUGT("onode={}", *ctx.transaction, *onode); onode->clear_object_info(*ctx.transaction); onode->clear_snapset(*ctx.transaction); return _xattr_clear(ctx, onode); @@ -2396,7 +2549,7 @@ SeaStore::Shard::_xattr_clear( internal_context_t &ctx, OnodeRef &onode) { - LOG_PREFIX(SeaStore::_xattr_clear); + LOG_PREFIX(SeaStoreS::_xattr_clear); DEBUGT("onode={}", *ctx.transaction, *onode); auto xattr_root = onode->get_layout().xattr_root.get( onode->get_metadata_hint(device->get_block_size())); @@ -2446,7 +2599,7 @@ SeaStore::Shard::_create_collection( }).handle_error_interruptible( tm_iertr::pass_further{}, crimson::ct_error::assert_all{ - "Invalid error in SeaStore::_create_collection" + "Invalid error in SeaStoreS::_create_collection" } ); } @@ -2478,7 +2631,7 @@ SeaStore::Shard::_remove_collection( }).handle_error_interruptible( tm_iertr::pass_further{}, crimson::ct_error::assert_all{ - "Invalid error in SeaStore::_create_collection" + "Invalid error in SeaStoreS::_create_collection" } ); } @@ -2489,40 +2642,53 @@ SeaStore::Shard::_get_collection(const coll_t& cid) return new SeastoreCollection{cid}; } +seastar::future<> SeaStore::write_meta( + const std::string& key, + const std::string& value) { + LOG_PREFIX(SeaStore::write_meta); + DEBUG("key={} value={} ...", key, value); + + ceph_assert(seastar::this_shard_id() == primary_core); + return seastar::do_with(key, value, + [this, FNAME](auto& key, auto& value) { + return shard_stores.local().write_meta(key, value + ).then([this, &key, &value] { + return mdstore->write_meta(key, value); + 
}).safe_then([FNAME, &key, &value] { + DEBUG("key={} value={} done", key, value); + }).handle_error( + crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"} + ); + }); +} + seastar::future<> SeaStore::Shard::write_meta( const std::string& key, const std::string& value) { - LOG_PREFIX(SeaStore::write_meta); - DEBUG("key: {}; value: {}", key, value); - ++(shard_stats.io_num); ++(shard_stats.pending_io_num); // For TM::submit_transaction() ++(shard_stats.processing_inlock_io_num); - return seastar::do_with( - key, value, - [this, FNAME](auto& key, auto& value) { - return repeat_eagain([this, FNAME, &key, &value] { - ++(shard_stats.repeat_io_num); - - return transaction_manager->with_transaction_intr( - Transaction::src_t::MUTATE, - "write_meta", - [this, FNAME, &key, &value](auto& t) - { - DEBUGT("Have transaction, key: {}; value: {}", t, key, value); - return transaction_manager->update_root_meta( - t, key, value - ).si_then([this, &t] { - return transaction_manager->submit_transaction(t); - }); - }); - }); - } - ).handle_error( - crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"} + return repeat_eagain([this, &key, &value] { + ++(shard_stats.repeat_io_num); + + return transaction_manager->with_transaction_intr( + Transaction::src_t::MUTATE, + "write_meta", + [this, &key, &value](auto& t) + { + LOG_PREFIX(SeaStoreS::write_meta); + DEBUGT("key={} value={} ...", t, key, value); + return transaction_manager->update_root_meta( + t, key, value + ).si_then([this, &t] { + return transaction_manager->submit_transaction(t); + }); + }); + }).handle_error( + crimson::ct_error::assert_all{"Invalid error in SeaStoreS::write_meta"} ).finally([this] { assert(shard_stats.pending_io_num); --(shard_stats.pending_io_num); @@ -2535,13 +2701,17 @@ seastar::future<> SeaStore::Shard::write_meta( seastar::future> SeaStore::read_meta(const std::string& key) { - ceph_assert(seastar::this_shard_id() == primary_core); LOG_PREFIX(SeaStore::read_meta); - DEBUG("key: {}", key); - return mdstore->read_meta(key).safe_then([](auto v) { + DEBUG("key={} ...", key); + + ceph_assert(seastar::this_shard_id() == primary_core); + return mdstore->read_meta(key + ).safe_then([key, FNAME](auto v) { if (v) { + DEBUG("key={}, value={}", key, *v); return std::make_tuple(0, std::move(*v)); } else { + ERROR("key={} failed", key); return std::make_tuple(-1, std::string("")); } }).handle_error( @@ -2598,7 +2768,7 @@ shard_stats_t SeaStore::Shard::get_io_stats( ret.minus(last_shard_stats); if (report_detail && seconds != 0) { - LOG_PREFIX(SeaStore::get_io_stats); + LOG_PREFIX(SeaStoreS::get_io_stats); auto calc_conflicts = [](uint64_t ios, uint64_t repeats) { return (double)(repeats-ios)/ios; }; diff --git a/src/crimson/os/seastore/seastore.h b/src/crimson/os/seastore/seastore.h index fb495a422f656..185072744f2d8 100644 --- a/src/crimson/os/seastore/seastore.h +++ b/src/crimson/os/seastore/seastore.h @@ -35,14 +35,14 @@ using OnodeRef = boost::intrusive_ptr; class TransactionManager; enum class op_type_t : uint8_t { - TRANSACTION = 0, + DO_TRANSACTION = 0, READ, WRITE, GET_ATTR, GET_ATTRS, STAT, OMAP_GET_VALUES, - OMAP_LIST, + OMAP_GET_VALUES2, MAX }; @@ -71,20 +71,19 @@ struct col_obj_ranges_t { class SeaStore final : public FuturizedStore { public: + using base_ertr = TransactionManager::base_ertr; + using base_iertr = TransactionManager::base_iertr; + class MDStore { public: - using base_iertr = crimson::errorator< - crimson::ct_error::input_output_error - >; - - using write_meta_ertr = base_iertr; + 
using write_meta_ertr = base_ertr; using write_meta_ret = write_meta_ertr::future<>; virtual write_meta_ret write_meta( const std::string &key, const std::string &val ) = 0; - using read_meta_ertr = base_iertr; + using read_meta_ertr = base_ertr; using read_meta_ret = write_meta_ertr::future>; virtual read_meta_ret read_meta(const std::string &key) = 0; @@ -136,10 +135,7 @@ class SeaStore final : public FuturizedStore { const omap_keys_t& keys) final; /// Retrieves paged set of values > start (if present) - using omap_get_values_ret_bare_t = std::tuple; - using omap_get_values_ret_t = read_errorator::future< - omap_get_values_ret_bare_t>; - omap_get_values_ret_t omap_get_values( + read_errorator::future omap_get_values( CollectionRef c, ///< [in] collection const ghobject_t &oid, ///< [in] oid const std::optional &start ///< [in] start, empty for begin @@ -170,7 +166,7 @@ class SeaStore final : public FuturizedStore { * stages and locks as do_transaction. */ seastar::future<> flush(CollectionRef ch) final; - read_errorator::future> fiemap( + read_errorator::future fiemap( CollectionRef ch, const ghobject_t& oid, uint64_t off, @@ -190,7 +186,6 @@ class SeaStore final : public FuturizedStore { secondaries.emplace_back(&sec_dev); } - using coll_core_t = FuturizedStore::coll_core_t; seastar::future> list_collections(); seastar::future<> write_meta(const std::string& key, @@ -305,18 +300,21 @@ class SeaStore final : public FuturizedStore { auto begin_time = std::chrono::steady_clock::now(); return seastar::do_with( oid, Ret{}, std::forward(f), - [this, src, op_type, begin_time, tname + [this, ch, src, op_type, begin_time, tname ](auto &oid, auto &ret, auto &f) { - return repeat_eagain([&, this, src, tname] { + return repeat_eagain([&, this, ch, src, tname] { assert(src == Transaction::src_t::READ); ++(shard_stats.repeat_read_num); return transaction_manager->with_transaction_intr( src, tname, - [&, this](auto& t) + [&, this, ch, tname](auto& t) { + LOG_PREFIX(SeaStoreS::repeat_with_onode); + SUBDEBUGT(seastore, "{} cid={} oid={} ...", + t, tname, ch->get_cid(), oid); return onode_manager->get_onode(t, oid ).si_then([&](auto onode) { return seastar::do_with(std::move(onode), [&](auto& onode) { @@ -334,14 +332,16 @@ class SeaStore final : public FuturizedStore { }); } - using _fiemap_ret = ObjectDataHandler::fiemap_ret; - _fiemap_ret _fiemap( - Transaction &t, - Onode &onode, - uint64_t off, - uint64_t len) const; + using omap_list_bare_ret = OMapManager::omap_list_bare_ret; + using omap_list_ret = OMapManager::omap_list_ret; + omap_list_ret omap_list( + Onode& onode, + const omap_root_le_t& omap_root, + Transaction& t, + const std::optional& start, + OMapManager::omap_list_config_t config) const; - using _omap_get_value_iertr = OMapManager::base_iertr::extend< + using _omap_get_value_iertr = base_iertr::extend< crimson::ct_error::enodata >; using _omap_get_value_ret = _omap_get_value_iertr::future; @@ -350,25 +350,51 @@ class SeaStore final : public FuturizedStore { omap_root_t &&root, std::string_view key) const; - using _omap_get_values_iertr = OMapManager::base_iertr; - using _omap_get_values_ret = _omap_get_values_iertr::future; - _omap_get_values_ret _omap_get_values( + base_iertr::future _omap_get_values( Transaction &t, omap_root_t &&root, const omap_keys_t &keys) const; friend class SeaStoreOmapIterator; - using omap_list_bare_ret = OMapManager::omap_list_bare_ret; - using omap_list_ret = OMapManager::omap_list_ret; - omap_list_ret omap_list( - Onode &onode, - const omap_root_le_t& 
omap_root, + base_iertr::future _read( Transaction& t, - const std::optional& start, - OMapManager::omap_list_config_t config) const; + Onode& onode, + uint64_t offset, + std::size_t len, + uint32_t op_flags); + + _omap_get_value_ret _get_attr( + Transaction& t, + Onode& onode, + std::string_view name) const; + + base_iertr::future _get_attrs( + Transaction& t, + Onode& onode); + + seastar::future _stat( + Transaction& t, + Onode& onode, + const ghobject_t& oid); + + base_iertr::future do_omap_get_values( + Transaction& t, + Onode& onode, + const omap_keys_t& keys); - using tm_iertr = TransactionManager::base_iertr; + base_iertr::future do_omap_get_values( + Transaction& t, + Onode& onode, + const std::optional& start); + + base_iertr::future _fiemap( + Transaction &t, + Onode &onode, + uint64_t off, + uint64_t len) const; + + using tm_iertr = base_iertr; using tm_ret = tm_iertr::future<>; tm_ret _do_transaction_step( internal_context_t &ctx, @@ -535,17 +561,7 @@ class SeaStore final : public FuturizedStore { return shard_stores.local().get_fsid(); } - seastar::future<> write_meta( - const std::string& key, - const std::string& value) final { - ceph_assert(seastar::this_shard_id() == primary_core); - return shard_stores.local().write_meta( - key, value).then([this, key, value] { - return mdstore->write_meta(key, value); - }).handle_error( - crimson::ct_error::assert_all{"Invalid error in SeaStore::write_meta"} - ); - } + seastar::future<> write_meta(const std::string& key, const std::string& value) final; seastar::future> read_meta(const std::string& key) final; diff --git a/src/crimson/os/seastore/seastore_types.cc b/src/crimson/os/seastore/seastore_types.cc index e1430b30019a5..f379dd0117c8d 100644 --- a/src/crimson/os/seastore/seastore_types.cc +++ b/src/crimson/os/seastore/seastore_types.cc @@ -54,7 +54,9 @@ std::ostream &operator<<(std::ostream &out, const device_id_printer_t &id) } else if (_id == DEVICE_ID_ROOT) { return out << "Dev(ROOT)"; } else { - return out << "Dev(" << (unsigned)_id << ")"; + return out << "Dev(0x" + << std::hex << (unsigned)_id << std::dec + << ")"; } } @@ -64,7 +66,7 @@ std::ostream &operator<<(std::ostream &out, const segment_id_t &segment) return out << "Seg[NULL]"; } else { return out << "Seg[" << device_id_printer_t{segment.device_id()} - << "," << segment.device_segment_id() + << ",0x" << std::hex << segment.device_segment_id() << std::dec << "]"; } } @@ -93,12 +95,12 @@ std::ostream& operator<<(std::ostream& out, segment_seq_printer_t seq) } std::ostream &operator<<(std::ostream &out, const laddr_t &laddr) { - return out << 'L' << std::hex << laddr.value << std::dec; + return out << "L0x" << std::hex << laddr.value << std::dec; } std::ostream &operator<<(std::ostream &out, const laddr_offset_t &laddr_offset) { return out << laddr_offset.get_aligned_laddr() - << "+" << std::hex << laddr_offset.get_offset() << std::dec; + << "+0x" << std::hex << laddr_offset.get_offset() << std::dec; } std::ostream &operator<<(std::ostream &out, const pladdr_t &pladdr) @@ -123,18 +125,18 @@ std::ostream &operator<<(std::ostream &out, const paddr_t &rhs) } else if (has_device_off(id)) { auto &s = rhs.as_res_paddr(); out << device_id_printer_t{id} - << "," - << s.get_device_off(); + << ",0x" + << std::hex << s.get_device_off() << std::dec; } else if (rhs.get_addr_type() == paddr_types_t::SEGMENT) { auto &s = rhs.as_seg_paddr(); out << s.get_segment_id() - << "," - << s.get_segment_off(); + << ",0x" + << std::hex << s.get_segment_off() << std::dec; } else if 
(rhs.get_addr_type() == paddr_types_t::RANDOM_BLOCK) { auto &s = rhs.as_blk_paddr(); out << device_id_printer_t{s.get_device_id()} - << "," - << s.get_device_off(); + << ",0x" + << std::hex << s.get_device_off() << std::dec; } else { out << "INVALID!"; } diff --git a/src/crimson/os/seastore/transaction.h b/src/crimson/os/seastore/transaction.h index 52515937a9e59..5d8ad00ba228b 100644 --- a/src/crimson/os/seastore/transaction.h +++ b/src/crimson/os/seastore/transaction.h @@ -80,6 +80,11 @@ struct rewrite_stats_t { } }; +struct rbm_pending_ool_t { + bool is_conflicted = false; + std::list pending_extents; +}; + /** * Transaction * @@ -554,6 +559,18 @@ class Transaction { return static_cast(*view); } + void set_pending_ool(seastar::lw_shared_ptr ptr) { + pending_ool = ptr; + } + + seastar::lw_shared_ptr get_pending_ool() { + return pending_ool; + } + + const auto& get_pre_alloc_list() { + return pre_alloc_list; + } + private: friend class Cache; friend Ref make_test_transaction(); @@ -650,6 +667,8 @@ class Transaction { const src_t src; transaction_id_t trans_id = TRANS_ID_NULL; + + seastar::lw_shared_ptr pending_ool; }; using TransactionRef = Transaction::Ref; diff --git a/src/crimson/os/seastore/transaction_manager.cc b/src/crimson/os/seastore/transaction_manager.cc index a76b7fbe0c96c..f4e3b0858f2f1 100644 --- a/src/crimson/os/seastore/transaction_manager.cc +++ b/src/crimson/os/seastore/transaction_manager.cc @@ -48,7 +48,7 @@ TransactionManager::TransactionManager( TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() { LOG_PREFIX(TransactionManager::mkfs); - INFO("enter"); + INFO("..."); return epm->mount( ).safe_then([this] { return journal->open_for_mkfs(); @@ -94,14 +94,15 @@ TransactionManager::mkfs_ertr::future<> TransactionManager::mkfs() }).safe_then([this] { return close(); }).safe_then([FNAME] { - INFO("completed"); + INFO("done"); }); } -TransactionManager::mount_ertr::future<> TransactionManager::mount() +TransactionManager::mount_ertr::future<> +TransactionManager::mount() { LOG_PREFIX(TransactionManager::mount); - INFO("enter"); + INFO("..."); cache->init(); return epm->mount( ).safe_then([this] { @@ -168,16 +169,17 @@ TransactionManager::mount_ertr::future<> TransactionManager::mount() return epm->open_for_write(); }).safe_then([FNAME, this] { epm->start_background(); - INFO("completed"); + INFO("done"); }).handle_error( mount_ertr::pass_further{}, crimson::ct_error::assert_all{"unhandled error"} ); } -TransactionManager::close_ertr::future<> TransactionManager::close() { +TransactionManager::close_ertr::future<> +TransactionManager::close() { LOG_PREFIX(TransactionManager::close); - INFO("enter"); + INFO("..."); return epm->stop_background( ).then([this] { return cache->close(); @@ -187,7 +189,7 @@ TransactionManager::close_ertr::future<> TransactionManager::close() { }).safe_then([this] { return epm->close(); }).safe_then([FNAME] { - INFO("completed"); + INFO("done"); return seastar::now(); }); } @@ -229,28 +231,26 @@ TransactionManager::ref_ret TransactionManager::remove( LogicalCachedExtentRef &ref) { LOG_PREFIX(TransactionManager::remove); - TRACET("{}", t, *ref); + DEBUGT("{} ...", t, *ref); return lba_manager->decref_extent(t, ref->get_laddr() ).si_then([this, FNAME, &t, ref](auto result) { - DEBUGT("extent refcount is decremented to {} -- {}", - t, result.refcount, *ref); if (result.refcount == 0) { cache->retire_extent(t, ref); } + DEBUGT("removed {}~0x{:x} refcount={} -- {}", + t, result.addr, result.length, result.refcount, *ref); return 
result.refcount; }); } -TransactionManager::ref_ret TransactionManager::_dec_ref( +TransactionManager::ref_ret TransactionManager::remove( Transaction &t, laddr_t offset) { - LOG_PREFIX(TransactionManager::_dec_ref); - TRACET("{}", t, offset); + LOG_PREFIX(TransactionManager::remove); + DEBUGT("{} ...", t, offset); return lba_manager->decref_extent(t, offset ).si_then([this, FNAME, offset, &t](auto result) -> ref_ret { - DEBUGT("extent refcount is decremented to {} -- {}~{}, {}", - t, result.refcount, offset, result.length, result.addr); auto fut = ref_iertr::now(); if (result.refcount == 0) { if (result.addr.is_paddr() && @@ -259,8 +259,9 @@ TransactionManager::ref_ret TransactionManager::_dec_ref( t, result.addr.get_paddr(), result.length); } } - - return fut.si_then([result=std::move(result)] { + return fut.si_then([result=std::move(result), offset, &t, FNAME] { + DEBUGT("removed {}~0x{:x} refcount={} -- offset={}", + t, result.addr, result.length, result.refcount, offset); return result.refcount; }); }); @@ -271,19 +272,21 @@ TransactionManager::refs_ret TransactionManager::remove( std::vector offsets) { LOG_PREFIX(TransactionManager::remove); - DEBUG("{} offsets", offsets.size()); + DEBUGT("{} offsets ...", t, offsets.size()); return seastar::do_with(std::move(offsets), std::vector(), - [this, &t] (auto &&offsets, auto &refcnt) { - return trans_intr::do_for_each(offsets.begin(), offsets.end(), - [this, &t, &refcnt] (auto &laddr) { - return this->remove(t, laddr).si_then([&refcnt] (auto ref) { - refcnt.push_back(ref); - return ref_iertr::now(); - }); - }).si_then([&refcnt] { - return ref_iertr::make_ready_future>(std::move(refcnt)); + [this, &t, FNAME](auto &&offsets, auto &refcnts) { + return trans_intr::do_for_each(offsets.begin(), offsets.end(), + [this, &t, &refcnts](auto &laddr) { + return this->remove(t, laddr + ).si_then([&refcnts](auto ref) { + refcnts.push_back(ref); + return ref_iertr::now(); }); + }).si_then([&refcnts, &t, FNAME] { + DEBUGT("removed {} offsets", t, refcnts.size()); + return ref_iertr::make_ready_future>(std::move(refcnts)); }); + }); } TransactionManager::submit_transaction_iertr::future<> @@ -340,6 +343,7 @@ TransactionManager::update_lba_mappings( return; } if (extent->is_logical()) { + assert(is_logical_type(extent->get_type())); // for rewritten extents, last_committed_crc should have been set // because the crc of the original extent may be reused. 
// also see rewrite_logical_extent() @@ -359,6 +363,7 @@ TransactionManager::update_lba_mappings( #endif lextents.emplace_back(extent->template cast()); } else { + assert(is_physical_type(extent->get_type())); pextents.emplace_back(extent); } }; @@ -515,7 +520,6 @@ TransactionManager::rewrite_logical_extent( ERRORT("extent has been invalidated -- {}", t, *extent); ceph_abort(); } - TRACET("rewriting extent -- {}", t, *extent); auto lextent = extent->cast(); cache->retire_extent(t, extent); @@ -529,7 +533,7 @@ TransactionManager::rewrite_logical_extent( lextent->get_rewrite_generation())->cast(); nlextent->rewrite(t, *lextent, 0); - DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); + DEBUGT("rewriting meta -- {} to {}", t, *lextent, *nlextent); #ifndef NDEBUG if (get_checksum_needed(lextent->get_paddr())) { @@ -566,16 +570,16 @@ TransactionManager::rewrite_logical_extent( 0, lextent->get_length(), extent_ref_count_t(0), - [this, lextent, &t](auto &extents, auto &off, auto &left, auto &refcount) { + [this, FNAME, lextent, &t] + (auto &extents, auto &off, auto &left, auto &refcount) { return trans_intr::do_for_each( extents, - [lextent, this, &t, &off, &left, &refcount](auto &nextent) { - LOG_PREFIX(TransactionManager::rewrite_logical_extent); + [lextent, this, FNAME, &t, &off, &left, &refcount](auto &nextent) { bool first_extent = (off == 0); ceph_assert(left >= nextent->get_length()); auto nlextent = nextent->template cast(); nlextent->rewrite(t, *lextent, off); - DEBUGT("rewriting logical extent -- {} to {}", t, *lextent, *nlextent); + DEBUGT("rewriting data -- {} to {}", t, *lextent, *nlextent); /* This update_mapping is, strictly speaking, unnecessary for delayed_alloc * extents since we're going to do it again once we either do the ool write @@ -629,10 +633,18 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( { auto updated = cache->update_extent_from_transaction(t, extent); if (!updated) { - DEBUGT("extent is already retired, skipping -- {}", t, *extent); + DEBUGT("target={} {} already retired, skipping -- {}", t, + rewrite_gen_printer_t{target_generation}, + sea_time_point_printer_t{modify_time}, + *extent); return rewrite_extent_iertr::now(); } + extent = updated; + DEBUGT("target={} {} -- {} ...", t, + rewrite_gen_printer_t{target_generation}, + sea_time_point_printer_t{modify_time}, + *extent); ceph_assert(!extent->is_pending_io()); } @@ -650,9 +662,9 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( // FIXME: is_dirty() is true for mutation pending extents // which shouldn't do inplace rewrite because a pending transaction // may fail. 
- DEBUGT("delta overwriting extent -- {}", t, *extent); t.add_inplace_rewrite_extent(extent); extent->set_inplace_rewrite_generation(); + DEBUGT("rewritten as inplace rewrite -- {}", t, *extent); return rewrite_extent_iertr::now(); } extent->set_target_rewrite_generation(INIT_GENERATION); @@ -665,23 +677,25 @@ TransactionManager::rewrite_extent_ret TransactionManager::rewrite_extent( t.get_rewrite_stats().account_n_dirty(); } - if (is_backref_node(extent->get_type())) { - DEBUGT("rewriting backref extent -- {}", t, *extent); - return backref_manager->rewrite_extent(t, extent); - } - if (is_root_type(extent->get_type())) { - DEBUGT("rewriting root extent -- {}", t, *extent); cache->duplicate_for_write(t, extent); + DEBUGT("rewritten root {}", t, *extent); return rewrite_extent_iertr::now(); } + auto fut = rewrite_extent_iertr::now(); if (extent->is_logical()) { - return rewrite_logical_extent(t, extent->cast()); + assert(is_logical_type(extent->get_type())); + fut = rewrite_logical_extent(t, extent->cast()); + } else if (is_backref_node(extent->get_type())) { + fut = backref_manager->rewrite_extent(t, extent); } else { - DEBUGT("rewriting physical extent -- {}", t, *extent); - return lba_manager->rewrite_extent(t, extent); + assert(is_lba_node(extent->get_type())); + fut = lba_manager->rewrite_extent(t, extent); } + return fut.si_then([FNAME, &t] { + DEBUGT("rewritten", t); + }); } TransactionManager::get_extents_if_live_ret @@ -693,7 +707,7 @@ TransactionManager::get_extents_if_live( extent_len_t len) { LOG_PREFIX(TransactionManager::get_extents_if_live); - TRACET("{} {}~{} {}", t, type, laddr, len, paddr); + DEBUGT("{} {}~0x{:x} {} ...", t, type, laddr, len, paddr); // This only works with segments to check if alive, // as parallel transactions may split the extent at the same time. 
@@ -703,7 +717,7 @@ TransactionManager::get_extents_if_live( ).si_then([=, this, &t](auto extent) -> get_extents_if_live_ret { if (extent && extent->get_length() == len) { - DEBUGT("{} {}~{} {} is live in cache -- {}", + DEBUGT("{} {}~0x{:x} {} is cached and alive -- {}", t, type, laddr, len, paddr, *extent); std::list res; res.emplace_back(std::move(extent)); @@ -757,7 +771,9 @@ TransactionManager::get_extents_if_live( list.emplace_back(std::move(ret)); return seastar::now(); }); - }).si_then([&list] { + }).si_then([&list, &t, FNAME, type, laddr, len, paddr] { + DEBUGT("{} {}~0x{:x} {} is alive as {} extents", + t, type, laddr, len, paddr, list.size()); return get_extents_if_live_ret( interruptible::ready_future_marker{}, std::move(list)); @@ -778,11 +794,11 @@ TransactionManager::get_extents_if_live( ).si_then([=, &t](auto ret) { std::list res; if (ret) { - DEBUGT("{} {}~{} {} is live as physical extent -- {}", + DEBUGT("{} {}~0x{:x} {} is absent and alive as physical extent -- {}", t, type, laddr, len, paddr, *ret); res.emplace_back(std::move(ret)); } else { - DEBUGT("{} {}~{} {} is not live as physical extent", + DEBUGT("{} {}~0x{:x} {} is not alive as physical extent", t, type, laddr, len, paddr); } return get_extents_if_live_ret( diff --git a/src/crimson/os/seastore/transaction_manager.h b/src/crimson/os/seastore/transaction_manager.h index 828b8a25592fc..c7a94a9ef1132 100644 --- a/src/crimson/os/seastore/transaction_manager.h +++ b/src/crimson/os/seastore/transaction_manager.h @@ -106,8 +106,12 @@ class TransactionManager : public ExtentCallbackInterface { Transaction &t, laddr_t offset) { LOG_PREFIX(TransactionManager::get_pin); - SUBTRACET(seastore_tm, "{}", t, offset); - return lba_manager->get_mapping(t, offset); + SUBDEBUGT(seastore_tm, "{} ...", t, offset); + return lba_manager->get_mapping(t, offset + ).si_then([FNAME, &t](LBAMappingRef pin) { + SUBDEBUGT(seastore_tm, "got {}", t, *pin); + return pin; + }); } /** @@ -122,9 +126,13 @@ class TransactionManager : public ExtentCallbackInterface { laddr_t offset, extent_len_t length) { LOG_PREFIX(TransactionManager::get_pins); - SUBDEBUGT(seastore_tm, "{}~{}", t, offset, length); + SUBDEBUGT(seastore_tm, "{}~0x{:x} ...", t, offset, length); return lba_manager->get_mappings( - t, offset, length); + t, offset, length + ).si_then([FNAME, &t](lba_pin_list_t pins) { + SUBDEBUGT(seastore_tm, "got {} pins", t, pins.size()); + return pins; + }); } /** @@ -142,15 +150,15 @@ class TransactionManager : public ExtentCallbackInterface { laddr_t offset, extent_len_t length) { LOG_PREFIX(TransactionManager::read_extent); - SUBTRACET(seastore_tm, "{}~{}", t, offset, length); + SUBDEBUGT(seastore_tm, "{}~0x{:x} {} ...", + t, offset, length, T::TYPE); return get_pin( t, offset ).si_then([this, FNAME, &t, offset, length] (auto pin) -> read_extent_ret { if (length != pin->get_length() || !pin->get_val().is_real()) { - SUBERRORT(seastore_tm, - "offset {} len {} got wrong pin {}", - t, offset, length, *pin); + SUBERRORT(seastore_tm, "{}~0x{:x} {} got wrong {}", + t, offset, length, T::TYPE, *pin); ceph_assert(0 == "Should be impossible"); } return this->read_pin(t, std::move(pin)); @@ -167,15 +175,15 @@ class TransactionManager : public ExtentCallbackInterface { Transaction &t, laddr_t offset) { LOG_PREFIX(TransactionManager::read_extent); - SUBTRACET(seastore_tm, "{}", t, offset); + SUBDEBUGT(seastore_tm, "{} {} ...", + t, offset, T::TYPE); return get_pin( t, offset ).si_then([this, FNAME, &t, offset] (auto pin) -> read_extent_ret { if 
(!pin->get_val().is_real()) { - SUBERRORT(seastore_tm, - "offset {} got wrong pin {}", - t, offset, *pin); + SUBERRORT(seastore_tm, "{} {} got wrong {}", + t, offset, T::TYPE, *pin); ceph_assert(0 == "Should be impossible"); } return this->read_pin(t, std::move(pin)); @@ -187,6 +195,8 @@ class TransactionManager : public ExtentCallbackInterface { Transaction &t, LBAMappingRef pin) { + LOG_PREFIX(TransactionManager::read_pin); + SUBDEBUGT(seastore_tm, "{} {} ...", t, T::TYPE, *pin); auto fut = base_iertr::make_ready_future(); if (!pin->is_parent_viewable()) { if (pin->is_parent_valid()) { @@ -212,52 +222,12 @@ class TransactionManager : public ExtentCallbackInterface { } else { return this->pin_to_extent(t, std::move(std::get<0>(ret))); } + }).si_then([FNAME, &t](TCachedExtentRef ext) { + SUBDEBUGT(seastore_tm, "got {}", t, *ext); + return ext; }); } - template - std::variant>> - get_extent_if_linked( - Transaction &t, - LBAMappingRef pin) - { - ceph_assert(pin->is_parent_viewable()); - // checking the lba child must be atomic with creating - // and linking the absent child - auto v = pin->get_logical_extent(t); - if (v.has_child()) { - return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) { -#ifndef NDEBUG - auto lextent = extent->template cast(); - auto pin_laddr = pin->get_key(); - if (pin->is_indirect()) { - pin_laddr = pin->get_intermediate_base(); - } - assert(lextent->get_laddr() == pin_laddr); -#endif - return extent->template cast(); - }); - } else { - return pin; - } - } - - base_iertr::future read_pin_by_type( - Transaction &t, - LBAMappingRef pin, - extent_types_t type) - { - ceph_assert(!pin->parent_modified()); - auto v = pin->get_logical_extent(t); - // checking the lba child must be atomic with creating - // and linking the absent child - if (v.has_child()) { - return std::move(v.get_child_fut()); - } else { - return pin_to_extent_by_type(t, std::move(pin), type); - } - } - /// Obtain mutable copy of extent LogicalCachedExtentRef get_mutable_extent(Transaction &t, LogicalCachedExtentRef ref) { LOG_PREFIX(TransactionManager::get_mutable_extent); @@ -265,24 +235,15 @@ class TransactionManager : public ExtentCallbackInterface { t, ref)->cast(); if (!ret->has_laddr()) { - SUBDEBUGT(seastore_tm, - "duplicating extent for write -- {} -> {}", - t, - *ref, - *ret); + SUBDEBUGT(seastore_tm, "duplicate from {}", t, *ref); ret->set_laddr(ref->get_laddr()); } else { - SUBTRACET(seastore_tm, - "extent is already duplicated -- {}", - t, - *ref); assert(ref->is_mutable()); assert(&*ref == &*ret); } return ret; } - using ref_iertr = LBAManager::ref_iertr; using ref_ret = ref_iertr::future; @@ -302,26 +263,15 @@ class TransactionManager : public ExtentCallbackInterface { * remove * * Remove the extent and the corresponding lba mapping, - * users must make sure that lba mapping's refcount is 1 + * users must make sure that lba mapping's refcount > 1 */ ref_ret remove( Transaction &t, LogicalCachedExtentRef &ref); - /** - * remove - * - * 1. Remove the indirect mapping(s), and if refcount drops to 0, - * also remove the direct mapping and retire the extent. - * - * 2. Remove the direct mapping(s) and retire the extent if - * refcount drops to 0. 
- */ ref_ret remove( Transaction &t, - laddr_t offset) { - return _dec_ref(t, offset); - } + laddr_t offset); /// remove refcount for list of offset using refs_ret = ref_iertr::future>; @@ -346,23 +296,23 @@ class TransactionManager : public ExtentCallbackInterface { extent_len_t len, placement_hint_t placement_hint = placement_hint_t::HOT) { LOG_PREFIX(TransactionManager::alloc_non_data_extent); - SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}", - t, T::TYPE, len, placement_hint, laddr_hint); + SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...", + t, T::TYPE, laddr_hint, len, placement_hint); auto ext = cache->alloc_new_non_data_extent( t, len, placement_hint, INIT_GENERATION); if (!ext) { + SUBERRORT(seastore_tm, "insufficient space!", t); return crimson::ct_error::enospc::make(); } return lba_manager->alloc_extent( t, laddr_hint, *ext - ).si_then([ext=std::move(ext), laddr_hint, &t](auto &&) mutable { - LOG_PREFIX(TransactionManager::alloc_non_data_extent); - SUBDEBUGT(seastore_tm, "new extent: {}, laddr_hint: {}", t, *ext, laddr_hint); + ).si_then([ext=std::move(ext), &t, FNAME](auto &&) mutable { + SUBDEBUGT(seastore_tm, "allocated {}", t, *ext); return alloc_extent_iertr::make_ready_future>( std::move(ext)); }); @@ -385,14 +335,15 @@ class TransactionManager : public ExtentCallbackInterface { extent_len_t len, placement_hint_t placement_hint = placement_hint_t::HOT) { LOG_PREFIX(TransactionManager::alloc_data_extents); - SUBTRACET(seastore_tm, "{} len={}, placement_hint={}, laddr_hint={}", - t, T::TYPE, len, placement_hint, laddr_hint); + SUBDEBUGT(seastore_tm, "{} hint {}~0x{:x} phint={} ...", + t, T::TYPE, laddr_hint, len, placement_hint); auto exts = cache->alloc_new_data_extents( t, len, placement_hint, INIT_GENERATION); if (exts.empty()) { + SUBERRORT(seastore_tm, "insufficient space!", t); return crimson::ct_error::enospc::make(); } return lba_manager->alloc_extents( @@ -403,7 +354,7 @@ class TransactionManager : public ExtentCallbackInterface { EXTENT_DEFAULT_REF_COUNT ).si_then([exts=std::move(exts), &t, FNAME](auto &&) mutable { for (auto &ext : exts) { - SUBDEBUGT(seastore_tm, "new extent: {}", t, *ext); + SUBDEBUGT(seastore_tm, "allocated {}", t, *ext); } return alloc_extent_iertr::make_ready_future< std::vector>>(std::move(exts)); @@ -411,15 +362,21 @@ class TransactionManager : public ExtentCallbackInterface { } template - read_extent_ret get_mutable_extent_by_laddr(Transaction &t, laddr_t laddr, extent_len_t len) { + read_extent_ret get_mutable_extent_by_laddr( + Transaction &t, + laddr_t laddr, + extent_len_t len) { + LOG_PREFIX(TransactionManager::get_mutable_extent_by_laddr); + SUBDEBUGT(seastore_tm, "{}~0x{:x} ...", t, laddr, len); return get_pin(t, laddr ).si_then([this, &t, len](auto pin) { ceph_assert(pin->is_data_stable() && !pin->is_zero_reserved()); ceph_assert(!pin->is_clone()); ceph_assert(pin->get_length() == len); return this->read_pin(t, std::move(pin)); - }).si_then([this, &t](auto extent) { + }).si_then([this, &t, FNAME](auto extent) { auto ext = get_mutable_extent(t, extent)->template cast(); + SUBDEBUGT(seastore_tm, "got mutable {}", t, *ext); return read_extent_iertr::make_ready_future>( std::move(ext)); }); @@ -476,10 +433,8 @@ class TransactionManager : public ExtentCallbackInterface { extent_len_t original_len = pin->get_length(); paddr_t original_paddr = pin->get_val(); LOG_PREFIX(TransactionManager::remap_pin); - SUBDEBUGT(seastore_tm, - "original laddr: {}, original paddr: {}, original length: {}," - " remap to {} 
extents", - t, original_laddr, original_paddr, original_len, remaps.size()); + SUBDEBUGT(seastore_tm, "{}~0x{:x} {} into {} remaps ... {}", + t, original_laddr, original_len, original_paddr, remaps.size(), *pin); // The according extent might be stable or pending. auto fut = base_iertr::now(); if (!pin->is_indirect()) { @@ -536,14 +491,13 @@ class TransactionManager : public ExtentCallbackInterface { auto remap_len = remap.len; auto remap_laddr = (original_laddr + remap_offset).checked_to_laddr(); auto remap_paddr = original_paddr.add_offset(remap_offset); + SUBDEBUGT(seastore_tm, "remap direct pin into {}~0x{:x} {} ...", + t, remap_laddr, remap_len, remap_paddr); ceph_assert(remap_len < original_len); ceph_assert(remap_offset + remap_len <= original_len); ceph_assert(remap_len != 0); ceph_assert(remap_offset % cache->get_block_size() == 0); ceph_assert(remap_len % cache->get_block_size() == 0); - SUBDEBUGT(seastore_tm, - "remap laddr: {}, remap paddr: {}, remap length: {}", t, - remap_laddr, remap_paddr, remap_len); auto extent = cache->alloc_remapped_extent( t, remap_laddr, @@ -555,13 +509,15 @@ class TransactionManager : public ExtentCallbackInterface { } }); } - return fut.si_then([this, &t, &pin, &remaps, &extents] { + return fut.si_then([this, &t, &pin, &remaps, &extents, FNAME] { return lba_manager->remap_mappings( t, std::move(pin), std::vector(remaps.begin(), remaps.end()), std::move(extents) - ).si_then([](auto ret) { + ).si_then([FNAME, &t](auto ret) { + SUBDEBUGT(seastore_tm, "remapped {} pins", + t, ret.remapped_mappings.size()); return Cache::retire_extent_iertr::make_ready_future< std::vector>(std::move(ret.remapped_mappings)); }); @@ -581,11 +537,15 @@ class TransactionManager : public ExtentCallbackInterface { laddr_t hint, extent_len_t len) { LOG_PREFIX(TransactionManager::reserve_region); - SUBDEBUGT(seastore_tm, "len={}, laddr_hint={}", t, len, hint); + SUBDEBUGT(seastore_tm, "hint {}~0x{:x} ...", t, hint, len); return lba_manager->reserve_region( t, hint, - len); + len + ).si_then([FNAME, &t](auto pin) { + SUBDEBUGT(seastore_tm, "reserved {}", t, *pin); + return pin; + }); } /* @@ -612,15 +572,17 @@ class TransactionManager : public ExtentCallbackInterface { : mapping.get_key(); LOG_PREFIX(TransactionManager::clone_pin); - SUBDEBUGT(seastore_tm, "len={}, laddr_hint={}, clone_offset {}", - t, mapping.get_length(), hint, intermediate_key); + SUBDEBUGT(seastore_tm, "{} clone to hint {} ...", t, mapping, hint); return lba_manager->clone_mapping( t, hint, mapping.get_length(), intermediate_key, intermediate_base - ); + ).si_then([FNAME, &t](auto pin) { + SUBDEBUGT(seastore_tm, "cloned as {}", t, *pin); + return pin; + }); } /* alloc_extents @@ -635,10 +597,10 @@ class TransactionManager : public ExtentCallbackInterface { extent_len_t len, int num) { LOG_PREFIX(TransactionManager::alloc_extents); - SUBDEBUGT(seastore_tm, "len={}, laddr_hint={}, num={}", - t, len, hint, num); + SUBDEBUGT(seastore_tm, "hint {}~({} * 0x{:x}) ...", + t, hint, num, len); return seastar::do_with(std::vector>(), - [this, &t, hint, len, num] (auto &extents) { + [this, &t, hint, len, num, FNAME](auto &extents) { return trans_intr::do_for_each( boost::make_counting_iterator(0), boost::make_counting_iterator(num), @@ -647,7 +609,8 @@ class TransactionManager : public ExtentCallbackInterface { [&extents](auto &&node) { extents.push_back(node); }); - }).si_then([&extents] { + }).si_then([&extents, &t, FNAME] { + SUBDEBUGT(seastore_tm, "allocated {} extents", t, extents.size()); return 
alloc_extents_iertr::make_ready_future >>(std::move(extents)); }); @@ -753,7 +716,7 @@ class TransactionManager : public ExtentCallbackInterface { const std::string& key, const std::string& value) { LOG_PREFIX(TransactionManager::update_root_meta); - SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {}", t, key, value); + SUBDEBUGT(seastore_tm, "seastore_tm, {} -> {} ...", t, key, value); return cache->get_root( t ).si_then([this, &t, &key, &value](RootBlockRef root) { @@ -808,7 +771,7 @@ class TransactionManager : public ExtentCallbackInterface { return cache->get_root(t).si_then([&t](auto croot) { LOG_PREFIX(TransactionManager::read_collection_root); auto ret = croot->get_root().collection_root.get(); - SUBTRACET(seastore_tm, "{}~{}", + SUBTRACET(seastore_tm, "{}~0x{:x}", t, ret.get_location(), ret.get_size()); return ret; }); @@ -821,7 +784,7 @@ class TransactionManager : public ExtentCallbackInterface { */ void write_collection_root(Transaction &t, coll_root_t cmroot) { LOG_PREFIX(TransactionManager::write_collection_root); - SUBDEBUGT(seastore_tm, "{}~{}", + SUBDEBUGT(seastore_tm, "{}~0x{:x}", t, cmroot.get_location(), cmroot.get_size()); auto croot = cache->get_root_fast(t); croot = cache->duplicate_for_write(t, croot)->cast(); @@ -853,6 +816,49 @@ class TransactionManager : public ExtentCallbackInterface { shard_stats_t& shard_stats; + template + std::variant>> + get_extent_if_linked( + Transaction &t, + LBAMappingRef pin) + { + ceph_assert(pin->is_parent_viewable()); + // checking the lba child must be atomic with creating + // and linking the absent child + auto v = pin->get_logical_extent(t); + if (v.has_child()) { + return v.get_child_fut().safe_then([pin=std::move(pin)](auto extent) { +#ifndef NDEBUG + auto lextent = extent->template cast(); + auto pin_laddr = pin->get_key(); + if (pin->is_indirect()) { + pin_laddr = pin->get_intermediate_base(); + } + assert(lextent->get_laddr() == pin_laddr); +#endif + return extent->template cast(); + }); + } else { + return pin; + } + } + + base_iertr::future read_pin_by_type( + Transaction &t, + LBAMappingRef pin, + extent_types_t type) + { + ceph_assert(!pin->parent_modified()); + auto v = pin->get_logical_extent(t); + // checking the lba child must be atomic with creating + // and linking the absent child + if (v.has_child()) { + return std::move(v.get_child_fut()); + } else { + return pin_to_extent_by_type(t, std::move(pin), type); + } + } + rewrite_extent_ret rewrite_logical_extent( Transaction& t, LogicalCachedExtentRef extent); @@ -862,11 +868,6 @@ class TransactionManager : public ExtentCallbackInterface { ExtentPlacementManager::dispatch_result_t dispatch_result, std::optional seq_to_trim = std::nullopt); - /// Remove refcount for offset - ref_ret _dec_ref( - Transaction &t, - laddr_t offset); - using update_lba_mappings_ret = LBAManager::update_mappings_ret; update_lba_mappings_ret update_lba_mappings( Transaction &t, @@ -886,7 +887,7 @@ class TransactionManager : public ExtentCallbackInterface { Transaction &t, LBAMappingRef pin) { LOG_PREFIX(TransactionManager::pin_to_extent); - SUBTRACET(seastore_tm, "getting extent {}", t, *pin); + SUBTRACET(seastore_tm, "getting absent extent from pin {} ...", t, *pin); static_assert(is_logical_type(T::TYPE)); using ret = pin_to_extent_ret; auto &pref = *pin; @@ -950,7 +951,8 @@ class TransactionManager : public ExtentCallbackInterface { extent_types_t type) { LOG_PREFIX(TransactionManager::pin_to_extent_by_type); - SUBTRACET(seastore_tm, "getting extent {} type {}", t, *pin, type); + 
SUBTRACET(seastore_tm, "getting absent extent from pin {} type {} ...", + t, *pin, type); assert(is_logical_type(type)); auto &pref = *pin; return cache->get_absent_extent_by_type( diff --git a/src/crimson/osd/backfill_facades.h b/src/crimson/osd/backfill_facades.h index 683dc6ea64948..522a93a1ddcbe 100644 --- a/src/crimson/osd/backfill_facades.h +++ b/src/crimson/osd/backfill_facades.h @@ -52,6 +52,12 @@ struct PeeringFacade final : BackfillState::PeeringFacade { return peering_state.is_backfilling(); } + void prepare_backfill_for_missing( + const hobject_t &soid, + const eversion_t &v, + const std::vector &peers) override { + return peering_state.prepare_backfill_for_missing(soid, v, peers); + } PeeringFacade(PeeringState& peering_state) : peering_state(peering_state) { } diff --git a/src/crimson/osd/backfill_state.cc b/src/crimson/osd/backfill_state.cc index 70c43f49faf72..018e58b68f851 100644 --- a/src/crimson/osd/backfill_state.cc +++ b/src/crimson/osd/backfill_state.cc @@ -225,7 +225,7 @@ bool BackfillState::Enqueuing::should_rescan_primary( const BackfillInterval& backfill_info) const { return backfill_info.begin <= earliest_peer_backfill(peer_backfill_info) && - !backfill_info.extends_to_end(); + !backfill_info.extends_to_end() && backfill_info.empty(); } void BackfillState::Enqueuing::trim_backfilled_object_from_intervals( @@ -266,6 +266,7 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check) logger().debug("{}: check={}", __func__, check); const auto& primary_bi = backfill_state().backfill_info; result_t result { {}, primary_bi.begin }; + std::map>> backfills; for (const auto& bt : peering_state().get_backfill_targets()) { const auto& peer_bi = backfill_state().peer_backfill_info.at(bt); @@ -273,9 +274,13 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check) // Find all check peers that have the wrong version if (const eversion_t& obj_v = primary_bi.objects.begin()->second; check == primary_bi.begin && check == peer_bi.begin) { - if(peer_bi.objects.begin()->second != obj_v && - backfill_state().progress_tracker->enqueue_push(primary_bi.begin)) { - backfill_listener().enqueue_push(primary_bi.begin, obj_v); + if (peer_bi.objects.begin()->second != obj_v) { + std::ignore = backfill_state().progress_tracker->enqueue_push( + primary_bi.begin); + auto &[v, peers] = backfills[primary_bi.begin]; + assert(v == obj_v || v == eversion_t()); + v = obj_v; + peers.push_back(bt); } else { // it's fine, keep it! OR already recovering } @@ -284,12 +289,22 @@ BackfillState::Enqueuing::update_on_peers(const hobject_t& check) // Only include peers that we've caught up to their backfill line // otherwise, they only appear to be missing this object // because their peer_bi.begin > backfill_info.begin. 
- if (primary_bi.begin > peering_state().get_peer_last_backfill(bt) && - backfill_state().progress_tracker->enqueue_push(primary_bi.begin)) { - backfill_listener().enqueue_push(primary_bi.begin, obj_v); + if (primary_bi.begin > peering_state().get_peer_last_backfill(bt)) { + std::ignore = backfill_state().progress_tracker->enqueue_push( + primary_bi.begin); + auto &[v, peers] = backfills[primary_bi.begin]; + assert(v == obj_v || v == eversion_t()); + v = obj_v; + peers.push_back(bt); } } } + for (auto &backfill : backfills) { + auto &soid = backfill.first; + auto &obj_v = backfill.second.first; + auto &peers = backfill.second.second; + backfill_listener().enqueue_push(soid, obj_v, peers); + } return result; } @@ -327,16 +342,29 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) } trim_backfill_infos(); - while (!all_emptied(primary_bi, backfill_state().peer_backfill_info)) { + if (should_rescan_primary(backfill_state().peer_backfill_info, + primary_bi)) { + // need to grab one another chunk of the object namespace and restart + // the queueing. + logger().debug("{}: reached end for current local chunk", __func__); + post_event(RequestPrimaryScanning{}); + return; + } + + do { if (!backfill_listener().budget_available()) { post_event(RequestWaiting{}); return; } else if (should_rescan_replicas(backfill_state().peer_backfill_info, - primary_bi)) { + primary_bi)) { // Count simultaneous scans as a single op and let those complete post_event(RequestReplicasScanning{}); return; } + + if (all_emptied(primary_bi, backfill_state().peer_backfill_info)) { + break; + } // Get object within set of peers to operate on and the set of targets // for which that object applies. if (const hobject_t check = \ @@ -355,30 +383,23 @@ BackfillState::Enqueuing::Enqueuing(my_context ctx) trim_backfilled_object_from_intervals(std::move(result), backfill_state().last_backfill_started, backfill_state().peer_backfill_info); - primary_bi.pop_front(); + if (!primary_bi.empty()) { + primary_bi.pop_front(); + } } backfill_listener().maybe_flush(); - } + } while (!all_emptied(primary_bi, backfill_state().peer_backfill_info)); - if (should_rescan_primary(backfill_state().peer_backfill_info, - primary_bi)) { - // need to grab one another chunk of the object namespace and restart - // the queueing. 
- logger().debug("{}: reached end for current local chunk", - __func__); - post_event(RequestPrimaryScanning{}); - } else { - if (backfill_state().progress_tracker->tracked_objects_completed() - && Enqueuing::all_enqueued(peering_state(), - backfill_state().backfill_info, - backfill_state().peer_backfill_info)) { - backfill_state().last_backfill_started = hobject_t::get_max(); - backfill_listener().update_peers_last_backfill(hobject_t::get_max()); - } - logger().debug("{}: reached end for both local and all peers " - "but still has in-flight operations", __func__); - post_event(RequestWaiting{}); + if (backfill_state().progress_tracker->tracked_objects_completed() + && Enqueuing::all_enqueued(peering_state(), + backfill_state().backfill_info, + backfill_state().peer_backfill_info)) { + backfill_state().last_backfill_started = hobject_t::get_max(); + backfill_listener().update_peers_last_backfill(hobject_t::get_max()); } + logger().debug("{}: reached end for both local and all peers " + "but still has in-flight operations", __func__); + post_event(RequestWaiting{}); } // -- PrimaryScanning @@ -403,7 +424,7 @@ BackfillState::PrimaryScanning::react(ObjectPushed evt) { logger().debug("PrimaryScanning::react() on ObjectPushed; evt.object={}", evt.object); - backfill_state().progress_tracker->complete_to(evt.object, evt.stat); + backfill_state().progress_tracker->complete_to(evt.object, evt.stat, true); return discard_event(); } @@ -480,7 +501,7 @@ BackfillState::ReplicasScanning::react(ObjectPushed evt) { logger().debug("ReplicasScanning::react() on ObjectPushed; evt.object={}", evt.object); - backfill_state().progress_tracker->complete_to(evt.object, evt.stat); + backfill_state().progress_tracker->complete_to(evt.object, evt.stat, true); return discard_event(); } @@ -496,16 +517,8 @@ BackfillState::Waiting::react(ObjectPushed evt) { logger().debug("Waiting::react() on ObjectPushed; evt.object={}", evt.object); - backfill_state().progress_tracker->complete_to(evt.object, evt.stat); - if (!Enqueuing::all_enqueued(peering_state(), - backfill_state().backfill_info, - backfill_state().peer_backfill_info)) { - return transit(); - } else { - // we still have something to wait on - logger().debug("Waiting::react() on ObjectPushed; still waiting"); - return discard_event(); - } + backfill_state().progress_tracker->complete_to(evt.object, evt.stat, false); + return transit();; } // -- Done @@ -559,7 +572,8 @@ void BackfillState::ProgressTracker::enqueue_drop(const hobject_t& obj) void BackfillState::ProgressTracker::complete_to( const hobject_t& obj, - const pg_stat_t& stats) + const pg_stat_t& stats, + bool may_push_to_max) { logger().debug("{}: obj={}", __func__, obj); @@ -570,6 +584,7 @@ void BackfillState::ProgressTracker::complete_to( } else { ceph_abort_msg("completing untracked object shall not happen"); } + auto new_last_backfill = peering_state().earliest_backfill(); for (auto it = std::begin(registry); it != std::end(registry) && it->second.stage != op_stage_t::enqueued_push; @@ -579,15 +594,18 @@ void BackfillState::ProgressTracker::complete_to( peering_state().update_complete_backfill_object_stats( soid, *item.stats); + assert(soid > new_last_backfill); + new_last_backfill = soid; } - if (Enqueuing::all_enqueued(peering_state(), + if (may_push_to_max && + Enqueuing::all_enqueued(peering_state(), backfill_state().backfill_info, backfill_state().peer_backfill_info) && tracked_objects_completed()) { backfill_state().last_backfill_started = hobject_t::get_max(); 
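The complete_to() change above threads a may_push_to_max flag through the ObjectPushed handlers and recomputes the peers' last_backfill from the op registry: it walks the tracked ops in order, stops at the first push still in enqueued_push, and only advances to a contiguously finished boundary rather than to whichever object happened to complete. A rough stand-alone sketch of that walk; Stage, Registry and last_contiguously_complete are illustrative stand-ins for op_stage_t and the real registry, not code from this patch.

#include <iostream>
#include <map>
#include <string>

enum class Stage { enqueued_push, completed_push };

// Hypothetical stand-in for the ordered op registry keyed by object id.
using Registry = std::map<std::string, Stage>;

// Returns the highest object id up to which everything has completed,
// starting from `earliest` (stand-in for peering_state().earliest_backfill()).
std::string last_contiguously_complete(const Registry& reg,
                                       std::string earliest)
{
  for (const auto& [oid, stage] : reg) {
    if (stage == Stage::enqueued_push) {
      break;             // first in-flight push stops the advance
    }
    earliest = oid;      // safe to tell peers we are complete up to here
  }
  return earliest;
}

int main()
{
  Registry reg{{"a", Stage::completed_push},
               {"b", Stage::completed_push},
               {"c", Stage::enqueued_push}};
  std::cout << last_contiguously_complete(reg, "") << "\n";  // prints "b"
}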
backfill_listener().update_peers_last_backfill(hobject_t::get_max()); } else { - backfill_listener().update_peers_last_backfill(obj); + backfill_listener().update_peers_last_backfill(new_last_backfill); } } diff --git a/src/crimson/osd/backfill_state.h b/src/crimson/osd/backfill_state.h index 6c36db81813b7..ddc0cbf735557 100644 --- a/src/crimson/osd/backfill_state.h +++ b/src/crimson/osd/backfill_state.h @@ -336,7 +336,8 @@ struct BackfillState::BackfillListener { virtual void enqueue_push( const hobject_t& obj, - const eversion_t& v) = 0; + const eversion_t& v, + const std::vector &peers) = 0; virtual void enqueue_drop( const pg_shard_t& target, @@ -375,6 +376,10 @@ struct BackfillState::PeeringFacade { virtual void update_complete_backfill_object_stats(const hobject_t &hoid, const pg_stat_t &stats) = 0; virtual bool is_backfilling() const = 0; + virtual void prepare_backfill_for_missing( + const hobject_t &soid, + const eversion_t &v, + const std::vector &peers) = 0; virtual ~PeeringFacade() {} }; @@ -421,7 +426,7 @@ class BackfillState::ProgressTracker { bool enqueue_push(const hobject_t&); void enqueue_drop(const hobject_t&); - void complete_to(const hobject_t&, const pg_stat_t&); + void complete_to(const hobject_t&, const pg_stat_t&, bool may_push_to_max); }; } // namespace crimson::osd diff --git a/src/crimson/osd/ops_executer.cc b/src/crimson/osd/ops_executer.cc index df4f73d4077d1..9bf60140374c8 100644 --- a/src/crimson/osd/ops_executer.cc +++ b/src/crimson/osd/ops_executer.cc @@ -504,7 +504,7 @@ OpsExecuter::list_snaps_iertr::future<> OpsExecuter::do_list_snaps( auto p = ss.clone_snaps.find(clone); if (p == ss.clone_snaps.end()) { logger().error( - "OpsExecutor::do_list_snaps: {} has inconsistent " + "OpsExecuter::do_list_snaps: {} has inconsistent " "clone_snaps, missing clone {}", os.oi.soid, clone); @@ -518,7 +518,7 @@ OpsExecuter::list_snaps_iertr::future<> OpsExecuter::do_list_snaps( auto p = ss.clone_overlap.find(clone); if (p == ss.clone_overlap.end()) { logger().error( - "OpsExecutor::do_list_snaps: {} has inconsistent " + "OpsExecuter::do_list_snaps: {} has inconsistent " "clone_overlap, missing clone {}", os.oi.soid, clone); @@ -532,7 +532,7 @@ OpsExecuter::list_snaps_iertr::future<> OpsExecuter::do_list_snaps( auto p = ss.clone_size.find(clone); if (p == ss.clone_size.end()) { logger().error( - "OpsExecutor::do_list_snaps: {} has inconsistent " + "OpsExecuter::do_list_snaps: {} has inconsistent " "clone_size, missing clone {}", os.oi.soid, clone); @@ -551,7 +551,7 @@ OpsExecuter::list_snaps_iertr::future<> OpsExecuter::do_list_snaps( } resp.seq = ss.seq; logger().error( - "OpsExecutor::do_list_snaps: {}, resp.clones.size(): {}", + "OpsExecuter::do_list_snaps: {}, resp.clones.size(): {}", os.oi.soid, resp.clones.size()); resp.encode(osd_op.outdata); @@ -678,16 +678,32 @@ OpsExecuter::do_execute_op(OSDOp& osd_op) whiteout = true; } return do_write_op([this, whiteout](auto& backend, auto& os, auto& txn) { - int num_bytes = 0; - // Calculate num_bytes to be removed - if (obc->obs.oi.soid.is_snap()) { - ceph_assert(obc->ssc->snapset.clone_overlap.count(obc->obs.oi.soid.snap)); - num_bytes = obc->ssc->snapset.get_clone_bytes(obc->obs.oi.soid.snap); - } else { - num_bytes = obc->obs.oi.size; - } - return backend.remove(os, txn, *osd_op_params, - delta_stats, whiteout, num_bytes); + struct emptyctx_t {}; + return with_effect_on_obc( + emptyctx_t{}, + [&](auto &ctx) { + int num_bytes = 0; + // Calculate num_bytes to be removed + if (obc->obs.oi.soid.is_snap()) { + 
ceph_assert(obc->ssc->snapset.clone_overlap.count( + obc->obs.oi.soid.snap)); + num_bytes = obc->ssc->snapset.get_clone_bytes( + obc->obs.oi.soid.snap); + } else { + num_bytes = obc->obs.oi.size; + } + return backend.remove(os, txn, *osd_op_params, + delta_stats, whiteout, num_bytes); + }, + [](auto &&ctx, ObjectContextRef obc, Ref) { + return seastar::do_for_each( + obc->watchers, + [](auto &p) { return p.second->remove(); } + ).then([obc] { + obc->watchers.clear(); + return seastar::now(); + }); + }); }); } case CEPH_OSD_OP_CALL: @@ -957,7 +973,7 @@ void OpsExecuter::CloningContext::apply_to( processed_obc.ssc->snapset = std::move(new_snapset); } -OpsExecuter::interruptible_future> +std::vector OpsExecuter::flush_clone_metadata( std::vector&& log_entries, SnapMapper& snap_mapper, @@ -965,7 +981,6 @@ OpsExecuter::flush_clone_metadata( ceph::os::Transaction& txn) { assert(!txn.empty()); - auto maybe_snap_mapped = interruptor::now(); update_clone_overlap(); if (cloning_ctx) { std::move(*cloning_ctx).apply_to(log_entries, *obc); @@ -977,12 +992,7 @@ OpsExecuter::flush_clone_metadata( } logger().debug("{} done, initial snapset={}, new snapset={}", __func__, obc->obs.oi.soid, obc->ssc->snapset); - return std::move( - maybe_snap_mapped - ).then_interruptible([log_entries=std::move(log_entries)]() mutable { - return interruptor::make_ready_future>( - std::move(log_entries)); - }); + return std::move(log_entries); } ObjectContextRef OpsExecuter::prepare_clone( diff --git a/src/crimson/osd/ops_executer.h b/src/crimson/osd/ops_executer.h index 0dea7d0515e93..e770e825b32d0 100644 --- a/src/crimson/osd/ops_executer.h +++ b/src/crimson/osd/ops_executer.h @@ -40,7 +40,7 @@ namespace crimson::osd { class PG; // OpsExecuter -- a class for executing ops targeting a certain object. -class OpsExecuter : public seastar::enable_lw_shared_from_this { +class OpsExecuter { friend class SnapTrimObjSubEvent; using call_errorator = crimson::errorator< @@ -170,16 +170,12 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this { object_stat_sum_t delta_stats; private: - // an operation can be divided into two stages: main and effect-exposing - // one. The former is performed immediately on call to `do_osd_op()` while - // the later on `submit_changes()` – after successfully processing main - // stages of all involved operations. When any stage fails, none of all - // scheduled effect-exposing stages will be executed. - // when operation requires this division, some variant of `with_effect()` - // should be used. + // with_effect can be used to schedule operations to be performed + // at commit time. effects will be discarded if the operation does + // not commit. struct effect_t { // an effect can affect PG, i.e. create a watch timeout - virtual osd_op_errorator::future<> execute(Ref pg) = 0; + virtual seastar::future<> execute(Ref pg) = 0; virtual ~effect_t() = default; }; @@ -213,10 +209,10 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this { * execute_clone * * If snapc contains a snap which occurred logically after the last write - * seen by this object (see OpsExecutor::should_clone()), we first need + * seen by this object (see OpsExecuter::should_clone()), we first need * make a clone of the object at its current state. 
execute_clone primes * txn with that clone operation and returns an - * OpsExecutor::CloningContext which will allow us to fill in the corresponding + * OpsExecuter::CloningContext which will allow us to fill in the corresponding * metadata and log_entries once the operations have been processed. * * Note that this strategy differs from classic, which instead performs this @@ -267,7 +263,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this { */ void update_clone_overlap(); - interruptible_future> flush_clone_metadata( + std::vector flush_clone_metadata( std::vector&& log_entries, SnapMapper& snap_mapper, OSDriver& osdriver, @@ -400,7 +396,7 @@ class OpsExecuter : public seastar::enable_lw_shared_from_this { execute_op(OSDOp& osd_op); using rep_op_fut_tuple = - std::tuple, osd_op_ierrorator::future<>>; + std::tuple, interruptible_future<>>; using rep_op_fut_t = interruptible_future; template @@ -475,7 +471,7 @@ auto OpsExecuter::with_effect_on_obc( effect_func(std::move(effect_func)), obc(std::move(obc)) { } - osd_op_errorator::future<> execute(Ref pg) final { + seastar::future<> execute(Ref pg) final { return std::move(effect_func)(std::move(ctx), std::move(obc), std::move(pg)); @@ -502,15 +498,14 @@ OpsExecuter::flush_changes_n_do_ops_effects( assert(obc); auto submitted = interruptor::now(); - auto all_completed = - interruptor::make_interruptible(osd_op_errorator::now()); + auto all_completed = interruptor::now(); if (cloning_ctx) { ceph_assert(want_mutate); } if (want_mutate) { - auto log_entries = co_await flush_clone_metadata( + auto log_entries = flush_clone_metadata( prepare_transaction(ops), snap_mapper, osdriver, @@ -536,7 +531,7 @@ OpsExecuter::flush_changes_n_do_ops_effects( // need extra ref pg due to apply_stats() which can be executed after // informing snap mapper all_completed = - std::move(all_completed).safe_then_interruptible([this, pg=this->pg] { + std::move(all_completed).then_interruptible([this, pg=this->pg] { // let's do the cleaning of `op_effects` in destructor return interruptor::do_for_each(op_effects, [pg=std::move(pg)](auto& op_effect) { @@ -552,21 +547,19 @@ OpsExecuter::flush_changes_n_do_ops_effects( template struct OpsExecuter::RollbackHelper { - void rollback_obc_if_modified(const std::error_code& e); - seastar::lw_shared_ptr ox; + void rollback_obc_if_modified(); + OpsExecuter *ox; Func func; }; template inline OpsExecuter::RollbackHelper OpsExecuter::create_rollbacker(Func&& func) { - return {shared_from_this(), std::forward(func)}; + return {this, std::forward(func)}; } - template -void OpsExecuter::RollbackHelper::rollback_obc_if_modified( - const std::error_code& e) +void OpsExecuter::RollbackHelper::rollback_obc_if_modified() { // Oops, an operation had failed. 
do_osd_ops() altogether with // OpsExecuter already dropped the ObjectStore::Transaction if @@ -584,10 +577,9 @@ void OpsExecuter::RollbackHelper::rollback_obc_if_modified( assert(ox); const auto need_rollback = ox->has_seen_write(); crimson::get_logger(ceph_subsys_osd).debug( - "{}: object {} got error {}, need_rollback={}", + "{}: object {} got error, need_rollback={}", __func__, ox->obc->get_oid(), - e, need_rollback); if (need_rollback) { func(ox->obc); diff --git a/src/crimson/osd/osd.cc b/src/crimson/osd/osd.cc index 8d2d10fbd7c45..34ad97ceb068a 100644 --- a/src/crimson/osd/osd.cc +++ b/src/crimson/osd/osd.cc @@ -23,6 +23,7 @@ #include "messages/MOSDOp.h" #include "messages/MOSDPeeringOp.h" #include "messages/MOSDPGCreate2.h" +#include "messages/MOSDPGRemove.h" #include "messages/MOSDPGUpdateLogMissing.h" #include "messages/MOSDPGUpdateLogMissingReply.h" #include "messages/MOSDRepOpReply.h" @@ -863,6 +864,8 @@ OSD::do_ms_dispatch( [[fallthrough]]; case MSG_OSD_PG_LOG: return handle_peering_op(conn, boost::static_pointer_cast(m)); + case MSG_OSD_PG_REMOVE: + return handle_pg_remove(conn, boost::static_pointer_cast(m)); case MSG_OSD_REPOP: return handle_rep_op(conn, boost::static_pointer_cast(m)); case MSG_OSD_REPOPREPLY: @@ -1555,6 +1558,27 @@ seastar::future<> OSD::handle_peering_op( std::move(*evt)).second; } +seastar::future<> OSD::handle_pg_remove( + crimson::net::ConnectionRef conn, + Ref m) +{ + LOG_PREFIX(OSD::handle_pg_remove); + const int from = m->get_source().num(); + std::vector> futs; + for (auto &pg : m->pg_list) { + DEBUG("{} from {}", pg, from); + futs.emplace_back( + pg_shard_manager.start_pg_operation( + conn, + pg_shard_t{from, pg.shard}, + pg, + m->get_epoch(), + m->get_epoch(), + PeeringState::DeleteStart()).second); + } + return seastar::when_all_succeed(std::move(futs)); +} + seastar::future<> OSD::check_osdmap_features() { LOG_PREFIX(OSD::check_osdmap_features); diff --git a/src/crimson/osd/osd.h b/src/crimson/osd/osd.h index de39d80827494..d7d54d5d2c3c3 100644 --- a/src/crimson/osd/osd.h +++ b/src/crimson/osd/osd.h @@ -208,6 +208,8 @@ class OSD final : public crimson::net::Dispatcher, Ref m); seastar::future<> handle_peering_op(crimson::net::ConnectionRef conn, Ref m); + seastar::future<> handle_pg_remove(crimson::net::ConnectionRef conn, + Ref m); seastar::future<> handle_recovery_subreq(crimson::net::ConnectionRef conn, Ref m); seastar::future<> handle_scrub_command(crimson::net::ConnectionRef conn, diff --git a/src/crimson/osd/osd_operation.h b/src/crimson/osd/osd_operation.h index fb0432edb8f9a..fd8b049c0bf08 100644 --- a/src/crimson/osd/osd_operation.h +++ b/src/crimson/osd/osd_operation.h @@ -40,6 +40,37 @@ struct PerShardPipeline { } create_or_wait_pg; }; +struct PGPeeringPipeline { + struct AwaitMap : OrderedExclusivePhaseT { + static constexpr auto type_name = "PeeringEvent::PGPipeline::await_map"; + } await_map; + struct Process : OrderedExclusivePhaseT { + static constexpr auto type_name = "PeeringEvent::PGPipeline::process"; + } process; +}; + +struct CommonPGPipeline { + struct WaitForActive : OrderedExclusivePhaseT { + static constexpr auto type_name = "CommonPGPipeline:::wait_for_active"; + } wait_for_active; + struct RecoverMissing : OrderedConcurrentPhaseT { + static constexpr auto type_name = "CommonPGPipeline::recover_missing"; + } recover_missing; + struct CheckAlreadyCompleteGetObc : OrderedExclusivePhaseT { + static constexpr auto type_name = "CommonPGPipeline::check_already_complete_get_obc"; + } check_already_complete_get_obc; + struct 
LockOBC : OrderedConcurrentPhaseT { + static constexpr auto type_name = "CommonPGPipeline::lock_obc"; + } lock_obc; + struct Process : OrderedExclusivePhaseT { + static constexpr auto type_name = "CommonPGPipeline::process"; + } process; + struct WaitRepop : OrderedConcurrentPhaseT { + static constexpr auto type_name = "ClientRequest::PGPipeline::wait_repop"; + } wait_repop; +}; + + enum class OperationTypeCode { client_request = 0, peering_event, diff --git a/src/crimson/osd/osd_operation_external_tracking.h b/src/crimson/osd/osd_operation_external_tracking.h index 530732ba71028..d2786a95e4d3c 100644 --- a/src/crimson/osd/osd_operation_external_tracking.h +++ b/src/crimson/osd/osd_operation_external_tracking.h @@ -36,7 +36,6 @@ struct LttngBackend ClientRequest::PGPipeline::RecoverMissing:: BlockingEvent::ExitBarrierEvent::Backend, ClientRequest::PGPipeline::CheckAlreadyCompleteGetObc::BlockingEvent::Backend, - ClientRequest::PGPipeline::GetOBC::BlockingEvent::Backend, ClientRequest::PGPipeline::LockOBC::BlockingEvent::Backend, ClientRequest::PGPipeline::LockOBC::BlockingEvent::ExitBarrierEvent::Backend, ClientRequest::PGPipeline::Process::BlockingEvent::Backend, @@ -117,10 +116,6 @@ struct LttngBackend const ClientRequest::PGPipeline::CheckAlreadyCompleteGetObc& blocker) override { } - void handle(ClientRequest::PGPipeline::GetOBC::BlockingEvent& ev, - const Operation& op, - const ClientRequest::PGPipeline::GetOBC& blocker) override { - } void handle(ClientRequest::PGPipeline::LockOBC::BlockingEvent& ev, const Operation& op, @@ -171,7 +166,6 @@ struct HistoricBackend ClientRequest::PGPipeline::RecoverMissing:: BlockingEvent::ExitBarrierEvent::Backend, ClientRequest::PGPipeline::CheckAlreadyCompleteGetObc::BlockingEvent::Backend, - ClientRequest::PGPipeline::GetOBC::BlockingEvent::Backend, ClientRequest::PGPipeline::LockOBC::BlockingEvent::Backend, ClientRequest::PGPipeline::LockOBC::BlockingEvent::ExitBarrierEvent::Backend, ClientRequest::PGPipeline::Process::BlockingEvent::Backend, @@ -252,11 +246,6 @@ struct HistoricBackend const ClientRequest::PGPipeline::CheckAlreadyCompleteGetObc& blocker) override { } - void handle(ClientRequest::PGPipeline::GetOBC::BlockingEvent& ev, - const Operation& op, - const ClientRequest::PGPipeline::GetOBC& blocker) override { - } - void handle(ClientRequest::PGPipeline::LockOBC::BlockingEvent& ev, const Operation& op, const ClientRequest::PGPipeline::LockOBC& blocker) override { diff --git a/src/crimson/osd/osd_operations/client_request.cc b/src/crimson/osd/osd_operations/client_request.cc index 8e9a7c4d7490c..a89fb2c84bc56 100644 --- a/src/crimson/osd/osd_operations/client_request.cc +++ b/src/crimson/osd/osd_operations/client_request.cc @@ -403,11 +403,6 @@ ClientRequest::process_op( *pg, *this, this_instance_id); return do_process( ihref, pg, obc, this_instance_id - ).handle_error_interruptible( - crimson::ct_error::eagain::handle( - [this, pg, this_instance_id, &ihref]() mutable { - return process_op(ihref, pg, this_instance_id); - }) ); } ); @@ -437,7 +432,7 @@ ClientRequest::process_op( co_await std::move(process); } -ClientRequest::do_process_iertr::future<> +ClientRequest::interruptible_future<> ClientRequest::do_process( instance_handle_t &ihref, Ref pg, crimson::osd::ObjectContextRef obc, @@ -507,22 +502,128 @@ ClientRequest::do_process( co_return; } - auto [submitted, all_completed] = co_await pg->do_osd_ops( - m, r_conn, obc, op_info, snapc + OpsExecuter ox(pg, obc, op_info, *m, r_conn, snapc); + auto ret = co_await pg->run_executer( + ox, 
obc, op_info, m->ops + ).si_then([]() -> std::optional { + return std::nullopt; + }).handle_error_interruptible(crimson::ct_error::all_same_way( + [](auto e) -> std::optional { + return e; + }) ); - co_await std::move(submitted); - co_await ihref.enter_stage(client_pp(*pg).wait_repop, *this); + auto should_log_error = [](std::error_code e) -> bool { + switch (e.value()) { + case EDQUOT: + case ENOSPC: + case EAGAIN: + return false; + default: + return true; + } + }; - auto reply = co_await std::move(all_completed); + if (ret && !should_log_error(*ret)) { + co_await reply_op_error(pg, -ret->value()); + co_return; + } + + { + auto all_completed = interruptor::now(); + if (ret) { + assert(should_log_error(*ret)); + if (op_info.may_write()) { + auto rep_tid = pg->shard_services.get_tid(); + auto version = co_await pg->submit_error_log( + m, op_info, obc, *ret, rep_tid); + + all_completed = pg->complete_error_log( + rep_tid, version); + } + // simply return the error below, leaving all_completed alone + } else { + auto submitted = interruptor::now(); + std::tie(submitted, all_completed) = co_await pg->submit_executer( + std::move(ox), m->ops); + co_await std::move(submitted); + } + co_await ihref.enter_stage(client_pp(*pg).wait_repop, *this); + + co_await std::move(all_completed); + } co_await ihref.enter_stage(client_pp(*pg).send_reply, *this); - DEBUGDPP("{}.{}: sending response", - *pg, *this, this_instance_id); - // TODO: gate the crosscore sending - co_await interruptor::make_interruptible( - get_foreign_connection().send_with_throttling(std::move(reply)) - ); + + if (ret) { + int err = -ret->value(); + DEBUGDPP("{}: replying with error {}", *pg, *this, err); + + auto reply = crimson::make_message( + m.get(), err, pg->get_osdmap_epoch(), 0, false); + + if (!m->ops.empty() && m->ops.back().op.flags & CEPH_OSD_OP_FLAG_FAILOK) { + reply->set_result(0); + } + + // For all ops except for CMPEXT, the correct error value is encoded + // in e. For CMPEXT, osdop.rval has the actual error value. + if (err == -ct_error::cmp_fail_error_value) { + assert(!m->ops.empty()); + for (auto &osdop : m->ops) { + if (osdop.rval < 0) { + reply->set_result(osdop.rval); + break; + } + } + } + + reply->set_enoent_reply_versions( + pg->peering_state.get_info().last_update, + pg->peering_state.get_info().last_user_version); + reply->add_flags(CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); + + // TODO: gate the crosscore sending + co_await interruptor::make_interruptible( + get_foreign_connection().send_with_throttling(std::move(reply))); + } else { + int result = m->ops.empty() ? 0 : m->ops.back().rval.code; + if (op_info.may_read() && result >= 0) { + for (auto &osdop : m->ops) { + if (osdop.rval < 0 && !(osdop.op.flags & CEPH_OSD_OP_FLAG_FAILOK)) { + result = osdop.rval.code; + break; + } + } + } else if (result > 0 && op_info.may_write() && !op_info.allows_returnvec()) { + result = 0; + } else if (result < 0 && + (m->ops.empty() ? 
+ 0 : m->ops.back().op.flags & CEPH_OSD_OP_FLAG_FAILOK)) { + result = 0; + } + auto reply = crimson::make_message( + m.get(), + result, + pg->get_osdmap_epoch(), + 0, + false); + reply->add_flags(CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); + if (obc->obs.exists) { + reply->set_reply_versions(pg->peering_state.get_info().last_update, + obc->obs.oi.user_version); + } else { + reply->set_reply_versions(pg->peering_state.get_info().last_update, + pg->peering_state.get_info().last_user_version); + } + + DEBUGDPP("{}.{}: sending response {}", + *pg, *this, this_instance_id, *m); + // TODO: gate the crosscore sending + co_await interruptor::make_interruptible( + get_foreign_connection().send_with_throttling(std::move(reply)) + ); + } } bool ClientRequest::is_misdirected(const PG& pg) const diff --git a/src/crimson/osd/osd_operations/client_request.h b/src/crimson/osd/osd_operations/client_request.h index ea7aade22ac75..6ee57e9874cd1 100644 --- a/src/crimson/osd/osd_operations/client_request.h +++ b/src/crimson/osd/osd_operations/client_request.h @@ -14,7 +14,6 @@ #include "crimson/osd/osdmap_gate.h" #include "crimson/osd/osd_operation.h" #include "crimson/osd/osd_operations/client_request_common.h" -#include "crimson/osd/osd_operations/common/pg_pipeline.h" #include "crimson/osd/pg_activation_blocker.h" #include "crimson/osd/pg_map.h" #include "crimson/osd/scrub/pg_scrubber.h" @@ -104,7 +103,6 @@ class ClientRequest final : public PhasedOperationT, PGPipeline::RecoverMissing::BlockingEvent, scrub::PGScrubber::BlockingEvent, PGPipeline::CheckAlreadyCompleteGetObc::BlockingEvent, - PGPipeline::GetOBC::BlockingEvent, PGPipeline::LockOBC::BlockingEvent, PGPipeline::Process::BlockingEvent, PGPipeline::WaitRepop::BlockingEvent, @@ -276,12 +274,7 @@ class ClientRequest final : public PhasedOperationT, interruptible_future<> with_sequencer(FuncT&& func); interruptible_future<> reply_op_error(const Ref& pg, int err); - - using do_process_iertr = - ::crimson::interruptible::interruptible_errorator< - ::crimson::osd::IOInterruptCondition, - ::crimson::errorator>; - do_process_iertr::future<> do_process( + interruptible_future<> do_process( instance_handle_t &ihref, Ref pg, crimson::osd::ObjectContextRef obc, diff --git a/src/crimson/osd/osd_operations/common/pg_pipeline.h b/src/crimson/osd/osd_operations/common/pg_pipeline.h deleted file mode 100644 index 2b2d03ae4b3ed..0000000000000 --- a/src/crimson/osd/osd_operations/common/pg_pipeline.h +++ /dev/null @@ -1,40 +0,0 @@ -// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 smarttab - -#pragma once - -#include "osd/osd_op_util.h" -#include "crimson/osd/osd_operation.h" - -namespace crimson::osd { - -class CommonPGPipeline { -protected: - friend class InternalClientRequest; - friend class SnapTrimEvent; - friend class SnapTrimObjSubEvent; - - struct WaitForActive : OrderedExclusivePhaseT { - static constexpr auto type_name = "CommonPGPipeline:::wait_for_active"; - } wait_for_active; - struct RecoverMissing : OrderedConcurrentPhaseT { - static constexpr auto type_name = "CommonPGPipeline::recover_missing"; - } recover_missing; - struct CheckAlreadyCompleteGetObc : OrderedExclusivePhaseT { - static constexpr auto type_name = "CommonPGPipeline::check_already_complete_get_obc"; - } check_already_complete_get_obc; - struct GetOBC : OrderedExclusivePhaseT { - static constexpr auto type_name = "CommonPGPipeline::get_obc"; - } get_obc; - struct LockOBC : OrderedConcurrentPhaseT { - static constexpr auto type_name = 
"CommonPGPipeline::lock_obc"; - } lock_obc; - struct Process : OrderedExclusivePhaseT { - static constexpr auto type_name = "CommonPGPipeline::process"; - } process; - struct WaitRepop : OrderedConcurrentPhaseT { - static constexpr auto type_name = "ClientRequest::PGPipeline::wait_repop"; - } wait_repop; -}; - -} // namespace crimson::osd diff --git a/src/crimson/osd/osd_operations/internal_client_request.cc b/src/crimson/osd/osd_operations/internal_client_request.cc index a19bb0826f004..9e5867caf8067 100644 --- a/src/crimson/osd/osd_operations/internal_client_request.cc +++ b/src/crimson/osd/osd_operations/internal_client_request.cc @@ -50,91 +50,107 @@ CommonPGPipeline& InternalClientRequest::client_pp() return pg->request_pg_pipeline; } +InternalClientRequest::interruptible_future<> +InternalClientRequest::do_process( + crimson::osd::ObjectContextRef obc, + std::vector &osd_ops) +{ + LOG_PREFIX(InternalClientRequest::do_process); + auto params = get_do_osd_ops_params(); + OpsExecuter ox( + pg, obc, op_info, params, params.get_connection(), SnapContext{}); + co_await pg->run_executer( + ox, obc, op_info, osd_ops + ).handle_error_interruptible( + crimson::ct_error::all_same_way( + [this, FNAME](auto e) { + ERRORDPPI("{}: got unexpected error {}", *pg, *this, e); + ceph_assert(0 == "should not return an error"); + return interruptor::now(); + }) + ); + + auto [submitted, completed] = co_await pg->submit_executer( + std::move(ox), osd_ops); + + co_await std::move(submitted); + co_await std::move(completed); +} + +InternalClientRequest::interruptible_future<> +InternalClientRequest::with_interruption() +{ + LOG_PREFIX(InternalClientRequest::with_interruption); + co_await enter_stage( + client_pp().wait_for_active + ); + + co_await with_blocking_event([this] (auto&& trigger) { + return pg->wait_for_active_blocker.wait(std::move(trigger)); + }); + + co_await enter_stage(client_pp().recover_missing); + + bool unfound = co_await do_recover_missing( + pg, get_target_oid(), osd_reqid_t()); + + if (unfound) { + throw std::system_error( + std::make_error_code(std::errc::operation_canceled), + fmt::format("{} is unfound, drop it!", get_target_oid())); + } + co_await enter_stage( + client_pp().check_already_complete_get_obc); + + DEBUGI("{}: getting obc lock", *this); + + auto osd_ops = create_osd_ops(); + + DEBUGI("InternalClientRequest: got {} OSDOps to execute", + std::size(osd_ops)); + [[maybe_unused]] const int ret = op_info.set_from_op( + std::as_const(osd_ops), pg->get_pgid().pgid, *pg->get_osdmap()); + assert(ret == 0); + // call with_locked_obc() in order, but wait concurrently for loading. 
+ enter_stage_sync(client_pp().lock_obc); + + auto fut = pg->with_locked_obc( + get_target_oid(), op_info, + [&osd_ops, this](auto, auto obc) { + return enter_stage(client_pp().process + ).then_interruptible( + [obc=std::move(obc), &osd_ops, this]() mutable { + return do_process(std::move(obc), osd_ops); + }); + }).handle_error_interruptible( + crimson::ct_error::assert_all("unexpected error") + ); + co_await std::move(fut); + + logger().debug("{}: complete", *this); + co_await interruptor::make_interruptible(handle.complete()); + co_return; +} + seastar::future<> InternalClientRequest::start() { track_event(); - return crimson::common::handle_system_shutdown([this] { - LOG_PREFIX(InternalClientRequest::start); - DEBUGI("{}: in repeat", *this); - - return interruptor::with_interruption([this]() mutable { - return enter_stage( - client_pp().wait_for_active - ).then_interruptible([this] { - return with_blocking_event([this] (auto&& trigger) { - return pg->wait_for_active_blocker.wait(std::move(trigger)); - }); - }).then_interruptible([this] { - return enter_stage( - client_pp().recover_missing); - }).then_interruptible([this] { - return do_recover_missing(pg, get_target_oid(), osd_reqid_t()); - }).then_interruptible([this](bool unfound) { - if (unfound) { - throw std::system_error( - std::make_error_code(std::errc::operation_canceled), - fmt::format("{} is unfound, drop it!", get_target_oid())); - } - return enter_stage( - client_pp().get_obc); - }).then_interruptible([this] () -> PG::load_obc_iertr::future<> { - LOG_PREFIX(InternalClientRequest::start); - DEBUGI("{}: getting obc lock", *this); - return seastar::do_with(create_osd_ops(), - [this](auto& osd_ops) mutable { - LOG_PREFIX(InternalClientRequest::start); - DEBUGI("InternalClientRequest: got {} OSDOps to execute", - std::size(osd_ops)); - [[maybe_unused]] const int ret = op_info.set_from_op( - std::as_const(osd_ops), pg->get_pgid().pgid, *pg->get_osdmap()); - assert(ret == 0); - // call with_locked_obc() in order, but wait concurrently for loading. 
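The rewritten InternalClientRequest path above splits execution into two phases: run_executer() runs the prepared OSDOps (any error is treated as fatal for an internal request), and submit_executer() hands back a pair of futures, roughly one for submission ordering and one for completion, which the caller awaits in that order. Below is only a loose standard-library sketch of that two-phase shape; std::async stands in for the Seastar futures, and Op, run_executer and submit_executer here are illustrative names, not the real interfaces.

#include <future>
#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>

struct Op { int code; };

// Phase 1: run every op; throwing models the "unexpected error" assert path.
void run_executer(const std::vector<Op>& ops)
{
  for (const Op& op : ops) {
    if (op.code < 0) throw std::runtime_error("internal op must not fail");
  }
}

// Phase 2: returns {submitted, all_completed}; callers await them in order.
std::pair<std::future<void>, std::future<void>> submit_executer()
{
  auto submitted = std::async(std::launch::async, [] { /* queue txn */ });
  auto completed = std::async(std::launch::async, [] { /* wait for commit */ });
  return {std::move(submitted), std::move(completed)};
}

int main()
{
  std::vector<Op> ops{{1}, {2}};
  run_executer(ops);
  auto [submitted, completed] = submit_executer();
  submitted.get();   // ordering point: transaction handed to the store
  completed.get();   // commit visible; reply can be sent
  std::cout << "ops applied\n";
}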
- enter_stage_sync(client_pp().lock_obc); - return pg->with_locked_obc(get_target_oid(), op_info, - [&osd_ops, this](auto, auto obc) { - return enter_stage(client_pp().process - ).then_interruptible( - [obc=std::move(obc), &osd_ops, this] { - return pg->do_osd_ops( - std::move(obc), - osd_ops, - std::as_const(op_info), - get_do_osd_ops_params() - ).safe_then_unpack_interruptible( - [](auto submitted, auto all_completed) { - return all_completed.handle_error_interruptible( - crimson::ct_error::eagain::handle([] { - return seastar::now(); - })); - }, crimson::ct_error::eagain::handle([] { - return interruptor::now(); - }) - ); - }); - }); - }); - }).si_then([this] { - logger().debug("{}: complete", *this); - return handle.complete(); - }).handle_error_interruptible( - PG::load_obc_ertr::all_same_way([] { - return seastar::now(); - }) - ); - }, [](std::exception_ptr eptr) { - return seastar::now(); - }, pg, start_epoch - - ).then([this] { - track_event(); - }).handle_exception_type([](std::system_error &error) { - logger().debug("error {}, message: {}", error.code(), error.what()); - return seastar::now(); - }).finally([this] { - logger().debug("{}: exit", *this); - handle.exit(); - }); + LOG_PREFIX(InternalClientRequest::start); + DEBUGI("{}: in repeat", *this); + + return interruptor::with_interruption([this]() mutable { + return with_interruption(); + }, [](std::exception_ptr eptr) { + return seastar::now(); + }, pg, start_epoch).then([this] { + track_event(); + }).handle_exception_type([](std::system_error &error) { + logger().debug("error {}, message: {}", error.code(), error.what()); + return seastar::now(); + }).finally([this] { + logger().debug("{}: exit", *this); + handle.exit(); }); } diff --git a/src/crimson/osd/osd_operations/internal_client_request.h b/src/crimson/osd/osd_operations/internal_client_request.h index f198e58464338..6023db0a8dbe2 100644 --- a/src/crimson/osd/osd_operations/internal_client_request.h +++ b/src/crimson/osd/osd_operations/internal_client_request.h @@ -6,7 +6,6 @@ #include "crimson/common/type_helpers.h" #include "crimson/osd/osd_operation.h" #include "crimson/osd/osd_operations/client_request_common.h" -#include "crimson/osd/osd_operations/common/pg_pipeline.h" #include "crimson/osd/pg.h" #include "crimson/osd/pg_activation_blocker.h" @@ -41,6 +40,11 @@ class InternalClientRequest : public PhasedOperationT, CommonPGPipeline& client_pp(); + InternalClientRequest::interruptible_future<> with_interruption(); + InternalClientRequest::interruptible_future<> do_process( + crimson::osd::ObjectContextRef obc, + std::vector &osd_ops); + seastar::future<> do_process(); Ref pg; @@ -56,7 +60,7 @@ class InternalClientRequest : public PhasedOperationT, CommonPGPipeline::WaitForActive::BlockingEvent, PGActivationBlocker::BlockingEvent, CommonPGPipeline::RecoverMissing::BlockingEvent, - CommonPGPipeline::GetOBC::BlockingEvent, + CommonPGPipeline::CheckAlreadyCompleteGetObc::BlockingEvent, CommonPGPipeline::LockOBC::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, CompletionEvent diff --git a/src/crimson/osd/osd_operations/peering_event.h b/src/crimson/osd/osd_operations/peering_event.h index 1e6bd957289ff..85de5c711d67c 100644 --- a/src/crimson/osd/osd_operations/peering_event.h +++ b/src/crimson/osd/osd_operations/peering_event.h @@ -23,15 +23,6 @@ class ShardServices; class PG; class BackfillRecovery; - struct PGPeeringPipeline { - struct AwaitMap : OrderedExclusivePhaseT { - static constexpr auto type_name = "PeeringEvent::PGPipeline::await_map"; - } 
await_map; - struct Process : OrderedExclusivePhaseT { - static constexpr auto type_name = "PeeringEvent::PGPipeline::process"; - } process; - }; - template class PeeringEvent : public PhasedOperationT { T* that() { diff --git a/src/crimson/osd/osd_operations/snaptrim_event.cc b/src/crimson/osd/osd_operations/snaptrim_event.cc index 7512b3d108dfc..9ed0b73cfb458 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.cc +++ b/src/crimson/osd/osd_operations/snaptrim_event.cc @@ -396,7 +396,7 @@ SnapTrimObjSubEvent::start() }); co_await enter_stage( - client_pp().get_obc); + client_pp().check_already_complete_get_obc); logger().debug("{}: getting obc for {}", *this, coid); // end of commonality diff --git a/src/crimson/osd/osd_operations/snaptrim_event.h b/src/crimson/osd/osd_operations/snaptrim_event.h index 06d8f43c2f3c9..1164b3169d293 100644 --- a/src/crimson/osd/osd_operations/snaptrim_event.h +++ b/src/crimson/osd/osd_operations/snaptrim_event.h @@ -9,7 +9,6 @@ #include "crimson/osd/osdmap_gate.h" #include "crimson/osd/osd_operation.h" #include "crimson/common/subop_blocker.h" -#include "crimson/osd/osd_operations/common/pg_pipeline.h" #include "crimson/osd/pg.h" #include "crimson/osd/pg_activation_blocker.h" #include "osd/osd_types.h" @@ -170,7 +169,7 @@ class SnapTrimObjSubEvent : public PhasedOperationT { std::tuple< StartEvent, - CommonPGPipeline::GetOBC::BlockingEvent, + CommonPGPipeline::CheckAlreadyCompleteGetObc::BlockingEvent, CommonPGPipeline::Process::BlockingEvent, CommonPGPipeline::WaitRepop::BlockingEvent, CompletionEvent diff --git a/src/crimson/osd/pg.cc b/src/crimson/osd/pg.cc index d210773ca3031..744a1dbc02b97 100644 --- a/src/crimson/osd/pg.cc +++ b/src/crimson/osd/pg.cc @@ -13,6 +13,9 @@ #include #include #include + +#include + #include "include/utime_fmt.h" #include "common/hobject.h" @@ -481,6 +484,7 @@ PG::do_delete_work(ceph::os::Transaction &t, ghobject_t _next) auto [objs_to_rm, next] = fut.get(); if (objs_to_rm.empty()) { logger().info("all objs removed, removing coll for {}", pgid); + t.remove(coll_ref->get_cid(), pgid.make_snapmapper_oid()); t.remove(coll_ref->get_cid(), pgmeta_oid); t.remove_collection(coll_ref->get_cid()); (void) shard_services.get_store().do_transaction( @@ -490,7 +494,7 @@ PG::do_delete_work(ceph::os::Transaction &t, ghobject_t _next) return {next, false}; } else { for (auto &obj : objs_to_rm) { - if (obj == pgmeta_oid) { + if (obj == pgmeta_oid || obj.is_internal_pg_local()) { continue; } logger().trace("pg {}, removing obj {}", pgid, obj); @@ -517,7 +521,8 @@ Context *PG::on_clean() { recovery_handler->on_pg_clean(); scrubber.on_primary_active_clean(); - return nullptr; + recovery_finisher = new C_PG_FinishRecovery(*this); + return recovery_finisher; } seastar::future<> PG::clear_temp_objects() @@ -973,150 +978,6 @@ ObjectContextRef duplicate_obc(const ObjectContextRef &obc) { return object_context; } -template -PG::do_osd_ops_iertr::future> -PG::do_osd_ops_execute( - seastar::lw_shared_ptr ox, - ObjectContextRef obc, - const OpInfo &op_info, - Ref m, - std::vector& ops, - SuccessFunc&& success_func, - FailureFunc&& failure_func) -{ - assert(ox); - auto rollbacker = ox->create_rollbacker( - [object_context=duplicate_obc(obc)] (auto& obc) mutable { - obc->update_from(*object_context); - }); - auto failure_func_ptr = seastar::make_lw_shared(std::move(failure_func)); - return interruptor::do_for_each(ops, [ox](OSDOp& osd_op) { - logger().debug( - "do_osd_ops_execute: object {} - handling op {}", - ox->get_target(), - 
ceph_osd_op_name(osd_op.op.op)); - return ox->execute_op(osd_op); - }).safe_then_interruptible([this, ox, &ops] { - logger().debug( - "do_osd_ops_execute: object {} all operations successful", - ox->get_target()); - // check for full - if ((ox->delta_stats.num_bytes > 0 || - ox->delta_stats.num_objects > 0) && - get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL)) { - const auto& m = ox->get_message(); - if (m.get_reqid().name.is_mds() || // FIXME: ignore MDS for now - m.has_flag(CEPH_OSD_FLAG_FULL_FORCE)) { - logger().info(" full, but proceeding due to FULL_FORCE or MDS"); - } else if (m.has_flag(CEPH_OSD_FLAG_FULL_TRY)) { - // they tried, they failed. - logger().info(" full, replying to FULL_TRY op"); - if (get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL_QUOTA)) - return interruptor::make_ready_future( - seastar::now(), - OpsExecuter::osd_op_ierrorator::future<>( - crimson::ct_error::edquot::make())); - else - return interruptor::make_ready_future( - seastar::now(), - OpsExecuter::osd_op_ierrorator::future<>( - crimson::ct_error::enospc::make())); - } else { - // drop request - logger().info(" full, dropping request (bad client)"); - return interruptor::make_ready_future( - seastar::now(), - OpsExecuter::osd_op_ierrorator::future<>( - crimson::ct_error::eagain::make())); - } - } - return std::move(*ox).flush_changes_n_do_ops_effects( - ops, - snap_mapper, - osdriver, - [this] (auto&& txn, - auto&& obc, - auto&& osd_op_p, - auto&& log_entries) { - logger().debug( - "do_osd_ops_execute: object {} submitting txn", - obc->get_oid()); - mutate_object(obc, txn, osd_op_p); - return submit_transaction( - std::move(obc), - std::move(txn), - std::move(osd_op_p), - std::move(log_entries)); - }); - }).safe_then_unpack_interruptible( - [success_func=std::move(success_func), rollbacker, this, failure_func_ptr, obc] - (auto submitted_fut, auto _all_completed_fut) mutable { - - auto all_completed_fut = _all_completed_fut.safe_then_interruptible_tuple( - std::move(success_func), - crimson::ct_error::object_corrupted::handle( - [rollbacker, this, obc] (const std::error_code& e) mutable { - // this is a path for EIO. it's special because we want to fix the obejct - // and try again. that is, the layer above `PG::do_osd_ops` is supposed to - // restart the execution. 
- rollbacker.rollback_obc_if_modified(e); - return repair_object(obc->obs.oi.soid, - obc->obs.oi.version - ).then_interruptible([] { - return do_osd_ops_iertr::future{crimson::ct_error::eagain::make()}; - }); - }), OpsExecuter::osd_op_errorator::all_same_way( - [rollbacker, failure_func_ptr] - (const std::error_code& e) mutable { - // handle non-fatal errors only - ceph_assert(e.value() == EDQUOT || - e.value() == ENOSPC || - e.value() == EAGAIN); - rollbacker.rollback_obc_if_modified(e); - return (*failure_func_ptr)(e); - })); - - return PG::do_osd_ops_iertr::make_ready_future>( - std::move(submitted_fut), - std::move(all_completed_fut) - ); - }, OpsExecuter::osd_op_errorator::all_same_way( - [this, op_info, m, obc, - rollbacker, failure_func_ptr] - (const std::error_code& e) mutable { - ceph_tid_t rep_tid = shard_services.get_tid(); - rollbacker.rollback_obc_if_modified(e); - // record error log - auto maybe_submit_error_log = - interruptor::make_ready_future>(std::nullopt); - // call submit_error_log only for non-internal clients - if constexpr (!std::is_same_v) { - if(op_info.may_write()) { - maybe_submit_error_log = - submit_error_log(m, op_info, obc, e, rep_tid); - } - } - return maybe_submit_error_log.then_interruptible( - [this, failure_func_ptr, e, rep_tid] (auto version) { - auto all_completed = - [this, failure_func_ptr, e, rep_tid, version] { - if (version.has_value()) { - return complete_error_log(rep_tid, version.value() - ).then_interruptible([failure_func_ptr, e] { - return (*failure_func_ptr)(e); - }); - } else { - return (*failure_func_ptr)(e); - } - }; - return PG::do_osd_ops_iertr::make_ready_future>( - std::move(seastar::now()), - std::move(all_completed()) - ); - }); - })); -} - PG::interruptible_future<> PG::complete_error_log(const ceph_tid_t& rep_tid, const eversion_t& version) { @@ -1146,7 +1007,7 @@ PG::interruptible_future<> PG::complete_error_log(const ceph_tid_t& rep_tid, return result; } -PG::interruptible_future> PG::submit_error_log( +PG::interruptible_future PG::submit_error_log( Ref m, const OpInfo &op_info, ObjectContextRef obc, @@ -1212,142 +1073,84 @@ PG::interruptible_future> PG::submit_error_log( get_collection_ref(), std::move(t) ).then([this] { peering_state.update_trim_to(); - return seastar::make_ready_future>(projected_last_update); + return seastar::make_ready_future(projected_last_update); }); }); }); } -PG::do_osd_ops_iertr::future>> -PG::do_osd_ops( - Ref m, - crimson::net::ConnectionXcoreRef conn, +PG::run_executer_fut PG::run_executer( + OpsExecuter &ox, ObjectContextRef obc, const OpInfo &op_info, - const SnapContext& snapc) + std::vector& ops) { - if (__builtin_expect(stopping, false)) { - throw crimson::common::system_shutdown_exception(); - } - return do_osd_ops_execute>( - seastar::make_lw_shared( - Ref{this}, obc, op_info, *m, conn, snapc), - obc, - op_info, - m, - m->ops, - // success_func - [this, m, obc, may_write = op_info.may_write(), - may_read = op_info.may_read(), rvec = op_info.allows_returnvec()] { - // TODO: should stop at the first op which returns a negative retval, - // cmpext uses it for returning the index of first unmatched byte - int result = m->ops.empty() ? 0 : m->ops.back().rval.code; - if (may_read && result >= 0) { - for (auto &osdop : m->ops) { - if (osdop.rval < 0 && !(osdop.op.flags & CEPH_OSD_OP_FLAG_FAILOK)) { - result = osdop.rval.code; - break; - } - } - } else if (result > 0 && may_write && !rvec) { - result = 0; - } else if (result < 0 && (m->ops.empty() ? 
- 0 : m->ops.back().op.flags & CEPH_OSD_OP_FLAG_FAILOK)) { - result = 0; - } - auto reply = crimson::make_message(m.get(), - result, - get_osdmap_epoch(), - 0, - false); - reply->add_flags(CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); - logger().debug( - "do_osd_ops: {} - object {} sending reply", - *m, - m->get_hobj()); - if (obc->obs.exists) { - reply->set_reply_versions(peering_state.get_info().last_update, - obc->obs.oi.user_version); - } else { - reply->set_reply_versions(peering_state.get_info().last_update, - peering_state.get_info().last_user_version); - } - return do_osd_ops_iertr::make_ready_future>( - std::move(reply)); - }, - // failure_func - [m, this] - (const std::error_code& e) { - logger().error("do_osd_ops_execute::failure_func {} got error: {}", - *m, e); - return log_reply(m, e); + LOG_PREFIX(PG::run_executer); + auto rollbacker = ox.create_rollbacker( + [stored_obc=duplicate_obc(obc)](auto &obc) mutable { + obc->update_from(*stored_obc); + }); + auto rollback_on_error = seastar::defer([&rollbacker] { + rollbacker.rollback_obc_if_modified(); }); -} -PG::do_osd_ops_iertr::future> -PG::log_reply( - Ref m, - const std::error_code& e) -{ - auto reply = crimson::make_message( - m.get(), -e.value(), get_osdmap_epoch(), 0, false); - if (m->ops.empty() ? 0 : - m->ops.back().op.flags & CEPH_OSD_OP_FLAG_FAILOK) { - reply->set_result(0); - } - // For all ops except for CMPEXT, the correct error value is encoded - // in e.value(). For CMPEXT, osdop.rval has the actual error value. - if (e.value() == ct_error::cmp_fail_error_value) { - assert(!m->ops.empty()); - for (auto &osdop : m->ops) { - if (osdop.rval < 0) { - reply->set_result(osdop.rval); - break; + for (auto &op: ops) { + DEBUGDPP("object {} handle op {}", *this, ox.get_target(), op); + co_await ox.execute_op(op); + } + DEBUGDPP("object {} all operations successful", *this, ox.get_target()); + + // check for full + if ((ox.delta_stats.num_bytes > 0 || + ox.delta_stats.num_objects > 0) && + get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL)) { + const auto& m = ox.get_message(); + if (m.get_reqid().name.is_mds() || // FIXME: ignore MDS for now + m.has_flag(CEPH_OSD_FLAG_FULL_FORCE)) { + INFODPP("full, but proceeding due to FULL_FORCE, or MDS", *this); + } else if (m.has_flag(CEPH_OSD_FLAG_FULL_TRY)) { + // they tried, they failed. + INFODPP("full, replying to FULL_TRY op", *this); + if (get_pgpool().info.has_flag(pg_pool_t::FLAG_FULL_QUOTA)) { + co_await run_executer_fut( + crimson::ct_error::edquot::make()); + } else { + co_await run_executer_fut( + crimson::ct_error::enospc::make()); } + } else { + // drop request + INFODPP("full, dropping request (bad client)", *this); + co_await run_executer_fut( + crimson::ct_error::eagain::make()); } } - reply->set_enoent_reply_versions( - peering_state.get_info().last_update, - peering_state.get_info().last_user_version); - reply->add_flags(CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK); - return do_osd_ops_iertr::make_ready_future>( - std::move(reply)); -} - -PG::do_osd_ops_iertr::future> -PG::do_osd_ops( - ObjectContextRef obc, - std::vector& ops, - const OpInfo &op_info, - const do_osd_ops_params_t &&msg_params) -{ - // This overload is generally used for internal client requests, - // use an empty SnapContext. 
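In the new PG::run_executer() above, the OBC rollback is registered up front (create_rollbacker() wrapped in seastar::defer) and cancelled only once every op and the pool-full checks have passed, so any early error or exception restores the cached object state automatically. A minimal stand-alone sketch of that scope-guard pattern follows; the hand-rolled ScopeGuard stands in for seastar::defer and is not part of this patch.

#include <functional>
#include <iostream>
#include <stdexcept>

// Minimal stand-in for seastar::defer: runs `fn` on scope exit unless cancelled.
class ScopeGuard {
  std::function<void()> fn_;
  bool armed_ = true;
public:
  explicit ScopeGuard(std::function<void()> fn) : fn_(std::move(fn)) {}
  void cancel() { armed_ = false; }
  ~ScopeGuard() { if (armed_) fn_(); }
};

void run_ops(bool fail)
{
  bool obc_modified = false;
  ScopeGuard rollback_on_error([&] {
    if (obc_modified) std::cout << "rolling back cached object state\n";
  });

  obc_modified = true;                               // ops touched the cached object
  if (fail) throw std::runtime_error("op failed");   // guard fires on unwind

  rollback_on_error.cancel();                        // success path: keep new state
}

int main()
{
  try { run_ops(true); } catch (const std::exception&) {}
  run_ops(false);
}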
- return seastar::do_with( - std::move(msg_params), - [=, this, &ops, &op_info](auto &msg_params) { - return do_osd_ops_execute( - seastar::make_lw_shared( - Ref{this}, - obc, - op_info, - msg_params, - msg_params.get_connection(), - SnapContext{} - ), - obc, - op_info, - Ref(), - ops, - // success_func - [] { - return do_osd_ops_iertr::now(); - }, - // failure_func - [] (const std::error_code& e) { - return do_osd_ops_iertr::now(); - }); - }); + rollback_on_error.cancel(); +} + +PG::submit_executer_fut PG::submit_executer( + OpsExecuter &&ox, + const std::vector& ops) { + LOG_PREFIX(PG::submit_executer); + // transaction must commit at this point + return std::move( + ox + ).flush_changes_n_do_ops_effects( + ops, + snap_mapper, + osdriver, + [FNAME, this](auto&& txn, + auto&& obc, + auto&& osd_op_p, + auto&& log_entries) { + DEBUGDPP("object {} submitting txn", *this, obc->get_oid()); + mutate_object(obc, txn, osd_op_p); + return submit_transaction( + std::move(obc), + std::move(txn), + std::move(osd_op_p), + std::move(log_entries)); + }); } PG::interruptible_future> PG::do_pg_ops(Ref m) @@ -1885,4 +1688,19 @@ void PG::cancel_pglog_based_recovery_op() { pglog_based_recovery_op->cancel(); reset_pglog_based_recovery_op(); } + +void PG::C_PG_FinishRecovery::finish(int r) { + LOG_PREFIX(PG::C_PG_FinishRecovery::finish); + auto &peering_state = pg.get_peering_state(); + if (peering_state.is_deleting() || !peering_state.is_clean()) { + DEBUGDPP("raced with delete or repair", pg); + return; + } + if (this == pg.recovery_finisher) { + peering_state.purge_strays(); + pg.recovery_finisher = nullptr; + } else { + DEBUGDPP("stale recovery finsher", pg); + } +} } diff --git a/src/crimson/osd/pg.h b/src/crimson/osd/pg.h index 93279a18c565b..604f49005ff04 100644 --- a/src/crimson/osd/pg.h +++ b/src/crimson/osd/pg.h @@ -375,7 +375,7 @@ class PG : public boost::intrusive_ref_counter< } void check_blocklisted_watchers() final; void clear_primary_state() final { - // Not needed yet + recovery_finisher = nullptr; } void queue_check_readable(epoch_t last_peering_reset, @@ -394,7 +394,7 @@ class PG : public boost::intrusive_ref_counter< void on_replica_activate() final; void on_activate_complete() final; void on_new_interval() final { - // Not needed yet + recovery_finisher = nullptr; } Context *on_clean() final; void on_activate_committed() final { @@ -621,7 +621,7 @@ class PG : public boost::intrusive_ref_counter< void dump_primary(Formatter*); interruptible_future<> complete_error_log(const ceph_tid_t& rep_tid, const eversion_t& version); - interruptible_future> submit_error_log( + interruptible_future submit_error_log( Ref m, const OpInfo &op_info, ObjectContextRef obc, @@ -645,41 +645,35 @@ class PG : public boost::intrusive_ref_counter< } } background_process_lock; - using do_osd_ops_ertr = crimson::errorator< - crimson::ct_error::eagain>; - using do_osd_ops_iertr = - ::crimson::interruptible::interruptible_errorator< - ::crimson::osd::IOInterruptCondition, - ::crimson::errorator>; - template - using pg_rep_op_fut_t = - std::tuple, - do_osd_ops_iertr::future>; - do_osd_ops_iertr::future>> do_osd_ops( - Ref m, - crimson::net::ConnectionXcoreRef conn, + using run_executer_ertr = crimson::compound_errorator_t< + OpsExecuter::osd_op_errorator, + crimson::errorator< + crimson::ct_error::edquot, + crimson::ct_error::eagain, + crimson::ct_error::enospc + > + >; + using run_executer_iertr = crimson::interruptible::interruptible_errorator< + ::crimson::osd::IOInterruptCondition, + run_executer_ertr>; + using 
run_executer_fut = run_executer_iertr::future<>; + run_executer_fut run_executer( + OpsExecuter &ox, ObjectContextRef obc, const OpInfo &op_info, - const SnapContext& snapc); + std::vector& ops); + + using submit_executer_ret = std::tuple< + interruptible_future<>, + interruptible_future<>>; + using submit_executer_fut = interruptible_future< + submit_executer_ret>; + submit_executer_fut submit_executer( + OpsExecuter &&ox, + const std::vector& ops); struct do_osd_ops_params_t; - do_osd_ops_iertr::future> log_reply( - Ref m, - const std::error_code& e); - do_osd_ops_iertr::future> do_osd_ops( - ObjectContextRef obc, - std::vector& ops, - const OpInfo &op_info, - const do_osd_ops_params_t &¶ms); - template - do_osd_ops_iertr::future> do_osd_ops_execute( - seastar::lw_shared_ptr ox, - ObjectContextRef obc, - const OpInfo &op_info, - Ref m, - std::vector& ops, - SuccessFunc&& success_func, - FailureFunc&& failure_func); + interruptible_future> do_pg_ops(Ref m); interruptible_future< std::tuple, interruptible_future<>>> @@ -712,9 +706,17 @@ class PG : public boost::intrusive_ref_counter< } seastar::future<> stop(); private: + class C_PG_FinishRecovery : public Context { + public: + explicit C_PG_FinishRecovery(PG &pg) : pg(pg) {} + void finish(int r) override; + private: + PG& pg; + }; std::unique_ptr backend; std::unique_ptr recovery_backend; std::unique_ptr recovery_handler; + C_PG_FinishRecovery *recovery_finisher; PeeringState peering_state; eversion_t projected_last_update; diff --git a/src/crimson/osd/pg_backend.cc b/src/crimson/osd/pg_backend.cc index fa8201b61c28d..24a381b4cf7e2 100644 --- a/src/crimson/osd/pg_backend.cc +++ b/src/crimson/osd/pg_backend.cc @@ -1289,7 +1289,7 @@ void PGBackend::clone( const ObjectState& d_os, ceph::os::Transaction& txn) { - // See OpsExecutor::execute_clone documentation + // See OpsExecuter::execute_clone documentation txn.clone(coll->get_cid(), ghobject_t{os.oi.soid}, ghobject_t{d_os.oi.soid}); { ceph::bufferlist bv; diff --git a/src/crimson/osd/pg_recovery.cc b/src/crimson/osd/pg_recovery.cc index 4f874d526b3c6..ec3af0d2b0006 100644 --- a/src/crimson/osd/pg_recovery.cc +++ b/src/crimson/osd/pg_recovery.cc @@ -528,10 +528,12 @@ void PGRecovery::request_primary_scan( void PGRecovery::enqueue_push( const hobject_t& obj, - const eversion_t& v) + const eversion_t& v, + const std::vector &peers) { - logger().info("{}: obj={} v={}", - __func__, obj, v); + logger().info("{}: obj={} v={} peers={}", __func__, obj, v, peers); + auto &peering_state = pg->get_peering_state(); + peering_state.prepare_backfill_for_missing(obj, v, peers); auto [recovering, added] = pg->get_recovery_backend()->add_recovering(obj); if (!added) return; diff --git a/src/crimson/osd/pg_recovery.h b/src/crimson/osd/pg_recovery.h index 6cd29c3dc5226..705b3176b9790 100644 --- a/src/crimson/osd/pg_recovery.h +++ b/src/crimson/osd/pg_recovery.h @@ -110,7 +110,8 @@ class PGRecovery : public crimson::osd::BackfillState::BackfillListener { const hobject_t& begin) final; void enqueue_push( const hobject_t& obj, - const eversion_t& v) final; + const eversion_t& v, + const std::vector &peers) final; void enqueue_drop( const pg_shard_t& target, const hobject_t& obj, diff --git a/src/crimson/osd/shard_services.cc b/src/crimson/osd/shard_services.cc index 5f7c4a624471b..a053d9d5044c5 100644 --- a/src/crimson/osd/shard_services.cc +++ b/src/crimson/osd/shard_services.cc @@ -767,20 +767,26 @@ seastar::future<> ShardServices::dispatch_context_transaction( 
LOG_PREFIX(OSDSingletonState::dispatch_context_transaction); if (ctx.transaction.empty()) { DEBUG("empty transaction"); - return seastar::now(); + co_await get_store().flush(col); + Context* on_commit( + ceph::os::Transaction::collect_all_contexts(ctx.transaction)); + if (on_commit) { + on_commit->complete(0); + } + co_return; } DEBUG("do_transaction ..."); - auto ret = get_store().do_transaction( + co_await get_store().do_transaction( col, ctx.transaction.claim_and_reset()); - return ret; + co_return; } seastar::future<> ShardServices::dispatch_context_messages( BufferedRecoveryMessages &&ctx) { - LOG_PREFIX(OSDSingletonState::dispatch_context_transaction); + LOG_PREFIX(OSDSingletonState::dispatch_context_messages); auto ret = seastar::parallel_for_each(std::move(ctx.message_map), [FNAME, this](auto& osd_messages) { auto& [peer, messages] = osd_messages; diff --git a/src/exporter/DaemonMetricCollector.cc b/src/exporter/DaemonMetricCollector.cc index d4930ea35c0d2..4b8a8131bcfd3 100644 --- a/src/exporter/DaemonMetricCollector.cc +++ b/src/exporter/DaemonMetricCollector.cc @@ -168,10 +168,17 @@ void DaemonMetricCollector::dump_asok_metrics(bool sort_metrics, int64_t counter if (sockClientsPing) { bool ok; sock_client.ping(&ok); + std::string ceph_daemon_socket_up_desc( + "Reports the health status of a Ceph daemon, as determined by whether it is able to respond via its admin socket (1 = healthy, 0 = unhealthy)."); + labels_t ceph_daemon_socket_up_labels; + ceph_daemon_socket_up_labels["hostname"] = quote(ceph_get_hostname()); + ceph_daemon_socket_up_labels["ceph_daemon"] = quote(daemon_name); + add_metric(builder, static_cast(ok), "ceph_daemon_socket_up", ceph_daemon_socket_up_desc, + "gauge", ceph_daemon_socket_up_labels); if (!ok) { failures++; continue; - } + } } std::string counter_dump_response = dump_response.size() > 0 ? 
dump_response : asok_request(sock_client, "counter dump", daemon_name); diff --git a/src/exporter/DaemonMetricCollector.h b/src/exporter/DaemonMetricCollector.h index d2e929b4d670f..3302e95df916c 100644 --- a/src/exporter/DaemonMetricCollector.h +++ b/src/exporter/DaemonMetricCollector.h @@ -42,11 +42,11 @@ class DaemonMetricCollector { std::map clients; std::string metrics; std::pair add_fixed_name_metrics(std::string metric_name); + void update_sockets(); private: std::mutex metrics_mutex; std::unique_ptr builder; - void update_sockets(); void request_loop(boost::asio::steady_timer &timer); void dump_asok_metric(boost::json::object perf_info, diff --git a/src/log/Entry.h b/src/log/Entry.h index 3677c8eb95180..db39eca0ef3ba 100644 --- a/src/log/Entry.h +++ b/src/log/Entry.h @@ -4,9 +4,12 @@ #ifndef __CEPH_LOG_ENTRY_H #define __CEPH_LOG_ENTRY_H +#include "include/compat.h" + #include "log/LogClock.h" #include "common/StackStringStream.h" +#include "common/Thread.h" #include "boost/container/small_vector.hpp" @@ -14,6 +17,7 @@ #include + namespace ceph { namespace logging { @@ -27,7 +31,10 @@ class Entry { m_thread(pthread_self()), m_prio(pr), m_subsys(sub) - {} + { + strncpy(m_thread_name, Thread::get_thread_name().data(), 16); + m_thread_name[15] = '\0'; + } Entry(const Entry &) = default; Entry& operator=(const Entry &) = default; Entry(Entry &&e) = default; @@ -40,6 +47,7 @@ class Entry { time m_stamp; pthread_t m_thread; short m_prio, m_subsys; + char m_thread_name[16]; static log_clock& clock() { static log_clock clock; diff --git a/src/log/Log.cc b/src/log/Log.cc index 69f6df82ecbb7..49dd03c06c096 100644 --- a/src/log/Log.cc +++ b/src/log/Log.cc @@ -493,13 +493,13 @@ void Log::dump_recent() _flush(m_flush, false); _log_message("--- begin dump of recent events ---", true); - std::set recent_pthread_ids; + std::set> recent_pthread_ids; { EntryVector t; t.insert(t.end(), std::make_move_iterator(m_recent.begin()), std::make_move_iterator(m_recent.end())); m_recent.clear(); for (const auto& e : t) { - recent_pthread_ids.emplace(e.m_thread); + recent_pthread_ids.emplace(std::make_pair(e.m_thread, e.m_thread_name)); } _flush(t, true); } @@ -515,14 +515,11 @@ void Log::dump_recent() m_stderr_log, m_stderr_crash), true); _log_message("--- pthread ID / name mapping for recent threads ---", true); - for (const auto pthread_id : recent_pthread_ids) + for (auto& [pthread_id, pthread_name] : recent_pthread_ids) { - char pthread_name[16] = {0}; //limited by 16B include terminating null byte. - ceph_pthread_getname(pthread_id, pthread_name, sizeof(pthread_name)); // we want the ID to be printed in the same format as we use for a log entry. // The reason is easier grepping. 
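The log changes above capture the creating thread's name into a fixed 16-byte Entry field at construction time (strncpy plus an explicit terminating NUL), so dump_recent() can print the thread-ID/name mapping even for threads that have since exited, instead of calling ceph_pthread_getname at dump time. A small stand-alone illustration of that truncating copy; LogEntry is an illustrative stand-in, not the real ceph::logging::Entry.

#include <cstring>
#include <iostream>
#include <string>

// Copies a thread name into a fixed 16-byte buffer (15 chars + NUL), the same
// limit pthread_setname_np imposes; longer names are silently truncated.
struct LogEntry {
  char thread_name[16];
  explicit LogEntry(const std::string& name) {
    std::strncpy(thread_name, name.c_str(), sizeof(thread_name));
    thread_name[sizeof(thread_name) - 1] = '\0';
  }
};

int main()
{
  LogEntry e("msgr-worker-0-very-long-name");
  std::cout << e.thread_name << "\n";   // truncated to 15 characters
}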
- _log_message(fmt::format(" {:x} / {}", - tid_to_int(pthread_id), pthread_name), true); + _log_message(fmt::format(" {:x} / {}", tid_to_int(pthread_id), pthread_name), true); } _log_message(fmt::format(" max_recent {:9}", m_recent.capacity()), true); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 0e9b6996ad2c5..dfad411d323d8 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -4589,8 +4589,11 @@ void InodeStoreBase::dump(Formatter *f) const for (const auto& [key, val] : *xattrs) { f->open_object_section("xattr"); f->dump_string("key", key); - std::string v(val.c_str(), val.length()); - f->dump_string("val", v); + if (val.length()) { + f->dump_string("val", std::string(val.c_str(), val.length())); + } else { + f->dump_string("val", ""); + } f->close_section(); } } diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 62d37574ded67..b935ace4affba 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -385,6 +385,17 @@ class SetHandler : public FileSystemCommandHandler return -EINVAL; } + bool confirm = false; + cmd_getval(cmdmap, "yes_i_really_mean_it", confirm); + if (var == "max_mds" && !confirm && mon->mdsmon()->has_any_health_warning()) { + ss << "One or more file system health warnings are present. Modifying " + << "the file system setting variable \"max_mds\" may not help " + << "troubleshoot or recover from these warnings and may further " + << "destabilize the system. If you really wish to proceed, run " + << "again with --yes-i-really-mean-it"; + return -EPERM; + } + return set_val(mon, fsmap, op, cmdmap, ss, fsp->get_fscid(), var, val); } }; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 76a57ac443de7..d8cca4ceb61b1 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -1557,6 +1557,13 @@ bool MDSMonitor::has_health_warnings(vector warnings) return false; } +bool MDSMonitor::has_any_health_warning() +{ + return std::any_of( + pending_daemon_health.begin(), pending_daemon_health.end(), + [](auto& it) { return !it.second.metrics.empty() ? 
true : false; }); +} + int MDSMonitor::filesystem_command( FSMap &fsmap, MonOpRequestRef op, diff --git a/src/mon/MDSMonitor.h b/src/mon/MDSMonitor.h index b0f88cd31302d..dd2a269009de2 100644 --- a/src/mon/MDSMonitor.h +++ b/src/mon/MDSMonitor.h @@ -53,6 +53,7 @@ class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHand bool prepare_update(MonOpRequestRef op) override; bool should_propose(double& delay) override; bool has_health_warnings(std::vector warnings); + bool has_any_health_warning(); bool should_print_status() const { auto& fs = get_fsmap(); diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 7b1bc9b8e56cf..c01ea9e710321 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -254,7 +254,7 @@ void NVMeofGwMap::track_deleting_gws(const NvmeGroupKey& group_key, } } -int NVMeofGwMap::process_gw_map_gw_no_subsystems( +int NVMeofGwMap::process_gw_map_gw_no_subsys_no_listeners( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending) { int rc = 0; @@ -424,7 +424,6 @@ void NVMeofGwMap::find_failback_gw( auto& gws_states = created_gws[group_key]; auto& gw_state = created_gws[group_key][gw_id]; bool do_failback = false; - dout(10) << "Find failback GW for GW " << gw_id << dendl; for (auto& gw_state_it: gws_states) { auto& st = gw_state_it.second; diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 2971037174218..267d85b10f918 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -54,7 +54,7 @@ class NVMeofGwMap int process_gw_map_gw_down( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending); - int process_gw_map_gw_no_subsystems( + int process_gw_map_gw_no_subsys_no_listeners( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending); void update_active_timers(bool &propose_pending); diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 734e90defd946..d9e936e27df34 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -367,6 +367,13 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) std::stringstream sstrm1; sstrm1 << state.availability; f->dump_string("Availability", sstrm1.str()); + uint32_t num_listeners = 0; + if (state.availability == gw_availability_t::GW_AVAILABLE) { + for (auto &subs: state.subsystems) { + num_listeners += subs.listeners.size(); + } + f->dump_unsigned("num-listeners", num_listeners); + } sstrm1.str(""); for (auto &state_itr: map.created_gws[group_key][gw_id].sm_state) { sstrm1 << " " << state_itr.first + 1 << ": " @@ -476,7 +483,7 @@ void NVMeofGwMon::process_gw_down(const NvmeGwId &gw_id, if (avail == gw_availability_t::GW_UNAVAILABLE) { pending_map.process_gw_map_gw_down(gw_id, group_key, propose_pending); } else { - pending_map.process_gw_map_gw_no_subsystems(gw_id, group_key, propose_pending); + pending_map.process_gw_map_gw_no_subsys_no_listeners(gw_id, group_key, propose_pending); } } @@ -600,7 +607,18 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) if (sub.size() == 0) { avail = gw_availability_t::GW_CREATED; - } + } else { + bool listener_found = false; + for (auto &subs: sub) { + if (subs.listeners.size()) { + listener_found = true; + break; + } + } + if (!listener_found) { + avail = gw_availability_t::GW_CREATED; + } + }// for HA no-subsystems and no-listeners are same usecases if (pending_map.created_gws[group_key][gw_id].subsystems != sub) { dout(10) << "subsystems of GW changed, propose pending " << gw_id << dendl; pending_map.created_gws[group_key][gw_id].subsystems = sub; 
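The prepare_beacon() change above walks the reported subsystems with a hand-rolled flag to decide whether any listener exists; for HA purposes a gateway with subsystems but no listeners is treated the same as one with no subsystems at all and is downgraded to GW_CREATED. A compact sketch of that check, with hypothetical stand-in types rather than the NVMeof structures:

```cpp
// Hypothetical stand-in types; only the shape of the check matters here.
#include <algorithm>
#include <string>
#include <vector>

struct SubsystemSketch {
  std::string nqn;
  std::vector<std::string> listeners;
};

enum class AvailabilitySketch { GW_CREATED, GW_AVAILABLE };

AvailabilitySketch adjust_availability(AvailabilitySketch reported,
                                       const std::vector<SubsystemSketch>& subs) {
  const bool any_listener = std::any_of(
      subs.begin(), subs.end(),
      [](const SubsystemSketch& s) { return !s.listeners.empty(); });
  // "no subsystems" and "no listeners" are the same use case for HA:
  // the gateway exists but is not serving anything yet.
  return (subs.empty() || !any_listener) ? AvailabilitySketch::GW_CREATED
                                         : reported;
}
```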
diff --git a/src/mrgw.sh b/src/mrgw.sh index 05739bf015ebc..86bef336867de 100755 --- a/src/mrgw.sh +++ b/src/mrgw.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# Start/restart a radosgw instance on the given mstart.sh cluster. + set -e rgw_frontend=${RGW_FRONTEND:-"beast"} diff --git a/src/mrun b/src/mrun index a85221800218b..df7e3542b93a5 100755 --- a/src/mrun +++ b/src/mrun @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# Run a ceph command against the given mstart.sh cluster. + [ $# -lt 2 ] && echo "usage: $0 [params...]" && exit 1 root=`dirname $0` diff --git a/src/mstart.sh b/src/mstart.sh index 34b57e1761125..0c512ca9eb8c3 100755 --- a/src/mstart.sh +++ b/src/mstart.sh @@ -1,5 +1,33 @@ #!/bin/sh +# Deploy a vstart.sh cluster in a named subdirectory. This makes it possible to +# start multiple clusters in different subdirectories. See mstop.sh for cleanup. +# +# Example: +# +# ~/ceph/build $ MON=1 OSD=1 RGW=1 MDS=0 MGR=0 ../src/mstart.sh c1 -n -d +# ~/ceph/build $ MON=1 OSD=1 RGW=1 MDS=0 MGR=0 ../src/mstart.sh c2 -n -d +# +# ~/ceph/build $ ls run +# c1 c2 +# ~/ceph/build $ ls run/c1 +# asok ceph.conf dev keyring out +# +# ~/ceph/build $ ../src/mrun c1 radosgw-admin user list +# [ +# "56789abcdef0123456789abcdef0123456789abcdef0123456789abcdef01234", +# "testx$9876543210abcdef0123456789abcdef0123456789abcdef0123456789abcdef", +# "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef", +# "testacct1user", +# "test", +# "testacct2root", +# "testacct1root", +# "testid" +# ] +# +# ~/ceph/build $ ../src/mstop.sh c1 +# ~/ceph/build $ ../src/mstop.sh c2 + usage="usage: $0 [vstart options]..\n" usage_exit() { diff --git a/src/mstop.sh b/src/mstop.sh index 702d1765941e5..eec0ca02e42ae 100755 --- a/src/mstop.sh +++ b/src/mstop.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +# Stop a named cluster started by mstart.sh + set -e script_root=`dirname $0` diff --git a/src/mypy-constrains.txt b/src/mypy-constrains.txt index 7810870804ed2..0a79b8ef4f11d 100644 --- a/src/mypy-constrains.txt +++ b/src/mypy-constrains.txt @@ -2,7 +2,7 @@ # Unfortunately this means we have to manually update those # packages regularly. -mypy==1.1.1 +mypy==1.9 # global types-python-dateutil==0.1.3 diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 3dcd96830c48d..5f4f1a4d48ac2 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3760,15 +3760,16 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ { auto t0 = mono_clock::now(); std::lock_guard hl(h->lock); + auto& fnode = h->file->fnode; dout(10) << __func__ << " 0x" << std::hex << offset << std::dec - << " file " << h->file->fnode << dendl; + << " file " << fnode << dendl; if (h->file->deleted) { dout(10) << __func__ << " deleted, no-op" << dendl; return 0; } // we never truncate internal log files - ceph_assert(h->file->fnode.ino > 1); + ceph_assert(fnode.ino > 1); // truncate off unflushed data? if (h->pos < offset && @@ -3782,20 +3783,58 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ if (r < 0) return r; } - if (offset == h->file->fnode.size) { - return 0; // no-op! 
- } - if (offset > h->file->fnode.size) { + if (offset > fnode.size) { ceph_abort_msg("truncate up not supported"); } - ceph_assert(h->file->fnode.size >= offset); + ceph_assert(offset <= fnode.size); _flush_bdev(h); - - std::lock_guard ll(log.lock); - vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size - offset); - h->file->fnode.size = offset; - h->file->is_dirty = true; - log.t.op_file_update_inc(h->file->fnode); + { + std::lock_guard ll(log.lock); + std::lock_guard dl(dirty.lock); + bool changed_extents = false; + vselector->sub_usage(h->file->vselector_hint, fnode); + uint64_t x_off = 0; + auto p = fnode.seek(offset, &x_off); + uint64_t cut_off = + (p == fnode.extents.end()) ? 0 : p2roundup(x_off, alloc_size[p->bdev]); + uint64_t new_allocated; + if (0 == cut_off) { + // whole pextent to remove + changed_extents = true; + new_allocated = offset; + } else if (cut_off < p->length) { + dirty.pending_release[p->bdev].insert(p->offset + cut_off, p->length - cut_off); + new_allocated = (offset - x_off) + cut_off; + p->length = cut_off; + changed_extents = true; + ++p; + } else { + ceph_assert(cut_off >= p->length); + new_allocated = (offset - x_off) + p->length; + // just leave it here + ++p; + } + while (p != fnode.extents.end()) { + dirty.pending_release[p->bdev].insert(p->offset, p->length); + p = fnode.extents.erase(p); + changed_extents = true; + } + if (changed_extents) { + fnode.size = offset; + fnode.allocated = new_allocated; + fnode.reset_delta(); + log.t.op_file_update(fnode); + // sad, but is_dirty must be set to signal flushing of the log + h->file->is_dirty = true; + } else { + if (offset != fnode.size) { + fnode.size = offset; + //skipping log.t.op_file_update_inc, it will be done by flush() + h->file->is_dirty = true; + } + } + vselector->add_usage(h->file->vselector_hint, fnode); + } logger->tinc(l_bluefs_truncate_lat, mono_clock::now() - t0); return 0; } diff --git a/src/os/bluestore/BlueRocksEnv.cc b/src/os/bluestore/BlueRocksEnv.cc index 68040af428280..7cbe0a1d12146 100644 --- a/src/os/bluestore/BlueRocksEnv.cc +++ b/src/os/bluestore/BlueRocksEnv.cc @@ -221,18 +221,12 @@ class BlueRocksWritableFile : public rocksdb::WritableFile { } rocksdb::Status Close() override { - fs->fsync(h); - // mimic posix env, here. shrug. 
- size_t block_size; - size_t last_allocated_block; - GetPreallocationStatus(&block_size, &last_allocated_block); - if (last_allocated_block > 0) { - int r = fs->truncate(h, h->pos); - if (r < 0) - return err_to_status(r); + int r = fs->truncate(h, h->pos); + if (r < 0) { + return err_to_status(r); } - + fs->fsync(h); return rocksdb::Status::OK(); } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 44a171873c08c..535cf166f0a18 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6794,9 +6794,8 @@ void BlueStore::_main_bdev_label_try_reserve() vector candidate_positions; vector accepted_positions; uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size); - for (size_t i = 1; i < bdev_label_positions.size(); i++) { - uint64_t location = bdev_label_positions[i]; - if (location + lsize <= bdev->get_size()) { + for (uint64_t location : bdev_label_valid_locations) { + if (location != BDEV_FIRST_LABEL_POSITION) { candidate_positions.push_back(location); } } @@ -11497,9 +11496,7 @@ int BlueStore::_fsck_on_open(BlueStore::FSCKDepth depth, bool repair) string p = path + "/block"; _write_bdev_label(cct, bdev, p, bdev_label, bdev_labels_in_repair); for (uint64_t pos : bdev_labels_in_repair) { - if (pos != BDEV_FIRST_LABEL_POSITION) { - bdev_label_valid_locations.push_back(pos); - } + bdev_label_valid_locations.push_back(pos); } repaired += bdev_labels_in_repair.size(); } @@ -20572,6 +20569,14 @@ int BlueStore::read_allocation_from_drive_for_bluestore_tool() if (ret < 0) { return ret; } + if (bdev_label_multi) { + uint64_t lsize = std::max(BDEV_LABEL_BLOCK_SIZE, min_alloc_size); + for (uint64_t p : bdev_label_valid_locations) { + if (p != BDEV_FIRST_LABEL_POSITION) { + allocator->init_rm_free(p, lsize); + } + } + } duration = ceph_clock_now() - start; stats.insert_count = 0; diff --git a/src/osd/PG.cc b/src/osd/PG.cc index 76256df49b807..71b9b71338506 100644 --- a/src/osd/PG.cc +++ b/src/osd/PG.cc @@ -1137,46 +1137,10 @@ void PG::update_snap_map( const vector &log_entries, ObjectStore::Transaction &t) { - for (auto i = log_entries.cbegin(); i != log_entries.cend(); ++i) { + for (const auto& entry : log_entries) { OSDriver::OSTransaction _t(osdriver.get_transaction(&t)); - if (i->soid.snap < CEPH_MAXSNAP) { - if (i->is_delete()) { - int r = snap_mapper.remove_oid( - i->soid, - &_t); - if (r) - derr << __func__ << " remove_oid " << i->soid << " failed with " << r << dendl; - // On removal tolerate missing key corruption - ceph_assert(r == 0 || r == -ENOENT); - } else if (i->is_update()) { - ceph_assert(i->snaps.length() > 0); - vector snaps; - bufferlist snapbl = i->snaps; - auto p = snapbl.cbegin(); - try { - decode(snaps, p); - } catch (...) 
{ - derr << __func__ << " decode snaps failure on " << *i << dendl; - snaps.clear(); - } - set _snaps(snaps.begin(), snaps.end()); - - if (i->is_clone() || i->is_promote()) { - snap_mapper.add_oid( - i->soid, - _snaps, - &_t); - } else if (i->is_modify()) { - int r = snap_mapper.update_snaps( - i->soid, - _snaps, - 0, - &_t); - ceph_assert(r == 0); - } else { - ceph_assert(i->is_clean()); - } - } + if (entry.soid.snap < CEPH_MAXSNAP) { + snap_mapper.update_snap_map(entry, &_t); } } } diff --git a/src/osd/osd_types_fmt.h b/src/osd/osd_types_fmt.h index 04f4d46ee5109..100ce6e4646b3 100644 --- a/src/osd/osd_types_fmt.h +++ b/src/osd/osd_types_fmt.h @@ -392,4 +392,6 @@ inline std::ostream &operator<<(std::ostream &lhs, const object_stat_sum_t &sum) #if FMT_VERSION >= 90000 template struct fmt::formatter> : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; +template <> struct fmt::formatter : fmt::ostream_formatter {}; #endif diff --git a/src/osd/scrubber/ScrubStore.cc b/src/osd/scrubber/ScrubStore.cc index a00ab2caecee6..7f28ca2d642a8 100644 --- a/src/osd/scrubber/ScrubStore.cc +++ b/src/osd/scrubber/ScrubStore.cc @@ -1,11 +1,13 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab -#include "ScrubStore.h" +#include "./ScrubStore.h" #include "osd/osd_types.h" #include "common/scrub_types.h" #include "include/rados/rados_types.hpp" +#include "pg_scrubber.h" + using std::ostringstream; using std::string; using std::vector; @@ -13,21 +15,9 @@ using std::vector; using ceph::bufferlist; namespace { -ghobject_t make_scrub_object(const spg_t& pgid) -{ - ostringstream ss; - ss << "scrub_" << pgid; - return pgid.make_temp_ghobject(ss.str()); -} - string first_object_key(int64_t pool) { - auto hoid = hobject_t(object_t(), - "", - 0, - 0x00000000, - pool, - ""); + auto hoid = hobject_t(object_t(), "", CEPH_NOSNAP, 0x00000000, pool, ""); hoid.build_hash_cache(); return "SCRUB_OBJ_" + hoid.to_str(); } @@ -47,12 +37,7 @@ string to_object_key(int64_t pool, const librados::object_id_t& oid) string last_object_key(int64_t pool) { - auto hoid = hobject_t(object_t(), - "", - 0, - 0xffffffff, - pool, - ""); + auto hoid = hobject_t(object_t(), "", CEPH_NOSNAP, 0xffffffff, pool, ""); hoid.build_hash_cache(); return "SCRUB_OBJ_" + hoid.to_str(); } @@ -60,14 +45,9 @@ string last_object_key(int64_t pool) string first_snap_key(int64_t pool) { // scrub object is per spg_t object, so we can misuse the hash (pg.seed) for - // the representing the minimal and maximum keys. and this relies on how + // representing the minimal and maximum keys. and this relies on how // hobject_t::to_str() works: hex(pool).hex(revhash). 
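The key helpers above bracket one pool's scrub entries in the flat, lexicographically sorted OMAP key space by plugging the minimal (0x00000000) and maximal (0xffffffff) hash values into the key format. A toy illustration of that sentinel idea, with a deliberately simplified key layout (the real keys go through hobject_t::to_str(), which encodes more fields and a bit-reversed hash):

```cpp
// Toy key scheme (NOT hobject_t::to_str()): "SCRUB_SS_" + hex(pool) + "." + hex(hash).
// With fixed-width hex fields, every key of a given pool sorts between the key
// built with hash 0x00000000 and the key built with hash 0xffffffff.
#include <cstdint>
#include <cstdio>
#include <string>

std::string toy_snap_key(int64_t pool, uint32_t hash) {
  char buf[64];
  std::snprintf(buf, sizeof(buf), "SCRUB_SS_%016llx.%08x",
                static_cast<unsigned long long>(pool),
                static_cast<unsigned>(hash));
  return buf;
}

// first key of the pool: toy_snap_key(pool, 0x00000000)
// last key of the pool:  toy_snap_key(pool, 0xffffffff)
// iterating the sorted keys in [first, last] visits that pool's entries only.
```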
- auto hoid = hobject_t(object_t(), - "", - 0, - 0x00000000, - pool, - ""); + auto hoid = hobject_t(object_t(), "", 0, 0x00000000, pool, ""); hoid.build_hash_cache(); return "SCRUB_SS_" + hoid.to_str(); } @@ -86,123 +66,447 @@ string to_snap_key(int64_t pool, const librados::object_id_t& oid) string last_snap_key(int64_t pool) { - auto hoid = hobject_t(object_t(), - "", - 0, - 0xffffffff, - pool, - ""); + auto hoid = hobject_t(object_t(), "", 0, 0xffffffff, pool, ""); hoid.build_hash_cache(); return "SCRUB_SS_" + hoid.to_str(); } + +} // namespace + +#undef dout_context +#define dout_context (m_scrubber.get_pg_cct()) +#define dout_subsys ceph_subsys_osd +#undef dout_prefix +#define dout_prefix _prefix_fn(_dout, this, __func__) + +template +static std::ostream& _prefix_fn(std::ostream* _dout, T* t, std::string fn = "") +{ + return t->gen_prefix(*_dout, fn); } namespace Scrub { -Store* -Store::create(ObjectStore* store, - ObjectStore::Transaction* t, - const spg_t& pgid, - const coll_t& coll) +Store::Store( + PgScrubber& scrubber, + ObjectStore& osd_store, + ObjectStore::Transaction* t, + const spg_t& pgid, + const coll_t& coll) + : m_scrubber{scrubber} + , object_store{osd_store} + , coll{coll} { - ceph_assert(store); ceph_assert(t); - ghobject_t oid = make_scrub_object(pgid); - t->touch(coll, oid); - return new Store{coll, oid, store}; + + // shallow errors DB object + const auto sh_err_obj = + pgid.make_temp_ghobject(fmt::format("scrub_{}", pgid)); + t->touch(coll, sh_err_obj); + shallow_db.emplace( + pgid, sh_err_obj, OSDriver{&object_store, coll, sh_err_obj}); + + // and the DB for deep errors + const auto dp_err_obj = + pgid.make_temp_ghobject(fmt::format("deep_scrub_{}", pgid)); + t->touch(coll, dp_err_obj); + deep_db.emplace(pgid, dp_err_obj, OSDriver{&object_store, coll, dp_err_obj}); + + dout(20) << fmt::format( + "created Scrub::Store for pg[{}], shallow: {}, deep: {}", + pgid, sh_err_obj, dp_err_obj) + << dendl; } -Store::Store(const coll_t& coll, const ghobject_t& oid, ObjectStore* store) - : coll(coll), - hoid(oid), - driver(store, coll, hoid), - backend(&driver) -{} Store::~Store() { - ceph_assert(results.empty()); + ceph_assert(!shallow_db || shallow_db->results.empty()); + ceph_assert(!deep_db || deep_db->results.empty()); } + +std::ostream& Store::gen_prefix(std::ostream& out, std::string_view fn) const +{ + if (fn.starts_with("operator")) { + // it's a lambda, and __func__ is not available + return m_scrubber.gen_prefix(out) << "Store::"; + } else { + return m_scrubber.gen_prefix(out) << "Store::" << fn << ": "; + } +} + + void Store::add_error(int64_t pool, const inconsistent_obj_wrapper& e) { add_object_error(pool, e); } + void Store::add_object_error(int64_t pool, const inconsistent_obj_wrapper& e) { - bufferlist bl; - e.encode(bl); - results[to_object_key(pool, e.object)] = bl; + using librados::obj_err_t; + const auto key = to_object_key(pool, e.object); + dout(20) << fmt::format( + "{}: adding error for object {} ({}). Errors: {} ({}/{}) " + "unfiltered:{}", + (current_level == scrub_level_t::deep ? 
"deep" : "shallow"), + e.object, key, obj_err_t{e.errors}, + obj_err_t{e.errors & obj_err_t::SHALLOW_ERRORS}, + obj_err_t{e.errors & obj_err_t::DEEP_ERRORS}, e) + << dendl; + + if (current_level == scrub_level_t::deep) { + // not overriding the deep errors DB during shallow scrubs + deep_db->results[key] = e.encode(); + } + + // only shallow errors are stored in the shallow DB + auto e_copy = e; + e_copy.errors &= librados::obj_err_t::SHALLOW_ERRORS; + e_copy.union_shards.errors &= librados::err_t::SHALLOW_ERRORS; + shallow_db->results[key] = e_copy.encode(); } + void Store::add_error(int64_t pool, const inconsistent_snapset_wrapper& e) { add_snap_error(pool, e); } + void Store::add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e) { - bufferlist bl; - e.encode(bl); - results[to_snap_key(pool, e.object)] = bl; + // note: snap errors are only placed in the shallow store + shallow_db->results[to_snap_key(pool, e.object)] = e.encode(); } -bool Store::empty() const + +bool Store::is_empty() const { - return results.empty(); + return (!shallow_db || shallow_db->results.empty()) && + (!deep_db || deep_db->results.empty()); } + void Store::flush(ObjectStore::Transaction* t) { if (t) { - OSDriver::OSTransaction txn = driver.get_transaction(t); - backend.set_keys(results, &txn); + auto txn = shallow_db->driver.get_transaction(t); + shallow_db->backend.set_keys(shallow_db->results, &txn); + txn = deep_db->driver.get_transaction(t); + deep_db->backend.set_keys(deep_db->results, &txn); + } + + shallow_db->results.clear(); + deep_db->results.clear(); +} + + +void Store::clear_level_db( + ObjectStore::Transaction* t, + at_level_t& db, + std::string_view db_name) +{ + dout(20) << fmt::format("removing (omap) entries for {} error DB", db_name) + << dendl; + // easiest way to guarantee that the object representing the DB exists + t->touch(coll, db.errors_hoid); + + // remove all the keys in the DB + t->omap_clear(coll, db.errors_hoid); + + // restart the 'in progress' part of the MapCacher + db.backend.reset(); +} + + +void Store::reinit( + ObjectStore::Transaction* t, + scrub_level_t level) +{ + // Note: only one caller, and it creates the transaction passed to reinit(). + // No need to assert on 't' + dout(20) << fmt::format( + "re-initializing the Scrub::Store (for {} scrub)", + (level == scrub_level_t::deep ? "deep" : "shallow")) + << dendl; + + current_level = level; + + // always clear the known shallow errors DB (as both shallow and deep scrubs + // would recreate it) + if (shallow_db) { + clear_level_db(t, *shallow_db, "shallow"); + } + // only a deep scrub recreates the deep errors DB + if (level == scrub_level_t::deep && deep_db) { + clear_level_db(t, *deep_db, "deep"); } - results.clear(); } + void Store::cleanup(ObjectStore::Transaction* t) { - t->remove(coll, hoid); + dout(20) << "discarding error DBs" << dendl; + ceph_assert(t); + if (shallow_db) + t->remove(coll, shallow_db->errors_hoid); + if (deep_db) + t->remove(coll, deep_db->errors_hoid); } -std::vector -Store::get_snap_errors(int64_t pool, - const librados::object_id_t& start, - uint64_t max_return) const + +std::vector Store::get_snap_errors( + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return) const { - const string begin = (start.name.empty() ? - first_snap_key(pool) : to_snap_key(pool, start)); + vector errors; + const string begin = + (start.name.empty() ? 
first_snap_key(pool) : to_snap_key(pool, start)); const string end = last_snap_key(pool); - return get_errors(begin, end, max_return); + + // the snap errors are stored only in the shallow store + ExpCacherPosData latest_sh = shallow_db->backend.get_1st_after_key(begin); + + while (max_return-- && latest_sh.has_value() && latest_sh->last_key < end) { + errors.push_back(latest_sh->data); + latest_sh = shallow_db->backend.get_1st_after_key(latest_sh->last_key); + } + + return errors; } -std::vector -Store::get_object_errors(int64_t pool, - const librados::object_id_t& start, - uint64_t max_return) const + +std::vector Store::get_object_errors( + int64_t pool, + const librados::object_id_t& start, + uint64_t max_return) const { - const string begin = (start.name.empty() ? - first_object_key(pool) : to_object_key(pool, start)); + const string begin = + (start.name.empty() ? first_object_key(pool) + : to_object_key(pool, start)); const string end = last_object_key(pool); + dout(20) << fmt::format("fetching errors, from {} to {}", begin, end) + << dendl; return get_errors(begin, end, max_return); } -std::vector -Store::get_errors(const string& begin, - const string& end, - uint64_t max_return) const + +inline void decode( + librados::inconsistent_obj_t& obj, + ceph::buffer::list::const_iterator& bp) { + reinterpret_cast(obj).decode(bp); +} + + +inconsistent_obj_wrapper decode_wrapper( + hobject_t obj, + ceph::buffer::list::const_iterator bp) +{ + inconsistent_obj_wrapper iow{obj}; + iow.decode(bp); + return iow; +} + + +void Store::collect_specific_store( + MapCacher::MapCacher& backend, + Store::ExpCacherPosData& latest, + std::vector& errors, + std::string_view end_key, + uint64_t max_return) const +{ + while (max_return-- && latest.has_value() && + latest.value().last_key < end_key) { + errors.push_back(latest->data); + latest = backend.get_1st_after_key(latest->last_key); + } +} + + +/* + * Implementation notes: + * - see https://github.com/ceph/ceph/commit/df3ff6dafeadb3822b35c424a890db9a14d7f60f + * for why we encode the shard_info_t in the store. + * - to maintain known shard_info-s created during a deep scrub (but only when + * needed), we use our knowledge of the level of the last scrub performed + * (current_level), and the object user version as encoded in the error + * structure. + */ +bufferlist Store::merge_encoded_error_wrappers( + hobject_t obj, + ExpCacherPosData& latest_sh, + ExpCacherPosData& latest_dp) const +{ + // decode both error wrappers + auto sh_wrap = decode_wrapper(obj, latest_sh->data.cbegin()); + auto dp_wrap = decode_wrapper(obj, latest_dp->data.cbegin()); + + // note: the '20' level is just until we're sure the merging works as + // expected + if (g_conf()->subsys.should_gather()) { + dout(20) << fmt::format( + "merging errors {}. Deep: {:#x}-({})", sh_wrap.object, + dp_wrap.errors, dp_wrap) + << dendl; + dout(20) << fmt::format( + "merging errors {}. Shallow: {:#x}-({})", sh_wrap.object, + sh_wrap.errors, sh_wrap) + << dendl; + // dev: list the attributes: + for (const auto& [shard, si] : sh_wrap.shards) { + for (const auto& [attr, bl] : si.attrs) { + dout(20) << fmt::format(" shallow: shard {} attr: {}", shard, attr) + << dendl; + } + } + for (const auto& [shard, si] : dp_wrap.shards) { + for (const auto& [attr, bl] : si.attrs) { + dout(20) << fmt::format(" deep: shard {} attr: {}", shard, attr) + << dendl; + } + } + } + + // Actual merging of the shard map entries is only performed if the + // latest version is from the shallow scrub. 
+ // Otherwise, the deep scrub, which (for the shards info) contains all data, + // and the shallow scrub is ignored. + if (current_level == scrub_level_t::shallow) { + // is the object data related to the same object version? + if (sh_wrap.version == dp_wrap.version) { + // combine the error information + dp_wrap.errors |= sh_wrap.errors; + for (const auto& [shard, si] : sh_wrap.shards) { + if (dp_wrap.shards.contains(shard)) { + dout(20) << fmt::format( + "-----> {}-{} combining: sh-errors: {} dp-errors:{}", + sh_wrap.object, shard, si, dp_wrap.shards[shard]) + << dendl; + const auto saved_er = dp_wrap.shards[shard].errors; + dp_wrap.shards[shard].selected_oi = si.selected_oi; + dp_wrap.shards[shard].primary = si.primary; + dp_wrap.shards[shard].errors |= saved_er; + + // the attributes: + for (const auto& [attr, bl] : si.attrs) { + if (!dp_wrap.shards[shard].attrs.contains(attr)) { + dout(20) << fmt::format( + "-----> {}-{} copying shallow attr: attr: {}", + sh_wrap.object, shard, attr) + << dendl; + dp_wrap.shards[shard].attrs[attr] = bl; + } + // otherwise - we'll ignore the shallow attr buffer + } + } else { + // the deep scrub data for this shard is missing. We take the shallow + // scrub data. + dp_wrap.shards[shard] = si; + } + } + } else if (sh_wrap.version > dp_wrap.version) { + if (false && dp_wrap.version == 0) { + // there was a read error in the deep scrub. The deep version + // shows as '0'. That's severe enough for us to ignore the shallow. + dout(10) << fmt::format("{} ignoring deep after read failure", + sh_wrap.object) + << dendl; + } else { + // There is a new shallow version of the object results. + // The deep data is for an older version of that object. + // There are multiple possibilities here, but for now we ignore the + // deep data. + dp_wrap = sh_wrap; + } + } + } + + return dp_wrap.encode(); +} + + +// a better way to implement get_errors(): use two generators, one for each store. +// and sort-merge the results. Almost like a merge-sort, but with equal +// keys combined. 'todo' once 'ranges' are really working. + +std::vector Store::get_errors( + const std::string& from_key, + const std::string& end_key, + uint64_t max_return) const +{ + // merge the input from the two sorted DBs into 'errors' (until + // enough errors are collected) vector errors; - auto next = std::make_pair(begin, bufferlist{}); - while (max_return && !backend.get_next(next.first, &next)) { - if (next.first >= end) + dout(20) << fmt::format("getting errors from {} to {}", from_key, end_key) + << dendl; + + ceph_assert(shallow_db); + ceph_assert(deep_db); + ExpCacherPosData latest_sh = shallow_db->backend.get_1st_after_key(from_key); + ExpCacherPosData latest_dp = deep_db->backend.get_1st_after_key(from_key); + + while (max_return) { + dout(20) << fmt::format( + "n:{} latest_sh: {}, latest_dp: {}", max_return, + (latest_sh ? latest_sh->last_key : "(none)"), + (latest_dp ? 
latest_dp->last_key : "(none)")) + << dendl; + + // keys not smaller than end_key are not interesting + if (latest_sh.has_value() && latest_sh->last_key >= end_key) { + latest_sh = tl::unexpected(-EINVAL); + } + if (latest_dp.has_value() && latest_dp->last_key >= end_key) { + latest_dp = tl::unexpected(-EINVAL); + } + + if (!latest_sh && !latest_dp) { + // both stores are exhausted + break; + } + if (!latest_sh.has_value()) { + // continue with the deep store + dout(10) << fmt::format("collecting from deep store") << dendl; + collect_specific_store( + deep_db->backend, latest_dp, errors, end_key, max_return); break; - errors.push_back(next.second); + } + if (!latest_dp.has_value()) { + // continue with the shallow store + dout(10) << fmt::format("collecting from shallow store") << dendl; + collect_specific_store( + shallow_db->backend, latest_sh, errors, end_key, max_return); + break; + } + + // we have results from both stores. Select the one with a lower key. + // If the keys are equal, combine the errors. + if (latest_sh->last_key == latest_dp->last_key) { + auto bl = merge_encoded_error_wrappers( + shallow_db->errors_hoid.hobj, latest_sh, latest_dp); + errors.push_back(bl); + latest_sh = shallow_db->backend.get_1st_after_key(latest_sh->last_key); + latest_dp = deep_db->backend.get_1st_after_key(latest_dp->last_key); + + } else if (latest_sh->last_key < latest_dp->last_key) { + dout(20) << fmt::format("shallow store element ({})", latest_sh->last_key) + << dendl; + errors.push_back(latest_sh->data); + latest_sh = shallow_db->backend.get_1st_after_key(latest_sh->last_key); + } else { + dout(20) << fmt::format("deep store element ({})", latest_dp->last_key) + << dendl; + errors.push_back(latest_dp->data); + latest_dp = deep_db->backend.get_1st_after_key(latest_dp->last_key); + } max_return--; } + + dout(10) << fmt::format("{} errors reported", errors.size()) << dendl; return errors; } - -} // namespace Scrub +} // namespace Scrub diff --git a/src/osd/scrubber/ScrubStore.h b/src/osd/scrubber/ScrubStore.h index 567badf608b6c..0955654d78e91 100644 --- a/src/osd/scrubber/ScrubStore.h +++ b/src/osd/scrubber/ScrubStore.h @@ -1,10 +1,9 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab - -#ifndef CEPH_SCRUB_RESULT_H -#define CEPH_SCRUB_RESULT_H +#pragma once #include "common/map_cacher.hpp" +#include "osd/osd_types_fmt.h" #include "osd/SnapMapper.h" // for OSDriver namespace librados { @@ -13,27 +12,71 @@ struct object_id_t; struct inconsistent_obj_wrapper; struct inconsistent_snapset_wrapper; +class PgScrubber; namespace Scrub { +/** + * Storing errors detected during scrubbing. + * + * From both functional and internal perspectives, the store is a pair of key-value + * databases: one maps objects to shallow errors detected during their scrubbing, + * and other stores deep errors. + * Note that the first store is updated in both shallow and in deep scrubs. The + * second - only while deep scrubbing. + * + * The DBs can be consulted by the operator, when trying to list 'errors known + * at this point in time'. Whenever a scrub starts - the relevant entries in the + * DBs are removed. Specifically - the shallow errors DB is recreated each scrub, + * while the deep errors DB is recreated only when a deep scrub starts. + * + * When queried - the data from both DBs is merged for each named object, and + * returned to the operator. + * + * Implementation: + * Each of the two DBs is implemented as OMAP entries of a single, uniquely named, + * object. 
Both DBs are cached using the general KV Cache mechanism. + */ + class Store { public: ~Store(); - static Store* create(ObjectStore* store, - ObjectStore::Transaction* t, - const spg_t& pgid, - const coll_t& coll); + + Store( + PgScrubber& scrubber, + ObjectStore& osd_store, + ObjectStore::Transaction* t, + const spg_t& pgid, + const coll_t& coll); + + + /// mark down detected errors, either shallow or deep void add_object_error(int64_t pool, const inconsistent_obj_wrapper& e); + void add_snap_error(int64_t pool, const inconsistent_snapset_wrapper& e); // and a variant-friendly interface: void add_error(int64_t pool, const inconsistent_obj_wrapper& e); void add_error(int64_t pool, const inconsistent_snapset_wrapper& e); - bool empty() const; + [[nodiscard]] bool is_empty() const; void flush(ObjectStore::Transaction*); + + /// remove both shallow and deep errors DBs. Called on interval. void cleanup(ObjectStore::Transaction*); + /** + * prepare the Store object for a new scrub session. + * This involves clearing one or both of the errors DBs, and resetting + * the cache. + * + * @param level: the scrub level to prepare for. Whenever a deep scrub + * is requested, both the shallow and deep errors DBs are cleared. + * If, on the other hand, a shallow scrub is requested, only the shallow + * errors DB is cleared. + */ + void reinit(ObjectStore::Transaction* t, scrub_level_t level); + std::vector get_snap_errors( int64_t pool, const librados::object_id_t& start, @@ -44,20 +87,89 @@ class Store { const librados::object_id_t& start, uint64_t max_return) const; + std::ostream& gen_prefix(std::ostream& out, std::string_view fn) const; + private: - Store(const coll_t& coll, const ghobject_t& oid, ObjectStore* store); - std::vector get_errors(const std::string& start, - const std::string& end, - uint64_t max_return) const; - private: + /** + * at_level_t + * + * The machinery for caching and storing errors at a specific scrub level. + */ + struct at_level_t { + at_level_t(const spg_t& pgid, const ghobject_t& err_obj, OSDriver&& drvr) + : errors_hoid{err_obj} + , driver{std::move(drvr)} + , backend{&driver} + {} + + /// the object in the PG store, where the errors are stored + ghobject_t errors_hoid; + + /// abstracted key fetching + OSDriver driver; + + /// a K,V cache for the errors that are detected during the scrub + /// session. The errors marked for a specific object are stored as + /// an OMap entry with the object's name as the key. + MapCacher::MapCacher backend; + + /// a temp object mapping seq-id to inconsistencies + std::map results; + }; + + using CacherPosData = + MapCacher::MapCacher::PosAndData; + using ExpCacherPosData = tl::expected; + + /// access to the owning Scrubber object, for logging mostly + PgScrubber& m_scrubber; + + /// the OSD's storage backend + ObjectStore& object_store; + + /// the collection (i.e. - the PG store) in which the errors are stored const coll_t coll; - const ghobject_t hoid; - // a temp object holding mappings from seq-id to inconsistencies found in - // scrubbing - OSDriver driver; - mutable MapCacher::MapCacher backend; - std::map results; + + scrub_level_t current_level; + + /** + * the machinery (backend details, cache, etc.) 
for storing both levels + * of errors (note: 'optional' to allow delayed creation w/o dynamic + * allocations; and 'mutable', as the caching mechanism is used in const + * methods) + */ + mutable std::optional shallow_db; + mutable std::optional deep_db; + + std::vector get_errors( + const std::string& start, + const std::string& end, + uint64_t max_return) const; + + void collect_specific_store( + MapCacher::MapCacher& backend, + ExpCacherPosData& latest, + std::vector& errors, + std::string_view end_key, + uint64_t max_return) const; + + /** + * Clear the DB of errors at a specific scrub level by performing an + * omap_clear() on the DB object, and resetting the MapCacher. + */ + void clear_level_db( + ObjectStore::Transaction* t, + at_level_t& db, + std::string_view db_name); + + /** + * merge the two error wrappers - fetched from both DBs for the same object. + * Specifically, the object errors are or'ed, and so are the per-shard + * entries. + */ + bufferlist merge_encoded_error_wrappers( + hobject_t obj, + ExpCacherPosData& latest_sh, + ExpCacherPosData& latest_dp) const; }; } // namespace Scrub - -#endif // CEPH_SCRUB_RESULT_H diff --git a/src/osd/scrubber/pg_scrubber.cc b/src/osd/scrubber/pg_scrubber.cc index 555d13ba72b2b..594ffb15e2b5b 100644 --- a/src/osd/scrubber/pg_scrubber.cc +++ b/src/osd/scrubber/pg_scrubber.cc @@ -1183,6 +1183,7 @@ void PgScrubber::_request_scrub_map(pg_shard_t replica, m_osds->send_message_osd_cluster(replica.osd, repscrubop, get_osdmap_epoch()); } +// only called on interval change. Both DBs are to be removed. void PgScrubber::cleanup_store(ObjectStore::Transaction* t) { if (!m_store) @@ -1200,6 +1201,38 @@ void PgScrubber::cleanup_store(ObjectStore::Transaction* t) ceph_assert(!m_store); } + +void PgScrubber::reinit_scrub_store() +{ + // Entering, 0 to 3 of the following objects(*) may exist: + // ((*)'objects' here: both code objects (the ScrubStore object) and + // actual Object Store objects). + // 1. The ScrubStore object itself. + // 2,3. The two special hobjects in the coll (the PG data) holding the last + // scrub's results. + // + // The Store object can be deleted and recreated, as a way to guarantee + // no junk is left. We won't do it here, but we will clear the at_level_t + // structures. + // The hobjects: possibly. The shallow DB object is always cleared. The + // deep one - only if running a deep scrub. + ObjectStore::Transaction t; + if (m_store) { + dout(10) << __func__ << " reusing existing store" << dendl; + m_store->flush(&t); + } else { + dout(10) << __func__ << " creating new store" << dendl; + m_store = std::make_unique( + *this, *m_pg->osd->store, &t, m_pg->info.pgid, m_pg->coll); + } + + // regardless of whether the ScrubStore object was recreated or reused, we need to + // (possibly) clear the actual DB objects in the Object Store. + m_store->reinit(&t, m_active_target->level()); + m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t), nullptr); +} + + void PgScrubber::on_init() { // going upwards from 'inactive' @@ -1217,14 +1250,8 @@ void PgScrubber::on_init() m_is_deep ? 
scrub_level_t::deep : scrub_level_t::shallow, m_pg->get_actingset()); - // create a new store - { - ObjectStore::Transaction t; - cleanup_store(&t); - m_store.reset( - Scrub::Store::create(m_pg->osd->store, &t, m_pg->info.pgid, m_pg->coll)); - m_pg->osd->store->queue_transaction(m_pg->ch, std::move(t), nullptr); - } + // create or reuse the 'known errors' store + reinit_scrub_store(); m_start = m_pg->info.pgid.pgid.get_hobj_start(); m_active = true; diff --git a/src/osd/scrubber/pg_scrubber.h b/src/osd/scrubber/pg_scrubber.h index ff8c98d387ea2..1a5813bd9235c 100644 --- a/src/osd/scrubber/pg_scrubber.h +++ b/src/osd/scrubber/pg_scrubber.h @@ -771,6 +771,16 @@ class PgScrubber : public ScrubPgIF, std::unique_ptr m_store; + /** + * the ScrubStore sub-object caches and manages the database of known + * scrub errors. reinit_scrub_store() clears the database and re-initializes + * the ScrubStore object. + * + * in the next iteration - reinit_..() potentially deletes only the + * shallow errors part of the database. + */ + void reinit_scrub_store(); + int num_digest_updates_pending{0}; hobject_t m_start, m_end; ///< note: half-closed: [start,end) diff --git a/src/osdc/Journaler.h b/src/osdc/Journaler.h index 4a574ed66d94e..d15862c08ba52 100644 --- a/src/osdc/Journaler.h +++ b/src/osdc/Journaler.h @@ -529,43 +529,35 @@ class Journaler { // =================== Header get_last_committed() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return last_committed; } Header get_last_written() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return last_written; } uint64_t get_layout_period() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return layout.get_period(); } file_layout_t get_layout() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return layout; } bool is_active() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return state == STATE_ACTIVE; } bool is_stopping() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return state == STATE_STOPPING; } int get_error() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return error; } bool is_readonly() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return readonly; } @@ -573,32 +565,26 @@ class Journaler { bool _is_readable(); bool try_read_entry(bufferlist& bl); uint64_t get_write_pos() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return write_pos; } uint64_t get_write_safe_pos() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return safe_pos; } uint64_t get_read_pos() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return read_pos; } uint64_t get_expire_pos() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return expire_pos; } uint64_t get_trimmed_pos() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); lock_guard l(lock); return trimmed_pos; } size_t get_journal_envelope_size() const { - ceph_assert(!ceph::mutex_debugging || !ceph_mutex_is_locked_by_me(lock)); 
lock_guard l(lock); return journal_stream.get_envelope_size(); } diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 68bd76268ae94..927c7e413296f 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -48,7 +48,6 @@ #include "include/function2.hpp" #include "include/neorados/RADOS_Decodable.hpp" -#include "common/async/completion.h" #include "common/admin_socket.h" #include "common/ceph_time.h" #include "common/ceph_mutex.h" @@ -1968,30 +1967,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { } } - boost::asio::any_completion_handler - OpCompletionVert(std::unique_ptr> c) { - if (c) - return [c = std::move(c)](boost::system::error_code ec) mutable { - c->dispatch(std::move(c), ec); - }; - else - return nullptr; - } - - template - boost::asio::any_completion_handler - OpCompletionVert(std::unique_ptr> c) { - if (c) { - return [c = std::move(c)](boost::system::error_code ec, T t) mutable { - c->dispatch(std::move(c), ec, std::move(t)); - }; - } else { - return nullptr; - } - } - struct Op : public RefCountedObject { OSDSession *session = nullptr; int incarnation = 0; @@ -3268,18 +3243,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { return linger_watch(info, op, snapc, mtime, inbl, OpContextVert(onfinish, nullptr), objver); } - ceph_tid_t linger_watch(LingerOp *info, - ObjectOperation& op, - const SnapContext& snapc, ceph::real_time mtime, - ceph::buffer::list& inbl, - std::unique_ptr> onfinish, - version_t *objver) { - return linger_watch(info, op, snapc, mtime, inbl, - OpCompletionVert( - std::move(onfinish)), objver); - } ceph_tid_t linger_notify(LingerOp *info, ObjectOperation& op, snapid_t snap, ceph::buffer::list& inbl, @@ -3295,17 +3258,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { OpContextVert(onack, poutbl), objver); } - ceph_tid_t linger_notify(LingerOp *info, - ObjectOperation& op, - snapid_t snap, ceph::buffer::list& inbl, - std::unique_ptr> onack, - version_t *objver) { - return linger_notify(info, op, snap, inbl, - OpCompletionVert( - std::move(onack)), objver); - } tl::expected linger_check(LingerOp *info); void linger_cancel(LingerOp *info); // releases a reference @@ -3886,12 +3838,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { create_pool_snap(pool, snapName, OpContextVert(c, nullptr)); } - void create_pool_snap( - int64_t pool, std::string_view snapName, - std::unique_ptr> c) { - create_pool_snap(pool, snapName, - OpCompletionVert(std::move(c))); - } void allocate_selfmanaged_snap(int64_t pool, boost::asio::any_completion_handler< void(boost::system::error_code, @@ -3901,12 +3847,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { allocate_selfmanaged_snap(pool, OpContextVert(c, psnapid)); } - void allocate_selfmanaged_snap(int64_t pool, - std::unique_ptr> c) { - allocate_selfmanaged_snap(pool, - OpCompletionVert(std::move(c))); - } void delete_pool_snap(int64_t pool, std::string_view snapName, decltype(PoolOp::onfinish)&& onfinish); void delete_pool_snap(int64_t pool, std::string_view snapName, @@ -3914,12 +3854,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { delete_pool_snap(pool, snapName, OpContextVert(c, nullptr)); } - void delete_pool_snap(int64_t pool, std::string_view snapName, - std::unique_ptr> c) { - delete_pool_snap(pool, snapName, - OpCompletionVert(std::move(c))); - } void delete_selfmanaged_snap(int64_t pool, snapid_t snap, decltype(PoolOp::onfinish)&& onfinish); @@ -3928,12 +3862,6 @@ class Objecter : public md_config_obs_t, public 
Dispatcher { delete_selfmanaged_snap(pool, snap, OpContextVert(c, nullptr)); } - void delete_selfmanaged_snap(int64_t pool, snapid_t snap, - std::unique_ptr> c) { - delete_selfmanaged_snap(pool, snap, - OpCompletionVert(std::move(c))); - } void create_pool(std::string_view name, @@ -3945,25 +3873,12 @@ class Objecter : public md_config_obs_t, public Dispatcher { OpContextVert(onfinish, nullptr), crush_rule); } - void create_pool(std::string_view name, - std::unique_ptr> c, - int crush_rule=-1) { - create_pool(name, - OpCompletionVert(std::move(c)), - crush_rule); - } void delete_pool(int64_t pool, decltype(PoolOp::onfinish)&& onfinish); void delete_pool(int64_t pool, Context* onfinish) { delete_pool(pool, OpContextVert(onfinish, nullptr)); } - void delete_pool(int64_t pool, - std::unique_ptr> c) { - delete_pool(pool, OpCompletionVert(std::move(c))); - } void delete_pool(std::string_view name, decltype(PoolOp::onfinish)&& onfinish); @@ -3972,11 +3887,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { Context* onfinish) { delete_pool(name, OpContextVert(onfinish, nullptr)); } - void delete_pool(std::string_view name, - std::unique_ptr> c) { - delete_pool(name, OpCompletionVert(std::move(c))); - } void handle_pool_op_reply(MPoolOpReply *m); int pool_op_cancel(ceph_tid_t tid, int r); @@ -4026,11 +3936,6 @@ class Objecter : public md_config_obs_t, public Dispatcher { Context *onfinish) { get_fs_stats_(poolid, OpContextVert(onfinish, result)); } - void get_fs_stats(std::optional poolid, - std::unique_ptr> c) { - get_fs_stats_(poolid, OpCompletionVert(std::move(c))); - } int statfs_op_cancel(ceph_tid_t tid, int r); void _finish_statfs_op(StatfsOp *op, int r); diff --git a/src/pybind/mgr/cephadm/ceph_volume.py b/src/pybind/mgr/cephadm/ceph_volume.py new file mode 100644 index 0000000000000..a270bb7028f46 --- /dev/null +++ b/src/pybind/mgr/cephadm/ceph_volume.py @@ -0,0 +1,430 @@ +from cephadm.serve import CephadmServe +from typing import List, TYPE_CHECKING, Any, Dict, Set, Tuple +if TYPE_CHECKING: + from cephadm import CephadmOrchestrator + + +class CephVolume: + def __init__(self, mgr: "CephadmOrchestrator", _inheritance: bool = False) -> None: + self.mgr: "CephadmOrchestrator" = mgr + if not _inheritance: + self.lvm_list: "CephVolumeLvmList" = CephVolumeLvmList(mgr) + + def run_json(self, hostname: str, command: List[str]) -> Dict[str, Any]: + """Execute a JSON command on the specified hostname and return the result. + + This method wraps the asynchronous execution of a JSON command on the + specified hostname, waiting for the command to complete. It utilizes the + `_run_json` method to perform the actual execution. + + Args: + hostname (str): The hostname of the target node where the JSON command + will be executed. + command (List[str]): A list of command arguments to be passed to the + JSON command. + + Returns: + Dict[str, Any]: A dictionary containing the JSON response from the + executed command, which may include various data + based on the command executed. + """ + return self.mgr.wait_async(self._run_json(hostname, command)) + + def run(self, hostname: str, command: List[str], **kw: Any) -> Tuple[List[str], List[str], int]: + """Execute a command on the specified hostname and return the result. + + This method wraps the asynchronous execution of a command on the + specified hostname, waiting for the command to complete. It utilizes the + `_run` method to perform the actual execution. 
+ + Args: + hostname (str): The hostname of the target node where the command + will be executed. + command (List[str]): A list of command arguments to be passed to the + command. + **kw (Any): Additional keyword arguments to customize the command + execution. + + Returns: + Tuple[List[str], List[str], int]: A tuple containing: + - A list of strings representing the standard output of the command. + - A list of strings representing the standard error output of the command. + - An integer representing the return code of the command execution. + """ + return self.mgr.wait_async(self._run(hostname, command, **kw)) + + async def _run(self, + hostname: str, + command: List[str], + **kw: Any) -> Tuple[List[str], List[str], int]: + """Execute a ceph-volume command on the specified hostname and return the result. + + This asynchronous method constructs a ceph-volume command and then executes + it on the specified host. + The result of the command is returned in JSON format. + + Args: + hostname (str): The hostname of the target node where the command will be executed. + command (List[str]): A list of command arguments to be passed to the Ceph command. + **kw (Any): Additional keyword arguments to customize the command execution. + + Returns: + Tuple[List[str], List[str], int]: A tuple containing: + - A list of strings representing the standard output of the command. + - A list of strings representing the standard error output of the command. + - An integer representing the return code of the command execution. + """ + cmd: List[str] = ['--'] + cmd.extend(command) + result = await CephadmServe(self.mgr)._run_cephadm( + hostname, 'osd', 'ceph-volume', + cmd, + **kw) + return result + + async def _run_json(self, + hostname: str, + command: List[str]) -> Dict[str, Any]: + """Execute a ceph-volume command on a specified hostname. + + This asynchronous method constructs a ceph-volume command and then executes + it on the specified host. + The result of the command is returned in JSON format. + + Args: + hostname (str): The hostname of the target node where the command will be executed. + command (List[str]): A list of command arguments to be passed to the Ceph command. + + Returns: + Dict[str, Any]: The result of the command execution as a dictionary parsed from + the JSON output. + """ + cmd: List[str] = ['--'] + cmd.extend(command) + result = await CephadmServe(self.mgr)._run_cephadm_json( + hostname, 'osd', 'ceph-volume', + cmd) + return result + + def clear_replace_header(self, hostname: str, device: str) -> str: + """Clear the replacement header on a specified device for a given hostname. + + This method checks if a replacement header exists on the specified device + and clears it if found. After clearing, it invalidates the cached device + information for the specified hostname and kicks the serve loop. + + Args: + hostname (str): The hostname of the device on which the replacement header + will be cleared. This is used to identify the specific + device within the manager's context. + device (str): The path to the device (e.g., '/dev/sda') from which the + replacement header will be cleared. + + Returns: + str: A message indicating the result of the operation. It will either confirm + that the replacement header was cleared or state that no replacement header + was detected on the device. 
+ """ + output: str = '' + result = self.run(hostname, ['lvm', + 'zap', + '--clear-replace-header', + device], + error_ok=True) + out, err, rc = result + if not rc: + output = f'Replacement header cleared on {device}' + self.mgr.cache.invalidate_host_devices(hostname) + self.mgr._kick_serve_loop() + else: + plain_out: str = '\n'.join(out) + plain_err: str = '\n'.join(err) + output = f'No replacement header could be cleared on {device}.\n{plain_out}\n{plain_err}' + return output + + +class CephVolumeLvmList(CephVolume): + def __init__(self, mgr: "CephadmOrchestrator") -> None: + super().__init__(mgr, True) + self.data: Dict[str, Any] = {} + + def get_data(self, hostname: str) -> None: + """Execute the `ceph-volume lvm list` command to list LVM-based OSDs. + + This asynchronous method interacts with the Ceph manager to retrieve + information about the Logical Volume Manager (LVM) devices associated + with the OSDs. It calls the `ceph-volume lvm list` command in JSON format + to gather relevant data. + + Returns: + None: This method does not return a value. The retrieved data is + stored in the `self.data` attribute for further processing. + """ + self.data = self.run_json(hostname, + ['lvm', 'list', '--format', 'json']) + + def devices_by_type(self, device_type: str) -> List[str]: + """Retrieve a list of devices of a specified type across all OSDs. + + This method iterates through all OSDs and collects devices that match + the specified type (e.g., 'block', 'db', 'wal'). The resulting list + contains unique device paths. + + Args: + device_type (str): The type of devices to retrieve. This should + be one of the recognized device types such as + 'block', 'db', or 'wal'. + + Returns: + List[str]: A list of unique device paths of the specified type + found across all OSDs. If no devices of the specified + type are found, an empty list is returned. + """ + result: Set[str] = set() + for osd in self.osd_ids(): + for lv in self.data.get(osd, []): + if lv.get('type') == device_type: + result.update(lv.get('devices', [])) + return list(result) + + def block_devices(self) -> List[str]: + """List all block devices used by OSDs. + + This method returns a list of devices that are used as 'block' devices + for storing the main OSD data. + + Returns: + List[str]: A list of device paths (strings) that are used as 'block' devices. + """ + return self.devices_by_type('block') + + def db_devices(self) -> List[str]: + """List all database (DB) devices used by OSDs. + + This method returns a list of devices that are used as 'db' devices + for storing the database files associated with OSDs. + + Returns: + List[str]: A list of device paths (strings) that are used as 'db' devices. + """ + return self.devices_by_type('db') + + def wal_devices(self) -> List[str]: + """List all write-ahead log (WAL) devices used by OSDs. + + This method returns a list of devices that are used as 'wal' devices + for storing write-ahead log data associated with OSDs. + + Returns: + List[str]: A list of device paths (strings) that are used as 'wal' devices. + """ + return self.devices_by_type('wal') + + def all_devices(self) -> List[str]: + """List all devices used by OSDs for 'block', 'db', or 'wal' purposes. + + This method aggregates all devices that are currently used by the OSDs + in the system for the following device types: + - 'block' devices: Used to store the OSD's data. + - 'db' devices: Used for database purposes. + - 'wal' devices: Used for Write-Ahead Logging. 
+ + The returned list combines devices from all these categories. + + Returns: + List[str]: A list of device paths (strings) that are used as 'block', 'db', or 'wal' devices. + """ + return self.block_devices() + self.db_devices() + self.wal_devices() + + def device_osd_mapping(self, device_type: str = '') -> Dict[str, Dict[str, List[str]]]: + """Create a mapping of devices to their corresponding OSD IDs based on device type. + + This method serves as a 'proxy' function, designed to be called by the *_device_osd_mapping() methods. + + This method iterates over the OSDs and their logical volumes to build a + dictionary that maps each device of the specified type to the list of + OSD IDs that use it. The resulting dictionary can be used to determine + which OSDs share a specific device. + + Args: + device_type (str): The type of the device to filter by (e.g., 'block', 'db', or 'wal'). + If an empty string is provided, devices of all types will be included. + + Returns: + Dict[str, Dict[str, List[str]]]: A dictionary where the keys are device + names and the values are dictionaries containing a list of OSD IDs + that use the corresponding device. + + eg: + ``` + { + '/dev/vda': {'osd_ids': ['0', '1']}, + '/dev/vdb': {'osd_ids': ['2']} + } + ``` + + """ + result: Dict[str, Dict[str, List[str]]] = {} + for osd in self.osd_ids(): + for lv in self.data.get(osd, []): + if lv.get('type') == device_type or not device_type: + for device in lv.get('devices', []): + if device not in result: + result[device] = {'osd_ids': []} + result[device]['osd_ids'].append(osd) + return result + + def block_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all block devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3', '4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all block devices with their corresponding + osd id(s). + """ + return self.device_osd_mapping('block') + + def db_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all db devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdv': {'osd_ids': ['0', '1', '2', '3']}, + '/dev/vdx': {'osd_ids': ['4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all db devices with their corresponding + osd id(s). + """ + return self.device_osd_mapping('db') + + def wal_device_osd_mapping(self) -> Dict[str, Dict[str, List[str]]]: + """Get a dictionnary with all wal devices and their corresponding + osd(s) id(s). + + eg: + ``` + {'/dev/vdy': {'osd_ids': ['0', '1', '2', '3']}, + '/dev/vdz': {'osd_ids': ['4']}} + ``` + + Returns: + Dict[str, Dict[str, List[str]]]: A dict including all wal devices with their corresponding + osd id(s). + """ + return self.device_osd_mapping('wal') + + def is_shared_device(self, device: str) -> bool: + """Determines if a device is shared between multiple OSDs. + + This method checks if a given device is shared by multiple OSDs for a specified device type + (such as 'block', 'db', or 'wal'). If the device is associated with more than one OSD, + it is considered shared. + + Args: + device (str): The device path to check (e.g., '/dev/sda'). + device_type (str): The type of the device (e.g., 'block', 'db', 'wal'). + + Raises: + RuntimeError: If the device is not valid or not found in the shared devices mapping. 
+ + Returns: + bool: True if the device is shared by more than one OSD, False otherwise. + """ + device_osd_mapping = self.device_osd_mapping() + if not device or device not in device_osd_mapping: + raise RuntimeError('Not a valid device path.') + return len(device_osd_mapping[device]['osd_ids']) > 1 + + def is_block_device(self, device: str) -> bool: + """Check if a specified device is a block device. + + This method checks if the specified device is included in the + list of block devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a block device, + False otherwise. + """ + return device in self.block_devices() + + def is_db_device(self, device: str) -> bool: + """Check if a specified device is a DB device. + + This method checks if the specified device is included in the + list of DB devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a DB device, + False otherwise. + """ + return device in self.db_devices() + + def is_wal_device(self, device: str) -> bool: + """Check if a specified device is a WAL device. + + This method checks if the specified device is included in the + list of WAL devices used by OSDs. + + Args: + device (str): The path of the device to check. + + Returns: + bool: True if the device is a WAL device, + False otherwise. + """ + return device in self.wal_devices() + + def get_block_devices_from_osd_id(self, osd_id: str) -> List[str]: + """Retrieve the list of block devices associated with a given OSD ID. + + This method looks up the specified OSD ID in the `data` attribute + and returns a list of devices that are of type 'block'. If there are + no devices of type 'block' for the specified OSD ID, an empty list is returned. + + Args: + osd_id (str): The OSD ID for which to retrieve block devices. + + Returns: + List[str]: A list of block device paths associated with the + specified OSD ID. If no block devices are found, + an empty list is returned. + """ + result: List[str] = [] + for lv in self.data.get(osd_id, []): + if lv.get('type') == 'block': + result = lv.get('devices', []) + return result + + def osd_ids(self) -> List[str]: + """Retrieve the list of OSD IDs. + + This method returns a list of OSD IDs by extracting the keys + from the `data` attribute, which is expected to contain + information about OSDs. If there is no data available, an + empty list is returned. + + Returns: + List[str]: A list of OSD IDs. If no data is present, + an empty list is returned. 
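+
+        eg:
+        ```
+        ['0', '1', '2', '3', '4', '5']
+        ```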
+ """ + result: List[str] = [] + if self.data: + result = list(self.data.keys()) + return result diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 5216c489064c9..7bf65b532fafc 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -101,6 +101,7 @@ from .configchecks import CephadmConfigChecks from .offline_watcher import OfflineHostWatcher from .tuned_profiles import TunedProfileUtils +from .ceph_volume import CephVolume try: import asyncssh @@ -135,13 +136,13 @@ def os_exit_noop(status: int) -> None: DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.51.0' DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.7.0' DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.2.17' -DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:3.0.0' -DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:3.0.0' +DEFAULT_LOKI_IMAGE = 'quay.io/ceph/loki:3.0.0' +DEFAULT_PROMTAIL_IMAGE = 'quay.io/ceph/promtail:3.0.0' DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.27.0' DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/grafana:10.4.8' DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3' DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4' -DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1' +DEFAULT_SNMP_GATEWAY_IMAGE = 'quay.io/ceph/snmp-notifier:v1.2.1' DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23' DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29' DEFAULT_JAEGER_AGENT_IMAGE = 'quay.io/jaegertracing/jaeger-agent:1.29' @@ -446,7 +447,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule, Option( 'default_registry', type='str', - default='docker.io', + default='quay.io', desc='Search-registry to which we should normalize unqualified image names. ' 'This is not the default registry', ), @@ -764,6 +765,7 @@ def __init__(self, *args: Any, **kwargs: Any): self.iscsi_service: IscsiService = cast(IscsiService, self.cephadm_services['iscsi']) self.nvmeof_service: NvmeofService = cast(NvmeofService, self.cephadm_services['nvmeof']) self.node_proxy_service: NodeProxy = cast(NodeProxy, self.cephadm_services['node-proxy']) + self.rgw_service: RgwService = cast(RgwService, self.cephadm_services['rgw']) self.scheduled_async_actions: List[Callable] = [] @@ -791,6 +793,8 @@ def __init__(self, *args: Any, **kwargs: Any): # as part of the handling of stray daemons self.recently_altered_daemons: Dict[str, datetime.datetime] = {} + self.ceph_volume: CephVolume = CephVolume(self) + def shutdown(self) -> None: self.log.debug('shutdown') self._worker_pool.close() @@ -3609,7 +3613,12 @@ def _apply_service_spec(self, spec: ServiceSpec) -> str: return "Scheduled %s update..." % spec.service_name() @handle_orch_error - def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> List[str]: + def apply( + self, + specs: Sequence[GenericSpec], + no_overwrite: bool = False, + continue_on_error: bool = True + ) -> List[str]: results = [] for spec in specs: if no_overwrite: @@ -3621,7 +3630,14 @@ def apply(self, specs: Sequence[GenericSpec], no_overwrite: bool = False) -> Lis results.append('Skipped %s service spec. 
To change %s spec omit --no-overwrite flag' % (cast(ServiceSpec, spec).service_name(), cast(ServiceSpec, spec).service_name())) continue - results.append(self._apply(spec)) + try: + res = self._apply(spec) + results.append(res) + except Exception as e: + if continue_on_error: + results.append(f'Failed to apply spec for {spec}: {str(e)}') + else: + raise e return results @handle_orch_error @@ -3827,9 +3843,56 @@ def upgrade_resume(self) -> str: def upgrade_stop(self) -> str: return self.upgrade.upgrade_stop() + @handle_orch_error + def replace_device(self, + hostname: str, + device: str, + clear: bool = False, + yes_i_really_mean_it: bool = False) -> Any: + output: str = '' + + self.ceph_volume.lvm_list.get_data(hostname=hostname) + + if clear: + output = self.ceph_volume.clear_replace_header(hostname, device) + else: + osds_to_zap: List[str] = [] + if hostname not in list(self.inventory.keys()): + raise OrchestratorError(f'{hostname} invalid host.') + + if device not in self.ceph_volume.lvm_list.all_devices(): + raise OrchestratorError(f"{device} doesn't appear to be used for an OSD, not a valid device in {hostname}.") + + device_osd_mapping = self.ceph_volume.lvm_list.device_osd_mapping() + osds_to_zap = device_osd_mapping[device]['osd_ids'] + + if self.ceph_volume.lvm_list.is_shared_device(device): + if not yes_i_really_mean_it: + raise OrchestratorError(f'{device} is a shared device.\n' + f'Replacing {device} implies destroying OSD(s): {osds_to_zap}.\n' + 'Please, *be very careful*, this can be a very dangerous operation.\n' + 'If you know what you are doing, pass --yes-i-really-mean-it') + if not self.to_remove_osds.rm_util.safe_to_destroy([int(osd_id) for osd_id in osds_to_zap]): + raise OrchestratorError(f"Destroying OSD(s) {osds_to_zap} would cause some PGs to be undersized/degraded.\n" + 'Refusing to proceed.') + replace_block: bool = self.ceph_volume.lvm_list.is_block_device(device) + replace_db: bool = self.ceph_volume.lvm_list.is_db_device(device) + replace_wal: bool = self.ceph_volume.lvm_list.is_wal_device(device) + + self.remove_osds(list(osds_to_zap), + replace_block=replace_block, + replace_db=replace_db, + replace_wal=replace_wal) + + output = f'Scheduled to destroy osds: {osds_to_zap} and mark {device} as being replaced.' 
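+            # Note (descriptive only): the removal scheduled above carries the
+            # replace_block/replace_db/replace_wal flags, so the subsequent zap
+            # is expected to leave a replacement header on the device; that is
+            # the marker clear_replace_header() later removes via
+            # `ceph-volume lvm zap --clear-replace-header`.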
+ return output + @handle_orch_error def remove_osds(self, osd_ids: List[str], replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, zap: bool = False, no_destroy: bool = False) -> str: @@ -3852,6 +3915,9 @@ def remove_osds(self, osd_ids: List[str], try: self.to_remove_osds.enqueue(OSD(osd_id=int(daemon.daemon_id), replace=replace, + replace_block=replace_block, + replace_db=replace_db, + replace_wal=replace_wal, force=force, zap=zap, no_destroy=no_destroy, diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index c6212c9efb83d..4a7959ae04502 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -96,7 +96,10 @@ def serve(self) -> None: if not self.mgr.paused: self._run_async_actions() - self.mgr.to_remove_osds.process_removal_queue() + removal_queue_result = self.mgr.to_remove_osds.process_removal_queue() + self.log.debug(f'process_removal_queue() returned = {removal_queue_result}') + if removal_queue_result: + continue self.mgr.migration.migrate() if self.mgr.migration.is_migration_ongoing(): @@ -950,6 +953,10 @@ def update_progress() -> None: ) continue + # set multisite config before deploying the rgw daemon + if service_type == 'rgw': + self.mgr.rgw_service.set_realm_zg_zone(cast(RGWSpec, spec)) + # deploy new daemon daemon_id = slot.name diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index eb9a1c838a656..9043577bc5a60 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -984,10 +984,9 @@ class RgwService(CephService): def allow_colo(self) -> bool: return True - def config(self, spec: RGWSpec) -> None: # type: ignore + def set_realm_zg_zone(self, spec: RGWSpec) -> None: assert self.TYPE == spec.service_type - # set rgw_realm rgw_zonegroup and rgw_zone, if present if spec.rgw_realm: ret, out, err = self.mgr.check_mon_command({ 'prefix': 'config set', @@ -1010,6 +1009,12 @@ def config(self, spec: RGWSpec) -> None: # type: ignore 'value': spec.rgw_zone, }) + def config(self, spec: RGWSpec) -> None: # type: ignore + assert self.TYPE == spec.service_type + + # set rgw_realm rgw_zonegroup and rgw_zone, if present + self.set_realm_zg_zone(spec) + if spec.generate_cert and not spec.rgw_frontend_ssl_certificate: # generate a self-signed cert for the rgw service cert, key = self.mgr.cert_mgr.ssl_certs.generate_root_cert(custom_san_list=spec.zonegroup_hostnames) diff --git a/src/pybind/mgr/cephadm/services/nvmeof.py b/src/pybind/mgr/cephadm/services/nvmeof.py index 8b15aace373c5..162815da24c73 100644 --- a/src/pybind/mgr/cephadm/services/nvmeof.py +++ b/src/pybind/mgr/cephadm/services/nvmeof.py @@ -123,10 +123,9 @@ def get_set_cmd_dicts(out: str) -> List[dict]: gateways = json.loads(out)['gateways'] cmd_dicts = [] - spec = cast(NvmeofServiceSpec, - self.mgr.spec_store.all_specs.get(daemon_descrs[0].service_name(), None)) - for dd in daemon_descrs: + spec = cast(NvmeofServiceSpec, + self.mgr.spec_store.all_specs.get(dd.service_name(), None)) service_name = dd.service_name() if dd.hostname is None: err_msg = ('Trying to config_dashboard nvmeof but no hostname is defined') diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 9b09b8c9f4925..80bf92772c49b 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -551,6 +551,12 @@ def 
zap_osd(self, osd: "OSD") -> str: "Zaps all devices that are associated with an OSD" if osd.hostname is not None: cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if osd.replace_block: + cmd.append('--replace-block') + if osd.replace_db: + cmd.append('--replace-db') + if osd.replace_wal: + cmd.append('--replace-wal') if not osd.no_destroy: cmd.append('--destroy') with self.mgr.async_timeout_handler(osd.hostname, f'cephadm ceph-volume {" ".join(cmd)}'): @@ -618,6 +624,9 @@ def __init__(self, started: bool = False, stopped: bool = False, replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, hostname: Optional[str] = None, zap: bool = False, @@ -649,6 +658,12 @@ def __init__(self, # If this is a replace or remove operation self.replace = replace + # If this is a block device replacement + self.replace_block = replace_block + # If this is a db device replacement + self.replace_db = replace_db + # If this is a wal device replacement + self.replace_wal = replace_wal # If we wait for the osd to be drained self.force = force # The name of the node @@ -676,7 +691,7 @@ def start_draining(self) -> bool: if self.stopped: logger.debug(f"Won't start draining {self}. OSD draining is stopped.") return False - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'out') else: self.rm_util.reweight_osd(self, 0.0) @@ -686,7 +701,7 @@ def start_draining(self) -> bool: return True def stop_draining(self) -> bool: - if self.replace: + if self.any_replace_params: self.rm_util.set_osd_flag([self], 'in') else: if self.original_weight: @@ -764,6 +779,9 @@ def to_json(self) -> dict: out['draining'] = self.draining out['stopped'] = self.stopped out['replace'] = self.replace + out['replace_block'] = self.replace_block + out['replace_db'] = self.replace_db + out['replace_wal'] = self.replace_wal out['force'] = self.force out['zap'] = self.zap out['hostname'] = self.hostname # type: ignore @@ -789,6 +807,13 @@ def from_json(cls, inp: Optional[Dict[str, Any]], rm_util: RemoveUtil) -> Option inp['hostname'] = hostname return cls(**inp) + @property + def any_replace_params(self) -> bool: + return any([self.replace, + self.replace_block, + self.replace_db, + self.replace_wal]) + def __hash__(self) -> int: return hash(self.osd_id) @@ -812,7 +837,7 @@ def __init__(self, mgr: "CephadmOrchestrator") -> None: # network calls, like mon commands. self.lock = Lock() - def process_removal_queue(self) -> None: + def process_removal_queue(self) -> bool: """ Performs actions in the _serve() loop to remove an OSD when criteria is met. @@ -820,6 +845,8 @@ def process_removal_queue(self) -> None: we can't hold self.lock, as we're calling _remove_daemon in the loop """ + result: bool = False + # make sure that we don't run on OSDs that are not in the cluster anymore. 
self.cleanup() @@ -863,16 +890,23 @@ def process_removal_queue(self) -> None: if self.mgr.cache.has_daemon(f'osd.{osd.osd_id}'): CephadmServe(self.mgr)._remove_daemon(f'osd.{osd.osd_id}', osd.hostname) logger.info(f"Successfully removed {osd} on {osd.hostname}") + result = True else: logger.info(f"Daemon {osd} on {osd.hostname} was already removed") - if osd.replace: + any_replace_params: bool = any([osd.replace, + osd.replace_block, + osd.replace_db, + osd.replace_wal]) + if any_replace_params: # mark destroyed in osdmap if not osd.destroy(): raise orchestrator.OrchestratorError( f"Could not destroy {osd}") logger.info( f"Successfully destroyed old {osd} on {osd.hostname}; ready for replacement") + if any_replace_params: + osd.zap = True else: # purge from osdmap if not osd.purge(): @@ -884,7 +918,7 @@ def process_removal_queue(self) -> None: logger.info(f"Zapping devices for {osd} on {osd.hostname}") osd.do_zap() logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") - + self.mgr.cache.invalidate_host_devices(osd.hostname) logger.debug(f"Removing {osd} from the queue.") # self could change while this is processing (osds get added from the CLI) @@ -893,6 +927,7 @@ def process_removal_queue(self) -> None: with self.lock: self.osds.intersection_update(new_queue) self._save_to_store() + return result def cleanup(self) -> None: # OSDs can always be cleaned up manually. This ensures that we run on existing OSDs diff --git a/src/pybind/mgr/cephadm/services/smb.py b/src/pybind/mgr/cephadm/services/smb.py index dabc202a024bd..e322acb0e3e73 100644 --- a/src/pybind/mgr/cephadm/services/smb.py +++ b/src/pybind/mgr/cephadm/services/smb.py @@ -1,6 +1,9 @@ +import errno import logging from typing import Any, Dict, List, Tuple, cast, Optional +from mgr_module import HandleCommandResult + from ceph.deployment.service_spec import ServiceSpec, SMBSpec from orchestrator import DaemonDescription @@ -117,6 +120,23 @@ def ignore_possible_stray( return True return False + def ok_to_stop( + self, daemon_ids: List[str], force: bool = False, known: Optional[List[str]] = None + ) -> HandleCommandResult: + # if only 1 smb, alert user (this is not passable with --force) + warn, warn_message = self._enough_daemons_to_stop(self.TYPE, daemon_ids, "SMB", 1, True) + if warn: + return HandleCommandResult(-errno.EBUSY, "", warn_message) + + # if reached here, there is > 1 smb daemon. + if force: + return HandleCommandResult(0, warn_message, "") + + # if reached here, > 1 smb daemon and no force flag. + # Provide warning + warn_message = "WARNING: Removing SMB daemons can cause clients to lose connectivity. " + return HandleCommandResult(-errno.EBUSY, "", warn_message) + def _allow_config_key_command(self, name: str) -> str: # permit the samba container config access to the mon config key store # with keys like smb/config//*. 
diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 index 972ef22e7b58e..967f1355af14b 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -15,7 +15,8 @@ http_port = {{ http_port }} http_addr = {{ http_addr }} {% if mgmt_gw_enabled %} - root_url = %(protocol)s://%(domain)s/grafana/ + root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/ + serve_from_sub_path = true {% endif %} [snapshots] external_enabled = false diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 index f33bc6c8dfdc8..594582e7ee4c9 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/external_server.conf.j2 @@ -115,11 +115,11 @@ server { {% if grafana_endpoints %} location /grafana { - rewrite ^/grafana/(.*) /$1 break; proxy_pass {{ grafana_scheme }}://grafana_servers; # clear any Authorization header as Prometheus and Alertmanager are using basic-auth browser # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; + proxy_buffering off; {% if oauth2_proxy_url %} auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; diff --git a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 index 829c075758921..9148ddc4a142d 100644 --- a/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/mgmt-gateway/internal_server.conf.j2 @@ -1,5 +1,8 @@ server { + ssl_client_certificate /etc/nginx/ssl/ca.crt; + ssl_verify_client on; + listen {{ internal_port }} ssl; listen [::]:{{ internal_port }} ssl; ssl_certificate /etc/nginx/ssl/nginx_internal.crt; diff --git a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 index ded403169c976..03ff8a32ca292 100644 --- a/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 +++ b/src/pybind/mgr/cephadm/templates/services/nfs/ganesha.conf.j2 @@ -4,6 +4,7 @@ NFS_CORE_PARAM { Enable_RQUOTA = false; Protocols = 4; NFS_Port = {{ port }}; + allow_set_io_flusher_fail = true; {% if bind_addr %} Bind_addr = {{ bind_addr }}; {% endif %} diff --git a/src/pybind/mgr/cephadm/tests/ceph_volume_data.py b/src/pybind/mgr/cephadm/tests/ceph_volume_data.py new file mode 100644 index 0000000000000..afd6d89d39e40 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/ceph_volume_data.py @@ -0,0 +1 @@ +data = 
'{"0":[{"devices":["/dev/vdb"],"lv_name":"osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","lv_path":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.block_uuid=d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c,ceph.db_uuid=EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX,ceph.encrypted=0,ceph.osd_fsid=8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.osd_id=0,ceph.osdspec_affinity=osd.shared_db,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","name":"osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","path":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","tags":{"ceph.block_device":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.block_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","ceph.db_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","ceph.encrypted":"0","ceph.osd_fsid":"8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.osd_id":"0","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668"},{"devices":["/dev/vdk"],"lv_name":"osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","lv_path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.block_uuid=d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c,ceph.db_uuid=EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX,ceph.encrypted=0,ceph.osd_fsid=8cd7fa43-ef40-49e7-abb2-db5cfd91bc92,ceph.osd_id=0,ceph.osdspec_affinity=osd.shared_db,ceph.type=db,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","name":"osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","tags":{"ceph.block_device":"/dev/ceph-81c76363-7a89-47d2-83c1-fdcbab5d6668/osd-block-8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.block_uuid":"d518Lz-gTnC-FyX7-4MN2-icIp-LBCB-zdQw2p","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-f0f5e20c-f1ee-42df-9a78-0e70b9c08e6c","ceph.db_uuid":"EInXUQ-LDDO-7jCL-Y0Jb-tPZ2-KuKl-VNJ2hX","ceph.encrypted":"0","ceph.osd_fsid":"8cd7fa43-ef40-49e7-abb2-db5cfd91bc92","ceph.osd_id":"0","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"db","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"db","vg_name":
"ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf"}],"1":[{"devices":["/dev/vdc"],"lv_name":"osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","lv_path":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.block_uuid=Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774,ceph.db_uuid=1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC,ceph.encrypted=0,ceph.osd_fsid=aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.osd_id=1,ceph.osdspec_affinity=osd.shared_db,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","name":"osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","path":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","tags":{"ceph.block_device":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.block_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","ceph.db_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","ceph.encrypted":"0","ceph.osd_fsid":"aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.osd_id":"1","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb"},{"devices":["/dev/vdk"],"lv_name":"osd-db-38f53373-7575-4c90-98ca-28f189685774","lv_path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.block_uuid=Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774,ceph.db_uuid=1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC,ceph.encrypted=0,ceph.osd_fsid=aaa4c8cb-2b54-4df8-9846-17063c59b6ce,ceph.osd_id=1,ceph.osdspec_affinity=osd.shared_db,ceph.type=db,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","name":"osd-db-38f53373-7575-4c90-98ca-28f189685774","path":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","tags":{"ceph.block_device":"/dev/ceph-964cfc71-ad91-4189-97c1-cab4fd3066bb/osd-block-aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.block_uuid":"Ccvedr-7t3C-BgIg-lfSl-qW3J-Zw1V-FuH14l","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf/osd-db-38f53373-7575-4c90-98ca-28f189685774","ceph.db_uuid":"1mEAHd-mxQn-Qr9c-DkD8-XGOQ-xfIN-ZsPReC","ceph.encrypted":"0","ceph.osd_fsid":"aaa4c8cb-2b54-4df8-9846-17063c59b6ce","ceph.osd_id":"1","ceph.osdspec_affinity":"osd.shared_db","ceph.type":"db","ceph.vdo":"0"
,"ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-e10d6a69-68ec-44ba-bd3b-9a20d15cacbf"}],"2":[{"devices":["/dev/vdf"],"lv_name":"osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","lv_path":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33,ceph.block_uuid=adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=a0434b49-759a-46a4-91dc-d7cc65af3a33,ceph.osd_id=2,ceph.osdspec_affinity=None,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO","name":"osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","path":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","tags":{"ceph.block_device":"/dev/ceph-3ba7a728-709b-408c-a043-9e48704b5ffb/osd-block-a0434b49-759a-46a4-91dc-d7cc65af3a33","ceph.block_uuid":"adQsil-KScK-5QkX-bLbg-EpJa-sNJL-3oDtaO","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"a0434b49-759a-46a4-91dc-d7cc65af3a33","ceph.osd_id":"2","ceph.osdspec_affinity":"None","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-3ba7a728-709b-408c-a043-9e48704b5ffb"}],"3":[{"devices":["/dev/vde"],"lv_name":"osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","lv_path":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f,ceph.block_uuid=GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=861ea81a-c24b-4c69-b4f6-e527151b132f,ceph.osd_id=3,ceph.osdspec_affinity=None,ceph.type=block,ceph.vdo=0,ceph.with_tpm=0","lv_uuid":"GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8","name":"osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","path":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","tags":{"ceph.block_device":"/dev/ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b/osd-block-861ea81a-c24b-4c69-b4f6-e527151b132f","ceph.block_uuid":"GBfm14-4hPu-oaWk-wSdA-O1Fw-eU5o-Q2KOh8","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"861ea81a-c24b-4c69-b4f6-e527151b132f","ceph.osd_id":"3","ceph.osdspec_affinity":"None","ceph.type":"block","ceph.vdo":"0","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-97ac74d9-d351-4a7e-bbd1-27b8dd3e7f7b"}],"4":[{"devices":["/dev/vdg"],"lv_name":"osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","lv_path":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_cla
ss=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16,ceph.db_uuid=5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","name":"osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","path":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","ceph.db_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"block","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-20acdce8-5548-4707-a38e-b8e925485bc5"},{"devices":["/dev/vdj"],"lv_name":"osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","lv_path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","name":"osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"wal","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"wal","vg_name":"ceph-776f980b-152a-4e8f-99b6-bae27ed0b528"},{"devices":["/dev/vdi"],"lv_name":"osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","lv_path":"/dev/ceph-8da158be-4d0d-41bd-8
6ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.block_uuid=diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16,ceph.db_uuid=5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx,ceph.encrypted=0,ceph.osd_fsid=242c4a21-b076-424c-94fb-3f556ed2ddbd,ceph.osd_id=4,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=db,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008,ceph.wal_uuid=ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2,ceph.with_tpm=0","lv_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","name":"osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","tags":{"ceph.block_device":"/dev/ceph-20acdce8-5548-4707-a38e-b8e925485bc5/osd-block-242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.block_uuid":"diO6OQ-jjkD-tdVS-FJ5f-VcP7-8QEW-geP4Ds","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-19fc3a21-ce53-4881-9217-f1d58166af16","ceph.db_uuid":"5mng9E-Q3ej-37eY-Ny9C-p6wf-h17w-gC3jtx","ceph.encrypted":"0","ceph.osd_fsid":"242c4a21-b076-424c-94fb-3f556ed2ddbd","ceph.osd_id":"4","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"db","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-2542dafe-2ff7-4e8b-bc70-a0297b421008","ceph.wal_uuid":"ppb82k-9cEs-yb1K-QTNl-c4BM-33PQ-bNX0c2","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452"}],"5":[{"devices":["/dev/vdj"],"lv_name":"osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","lv_path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","name":"osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","path":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"wal","ceph.vdo":"
0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"wal","vg_name":"ceph-776f980b-152a-4e8f-99b6-bae27ed0b528"},{"devices":["/dev/vdh"],"lv_name":"osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","lv_path":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","lv_size":"214744170496","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb,ceph.db_uuid=wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","name":"osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","path":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","ceph.db_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"block","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"block","vg_name":"ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351"},{"devices":["/dev/vdi"],"lv_name":"osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","lv_path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","lv_size":"107369988096","lv_tags":"ceph.block_device=/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.block_uuid=gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=83231340-7cd4-11ef-ab48-525400e54507,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb,ceph.db_uuid=wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2,ceph.encrypted=0,ceph.osd_fsid=8cf28853-3453-49b0-a3f9-a693443ed75f,ceph.osd_id=5,ceph.osdspec_affinity=osd.shared_db_wal,ceph.type=db,ceph.vdo=0,ceph.wal_device=/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea,ceph.wal_uuid=DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz,ceph.with_tpm=0","lv_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","name":"osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","path":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75db
fc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","tags":{"ceph.block_device":"/dev/ceph-84a4ccfc-80f1-4784-9558-a9a08b15a351/osd-block-8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.block_uuid":"gmQkh2-T5i3-Kwfa-YMMO-j88X-RvDw-dx7N6E","ceph.cephx_lockbox_secret":"","ceph.cluster_fsid":"83231340-7cd4-11ef-ab48-525400e54507","ceph.cluster_name":"ceph","ceph.crush_device_class":"","ceph.db_device":"/dev/ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452/osd-db-635f592b-1d4f-4117-aaa6-b68878f84dfb","ceph.db_uuid":"wf407q-HwuD-OWhh-xm2A-d2sv-Fdsx-JqeUj2","ceph.encrypted":"0","ceph.osd_fsid":"8cf28853-3453-49b0-a3f9-a693443ed75f","ceph.osd_id":"5","ceph.osdspec_affinity":"osd.shared_db_wal","ceph.type":"db","ceph.vdo":"0","ceph.wal_device":"/dev/ceph-776f980b-152a-4e8f-99b6-bae27ed0b528/osd-wal-90739e2d-ec18-4761-8290-1ad508ecbeea","ceph.wal_uuid":"DFQDJy-6bE0-iagr-hgmh-oUEH-HF2R-ILBzzz","ceph.with_tpm":"0"},"type":"db","vg_name":"ceph-8da158be-4d0d-41bd-86ef-d75dbfc71452"}]}' diff --git a/src/pybind/mgr/cephadm/tests/conftest.py b/src/pybind/mgr/cephadm/tests/conftest.py index e8add2c7b834a..5cc2fabaf49b6 100644 --- a/src/pybind/mgr/cephadm/tests/conftest.py +++ b/src/pybind/mgr/cephadm/tests/conftest.py @@ -1,13 +1,14 @@ import pytest from cephadm.services.osd import RemoveUtil, OSD -from tests import mock - +from mock import mock from .fixtures import with_cephadm_module +from cephadm import CephadmOrchestrator +from typing import Generator @pytest.fixture() -def cephadm_module(): +def cephadm_module() -> Generator[CephadmOrchestrator, None, None]: with with_cephadm_module({}) as m: yield m diff --git a/src/pybind/mgr/cephadm/tests/fixtures.py b/src/pybind/mgr/cephadm/tests/fixtures.py index dd858c6c7dabe..dda0c6720ac6c 100644 --- a/src/pybind/mgr/cephadm/tests/fixtures.py +++ b/src/pybind/mgr/cephadm/tests/fixtures.py @@ -35,11 +35,11 @@ def get_module_option_ex(_, module, key, default=None): return None -def _run_cephadm(ret): +def _run_cephadm(ret, rc: int = 0): async def foo(s, host, entity, cmd, e, **kwargs): if cmd == 'gather-facts': return '{}', '', 0 - return [ret], '', 0 + return [ret], '', rc return foo diff --git a/src/pybind/mgr/cephadm/tests/test_ceph_volume.py b/src/pybind/mgr/cephadm/tests/test_ceph_volume.py new file mode 100644 index 0000000000000..cc1378a75753c --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_ceph_volume.py @@ -0,0 +1,231 @@ +import json +import pytest +from .ceph_volume_data import data +from cephadm.serve import CephadmServe +from cephadm import CephadmOrchestrator +from mock import patch +from .fixtures import _run_cephadm, with_host + + +class TestCephVolume: + def test_run(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('fake-output', 0)): + c = cephadm_module.ceph_volume.run('test', ['/bin/foo']) + assert c == (['fake-output'], '', 0) + + def test_run_json(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('{"this-is-a-fake-key": "this-is-a-fake-value"}', 0)): + c = cephadm_module.ceph_volume.run_json('test', ['/bin/foo']) + assert c == {"this-is-a-fake-key": 
"this-is-a-fake-value"} + + def test_clear_replace_header_ok(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('fake-output', 0)): + c = cephadm_module.ceph_volume.clear_replace_header('test', '/dev/foo') + assert c == 'Replacement header cleared on /dev/foo' + + def test_clear_replace_header_nok(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm('', 1)): + c = cephadm_module.ceph_volume.clear_replace_header('fake-output', '/dev/foo') + assert c.strip() == 'No replacement header could be cleared on /dev/foo.' + + +class TestCephVolumeList: + def test_get_data(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.data == json.loads(data) + + def test_devices_by_type_block(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.devices_by_type('block')) == set(['/dev/vdb', + '/dev/vdc', + '/dev/vdg', + '/dev/vde', + '/dev/vdf', + '/dev/vdh']) + + def test_devices_by_type_db(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.devices_by_type('db')) == set(['/dev/vdi', + '/dev/vdk']) + + def test_devices_by_type_wal(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.devices_by_type('wal') == ['/dev/vdj'] + + def test_block_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.block_devices()) == set(['/dev/vdb', + '/dev/vdc', + '/dev/vdg', 
+ '/dev/vde', + '/dev/vdf', + '/dev/vdh']) + + def test_db_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.db_devices()) == set(['/dev/vdk', + '/dev/vdi']) + + def test_wal_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.wal_devices()) == set(['/dev/vdj']) + + def test_all_devices(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.all_devices()) == set(['/dev/vdg', + '/dev/vdj', + '/dev/vdh', + '/dev/vdi', + '/dev/vdc', + '/dev/vde', + '/dev/vdf', + '/dev/vdb', + '/dev/vdk']) + + def test_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.device_osd_mapping() == {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdk': {'osd_ids': ['0', '1']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3']}, + '/dev/vdg': {'osd_ids': ['4']}, + '/dev/vdj': {'osd_ids': ['4', '5']}, + '/dev/vdi': {'osd_ids': ['4', '5']}, + '/dev/vdh': {'osd_ids': ['5']}} + + def test_block_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.block_device_osd_mapping() == {'/dev/vdb': {'osd_ids': ['0']}, + '/dev/vdc': {'osd_ids': ['1']}, + '/dev/vdf': {'osd_ids': ['2']}, + '/dev/vde': {'osd_ids': ['3']}, + '/dev/vdg': {'osd_ids': ['4']}, + '/dev/vdh': {'osd_ids': ['5']}} + + def test_db_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.db_device_osd_mapping() == {'/dev/vdk': {'osd_ids': 
['0', '1']}, + '/dev/vdi': {'osd_ids': ['4', '5']}} + + def test_wal_device_osd_mapping(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.wal_device_osd_mapping() == {'/dev/vdj': {'osd_ids': ['4', '5']}} + + def test_is_shared_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_shared_device('/dev/vdj') + + def test_is_shared_device_with_invalid_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + with pytest.raises(RuntimeError) as e: + assert cephadm_module.ceph_volume.lvm_list.is_shared_device('/dev/invalid-device') + assert str(e.value) == 'Not a valid device path.' + + def test_is_block_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_block_device('/dev/vdb') + + def test_is_db_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_db_device('/dev/vdk') + + def test_is_wal_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.is_wal_device('/dev/vdj') + + def test_get_block_devices_from_osd_id(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert cephadm_module.ceph_volume.lvm_list.get_block_devices_from_osd_id('0') == ['/dev/vdb'] + + def test_osd_ids(self, 
cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + cephadm_module.ceph_volume.lvm_list.get_data('test') + assert set(cephadm_module.ceph_volume.lvm_list.osd_ids()) == set(['0', '1', '2', '3', '4', '5']) diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 5a485f98be390..975c125225dc8 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -2040,7 +2040,7 @@ def test_blink_device_light_custom_per_host(self, _run_cephadm, cephadm_module): ), CephadmOrchestrator.apply_iscsi), (CustomContainerSpec( service_id='hello-world', - image='docker.io/library/hello-world:latest', + image='quay.io/hello-world/hello-world:latest', uid=65534, gid=65534, dirs=['foo/bar'], diff --git a/src/pybind/mgr/cephadm/tests/test_replace_device.py b/src/pybind/mgr/cephadm/tests/test_replace_device.py new file mode 100644 index 0000000000000..b4a2c81ad9a76 --- /dev/null +++ b/src/pybind/mgr/cephadm/tests/test_replace_device.py @@ -0,0 +1,53 @@ +import pytest +from mock import patch +from .fixtures import _run_cephadm, with_host, wait +from .ceph_volume_data import data +from cephadm.serve import CephadmServe +from cephadm import CephadmOrchestrator +from orchestrator import OrchestratorError + + +class TestReplaceDevice: + def test_invalid_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError) as e: + cephadm_module.replace_device('test', '/dev/invalid-device') + assert "/dev/invalid-device doesn't appear to be used for an OSD, not a valid device in test." in str(e.value) + + def test_invalid_hostname(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError): + cephadm_module.replace_device('invalid-hostname', '/dev/vdb') + + def test_block_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + c = cephadm_module.replace_device('test', '/dev/vdb') + result = wait(cephadm_module, c) + assert result == "Scheduled to destroy osds: ['0'] and mark /dev/vdb as being replaced." 
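+
+    # In the ceph_volume_data fixture, /dev/vdk backs the DB of both OSD 0 and
+    # OSD 1, so the two tests below exercise the shared-device path: replacement
+    # is refused unless yes_i_really_mean_it is passed.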
+ + def test_shared_db_device_no_ireallymeanit_flag(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + with pytest.raises(OrchestratorError) as e: + cephadm_module.replace_device('test', '/dev/vdk') + assert "/dev/vdk is a shared device.\nReplacing /dev/vdk implies destroying OSD(s): ['0', '1'].\nPlease, *be very careful*, this can be a very dangerous operation.\nIf you know what you are doing, pass --yes-i-really-mean-it" in str(e.value) + + def test_shared_db_device(self, cephadm_module: CephadmOrchestrator) -> None: + with patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('[]')): + with with_host(cephadm_module, 'test'): + CephadmServe(cephadm_module)._refresh_host_daemons('test') + with patch('cephadm.serve.CephadmServe._run_cephadm', _run_cephadm(data)): + c = cephadm_module.replace_device('test', '/dev/vdk', yes_i_really_mean_it=True) + result = wait(cephadm_module, c) + assert result == "Scheduled to destroy osds: ['0', '1'] and mark /dev/vdk as being replaced." diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index a9b7da624a0e6..4b11a588ad393 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -49,9 +49,9 @@ cephadm_root_ca = """-----BEGIN CERTIFICATE-----\\nMIIE7DCCAtSgAwIBAgIUE8b2zZ64geu2ns3Zfn3/4L+Cf6MwDQYJKoZIhvcNAQEL\\nBQAwFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MB4XDTI0MDYyNjE0NDA1M1oXDTM0\\nMDYyNzE0NDA1M1owFzEVMBMGA1UEAwwMY2VwaGFkbS1yb290MIICIjANBgkqhkiG\\n9w0BAQEFAAOCAg8AMIICCgKCAgEAsZRJsdtTr9GLG1lWFql5SGc46ldFanNJd1Gl\\nqXq5vgZVKRDTmNgAb/XFuNEEmbDAXYIRZolZeYKMHfn0pouPRSel0OsC6/02ZUOW\\nIuN89Wgo3IYleCFpkVIumD8URP3hwdu85plRxYZTtlruBaTRH38lssyCqxaOdEt7\\nAUhvYhcMPJThB17eOSQ73mb8JEC83vB47fosI7IhZuvXvRSuZwUW30rJanWNhyZq\\neS2B8qw2RSO0+77H6gA4ftBnitfsE1Y8/F9Z/f92JOZuSMQXUB07msznPbRJia3f\\nueO8gOc32vxd1A1/Qzp14uX34yEGY9ko2lW226cZO29IVUtXOX+LueQttwtdlpz8\\ne6Npm09pXhXAHxV/OW3M28MdXmobIqT/m9MfkeAErt5guUeC5y8doz6/3VQRjFEn\\nRpN0WkblgnNAQ3DONPc+Qd9Fi/wZV2X7bXoYpNdoWDsEOiE/eLmhG1A2GqU/mneP\\nzQ6u79nbdwTYpwqHpa+PvusXeLfKauzI8lLUJotdXy9EK8iHUofibB61OljYye6B\\nG3b8C4QfGsw8cDb4APZd/6AZYyMx/V3cGZ+GcOV7WvsC8k7yx5Uqasm/kiGQ3EZo\\nuNenNEYoGYrjb8D/8QzqNUTwlEh27/ps80tO7l2GGTvWVZL0PRZbmLDvO77amtOf\\nOiRXMoUCAwEAAaMwMC4wGwYDVR0RBBQwEocQAAAAAAAAAAAAAAAAAAAAATAPBgNV\\nHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQAxwzX5AhYEWhTV4VUwUj5+\\nqPdl4Q2tIxRokqyE+cDxoSd+6JfGUefUbNyBxDt0HaBq8obDqqrbcytxnn7mpnDu\\nhtiauY+I4Amt7hqFOiFA4cCLi2mfok6g2vL53tvhd9IrsfflAU2wy7hL76Ejm5El\\nA+nXlkJwps01Whl9pBkUvIbOn3pXX50LT4hb5zN0PSu957rjd2xb4HdfuySm6nW4\\n4GxtVWfmGA6zbC4XMEwvkuhZ7kD2qjkAguGDF01uMglkrkCJT3OROlNBuSTSBGqt\\ntntp5VytHvb7KTF7GttM3ha8/EU2KYaHM6WImQQTrOfiImAktOk4B3lzUZX3HYIx\\n+sByO4P4dCvAoGz1nlWYB2AvCOGbKf0Tgrh4t4jkiF8FHTXGdfvWmjgi1pddCNAy\\nn65WOCmVmLZPERAHOk1oBwqyReSvgoCFo8FxbZcNxJdlhM0Z6hzKggm3O3Dl88Xl\\n5euqJjh2STkBW8Xuowkg1TOs5XyWvKoDFAUzyzeLOL8YSG+gXV22gPTUaPSVAqdb\\nwd0Fx2kjConuC5bgTzQHs8XWA930U3XWZraj21Vaa8UxlBLH4fUro8H5lMSYlZNE\\nJHRNW8BkznAClaFSDG3dybLsrzrBFAu/Qb5zVkT1xyq0YkepGB7leXwq6vjWA5Pw\\nmZbKSphWfh0qipoqxqhfkw==\\n-----END CERTIFICATE-----\\n""" -ceph_generated_cert = """-----BEGIN 
CERTIFICATE-----\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\n-----END CERTIFICATE-----\n""" +ceph_generated_cert = """-----BEGIN CERTIFICATE-----\\nMIICxjCCAa4CEQDIZSujNBlKaLJzmvntjukjMA0GCSqGSIb3DQEBDQUAMCExDTAL\\nBgNVBAoMBENlcGgxEDAOBgNVBAMMB2NlcGhhZG0wHhcNMjIwNzEzMTE0NzA3WhcN\\nMzIwNzEwMTE0NzA3WjAhMQ0wCwYDVQQKDARDZXBoMRAwDgYDVQQDDAdjZXBoYWRt\\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAyyMe4DMA+MeYK7BHZMHB\\nq7zjliEOcNgxomjU8qbf5USF7Mqrf6+/87XWqj4pCyAW8x0WXEr6A56a+cmBVmt+\\nqtWDzl020aoId6lL5EgLLn6/kMDCCJLq++Lg9cEofMSvcZh+lY2f+1p+C+00xent\\nrLXvXGOilAZWaQfojT2BpRnNWWIFbpFwlcKrlg2G0cFjV5c1m6a0wpsQ9JHOieq0\\nSvwCixajwq3CwAYuuiU1wjI4oJO4Io1+g8yB3nH2Mo/25SApCxMXuXh4kHLQr/T4\\n4hqisvG4uJYgKMcSIrWj5o25mclByGi1UI/kZkCUES94i7Z/3ihx4Bad0AMs/9tw\\nFwIDAQABMA0GCSqGSIb3DQEBDQUAA4IBAQAf+pwz7Gd7mDwU2LY0TQXsK6/8KGzh\\nHuX+ErOb8h5cOAbvCnHjyJFWf6gCITG98k9nxU9NToG0WYuNm/max1y/54f0dtxZ\\npUo6KSNl3w6iYCfGOeUIj8isi06xMmeTgMNzv8DYhDt+P2igN6LenqWTVztogkiV\\nxQ5ZJFFLEw4sN0CXnrZX3t5ruakxLXLTLKeE0I91YJvjClSBGkVJq26wOKQNHMhx\\npWxeydQ5EgPZY+Aviz5Dnxe8aB7oSSovpXByzxURSabOuCK21awW5WJCGNpmqhWK\\nZzACBDEstccj57c4OGV0eayHJRsluVr2e9NHRINZA3qdB37e6gsI1xHo\\n-----END CERTIFICATE-----\\n""" -ceph_generated_key = """-----BEGIN PRIVATE 
KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\n39gnaegswnz9KMQAvzKFdg==\n-----END PRIVATE KEY-----\n""" +ceph_generated_key = """-----BEGIN PRIVATE KEY-----\\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQDLIx7gMwD4x5gr\\nsEdkwcGrvOOWIQ5w2DGiaNTypt/lRIXsyqt/r7/ztdaqPikLIBbzHRZcSvoDnpr5\\nyYFWa36q1YPOXTbRqgh3qUvkSAsufr+QwMIIkur74uD1wSh8xK9xmH6VjZ/7Wn4L\\n7TTF6e2ste9cY6KUBlZpB+iNPYGlGc1ZYgVukXCVwquWDYbRwWNXlzWbprTCmxD0\\nkc6J6rRK/AKLFqPCrcLABi66JTXCMjigk7gijX6DzIHecfYyj/blICkLExe5eHiQ\\nctCv9PjiGqKy8bi4liAoxxIitaPmjbmZyUHIaLVQj+RmQJQRL3iLtn/eKHHgFp3Q\\nAyz/23AXAgMBAAECggEAVoTB3Mm8azlPlaQB9GcV3tiXslSn+uYJ1duCf0sV52dV\\nBzKW8s5fGiTjpiTNhGCJhchowqxoaew+o47wmGc2TvqbpeRLuecKrjScD0GkCYyQ\\neM2wlshEbz4FhIZdgS6gbuh9WaM1dW/oaZoBNR5aTYo7xYTmNNeyLA/jO2zr7+4W\\n5yES1lMSBXpKk7bDGKYY4bsX2b5RLr2Grh2u2bp7hoLABCEvuu8tSQdWXLEXWpXo\\njwmV3hc6tabypIa0mj2Dmn2Dmt1ppSO0AZWG/WAizN3f4Z0r/u9HnbVrVmh0IEDw\\n3uf2LP5o3msG9qKCbzv3lMgt9mMr70HOKnJ8ohMSKQKBgQDLkNb+0nr152HU9AeJ\\nvdz8BeMxcwxCG77iwZphZ1HprmYKvvXgedqWtS6FRU+nV6UuQoPUbQxJBQzrN1Qv\\nwKSlOAPCrTJgNgF/RbfxZTrIgCPuK2KM8I89VZv92TSGi362oQA4MazXC8RAWjoJ\\nSu1/PHzK3aXOfVNSLrOWvIYeZQKBgQD/dgT6RUXKg0UhmXj7ExevV+c7oOJTDlMl\\nvLngrmbjRgPO9VxLnZQGdyaBJeRngU/UXfNgajT/MU8B5fSKInnTMawv/tW7634B\\nw3v6n5kNIMIjJmENRsXBVMllDTkT9S7ApV+VoGnXRccbTiDapBThSGd0wri/CuwK\\nNWK1YFOeywKBgEDyI/XG114PBUJ43NLQVWm+wx5qszWAPqV/2S5MVXD1qC6zgCSv\\nG9NLWN1CIMimCNg6dm7Wn73IM7fzvhNCJgVkWqbItTLG6DFf3/DPODLx1wTMqLOI\\nqFqMLqmNm9l1Nec0dKp5BsjRQzq4zp1aX21hsfrTPmwjxeqJZdioqy2VAoGAXR5X\\nCCdSHlSlUW8RE2xNOOQw7KJjfWT+WAYoN0c7R+MQplL31rRU7dpm1bLLRBN11vJ8\\nMYvlT5RYuVdqQSP6BkrX+hLJNBvOLbRlL+EXOBrVyVxHCkDe+u7+DnC4epbn+N8P\\nLYpwqkDMKB7diPVAizIKTBxinXjMu5fkKDs5n+sCgYBbZheYKk5M0sIxiDfZuXGB\\nkf4mJdEkTI1KUGRdCwO/O7hXbroGoUVJTwqBLi1tKqLLarwCITje2T200BYOzj82\\nqwRkCXGtXPKnxYEEUOiFx9OeDrzsZV00cxsEnX0Zdj+PucQ/J3Cvd0dWUspJfLHJ\\n39gnaegswnz9KMQAvzKFdg==\\n-----END PRIVATE KEY-----\\n""" class FakeInventory: @@ -602,6 +602,101 @@ def 
test_alertmanager_config( use_current_daemon_image=False, ) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("socket.getfqdn") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') + @patch("cephadm.services.monitoring.password_hash", lambda password: 'alertmanager_password_hash') + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: 'cephadm_root_cert') + @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: ('mycert', 'mykey')) + def test_alertmanager_config_when_mgmt_gw_enabled(self, _get_fqdn, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(('{}', '', 0)) + + fqdn = 'host1.test' + _get_fqdn.return_value = fqdn + + with with_host(cephadm_module, 'test'): + cephadm_module.secure_monitoring_stack = True + cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') + cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ + with_service(cephadm_module, AlertManagerSpec()): + + y = dedent(""" + # This file is generated by cephadm. + # See https://prometheus.io/docs/alerting/configuration/ for documentation. + + global: + resolve_timeout: 5m + http_config: + tls_config: + ca_file: root_cert.pem + + route: + receiver: 'default' + routes: + - group_by: ['alertname'] + group_wait: 10s + group_interval: 10s + repeat_interval: 1h + receiver: 'ceph-dashboard' + + receivers: + - name: 'default' + webhook_configs: + - name: 'ceph-dashboard' + webhook_configs: + - url: 'https://host_fqdn:29443/internal/dashboard/api/prometheus_receiver' + """).lstrip() + + web_config = dedent(""" + tls_server_config: + cert_file: alertmanager.crt + key_file: alertmanager.key + client_auth_type: RequireAndVerifyClientCert + client_ca_file: root_cert.pem + basic_auth_users: + alertmanager_user: alertmanager_password_hash + """).lstrip() + + _run_cephadm.assert_called_with( + 'test', + "alertmanager.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'alertmanager.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9093, 9094], + }, + "meta": { + 'service_name': 'alertmanager', + 'ports': [9093, 9094], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "alertmanager.yml": y, + 'alertmanager.crt': 'mycert', + 'alertmanager.key': 'mykey', + 'web.yml': web_config, + 'root_cert.pem': 'cephadm_root_cert' + }, + 'peers': [], + 'web_config': '/etc/alertmanager/web.yml', + "use_url_prefix": True, + } + }), + use_current_daemon_image=False, + ) + @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("socket.getfqdn") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') @@ -738,6 +833,110 @@ def test_ceph_exporter_config_security_enabled(self, _get_fqdn, _run_cephadm, ce "ceph-exporter.key": "mykey"}}}), use_current_daemon_image=False) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("mgr_module.MgrModule.get") + @patch("socket.getfqdn") + def test_node_exporter_config_without_mgmt_gw( + self, + mock_getfqdn, + mock_get, + _run_cephadm, + cephadm_module: CephadmOrchestrator, + ): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + fqdn = 'host1.test' + mock_getfqdn.return_value = fqdn + + with with_host(cephadm_module, "test"): + with 
with_service(cephadm_module, MonitoringSpec('node-exporter')): + _run_cephadm.assert_called_with( + 'test', + "node-exporter.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'node-exporter.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": {} + }), + use_current_daemon_image=False, + ) + + @patch('cephadm.cert_mgr.CertMgr.generate_cert', lambda instance, fqdn, ip: (ceph_generated_cert, ceph_generated_key)) + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("socket.getfqdn") + def test_node_exporter_config_with_mgmt_gw( + self, + mock_getfqdn, + _run_cephadm, + cephadm_module: CephadmOrchestrator, + ): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + mock_getfqdn.return_value = 'host1.test' + + y = dedent(""" + tls_server_config: + cert_file: node_exporter.crt + key_file: node_exporter.key + client_auth_type: RequireAndVerifyClientCert + client_ca_file: root_cert.pem + """).lstrip() + + with with_host(cephadm_module, "test"): + with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ + with_service(cephadm_module, MonitoringSpec('node-exporter')): + _run_cephadm.assert_called_with( + 'test', + "node-exporter.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'node-exporter.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [9100], + }, + "meta": { + 'service_name': 'node-exporter', + 'ports': [9100], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": { + "web.yml": y, + 'root_cert.pem': f"{cephadm_root_ca}", + 'node_exporter.crt': f"{ceph_generated_cert}", + 'node_exporter.key': f"{ceph_generated_key}", + }, + 'web_config': '/etc/node-exporter/web.yml', + } + }), + use_current_daemon_image=False, + ) + @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') def test_prometheus_config_security_disabled(self, _run_cephadm, cephadm_module: CephadmOrchestrator): @@ -1240,6 +1439,286 @@ def test_promtail_config(self, _run_cephadm, cephadm_module: CephadmOrchestrator use_current_daemon_image=False, ) + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn') + @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) + def test_grafana_config_with_mgmt_gw_and_ouath2_proxy(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + + y = dedent(f""" + # This file is generated by cephadm. 
+ apiVersion: 1 + + deleteDatasources: + - name: 'Dashboard1' + orgId: 1 + + datasources: + - name: 'Dashboard1' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: 'https://host_fqdn:29443/internal/prometheus' + basicAuth: true + isDefault: true + editable: false + basicAuthUser: admin + jsonData: + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: true + tlsSkipVerify: false + secureJsonData: + basicAuthPassword: admin + tlsCACert: "{cephadm_root_ca}" + tlsClientCert: "{ceph_generated_cert}" + tlsClientKey: "{ceph_generated_key}" + + - name: 'Loki' + type: 'loki' + access: 'proxy' + url: '' + basicAuth: false + isDefault: false + editable: false""").lstrip() + + oauth2_spec = OAuth2ProxySpec(provider_display_name='my_idp_provider', + client_id='my_client_id', + client_secret='my_client_secret', + oidc_issuer_url='http://192.168.10.10:8888/dex', + cookie_secret='kbAEM9opAmuHskQvt0AW8oeJRaOM2BYy5Loba0kZ0SQ=', + ssl_certificate=ceph_generated_cert, + ssl_certificate_key=ceph_generated_key) + + with with_host(cephadm_module, "test"): + cephadm_module.cert_key_store.save_cert('grafana_cert', ceph_generated_cert, host='test') + cephadm_module.cert_key_store.save_key('grafana_key', ceph_generated_key, host='test') + with with_service(cephadm_module, PrometheusSpec("prometheus")) as _, \ + with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ + with_service(cephadm_module, oauth2_spec) as _, \ + with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( + cephadm_module, GrafanaSpec("grafana") + ) as _: + files = { + 'grafana.ini': dedent(""" + # This file is generated by cephadm. + [users] + default_theme = light + [auth.anonymous] + enabled = true + org_name = 'Main Org.' + org_role = 'Viewer' + [server] + domain = 'host_fqdn' + protocol = https + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = 3000 + http_addr = + root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/ + serve_from_sub_path = true + [snapshots] + external_enabled = false + [security] + disable_initial_admin_creation = true + cookie_secure = true + cookie_samesite = none + allow_embedding = true + [auth] + disable_login_form = true + [auth.proxy] + enabled = true + header_name = X-WEBAUTH-USER + header_property = username + auto_sign_up = true + sync_ttl = 15 + whitelist = 1::4 + headers_encoded = false + enable_login_token = false + headers = Role:X-WEBAUTH-ROLE\n""").lstrip(), # noqa: W291 + "provisioning/datasources/ceph-dashboard.yml": y, + 'certs/cert_file': dedent(f""" + # generated by cephadm\n{ceph_generated_cert}""").lstrip(), + 'certs/cert_key': dedent(f""" + # generated by cephadm\n{ceph_generated_key}""").lstrip(), + 'provisioning/dashboards/default.yml': dedent(""" + # This file is generated by cephadm. 
+ apiVersion: 1 + + providers: + - name: 'Ceph Dashboard' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 3 + editable: false + options: + path: '/etc/grafana/provisioning/dashboards'""").lstrip(), + } + + _run_cephadm.assert_called_with( + 'test', + "grafana.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'grafana.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3000], + }, + "meta": { + 'service_name': 'grafana', + 'ports': [3000], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": files, + }, + }), + use_current_daemon_image=False, + ) + + @patch("cephadm.serve.CephadmServe._run_cephadm") + @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') + @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn') + @patch("cephadm.services.monitoring.verify_tls", lambda *_: None) + @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) + def test_grafana_config_with_mgmt_gw(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.side_effect = async_side_effect(("{}", "", 0)) + + y = dedent(f""" + # This file is generated by cephadm. + apiVersion: 1 + + deleteDatasources: + - name: 'Dashboard1' + orgId: 1 + + datasources: + - name: 'Dashboard1' + type: 'prometheus' + access: 'proxy' + orgId: 1 + url: 'https://host_fqdn:29443/internal/prometheus' + basicAuth: true + isDefault: true + editable: false + basicAuthUser: admin + jsonData: + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: true + tlsSkipVerify: false + secureJsonData: + basicAuthPassword: admin + tlsCACert: "{cephadm_root_ca}" + tlsClientCert: "{ceph_generated_cert}" + tlsClientKey: "{ceph_generated_key}" + + - name: 'Loki' + type: 'loki' + access: 'proxy' + url: '' + basicAuth: false + isDefault: false + editable: false""").lstrip() + + with with_host(cephadm_module, "test"): + cephadm_module.cert_key_store.save_cert('grafana_cert', ceph_generated_cert, host='test') + cephadm_module.cert_key_store.save_key('grafana_key', ceph_generated_key, host='test') + with with_service( + cephadm_module, PrometheusSpec("prometheus") + ) as _, with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ + with_service(cephadm_module, ServiceSpec("mgr")) as _, with_service( + cephadm_module, GrafanaSpec("grafana") + ) as _: + files = { + 'grafana.ini': dedent(""" + # This file is generated by cephadm. + [users] + default_theme = light + [auth.anonymous] + enabled = true + org_name = 'Main Org.' + org_role = 'Viewer' + [server] + domain = 'host_fqdn' + protocol = https + cert_file = /etc/grafana/certs/cert_file + cert_key = /etc/grafana/certs/cert_key + http_port = 3000 + http_addr = + root_url = %(protocol)s://%(domain)s:%(http_port)s/grafana/ + serve_from_sub_path = true + [snapshots] + external_enabled = false + [security] + disable_initial_admin_creation = true + cookie_secure = true + cookie_samesite = none + allow_embedding = true\n""").lstrip(), # noqa: W291 + "provisioning/datasources/ceph-dashboard.yml": y, + 'certs/cert_file': dedent(f""" + # generated by cephadm\n{ceph_generated_cert}""").lstrip(), + 'certs/cert_key': dedent(f""" + # generated by cephadm\n{ceph_generated_key}""").lstrip(), + 'provisioning/dashboards/default.yml': dedent(""" + # This file is generated by cephadm. 
+ apiVersion: 1 + + providers: + - name: 'Ceph Dashboard' + orgId: 1 + folder: '' + type: file + disableDeletion: false + updateIntervalSeconds: 3 + editable: false + options: + path: '/etc/grafana/provisioning/dashboards'""").lstrip(), + } + + _run_cephadm.assert_called_with( + 'test', + "grafana.test", + ['_orch', 'deploy'], + [], + stdin=json.dumps({ + "fsid": "fsid", + "name": 'grafana.test', + "image": '', + "deploy_arguments": [], + "params": { + 'tcp_ports': [3000], + }, + "meta": { + 'service_name': 'grafana', + 'ports': [3000], + 'ip': None, + 'deployed_by': [], + 'rank': None, + 'rank_generation': None, + 'extra_container_args': None, + 'extra_entrypoint_args': None, + }, + "config_blobs": { + "files": files, + }, + }), + use_current_daemon_image=False, + ) + @patch("cephadm.serve.CephadmServe._run_cephadm") @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '1::4') @patch("cephadm.module.CephadmOrchestrator.get_fqdn", lambda a, b: 'host_fqdn') @@ -2710,6 +3189,7 @@ def fake_keys(): ' Enable_RQUOTA = false;\n' ' Protocols = 4;\n' ' NFS_Port = 2049;\n' + ' allow_set_io_flusher_fail = true;\n' ' HAProxy_Hosts = 192.168.122.111, 10.10.2.20, 192.168.122.222;\n' '}\n' '\n' @@ -3296,7 +3776,7 @@ class TestMgmtGateway: @patch("cephadm.module.CephadmOrchestrator.get_mgr_ip", lambda _: '::1') @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https")) - def test_mgmt_gateway_config_no_auth(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): + def test_mgmt_gw_config_no_auth(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): def get_services_endpoints(name): if name == 'prometheus': @@ -3417,11 +3897,11 @@ def get_services_endpoints(name): } location /grafana { - rewrite ^/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; # clear any Authorization header as Prometheus and Alertmanager are using basic-auth browser # will send this header if Grafana is running on the same node as one of those services proxy_set_header Authorization ""; + proxy_buffering off; } location /prometheus { @@ -3446,6 +3926,9 @@ def get_services_endpoints(name): }"""), "nginx_internal_server.conf": dedent(""" server { + ssl_client_certificate /etc/nginx/ssl/ca.crt; + ssl_verify_client on; + listen 29443 ssl; listen [::]:29443 ssl; ssl_certificate /etc/nginx/ssl/nginx_internal.crt; @@ -3518,7 +4001,7 @@ def get_services_endpoints(name): @patch('cephadm.cert_mgr.CertMgr.get_root_ca', lambda instance: cephadm_root_ca) @patch("cephadm.services.mgmt_gateway.get_dashboard_endpoints", lambda _: (["ceph-node-2:8443", "ceph-node-2:8443"], "https")) @patch("cephadm.services.mgmt_gateway.MgmtGatewayService.get_oauth2_service_url", lambda _: "https://192.168.100.102:4180") - def test_mgmt_gateway_config_with_auth(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): + def test_mgmt_gw_config_with_auth(self, get_service_endpoints_mock: List[str], _run_cephadm, cephadm_module: CephadmOrchestrator): def get_services_endpoints(name): if name == 'prometheus': @@ -3689,11 +4172,11 @@ def get_services_endpoints(name): } location /grafana { - rewrite ^/grafana/(.*) /$1 break; proxy_pass https://grafana_servers; # clear any Authorization header as Prometheus and Alertmanager are using basic-auth browser # will send this header if 
Grafana is running on the same node as one of those services proxy_set_header Authorization ""; + proxy_buffering off; auth_request /oauth2/auth; error_page 401 = /oauth2/sign_in; @@ -3760,6 +4243,9 @@ def get_services_endpoints(name): }"""), "nginx_internal_server.conf": dedent(""" server { + ssl_client_certificate /etc/nginx/ssl/ca.crt; + ssl_verify_client on; + listen 29443 ssl; listen [::]:29443 ssl; ssl_certificate /etc/nginx/ssl/nginx_internal.crt; diff --git a/src/pybind/mgr/cephadm/tests/test_spec.py b/src/pybind/mgr/cephadm/tests/test_spec.py index 78a2d73118fe7..42e590945cd96 100644 --- a/src/pybind/mgr/cephadm/tests/test_spec.py +++ b/src/pybind/mgr/cephadm/tests/test_spec.py @@ -130,7 +130,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "d94d7969094d", "container_image_id": "0881eb8f169f5556a292b4e2c01d683172b12830a62a9225a98a8e206bb734f0", - "container_image_name": "docker.io/prom/alertmanager:latest", + "container_image_name": "quay.io/prometheus/alertmanager:latest", "daemon_id": "ceph-001", "daemon_type": "alertmanager", "version": "0.20.0", @@ -145,7 +145,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "c4b036202241", "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", - "container_image_name": "docker.io/ceph/ceph:v15", + "container_image_name": "quay.io/ceph/ceph:v15", "daemon_id": "ceph-001", "daemon_type": "crash", "version": "15.2.0", @@ -160,7 +160,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "5b7b94b48f31", "container_image_id": "87a51ecf0b1c9a7b187b21c1b071425dafea0d765a96d5bc371c791169b3d7f4", - "container_image_name": "docker.io/ceph/ceph-grafana:latest", + "container_image_name": "quay.io/ceph/ceph-grafana:latest", "daemon_id": "ceph-001", "daemon_type": "grafana", "version": "6.6.2", @@ -175,7 +175,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "9ca007280456", "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", - "container_image_name": "docker.io/ceph/ceph:v15", + "container_image_name": "quay.io/ceph/ceph:v15", "daemon_id": "ceph-001.gkjwqp", "daemon_type": "mgr", "version": "15.2.0", @@ -190,7 +190,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "3d1ba9a2b697", "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", - "container_image_name": "docker.io/ceph/ceph:v15", + "container_image_name": "quay.io/ceph/ceph:v15", "daemon_id": "ceph-001", "daemon_type": "mon", "version": "15.2.0", @@ -205,7 +205,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "36d026c68ba1", "container_image_id": "e5a616e4b9cf68dfcad7782b78e118be4310022e874d52da85c55923fb615f87", - "container_image_name": "docker.io/prom/node-exporter:latest", + "container_image_name": "quay.io/prometheus/node-exporter:latest", "daemon_id": "ceph-001", "daemon_type": "node-exporter", "version": "0.18.1", @@ -220,7 +220,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "faf76193cbfe", "container_image_id": "204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", - "container_image_name": "docker.io/ceph/ceph:v15", + "container_image_name": "quay.io/ceph/ceph:v15", "daemon_id": "0", "daemon_type": "osd", "version": "15.2.0", @@ -235,7 +235,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "f82505bae0f1", "container_image_id": 
"204a01f9b0b6710dd0c0af7f37ce7139c47ff0f0105d778d7104c69282dfbbf1", - "container_image_name": "docker.io/ceph/ceph:v15", + "container_image_name": "quay.io/ceph/ceph:v15", "daemon_id": "1", "daemon_type": "osd", "version": "15.2.0", @@ -250,7 +250,7 @@ def convert_to_old_style_json(j): "hostname": "ceph-001", "container_id": "2708d84cd484", "container_image_id": "358a0d2395fe711bb8258e8fb4b2d7865c0a9a6463969bcd1452ee8869ea6653", - "container_image_name": "docker.io/prom/prometheus:latest", + "container_image_name": "quay.io/prom/prometheus:latest", "daemon_id": "ceph-001", "daemon_type": "prometheus", "version": "2.17.1", @@ -569,7 +569,7 @@ def convert_to_old_style_json(j): CustomContainerSpec( service_type='container', service_id='hello-world', - image='docker.io/library/hello-world:latest', + image='quay.io/hello-world/hello-world:latest', ), DaemonDescription( daemon_type='container', diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index d8ffab2da5187..ed3d26807e5ce 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -29,17 +29,17 @@ def normalize_image_digest(digest: str, default_registry: str) -> str: """ Normal case: - >>> normalize_image_digest('ceph/ceph', 'docker.io') - 'docker.io/ceph/ceph' + >>> normalize_image_digest('ceph/ceph', 'quay.io') + 'quay.io/ceph/ceph' No change: - >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'docker.io') + >>> normalize_image_digest('quay.ceph.io/ceph/ceph', 'quay.io') 'quay.ceph.io/ceph/ceph' - >>> normalize_image_digest('docker.io/ubuntu', 'docker.io') - 'docker.io/ubuntu' + >>> normalize_image_digest('quay.io/centos', 'quay.io') + 'quay.io/centos' - >>> normalize_image_digest('localhost/ceph', 'docker.io') + >>> normalize_image_digest('localhost/ceph', 'quay.io') 'localhost/ceph' """ known_shortnames = [ diff --git a/src/pybind/mgr/dashboard/controllers/nvmeof.py b/src/pybind/mgr/dashboard/controllers/nvmeof.py index 757b9e8ac02cf..519c310a98bcc 100644 --- a/src/pybind/mgr/dashboard/controllers/nvmeof.py +++ b/src/pybind/mgr/dashboard/controllers/nvmeof.py @@ -63,7 +63,10 @@ def list(self, gw_group: Optional[str] = None): @EndpointDoc( "Get information from a specific NVMeoF subsystem", - parameters={"nqn": Param(str, "NVMeoF subsystem NQN")}, + parameters={ + "nqn": Param(str, "NVMeoF subsystem NQN"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), + }, ) @map_model(model.Subsystem, first="subsystems") @handle_nvmeof_error @@ -78,6 +81,7 @@ def get(self, nqn: str, gw_group: Optional[str] = None): "nqn": Param(str, "NVMeoF subsystem NQN"), "max_namespaces": Param(int, "Maximum number of namespaces", True, 1024), "enable_ha": Param(bool, "Enable high availability"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @@ -95,6 +99,7 @@ def create(self, nqn: str, enable_ha: bool, max_namespaces: int = 1024, parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "force": Param(bool, "Force delete", "false"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @@ -111,12 +116,15 @@ def delete(self, nqn: str, force: Optional[str] = "false", gw_group: Optional[st class NVMeoFListener(RESTController): @EndpointDoc( "List all NVMeoF listeners", - parameters={"nqn": Param(str, "NVMeoF subsystem NQN")}, + parameters={ + "nqn": Param(str, "NVMeoF subsystem NQN"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), + }, ) @map_collection(model.Listener, pick="listeners") @handle_nvmeof_error - 
def list(self, nqn: str): - return NVMeoFClient().stub.list_listeners( + def list(self, nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.list_listeners( NVMeoFClient.pb2.list_listeners_req(subsystem=nqn) ) @@ -128,6 +136,7 @@ def list(self, nqn: str): "traddr": Param(str, "NVMeoF transport address"), "trsvcid": Param(int, "NVMeoF transport service port", True, 4420), "adrfam": Param(int, "NVMeoF address family (0 - IPv4, 1 - IPv6)", True, 0), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @@ -138,9 +147,10 @@ def create( host_name: str, traddr: str, trsvcid: int = 4420, - adrfam: int = 0, # IPv4 + adrfam: int = 0, # IPv4, + gw_group: Optional[str] = None ): - return NVMeoFClient(traddr=traddr).stub.create_listener( + return NVMeoFClient(gw_group=gw_group, traddr=traddr).stub.create_listener( NVMeoFClient.pb2.create_listener_req( nqn=nqn, host_name=host_name, @@ -158,6 +168,7 @@ def create( "traddr": Param(str, "NVMeoF transport address"), "trsvcid": Param(int, "NVMeoF transport service port", True, 4420), "adrfam": Param(int, "NVMeoF address family (0 - IPv4, 1 - IPv6)", True, 0), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @@ -170,8 +181,9 @@ def delete( trsvcid: int = 4420, adrfam: int = 0, # IPv4 force: bool = False, + gw_group: Optional[str] = None ): - return NVMeoFClient().stub.delete_listener( + return NVMeoFClient(gw_group=gw_group, traddr=traddr).stub.delete_listener( NVMeoFClient.pb2.delete_listener_req( nqn=nqn, host_name=host_name, @@ -187,12 +199,15 @@ def delete( class NVMeoFNamespace(RESTController): @EndpointDoc( "List all NVMeoF namespaces in a subsystem", - parameters={"nqn": Param(str, "NVMeoF subsystem NQN")}, + parameters={ + "nqn": Param(str, "NVMeoF subsystem NQN"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), + }, ) @map_collection(model.Namespace, pick="namespaces") @handle_nvmeof_error - def list(self, nqn: str): - return NVMeoFClient().stub.list_namespaces( + def list(self, nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.list_namespaces( NVMeoFClient.pb2.list_namespaces_req(subsystem=nqn) ) @@ -201,12 +216,13 @@ def list(self, nqn: str): parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "nsid": Param(str, "NVMeoF Namespace ID"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @map_model(model.Namespace, first="namespaces") @handle_nvmeof_error - def get(self, nqn: str, nsid: str): - return NVMeoFClient().stub.list_namespaces( + def get(self, nqn: str, nsid: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.list_namespaces( NVMeoFClient.pb2.list_namespaces_req(subsystem=nqn, nsid=int(nsid)) ) @@ -217,12 +233,13 @@ def get(self, nqn: str, nsid: str): parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "nsid": Param(str, "NVMeoF Namespace ID"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @map_model(model.NamespaceIOStats) @handle_nvmeof_error - def io_stats(self, nqn: str, nsid: str): - return NVMeoFClient().stub.namespace_get_io_stats( + def io_stats(self, nqn: str, nsid: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.namespace_get_io_stats( NVMeoFClient.pb2.namespace_get_io_stats_req( subsystem_nqn=nqn, nsid=int(nsid)) ) @@ -237,6 +254,7 @@ def io_stats(self, nqn: str, nsid: str): "size": Param(int, "RBD image size"), "block_size": Param(int, "NVMeoF namespace block size"), 
"load_balancing_group": Param(int, "Load balancing group"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @map_model(model.NamespaceCreation) @@ -250,8 +268,9 @@ def create( size: Optional[int] = 1024, block_size: int = 512, load_balancing_group: Optional[int] = None, + gw_group: Optional[str] = None, ): - return NVMeoFClient().stub.namespace_add( + return NVMeoFClient(gw_group=gw_group).stub.namespace_add( NVMeoFClient.pb2.namespace_add_req( subsystem_nqn=nqn, rbd_image_name=rbd_image_name, @@ -274,6 +293,7 @@ def create( "rw_mbytes_per_second": Param(int, "Read/Write MB/s"), "r_mbytes_per_second": Param(int, "Read MB/s"), "w_mbytes_per_second": Param(int, "Write MB/s"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @@ -288,12 +308,13 @@ def update( rw_mbytes_per_second: Optional[int] = None, r_mbytes_per_second: Optional[int] = None, w_mbytes_per_second: Optional[int] = None, + gw_group: Optional[str] = None ): if rbd_image_size: mib = 1024 * 1024 new_size_mib = int((rbd_image_size + mib - 1) / mib) - response = NVMeoFClient().stub.namespace_resize( + response = NVMeoFClient(gw_group=gw_group).stub.namespace_resize( NVMeoFClient.pb2.namespace_resize_req( subsystem_nqn=nqn, nsid=int(nsid), new_size=new_size_mib ) @@ -336,12 +357,13 @@ def update( parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "nsid": Param(str, "NVMeoF Namespace ID"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @handle_nvmeof_error - def delete(self, nqn: str, nsid: str): - return NVMeoFClient().stub.namespace_delete( + def delete(self, nqn: str, nsid: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.namespace_delete( NVMeoFClient.pb2.namespace_delete_req(subsystem_nqn=nqn, nsid=int(nsid)) ) @@ -351,7 +373,10 @@ def delete(self, nqn: str, nsid: str): class NVMeoFHost(RESTController): @EndpointDoc( "List all allowed hosts for an NVMeoF subsystem", - parameters={"nqn": Param(str, "NVMeoF subsystem NQN")}, + parameters={ + "nqn": Param(str, "NVMeoF subsystem NQN"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), + }, ) @map_collection( model.Host, @@ -362,8 +387,8 @@ class NVMeoFHost(RESTController): else o, ) @handle_nvmeof_error - def list(self, nqn: str): - return NVMeoFClient().stub.list_hosts( + def list(self, nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.list_hosts( NVMeoFClient.pb2.list_hosts_req(subsystem=nqn) ) @@ -372,12 +397,13 @@ def list(self, nqn: str): parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "host_nqn": Param(str, 'NVMeoF host NQN. Use "*" to allow any host.'), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @handle_nvmeof_error - def create(self, nqn: str, host_nqn: str): - return NVMeoFClient().stub.add_host( + def create(self, nqn: str, host_nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.add_host( NVMeoFClient.pb2.add_host_req(subsystem_nqn=nqn, host_nqn=host_nqn) ) @@ -386,12 +412,13 @@ def create(self, nqn: str, host_nqn: str): parameters={ "nqn": Param(str, "NVMeoF subsystem NQN"), "host_nqn": Param(str, 'NVMeoF host NQN. 
Use "*" to disallow any host.'), + "gw_group": Param(str, "NVMeoF gateway group", True, None), }, ) @empty_response @handle_nvmeof_error - def delete(self, nqn: str, host_nqn: str): - return NVMeoFClient().stub.remove_host( + def delete(self, nqn: str, host_nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.remove_host( NVMeoFClient.pb2.remove_host_req(subsystem_nqn=nqn, host_nqn=host_nqn) ) @@ -400,12 +427,15 @@ def delete(self, nqn: str, host_nqn: str): class NVMeoFConnection(RESTController): @EndpointDoc( "List all NVMeoF Subsystem Connections", - parameters={"nqn": Param(str, "NVMeoF subsystem NQN")}, + parameters={ + "nqn": Param(str, "NVMeoF subsystem NQN"), + "gw_group": Param(str, "NVMeoF gateway group", True, None), + }, ) @map_collection(model.Connection, pick="connections") @handle_nvmeof_error - def list(self, nqn: str): - return NVMeoFClient().stub.list_connections( + def list(self, nqn: str, gw_group: Optional[str] = None): + return NVMeoFClient(gw_group=gw_group).stub.list_connections( NVMeoFClient.pb2.list_connections_req(subsystem=nqn) ) @@ -433,16 +463,17 @@ def status(self) -> dict: parameters={ 'subsystem_nqn': (str, 'Subsystem NQN'), "host_nqn": Param(str, 'Comma separated list of NVMeoF host NQNs'), + "gw_group": Param(str, "NVMeoF gateway group") }) @empty_response @handle_nvmeof_error @CreatePermission - def add(self, subsystem_nqn: str, host_nqn: str = ""): + def add(self, subsystem_nqn: str, gw_group: str, host_nqn: str = ""): response = None all_host_nqns = host_nqn.split(',') for nqn in all_host_nqns: - response = NVMeoFClient().stub.add_host( + response = NVMeoFClient(gw_group=gw_group).stub.add_host( NVMeoFClient.pb2.add_host_req(subsystem_nqn=subsystem_nqn, host_nqn=nqn) ) if response.status != 0: @@ -454,16 +485,17 @@ def add(self, subsystem_nqn: str, host_nqn: str = ""): parameters={ "subsystem_nqn": Param(str, "NVMeoF subsystem NQN"), "host_nqn": Param(str, 'Comma separated list of NVMeoF host NQN.'), + "gw_group": Param(str, "NVMeoF gateway group") }) @empty_response @handle_nvmeof_error @DeletePermission - def remove(self, subsystem_nqn: str, host_nqn: str): + def remove(self, subsystem_nqn: str, host_nqn: str, gw_group: str): response = None to_delete_nqns = host_nqn.split(',') for del_nqn in to_delete_nqns: - response = NVMeoFClient().stub.remove_host( + response = NVMeoFClient(gw_group=gw_group).stub.remove_host( NVMeoFClient.pb2.remove_host_req(subsystem_nqn=subsystem_nqn, host_nqn=del_nqn) ) if response.status != 0: diff --git a/src/pybind/mgr/dashboard/controllers/osd.py b/src/pybind/mgr/dashboard/controllers/osd.py index c9d1417720005..07d8db7755b8a 100644 --- a/src/pybind/mgr/dashboard/controllers/osd.py +++ b/src/pybind/mgr/dashboard/controllers/osd.py @@ -5,12 +5,14 @@ import time from typing import Any, Dict, List, Optional, Union +import cherrypy from ceph.deployment.drive_group import DriveGroupSpec, DriveGroupValidationError # type: ignore from mgr_util import get_most_recent_rate from .. 
import mgr from ..exceptions import DashboardException from ..security import Scope +from ..services._paginate import ListPaginator from ..services.ceph_service import CephService, SendCommandError from ..services.exception import handle_orchestrator_error, handle_send_command_error from ..services.orchestrator import OrchClient, OrchFeature @@ -121,8 +123,30 @@ def osd_task(name, metadata, wait_for=2.0): @APIRouter('/osd', Scope.OSD) @APIDoc('OSD management API', 'OSD') class Osd(RESTController): - def list(self): - osds = self.get_osd_map() + @RESTController.MethodMap(version=APIVersion(1, 1)) + def list(self, offset: int = 0, limit: int = 10, + search: str = '', sort: str = ''): + all_osds = self.get_osd_map() + + paginator = ListPaginator(int(offset), int(limit), sort, search, + input_list=all_osds.values(), + searchable_params=['id'], + sortable_params=['id'], + default_sort='+id') + + cherrypy.response.headers['X-Total-Count'] = paginator.get_count() + + paginated_osds_list = list(paginator.list()) + # creating a dictionary to have faster lookups + paginated_osds_by_id = {osd['id']: osd for osd in paginated_osds_list} + try: + osds = { + key: paginated_osds_by_id[int(key)] + for key in all_osds.keys() + if int(key) in paginated_osds_by_id + } + except ValueError as e: + raise DashboardException(e, component='osd', http_status_code=400) # Extending by osd stats information for stat in mgr.get('osd_stats')['osd_stats']: diff --git a/src/pybind/mgr/dashboard/controllers/rgw.py b/src/pybind/mgr/dashboard/controllers/rgw.py index 8667d469060f8..2e6e466f97b19 100755 --- a/src/pybind/mgr/dashboard/controllers/rgw.py +++ b/src/pybind/mgr/dashboard/controllers/rgw.py @@ -162,9 +162,9 @@ class RgwMultisiteController(RESTController): @ReadPermission @allow_empty_body # pylint: disable=W0102,W0613 - def get_sync_status(self): + def get_sync_status(self, daemon_name=None): multisite_instance = RgwMultisite() - result = multisite_instance.get_multisite_sync_status() + result = multisite_instance.get_multisite_sync_status(daemon_name) return result @Endpoint(path='/sync-policy') @@ -176,6 +176,15 @@ def get_sync_policy(self, bucket_name='', zonegroup_name='', all_policy=None): if all_policy: sync_policy_list = [] buckets = json.loads(RgwBucket().list(stats=False)) + zonegroups_info = RgwMultisite().get_all_zonegroups_info() + default_zonegroup = '' + if 'zonegroups' in zonegroups_info and 'default_zonegroup' in zonegroups_info: + default_zonegroup = next( + (zonegroup['name'] for zonegroup in zonegroups_info['zonegroups'] + if 'id' in zonegroup and 'name' in zonegroup + and zonegroup['id'] == zonegroups_info['default_zonegroup']), + '' + ) for bucket in buckets: sync_policy = multisite_instance.get_sync_policy(bucket, zonegroup_name) for policy in sync_policy['groups']: @@ -183,6 +192,7 @@ def get_sync_policy(self, bucket_name='', zonegroup_name='', all_policy=None): sync_policy_list.append(policy) other_sync_policy = multisite_instance.get_sync_policy(bucket_name, zonegroup_name) for policy in other_sync_policy['groups']: + policy['zonegroup'] = default_zonegroup sync_policy_list.append(policy) return sync_policy_list return multisite_instance.get_sync_policy(bucket_name, zonegroup_name) @@ -244,11 +254,13 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, source_zones: Dict[str, Any], destination_zones: Dict[str, Any], source_bucket: str = '', - destination_bucket: str = '', bucket_name: str = ''): + destination_bucket: str = '', bucket_name: str = '', + user: str = '', mode: str = ''): 
multisite_instance = RgwMultisite() return multisite_instance.create_sync_pipe(group_id, pipe_id, source_zones, destination_zones, source_bucket, - destination_bucket, bucket_name, True) + destination_bucket, bucket_name, True, + user, mode) @Endpoint(method='DELETE', path='/sync-pipe') @EndpointDoc("Remove the sync pipe") @@ -256,12 +268,10 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, def remove_sync_pipe(self, group_id: str, pipe_id: str, source_zones: Optional[List[str]] = None, destination_zones: Optional[List[str]] = None, - destination_bucket: str = '', bucket_name: str = ''): multisite_instance = RgwMultisite() return multisite_instance.remove_sync_pipe(group_id, pipe_id, source_zones, - destination_zones, destination_bucket, - bucket_name, True) + destination_zones, bucket_name, True) @APIRouter('/rgw/daemon', Scope.RGW) diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-form/nvmeof-initiators-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-form/nvmeof-initiators-form.component.ts index 3a143a1a8df90..32f7c76a36282 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-form/nvmeof-initiators-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-form/nvmeof-initiators-form.component.ts @@ -10,7 +10,7 @@ import { AuthStorageService } from '~/app/shared/services/auth-storage.service'; import { TaskWrapperService } from '~/app/shared/services/task-wrapper.service'; import { FinishedTask } from '~/app/shared/models/finished-task'; import { ActivatedRoute, Router } from '@angular/router'; -import { NvmeofService } from '~/app/shared/api/nvmeof.service'; +import { InitiatorRequest, NvmeofService } from '~/app/shared/api/nvmeof.service'; @Component({ selector: 'cd-nvmeof-initiators-form', @@ -26,6 +26,7 @@ export class NvmeofInitiatorsFormComponent implements OnInit { remove: boolean = false; subsystemNQN: string; removeHosts: { name: string; value: boolean; id: number }[] = []; + group: string; constructor( private authStorageService: AuthStorageService, @@ -52,6 +53,9 @@ export class NvmeofInitiatorsFormComponent implements OnInit { ); ngOnInit() { + this.route.queryParams.subscribe((params) => { + this.group = params?.['group']; + }); this.createForm(); this.action = this.actionLabels.ADD; this.route.params.subscribe((params: { subsystem_nqn: string }) => { @@ -108,8 +112,9 @@ export class NvmeofInitiatorsFormComponent implements OnInit { const hosts: string[] = this.addedHosts.value; let taskUrl = `nvmeof/initiator/${URLVerbs.ADD}`; - const request = { - host_nqn: hosts.join(',') + const request: InitiatorRequest = { + host_nqn: hosts.join(','), + gw_group: this.group }; if (allowAnyHost) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-list/nvmeof-initiators-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-list/nvmeof-initiators-list.component.ts index fff38e6985a43..a5575a9c9267e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-list/nvmeof-initiators-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-initiators-list/nvmeof-initiators-list.component.ts @@ -1,4 +1,4 @@ -import { Component, Input, OnChanges, OnInit, TemplateRef, ViewChild } from '@angular/core'; +import { Component, Input, OnInit, TemplateRef, ViewChild } from '@angular/core'; import { Router } from '@angular/router'; 
import { NvmeofService } from '~/app/shared/api/nvmeof.service'; import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; @@ -20,9 +20,11 @@ const BASE_URL = 'block/nvmeof/subsystems'; templateUrl: './nvmeof-initiators-list.component.html', styleUrls: ['./nvmeof-initiators-list.component.scss'] }) -export class NvmeofInitiatorsListComponent implements OnInit, OnChanges { +export class NvmeofInitiatorsListComponent implements OnInit { @Input() subsystemNQN: string; + @Input() + group: string; @ViewChild('hostTpl', { static: true }) hostTpl: TemplateRef; @@ -58,10 +60,10 @@ export class NvmeofInitiatorsListComponent implements OnInit, OnChanges { permission: 'create', icon: Icons.add, click: () => - this.router.navigate([ - BASE_URL, - { outlets: { modal: [URLVerbs.ADD, this.subsystemNQN, 'initiator'] } } - ]), + this.router.navigate( + [BASE_URL, { outlets: { modal: [URLVerbs.ADD, this.subsystemNQN, 'initiator'] } }], + { queryParams: { group: this.group } } + ), canBePrimary: (selection: CdTableSelection) => !selection.hasSelection }, { @@ -79,17 +81,13 @@ export class NvmeofInitiatorsListComponent implements OnInit, OnChanges { return this.selection.selected.findIndex((selected) => selected.nqn === '*'); } - ngOnChanges() { - this.listInitiators(); - } - updateSelection(selection: CdTableSelection) { this.selection = selection; } listInitiators() { this.nvmeofService - .getInitiators(this.subsystemNQN) + .getInitiators(this.subsystemNQN, this.group) .subscribe((initiators: NvmeofSubsystemInitiator[]) => { this.initiators = initiators; }); @@ -118,7 +116,10 @@ export class NvmeofInitiatorsListComponent implements OnInit, OnChanges { nqn: this.subsystemNQN, plural: itemNames.length > 1 }), - call: this.nvmeofService.removeInitiators(this.subsystemNQN, { host_nqn }) + call: this.nvmeofService.removeInitiators(this.subsystemNQN, { + host_nqn, + gw_group: this.group + }) }) }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-form/nvmeof-listeners-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-form/nvmeof-listeners-form.component.ts index cd362bf8abe19..8310e65d203e5 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-form/nvmeof-listeners-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-form/nvmeof-listeners-form.component.ts @@ -103,7 +103,8 @@ export class NvmeofListenersFormComponent implements OnInit { const host = this.listenerForm.getValue('host'); let trsvcid = Number(this.listenerForm.getValue('trsvcid')); if (!trsvcid) trsvcid = 4420; - const request = { + const request: ListenerRequest = { + gw_group: this.group, host_name: host.hostname, traddr: host.addr, trsvcid @@ -128,9 +129,7 @@ export class NvmeofListenersFormComponent implements OnInit { component.listenerForm.setErrors({ cdSubmitButton: true }); }, complete: () => { - this.router.navigate([this.pageURL, { outlets: { modal: null } }], { - queryParams: { group: this.group } - }); + this.router.navigate([this.pageURL, { outlets: { modal: null } }]); } }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-list/nvmeof-listeners-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-list/nvmeof-listeners-list.component.ts index f88442e1bd619..b49adda7c1b92 100644 --- 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-list/nvmeof-listeners-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-listeners-list/nvmeof-listeners-list.component.ts @@ -1,4 +1,4 @@ -import { Component, Input, OnChanges, OnInit } from '@angular/core'; +import { Component, Input, OnInit } from '@angular/core'; import { Router } from '@angular/router'; import { NvmeofService } from '~/app/shared/api/nvmeof.service'; import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; @@ -21,7 +21,7 @@ const BASE_URL = 'block/nvmeof/subsystems'; templateUrl: './nvmeof-listeners-list.component.html', styleUrls: ['./nvmeof-listeners-list.component.scss'] }) -export class NvmeofListenersListComponent implements OnInit, OnChanges { +export class NvmeofListenersListComponent implements OnInit { @Input() subsystemNQN: string; @Input() @@ -76,22 +76,18 @@ export class NvmeofListenersListComponent implements OnInit, OnChanges { name: this.actionLabels.DELETE, permission: 'delete', icon: Icons.destroy, - click: () => this.deleteSubsystemModal() + click: () => this.deleteListenerModal() } ]; } - ngOnChanges() { - this.listListeners(); - } - updateSelection(selection: CdTableSelection) { this.selection = selection; } listListeners() { this.nvmeofService - .listListeners(this.subsystemNQN) + .listListeners(this.subsystemNQN, this.group) .subscribe((listResponse: NvmeofListener[]) => { this.listeners = listResponse.map((listener, index) => { listener['id'] = index; @@ -101,7 +97,7 @@ export class NvmeofListenersListComponent implements OnInit, OnChanges { }); } - deleteSubsystemModal() { + deleteListenerModal() { const listener = this.selection.first(); this.modalService.show(CriticalConfirmationModalComponent, { itemDescription: 'Listener', diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-form/nvmeof-namespaces-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-form/nvmeof-namespaces-form.component.ts index f5721e11ab6d3..b65ad62bdb4b1 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-form/nvmeof-namespaces-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-form/nvmeof-namespaces-form.component.ts @@ -41,6 +41,7 @@ export class NvmeofNamespacesFormComponent implements OnInit { nsid: string; currentBytes: number; invalidSizeError: boolean; + group: string; constructor( public actionLabels: ActionLabelsI18n, @@ -62,6 +63,9 @@ export class NvmeofNamespacesFormComponent implements OnInit { } init() { + this.route.queryParams.subscribe((params) => { + this.group = params?.['group']; + }); this.createForm(); this.action = this.actionLabels.CREATE; this.route.params.subscribe((params: { subsystem_nqn: string; nsid: string }) => { @@ -74,7 +78,7 @@ export class NvmeofNamespacesFormComponent implements OnInit { this.edit = true; this.action = this.actionLabels.EDIT; this.nvmeofService - .getNamespace(this.subsystemNQN, this.nsid) + .getNamespace(this.subsystemNQN, this.nsid, this.group) .subscribe((res: NvmeofSubsystemNamespace) => { const convertedSize = this.dimlessBinaryPipe.transform(res.rbd_image_size).split(' '); this.currentBytes = res.rbd_image_size; @@ -120,6 +124,7 @@ export class NvmeofNamespacesFormComponent implements OnInit { const image_size = this.nsForm.getValue('image_size'); const image_size_unit = 
this.nsForm.getValue('unit'); const request = {} as NamespaceCreateRequest | NamespaceEditRequest; + request['gw_group'] = this.group; if (image_size) { const key: string = this.edit ? 'rbd_image_size' : 'size'; const value: number = this.formatterService.toBytes(image_size + image_size_unit); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-list/nvmeof-namespaces-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-list/nvmeof-namespaces-list.component.ts index c40b538c82088..8f8f6eb8d0598 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-list/nvmeof-namespaces-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-namespaces-list/nvmeof-namespaces-list.component.ts @@ -1,4 +1,4 @@ -import { Component, Input, OnChanges, OnInit } from '@angular/core'; +import { Component, Input, OnInit } from '@angular/core'; import { Router } from '@angular/router'; import { NvmeofService } from '~/app/shared/api/nvmeof.service'; import { CriticalConfirmationModalComponent } from '~/app/shared/components/critical-confirmation-modal/critical-confirmation-modal.component'; @@ -23,9 +23,11 @@ const BASE_URL = 'block/nvmeof/subsystems'; templateUrl: './nvmeof-namespaces-list.component.html', styleUrls: ['./nvmeof-namespaces-list.component.scss'] }) -export class NvmeofNamespacesListComponent implements OnInit, OnChanges { +export class NvmeofNamespacesListComponent implements OnInit { @Input() subsystemNQN: string; + @Input() + group: string; namespacesColumns: any; tableActions: CdTableAction[]; @@ -117,10 +119,10 @@ export class NvmeofNamespacesListComponent implements OnInit, OnChanges { permission: 'create', icon: Icons.add, click: () => - this.router.navigate([ - BASE_URL, - { outlets: { modal: [URLVerbs.CREATE, this.subsystemNQN, 'namespace'] } } - ]), + this.router.navigate( + [BASE_URL, { outlets: { modal: [URLVerbs.CREATE, this.subsystemNQN, 'namespace'] } }], + { queryParams: { group: this.group } } + ), canBePrimary: (selection: CdTableSelection) => !selection.hasSelection }, { @@ -128,41 +130,45 @@ export class NvmeofNamespacesListComponent implements OnInit, OnChanges { permission: 'update', icon: Icons.edit, click: () => - this.router.navigate([ - BASE_URL, - { - outlets: { - modal: [URLVerbs.EDIT, this.subsystemNQN, 'namespace', this.selection.first().nsid] + this.router.navigate( + [ + BASE_URL, + { + outlets: { + modal: [ + URLVerbs.EDIT, + this.subsystemNQN, + 'namespace', + this.selection.first().nsid + ] + } } - } - ]) + ], + { queryParams: { group: this.group } } + ) }, { name: this.actionLabels.DELETE, permission: 'delete', icon: Icons.destroy, - click: () => this.deleteSubsystemModal() + click: () => this.deleteNamespaceModal() } ]; } - ngOnChanges() { - this.listNamespaces(); - } - updateSelection(selection: CdTableSelection) { this.selection = selection; } listNamespaces() { this.nvmeofService - .listNamespaces(this.subsystemNQN) + .listNamespaces(this.subsystemNQN, this.group) .subscribe((res: NvmeofSubsystemNamespace[]) => { this.namespaces = res; }); } - deleteSubsystemModal() { + deleteNamespaceModal() { const namespace = this.selection.first(); this.modalService.show(CriticalConfirmationModalComponent, { itemDescription: 'Namespace', @@ -174,7 +180,7 @@ export class NvmeofNamespacesListComponent implements OnInit, OnChanges { nqn: this.subsystemNQN, nsid: namespace.nsid }), - call: this.nvmeofService.deleteNamespace(this.subsystemNQN, 
namespace.nsid) + call: this.nvmeofService.deleteNamespace(this.subsystemNQN, namespace.nsid, this.group) }) }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html index 7f15a1360adc2..58a1e01a52510 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-details/nvmeof-subsystems-details.component.html @@ -24,14 +24,18 @@ Namespaces - + + Initiators - + + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts index f7b35a2d645ec..7e5b064f37929 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/block/nvmeof-subsystems-form/nvmeof-subsystems-form.component.ts @@ -118,9 +118,7 @@ export class NvmeofSubsystemsFormComponent implements OnInit { component.subsystemForm.setErrors({ cdSubmitButton: true }); }, complete: () => { - this.router.navigate([this.pageURL, { outlets: { modal: null } }], { - queryParams: { group: this.group } - }); + this.router.navigate([this.pageURL, { outlets: { modal: null } }]); } }); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html index 5f5f91dd0ed67..a56877512f99a 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.html @@ -6,13 +6,15 @@ i18n>OSDs List + [updateSelectionOnRefresh]="'never'" + [serverSide]="true" + [count]="count">
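The OSD list hunks above and below switch the table to server-side pagination: the template binds `[serverSide]` and `[count]`, the component forwards a `CdTableFetchDataContext` as query parameters, and the service wraps the response in a `PaginateObservable` so the total row count can be read from the `X-Total-Count` header. A minimal sketch of that flow, condensed from the surrounding hunks (error handling and the flags fork-join are omitted, and the table wiring is assumed rather than shown):

```typescript
// Sketch of the paginated fetch path introduced in this patch (simplified).
getOsdList(context?: CdTableFetchDataContext) {
  // The table may trigger a refresh without a context, so default it.
  if (!context) {
    context = new CdTableFetchDataContext();
  }

  // toParams() carries offset/limit/search/sort as HttpParams.
  const paginated = this.osdService.getList(context.toParams());

  paginated.observable.subscribe((osds: Osd[]) => {
    // count is populated from the X-Total-Count response header by
    // PaginateObservable and drives the table's paginator.
    this.count = paginated.count;
    this.osds = osds;
  });
}
```

In the actual component the paginated observable is still combined with `getFlags()` via `forkJoin` before each OSD is mapped, as the osd-list.component.ts hunk shows.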
{ let component: OsdListComponent; @@ -141,38 +143,42 @@ describe('OsdListComponent', () => { }); describe('getOsdList', () => { - let osds: any[]; + let osds: Osd[]; let flagsSpy: jasmine.Spy; - const createOsd = (n: number) => - >{ - in: 'in', - up: 'up', - tree: { - device_class: 'ssd' - }, - stats_history: { - op_out_bytes: [ - [n, n], - [n * 2, n * 2] - ], - op_in_bytes: [ - [n * 3, n * 3], - [n * 4, n * 4] - ] - }, - stats: { - stat_bytes_used: n * n, - stat_bytes: n * n * n - }, - state: [] - }; + const createOsd = (n: number): Osd => ({ + id: n, + host: { + id: 0, + name: 'test_host' + }, + in: 1, + up: 1, + tree: { + device_class: 'ssd' + }, + stats_history: { + op_out_bytes: [ + [n, n], + [n * 2, n * 2] + ], + op_in_bytes: [ + [n * 3, n * 3], + [n * 4, n * 4] + ] + }, + stats: { + stat_bytes_used: n * n, + stat_bytes: n * n * n + }, + state: [] + }); const expectAttributeOnEveryOsd = (attr: string) => expect(component.osds.every((osd) => Boolean(_.get(osd, attr)))).toBeTruthy(); beforeEach(() => { - spyOn(osdService, 'getList').and.callFake(() => of(osds)); + spyOn(osdService, 'getList').and.callFake(() => new PaginateObservable(of(osds))); flagsSpy = spyOn(osdService, 'getFlags').and.callFake(() => of([])); osds = [createOsd(1), createOsd(2), createOsd(3)]; component.getOsdList(); @@ -556,8 +562,9 @@ describe('OsdListComponent', () => { beforeEach(() => { component.permissions = fakeAuthStorageService.getPermissions(); - spyOn(osdService, 'getList').and.callFake(() => of(fakeOsds)); + spyOn(osdService, 'getList').and.callFake(() => new PaginateObservable(of(fakeOsds))); spyOn(osdService, 'getFlags').and.callFake(() => of([])); + component.getOsdList(); }); const testTableActions = async ( diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts index 103b61e79f0af..91cb0193f3cce 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/osd/osd-list/osd-list.component.ts @@ -39,6 +39,8 @@ import { OsdRecvSpeedModalComponent } from '../osd-recv-speed-modal/osd-recv-spe import { OsdReweightModalComponent } from '../osd-reweight-modal/osd-reweight-modal.component'; import { OsdScrubModalComponent } from '../osd-scrub-modal/osd-scrub-modal.component'; import { ModalCdsService } from '~/app/shared/services/modal-cds.service'; +import { CdTableFetchDataContext } from '~/app/shared/models/cd-table-fetch-data-context'; +import { Osd } from '~/app/shared/models/osd.model'; const BASE_URL = 'osd'; @@ -71,6 +73,7 @@ export class OsdListComponent extends ListWithDetails implements OnInit { clusterWideActions: CdTableAction[]; icons = Icons; osdSettings = new OsdSettings(); + count = 0; selection = new CdTableSelection(); osds: any[] = []; @@ -426,10 +429,13 @@ export class OsdListComponent extends ListWithDetails implements OnInit { } } - getOsdList() { - const observables = [this.osdService.getList(), this.osdService.getFlags()]; - observableForkJoin(observables).subscribe((resp: [any[], string[]]) => { - this.osds = resp[0].map((osd) => { + getOsdList(context?: CdTableFetchDataContext) { + if (!context) context = new CdTableFetchDataContext(); + const pagination_obs = this.osdService.getList(context.toParams()); + const observables = [pagination_obs.observable, this.osdService.getFlags()]; + observableForkJoin(observables).subscribe((resp: any) => { 
+ this.osds = resp[0].map((osd: Osd) => { + this.count = pagination_obs.count; osd.collectedStates = OsdListComponent.collectStates(osd); osd.stats_history.out_bytes = osd.stats_history.op_out_bytes.map((i: string) => i[1]); osd.stats_history.in_bytes = osd.stats_history.op_in_bytes.map((i: string) => i[1]); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts index d3ea8c018f66a..367418c752e07 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/cluster/services/service-daemon-list/service-daemon-list.component.spec.ts @@ -27,7 +27,7 @@ describe('ServiceDaemonListComponent', () => { hostname: 'osd0', container_id: '003c10beafc8c27b635bcdfed1ed832e4c1005be89bb1bb05ad4cc6c2b98e41b', container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', daemon_id: '3', daemon_type: 'osd', daemon_name: 'osd.3', @@ -47,7 +47,7 @@ describe('ServiceDaemonListComponent', () => { hostname: 'osd0', container_id: 'baeec41a01374b3ed41016d542d19aef4a70d69c27274f271e26381a0cc58e7a', container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', daemon_id: '4', daemon_type: 'osd', daemon_name: 'osd.4', @@ -63,7 +63,7 @@ describe('ServiceDaemonListComponent', () => { hostname: 'osd0', container_id: '8483de277e365bea4365cee9e1f26606be85c471e4da5d51f57e4b85a42c616e', container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', daemon_id: '5', daemon_type: 'osd', daemon_name: 'osd.5', @@ -79,7 +79,7 @@ describe('ServiceDaemonListComponent', () => { hostname: 'mon0', container_id: '6ca0574f47e300a6979eaf4e7c283a8c4325c2235ae60358482fc4cd58844a21', container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', daemon_id: 'a', daemon_name: 'mon.a', daemon_type: 'mon', @@ -99,7 +99,7 @@ describe('ServiceDaemonListComponent', () => { service_name: 'osd', status: { container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', size: 3, running: 3, last_refresh: '2020-02-25T04:33:26.465699' @@ -111,7 +111,7 @@ describe('ServiceDaemonListComponent', () => { service_name: 'crash', status: { container_image_id: 'e70344c77bcbf3ee389b9bf5128f635cf95f3d59e005c5d8e67fc19bcc74ed23', - container_image_name: 'docker.io/ceph/daemon-base:latest-master-devel', + container_image_name: 'quay.io/ceph/daemon-base:latest-master-devel', size: 1, running: 1, last_refresh: '2020-02-25T04:33:26.465766' diff --git 
a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html index 1a73490175db7..0da4913e9b8a4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.html @@ -106,7 +106,6 @@ [invalid]="nfsForm.controls.fsal.controls.user_id.invalid && (nfsForm.controls.fsal.controls.user_id.dirty)" [invalidText]="userIdError" [skeleton]="allRGWUsers === null" - (valueChange)="pathChangeHandler()" i18n> @@ -223,8 +222,6 @@ name="path" formControlName="path" [ngbTypeahead]="pathDataSource" - (selectItem)="pathChangeHandler()" - (blur)="pathChangeHandler()" [invalid]="nfsForm.controls.path.invalid && (nfsForm.controls.path.dirty)"> @@ -259,8 +256,6 @@ name="path" formControlName="path" [ngbTypeahead]="bucketDataSource" - (selectItem)="pathChangeHandler()" - (blur)="pathChangeHandler()" [invalid]="nfsForm.controls.path.invalid && (nfsForm.controls.path.dirty)"> diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts index 2317671b02238..d502524256ee9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/nfs/nfs-form/nfs-form.component.ts @@ -434,7 +434,7 @@ export class NfsFormComponent extends CdForm implements OnInit { fs_name: this.selectedFsName } }); - this.volumeChangeHandler(); + this.getSubVolGrp(this.selectedFsName); } if (!_.isEmpty(this.selectedSubvolGroup)) { this.nfsForm.patchValue({ diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html index ddc202152b9f4..463eac88b1e99 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-bucket-details/rgw-bucket-details.component.html @@ -158,8 +158,14 @@
- {{selection.lifecycle | json}}
- {{ (selection.lifecycle | xml) || '-'}}
+ + {{selection.lifecycle | json}} + + + {{ (selection.lifecycle | xml:{'Rules':'Rule'}) || '-'}} + diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.html b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.html index e50666cdeaa96..767305958d4c8 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.html +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.html @@ -64,6 +64,9 @@ i18n-placeholder placeholder="Source Bucket Name..." formControlName="source_bucket"/> + + {{ allBucketSelectedHelpText }} +
@@ -78,6 +81,9 @@ i18n-placeholder placeholder="Destination Bucket Name..." formControlName="destination_bucket"/> + + {{ allBucketSelectedHelpText }} +
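The sync-pipe modal changes that follow preserve the original `user` and `mode` of a pipe when it is edited: the component copies them from the selected row's `params` and sends empty strings on create, which the updated spec asserts. A condensed sketch of that submit payload, with field names taken from the hunks below; the service property name (`rgwMultisiteService`), validation, and the notification/close handling are assumed or omitted:

```typescript
// Sketch: build the createEditSyncPipe() payload as the patched submit() does.
const payload = {
  ...this.pipeForm.getRawValue(),
  source_zones: sourceZones,
  destination_zones: destZones,
  // Preserve the existing pipe's user/mode when editing; blank on create.
  user: this.editing ? this.pipeSelectedRow?.params?.user : '',
  mode: this.editing ? this.pipeSelectedRow?.params?.mode : ''
};
this.rgwMultisiteService.createEditSyncPipe(payload).subscribe();
```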
diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.spec.ts index 369658d7d427f..1127db1c59a59 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.spec.ts @@ -89,6 +89,47 @@ describe('RgwMultisiteSyncPipeModalComponent', () => { component.submit(); expect(spy).toHaveBeenCalled(); expect(putDataSpy).toHaveBeenCalled(); - expect(putDataSpy).toHaveBeenCalledWith(component.pipeForm.getRawValue()); + expect(putDataSpy).toHaveBeenCalledWith({ + ...component.pipeForm.getRawValue(), + mode: '', + user: '' + }); + }); + + it('should pass "user" and "mode" while creating/editing pipe', () => { + component.editing = true; + component.pipeForm.patchValue({ + pipe_id: 'pipe1', + group_id: 's3-bucket-replication:enabled', + source_bucket: '', + source_zones: { added: ['zone1-zg1-realm1'], removed: [] }, + destination_bucket: '', + destination_zones: { added: ['zone2-zg1-realm1'], removed: [] } + }); + component.pipeSelectedRow = { + dest: { bucket: '*', zones: ['zone2-zg1-realm1'] }, + id: 'pipi1', + params: { + dest: {}, + mode: 'user', + priority: 0, + source: { filter: { tags: [] } }, + user: 'dashboard' + }, + source: { bucket: '*', zones: ['zone1-zg1-realm1'] } + }; + + component.sourceZones.data.selected = ['zone1-zg1-realm1']; + component.destZones.data.selected = ['zone2-zg1-realm1']; + const spy = jest.spyOn(component, 'submit'); + const putDataSpy = jest.spyOn(multisiteServiceMock, 'createEditSyncPipe'); + component.submit(); + expect(spy).toHaveBeenCalled(); + expect(putDataSpy).toHaveBeenCalled(); + expect(putDataSpy).toHaveBeenCalledWith({ + ...component.pipeForm.getRawValue(), + mode: 'user', + user: 'dashboard' + }); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.ts index 2f41dbd23c843..43742ef60b839 100755 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-pipe-modal/rgw-multisite-sync-pipe-modal.component.ts @@ -18,6 +18,8 @@ import { ZoneData } from '../models/rgw-multisite-zone-selector'; import { SucceededActionLabelsI18n } from '~/app/shared/constants/app.constants'; const ALL_ZONES = $localize`All zones (*)`; +const ALL_BUCKET_SELECTED_HELP_TEXT = + 'If no value is provided, all the buckets in the zone group will be selected.'; @Component({ selector: 'cd-rgw-multisite-sync-pipe-modal', @@ -33,6 +35,7 @@ export class RgwMultisiteSyncPipeModalComponent implements OnInit { sourceZones = new ZoneData(false, 'Filter Zones'); destZones = new ZoneData(false, 'Filter Zones'); icons = Icons; + allBucketSelectedHelpText = ALL_BUCKET_SELECTED_HELP_TEXT; constructor( public activeModal: NgbActiveModal, @@ -187,7 +190,9 @@ export class RgwMultisiteSyncPipeModalComponent implements OnInit { .createEditSyncPipe({ ...this.pipeForm.getRawValue(), source_zones: sourceZones, - destination_zones: destZones + 
destination_zones: destZones, + user: this.editing ? this.pipeSelectedRow?.params?.user : '', + mode: this.editing ? this.pipeSelectedRow?.params?.mode : '' }) .subscribe( () => { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts index ee261db5042c3..03228856125d9 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-multisite-sync-policy/rgw-multisite-sync-policy.component.ts @@ -88,12 +88,22 @@ export class RgwMultisiteSyncPolicyComponent extends ListWithDetails implements { name: $localize`Zonegroup`, prop: 'zonegroup', - flexGrow: 1 + flexGrow: 1, + cellTransformation: CellTemplate.map, + customTemplateConfig: { + undefined: '-', + '': '-' + } }, { name: $localize`Bucket`, prop: 'bucket', - flexGrow: 1 + flexGrow: 1, + cellTransformation: CellTemplate.map, + customTemplateConfig: { + undefined: '-', + '': '-' + } } ]; this.rgwDaemonService.list().subscribe(); @@ -137,7 +147,7 @@ export class RgwMultisiteSyncPolicyComponent extends ListWithDetails implements groupName: policy['id'], status: policy['status'], bucket: policy['bucketName'], - zonegroup: '' + zonegroup: policy['zonegroup'] }); }); this.syncPolicyData = [...this.syncPolicyData]; diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts index 8b5901769c357..00037a7235b8e 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw-overview-dashboard/rgw-overview-dashboard.component.ts @@ -91,7 +91,9 @@ export class RgwOverviewDashboardComponent implements OnInit, OnDestroy { this.totalPoolUsedBytes = data['total_pool_bytes_used']; this.averageObjectSize = data['average_object_size']; }); - this.getSyncStatus(); + setTimeout(() => { + this.getSyncStatus(); + }); }); this.BucketSub = this.rgwBucketService .getTotalBucketsAndUsersLength() diff --git a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts index 3439562c8e223..5f8c6f50135c2 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/ceph/rgw/rgw.module.ts @@ -70,7 +70,8 @@ import { IconModule, LoadingModule, ModalModule, - ProgressIndicatorModule + ProgressIndicatorModule, + CodeSnippetModule } from 'carbon-components-angular'; import { CephSharedModule } from '../shared/ceph-shared.module'; @@ -94,6 +95,7 @@ import { CephSharedModule } from '../shared/ceph-shared.module'; ModalModule, GridModule, ProgressIndicatorModule, + CodeSnippetModule, ButtonModule, LoadingModule, IconModule, diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts index 313db3445f298..a5c84e60b6f95 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.spec.ts @@ -27,6 +27,7 @@ describe('NvmeofService', () => { 
expect(service).toBeTruthy(); }); + // gateways it('should call listGatewayGroups', () => { service.listGatewayGroups().subscribe(); const req = httpTesting.expectOne('api/nvmeof/gateway/group'); @@ -39,6 +40,7 @@ describe('NvmeofService', () => { expect(req.request.method).toBe('GET'); }); + // subsystems it('should call listSubsystems', () => { service.listSubsystems(mockGroupName).subscribe(); const req = httpTesting.expectOne(`api/nvmeof/subsystem?gw_group=${mockGroupName}`); @@ -69,9 +71,12 @@ describe('NvmeofService', () => { expect(req.request.method).toBe('DELETE'); }); + // initiators it('should call getInitiators', () => { - service.getInitiators(mockNQN).subscribe(); - const req = httpTesting.expectOne(`api/nvmeof/subsystem/${mockNQN}/host`); + service.getInitiators(mockNQN, mockGroupName).subscribe(); + const req = httpTesting.expectOne( + `api/nvmeof/subsystem/${mockNQN}/host?gw_group=${mockGroupName}` + ); expect(req.request.method).toBe('GET'); }); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts index 40202d0d67250..a2bbf507bc345 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/nvmeof.service.ts @@ -8,6 +8,7 @@ import { catchError, mapTo } from 'rxjs/operators'; export const MAX_NAMESPACE = 1024; export interface ListenerRequest { + gw_group: string; host_name: string; traddr: string; trsvcid: number; @@ -17,14 +18,17 @@ export interface NamespaceCreateRequest { rbd_image_name: string; rbd_pool: string; size: number; + gw_group: string; } export interface NamespaceEditRequest { rbd_image_size: number; + gw_group: string; } export interface InitiatorRequest { host_nqn: string; + gw_group: string; } const API_PATH = 'api/nvmeof'; @@ -81,8 +85,8 @@ export class NvmeofService { } // Initiators - getInitiators(subsystemNQN: string) { - return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/host`); + getInitiators(subsystemNQN: string, group: string) { + return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/host?gw_group=${group}`); } addInitiators(subsystemNQN: string, request: InitiatorRequest) { @@ -92,14 +96,17 @@ export class NvmeofService { } removeInitiators(subsystemNQN: string, request: InitiatorRequest) { - return this.http.delete(`${UI_API_PATH}/subsystem/${subsystemNQN}/host/${request.host_nqn}`, { - observe: 'response' - }); + return this.http.delete( + `${UI_API_PATH}/subsystem/${subsystemNQN}/host/${request.host_nqn}/${request.gw_group}`, + { + observe: 'response' + } + ); } // Listeners - listListeners(subsystemNQN: string) { - return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/listener`); + listListeners(subsystemNQN: string, group: string) { + return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/listener?gw_group=${group}`); } createListener(subsystemNQN: string, request: ListenerRequest) { @@ -121,12 +128,14 @@ export class NvmeofService { } // Namespaces - listNamespaces(subsystemNQN: string) { - return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/namespace`); + listNamespaces(subsystemNQN: string, group: string) { + return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/namespace?gw_group=${group}`); } - getNamespace(subsystemNQN: string, nsid: string) { - return this.http.get(`${API_PATH}/subsystem/${subsystemNQN}/namespace/${nsid}`); + getNamespace(subsystemNQN: string, nsid: string, group: string) { + return 
this.http.get( + `${API_PATH}/subsystem/${subsystemNQN}/namespace/${nsid}?gw_group=${group}` + ); } createNamespace(subsystemNQN: string, request: NamespaceCreateRequest) { @@ -141,9 +150,12 @@ export class NvmeofService { }); } - deleteNamespace(subsystemNQN: string, nsid: string) { - return this.http.delete(`${API_PATH}/subsystem/${subsystemNQN}/namespace/${nsid}`, { - observe: 'response' - }); + deleteNamespace(subsystemNQN: string, nsid: string, group: string) { + return this.http.delete( + `${API_PATH}/subsystem/${subsystemNQN}/namespace/${nsid}?gw_group=${group}`, + { + observe: 'response' + } + ); } } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.spec.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.spec.ts index d1f9997791ae0..c81c9193a2e3c 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.spec.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.spec.ts @@ -3,6 +3,7 @@ import { TestBed } from '@angular/core/testing'; import { configureTestBed } from '~/testing/unit-test-helper'; import { OsdService } from './osd.service'; +import { CdTableFetchDataContext } from '../models/cd-table-fetch-data-context'; describe('OsdService', () => { let service: OsdService; @@ -64,8 +65,9 @@ describe('OsdService', () => { }); it('should call getList', () => { - service.getList().subscribe(); - const req = httpTesting.expectOne('api/osd'); + const context = new CdTableFetchDataContext(() => {}); + service.getList(context.toParams()).observable.subscribe(); + const req = httpTesting.expectOne('api/osd?offset=0&limit=10&search=&sort=%2Bname'); expect(req.request.method).toBe('GET'); }); diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts index f2ed4d7cc9e76..85a75073deafc 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/osd.service.ts @@ -1,4 +1,4 @@ -import { HttpClient } from '@angular/common/http'; +import { HttpClient, HttpParams } from '@angular/common/http'; import { Injectable } from '@angular/core'; import _ from 'lodash'; @@ -12,6 +12,9 @@ import { OsdSettings } from '../models/osd-settings'; import { SmartDataResponseV1 } from '../models/smart'; import { DeviceService } from '../services/device.service'; import { CdFormGroup } from '../forms/cd-form-group'; +import { PaginateObservable } from './paginate.model'; +import { PaginateParams } from '../classes/paginate-params.class'; +import { Osd } from '../models/osd.model'; @Injectable({ providedIn: 'root' @@ -80,8 +83,10 @@ export class OsdService { return this.http.post(this.path, request, { observe: 'response' }); } - getList() { - return this.http.get(`${this.path}`); + getList(params: HttpParams): PaginateObservable { + return new PaginateObservable( + this.http.get(this.path, new PaginateParams(params, 1, 1)) + ); } getOsdSettings(): Observable { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/paginate.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/paginate.model.ts index 703792a757181..77ec4e43f7cfe 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/paginate.model.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/paginate.model.ts @@ -9,7 +9,7 @@ export class PaginateObservable { this.observable = obs.pipe( map((response: any) => { this.count = 
Number(response.headers?.get('X-Total-Count')); - return response['body']; + return response['body'] || response; }) ); } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts index b3a77e32198b2..3dc886e172fc4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/api/rgw-multisite.service.ts @@ -32,7 +32,9 @@ export class RgwMultisiteService { } getSyncStatus() { - return this.http.get(`${this.url}/sync_status`); + return this.rgwDaemonService.request((params: HttpParams) => { + return this.http.get(`${this.url}/sync_status`, { params: params }); + }); } status() { @@ -127,8 +129,15 @@ export class RgwMultisiteService { ); } - createEditSyncPipe(payload: any) { - return this.http.put(`${this.url}/sync-pipe`, payload); + createEditSyncPipe(payload: any, user?: string, mode?: string) { + let params = new HttpParams(); + if (user) { + params = params.append('user', user); + } + if (mode) { + params = params.append('mode', mode); + } + return this.http.put(`${this.url}/sync-pipe`, payload, { params }); } removeSyncPipe(pipe_id: string, group_id: string, bucket_name?: string) { diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/classes/paginate-params.class.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/classes/paginate-params.class.ts new file mode 100644 index 0000000000000..a1b079b426b9d --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/classes/paginate-params.class.ts @@ -0,0 +1,15 @@ +import { HttpParams } from '@angular/common/http'; + +export class PaginateParams { + constructor(params: HttpParams, majorVersion = 1, minorVersion = 0) { + const options = { + params: params, + headers: { + Accept: `application/vnd.ceph.api.v${majorVersion}.${minorVersion}+json` + } + }; + + options['observe'] = 'response'; + return options; + } +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-table-fetch-data-context.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-table-fetch-data-context.ts index 0df2d2ebbe071..6ea415bfee983 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-table-fetch-data-context.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/cd-table-fetch-data-context.ts @@ -18,7 +18,7 @@ export class CdTableFetchDataContext { search = ''; sort = '+name'; - constructor(error: () => void) { + constructor(error?: () => void) { this.error = error; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/models/osd.model.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/osd.model.ts new file mode 100644 index 0000000000000..f22987e439ea5 --- /dev/null +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/models/osd.model.ts @@ -0,0 +1,49 @@ +/* We will need to check what are all the value that the + UI need and only make them the mandatory parameters here. + For now based on what I saw in the unit test file; + osd-list.component.spec.ts, I've made the decision to make + things optional and non-optional. This should be re-evaluated. 
*/ + +export interface Osd { + id: number; + host: Host; + stats_history: StatsHistory; + state: string[]; + stats: Stats; + collectedStates?: string[]; + in?: number; + out?: number; + up?: number; + down?: number; + destroyed?: number; + cdIsBinary?: boolean; + cdIndivFlags?: string[]; + cdClusterFlags?: string[]; + cdExecuting?: any; + tree?: Tree; + operational_status?: string; +} + +interface Tree { + device_class: string; +} + +interface Host { + id: number; + name: string; +} + +interface StatsHistory { + op_out_bytes: any[]; + op_in_bytes: any[]; + out_bytes?: any[]; + in_bytes?: any[]; +} + +interface Stats { + stat_bytes_used: number; + stat_bytes: number; + op_w?: number; + op_r?: number; + usage?: number; +} diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts index 59d7572e9f004..45cca684dab01 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/pipes/xml.pipe.ts @@ -7,9 +7,13 @@ import { JsonToXmlService } from '../services/json-to-xml.service'; export class XmlPipe implements PipeTransform { constructor(private jsonToXmlService: JsonToXmlService) {} - transform(value: string, valueFormat: string = 'json'): string { + transform( + value: string, + replaceKey: Record = {}, + valueFormat: string = 'json' + ): string { if (valueFormat === 'json') { - value = this.jsonToXmlService.format(value); + value = this.jsonToXmlService.format(value, replaceKey); } return value; } diff --git a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts index 8f1d128c0c59c..e9d30f9b7f2f4 100644 --- a/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts +++ b/src/pybind/mgr/dashboard/frontend/src/app/shared/services/json-to-xml.service.ts @@ -6,29 +6,39 @@ import { Injectable } from '@angular/core'; export class JsonToXmlService { constructor() {} - format(json: any, indentSize: number = 2, currentIndent: number = 0): string { + format( + json: any, + replaceKey: Record = null, + indentSize: number = 2, + currentIndent: number = 0 + ): string { if (!json) return null; let xml = ''; if (typeof json === 'string') { json = JSON.parse(json); } - for (const key in json) { + for (let key in json) { if (json.hasOwnProperty(key)) { const value = json[key]; const indentation = ' '.repeat(currentIndent); - + if (replaceKey) { + const [oldKey, newKey] = Object.entries(replaceKey)[0]; + if (key === oldKey) { + key = newKey; + } + } if (Array.isArray(value)) { value.forEach((item) => { xml += `${indentation}<${key}>\n` + - this.format(item, indentSize, currentIndent + indentSize) + + this.format(item, replaceKey, indentSize, currentIndent + indentSize) + `${indentation}\n`; }); } else if (typeof value === 'object') { xml += `${indentation}<${key}>\n` + - this.format(value, indentSize, currentIndent + indentSize) + + this.format(value, replaceKey, indentSize, currentIndent + indentSize) + `${indentation}\n`; } else { xml += `${indentation}<${key}>${value}\n`; diff --git a/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss b/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss index 1d12facaf6a2f..61ca421101e6d 100644 --- a/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss +++ b/src/pybind/mgr/dashboard/frontend/src/styles/_carbon-defaults.scss @@ -142,3 
+142,10 @@ Dashboard page cd-dashboard { font-size: 12px; } + +/****************************************** +Code snippet +******************************************/ +.cds--snippet { + width: fit-content; +} diff --git a/src/pybind/mgr/dashboard/openapi.yaml b/src/pybind/mgr/dashboard/openapi.yaml index e8ab663d0d593..aedee7e493db4 100644 --- a/src/pybind/mgr/dashboard/openapi.yaml +++ b/src/pybind/mgr/dashboard/openapi.yaml @@ -8293,6 +8293,7 @@ paths: description: Enable high availability type: boolean gw_group: + description: NVMeoF gateway group type: string max_namespaces: default: 1024 @@ -8346,6 +8347,7 @@ paths: schema: type: boolean - allowEmptyValue: true + description: NVMeoF gateway group in: query name: gw_group schema: @@ -8384,6 +8386,7 @@ paths: schema: type: string - allowEmptyValue: true + description: NVMeoF gateway group in: query name: gw_group schema: @@ -8417,6 +8420,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '200': content: @@ -8446,6 +8455,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '200': content: @@ -8479,6 +8494,9 @@ paths: application/json: schema: properties: + gw_group: + description: NVMeoF gateway group + type: string host_nqn: description: NVMeoF host NQN. Use "*" to allow any host. type: string @@ -8525,6 +8543,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '202': content: @@ -8559,6 +8583,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '200': content: @@ -8596,6 +8626,9 @@ paths: default: 0 description: NVMeoF address family (0 - IPv4, 1 - IPv6) type: integer + gw_group: + description: NVMeoF gateway group + type: string host_name: description: NVMeoF hostname type: string @@ -8673,6 +8706,12 @@ paths: name: force schema: type: boolean + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '202': content: @@ -8707,6 +8746,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '200': content: @@ -8748,6 +8793,9 @@ paths: default: true description: Create RBD image type: boolean + gw_group: + description: NVMeoF gateway group + type: string load_balancing_group: description: Load balancing group type: integer @@ -8805,6 +8853,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '202': content: @@ -8844,6 +8898,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + name: gw_group + schema: + type: string responses: '200': content: @@ -8883,6 +8943,9 @@ paths: application/json: schema: properties: + gw_group: + description: NVMeoF gateway group + type: string load_balancing_group: description: Load balancing group type: integer @@ -8937,6 +9000,12 @@ paths: required: true schema: type: string + - allowEmptyValue: true + description: NVMeoF gateway group + in: query + 
name: gw_group + schema: + type: string responses: '200': content: @@ -8959,11 +9028,31 @@ paths: - NVMe-oF Subsystem Namespace /api/osd: get: - parameters: [] + parameters: + - default: 0 + in: query + name: offset + schema: + type: integer + - default: 10 + in: query + name: limit + schema: + type: integer + - default: '' + in: query + name: search + schema: + type: string + - default: '' + in: query + name: sort + schema: + type: string responses: '200': content: - application/vnd.ceph.api.v1.0+json: + application/vnd.ceph.api.v1.1+json: type: object description: OK '400': @@ -11384,6 +11473,9 @@ paths: type: string group_id: type: string + mode: + default: '' + type: string pipe_id: type: string source_bucket: @@ -11391,6 +11483,9 @@ paths: type: string source_zones: type: string + user: + default: '' + type: string required: - group_id - pipe_id @@ -11445,11 +11540,6 @@ paths: name: destination_zones schema: type: string - - default: '' - in: query - name: destination_bucket - schema: - type: string - default: '' in: query name: bucket_name @@ -11677,7 +11767,12 @@ paths: - RgwMultisite /api/rgw/multisite/sync_status: get: - parameters: [] + parameters: + - allowEmptyValue: true + in: query + name: daemon_name + schema: + type: string responses: '200': content: diff --git a/src/pybind/mgr/dashboard/services/nvmeof_conf.py b/src/pybind/mgr/dashboard/services/nvmeof_conf.py index 1802f8a5fce9f..2426c59907874 100644 --- a/src/pybind/mgr/dashboard/services/nvmeof_conf.py +++ b/src/pybind/mgr/dashboard/services/nvmeof_conf.py @@ -177,6 +177,18 @@ def _get_running_daemon_svc_config(svc_config, running_daemons): def _get_default_service(gateways): if gateways: - service_name = list(gateways.keys())[0] + gateway_keys = list(gateways.keys()) + # if there are more than 1 gateway, rather than chosing a random gateway + # from any of the group, raise an exception to make it clear that we need + # to specify the group name in the API request. + if len(gateway_keys) > 1: + raise DashboardException( + msg=( + "Multiple NVMe-oF gateway groups are configured. " + "Please specify the 'gw_group' parameter in the request." 
+ ), + component="nvmeof" + ) + service_name = gateway_keys[0] return service_name, gateways[service_name][0]['service_url'] return None diff --git a/src/pybind/mgr/dashboard/services/rgw_client.py b/src/pybind/mgr/dashboard/services/rgw_client.py index 21eff2dd7f904..2fe098216942d 100755 --- a/src/pybind/mgr/dashboard/services/rgw_client.py +++ b/src/pybind/mgr/dashboard/services/rgw_client.py @@ -10,6 +10,7 @@ import time import uuid import xml.etree.ElementTree as ET # noqa: N814 +from collections import defaultdict from enum import Enum from subprocess import SubprocessError from urllib.parse import urlparse @@ -288,21 +289,22 @@ def instance(userid: Optional[str] = None, daemon_keys = RgwClient._daemons.keys() if not daemon_name: - if len(daemon_keys) > 1: - try: - multiiste = RgwMultisite() - default_zonegroup = multiiste.get_all_zonegroups_info()['default_zonegroup'] - - # Iterate through _daemons.values() to find the daemon with the - # matching zonegroup_id - for daemon in RgwClient._daemons.values(): - if daemon.zonegroup_id == default_zonegroup: - daemon_name = daemon.name - break - except Exception: # pylint: disable=broad-except - daemon_name = next(iter(daemon_keys)) - else: - # Handle the case where there is only one or no key in _daemons + try: + if len(daemon_keys) > 1: + default_zonegroup = ( + RgwMultisite() + .get_all_zonegroups_info()['default_zonegroup'] + ) + if default_zonegroup: + daemon_name = next( + (daemon.name + for daemon in RgwClient._daemons.values() + if daemon.zonegroup_id == default_zonegroup), + None + ) + daemon_name = daemon_name or next(iter(daemon_keys)) + except Exception as e: # pylint: disable=broad-except + logger.exception('Failed to determine default RGW daemon: %s', str(e)) daemon_name = next(iter(daemon_keys)) # Discard all cached instances if any rgw setting has changed @@ -700,12 +702,28 @@ def set_tags(self, bucket_name, tags, request=None): raise DashboardException(msg=str(e), component='rgw') return result + @staticmethod + def _handle_rules(pairs): + result = defaultdict(list) + for key, value in pairs: + if key == 'Rule': + result['Rules'].append(value) + else: + result[key] = value + return result + @RestClient.api_get('/{bucket_name}?lifecycle') def get_lifecycle(self, bucket_name, request=None): # pylint: disable=unused-argument try: - result = request() # type: ignore - result = {'LifecycleConfiguration': result} + decoded_request = request(raw_content=True).decode("utf-8") # type: ignore + result = { + 'LifecycleConfiguration': + json.loads( + decoded_request, + object_pairs_hook=RgwClient._handle_rules + ) + } except RequestException as e: if e.content: content = json_str_to_object(e.content) @@ -757,15 +775,15 @@ def set_lifecycle(self, bucket_name, lifecycle, request=None): lifecycle = RgwClient.dict_to_xml(lifecycle) try: if lifecycle and '' not in str(lifecycle): - lifecycle = f'{lifecycle}' + lifecycle = f'\n{lifecycle}\n' result = request(data=lifecycle) # type: ignore except RequestException as e: + msg = '' if e.content: content = json_str_to_object(e.content) if content.get("Code") == "MalformedXML": msg = "Invalid Lifecycle document" - raise DashboardException(msg=msg, component='rgw') - raise DashboardException(msg=str(e), component='rgw') + raise DashboardException(msg=msg or str(e), component='rgw') return result @RestClient.api_delete('/{bucket_name}?lifecycle') @@ -1981,8 +1999,16 @@ def get_multisite_status(self): is_multisite_configured = False return is_multisite_configured - def 
get_multisite_sync_status(self): + def get_multisite_sync_status(self, daemon_name: str): rgw_multisite_sync_status_cmd = ['sync', 'status'] + daemons = _get_daemons() + try: + realm_name = daemons[daemon_name].realm_name + except (KeyError, AttributeError): + raise DashboardException('Unable to get realm name from daemon', + http_status_code=500, component='rgw') + if realm_name: + rgw_multisite_sync_status_cmd.extend(['--rgw-realm', realm_name]) try: exit_code, out, _ = mgr.send_rgwadmin_command(rgw_multisite_sync_status_cmd, False) if exit_code > 0: @@ -2236,7 +2262,8 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, source_bucket: str = '', destination_bucket: str = '', bucket_name: str = '', - update_period=False): + update_period=False, + user: str = '', mode: str = ''): if source_zones['added'] or destination_zones['added']: rgw_sync_policy_cmd = ['sync', 'group', 'pipe', 'create', @@ -2245,11 +2272,9 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, if bucket_name: rgw_sync_policy_cmd += ['--bucket', bucket_name] - if source_bucket: - rgw_sync_policy_cmd += ['--source-bucket', source_bucket] + rgw_sync_policy_cmd += ['--source-bucket', source_bucket] - if destination_bucket: - rgw_sync_policy_cmd += ['--dest-bucket', destination_bucket] + rgw_sync_policy_cmd += ['--dest-bucket', destination_bucket] if source_zones['added']: rgw_sync_policy_cmd += ['--source-zones', ','.join(source_zones['added'])] @@ -2257,6 +2282,12 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, if destination_zones['added']: rgw_sync_policy_cmd += ['--dest-zones', ','.join(destination_zones['added'])] + if user: + rgw_sync_policy_cmd += ['--uid', user] + + if mode: + rgw_sync_policy_cmd += ['--mode', mode] + logger.info("Creating sync pipe!") try: exit_code, _, err = mgr.send_rgwadmin_command(rgw_sync_policy_cmd) @@ -2271,13 +2302,13 @@ def create_sync_pipe(self, group_id: str, pipe_id: str, if ((source_zones['removed'] and '*' not in source_zones['added']) or (destination_zones['removed'] and '*' not in destination_zones['added'])): self.remove_sync_pipe(group_id, pipe_id, source_zones['removed'], - destination_zones['removed'], destination_bucket, - bucket_name) + destination_zones['removed'], + bucket_name, True) def remove_sync_pipe(self, group_id: str, pipe_id: str, source_zones: Optional[List[str]] = None, destination_zones: Optional[List[str]] = None, - destination_bucket: str = '', bucket_name: str = '', + bucket_name: str = '', update_period=False): rgw_sync_policy_cmd = ['sync', 'group', 'pipe', 'remove', '--group-id', group_id, '--pipe-id', pipe_id] @@ -2291,9 +2322,6 @@ def remove_sync_pipe(self, group_id: str, pipe_id: str, if destination_zones: rgw_sync_policy_cmd += ['--dest-zones', ','.join(destination_zones)] - if destination_bucket: - rgw_sync_policy_cmd += ['--dest-bucket', destination_bucket] - logger.info("Removing sync pipe! %s", rgw_sync_policy_cmd) try: exit_code, _, err = mgr.send_rgwadmin_command(rgw_sync_policy_cmd) diff --git a/src/pybind/mgr/dashboard/tests/test_osd.py b/src/pybind/mgr/dashboard/tests/test_osd.py index c3cd0dca88dca..9b6dbd10de18d 100644 --- a/src/pybind/mgr/dashboard/tests/test_osd.py +++ b/src/pybind/mgr/dashboard/tests/test_osd.py @@ -8,6 +8,7 @@ from ceph.deployment.service_spec import PlacementSpec from .. 
import mgr +from ..controllers._version import APIVersion from ..controllers.osd import Osd, OsdUi from ..services.osd import OsdDeploymentOptions from ..tests import ControllerTestCase @@ -274,7 +275,7 @@ def test_osd_list_aggregation(self): osds_leftover = [0, 1, 2] with self._mock_osd_list(osd_stat_ids=osds_actual, osdmap_tree_node_ids=osds_leftover, osdmap_ids=osds_actual): - self._get('/api/osd') + self._get('/api/osd', version=APIVersion(1, 1)) self.assertEqual(len(self.json_body()), 2, 'It should display two OSDs without failure') self.assertStatus(200) diff --git a/src/pybind/mgr/dashboard/tools.py b/src/pybind/mgr/dashboard/tools.py index 51ed9c471aac6..14de970cceb0f 100644 --- a/src/pybind/mgr/dashboard/tools.py +++ b/src/pybind/mgr/dashboard/tools.py @@ -9,9 +9,9 @@ import time import urllib from datetime import datetime, timedelta -from distutils.util import strtobool import cherrypy +from ceph.utils import strtobool from mgr_util import build_url from . import mgr diff --git a/src/pybind/mgr/mgr_util.py b/src/pybind/mgr/mgr_util.py index 67246545eea0f..5d37d478de7b1 100644 --- a/src/pybind/mgr/mgr_util.py +++ b/src/pybind/mgr/mgr_util.py @@ -22,6 +22,7 @@ from ipaddress import ip_address from threading import Lock, Condition from typing import no_type_check, NewType +from traceback import format_exc as tb_format_exc import urllib from functools import wraps if sys.version_info >= (3, 3): @@ -88,9 +89,9 @@ def run(self): while not self.finished.is_set(): self.finished.wait(self.interval) self.function(*self.args, **self.kwargs) - self.finished.set() - except Exception as e: - logger.error("task exception: %s", e) + except Exception: + logger.error(f'exception encountered in RTimer instance "{self}":' + f'\n{tb_format_exc()}') raise diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 82a8c13a9c11e..d5c351fda7ead 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -520,6 +520,15 @@ def rescan_host(self, hostname: str) -> OrchResult: """ raise NotImplementedError() + def replace_device(self, + hostname: str, + device: str, + clear: bool = False, + yes_i_really_mean_it: bool = False) -> OrchResult: + """Perform all required operations in order to replace a device. + """ + raise NotImplementedError() + def get_inventory(self, host_filter: Optional['InventoryFilter'] = None, refresh: bool = False) -> OrchResult[List['InventoryHost']]: """ Returns something that was created by `ceph-volume inventory`. @@ -576,7 +585,12 @@ def cert_store_get_key( raise NotImplementedError() @handle_orch_error - def apply(self, specs: Sequence["GenericSpec"], no_overwrite: bool = False) -> List[str]: + def apply( + self, + specs: Sequence["GenericSpec"], + no_overwrite: bool = False, + continue_on_error: bool = False + ) -> List[str]: """ Applies any spec """ @@ -699,12 +713,18 @@ def preview_osdspecs(self, def remove_osds(self, osd_ids: List[str], replace: bool = False, + replace_block: bool = False, + replace_db: bool = False, + replace_wal: bool = False, force: bool = False, zap: bool = False, no_destroy: bool = False) -> OrchResult[str]: """ :param osd_ids: list of OSD IDs :param replace: marks the OSD as being destroyed. See :ref:`orchestrator-osd-replace` + :param replace_block: marks the corresponding block device as being replaced. + :param replace_db: marks the corresponding db device as being replaced. + :param replace_wal: marks the corresponding wal device as being replaced. 
:param force: Forces the OSD removal process without waiting for the data to be drained first. :param zap: Zap/Erase all devices associated with the OSDs (DESTROYS DATA) :param no_destroy: Do not destroy associated VGs/LVs with the OSD. diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index be0096bb2d96e..dbfa10fb720c3 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -818,6 +818,21 @@ def _host_rescan(self, hostname: str, with_summary: bool = False) -> HandleComma return HandleCommandResult(stdout=completion.result_str()) return HandleCommandResult(stdout=completion.result_str().split('.')[0]) + @_cli_read_command('orch device replace') + def _replace_device(self, + hostname: str, + device: str, + clear: bool = False, + yes_i_really_mean_it: bool = False) -> HandleCommandResult: + """Perform all required operations in order to replace a device. + """ + completion = self.replace_device(hostname=hostname, + device=device, + clear=clear, + yes_i_really_mean_it=yes_i_really_mean_it) + raise_if_exception(completion) + return HandleCommandResult(stdout=completion.result_str()) + @_cli_read_command('orch device ls') def _list_devices(self, hostname: Optional[List[str]] = None, @@ -1415,8 +1430,9 @@ def _osd_rm_start(self, zap: bool = False, no_destroy: bool = False) -> HandleCommandResult: """Remove OSD daemons""" - completion = self.remove_osds(osd_id, replace=replace, force=force, - zap=zap, no_destroy=no_destroy) + completion = self.remove_osds(osd_id, + replace=replace, + force=force, zap=zap, no_destroy=no_destroy) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) @@ -1635,12 +1651,14 @@ def apply_misc(self, format: Format = Format.plain, unmanaged: bool = False, no_overwrite: bool = False, + continue_on_error: bool = False, inbuf: Optional[str] = None) -> HandleCommandResult: """Update the size or placement for a service or apply a large yaml spec""" usage = """Usage: ceph orch apply -i [--dry-run] ceph orch apply [--placement=] [--unmanaged] """ + errs: List[str] = [] if inbuf: if service_type or placement or unmanaged: raise OrchestratorValidationError(usage) @@ -1650,7 +1668,14 @@ def apply_misc(self, # None entries in the output. Let's skip them silently. 
content = [o for o in yaml_objs if o is not None] for s in content: - spec = json_to_generic_spec(s) + try: + spec = json_to_generic_spec(s) + except Exception as e: + if continue_on_error: + errs.append(f'Failed to convert {s} from json object: {str(e)}') + continue + else: + raise e # validate the config (we need MgrModule for that) if isinstance(spec, ServiceSpec) and spec.config: @@ -1658,7 +1683,12 @@ def apply_misc(self, try: self.get_foreign_ceph_option('mon', k) except KeyError: - raise SpecValidationError(f'Invalid config option {k} in spec') + err = SpecValidationError(f'Invalid config option {k} in spec') + if continue_on_error: + errs.append(str(err)) + continue + else: + raise err # There is a general "osd" service with no service id, but we use # that to dump osds created individually with "ceph orch daemon add osd" @@ -1673,7 +1703,12 @@ def apply_misc(self, and spec.service_type == 'osd' and not spec.service_id ): - raise SpecValidationError('Please provide the service_id field in your OSD spec') + err = SpecValidationError('Please provide the service_id field in your OSD spec') + if continue_on_error: + errs.append(str(err)) + continue + else: + raise err if dry_run and not isinstance(spec, HostSpec): spec.preview_only = dry_run @@ -1683,15 +1718,30 @@ def apply_misc(self, continue specs.append(spec) else: + # Note in this case there is only ever one spec + # being applied so there is no need to worry about + # handling of continue_on_error placementspec = PlacementSpec.from_string(placement) if not service_type: raise OrchestratorValidationError(usage) specs = [ServiceSpec(service_type.value, placement=placementspec, unmanaged=unmanaged, preview_only=dry_run)] - return self._apply_misc(specs, dry_run, format, no_overwrite) - - def _apply_misc(self, specs: Sequence[GenericSpec], dry_run: bool, format: Format, no_overwrite: bool = False) -> HandleCommandResult: - completion = self.apply(specs, no_overwrite) + cmd_result = self._apply_misc(specs, dry_run, format, no_overwrite, continue_on_error) + if errs: + # HandleCommandResult is a named tuple, so use + # _replace to modify it. 
+ cmd_result = cmd_result._replace(stdout=cmd_result.stdout + '\n' + '\n'.join(errs)) + return cmd_result + + def _apply_misc( + self, + specs: Sequence[GenericSpec], + dry_run: bool, + format: Format, + no_overwrite: bool = False, + continue_on_error: bool = False + ) -> HandleCommandResult: + completion = self.apply(specs, no_overwrite, continue_on_error) raise_if_exception(completion) out = completion.result_str() if dry_run: diff --git a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py index 726a7ac7937c5..3247b06a3993b 100644 --- a/src/pybind/mgr/orchestrator/tests/test_orchestrator.py +++ b/src/pybind/mgr/orchestrator/tests/test_orchestrator.py @@ -102,7 +102,7 @@ def test_yaml(): host_pattern: '*' status: container_image_id: 74803e884bea289d2d2d3ebdf6d37cd560499e955595695b1390a89800f4e37a - container_image_name: docker.io/ceph/daemon-base:latest-master-devel + container_image_name: quay.io/ceph/daemon-base:latest-main-devel created: '2020-06-10T10:37:31.051288Z' last_refresh: '2020-06-10T10:57:40.715637Z' running: 1 diff --git a/src/pybind/mgr/smb/enums.py b/src/pybind/mgr/smb/enums.py index dea45f951f831..3e8544f43cf5a 100644 --- a/src/pybind/mgr/smb/enums.py +++ b/src/pybind/mgr/smb/enums.py @@ -21,7 +21,7 @@ class CephFSStorageProvider(_StrEnum): def expand(self) -> 'CephFSStorageProvider': """Expand abbreviated/default values into the full/expanded form.""" - if self == self.SAMBA_VFS: + if self is self.SAMBA_VFS: # mypy gets confused by enums return self.__class__(self.SAMBA_VFS_NEW) return self @@ -89,9 +89,9 @@ class LoginAccess(_StrEnum): def expand(self) -> 'LoginAccess': """Exapend abbreviated enum values into their full forms.""" # the extra LoginAccess(...) calls are to appease mypy - if self == self.READ_ONLY_SHORT: + if self is self.READ_ONLY_SHORT: return LoginAccess(self.READ_ONLY) - if self == self.READ_WRITE_SHORT: + if self is self.READ_WRITE_SHORT: return LoginAccess(self.READ_WRITE) return self diff --git a/src/pybind/mgr/smb/module.py b/src/pybind/mgr/smb/module.py index 7483eb7964b4a..4512ad6add336 100644 --- a/src/pybind/mgr/smb/module.py +++ b/src/pybind/mgr/smb/module.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast +from typing import TYPE_CHECKING, Any, List, Optional, cast import logging @@ -171,6 +171,7 @@ def cluster_create( custom_dns: Optional[List[str]] = None, placement: Optional[str] = None, clustering: Optional[SMBClustering] = None, + public_addrs: Optional[List[str]] = None, ) -> results.Result: """Create an smb cluster""" domain_settings = None @@ -255,6 +256,18 @@ def cluster_create( ) ) + c_public_addrs = [] + if public_addrs: + for pa in public_addrs: + pa_arr = pa.split('%', 1) + address = pa_arr[0] + destination = pa_arr[1] if len(pa_arr) > 1 else None + c_public_addrs.append( + resources.ClusterPublicIPAssignment( + address=address, destination=destination + ) + ) + pspec = resources.WrappedPlacementSpec.wrap( PlacementSpec.from_string(placement) ) @@ -266,6 +279,7 @@ def cluster_create( custom_dns=custom_dns, placement=pspec, clustering=clustering, + public_addrs=c_public_addrs, ) to_apply.append(cluster) return self._handler.apply(to_apply, create_only=True).squash(cluster) @@ -336,45 +350,6 @@ def show(self, resource_names: Optional[List[str]] = None) -> Simplified: return resources[0].to_simplified() return {'resources': [r.to_simplified() for r in resources]} - @cli.SMBCommand('dump cluster-config', perm='r') - def dump_config(self, 
cluster_id: str) -> Dict[str, Any]: - """DEBUG: Generate an example configuration""" - # TODO: Remove this command prior to release - return self._handler.generate_config(cluster_id) - - @cli.SMBCommand('dump service-spec', perm='r') - def dump_service_spec(self, cluster_id: str) -> Dict[str, Any]: - """DEBUG: Generate an example smb service spec""" - # TODO: Remove this command prior to release - return dict( - self._handler.generate_smb_service_spec(cluster_id).to_json() - ) - - @cli.SMBCommand('dump everything', perm='r') - def dump_everything(self) -> Dict[str, Any]: - """DEBUG: Show me everything""" - # TODO: Remove this command prior to release - everything: Dict[str, Any] = {} - everything['PUBLIC'] = {} - log.warning('dumping PUBLIC') - for key in self._public_store: - e = self._public_store[key] - log.warning('dumping e: %s %r', e.uri, e.full_key) - everything['PUBLIC'][e.uri] = e.get() - log.warning('dumping PRIV') - everything['PRIV'] = {} - for key in self._priv_store: - e = self._priv_store[key] - log.warning('dumping e: %s %r', e.uri, e.full_key) - everything['PRIV'][e.uri] = e.get() - log.warning('dumping INTERNAL') - everything['INTERNAL'] = {} - for key in self._internal_store: - e = self._internal_store[key] - log.warning('dumping e: %s %r', e.uri, e.full_key) - everything['INTERNAL'][e.uri] = e.get() - return everything - def submit_smb_spec(self, spec: SMBSpec) -> None: """Submit a new or updated smb spec object to ceph orchestration.""" completion = self.apply_smb(spec) diff --git a/src/pybind/mgr/smb/tests/test_smb.py b/src/pybind/mgr/smb/tests/test_smb.py index c9fd02968b904..0d3610326c225 100644 --- a/src/pybind/mgr/smb/tests/test_smb.py +++ b/src/pybind/mgr/smb/tests/test_smb.py @@ -410,72 +410,6 @@ def test_cmd_apply_share(tmodule): assert bdata["results"][0]["state"] == "created" -def test_share_dump_config(tmodule): - _example_cfg_1(tmodule) - - cfg = tmodule.dump_config('foo') - assert cfg == { - 'samba-container-config': "v0", - 'configs': { - 'foo': { - 'instance_name': 'foo', - 'instance_features': [], - 'shares': ['Ess One', 'Ess Two'], - 'globals': ['default', 'foo'], - }, - }, - 'shares': { - 'Ess One': { - 'options': { - 'path': '/', - 'read only': 'No', - 'browseable': 'Yes', - 'kernel share modes': 'no', - 'x:ceph:id': 'foo.s1', - 'vfs objects': 'acl_xattr ceph_new', - 'acl_xattr:security_acl_name': 'user.NTACL', - 'ceph_new:config_file': '/etc/ceph/ceph.conf', - 'ceph_new:filesystem': 'cephfs', - 'ceph_new:user_id': 'smb.fs.cluster.foo', - }, - }, - 'Ess Two': { - 'options': { - 'path': '/two', - 'read only': 'No', - 'browseable': 'Yes', - 'kernel share modes': 'no', - 'x:ceph:id': 'foo.stwo', - 'vfs objects': 'acl_xattr ceph_new', - 'acl_xattr:security_acl_name': 'user.NTACL', - 'ceph_new:config_file': '/etc/ceph/ceph.conf', - 'ceph_new:filesystem': 'cephfs', - 'ceph_new:user_id': 'smb.fs.cluster.foo', - }, - }, - }, - 'globals': { - 'default': { - 'options': { - 'load printers': 'No', - 'printing': 'bsd', - 'printcap name': '/dev/null', - 'disable spoolss': 'Yes', - }, - }, - 'foo': { - 'options': { - 'idmap config * : backend': 'autorid', - 'idmap config * : range': '2000-9999999', - 'realm': 'dom1.example.com', - 'security': 'ads', - 'workgroup': 'DOM1', - }, - }, - }, - } - - def test_cluster_create_ad1(tmodule): _example_cfg_1(tmodule) @@ -613,29 +547,6 @@ def test_cluster_rm(tmodule): assert result.success -def test_dump_service_spec(tmodule): - _example_cfg_1(tmodule) - tmodule._public_store.overwrite( - { - 'foo.config.smb': '', - } - ) - 
tmodule._priv_store.overwrite( - { - 'foo.join.2b9902c05d08bcba.json': '', - 'foo.join.08129d4d3b8c37c7.json': '', - } - ) - - cfg = tmodule.dump_service_spec('foo') - assert cfg - assert cfg['service_id'] == 'foo' - assert cfg['spec']['cluster_id'] == 'foo' - assert cfg['spec']['features'] == ['domain'] - assert cfg['spec']['config_uri'] == 'mem:foo/config.smb' - assert len(cfg['spec']['join_sources']) == 2 - - def test_cmd_show_resource_json(tmodule): _example_cfg_1(tmodule) diff --git a/src/pybind/mgr/telemetry/tox.ini b/src/pybind/mgr/telemetry/tox.ini index a887590eed89b..b2210da54eaa8 100644 --- a/src/pybind/mgr/telemetry/tox.ini +++ b/src/pybind/mgr/telemetry/tox.ini @@ -1,7 +1,6 @@ [tox] envlist = py3 - mypy skipsdist = true [testenv] diff --git a/src/pybind/mgr/tox.ini b/src/pybind/mgr/tox.ini index a8a2d39d01a73..f39ececa93dd5 100644 --- a/src/pybind/mgr/tox.ini +++ b/src/pybind/mgr/tox.ini @@ -160,7 +160,8 @@ modules = commands = flake8 --config=tox.ini {posargs} \ {posargs:{[testenv:flake8]modules}} - bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 13' + bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "docker.io" | wc -l) == 3' + bash -c 'test $(git ls-files cephadm | grep ".py$" | grep -v tests | xargs grep "quay.io" | wc -l) == 26' [testenv:jinjalint] deps = diff --git a/src/pybind/mgr/volumes/fs/async_job.py b/src/pybind/mgr/volumes/fs/async_job.py index 6834e3e240b33..83a119ca5564c 100644 --- a/src/pybind/mgr/volumes/fs/async_job.py +++ b/src/pybind/mgr/volumes/fs/async_job.py @@ -167,7 +167,7 @@ def run(self): for i in range(c, self.nr_concurrent_jobs): self.threads.append(JobThread(self, self.vc, name="{0}.{1}.{2}".format(self.name_pfx, time.time(), i))) self.threads[-1].start() - self.cv.wait(timeout=5) + self.cv.wait(timeout=self.wakeup_timeout) def shutdown(self): self.stopping.set() diff --git a/src/pybind/mgr/volumes/fs/operations/pin_util.py b/src/pybind/mgr/volumes/fs/operations/pin_util.py index a12ab5b4d4b28..631fdd8fcaa25 100644 --- a/src/pybind/mgr/volumes/fs/operations/pin_util.py +++ b/src/pybind/mgr/volumes/fs/operations/pin_util.py @@ -3,7 +3,7 @@ import cephfs from ..exception import VolumeException -from distutils.util import strtobool +from ceph.utils import strtobool _pin_value = { "export": lambda x: int(x), diff --git a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py index 610a61e6a4c1f..146d6d3f453d6 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/metadata_manager.py @@ -172,7 +172,7 @@ def list_all_options_from_section(self, section): metadata_dict[option] = self.config.get(section,option) return metadata_dict - def list_all_keys_with_specified_values_from_section(self, section, value): + def filter_keys(self, section, value): keys = [] if self.config.has_section(section): options = self.config.options(section) diff --git a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py index 33d364b8b452a..72209ca61b5e0 100644 --- a/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py +++ b/src/pybind/mgr/volumes/fs/operations/versions/subvolume_v1.py @@ -758,7 +758,7 @@ def get_pending_clones(self, snapname): try: if self.has_pending_clones(snapname): - pending_track_id_list = 
self.metadata_mgr.list_all_keys_with_specified_values_from_section('clone snaps', snapname) + pending_track_id_list = self.metadata_mgr.filter_keys('clone snaps', snapname) else: return pending_clones_info except MetadataMgrException as me: @@ -780,9 +780,9 @@ def get_pending_clones(self, snapname): raise VolumeException(-e.args[0], e.args[1]) else: try: - # If clone is completed between 'list_all_keys_with_specified_values_from_section' - # and readlink(track_id_path) call then readlink will fail with error ENOENT (2) - # Hence we double check whether track_id is exist in .meta file or not. + # If clone is completed between 'filter_keys' and readlink(track_id_path) call + # then readlink will fail with error ENOENT (2). Hence we double check whether + # track_id exists in .meta file or not. # Edge case scenario. # If track_id for clone exist but path /volumes/_index/clone/{track_id} not found # then clone is orphan. diff --git a/src/pybind/mgr/volumes/fs/stats_util.py b/src/pybind/mgr/volumes/fs/stats_util.py index cec33eaa8873d..3334dc5a3d765 100644 --- a/src/pybind/mgr/volumes/fs/stats_util.py +++ b/src/pybind/mgr/volumes/fs/stats_util.py @@ -106,6 +106,11 @@ def __init__(self, volclient, vol_spec): # reporting has already been initiated by calling RTimer.is_alive(). self.update_task = RTimer(1, self._update_progress_bars) + # progress event ID for ongoing clone jobs + self.on_pev_id: Optional[str] = 'mgr-vol-ongoing-clones' + # progress event ID for ongoing+pending clone jobs + self.onpen_pev_id: Optional[str] = 'mgr-vol-total-clones' + def initiate_reporting(self): if self.update_task.is_alive(): log.info('progress reporting thread is already alive, not ' @@ -113,11 +118,6 @@ def initiate_reporting(self): return log.info('initiating progress reporting for clones...') - # progress event ID for ongoing clone jobs - self.on_pev_id: Optional[str] = 'mgr-vol-ongoing-clones' - # progress event ID for ongoing+pending clone jobs - self.onpen_pev_id: Optional[str] = 'mgr-vol-total-clones' - self.update_task = RTimer(1, self._update_progress_bars) self.update_task.start() log.info('progress reporting for clones has been initiated') @@ -294,10 +294,7 @@ def _finish_progress_events(self): assert self.onpen_pev_id is not None self.volclient.mgr.remote('progress', 'complete', self.on_pev_id) - self.on_pev_id = None - self.volclient.mgr.remote('progress', 'complete', self.onpen_pev_id) - self.onpen_pev_id = None log.info('finished removing progress bars from "ceph status" output') diff --git a/src/python-common/ceph/deployment/drive_selection/selector.py b/src/python-common/ceph/deployment/drive_selection/selector.py index 041f1ed30446f..59ebbb6347e43 100644 --- a/src/python-common/ceph/deployment/drive_selection/selector.py +++ b/src/python-common/ceph/deployment/drive_selection/selector.py @@ -131,6 +131,10 @@ def assign_devices(self, device_filter): for disk in self.disks: logger.debug("Processing disk {}".format(disk.path)) + if disk.being_replaced: + logger.debug('Ignoring disk {} as it is being replaced.'.format(disk.path)) + continue + if not disk.available and not disk.ceph_device: logger.debug( ("Ignoring disk {}. 
" diff --git a/src/python-common/ceph/deployment/inventory.py b/src/python-common/ceph/deployment/inventory.py index a3023882108e3..e2c1a5605f9a6 100644 --- a/src/python-common/ceph/deployment/inventory.py +++ b/src/python-common/ceph/deployment/inventory.py @@ -54,7 +54,8 @@ class Device(object): 'human_readable_type', 'device_id', 'lsm_data', - 'crush_device_class' + 'crush_device_class', + 'being_replaced' ] def __init__(self, @@ -67,7 +68,8 @@ def __init__(self, lsm_data=None, # type: Optional[Dict[str, Dict[str, str]]] created=None, # type: Optional[datetime.datetime] ceph_device=None, # type: Optional[bool] - crush_device_class=None # type: Optional[str] + crush_device_class=None, # type: Optional[str] + being_replaced=None, # type: Optional[bool] ): self.path = path @@ -80,6 +82,7 @@ def __init__(self, self.created = created if created is not None else datetime_now() self.ceph_device = ceph_device self.crush_device_class = crush_device_class + self.being_replaced = being_replaced def __eq__(self, other): # type: (Any) -> bool @@ -129,7 +132,8 @@ def __repr__(self) -> str: 'lvs': self.lvs if self.lvs else 'None', 'available': str(self.available), 'ceph_device': str(self.ceph_device), - 'crush_device_class': str(self.crush_device_class) + 'crush_device_class': str(self.crush_device_class), + 'being_replaced': str(self.being_replaced) } if not self.available and self.rejected_reasons: device_desc['rejection reasons'] = self.rejected_reasons diff --git a/src/python-common/ceph/utils.py b/src/python-common/ceph/utils.py index e92a2d1de7db8..0544e9f4173d1 100644 --- a/src/python-common/ceph/utils.py +++ b/src/python-common/ceph/utils.py @@ -167,3 +167,18 @@ def http_req(hostname: str = '', log.error(e) # handle error here if needed raise + + +_TRUE_VALS = {'y', 'yes', 't', 'true', 'on', '1'} +_FALSE_VALS = {'n', 'no', 'f', 'false', 'off', '0'} + + +def strtobool(value: str) -> bool: + """Convert a string to a boolean value. + Based on a simlilar function once available at distutils.util.strtobool. 
+ """ + if value.lower() in _TRUE_VALS: + return True + if value.lower() in _FALSE_VALS: + return False + raise ValueError(f'invalid truth value {value!r}') diff --git a/src/rgw/rgw_lua_background.h b/src/rgw/rgw_lua_background.h index 7b8d12599f4e8..2973a753fff63 100644 --- a/src/rgw/rgw_lua_background.h +++ b/src/rgw/rgw_lua_background.h @@ -153,9 +153,8 @@ class Background : public RGWRealmReloader::Pauser { void run(); -protected: std::string rgw_script; - virtual int read_script(); + int read_script(); public: Background(rgw::sal::Driver* _driver, @@ -173,7 +172,7 @@ class Background : public RGWRealmReloader::Pauser { std::unique_lock cond_lock(table_mutex); rgw_map[key] = value; } - + // update the manager after void set_manager(rgw::sal::LuaManager* _lua_manager); void pause() override; diff --git a/src/test/client/nonblocking.cc b/src/test/client/nonblocking.cc index d4aecb10ffcb4..93bcfabd3fcf1 100644 --- a/src/test/client/nonblocking.cc +++ b/src/test/client/nonblocking.cc @@ -111,6 +111,8 @@ TEST_F(TestClient, LlreadvLlwritev) { writefinish.reset(new C_SaferCond("test-nonblocking-writefinish")); readfinish.reset(new C_SaferCond("test-nonblocking-readfinish")); ssize_t nwritten_a = iov_out_a[0].iov_len + iov_out_a[1].iov_len; + // reset bufferlist + bl.clear(); rc = client->ll_preadv_pwritev(fh, iov_out_a, 2, 100, true, writefinish.get(), nullptr); ASSERT_EQ(0, rc); @@ -130,6 +132,8 @@ TEST_F(TestClient, LlreadvLlwritev) { writefinish.reset(new C_SaferCond("test-nonblocking-writefinish")); readfinish.reset(new C_SaferCond("test-nonblocking-readfinish")); ssize_t nwritten_b = iov_out_b[0].iov_len + iov_out_b[1].iov_len; + // reset bufferlist + bl.clear(); rc = client->ll_preadv_pwritev(fh, iov_out_b, 2, 1000, true, writefinish.get(), nullptr, true, false); ASSERT_EQ(0, rc); diff --git a/src/test/common/test_mutex_debug.cc b/src/test/common/test_mutex_debug.cc index 977dfe738a921..cee4b427770ae 100644 --- a/src/test/common/test_mutex_debug.cc +++ b/src/test/common/test_mutex_debug.cc @@ -1,5 +1,5 @@ // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- -// vim: ts=8 sw=2 &smarttab +// vim: ts=8 sw=2 smarttab /* * Ceph - scalable distributed file system * @@ -57,21 +57,13 @@ TEST(MutexDebug, Lock) { test_lock(); } -TEST(MutexDebug, NotRecursive) { +TEST(MutexDebugDeathTest, NotRecursive) { ceph::mutex_debug m("foo"); - auto ttl = &test_try_lock; - - ASSERT_NO_THROW(m.lock()); - ASSERT_TRUE(m.is_locked()); - ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get()); - - ASSERT_THROW(m.lock(), std::system_error); + // avoid assert during test cleanup where the mutex is locked and cannot be + // pthread_mutex_destroy'd + std::unique_lock locker{m}; ASSERT_TRUE(m.is_locked()); - ASSERT_FALSE(std::async(std::launch::async, ttl, &m).get()); - - ASSERT_NO_THROW(m.unlock()); - ASSERT_FALSE(m.is_locked()); - ASSERT_TRUE(std::async(std::launch::async, ttl, &m).get()); + ASSERT_DEATH(m.lock(), "FAILED ceph_assert(recursive || !is_locked_by_me())"); } TEST(MutexRecursiveDebug, Lock) { diff --git a/src/test/crimson/test_backfill.cc b/src/test/crimson/test_backfill.cc index 6648719c61c8f..df743327aaad8 100644 --- a/src/test/crimson/test_backfill.cc +++ b/src/test/crimson/test_backfill.cc @@ -128,7 +128,8 @@ class BackfillFixture : public crimson::osd::BackfillState::BackfillListener { void enqueue_push( const hobject_t& obj, - const eversion_t& v) override; + const eversion_t& v, + const std::vector &peers) override; void enqueue_drop( const pg_shard_t& target, @@ -243,6 
+244,10 @@ struct BackfillFixture::PeeringFacade void update_complete_backfill_object_stats(const hobject_t &hoid, const pg_stat_t &stats) override { } + void prepare_backfill_for_missing( + const hobject_t &soid, + const eversion_t &v, + const std::vector &peers) override {} bool is_backfilling() const override { return true; } @@ -270,6 +275,9 @@ BackfillFixture::BackfillFixture( this->backfill_targets), std::make_unique(this->backfill_source)) { + seastar::global_logger_registry().set_all_loggers_level( + seastar::log_level::debug + ); backfill_state.process_event(crimson::osd::BackfillState::Triggered{}.intrusive_from_this()); } @@ -303,7 +311,8 @@ void BackfillFixture::request_primary_scan( void BackfillFixture::enqueue_push( const hobject_t& obj, - const eversion_t& v) + const eversion_t& v, + const std::vector &) { for (auto& [ _, bt ] : backfill_targets) { bt.store.push(obj, v); diff --git a/src/test/exporter/test_exporter.cc b/src/test/exporter/test_exporter.cc index 907884fe35d60..e24773886bcb3 100644 --- a/src/test/exporter/test_exporter.cc +++ b/src/test/exporter/test_exporter.cc @@ -1,6 +1,8 @@ #include "common/ceph_argparse.h" #include "common/config.h" #include "common/config_proxy.h" +#include "common/admin_socket.h" +#include "common/admin_socket_client.h" #include #include "gtest/gtest.h" #include "common/ceph_context.h" @@ -8,6 +10,7 @@ #include "global/global_init.h" #include "exporter/util.h" #include "exporter/DaemonMetricCollector.h" +#include #include #include @@ -674,6 +677,27 @@ static std::vector> promethize_data = { {"rocksdb.submit_sync_latency_sum", "ceph_rocksdb_submit_sync_latency_sum"} }; + +class AdminSocketTest +{ +public: + explicit AdminSocketTest(AdminSocket *asokc) + : m_asokc(asokc) + { + } + bool init(const std::string &uri) { + return m_asokc->init(uri); + } + std::string bind_and_listen(const std::string &sock_path, int *fd) { + return m_asokc->bind_and_listen(sock_path, fd); + } + bool shutdown() { + m_asokc->shutdown(); + return true; + } + AdminSocket *m_asokc; +}; + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); @@ -1289,8 +1313,11 @@ ceph_mon_session_rm{ceph_daemon="mon.a"} 577 # TYPE ceph_mon_session_trim counter ceph_mon_session_trim{ceph_daemon="mon.a"} 9 )"; - - ASSERT_TRUE(collector.metrics.find(expectedMetrics) != std::string::npos); + + std::string actualMetrics = collector.metrics; + std::cout << "Actual MON Metrics: " << actualMetrics << std::endl; + ASSERT_TRUE(actualMetrics.find(expectedMetrics) != std::string::npos); + //ASSERT_TRUE(collector.metrics.find(expectedMetrics) != std::string::npos); // Test for labeled metrics - RGW daemon = "ceph-client.rgw.foo.ceph-node-00.aayrrj.2.93993527376064"; @@ -1452,3 +1479,82 @@ TEST(Exporter, add_fixed_name_metrics) { EXPECT_EQ(new_metric.first, expected_labels); ASSERT_TRUE(new_metric.second == expected_metric_name); } + +TEST(Exporter, UpdateSockets) { + const std::string mock_dir = "/tmp/fake_sock_dir"; + + // Create the mock directory + std::filesystem::create_directories(mock_dir); + + // Create a mix of vstart and real cluster mock .asok files + std::ofstream(mock_dir + "/ceph-osd.0.asok").close(); + std::ofstream(mock_dir + "/ceph-mds.a.asok").close(); + std::ofstream(mock_dir + "/ceph-mgr.chatest-node-00.ijzynn.asok").close(); + std::ofstream(mock_dir + "/ceph-client.rgw.rgwfoo.chatest-node-00.yqaoen.2.94354846193952.asok").close(); + std::ofstream(mock_dir + "/ceph-client.ceph-exporter.chatest-node-00.asok").close(); + std::ofstream(mock_dir + 
"/ceph-mon.chatest-node-00.asok").close(); + + g_conf().set_val("exporter_sock_dir", mock_dir); + + DaemonMetricCollector collector; + + // Run the function that interacts with the mock directory + collector.update_sockets(); + + // Verify the expected results + ASSERT_EQ(collector.clients.size(), 4); + ASSERT_TRUE(collector.clients.find("ceph-osd.0") != collector.clients.end()); + ASSERT_TRUE(collector.clients.find("ceph-mds.a") != collector.clients.end()); + ASSERT_TRUE(collector.clients.find("ceph-mon.chatest-node-00") != collector.clients.end()); + ASSERT_TRUE(collector.clients.find("ceph-client.rgw.rgwfoo.chatest-node-00.yqaoen.2.94354846193952") != collector.clients.end()); + + + // Remove the mock directory and files + std::filesystem::remove_all(mock_dir); +} + + +TEST(Exporter, HealthMetrics) { + std::map clients; + DaemonMetricCollector &collector = collector_instance(); + std::string daemon = "test_daemon"; + std::string expectedCounterDump = ""; + std::string expectedCounterSchema = ""; + std::string metricName = "ceph_daemon_socket_up"; + + // Fake admin socket + std::string asok_path = "/tmp/" + daemon + ".asok"; + std::unique_ptr asokc = std::make_unique(g_ceph_context); + AdminSocketClient client(asok_path); + + // Add the daemon clients to the collector + clients.insert({daemon, std::move(client)}); + collector.clients = clients; + + auto verifyMetricValue = [&](const std::string &metricValue, bool shouldInitializeSocket) { + collector.metrics = ""; + + if (shouldInitializeSocket) { + AdminSocketTest asoct(asokc.get()); + ASSERT_TRUE(asoct.init(asok_path)); + } + + collector.dump_asok_metrics(true, 5, true, expectedCounterDump, expectedCounterSchema, false); + + if (shouldInitializeSocket) { + AdminSocketTest asoct(asokc.get()); + ASSERT_TRUE(asoct.shutdown()); + } + + std::string retrievedMetrics = collector.metrics; + std::string pattern = metricName + R"(\{[^}]*ceph_daemon=\")" + daemon + R"(\"[^}]*\}\s+)" + metricValue + R"(\b)"; + std::regex regexPattern(pattern); + ASSERT_TRUE(std::regex_search(retrievedMetrics, regexPattern)); + }; + + // Test an admin socket not answering: metric value should be "0" + verifyMetricValue("0", false); + + // Test an admin socket answering: metric value should be "1" + verifyMetricValue("1", true); +} diff --git a/src/test/libcephfs/test.cc b/src/test/libcephfs/test.cc index f2c87168633bb..6f10d2bbd4e06 100644 --- a/src/test/libcephfs/test.cc +++ b/src/test/libcephfs/test.cc @@ -976,6 +976,13 @@ TEST(LibCephFS, Symlinks) { fd = ceph_open(cmount, test_symlink, O_NOFOLLOW, 0); ASSERT_EQ(fd, -CEPHFS_ELOOP); +#if defined(__linux__) && defined(O_PATH) + // test the O_NOFOLLOW with O_PATH case + fd = ceph_open(cmount, test_symlink, O_PATH|O_NOFOLLOW, 0); + ASSERT_GT(fd, 0); + ceph_close(cmount, fd); +#endif /* __linux */ + // stat the original file struct ceph_statx stx_orig; ASSERT_EQ(ceph_statx(cmount, test_file, &stx_orig, CEPH_STATX_ALL_STATS, 0), 0); @@ -3012,6 +3019,18 @@ TEST(LibCephFS, Readlinkat) { ASSERT_EQ(0, memcmp(target, rel_file_path, target_len)); ASSERT_EQ(0, ceph_close(cmount, fd)); +#if defined(__linux__) && defined(O_PATH) + // test readlinkat with empty pathname relative to O_PATH|O_NOFOLLOW fd + fd = ceph_open(cmount, link_path, O_PATH | O_NOFOLLOW, 0); + ASSERT_LE(0, fd); + size_t link_target_len = strlen(rel_file_path); + char link_target[link_target_len+1]; + ASSERT_EQ(link_target_len, ceph_readlinkat(cmount, fd, "", link_target, link_target_len)); + link_target[link_target_len] = '\0'; + ASSERT_EQ(0, 
memcmp(link_target, rel_file_path, link_target_len)); + ASSERT_EQ(0, ceph_close(cmount, fd)); +#endif /* __linux */ + ASSERT_EQ(0, ceph_unlink(cmount, link_path)); ASSERT_EQ(0, ceph_unlink(cmount, file_path)); ASSERT_EQ(0, ceph_rmdir(cmount, dir_path)); diff --git a/src/test/rgw/test_rgw_lua.cc b/src/test/rgw/test_rgw_lua.cc index b2e11e442a28f..ad923023a6d01 100644 --- a/src/test/rgw/test_rgw_lua.cc +++ b/src/test/rgw/test_rgw_lua.cc @@ -9,6 +9,7 @@ #include "rgw_lua_background.h" #include "rgw_lua_data_filter.h" #include "rgw_sal_config.h" +#include "rgw_perf_counters.h" using namespace std; using namespace rgw; @@ -184,9 +185,51 @@ inline std::unique_ptr make_store() { return std::make_unique(std::move(context_pool)); }; +class TestLuaManager : public rgw::sal::StoreLuaManager { + public: + std::string lua_script; + unsigned read_time = 0; + TestLuaManager() { + rgw_perf_start(g_cct); + } + int get_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, std::string& script) override { + std::this_thread::sleep_for(std::chrono::seconds(read_time)); + script = lua_script; + return 0; + } + int put_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, const std::string& script) override { + return 0; + } + int del_script(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key) override { + return 0; + } + int add_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override { + return 0; + } + int remove_package(const DoutPrefixProvider* dpp, optional_yield y, const std::string& package_name) override { + return 0; + } + int list_packages(const DoutPrefixProvider* dpp, optional_yield y, rgw::lua::packages_t& packages) override { + return 0; + } + int reload_packages(const DoutPrefixProvider* dpp, optional_yield y) override { + return 0; + } + ~TestLuaManager() { + rgw_perf_stop(g_cct); + } +}; + +void set_script(rgw::sal::LuaManager* manager, const std::string& script) { + static_cast(manager)->lua_script = script; +} +void set_read_time(rgw::sal::LuaManager* manager, unsigned read_time) { + static_cast(manager)->read_time = read_time; +} + #define DEFINE_REQ_STATE RGWProcessEnv pe; \ auto store = make_store(); \ - pe.lua.manager = store->get_lua_manager(""); \ + pe.lua.manager = std::make_unique(); \ RGWEnv e; \ req_state s(g_cct, pe, &e, 0); @@ -850,24 +893,12 @@ TEST(TestRGWLua, OpsLog) } class TestBackground : public rgw::lua::Background { - const unsigned read_time; - -protected: - int read_script() override { - // don't read the object from the store - std::this_thread::sleep_for(std::chrono::seconds(read_time)); - return 0; - } - public: - TestBackground(sal::RadosStore* store, const std::string& script, rgw::sal::LuaManager* manager, unsigned read_time = 0) : + TestBackground(sal::RadosStore* store, rgw::sal::LuaManager* manager) : rgw::lua::Background(store, g_cct, manager, - 1 /* run every second */), - read_time(read_time) { - // the script is passed in the constructor - rgw_script = script; + 1 /* run every second */) { } ~TestBackground() override { @@ -878,20 +909,19 @@ class TestBackground : public rgw::lua::Background { TEST(TestRGWLuaBackground, Start) { auto store = make_store(); - auto manager = store->get_lua_manager(""); + auto manager = std::make_unique(); { // ctr and dtor without running - TestBackground lua_background(store.get(), "", manager.get()); + TestBackground lua_background(store.get(), manager.get()); } { // ctr and dtor with running - TestBackground 
lua_background(store.get(), "", manager.get()); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); } } - -constexpr auto wait_time = std::chrono::seconds(3); +constexpr auto wait_time = std::chrono::milliseconds(100); template const T& get_table_value(const TestBackground& b, const std::string& index) { @@ -903,6 +933,15 @@ const T& get_table_value(const TestBackground& b, const std::string& index) { } } +#define WAIT_FOR_BACKGROUND \ +{ \ + unsigned max_tries = 100; \ + do { \ + std::this_thread::sleep_for(wait_time); \ + --max_tries; \ + } while (perfcounter->get(l_rgw_lua_script_ok) + perfcounter->get(l_rgw_lua_script_fail) == 0 && max_tries > 0); \ +} + TEST(TestRGWLuaBackground, Script) { const std::string script = R"( @@ -912,10 +951,11 @@ TEST(TestRGWLuaBackground, Script) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get()); + auto manager = std::make_unique(); + set_script(manager.get(), script); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; EXPECT_EQ(get_table_value(lua_background, "hello"), "world"); } @@ -928,9 +968,10 @@ TEST(TestRGWLuaBackground, RequestScript) )"; DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), background_script, pe.lua.manager.get()); + set_script(pe.lua.manager.get(), background_script); + TestBackground lua_background(store.get(), pe.lua.manager.get()); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; const std::string request_script = R"( local key = "hello" @@ -947,8 +988,9 @@ TEST(TestRGWLuaBackground, RequestScript) ASSERT_EQ(rc, 0); EXPECT_EQ(get_table_value(lua_background, "hello"), "from request"); // now we resume and let the background set the value + perfcounter->set(l_rgw_lua_script_ok, 0); lua_background.resume(store.get()); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; EXPECT_EQ(get_table_value(lua_background, "hello"), "from background"); } @@ -965,14 +1007,16 @@ TEST(TestRGWLuaBackground, Pause) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get()); + auto manager = std::make_unique(); + set_script(manager.get(), script); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; const auto value_len = get_table_value(lua_background, "hello").size(); EXPECT_GT(value_len, 0); lua_background.pause(); - std::this_thread::sleep_for(wait_time); + // make sure no execution occurs + std::this_thread::sleep_for(wait_time*10); // no change in len EXPECT_EQ(value_len, get_table_value(lua_background, "hello").size()); } @@ -991,15 +1035,17 @@ TEST(TestRGWLuaBackground, PauseWhileReading) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get(), 2); + auto manager = std::make_unique(); + set_script(manager.get(), script); + set_read_time(manager.get(), 2); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); - constexpr auto long_wait_time = std::chrono::seconds(6); - std::this_thread::sleep_for(long_wait_time); + WAIT_FOR_BACKGROUND; const auto value_len = get_table_value(lua_background, "hello").size(); EXPECT_GT(value_len, 0); lua_background.pause(); - 
std::this_thread::sleep_for(long_wait_time); + // make sure no execution occurs + std::this_thread::sleep_for(wait_time*10); // one execution might occur after pause EXPECT_TRUE(value_len + 1 >= get_table_value(lua_background, "hello").size()); } @@ -1013,14 +1059,16 @@ TEST(TestRGWLuaBackground, ReadWhilePaused) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get()); + auto manager = std::make_unique(); + set_script(manager.get(), script); + TestBackground lua_background(store.get(), manager.get()); lua_background.pause(); lua_background.start(); - std::this_thread::sleep_for(wait_time); + // make sure no execution occurs + std::this_thread::sleep_for(wait_time*10); EXPECT_EQ(get_table_value(lua_background, "hello"), ""); lua_background.resume(store.get()); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; EXPECT_EQ(get_table_value(lua_background, "hello"), "world"); } @@ -1037,18 +1085,21 @@ TEST(TestRGWLuaBackground, PauseResume) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get()); + auto manager = std::make_unique(); + set_script(manager.get(), script); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; const auto value_len = get_table_value(lua_background, "hello").size(); EXPECT_GT(value_len, 0); lua_background.pause(); - std::this_thread::sleep_for(wait_time); + // make sure no execution occurs + std::this_thread::sleep_for(wait_time*10); // no change in len EXPECT_EQ(value_len, get_table_value(lua_background, "hello").size()); + perfcounter->set(l_rgw_lua_script_ok, 0); lua_background.resume(store.get()); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; // should be a change in len EXPECT_GT(get_table_value(lua_background, "hello").size(), value_len); } @@ -1066,18 +1117,19 @@ TEST(TestRGWLuaBackground, MultipleStarts) )"; auto store = make_store(); - auto manager = store->get_lua_manager(""); - TestBackground lua_background(store.get(), script, manager.get()); + auto manager = std::make_unique(); + set_script(manager.get(), script); + TestBackground lua_background(store.get(), manager.get()); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; const auto value_len = get_table_value(lua_background, "hello").size(); EXPECT_GT(value_len, 0); lua_background.start(); lua_background.shutdown(); lua_background.shutdown(); - std::this_thread::sleep_for(wait_time); + perfcounter->set(l_rgw_lua_script_ok, 0); lua_background.start(); - std::this_thread::sleep_for(wait_time); + WAIT_FOR_BACKGROUND; // should be a change in len EXPECT_GT(get_table_value(lua_background, "hello").size(), value_len); } @@ -1085,7 +1137,7 @@ TEST(TestRGWLuaBackground, MultipleStarts) TEST(TestRGWLuaBackground, TableValues) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = "string value" @@ -1107,7 +1159,7 @@ TEST(TestRGWLuaBackground, TableValues) TEST(TestRGWLuaBackground, TablePersist) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); std::string request_script = R"( RGW["key1"] = "string value" @@ 
-1137,7 +1189,7 @@ TEST(TestRGWLuaBackground, TablePersist) TEST(TestRGWLuaBackground, TableValuesFromRequest) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); lua_background.start(); const std::string request_script = R"( @@ -1165,7 +1217,7 @@ TEST(TestRGWLuaBackground, TableValuesFromRequest) TEST(TestRGWLuaBackground, TableInvalidValue) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); lua_background.start(); const std::string request_script = R"( @@ -1191,7 +1243,7 @@ TEST(TestRGWLuaBackground, TableInvalidValue) TEST(TestRGWLuaBackground, TableErase) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); std::string request_script = R"( RGW["size"] = 0 @@ -1229,7 +1281,7 @@ TEST(TestRGWLuaBackground, TableErase) TEST(TestRGWLuaBackground, TableIterate) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = "string value" @@ -1256,7 +1308,7 @@ TEST(TestRGWLuaBackground, TableIterate) TEST(TestRGWLuaBackground, TableIterateWrite) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["a"] = 1 @@ -1286,7 +1338,7 @@ TEST(TestRGWLuaBackground, TableIterateWrite) TEST(TestRGWLuaBackground, TableIncrement) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = 42 @@ -1306,7 +1358,7 @@ TEST(TestRGWLuaBackground, TableIncrement) TEST(TestRGWLuaBackground, TableIncrementBy) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = 42 @@ -1328,7 +1380,7 @@ TEST(TestRGWLuaBackground, TableIncrementBy) TEST(TestRGWLuaBackground, TableDecrement) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = 42 @@ -1348,7 +1400,7 @@ TEST(TestRGWLuaBackground, TableDecrement) TEST(TestRGWLuaBackground, TableDecrementBy) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); const std::string request_script = R"( RGW["key1"] = 42 @@ -1370,7 +1422,7 @@ TEST(TestRGWLuaBackground, TableDecrementBy) TEST(TestRGWLuaBackground, TableIncrementValueError) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); std::string request_script = R"( -- cannot increment string values @@ -1405,7 +1457,7 @@ TEST(TestRGWLuaBackground, TableIncrementValueError) TEST(TestRGWLuaBackground, TableIncrementError) { DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), 
pe.lua.manager.get()); std::string request_script = R"( -- missing argument @@ -1494,7 +1546,7 @@ TEST(TestRGWLua, Data) )"; DEFINE_REQ_STATE; - TestBackground lua_background(store.get(), "", pe.lua.manager.get()); + TestBackground lua_background(store.get(), pe.lua.manager.get()); s.host_id = "foo"; pe.lua.background = &lua_background; lua::RGWObjFilter filter(&s, script); diff --git a/src/tools/cephfs/shell/cephfs-shell b/src/tools/cephfs/shell/cephfs-shell index 9df0f900604a9..f95a4afd0579c 100755 --- a/src/tools/cephfs/shell/cephfs-shell +++ b/src/tools/cephfs/shell/cephfs-shell @@ -15,14 +15,22 @@ import re import shlex import stat import errno +import distro from cmd2 import Cmd from cmd2 import __version__ as cmd2_version from packaging.version import Version +# DFLAG is used to override the checks done by cephfs-shell +# for cmd2 versions due to weird behaviour of Ubuntu22.04 with +# cmd2's version i.e. it always gets the version of cmd2 as +# "0.0.0" instead of the actual cmd2 version. +DFLAG = False +if distro.name() == "Ubuntu" and distro.version() == "22.04": + DFLAG = True # XXX: In cmd2 versions < 1.0.1, we'll get SystemExit(2) instead of # Cmd2ArgparseError -if Version(cmd2_version) >= Version("1.0.1"): +if Version(cmd2_version) >= Version("1.0.1") or DFLAG is True: from cmd2.exceptions import Cmd2ArgparseError else: # HACK: so that we don't have check for version everywhere @@ -1700,7 +1708,7 @@ def read_shell_conf(shell, shell_conf_file): sec = 'cephfs-shell' opts = [] - if Version(cmd2_version) >= Version("0.10.0"): + if Version(cmd2_version) >= Version("0.10.0") or DFLAG is True: for attr in shell.settables.keys(): opts.append(attr) else: @@ -1768,7 +1776,7 @@ def manage_args(): args.exe_and_quit = False # Execute and quit, don't launch the shell. if args.batch: - if Version(cmd2_version) <= Version("0.9.13"): + if Version(cmd2_version) <= Version("0.9.13") and DFLAG is not True: args.commands = ['load ' + args.batch, ',quit'] else: args.commands = ['run_script ' + args.batch, ',quit'] @@ -1813,7 +1821,7 @@ def execute_cmds_and_quit(args): # value to indicate whether the execution of the commands should stop, but # since 0.9.7 it returns the return value of do_* methods only if it's # not None. When it is None it returns False instead of None. - if Version(cmd2_version) <= Version("0.9.6"): + if Version(cmd2_version) <= Version("0.9.6") and DFLAG is not True: stop_exec_val = None else: stop_exec_val = False
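
The hunk in src/pybind/mgr/volumes/fs/operations/pin_util.py swaps the import of strtobool from distutils.util (no longer available in current Python releases) to the helper added in src/python-common/ceph/utils.py above. A short usage sketch of that helper, assuming the python-common package is importable in your environment:

from ceph.utils import strtobool

assert strtobool('Yes') is True    # true values are matched case-insensitively
assert strtobool('off') is False   # recognised false values return False
try:
    strtobool('maybe')             # anything else raises ValueError
except ValueError as e:
    print(e)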
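
The new public_addrs option added to smb cluster_create accepts strings of the form "ADDRESS[%DESTINATION]", split on the first '%'. A standalone sketch of that parsing, with hypothetical example values:

def parse_public_addr(pa: str):
    # mirrors the split done in cluster_create(): address before '%',
    # optional destination after it
    pa_arr = pa.split('%', 1)
    address = pa_arr[0]
    destination = pa_arr[1] if len(pa_arr) > 1 else None
    return address, destination

print(parse_public_addr('192.168.1.50/24'))        # ('192.168.1.50/24', None)
print(parse_public_addr('192.168.1.50/24%eth1'))   # ('192.168.1.50/24', 'eth1')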
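
The smb/enums.py hunks replace `==` with `is` when comparing enum members, noting that mypy gets confused by the equality form on str-mixin enums. A small sketch (illustrative Color type, not from the patch) of why identity comparison is safe for enum members while equality still works against raw string values:

import enum

class Color(str, enum.Enum):
    RED = 'red'
    BLUE = 'blue'

c = Color.RED
print(c is Color.RED)   # True - enum members are singletons, identity is unambiguous
print(c == 'red')       # True - the str mixin still compares equal to the raw value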