From cc4c4ffeb28c1db62768eebe68ac81f34c0112f2 Mon Sep 17 00:00:00 2001
From: Andrew Myers <atmyers2@gmail.com>
Date: Wed, 13 Dec 2023 10:05:00 -0800
Subject: [PATCH 01/15] Fix warnings in SortParticlesForDeposition (#3664)

---
 Src/Particle/AMReX_ParticleUtil.H | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 5430cd34037..03fe2a29d65 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -675,6 +675,7 @@ void PermutationForDeposition (Gpu::DeviceVector<index_type>& perm, index_type n
             }
         });
 #else
+    amrex::ignore_unused(pperm, pglobal_idx);
     Abort("Not implemented");
 #endif
 

From b903e8896afbb202cf5dca786ba75b0cb598ad02 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 13 Dec 2023 11:53:12 -0800
Subject: [PATCH 02/15] Documentation for Profiling: Hot Spots and Load Balance
 (#3622)

Add more documentation on identifying hot spots and load imbalance in
profiling results.

---------

Co-authored-by: Andrew Myers <atmyers2@gmail.com>
---
 .../source/AMReX_Profiling_Tools.rst          | 41 +++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
index 8726f51a2ba..cdd774488e4 100644
--- a/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
+++ b/Docs/sphinx_documentation/source/AMReX_Profiling_Tools.rst
@@ -93,6 +93,47 @@ it is also recommended to wrap any ``BL_PROFILE_TINY_FLUSH();`` calls in
 informative ``amrex::Print()`` lines to ensure accurate identification of each
 set of timers.
 
+Hot Spots and Load Balance
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The output of TinyProfiler can help us to identify hot spots. For example,
+the following output shows the top three hot spots of a linear solver test
+running on 4 MPI processes.
+
+.. highlight:: console
+
+::
+
+    --------------------------------------------------------------------------------------------
+    Name                                         NCalls  Excl. Min  Excl. Avg  Excl. Max   Max %
+    --------------------------------------------------------------------------------------------
+    MLPoisson::Fsmooth()                            560     0.4775     0.4793     0.4815  34.97%
+    MLPoisson::Fapply()                             114     0.1103      0.113     0.1167   8.48%
+    FabArray::Xpay()                                109        0.1     0.1013     0.1038   7.54%
+
+In this test, there are 16 boxes evenly distributed among 4 MPI processes. The
+output above shows that the load is perfectly balanced. However, if the load
+is not balanced, the results can be very different and sometimes
+misleading. For example, if we put 2, 2, 6 and 6 boxes on processes 0, 1, 2
+and 3, respectively, the top three hot spots now include two MPI
+communication functions, ``FillBoundary`` and ``ParallelCopy``.
+
+.. highlight:: console
+
+::
+
+    --------------------------------------------------------------------------------------------
+    Name                                         NCalls  Excl. Min  Excl. Avg  Excl. Max   Max %
+    --------------------------------------------------------------------------------------------
+    FillBoundary_finish()                           607    0.01568     0.3367     0.6574  41.97%
+    MLPoisson::Fsmooth()                            560     0.2133     0.4047     0.5973  38.13%
+    FabArray::ParallelCopy_finish()                 231   0.002977    0.09748     0.1895  12.10%
+
+The reason that the MPI communication appears slow is that the lightly
+loaded processes have to wait for messages sent by the heavily loaded
+processes. See also :ref:`sec:profopts` for a diagnostic option that may
+provide more insight on the load imbalance.
+
 .. _sec:full:profiling:
 
 Full Profiling

From d988c985950bc0e6733db672a62cd19b8c046c9a Mon Sep 17 00:00:00 2001
From: Edward Basso <edwardebasso@gmail.com>
Date: Thu, 14 Dec 2023 09:45:24 -0800
Subject: [PATCH 03/15] Delete empty lines below comments on classes and functions
 (#3669)

Deleting the empty lines between the Doxygen comments and the class or
function declarations lets IntelliSense associate each comment with the
declaration it documents.
---
 Src/Amr/AMReX_Amr.H                           |  1 -
 Src/Amr/AMReX_AmrLevel.H                      |  1 -
 Src/Amr/AMReX_Derive.H                        |  2 --
 Src/Amr/AMReX_LevelBld.H                      |  1 -
 Src/Amr/AMReX_StateData.H                     |  1 -
 Src/Amr/AMReX_StateDescriptor.H               |  2 --
 Src/AmrCore/AMReX_AmrCore.H                   |  1 -
 Src/AmrCore/AMReX_Cluster.H                   |  2 --
 Src/AmrCore/AMReX_ErrorList.H                 |  2 --
 Src/AmrCore/AMReX_FillPatcher.H               |  1 -
 Src/AmrCore/AMReX_FluxRegister.H              |  1 -
 Src/AmrCore/AMReX_InterpFaceRegister.H        |  1 -
 Src/AmrCore/AMReX_Interpolater.H              | 11 -------
 Src/AmrCore/AMReX_TagBox.H                    |  2 --
 Src/Base/AMReX_Arena.H                        |  1 -
 Src/Base/AMReX_BArena.H                       |  1 -
 Src/Base/AMReX_BCRec.H                        |  5 ++--
 Src/Base/AMReX_BaseFab.H                      |  1 -
 Src/Base/AMReX_BoxDomain.H                    |  8 ++---
 Src/Base/AMReX_BoxList.H                      |  1 -
 Src/Base/AMReX_CArena.H                       |  1 -
 Src/Base/AMReX_CoordSys.H                     |  1 -
 Src/Base/AMReX_DistributionMapping.H          |  1 -
 Src/Base/AMReX_FACopyDescriptor.H             |  1 -
 Src/Base/AMReX_FPC.H                          |  1 -
 Src/Base/AMReX_Geometry.H                     | 16 +++++-----
 Src/Base/AMReX_IArrayBox.H                    |  1 -
 Src/Base/AMReX_IndexType.H                    |  1 -
 Src/Base/AMReX_IntVect.H                      |  1 -
 Src/Base/AMReX_MultiFabUtil.H                 | 30 +++++++++----------
 Src/Base/AMReX_NFiles.H                       |  1 -
 Src/Base/AMReX_Orientation.H                  |  1 -
 Src/Base/AMReX_PArena.H                       |  1 -
 Src/Base/AMReX_ParmParse.H                    |  1 -
 Src/Base/AMReX_ParmParse.cpp                  |  1 -
 Src/Base/AMReX_RealVect.H                     |  1 -
 Src/Base/AMReX_RungeKutta.H                   |  3 +-
 Src/Base/AMReX_Vector.H                       |  1 -
 Src/Base/AMReX_VisMF.H                        |  1 -
 Src/Boundary/AMReX_BoundCond.H                |  1 -
 Src/Boundary/AMReX_FabSet.H                   |  1 -
 Src/Boundary/AMReX_Mask.H                     |  1 -
 Src/Boundary/AMReX_YAFluxRegister.H           |  1 -
 Src/EB/AMReX_EBFluxRegister.H                 |  1 -
 Src/Extern/Bittree/AMReX_Bittree.H            |  1 -
 .../SUNDIALS/AMReX_NVector_MultiFab.cpp       |  3 --
 Src/Extern/SUNDIALS/AMReX_SUNMemory.H         |  1 -
 Src/Extern/SUNDIALS/AMReX_Sundials_Core.H     |  3 --
 Src/Particle/AMReX_ParticleTransformation.H   |  2 --
 Src/Particle/AMReX_ParticleUtil.H             |  1 -
 50 files changed, 29 insertions(+), 98 deletions(-)

diff --git a/Src/Amr/AMReX_Amr.H b/Src/Amr/AMReX_Amr.H
index a7173fd105a..bb18ec9d160 100644
--- a/Src/Amr/AMReX_Amr.H
+++ b/Src/Amr/AMReX_Amr.H
@@ -30,7 +30,6 @@ class AmrInSituBridge;
 * not belong on a single level, like establishing and updating the hierarchy
 * of levels, global timestepping, and managing the different AmrLevels
 */
-
 class Amr
     : public AmrCore
 {
diff --git a/Src/Amr/AMReX_AmrLevel.H b/Src/Amr/AMReX_AmrLevel.H
index d4ac6c7c70d..8abb00b5475 100644
--- a/Src/Amr/AMReX_AmrLevel.H
+++ b/Src/Amr/AMReX_AmrLevel.H
@@ -34,7 +34,6 @@ class TagBoxArray;
 * AmrLevel functions both as a container for state data on a level
 * and also manages the advancement of data in time.
 */
-
 class AmrLevel
 {
     friend class Amr;
diff --git a/Src/Amr/AMReX_Derive.H b/Src/Amr/AMReX_Derive.H
index e1a7310a7b2..1e0cceb7894 100644
--- a/Src/Amr/AMReX_Derive.H
+++ b/Src/Amr/AMReX_Derive.H
@@ -100,7 +100,6 @@ class DescriptorList;
 * from the state data contained in AmrLevel and its derivatives. Some
 * examples might be kinetic energy, vorticity, concentration gradients ...
 */
-
 class DeriveRec
 {
    friend class DeriveList;
@@ -339,7 +338,6 @@ private:
 *
 * DeriveList manages and provides access to the list of DeriveRecs.
 */
-
 class DeriveList
 {
 public:
diff --git a/Src/Amr/AMReX_LevelBld.H b/Src/Amr/AMReX_LevelBld.H
index 8b421265bfc..bb79184ca5e 100644
--- a/Src/Amr/AMReX_LevelBld.H
+++ b/Src/Amr/AMReX_LevelBld.H
@@ -18,7 +18,6 @@ namespace amrex {
 * Abstract base class specifying an interface for building problem-specific
 * AmrLevels.
 */
-
 class LevelBld
 {
 public:
diff --git a/Src/Amr/AMReX_StateData.H b/Src/Amr/AMReX_StateData.H
index 251e6482a45..e6edb486c41 100644
--- a/Src/Amr/AMReX_StateData.H
+++ b/Src/Amr/AMReX_StateData.H
@@ -29,7 +29,6 @@ class StateDataPhysBCFunct;
 *
 * StateData holds state data on a level for the current and previous time step.
 */
-
 class StateData
 {
     friend class StateDataPhysBCFunct;
diff --git a/Src/Amr/AMReX_StateDescriptor.H b/Src/Amr/AMReX_StateDescriptor.H
index 6cd6c92cdd3..2830b955705 100644
--- a/Src/Amr/AMReX_StateDescriptor.H
+++ b/Src/Amr/AMReX_StateDescriptor.H
@@ -29,7 +29,6 @@ namespace amrex {
 /**
 * \brief Attributes of StateData.
 */
-
 class StateDescriptor
 {
     friend class DescriptorList;
@@ -434,7 +433,6 @@ private:
 *
 * A container class for StateDescriptors.
 */
-
 class DescriptorList
 {
 public:
diff --git a/Src/AmrCore/AMReX_AmrCore.H b/Src/AmrCore/AMReX_AmrCore.H
index 20428b40930..2969b986a75 100644
--- a/Src/AmrCore/AMReX_AmrCore.H
+++ b/Src/AmrCore/AMReX_AmrCore.H
@@ -20,7 +20,6 @@ class AmrParGDB;
  * virtual functions to allocate, initialize and delete data.  It also
  * requires the derived class to tag cells for refinement.
  */
-
 class AmrCore
     : public AmrMesh
 {
diff --git a/Src/AmrCore/AMReX_Cluster.H b/Src/AmrCore/AMReX_Cluster.H
index 5bbf5c796bc..7d60131e6cf 100644
--- a/Src/AmrCore/AMReX_Cluster.H
+++ b/Src/AmrCore/AMReX_Cluster.H
@@ -20,7 +20,6 @@ class ClusterList;
 *
 * Utility class for tagging error cells.
 */
-
 class Cluster
 {
 public:
@@ -138,7 +137,6 @@ private:
 *
 * A container class for Cluster.
 */
-
 class ClusterList
 {
 public:
diff --git a/Src/AmrCore/AMReX_ErrorList.H b/Src/AmrCore/AMReX_ErrorList.H
index 9ab1a978966..ab4395d8ce8 100644
--- a/Src/AmrCore/AMReX_ErrorList.H
+++ b/Src/AmrCore/AMReX_ErrorList.H
@@ -102,7 +102,6 @@ extern "C"
 * actual error tagging will be through derivation, so provision is made
 * for this as well.
 */
-
 class ErrorRec
 {
 public:
@@ -348,7 +347,6 @@ private:
 *
 * Container class for ErrorRecs.
 */
-
 class ErrorList
 {
 public:
diff --git a/Src/AmrCore/AMReX_FillPatcher.H b/Src/AmrCore/AMReX_FillPatcher.H
index d36b3529efd..5ff1c9550d3 100644
--- a/Src/AmrCore/AMReX_FillPatcher.H
+++ b/Src/AmrCore/AMReX_FillPatcher.H
@@ -68,7 +68,6 @@ namespace amrex {
  * See AmrLevel::RK for an example of using the RungeKutta functions and
  * FillPatcher together.
  */
-
 template <class MF = MultiFab>
 class FillPatcher
 {
diff --git a/Src/AmrCore/AMReX_FluxRegister.H b/Src/AmrCore/AMReX_FluxRegister.H
index 4178eb289ba..f5983e18872 100644
--- a/Src/AmrCore/AMReX_FluxRegister.H
+++ b/Src/AmrCore/AMReX_FluxRegister.H
@@ -14,7 +14,6 @@ namespace amrex {
 *
 * Stores and manipulates fluxes at coarse-fine interfaces.
 */
-
 class FluxRegister
     :
     public BndryRegister
diff --git a/Src/AmrCore/AMReX_InterpFaceRegister.H b/Src/AmrCore/AMReX_InterpFaceRegister.H
index a63c2c23e4c..c54879bcaf6 100644
--- a/Src/AmrCore/AMReX_InterpFaceRegister.H
+++ b/Src/AmrCore/AMReX_InterpFaceRegister.H
@@ -12,7 +12,6 @@ namespace amrex {
  *  \brief InterpFaceRegister is a coarse/fine boundary register for
  *  interpolation of face data at the coarse/fine boundary.
  */
-
 class InterpFaceRegister
 {
 public:
diff --git a/Src/AmrCore/AMReX_Interpolater.H b/Src/AmrCore/AMReX_Interpolater.H
index d2fe66b0cbd..e1210a83329 100644
--- a/Src/AmrCore/AMReX_Interpolater.H
+++ b/Src/AmrCore/AMReX_Interpolater.H
@@ -17,7 +17,6 @@ class IArrayBox;
 *
 * Specifies interpolater interface for coarse-to-fine interpolation in space.
 */
-
 class Interpolater
     : public InterpBase
 {
@@ -160,7 +159,6 @@ public:
 *
 * Bilinear interpolation on node centered data.
 */
-
 class NodeBilinear
     :
     public Interpolater
@@ -219,7 +217,6 @@ public:
 *
 * Bilinear interpolation on cell centered data.
 */
-
 class CellBilinear
     :
     public Interpolater
@@ -286,7 +283,6 @@ public:
 * sum_ivar a(ic,jc,ivar)*fab(if,jf,ivar) = 0 is satisfied
 * in all fine cells if,jf covering coarse cell ic,jc.
 */
-
 class CellConservativeLinear
     :
     public Interpolater
@@ -344,7 +340,6 @@ protected:
 * Linear conservative interpolation on cell centered data
 * but with protection against undershoots or overshoots.
 */
-
 class CellConservativeProtected
     :
     public CellConservativeLinear
@@ -393,7 +388,6 @@ public:
 *
 * Quadratic interpolation on cell centered data.
 */
-
 class CellQuadratic
     :
     public Interpolater
@@ -451,7 +445,6 @@ public:
 /**
 * \brief Piecewise Constant interpolation on cell centered data.
 */
-
 class PCInterp
     :
     public Interpolater
@@ -512,7 +505,6 @@ public:
 * in constructing the polynomial, the average of the polynomial inside that
 * cell is equal to the cell averaged value of the original data.
 */
-
 class CellConservativeQuartic
     :
     public Interpolater
@@ -574,7 +566,6 @@ public:
 * a given coarse cell will have the same divergence, even when the coarse
 * grid divergence is spatially varying.
 */
-
 class FaceDivFree
     :
     public Interpolater
@@ -667,7 +658,6 @@ public:
 *
 * Bilinear interpolation on data.
 */
-
 class FaceLinear
     :
     public Interpolater
@@ -789,7 +779,6 @@ public:
 *
 * Quartic interpolation on cell centered data.
 */
-
 class CellQuartic
     :
     public Interpolater
diff --git a/Src/AmrCore/AMReX_TagBox.H b/Src/AmrCore/AMReX_TagBox.H
index 929e181e0e8..3d26f76e9cb 100644
--- a/Src/AmrCore/AMReX_TagBox.H
+++ b/Src/AmrCore/AMReX_TagBox.H
@@ -20,7 +20,6 @@ namespace amrex {
 *
 * This class is used to tag cells in a Box that need addition refinement.
 */
-
 class TagBox final
     :
     public BaseFab<char>
@@ -145,7 +144,6 @@ public:
 *
 * A container class for TagBoxes.
 */
-
 class TagBoxArray
     :
     public FabArray<TagBox>
diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H
index d328f693a96..b93c476f86b 100644
--- a/Src/Base/AMReX_Arena.H
+++ b/Src/Base/AMReX_Arena.H
@@ -82,7 +82,6 @@ struct ArenaInfo
 * A virtual base class for objects that manage their own dynamic
 * memory allocation.
 */
-
 class Arena
 {
 public:
diff --git a/Src/Base/AMReX_BArena.H b/Src/Base/AMReX_BArena.H
index 9a3b4aa0f1b..d587d100859 100644
--- a/Src/Base/AMReX_BArena.H
+++ b/Src/Base/AMReX_BArena.H
@@ -11,7 +11,6 @@ namespace amrex {
 * This is the simplest dynamic memory management class derived from Arena.
 * Makes calls to std::malloc and std::free.
 */
-
 class BArena
     :
     public Arena
diff --git a/Src/Base/AMReX_BCRec.H b/Src/Base/AMReX_BCRec.H
index 268147a3a04..d23da777eda 100644
--- a/Src/Base/AMReX_BCRec.H
+++ b/Src/Base/AMReX_BCRec.H
@@ -10,10 +10,9 @@ namespace amrex {
 /**
 * \brief Boundary Condition Records.
 * Necessary information and functions for computing boundary conditions.
+*
+* This class has standard layout.  And we should keep it so!
 */
-
-// This class has standard layout.  And we should keep it so!
-
 class BCRec
 {
 public:
diff --git a/Src/Base/AMReX_BaseFab.H b/Src/Base/AMReX_BaseFab.H
index e0331e7a0ae..eb8e5c59615 100644
--- a/Src/Base/AMReX_BaseFab.H
+++ b/Src/Base/AMReX_BaseFab.H
@@ -3528,7 +3528,6 @@ BaseFab<T>::protected_divide (const BaseFab<T>& src, const Box& srcbox, const Bo
 * and stored in component comp of this FAB.
 * This fab is returned as a reference for chaining.
 */
-
 template <class T>
 template <RunOn run_on>
 BaseFab<T>&
diff --git a/Src/Base/AMReX_BoxDomain.H b/Src/Base/AMReX_BoxDomain.H
index a82e5ddc72e..af92d631607 100644
--- a/Src/Base/AMReX_BoxDomain.H
+++ b/Src/Base/AMReX_BoxDomain.H
@@ -55,14 +55,12 @@ std::ostream& operator<< (std::ostream& os, const BoxDomain& bd);
 
 /**
 * \brief A List of Disjoint Boxes.
+*
 * A BoxDomain is a BoxList with the restriction that Boxes in the list
 * are disjoint.
+* Note that a BoxDomain is NOT a BoxList due to the protected inheritance.
+* This is a concrete class, not a polymorphic one.
 */
-
-//Note that a BoxDomain is NOT a BoxList due to the protected inheritance.
-//This is a concrete class, not a polymorphic one.
-
-
 class BoxDomain
     :
     protected BoxList
diff --git a/Src/Base/AMReX_BoxList.H b/Src/Base/AMReX_BoxList.H
index c0ff30025ff..cab414d36c8 100644
--- a/Src/Base/AMReX_BoxList.H
+++ b/Src/Base/AMReX_BoxList.H
@@ -48,7 +48,6 @@ namespace amrex
 * IndexType.  This class implements operations for sets of Boxes.
 * This is a concrete class, not a polymorphic one.
 */
-
 class BoxList
 {
 public:
diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H
index 163039df2ef..9547bc92f21 100644
--- a/Src/Base/AMReX_CArena.H
+++ b/Src/Base/AMReX_CArena.H
@@ -24,7 +24,6 @@ struct MemStat;
 * chunks of heap space and apportions it out as requested.  It merges
 * together neighboring chunks on each free().
 */
-
 class CArena
     :
     public Arena
diff --git a/Src/Base/AMReX_CoordSys.H b/Src/Base/AMReX_CoordSys.H
index ab946ffa3d0..24096c6f428 100644
--- a/Src/Base/AMReX_CoordSys.H
+++ b/Src/Base/AMReX_CoordSys.H
@@ -20,7 +20,6 @@ class FArrayBox;
 *
 * Routines for mapping between physical coordinate system and index space.
 */
-
 class CoordSys
 {
 public:
diff --git a/Src/Base/AMReX_DistributionMapping.H b/Src/Base/AMReX_DistributionMapping.H
index 0707532a0fc..e9aa82f16a2 100644
--- a/Src/Base/AMReX_DistributionMapping.H
+++ b/Src/Base/AMReX_DistributionMapping.H
@@ -37,7 +37,6 @@ class FabArrayBase;
 *  BoxArray are as equal across CPUs as is possible.  The SFC distribution is
 *  based on a space filling curve.
 */
-
 class DistributionMapping
 {
   public:
diff --git a/Src/Base/AMReX_FACopyDescriptor.H b/Src/Base/AMReX_FACopyDescriptor.H
index 7e1e383d237..ca7d3f47029 100644
--- a/Src/Base/AMReX_FACopyDescriptor.H
+++ b/Src/Base/AMReX_FACopyDescriptor.H
@@ -103,7 +103,6 @@ FabCopyDescriptor<FAB>::~FabCopyDescriptor ()
 * \brief This class orchestrates filling a destination fab of size destFabBox
 * from fabarray on the local processor (myProc).
 */
-
 template <class FAB>
 class FabArrayCopyDescriptor
 {
diff --git a/Src/Base/AMReX_FPC.H b/Src/Base/AMReX_FPC.H
index 77c4dfa923b..8975ed8e9b9 100644
--- a/Src/Base/AMReX_FPC.H
+++ b/Src/Base/AMReX_FPC.H
@@ -15,7 +15,6 @@ namespace amrex {
 * namespaces, and we don't like global constants, we make them static
 * constant data members of this class.
 */
-
 class FPC
 {
 public:
diff --git a/Src/Base/AMReX_Geometry.H b/Src/Base/AMReX_Geometry.H
index 4017273151a..550b42f2f6a 100644
--- a/Src/Base/AMReX_Geometry.H
+++ b/Src/Base/AMReX_Geometry.H
@@ -16,14 +16,6 @@
 #include <map>
 
 namespace amrex {
-/**
-* \class Geometry
-* \brief Rectangular problem domain geometry.
-*
-* This class describes problem domain and coordinate system for
-* RECTANGULAR problem domains.  Since the problem domain is RECTANGULAR,
-* periodicity is meaningful.
-*/
 
 class MultiFab;
 class DistributionMapping;
@@ -67,6 +59,14 @@ public:
     int coord;
 };
 
+/**
+ * \class Geometry
+ * \brief Rectangular problem domain geometry.
+ *
+ * This class describes problem domain and coordinate system for
+ * RECTANGULAR problem domains.  Since the problem domain is RECTANGULAR,
+ * periodicity is meaningful.
+ */
 class Geometry
     :
     public CoordSys
diff --git a/Src/Base/AMReX_IArrayBox.H b/Src/Base/AMReX_IArrayBox.H
index b5240395f02..db0f26d5080 100644
--- a/Src/Base/AMReX_IArrayBox.H
+++ b/Src/Base/AMReX_IArrayBox.H
@@ -41,7 +41,6 @@ public:
 
 *  This class does NOT provide a copy constructor or assignment operator.
 */
-
 class IArrayBox
     :
     public BaseFab<int>
diff --git a/Src/Base/AMReX_IndexType.H b/Src/Base/AMReX_IndexType.H
index 02a56aae2a2..0fd613d2a99 100644
--- a/Src/Base/AMReX_IndexType.H
+++ b/Src/Base/AMReX_IndexType.H
@@ -19,7 +19,6 @@ namespace amrex {
 * enumerated type CellIndex to be either CELL or NODE; i.e. each of the
 * AMREX_SPACEDIM dimensions must be either CELL or NODE.
 */
-
 class IndexType
 {
     friend MPI_Datatype ParallelDescriptor::Mpi_typemap<IndexType>::type();
diff --git a/Src/Base/AMReX_IntVect.H b/Src/Base/AMReX_IntVect.H
index fd71c93ae87..b2658a5ec94 100644
--- a/Src/Base/AMReX_IntVect.H
+++ b/Src/Base/AMReX_IntVect.H
@@ -42,7 +42,6 @@ int coarsen (int i, int ratio) noexcept
 * C++ array.  In addition, the basic arithmetic operators have been overloaded
 * to implement scaling and translation operations.
 */
-
 class IntVect
 {
     friend MPI_Datatype ParallelDescriptor::Mpi_typemap<IntVect>::type();
diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H
index 29af89ba88e..ca9b1ab7fff 100644
--- a/Src/Base/AMReX_MultiFabUtil.H
+++ b/Src/Base/AMReX_MultiFabUtil.H
@@ -637,13 +637,13 @@ void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
 
 
 
-   /**
-    * \brief Returns part of a norm based on two MultiFabs
-    * The MultiFabs MUST have the same underlying BoxArray.
-    * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
-    * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
-    */
-
+/**
+ * \brief Returns part of a norm based on two MultiFabs.
+ *
+ * The MultiFabs MUST have the same underlying BoxArray.
+ * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+ * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n))).
+ */
 template <typename F>
 Real
 NormHelper (const MultiFab& x, int xcomp,
@@ -696,14 +696,14 @@ NormHelper (const MultiFab& x, int xcomp,
     return sm;
 }
 
-   /**
-    * \brief Returns part of a norm based on three MultiFabs
-    * The MultiFabs MUST have the same underlying BoxArray.
-    * The Predicate pf is used to test the mask
-    * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
-    * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n)
-    */
-
+/**
+ * \brief Returns part of a norm based on three MultiFabs.
+ *
+ * The MultiFabs MUST have the same underlying BoxArray.
+ * The Predicate pf is used to test the mask.
+ * The function f is applied elementwise as f(x(i,j,k,n),y(i,j,k,n))
+ * inside the summation (subject to a valid mask entry pf(mask(i,j,k,n))).
+ */
 template <typename MMF, typename Pred, typename F>
 Real
 NormHelper (const MMF& mask,
diff --git a/Src/Base/AMReX_NFiles.H b/Src/Base/AMReX_NFiles.H
index 824351b50dc..bd1518dd44c 100644
--- a/Src/Base/AMReX_NFiles.H
+++ b/Src/Base/AMReX_NFiles.H
@@ -23,7 +23,6 @@ namespace amrex {
 *   nfi.Stream().write((const char *) data.dataPtr(), nChars);
 * }
 */
-
 class NFilesIter
 {
   public:
diff --git a/Src/Base/AMReX_Orientation.H b/Src/Base/AMReX_Orientation.H
index 263bb84a95e..61e3622b336 100644
--- a/Src/Base/AMReX_Orientation.H
+++ b/Src/Base/AMReX_Orientation.H
@@ -25,7 +25,6 @@ class OrientationIter;
 * AMREX_SPACEDIM-1 and then the AMREX_SPACEDIM high sides from direction 0 ..
 * AMREX_SPACEDIM-1.
 */
-
 class Orientation
 {
 public:
diff --git a/Src/Base/AMReX_PArena.H b/Src/Base/AMReX_PArena.H
index d2e8c8ebec3..75db747fd9f 100644
--- a/Src/Base/AMReX_PArena.H
+++ b/Src/Base/AMReX_PArena.H
@@ -15,7 +15,6 @@ namespace amrex {
 * \brief This arena uses CUDA stream-ordered memory allocator if available.
 * If not, use The_Arena().
 */
-
 class PArena
     :
     public Arena
diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H
index 01a0098333e..b6f4799f2e8 100644
--- a/Src/Base/AMReX_ParmParse.H
+++ b/Src/Base/AMReX_ParmParse.H
@@ -267,7 +267,6 @@ class IntVect;
 *    #endif
 *
 */
-
 class ParmParse
 {
 public:
diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp
index c2ecfc7b37a..6fe442bfc5b 100644
--- a/Src/Base/AMReX_ParmParse.cpp
+++ b/Src/Base/AMReX_ParmParse.cpp
@@ -450,7 +450,6 @@ ppfound (const std::string& keyword,
 // except if n==-1, return the index of the last occurrence.
 // Return 0 if the specified occurrence does not exist.
 //
-
 const ParmParse::PP_entry*
 ppindex (const ParmParse::Table& table,
          int         n,
diff --git a/Src/Base/AMReX_RealVect.H b/Src/Base/AMReX_RealVect.H
index 635d21927f6..9e1d72700f7 100644
--- a/Src/Base/AMReX_RealVect.H
+++ b/Src/Base/AMReX_RealVect.H
@@ -28,7 +28,6 @@ namespace amrex
   C++ array.  In addition, the basic arithmetic operators have been overloaded
   to implement scaling and translation operations.
 */
-
 class RealVect
 {
 public:
diff --git a/Src/Base/AMReX_RungeKutta.H b/Src/Base/AMReX_RungeKutta.H
index cfac0851cab..d68bf00bfb4 100644
--- a/Src/Base/AMReX_RungeKutta.H
+++ b/Src/Base/AMReX_RungeKutta.H
@@ -4,8 +4,6 @@
 
 #include <AMReX_FabArray.H>
 
-namespace amrex::RungeKutta {
-
 /**
  * \brief Functions for Runge-Kutta methods
  *
@@ -48,6 +46,7 @@ namespace amrex::RungeKutta {
  * FillPatcher class can be useful for implementing such a callable.  See
  * AmrLevel::RK for an example.
  */
+namespace amrex::RungeKutta {
 
 struct PostStageNoOp {
     template <typename MF>
diff --git a/Src/Base/AMReX_Vector.H b/Src/Base/AMReX_Vector.H
index c377076fe1b..18e14d5c3c0 100644
--- a/Src/Base/AMReX_Vector.H
+++ b/Src/Base/AMReX_Vector.H
@@ -20,7 +20,6 @@ namespace amrex {
 * Vector::operator[] provides bound checking when compiled with
 * DEBUG=TRUE.
 */
-
 template <class T, class Allocator=std::allocator<T> >
 class Vector
     :
diff --git a/Src/Base/AMReX_VisMF.H b/Src/Base/AMReX_VisMF.H
index f0b146f6a9d..468523e0039 100644
--- a/Src/Base/AMReX_VisMF.H
+++ b/Src/Base/AMReX_VisMF.H
@@ -29,7 +29,6 @@ class IArrayBox;
 * \brief File I/O for FabArray<FArrayBox>.
 *  Wrapper class for reading/writing FabArray<FArrayBox> objects to disk in various "smart" ways.
 */
-
 class VisMF
     : public VisMFBuffer
 {
diff --git a/Src/Boundary/AMReX_BoundCond.H b/Src/Boundary/AMReX_BoundCond.H
index 834f790f6b5..963a2fa7ecc 100644
--- a/Src/Boundary/AMReX_BoundCond.H
+++ b/Src/Boundary/AMReX_BoundCond.H
@@ -16,7 +16,6 @@ namespace amrex {
    boundary conditions are specified via an integer identifier.
    This class maintains that integer.
 */
-
 class BoundCond
 {
 public:
diff --git a/Src/Boundary/AMReX_FabSet.H b/Src/Boundary/AMReX_FabSet.H
index f4ae8b7d247..9841555b336 100644
--- a/Src/Boundary/AMReX_FabSet.H
+++ b/Src/Boundary/AMReX_FabSet.H
@@ -40,7 +40,6 @@ namespace amrex {
         FabSets are used primarily as a data storage mechanism, and are
         manipulated by more sophisticated control classes.
 */
-
 template <typename MF>
 class FabSetT
 {
diff --git a/Src/Boundary/AMReX_Mask.H b/Src/Boundary/AMReX_Mask.H
index 3a41ea81913..02000250f49 100644
--- a/Src/Boundary/AMReX_Mask.H
+++ b/Src/Boundary/AMReX_Mask.H
@@ -22,7 +22,6 @@ namespace amrex {
 
         This class does NOT provide a copy constructor or assignment operator.
 */
-
 class Mask final
     :
     public BaseFab<int>
diff --git a/Src/Boundary/AMReX_YAFluxRegister.H b/Src/Boundary/AMReX_YAFluxRegister.H
index 075a630a2f7..e26426ce15b 100644
--- a/Src/Boundary/AMReX_YAFluxRegister.H
+++ b/Src/Boundary/AMReX_YAFluxRegister.H
@@ -23,7 +23,6 @@ namespace amrex {
   `Reflux` is called to update the coarse cells next to the
   coarse/fine boundary.
 */
-
 template <typename MF>
 class YAFluxRegisterT
 {
diff --git a/Src/EB/AMReX_EBFluxRegister.H b/Src/EB/AMReX_EBFluxRegister.H
index 33ec811dcf6..72fec3b6a7c 100644
--- a/Src/EB/AMReX_EBFluxRegister.H
+++ b/Src/EB/AMReX_EBFluxRegister.H
@@ -53,7 +53,6 @@ namespace amrex {
   to add the part in ghost cells (excluding ghost cells covered by
   valid cells of other grids) to EBFluxRegister's internal data.
 */
-
 class EBFluxRegister
     : public YAFluxRegister
 {
diff --git a/Src/Extern/Bittree/AMReX_Bittree.H b/Src/Extern/Bittree/AMReX_Bittree.H
index 54a046be720..feb05e9f189 100644
--- a/Src/Extern/Bittree/AMReX_Bittree.H
+++ b/Src/Extern/Bittree/AMReX_Bittree.H
@@ -18,7 +18,6 @@ LIBRARIES += -lbittree
 Include in inputs:
 amr.use_bittree = true
 */
-
 class btUnit {
   // Functions used in AmrMesh
   public:
diff --git a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
index 8408f75c41d..34671fac1a8 100644
--- a/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
+++ b/Src/Extern/SUNDIALS/AMReX_NVector_MultiFab.cpp
@@ -24,7 +24,6 @@ namespace amrex::sundials {
 /* ----------------------------------------------------------------------------
  * Function to create a new empty multifab vector
  */
-
 N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx)
 {
     /* Create vector */
@@ -76,7 +75,6 @@ N_Vector N_VNewEmpty_MultiFab(sunindextype length, ::sundials::Context* sunctx)
 /* ----------------------------------------------------------------------------
  * Function to create a new MultiFab vector
  */
-
 N_Vector N_VNew_MultiFab(sunindextype length,
                          const amrex::BoxArray &ba,
                          const amrex::DistributionMapping &dm,
@@ -102,7 +100,6 @@ N_Vector N_VNew_MultiFab(sunindextype length,
 /* ----------------------------------------------------------------------------
  * Function to create a MultiFab N_Vector with user-specific MultiFab
  */
-
 N_Vector N_VMake_MultiFab(sunindextype length, amrex::MultiFab *v_mf,
                           ::sundials::Context* sunctx)
 {
diff --git a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
index f7700ce4210..5fc01c3b6b9 100644
--- a/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
+++ b/Src/Extern/SUNDIALS/AMReX_SUNMemory.H
@@ -13,7 +13,6 @@ namespace amrex::sundials {
  *
  * This class allows SUNDIALS to allocate memory using the amrex::Arena.
  */
-
 class MemoryHelper {
 public:
     MemoryHelper(::sundials::Context* sunctx);
diff --git a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
index bb3695d19ac..090a5f43534 100644
--- a/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
+++ b/Src/Extern/SUNDIALS/AMReX_Sundials_Core.H
@@ -15,7 +15,6 @@ namespace amrex::sundials {
  * This will create the nthreads SUNDIALS context objects that are needed by
  * the SUNDIALS solver and vector objects. Called by amrex::Initialize.
  */
-
 void Initialize(int nthreads);
 
 /**
@@ -23,7 +22,6 @@ void Initialize(int nthreads);
  *
  * Called by amrex::Finalize.
  */
-
 void Finalize();
 
 /**
@@ -33,7 +31,6 @@ void Finalize();
  *
  * A SUNDIALS context should not be used concurrently from different threads.
  */
-
 ::sundials::Context* The_Sundials_Context(int i = amrex::OpenMP::get_thread_num());
 
 }
diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H
index aa737455ce6..7ca26cef064 100644
--- a/Src/Particle/AMReX_ParticleTransformation.H
+++ b/Src/Particle/AMReX_ParticleTransformation.H
@@ -608,10 +608,8 @@ int filterAndTransformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile&
  * \param p predicate function - particles will be copied if p returns true
  * \param src_start the offset at which to start reading particles from src
  * \param dst_start the offset at which to start writing particles to dst
- * \param n the number of particles to apply the operation to
  *
  */
-
 template <typename DstTile, typename SrcTile, typename Pred, typename F, typename Index,
           std::enable_if_t<!std::is_pointer_v<std::decay_t<Pred>>,Index> nvccfoo = 0>
 Index filterAndTransformParticles (DstTile& dst, const SrcTile& src, Pred&& p, F&& f,
diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 03fe2a29d65..182802e51f2 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -47,7 +47,6 @@ numParticlesOutOfRange (Iterator const& pti, int nGrow)
  * \param nGrow the number of grow cells allowed.
  *
  */
-
 template <class Iterator, std::enable_if_t<IsParticleIterator<Iterator>::value && !Iterator::ContainerType::ParticleType::is_soa_particle, int> foo = 0>
 int
 numParticlesOutOfRange (Iterator const& pti, IntVect nGrow)

From 0a208bbc4635abacd9f087b62aa3cdffc25db113 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Thu, 14 Dec 2023 11:04:37 -0800
Subject: [PATCH 04/15] Clang-Tidy in CI: Keep Going after Errors (#3667)

Add `-k` to the make command running clang-tidy. With that, the jobs
will keep going and show all the clang-tidy check errors instead of
stopping on the first error.
---
 .github/workflows/bittree.yml  |  4 ++--
 .github/workflows/clang.yml    |  6 +++---
 .github/workflows/gcc.yml      | 26 +++++++++++++-------------
 .github/workflows/hypre.yml    |  4 ++--
 .github/workflows/petsc.yml    |  2 +-
 .github/workflows/smoke.yml    |  2 +-
 .github/workflows/sundials.yml |  2 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/bittree.yml b/.github/workflows/bittree.yml
index c12fbedc58f..687bf07c00d 100644
--- a/.github/workflows/bittree.yml
+++ b/.github/workflows/bittree.yml
@@ -52,7 +52,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs_bittree amr.plot_int=1000
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -104,7 +104,7 @@ jobs:
         mpiexec -n 2 ./main3d.gnu.TEST.MPI.ex inputs_bittree max_step=10
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/clang.yml b/.github/workflows/clang.yml
index ec469bb5de6..a343832b510 100644
--- a/.github/workflows/clang.yml
+++ b/.github/workflows/clang.yml
@@ -59,7 +59,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -117,7 +117,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -159,7 +159,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/gcc.yml b/.github/workflows/gcc.yml
index afc2044bdd7..aca7a9c8723 100644
--- a/.github/workflows/gcc.yml
+++ b/.github/workflows/gcc.yml
@@ -55,7 +55,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -107,7 +107,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -155,7 +155,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -204,7 +204,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -263,7 +263,7 @@ jobs:
 
         # Let's not use clang-tidy for this test because it wants to use C++20.
         # ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        # make -j2 -f clang-tidy-ccache-misses.mak \
+        # make -j2 -k -f clang-tidy-ccache-misses.mak \
         #     CLANG_TIDY=clang-tidy-12 \
         #     CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -320,7 +320,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -384,7 +384,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -426,7 +426,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -466,7 +466,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -506,7 +506,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -546,7 +546,7 @@ jobs:
         make install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -585,7 +585,7 @@ jobs:
             CCACHE=ccache
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -630,7 +630,7 @@ jobs:
         make -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-12 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/hypre.yml b/.github/workflows/hypre.yml
index 50423f3942a..871224fc79c 100644
--- a/.github/workflows/hypre.yml
+++ b/.github/workflows/hypre.yml
@@ -100,7 +100,7 @@ jobs:
         mpiexec -n 2 ./main3d.gnu.MPI.ex inputs.hypre
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
@@ -148,7 +148,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.MPI.ex inputs.2d
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/petsc.yml b/.github/workflows/petsc.yml
index 6d0b92b1343..eaddf1c2489 100644
--- a/.github/workflows/petsc.yml
+++ b/.github/workflows/petsc.yml
@@ -50,7 +50,7 @@ jobs:
         mpiexec -n 2 ./main2d.gnu.TEST.MPI.ex inputs.rt.2d.petsc
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml
index 080a17fd984..d907b485261 100644
--- a/.github/workflows/smoke.yml
+++ b/.github/workflows/smoke.yml
@@ -47,7 +47,7 @@ jobs:
         make test_install
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-15 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 
diff --git a/.github/workflows/sundials.yml b/.github/workflows/sundials.yml
index 12dfd42c159..a890e10fad3 100644
--- a/.github/workflows/sundials.yml
+++ b/.github/workflows/sundials.yml
@@ -60,7 +60,7 @@ jobs:
         cmake --build build -j 2
 
         ${{github.workspace}}/Tools/C_scripts/mmclt.py --input ${{github.workspace}}/ccache.log.txt
-        make -j2 -f clang-tidy-ccache-misses.mak \
+        make -j2 -k -f clang-tidy-ccache-misses.mak \
             CLANG_TIDY=clang-tidy-14 \
             CLANG_TIDY_ARGS="--config-file=${{github.workspace}}/.clang-tidy --warnings-as-errors=*"
 

From 0c6f2b4ad81178a6e14f26ebb5f2ad6c642c785c Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Thu, 14 Dec 2023 14:55:44 -0800
Subject: [PATCH 05/15] CMake: AMReX_PARALLEL_LINK_JOBS (#3628)

Add AMReX_PARALLEL_LINK_JOBS option to the CMake build system and use it
for SYCL builds.

Adjust SYCL's RelWithDebInfo mode to the recommendation of Intel VTune.

This is a follow-up on #3498 that made similar changes to GNU Make.
---
 .github/workflows/intel.yml              | 12 +++++---
 Docs/sphinx_documentation/source/GPU.rst | 36 +++++++++++++-----------
 Tools/CMake/AMReXFlagsTargets.cmake      |  2 +-
 Tools/CMake/AMReXOptions.cmake           | 10 +++++++
 Tools/CMake/AMReXSYCL.cmake              |  6 ++++
 5 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index d86035d916e..6474214e0a2 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -44,7 +44,8 @@ jobs:
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which icpx)             \
             -DCMAKE_Fortran_COMPILER=$(which ifx)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -86,7 +87,8 @@ jobs:
             -DAMReX_GPU_BACKEND=SYCL                       \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which icpx)             \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -136,7 +138,8 @@ jobs:
             -DAMReX_GPU_BACKEND=SYCL                       \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which clang++)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
@@ -186,7 +189,8 @@ jobs:
             -DAMReX_SYCL_SUB_GROUP_SIZE=64                 \
             -DCMAKE_C_COMPILER=$(which icx)                \
             -DCMAKE_CXX_COMPILER=$(which clang++)          \
-            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
+            -DCMAKE_CXX_COMPILER_LAUNCHER=ccache           \
+            -DAMReX_PARALLEL_LINK_JOBS=2
         cmake --build build --parallel 2
 
         ccache -s
diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
index aff060e9166..08297cb3e2a 100644
--- a/Docs/sphinx_documentation/source/GPU.rst
+++ b/Docs/sphinx_documentation/source/GPU.rst
@@ -217,7 +217,7 @@ variables to configure the build
    +------------------------------+-------------------------------------------------+-------------+-----------------+
    | SYCL_SUB_GROUP_SIZE          | Specify subgroup size                           | 32          | 64, 32, 16      |
    +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | SYCL_MAX_PARALLEL_LINK_JOBS  | Number of parallel jobs in device link          | 1           | 1, 2, 3, etc.   |
+   | SYCL_PARALLEL_LINK_JOBS      | Number of parallel jobs in device link          | 1           | 1, 2, 3, etc.   |
    +------------------------------+-------------------------------------------------+-------------+-----------------+
 .. raw:: latex
 
@@ -428,22 +428,24 @@ Below is an example configuration for SYCL:
 
 .. table:: AMReX SYCL-specific build options
 
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | Variable Name                | Description                                     | Default     | Possible values |
-   +==============================+=================================================+=============+=================+
-   | AMReX_SYCL_AOT               | Enable SYCL ahead-of-time compilation           | NO          | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_AOT_GRF_MODE      | Specify AOT register file mode                  | Default     | Default, Large, |
-   |                              |                                                 |             | AutoLarge       |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMREX_INTEL_ARCH             | Specify target if AOT is enabled                | None        | pvc, etc.       |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_SPLIT_KERNEL      | Enable SYCL kernel splitting                    | YES         | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_ONEDPL            | Enable SYCL's oneDPL algorithms                 | NO          | YES, NO         |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
-   | AMReX_SYCL_SUB_GROUP_SIZE    | Specify subgroup size                           | 32          | 64, 32, 16      |
-   +------------------------------+-------------------------------------------------+-------------+-----------------+
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | Variable Name                 | Description                                  | Default     | Possible values  |
+   +===============================+==============================================+=============+==================+
+   | AMReX_SYCL_AOT                | Enable SYCL ahead-of-time compilation        | NO          | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_AOT_GRF_MODE       | Specify AOT register file mode               | Default     | Default, Large,  |
+   |                               |                                              |             | AutoLarge        |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMREX_INTEL_ARCH              | Specify target if AOT is enabled             | None        | pvc, etc.        |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_SPLIT_KERNEL       | Enable SYCL kernel splitting                 | YES         | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_ONEDPL             | Enable SYCL's oneDPL algorithms              | NO          | YES, NO          |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_SYCL_SUB_GROUP_SIZE     | Specify subgroup size                        | 32          | 64, 32, 16       |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
+   | AMReX_PARALLEL_LINK_JOBS      | Specify number of parallel link jobs         | 1           | positive integer |
+   +-------------------------------+----------------------------------------------+-------------+------------------+
 .. raw:: latex
 
    \end{center}
diff --git a/Tools/CMake/AMReXFlagsTargets.cmake b/Tools/CMake/AMReXFlagsTargets.cmake
index 9e3073cd53f..a2e86b2fbd3 100644
--- a/Tools/CMake/AMReXFlagsTargets.cmake
+++ b/Tools/CMake/AMReXFlagsTargets.cmake
@@ -89,7 +89,7 @@ target_compile_options( Flags_CXX
    $<${_cxx_appleclang_rwdbg}:>
    $<${_cxx_appleclang_rel}:>
    $<${_cxx_intelllvm_dbg}:-O0 -Wall -Wextra -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable>
-   $<${_cxx_intelllvm_rwdbg}:-g1>
+   $<${_cxx_intelllvm_rwdbg}:-gline-tables-only -fdebug-info-for-profiling> # recommended by Intel VTune
    $<${_cxx_intelllvm_rel}:>
    )
 
diff --git a/Tools/CMake/AMReXOptions.cmake b/Tools/CMake/AMReXOptions.cmake
index 80196639984..e24244ea29a 100644
--- a/Tools/CMake/AMReXOptions.cmake
+++ b/Tools/CMake/AMReXOptions.cmake
@@ -213,6 +213,16 @@ if (AMReX_SYCL)
       endif()
    endif()
 
+   set(AMReX_PARALLEL_LINK_JOBS_DEFAULT 1)
+   if (DEFINED ENV{AMREX_PARALLEL_LINK_JOBS})
+      set(AMReX_PARALLEL_LINK_JOBS_DEFAULT "$ENV{AMREX_PARALLEL_LINK_JOBS}")
+   endif()
+   set(AMReX_PARALLEL_LINK_JOBS ${AMReX_PARALLEL_LINK_JOBS_DEFAULT}
+       CACHE STRING "SYCL max parallel link jobs")
+   if (NOT AMReX_PARALLEL_LINK_JOBS GREATER_EQUAL 1 OR
+       NOT AMReX_PARALLEL_LINK_JOBS MATCHES "^[1-9][0-9]*$")
+      message(FATAL_ERROR "AMReX_PARALLEL_LINK_JOBS (${AMReX_PARALLEL_LINK_JOBS}) must be a positive integer")
+   endif()
 endif ()
 
 # --- HIP ----
diff --git a/Tools/CMake/AMReXSYCL.cmake b/Tools/CMake/AMReXSYCL.cmake
index a67571dc412..2b48f1c53fe 100644
--- a/Tools/CMake/AMReXSYCL.cmake
+++ b/Tools/CMake/AMReXSYCL.cmake
@@ -88,4 +88,10 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND "${CMAKE_BUILD_TYPE}" MATCHES "Debug"
       "$<${_cxx_sycl}:-fsycl-link-huge-device-code>" )
 endif ()
 
+if (AMReX_PARALLEL_LINK_JOBS GREATER 1)
+   target_link_options( SYCL
+      INTERFACE
+      $<${_cxx_sycl}:-fsycl-max-parallel-link-jobs=${AMReX_PARALLEL_LINK_JOBS}>)
+endif()
+
 unset(_cxx_sycl)

From 554b1ca3b00c3a56cbed4861bfa16c8074a78fc5 Mon Sep 17 00:00:00 2001
From: AlexanderSinn <64009254+AlexanderSinn@users.noreply.github.com>
Date: Fri, 15 Dec 2023 01:21:16 +0100
Subject: [PATCH 06/15] PureSoA IdCpu fixes (#3671)

## Summary

I noticed a few issues in AMReX while trying to update HiPACE++ for
#3585.

Additionally, I would like to point out that `ParticleTile` has
`push_back_real` and `push_back_int` functions but for PureSoA there is
no `push_back_idcpu`, however this is not added in this PR.

## Additional background

## Checklist

The proposed changes:
- [x] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
---
 Src/Particle/AMReX_Particle.H       |  9 ---------
 Src/Particle/AMReX_ParticleTile.H   | 16 +++++++++++-----
 Src/Particle/AMReX_StructOfArrays.H |  3 +--
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H
index 16004d1231e..4ae8b7c4365 100644
--- a/Src/Particle/AMReX_Particle.H
+++ b/Src/Particle/AMReX_Particle.H
@@ -24,15 +24,6 @@ namespace
         constexpr Long NoSplitParticleID = GhostParticleID - 4;
     }
 
-    /** Used for 32bit int particle Ids, as in pure SoA layout */
-    namespace IntParticleIds {
-        constexpr int GhostParticleID = 2147483647; // 2**31-1
-        constexpr int VirtualParticleID = GhostParticleID - 1;
-        constexpr int LastParticleID = GhostParticleID - 2;
-        constexpr int DoSplitParticleID = GhostParticleID - 3;
-        constexpr int NoSplitParticleID = GhostParticleID - 4;
-    }
-
     using namespace LongParticleIds;
 }
 
diff --git a/Src/Particle/AMReX_ParticleTile.H b/Src/Particle/AMReX_ParticleTile.H
index a1bdbdd56e2..1048df8724d 100644
--- a/Src/Particle/AMReX_ParticleTile.H
+++ b/Src/Particle/AMReX_ParticleTile.H
@@ -458,7 +458,7 @@ SoAParticle<NArrayReal, NArrayInt>::NextID ()
 #endif
     next = the_next_id++;
 
-    if (next > IntParticleIds::LastParticleID) {
+    if (next > LongParticleIds::LastParticleID) {
         amrex::Abort("SoAParticle<NArrayReal, NArrayInt>::NextID() -- too many particles");
     }
 
@@ -470,7 +470,7 @@ int
 SoAParticle<NArrayReal, NArrayInt>::UnprotectedNextID ()
 {
     int next = the_next_id++;
-    if (next > IntParticleIds::LastParticleID) {
+    if (next > LongParticleIds::LastParticleID) {
         amrex::Abort("SoAParticle<NArrayReal, NArrayInt>::NextID() -- too many particles");
     }
     return next;
@@ -1039,7 +1039,9 @@ struct ParticleTile
 
     void shrink_to_fit ()
     {
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            GetStructOfArrays().GetIdCPUData().shrink_to_fit();
+        } else {
             m_aos_tile().shrink_to_fit();
         }
         for (int j = 0; j < NumRealComps(); ++j)
@@ -1058,7 +1060,9 @@ struct ParticleTile
     Long capacity () const
     {
         Long nbytes = 0;
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            nbytes += GetStructOfArrays().GetIdCPUData().capacity() * sizeof(uint64_t);
+        } else {
             nbytes += m_aos_tile().capacity() * sizeof(ParticleType);
         }
         for (int j = 0; j < NumRealComps(); ++j)
@@ -1077,7 +1081,9 @@ struct ParticleTile
 
     void swap (ParticleTile<ParticleType, NArrayReal, NArrayInt, Allocator>& other)
     {
-        if constexpr (!ParticleType::is_soa_particle) {
+        if constexpr (ParticleType::is_soa_particle) {
+            GetStructOfArrays().GetIdCPUData().swap(other.GetStructOfArrays().GetIdCPUData());
+        } else {
             m_aos_tile().swap(other.GetArrayOfStructs()());
         }
         for (int j = 0; j < NumRealComps(); ++j)
diff --git a/Src/Particle/AMReX_StructOfArrays.H b/Src/Particle/AMReX_StructOfArrays.H
index 6cd498e20a2..4de35e085ca 100644
--- a/Src/Particle/AMReX_StructOfArrays.H
+++ b/Src/Particle/AMReX_StructOfArrays.H
@@ -195,13 +195,12 @@ struct StructOfArrays {
         for (int i = 0; i < int(m_runtime_idata.size()); ++i) { m_runtime_idata[i].resize(count); }
     }
 
-    [[nodiscard]] IdCPU* idcpuarray () {
+    [[nodiscard]] uint64_t* idcpuarray () {
         if constexpr (use64BitIdCpu == true) {
             return m_idcpu.dataPtr();
         } else {
             return nullptr;
         }
-
     }
 
     [[nodiscard]] GpuArray<ParticleReal*, NReal> realarray ()

From 3497f016d9204adf2b406659100adb2803118468 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Fri, 15 Dec 2023 17:27:48 -0800
Subject: [PATCH 07/15] Fix CI for ROCm 6.0 (#3673)

Need to explicitly install hiprand package in CI because it's now a
standalone project, not a submodule for rocRand according to the release
notes.
---
 .github/workflows/dependencies/dependencies_hip.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh
index 852342e4ac1..36df2f384ba 100755
--- a/.github/workflows/dependencies/dependencies_hip.sh
+++ b/.github/workflows/dependencies/dependencies_hip.sh
@@ -43,7 +43,8 @@ sudo apt-get install -y --no-install-recommends \
     roctracer-dev   \
     rocprofiler-dev \
     rocrand-dev     \
-    rocprim-dev
+    rocprim-dev     \
+    hiprand-dev
 
 # activate
 #

From 78c4fdaf95175f992b87e670780f5cf0d01d0783 Mon Sep 17 00:00:00 2001
From: Andrew Myers <atmyers2@gmail.com>
Date: Mon, 18 Dec 2023 11:42:06 -0800
Subject: [PATCH 08/15] Improve ParIter docs (#3676)

Show how to get the ParIter type from the ParticleContainer - should be
less confusing to new users.

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [ ] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
---
 Docs/sphinx_documentation/source/Particle.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Docs/sphinx_documentation/source/Particle.rst b/Docs/sphinx_documentation/source/Particle.rst
index be8292c772b..e3a28591a72 100644
--- a/Docs/sphinx_documentation/source/Particle.rst
+++ b/Docs/sphinx_documentation/source/Particle.rst
@@ -86,7 +86,8 @@ tracked as the particle positions change. To do this, we provide the
 
 ::
 
-      ParticleContainer<3, 2, 4, 4> mypc;
+      using MyParticleContainer = ParticleContainer<3, 2, 4, 4>;
+      MyParticleContainer mypc;
 
 Like the :cpp:`Particle` class itself, the :cpp:`ParticleContainer`
 class is templated. The first two template parameters have the same meaning as
@@ -375,8 +376,8 @@ example, to iterate over all the AoS data:
 ::
 
 
-    using MyParIter = ConstParIter<2*BL_SPACEDIM>;
-    for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
+    using MyParConstIter = MyParticleContainer::ParConstIterType;
+    for (MyParConstIter pti(pc, lev); pti.isValid(); ++pti) {
         const auto& particles = pti.GetArrayOfStructs();
         for (const auto& p : particles) {
             // do stuff with p...
@@ -392,7 +393,7 @@ skipped. You can also access the SoA data using the :math:`ParIter` as follows:
 ::
 
 
-    using MyParIter = ParIter<0, 0, 2, 2>;
+    using MyParIter = MyParticleContainer::ParIterType;
     for (MyParIter pti(pc, lev); pti.isValid(); ++pti) {
         auto& particle_attributes = pti.GetStructOfArrays();
         RealVector& real_comp0 = particle_attributes.GetRealData(0);

From d1e55fbab7c5d5cb72d3c448a91430a568628b9e Mon Sep 17 00:00:00 2001
From: Andrew Myers <atmyers2@gmail.com>
Date: Mon, 18 Dec 2023 12:01:08 -0800
Subject: [PATCH 09/15] Add PTD version of getParticleCell (#3675)

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [x] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
---
 Src/Particle/AMReX_ParticleUtil.H | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Src/Particle/AMReX_ParticleUtil.H b/Src/Particle/AMReX_ParticleUtil.H
index 182802e51f2..682a82450f0 100644
--- a/Src/Particle/AMReX_ParticleUtil.H
+++ b/Src/Particle/AMReX_ParticleUtil.H
@@ -371,6 +371,26 @@ IntVect getParticleCell (P const& p,
     return iv;
 }
 
+// Cell index of particle i in tile data ptd: floor((pos-plo)*dxi) per dimension, offset by domain.smallEnd().
+template <typename PTD>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+IntVect getParticleCell (PTD const& ptd, int i,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& plo,
+                         amrex::GpuArray<amrex::Real,AMREX_SPACEDIM> const& dxi,
+                         const Box& domain) noexcept
+{
+    if constexpr (PTD::ParticleType::is_soa_particle) { // SoA: positions are read from m_rdata[0..2]
+        IntVect iv(
+                   AMREX_D_DECL(int(amrex::Math::floor((ptd.m_rdata[0][i]-plo[0])*dxi[0])),
+                                int(amrex::Math::floor((ptd.m_rdata[1][i]-plo[1])*dxi[1])),
+                                int(amrex::Math::floor((ptd.m_rdata[2][i]-plo[2])*dxi[2]))));
+        iv += domain.smallEnd();
+        return iv;
+    } else { // AoS: defer to the per-particle overload using the i-th entry of m_aos
+        return getParticleCell(ptd.m_aos[i], plo, dxi, domain);
+    }
+}
+
 struct DefaultAssignor
 {
 

From ef38229189e3213f992a2e89dbe304fb49db9287 Mon Sep 17 00:00:00 2001
From: Max Katz <maxpkatz@gmail.com>
Date: Mon, 18 Dec 2023 23:21:03 -0500
Subject: [PATCH 10/15] Add a for loop that is unrolled at compile time (#3674)

## Summary

The constexpr_for function is fully unrolled at compile time. This is
useful for relatively short loops where some of the functions inside the
loop are known to be possible to evaluate at compile time and may be
relatively expensive, so evaluating them at compile time rather than at
runtime may be beneficial for performance reasons.

## Additional background

This has been used in AMReX-Astro/Microphysics successfully in the
context of evaluating some nuclear reaction network quantities at
compile time.

## Checklist

The proposed changes:
- [ ] fix a bug or incorrect behavior in AMReX
- [x] add new capabilities to AMReX
- [ ] changes answers in the test suite to more than roundoff level
- [ ] are likely to significantly affect the results of downstream AMReX
users
- [ ] include documentation in the code and/or rst files, if appropriate
---
 Src/Base/AMReX_Loop.H | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H
index 84b39107e45..19e1c3e5191 100644
--- a/Src/Base/AMReX_Loop.H
+++ b/Src/Base/AMReX_Loop.H
@@ -211,6 +211,30 @@ void LoopConcurrentOnCpu (Box const& bx, int ncomp, F&& f) noexcept
     }}}}
 }
 
+// Implementation of "constexpr for" based on
+// https://artificial-mind.net/blog/2020/10/31/constexpr-for
+//
+// Approximates what one would get from a compile-time
+// unrolling of the loop
+// for (int i = 0; i < N; ++i) {
+//    f(i);
+// }
+//
+// The mechanism is recursive: we evaluate f(i) at the current
+// i and then call the for loop at i+1. f() is a callable (e.g. a
+// generic lambda) that provides the body of the loop; it is passed
+// i as a std::integral_constant, which converts to an integer.
+
+template<auto I, auto N, class F>
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+constexpr void constexpr_for (F&& f)
+{
+    if constexpr (I < N) {
+        f(std::integral_constant<decltype(I), I>());
+        constexpr_for<I+1, N>(f);
+    }
+}
+
 #include <AMReX_Loop.nolint.H>
 
 }

From 85462ce91e038451894daef45d339c4374c0b6ac Mon Sep 17 00:00:00 2001
From: Ankith Anil Das <46082093+ankithadas@users.noreply.github.com>
Date: Thu, 21 Dec 2023 05:58:00 +1100
Subject: [PATCH 11/15] Eliminating Matrix operations in MLMG CG bottom solver
 if initial vector is zero (#3668)

A matrix multiplication and a few copy operations can be avoided if the
input vector is zero. MLMG calls all the bottom solvers with a zeroed
`x` vector, and thus the initial residual calculation `b - Ax` is `b`.
Furthermore, it also eliminates the memory requirement of storing the
initial vector.
---
 Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H | 60 +++++++++++++++++------
 Src/LinearSolvers/MLMG/AMReX_MLMG.H       |  1 +
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
index 3afa56ee245..ff9198215fa 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
@@ -42,6 +42,16 @@ public:
     void setMaxIter (int _maxiter) { maxiter = _maxiter; }
     [[nodiscard]] int getMaxIter () const { return maxiter; }
 
+
+    /**
+    * Is the initial guess provided to the solver zero ?
+    * If so, set this to true.
+    * The solver will avoid a few operations if this is true.
+    * Default is false.
+    */
+    void setInitSolnZeroed (bool _sol_zeroed) { initial_vec_zeroed = _sol_zeroed; }
+    [[nodiscard]] bool getInitSolnZeroed () const { return initial_vec_zeroed; }
+
     void setNGhost(int _nghost) {nghost = IntVect(_nghost);}
     [[nodiscard]] int getNGhost() {return nghost[0];}
 
@@ -62,6 +72,7 @@ private:
     int maxiter   = 100;
     IntVect nghost = IntVect(0);
     int iter = -1;
+    bool initial_vec_zeroed = false;
 };
 
 template <typename MF>
@@ -95,21 +106,28 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors
     r.setVal(RT(0.0));
 
-    MF sorig = Lp.make(amrlev, mglev, nghost);
     MF rh    = Lp.make(amrlev, mglev, nghost);
     MF v     = Lp.make(amrlev, mglev, nghost);
     MF t     = Lp.make(amrlev, mglev, nghost);
 
-    Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+
+    MF sorig;
+
+    if ( initial_vec_zeroed ) {
+        r.LocalCopy(rhs,0,0,ncomp,nghost);
+    } else {
+        sorig = Lp.make(amrlev, mglev, nghost);
+
+        Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+
+        sorig.LocalCopy(sol,0,0,ncomp,nghost);
+        sol.setVal(RT(0.0));
+    }
 
     // Then normalize
     Lp.normalize(amrlev, mglev, r);
-
-    sorig.LocalCopy(sol,0,0,ncomp,nghost);
     rh.LocalCopy   (r  ,0,0,ncomp,nghost);
 
-    sol.setVal(RT(0.0));
-
     RT rnorm = norm_inf(r);
     const RT rnorm0 = rnorm;
 
@@ -238,12 +256,16 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        }
     }
     else
     {
         sol.setVal(RT(0.0));
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        }
     }
 
     return ret;
@@ -260,15 +282,21 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     MF p = Lp.make(amrlev, mglev, sol.nGrowVect());
     p.setVal(RT(0.0));
 
-    MF sorig = Lp.make(amrlev, mglev, nghost);
     MF r     = Lp.make(amrlev, mglev, nghost);
     MF q     = Lp.make(amrlev, mglev, nghost);
 
-    sorig.LocalCopy(sol,0,0,ncomp,nghost);
+    MF sorig;
+
+    if ( initial_vec_zeroed ) {
+        r.LocalCopy(rhs,0,0,ncomp,nghost);
+    } else {
+        sorig = Lp.make(amrlev, mglev, nghost);
 
-    Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
+        Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
 
-    sol.setVal(RT(0.0));
+        sorig.LocalCopy(sol,0,0,ncomp,nghost);
+        sol.setVal(RT(0.0));
+    }
 
     RT       rnorm    = norm_inf(r);
     const RT rnorm0   = rnorm;
@@ -364,12 +392,16 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        }
     }
     else
     {
         sol.setVal(RT(0.0));
-        sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        if ( !initial_vec_zeroed ) {
+            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+        }
     }
 
     return ret;
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
index 70e7e121486..84adba7dfdc 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
@@ -1526,6 +1526,7 @@ MLMGT<MF>::bottomSolveWithCG (MF& x, const MF& b, typename MLCGSolverT<MF>::Type
     cg_solver.setSolver(type);
     cg_solver.setVerbose(bottom_verbose);
     cg_solver.setMaxIter(bottom_maxiter);
+    cg_solver.setInitSolnZeroed(true);
     if (cf_strategy == CFStrategy::ghostnodes) { cg_solver.setNGhost(linop.getNGrow()); }
 
     int ret = cg_solver.solve(x, b, bottom_reltol, bottom_abstol);

From 3407e877a7b219546c0ef12bef287845f5cb9f05 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 20 Dec 2023 12:11:53 -0800
Subject: [PATCH 12/15] Add a few free functions for MLMG (#3680)

These are useful when we use Array<MultiFab,AMREX_SPACEDIM> as the data
type for MLMG.
---
 Src/Base/AMReX_FabArrayBase.H    |   5 +
 Src/Base/AMReX_FabArrayBase.cpp  |  20 ++++
 Src/Base/AMReX_FabArrayUtility.H | 187 +++++++++++++++++++++++++++++++
 Src/Base/AMReX_TypeTraits.H      |  12 ++
 4 files changed, 224 insertions(+)

diff --git a/Src/Base/AMReX_FabArrayBase.H b/Src/Base/AMReX_FabArrayBase.H
index d8bc4411874..e2cf0ed9641 100644
--- a/Src/Base/AMReX_FabArrayBase.H
+++ b/Src/Base/AMReX_FabArrayBase.H
@@ -721,6 +721,11 @@ public:
 
 };
 
+[[nodiscard]] int nComp (FabArrayBase const& fa);
+[[nodiscard]] IntVect nGrowVect (FabArrayBase const& fa);
+[[nodiscard]] BoxArray const& boxArray (FabArrayBase const& fa);
+[[nodiscard]] DistributionMapping const& DistributionMap (FabArrayBase const& fa);
+
 #ifdef BL_USE_MPI
 bool CheckRcvStats (Vector<MPI_Status>& recv_stats, const Vector<std::size_t>& recv_size, int tag);
 #endif
diff --git a/Src/Base/AMReX_FabArrayBase.cpp b/Src/Base/AMReX_FabArrayBase.cpp
index 8dd8275f66a..6997f3489dd 100644
--- a/Src/Base/AMReX_FabArrayBase.cpp
+++ b/Src/Base/AMReX_FabArrayBase.cpp
@@ -2699,4 +2699,24 @@ FabArrayBase::flushParForCache ()
 
 #endif
 
+int nComp (FabArrayBase const& fa)
+{
+    return fa.nComp();
+}
+
+IntVect nGrowVect (FabArrayBase const& fa)
+{
+    return fa.nGrowVect();
+}
+
+BoxArray const& boxArray (FabArrayBase const& fa)
+{
+    return fa.boxArray();
+}
+
+DistributionMapping const& DistributionMap (FabArrayBase const& fa)
+{
+    return fa.DistributionMap();
+}
+
 }
diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H
index ca80a070f45..0897c57ed4f 100644
--- a/Src/Base/AMReX_FabArrayUtility.H
+++ b/Src/Base/AMReX_FabArrayUtility.H
@@ -1602,6 +1602,193 @@ Dot (FabArray<FAB> const& x, int xcomp, FabArray<FAB> const& y, int ycomp, int n
     return sm;
 }
 
+//! dst = val
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setVal (MF& dst, typename MF::value_type val)
+{
+    dst.setVal(val);
+}
+
+//! dst = val in ghost cells.
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setBndry (MF& dst, typename MF::value_type val, int scomp, int ncomp)
+{
+    dst.setBndry(val, scomp, ncomp);
+}
+
+//! dst = src
+template <class DMF, class SMF,
+          std::enable_if_t<IsMultiFabLike_v<DMF> &&
+                           IsMultiFabLike_v<SMF>, int> = 0>
+void LocalCopy (DMF& dst, SMF const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    amrex::Copy(dst, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst += src
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void LocalAdd (MF& dst, MF const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    amrex::Add(dst, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst += a * src
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Saxpy (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
+            int ncomp, IntVect const& nghost)
+{
+    MF::Saxpy(dst, a, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst = src + a * dst
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Xpay (MF& dst, typename MF::value_type a, MF const& src, int scomp, int dcomp,
+           int ncomp, IntVect const& nghost)
+{
+    MF::Xpay(dst, a, src, scomp, dcomp, ncomp, nghost);
+}
+
+//! dst = src w/ MPI communication
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+void ParallelCopy (MF& dst, MF const& src, int scomp, int dcomp, int ncomp,
+                   IntVect const& ng_src = IntVect(0),
+                   IntVect const& ng_dst = IntVect(0),
+                   Periodicity const& period = Periodicity::NonPeriodic())
+{
+    dst.ParallelCopy(src, scomp, dcomp, ncomp, ng_src, ng_dst, period);
+}
+
+template <class MF, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+[[nodiscard]] typename MF::value_type
+norminf (MF const& mf, int scomp, int ncomp, IntVect const& nghost,
+         bool local = false)
+{
+    return mf.norminf(scomp, ncomp, nghost, local);
+}
+
+//! dst = val
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setVal (Array<MF,N>& dst, typename MF::value_type val)
+{
+    for (auto& mf: dst) {
+        mf.setVal(val);
+    }
+}
+
+//! dst = val in ghost cells.
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void setBndry (Array<MF,N>& dst, typename MF::value_type val, int scomp, int ncomp)
+{
+    for (auto& mf : dst) {
+        mf.setBndry(val, scomp, ncomp);
+    }
+}
+
+//! dst = src
+template <class DMF, class SMF, std::size_t N,
+          std::enable_if_t<IsMultiFabLike_v<DMF> &&
+                           IsMultiFabLike_v<SMF>, int> = 0>
+void LocalCopy (Array<DMF,N>& dst, Array<SMF,N> const& src, int scomp, int dcomp,
+                int ncomp, IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        amrex::Copy(dst[i], src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst += src
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void LocalAdd (Array<MF,N>& dst, Array<MF,N> const& src, int scomp, int dcomp,
+               int ncomp, IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        amrex::Add(dst[i], src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst += a * src
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Saxpy (Array<MF,N>& dst, typename MF::value_type a,
+            Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
+            IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        MF::Saxpy(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst = src + a * dst
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>,int> = 0>
+void Xpay (Array<MF,N>& dst, typename MF::value_type a,
+           Array<MF,N> const& src, int scomp, int dcomp, int ncomp,
+           IntVect const& nghost)
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        MF::Xpay(dst[i], a, src[i], scomp, dcomp, ncomp, nghost);
+    }
+}
+
+//! dst = src w/ MPI communication
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+void ParallelCopy (Array<MF,N>& dst, Array<MF,N> const& src,
+                   int scomp, int dcomp, int ncomp,
+                   IntVect const& ng_src = IntVect(0),
+                   IntVect const& ng_dst = IntVect(0),
+                   Periodicity const& period = Periodicity::NonPeriodic())
+{
+    for (std::size_t i = 0; i < N; ++i) {
+        dst[i].ParallelCopy(src[i], scomp, dcomp, ncomp, ng_src, ng_dst, period);
+    }
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF>, int> = 0>
+[[nodiscard]] typename MF::value_type
+norminf (Array<MF,N> const& mf, int scomp, int ncomp, IntVect const& nghost,
+         bool local = false)
+{
+    auto r = typename MF::value_type(0);
+    for (std::size_t i = 0; i < N; ++i) {
+        auto tmp = mf[i].norminf(scomp, ncomp, nghost, true);
+        r = std::max(r,tmp);
+    }
+    if (!local) {
+        ParallelAllReduce::Max(r, ParallelContext::CommunicatorSub());
+    }
+    return r;
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] int nComp (Array<MF,N> const& mf)
+{
+    return mf[0].nComp();
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] IntVect nGrowVect (Array<MF,N> const& mf)
+{
+    return mf[0].nGrowVect();
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] BoxArray const&
+boxArray (Array<MF,N> const& mf)
+{
+    return mf[0].boxArray();
+}
+
+template <class MF, std::size_t N, std::enable_if_t<IsMultiFabLike_v<MF> && (N > 0),
+                                                    int> = 0>
+[[nodiscard]] DistributionMapping const&
+DistributionMap (Array<MF,N> const& mf)
+{
+    return mf[0].DistributionMap();
+}
+
 }
 
 #endif
diff --git a/Src/Base/AMReX_TypeTraits.H b/Src/Base/AMReX_TypeTraits.H
index 222576f05f5..fbcb7a2c0e3 100644
--- a/Src/Base/AMReX_TypeTraits.H
+++ b/Src/Base/AMReX_TypeTraits.H
@@ -37,6 +37,18 @@ namespace amrex
     template <class A>
     inline constexpr bool IsFabArray_v = IsFabArray<A>::value;
 
+    template <class M, class Enable = void>
+    struct IsMultiFabLike : std::false_type {};
+    //
+    template <class M>
+    struct IsMultiFabLike<M, std::enable_if_t<IsFabArray_v<M> &&
+                                              IsBaseFab_v<typename M::fab_type> > >
+        : std::true_type {};
+    //
+    template <class M>
+    inline constexpr bool IsMultiFabLike_v = IsMultiFabLike<M>::value;
+
+
     template <bool B, class T = void>
     using EnableIf_t = typename std::enable_if<B,T>::type;
 

From 75571e2dcbf2417529c5ed8e24113580e8e1f3f1 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Wed, 20 Dec 2023 19:04:57 -0800
Subject: [PATCH 13/15] MLMG: Use free functions instead of MF member functions
 (#3681)

Note that the use of unqualified functions (e.g., setVal instead of
amrex::setVal) is intentional. With ADL, these calls in MLMG could work
with user defined data.
---
 Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H |  72 ++++----
 Src/LinearSolvers/MLMG/AMReX_MLLinOp.H    |  83 ++++++---
 Src/LinearSolvers/MLMG/AMReX_MLMG.H       | 204 +++++++++++-----------
 3 files changed, 199 insertions(+), 160 deletions(-)

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
index ff9198215fa..3bfab3c9f61 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLCGSolver.H
@@ -12,8 +12,8 @@ class MLCGSolverT
 {
 public:
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename MLLinOpT<MF>::FAB;
+    using RT  = typename MLLinOpT<MF>::RT;
 
     enum struct Type { BiCGStab, CG };
 
@@ -99,12 +99,12 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 {
     BL_PROFILE("MLCGSolver::bicgstab");
 
-    const int ncomp = sol.nComp();
+    const int ncomp = nComp(sol);
 
-    MF p = Lp.make(amrlev, mglev, sol.nGrowVect());
-    MF r = Lp.make(amrlev, mglev, sol.nGrowVect());
-    p.setVal(RT(0.0)); // Make sure all entries are initialized to avoid errors
-    r.setVal(RT(0.0));
+    MF p = Lp.make(amrlev, mglev, nGrowVect(sol));
+    MF r = Lp.make(amrlev, mglev, nGrowVect(sol));
+    setVal(p, RT(0.0)); // Make sure all entries are initialized to avoid errors
+    setVal(r, RT(0.0));
 
     MF rh    = Lp.make(amrlev, mglev, nghost);
     MF v     = Lp.make(amrlev, mglev, nghost);
@@ -114,19 +114,19 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     MF sorig;
 
     if ( initial_vec_zeroed ) {
-        r.LocalCopy(rhs,0,0,ncomp,nghost);
+        LocalCopy(r,rhs,0,0,ncomp,nghost);
     } else {
         sorig = Lp.make(amrlev, mglev, nghost);
 
         Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
 
-        sorig.LocalCopy(sol,0,0,ncomp,nghost);
-        sol.setVal(RT(0.0));
+        LocalCopy(sorig,sol,0,0,ncomp,nghost);
+        setVal(sol, RT(0.0));
     }
 
     // Then normalize
     Lp.normalize(amrlev, mglev, r);
-    rh.LocalCopy   (r  ,0,0,ncomp,nghost);
+    LocalCopy(rh, r, 0,0,ncomp,nghost);
 
     RT rnorm = norm_inf(r);
     const RT rnorm0 = rnorm;
@@ -159,13 +159,13 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         }
         if ( iter == 1 )
         {
-            p.LocalCopy(r,0,0,ncomp,nghost);
+            LocalCopy(p,r,0,0,ncomp,nghost);
         }
         else
         {
             const RT beta = (rho/rho_1)*(alpha/omega);
-            MF::Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v
-            MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p
+            Saxpy(p, -omega, v, 0, 0, ncomp, nghost); // p += -omega*v
+            Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta*p
         }
         Lp.apply(amrlev, mglev, v, p, MLLinOpT<MF>::BCMode::Homogeneous, MLLinOpT<MF>::StateMode::Correction);
         Lp.normalize(amrlev, mglev, v);
@@ -179,8 +179,8 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         {
             ret = 2; break;
         }
-        MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
-        MF::Saxpy(r,  -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v
+        Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
+        Saxpy(r,  -alpha, v, 0, 0, ncomp, nghost); // r += -alpha * v
 
         rnorm = norm_inf(r);
 
@@ -216,8 +216,8 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         {
             ret = 3; break;
         }
-        MF::Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r
-        MF::Saxpy(r,  -omega, t, 0, 0, ncomp, nghost); // r += -omega * t
+        Saxpy(sol, omega, r, 0, 0, ncomp, nghost); // sol += omega * r
+        Saxpy(r,  -omega, t, 0, 0, ncomp, nghost); // r += -omega * t
 
         rnorm = norm_inf(r);
 
@@ -257,14 +257,14 @@ MLCGSolverT<MF>::solve_bicgstab (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
         if ( !initial_vec_zeroed ) {
-            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
         }
     }
     else
     {
-        sol.setVal(RT(0.0));
+        setVal(sol, RT(0.0));
         if ( !initial_vec_zeroed ) {
-            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
         }
     }
 
@@ -277,10 +277,10 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
 {
     BL_PROFILE("MLCGSolver::cg");
 
-    const int ncomp = sol.nComp();
+    const int ncomp = nComp(sol);
 
-    MF p = Lp.make(amrlev, mglev, sol.nGrowVect());
-    p.setVal(RT(0.0));
+    MF p = Lp.make(amrlev, mglev, nGrowVect(sol));
+    setVal(p, RT(0.0));
 
     MF r     = Lp.make(amrlev, mglev, nghost);
     MF q     = Lp.make(amrlev, mglev, nghost);
@@ -288,14 +288,14 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     MF sorig;
 
     if ( initial_vec_zeroed ) {
-        r.LocalCopy(rhs,0,0,ncomp,nghost);
+        LocalCopy(r,rhs,0,0,ncomp,nghost);
     } else {
         sorig = Lp.make(amrlev, mglev, nghost);
 
         Lp.correctionResidual(amrlev, mglev, r, sol, rhs, MLLinOpT<MF>::BCMode::Homogeneous);
 
-        sorig.LocalCopy(sol,0,0,ncomp,nghost);
-        sol.setVal(RT(0.0));
+        LocalCopy(sorig,sol,0,0,ncomp,nghost);
+        setVal(sol, RT(0.0));
     }
 
     RT       rnorm    = norm_inf(r);
@@ -330,12 +330,12 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
         }
         if (iter == 1)
         {
-            p.LocalCopy(r,0,0,ncomp,nghost);
+            LocalCopy(p,r,0,0,ncomp,nghost);
         }
         else
         {
             RT beta = rho/rho_1;
-            MF::Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p
+            Xpay(p, beta, r, 0, 0, ncomp, nghost); // p = r + beta * p
         }
         Lp.apply(amrlev, mglev, q, p, MLLinOpT<MF>::BCMode::Homogeneous, MLLinOpT<MF>::StateMode::Correction);
 
@@ -357,8 +357,8 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
                            << " rho " << rho
                            << " alpha " << alpha << '\n';
         }
-        MF::Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
-        MF::Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q
+        Saxpy(sol, alpha, p, 0, 0, ncomp, nghost); // sol += alpha * p
+        Saxpy(r, -alpha, q, 0, 0, ncomp, nghost); // r += -alpha * q
         rnorm = norm_inf(r);
 
         if ( verbose > 2 )
@@ -393,14 +393,14 @@ MLCGSolverT<MF>::solve_cg (MF& sol, const MF& rhs, RT eps_rel, RT eps_abs)
     if ( ( ret == 0 || ret == 8 ) && (rnorm < rnorm0) )
     {
         if ( !initial_vec_zeroed ) {
-            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
         }
     }
     else
     {
-        sol.setVal(RT(0.0));
+        setVal(sol, RT(0.0));
         if ( !initial_vec_zeroed ) {
-            sol.LocalAdd(sorig, 0, 0, ncomp, nghost);
+            LocalAdd(sol, sorig, 0, 0, ncomp, nghost);
         }
     }
 
@@ -422,8 +422,8 @@ template <typename MF>
 auto
 MLCGSolverT<MF>::norm_inf (const MF& res, bool local) -> RT
 {
-    int ncomp = res.nComp();
-    RT result = res.norminf(0,ncomp,IntVect(0),true);
+    int ncomp = nComp(res);
+    RT result = norminf(res,0,ncomp,IntVect(0),true);
     if (!local) {
         BL_PROFILE("MLCGSolver::ParallelAllReduce");
         ParallelAllReduce::Max(result, Lp.BottomCommunicator());
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
index b8aa71eebd0..f0dca07f3ab 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H
@@ -85,6 +85,15 @@ struct LinOpEnumType
     enum struct Location { FaceCenter, FaceCentroid, CellCenter, CellCentroid };
 };
 
+template <typename T, class Enable = void> struct LinOpData {};
+//
+template <typename T>
+struct LinOpData <T, std::enable_if_t<IsMultiFabLike_v<T> > >
+{
+    using   fab_type = typename T::fab_type;
+    using value_type = typename T::value_type;
+};
+
 template <typename T> class MLMGT;
 template <typename T> class MLCGSolverT;
 template <typename T> class MLPoissonT;
@@ -100,8 +109,8 @@ public:
     template <typename T> friend class MLPoissonT;
     template <typename T> friend class MLABecLaplacianT;
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename LinOpData<MF>::fab_type;
+    using RT  = typename LinOpData<MF>::value_type;
 
     using BCType = LinOpBCType;
     using BCMode    = LinOpEnumType::BCMode;
@@ -1375,13 +1384,18 @@ template <typename MF>
 void
 MLLinOpT<MF>::make (Vector<Vector<MF> >& mf, IntVect const& ng) const
 {
-    mf.clear();
-    mf.resize(m_num_amr_levels);
-    for (int alev = 0; alev < m_num_amr_levels; ++alev) {
-        mf[alev].resize(m_num_mg_levels[alev]);
-        for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) {
-            mf[alev][mlev] = make(alev, mlev, ng);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        mf.clear();
+        mf.resize(m_num_amr_levels);
+        for (int alev = 0; alev < m_num_amr_levels; ++alev) {
+            mf[alev].resize(m_num_mg_levels[alev]);
+            for (int mlev = 0; mlev < m_num_mg_levels[alev]; ++mlev) {
+                mf[alev][mlev] = make(alev, mlev, ng);
+            }
         }
+    } else {
+        amrex::ignore_unused(mf, ng);
+        amrex::Abort("MLLinOpT::make: how did we get here?");
     }
 }
 
@@ -1389,39 +1403,62 @@ template <typename MF>
 MF
 MLLinOpT<MF>::make (int amrlev, int mglev, IntVect const& ng) const
 {
-    return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype),
-              m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(),
-              *m_factory[amrlev][mglev]);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        return MF(amrex::convert(m_grids[amrlev][mglev], m_ixtype),
+                  m_dmap[amrlev][mglev], getNComp(), ng, MFInfo(),
+                  *m_factory[amrlev][mglev]);
+    } else {
+        amrex::ignore_unused(amrlev, mglev, ng);
+        amrex::Abort("MLLinOpT::make: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeAlias (MF const& mf) const
 {
-    return MF(mf, amrex::make_alias, 0, mf.nComp());
+    if constexpr (IsMultiFabLike_v<MF>) {
+        return MF(mf, amrex::make_alias, 0, mf.nComp());
+    } else {
+        amrex::ignore_unused(mf);
+        amrex::Abort("MLLinOpT::makeAlias: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeCoarseMG (int amrlev, int mglev, IntVect const& ng) const
 {
-    BoxArray cba = m_grids[amrlev][mglev];
-    IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev];
-    cba.coarsen(ratio);
-    cba.convert(m_ixtype);
-    return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng);
-
+    if constexpr (IsMultiFabLike_v<MF>) {
+        BoxArray cba = m_grids[amrlev][mglev];
+        IntVect ratio = (amrlev > 0) ? IntVect(2) : mg_coarsen_ratio_vec[mglev];
+        cba.coarsen(ratio);
+        cba.convert(m_ixtype);
+        return MF(cba, m_dmap[amrlev][mglev], getNComp(), ng);
+    } else {
+        amrex::ignore_unused(amrlev, mglev, ng);
+        amrex::Abort("MLLinOpT::makeCoarseMG: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
 MF
 MLLinOpT<MF>::makeCoarseAmr (int famrlev, IntVect const& ng) const
 {
-    BoxArray cba = m_grids[famrlev][0];
-    IntVect ratio(AMRRefRatio(famrlev-1));
-    cba.coarsen(ratio);
-    cba.convert(m_ixtype);
-    return MF(cba, m_dmap[famrlev][0], getNComp(), ng);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        BoxArray cba = m_grids[famrlev][0];
+        IntVect ratio(AMRRefRatio(famrlev-1));
+        cba.coarsen(ratio);
+        cba.convert(m_ixtype);
+        return MF(cba, m_dmap[famrlev][0], getNComp(), ng);
+    } else {
+        amrex::ignore_unused(famrlev, ng);
+        amrex::Abort("MLLinOpT::makeCoarseAmr: how did we get here?");
+        return {};
+    }
 }
 
 template <typename MF>
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLMG.H b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
index 84adba7dfdc..9bfc2f0007d 100644
--- a/Src/LinearSolvers/MLMG/AMReX_MLMG.H
+++ b/Src/LinearSolvers/MLMG/AMReX_MLMG.H
@@ -21,8 +21,8 @@ public:
 
     template <typename T> friend class MLCGSolverT;
 
-    using FAB = typename MF::fab_type;
-    using RT  = typename MF::value_type;
+    using FAB = typename MLLinOpT<MF>::FAB;
+    using RT  = typename MLLinOpT<MF>::RT;
 
     using BCMode   = typename MLLinOpT<MF>::BCMode;
     using Location = typename MLLinOpT<MF>::Location;
@@ -507,7 +507,7 @@ MLMGT<MF>::solve (const Vector<AMF*>& a_sol, const Vector<AMF const*>& a_rhs,
     for (int alev = 0; alev < namrlevs; ++alev)
     {
         if (!sol_is_alias[alev]) {
-            a_sol[alev]->LocalCopy(sol[alev], 0, 0, ncomp, ng_back);
+            LocalCopy(*a_sol[alev], sol[alev], 0, 0, ncomp, ng_back);
         }
     }
 
@@ -541,11 +541,11 @@ MLMGT<MF>::getGradSolution (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_grad_so
             Array<MF,AMREX_SPACEDIM> grad_sol;
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_grad_sol[alev][idim]);
-                grad_sol[idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                grad_sol[idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
             linop.compGrad(alev, GetArrOfPtrs(grad_sol), sol[alev], a_loc);
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_grad_sol[alev][idim]->LocalCopy(grad_sol[idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_grad_sol[alev][idim], grad_sol[idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -578,13 +578,13 @@ MLMGT<MF>::getFluxes (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_flux,
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_flux[ilev][idim]);
-                fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
         }
         getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -618,14 +618,14 @@ MLMGT<MF>::getFluxes (const Vector<Array<AMF*,AMREX_SPACEDIM> >& a_flux,
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
                 auto const& amf = *(a_flux[ilev][idim]);
-                fluxes[ilev][idim].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev][idim].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
-            sol[ilev].LocalCopy(*a_sol[ilev], 0, 0, ncomp, sol[ilev].nGrowVect());
+            LocalCopy(sol[ilev], *a_sol[ilev], 0, 0, ncomp, nGrowVect(sol[ilev]));
         }
         linop.getFluxes(GetVecOfArrOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) {
-                a_flux[ilev][idim]->LocalCopy(fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev][idim], fluxes[ilev][idim], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -653,11 +653,11 @@ MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux, Location a_loc)
         Vector<MF> fluxes(namrlevs);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
             auto const& amf = *a_flux[ilev];
-            fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+            fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
         }
         getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol), a_loc);
         for (int ilev = 0; ilev < namrlevs; ++ilev) {
-            a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0));
+            LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0));
         }
     }
 }
@@ -676,11 +676,11 @@ void
 MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux,
                       const Vector<AMF*>& a_sol, Location /*a_loc*/)
 {
-    AMREX_ASSERT(a_flux[0]->nComp() >= AMREX_SPACEDIM);
+    AMREX_ASSERT(nComp(*a_flux[0]) >= AMREX_SPACEDIM);
 
     if constexpr (! std::is_same<AMF,MF>()) {
         for (int alev = 0; alev < namrlevs; ++alev) {
-            sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, sol[alev].nGrowVect());
+            LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, nGrowVect(sol[alev]));
         }
     }
 
@@ -718,11 +718,11 @@ MLMGT<MF>::getFluxes (const Vector<AMF*> & a_flux,
             Vector<MF> fluxes(namrlevs);
             for (int ilev = 0; ilev < namrlevs; ++ilev) {
                 auto const& amf = *a_flux[ilev];
-                fluxes[ilev].define(amf.boxArray(), amf.DistributionMap(), ncomp, 0);
+                fluxes[ilev].define(boxArray(amf), DistributionMap(amf), ncomp, 0);
             }
             linop.getFluxes(GetVecOfPtrs(fluxes), GetVecOfPtrs(sol));
             for (int ilev = 0; ilev < namrlevs; ++ilev) {
-                a_flux[ilev]->LocalCopy(fluxes[ilev], 0, 0, ncomp, IntVect(0));
+                LocalCopy(*a_flux[ilev], fluxes[ilev], 0, 0, ncomp, IntVect(0));
             }
         }
     }
@@ -779,7 +779,7 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
     sol_is_alias.resize(namrlevs,true);
     for (int alev = 0; alev < namrlevs; ++alev)
     {
-        if (cf_strategy == CFStrategy::ghostnodes || a_sol[alev]->nGrowVect() == ng_sol)
+        if (cf_strategy == CFStrategy::ghostnodes || nGrowVect(*a_sol[alev]) == ng_sol)
         {
             sol[alev] = linop.makeAlias(*a_sol[alev]);
             sol_is_alias[alev] = true;
@@ -790,7 +790,7 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
             {
                 sol[alev] = linop.make(alev, 0, ng_sol);
             }
-            sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0));
+            LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0));
         }
     }
 
@@ -808,9 +808,9 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
         const MF* prhs = a_rhs[alev];
 #if (AMREX_SPACEDIM != 3)
         int nghost = (cf_strategy == CFStrategy::ghostnodes) ? linop.getNGrow(alev) : 0;
-        MF rhstmp(prhs->boxArray(), prhs->DistributionMap(), ncomp, nghost,
+        MF rhstmp(boxArray(*prhs), DistributionMap(*prhs), ncomp, nghost,
                   MFInfo(), *linop.Factory(alev));
-        rhstmp.LocalCopy(*prhs, 0, 0, ncomp, IntVect(nghost));
+        LocalCopy(rhstmp, *prhs, 0, 0, ncomp, IntVect(nghost));
         linop.applyMetricTerm(alev, 0, rhstmp);
         linop.unimposeNeumannBC(alev, rhstmp);
         linop.applyInhomogNeumannTerm(alev, rhstmp);
@@ -822,9 +822,9 @@ MLMGT<MF>::compResidual (const Vector<MF*>& a_res, const Vector<MF*>& a_sol,
                          *a_res[alev+1], sol[alev+1], *a_rhs[alev+1]);
             if (linop.isCellCentered()) {
 #ifdef AMREX_USE_EB
-                amrex::EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
+                EB_average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
 #else
-                amrex::average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
+                average_down(*a_res[alev+1], *a_res[alev], 0, ncomp, amrrr[alev]);
 #endif
             }
         }
@@ -858,7 +858,7 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
             nghost = linop.getNGrow(alev);
             in[alev] = a_in[alev];
         }
-        else if (a_in[alev]->nGrowVect() == ng_sol)
+        else if (nGrowVect(*a_in[alev]) == ng_sol)
         {
             in[alev] = a_in[alev];
         }
@@ -866,18 +866,18 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
         {
             IntVect ng = ng_sol;
             if (cf_strategy == CFStrategy::ghostnodes) { ng = IntVect(nghost); }
-            in_raii[alev].define(a_in[alev]->boxArray(),
-                                 a_in[alev]->DistributionMap(),
-                                 a_in[alev]->nComp(), ng,
+            in_raii[alev].define(boxArray       (*a_in[alev]),
+                                 DistributionMap(*a_in[alev]),
+                                 nComp          (*a_in[alev]), ng,
                                  MFInfo(), *linop.Factory(alev));
-            in_raii[alev].LocalCopy(*a_in[alev], 0, 0, ncomp, IntVect(nghost));
+            LocalCopy(in_raii[alev], *a_in[alev], 0, 0, ncomp, IntVect(nghost));
             in[alev] = &(in_raii[alev]);
         }
-        rh[alev].define(a_in[alev]->boxArray(),
-                        a_in[alev]->DistributionMap(),
-                        a_in[alev]->nComp(), nghost, MFInfo(),
+        rh[alev].define(boxArray       (*a_in[alev]),
+                        DistributionMap(*a_in[alev]),
+                        nComp          (*a_in[alev]), nghost, MFInfo(),
                         *linop.Factory(alev));
-        rh[alev].setVal(RT(0.0));
+        setVal(rh[alev], RT(0.0));
     }
 
     if (!linop_prepared) {
@@ -901,9 +901,9 @@ MLMGT<MF>::apply (const Vector<MF*>& out, const Vector<MF*>& a_in)
                          *out[alev+1], *in[alev+1], rh[alev+1]);
             if (linop.isCellCentered()) {
 #ifdef AMREX_USE_EB
-                amrex::EB_average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]);
+                EB_average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]);
 #else
-                amrex::average_down(*out[alev+1], *out[alev], 0, out[alev]->nComp(), amrrr[alev]);
+                average_down(*out[alev+1], *out[alev], 0, nComp(*out[alev]), amrrr[alev]);
 #endif
             }
         }
@@ -970,10 +970,10 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         }
         else
         {
-            if (a_sol[alev]->nGrowVect() == ng_sol) {
+            if (nGrowVect(*a_sol[alev]) == ng_sol) {
                 if constexpr (std::is_same<AMF,MF>()) {
                     sol[alev] = linop.makeAlias(*a_sol[alev]);
-                    sol[alev].setBndry(RT(0.0), 0, ncomp);
+                    setBndry(sol[alev], RT(0.0), 0, ncomp);
                     sol_is_alias[alev] = true;
                 }
             }
@@ -981,8 +981,8 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (!solve_called) {
                     sol[alev] = linop.make(alev, 0, ng_sol);
                 }
-                sol[alev].LocalCopy(*a_sol[alev], 0, 0, ncomp, IntVect(0));
-                sol[alev].setBndry(RT(0.0), 0, ncomp);
+                LocalCopy(sol[alev], *a_sol[alev], 0, 0, ncomp, IntVect(0));
+                setBndry(sol[alev], RT(0.0), 0, ncomp);
             }
         }
     }
@@ -994,7 +994,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         if (!solve_called) {
             rhs[alev] = linop.make(alev, 0, ng_rhs);
         }
-        rhs[alev].LocalCopy(*a_rhs[alev], 0, 0, ncomp, ng_rhs);
+        LocalCopy(rhs[alev], *a_rhs[alev], 0, 0, ncomp, ng_rhs);
         linop.applyMetricTerm(alev, 0, rhs[alev]);
         linop.unimposeNeumannBC(alev, rhs[alev]);
         linop.applyInhomogNeumannTerm(alev, rhs[alev]);
@@ -1036,8 +1036,8 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
         const int nmglevs = linop.NMGLevels(alev);
         for (int mglev = 0; mglev < nmglevs; ++mglev)
         {
-            res   [alev][mglev].setVal(RT(0.0));
-            rescor[alev][mglev].setVal(RT(0.0));
+            setVal(res   [alev][mglev], RT(0.0));
+            setVal(rescor[alev][mglev], RT(0.0));
         }
     }
 
@@ -1054,7 +1054,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); }
                 cor[alev][mglev] = linop.make(alev, mglev, _ng);
             }
-            cor[alev][mglev].setVal(RT(0.0));
+            setVal(cor[alev][mglev], RT(0.0));
         }
     }
 
@@ -1070,7 +1070,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
                 if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev,mglev)); }
                 cor_hold[alev][mglev] = linop.make(alev, mglev, _ng);
             }
-            cor_hold[alev][mglev].setVal(RT(0.0));
+            setVal(cor_hold[alev][mglev], RT(0.0));
         }
     }
     for (int alev = 1; alev < finest_amr_lev; ++alev)
@@ -1081,7 +1081,7 @@ MLMGT<MF>::prepareForSolve (Vector<AMF*> const& a_sol, Vector<AMF const*> const&
             if (cf_strategy == CFStrategy::ghostnodes) { _ng=IntVect(linop.getNGrow(alev)); }
             cor_hold[alev][0] = linop.make(alev, 0, _ng);
         }
-        cor_hold[alev][0].setVal(RT(0.0));
+        setVal(cor_hold[alev][0], RT(0.0));
     }
 
     if (linop.m_parent // no embedded N-Solve
@@ -1110,30 +1110,32 @@ template <typename MF>
 void
 MLMGT<MF>::prepareForNSolve ()
 {
-    ns_linop = linop.makeNLinOp(nsolve_grid_size);
+    if constexpr (IsMultiFabLike_v<MF>) {
+        ns_linop = linop.makeNLinOp(nsolve_grid_size);
 
-    int nghost = 0;
-    if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); }
-
-    const BoxArray& ba = (*ns_linop).m_grids[0][0];
-    const DistributionMapping& dm =(*ns_linop).m_dmap[0][0];
-
-    int ng = 1;
-    if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
-    ns_sol = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
-    ng = 0;
-    if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
-    ns_rhs = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
-    ns_sol->setVal(RT(0.0));
-    ns_rhs->setVal(RT(0.0));
-
-    ns_linop->setLevelBC(0, ns_sol.get());
-
-    ns_mlmg = std::make_unique<MLMGT<MF>>(*ns_linop);
-    ns_mlmg->setVerbose(0);
-    ns_mlmg->setFixedIter(1);
-    ns_mlmg->setMaxFmgIter(20);
-    ns_mlmg->setBottomSolver(BottomSolver::smoother);
+        int nghost = 0;
+        if (cf_strategy == CFStrategy::ghostnodes) { nghost = linop.getNGrow(); }
+
+        const BoxArray& ba = (*ns_linop).m_grids[0][0];
+        const DistributionMapping& dm =(*ns_linop).m_dmap[0][0];
+
+        int ng = 1;
+        if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
+        ns_sol = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
+        ng = 0;
+        if (cf_strategy == CFStrategy::ghostnodes) { ng = nghost; }
+        ns_rhs = std::make_unique<MF>(ba, dm, ncomp, ng, MFInfo(), *(ns_linop->Factory(0,0)));
+        setVal(*ns_sol, RT(0.0));
+        setVal(*ns_rhs, RT(0.0));
+
+        ns_linop->setLevelBC(0, ns_sol.get());
+
+        ns_mlmg = std::make_unique<MLMGT<MF>>(*ns_linop);
+        ns_mlmg->setVerbose(0);
+        ns_mlmg->setFixedIter(1);
+        ns_mlmg->setMaxFmgIter(20);
+        ns_mlmg->setBottomSolver(BottomSolver::smoother);
+    }
 }
 
 // in  : Residual (res) on the finest AMR level
@@ -1149,7 +1151,7 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); }
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         // compute residual for the coarse AMR level
         computeResWithCrseSolFineCor(alev-1,alev);
@@ -1175,7 +1177,7 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(0)); }
-        sol[0].LocalAdd(cor[0][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[0], cor[0][0], 0, 0, ncomp, nghost);
     }
 
     for (int alev = 1; alev <= finest_amr_lev; ++alev)
@@ -1185,10 +1187,10 @@ void MLMGT<MF>::oneIter (int iter)
 
         IntVect nghost(0);
         if (cf_strategy == CFStrategy::ghostnodes) { nghost = IntVect(linop.getNGrow(alev)); }
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         if (alev != finest_amr_lev) {
-            cor_hold[alev][0].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+            LocalAdd(cor_hold[alev][0], cor[alev][0], 0, 0, ncomp, nghost);
         }
 
         // Update fine AMR level correction
@@ -1196,10 +1198,10 @@ void MLMGT<MF>::oneIter (int iter)
 
         miniCycle(alev);
 
-        sol[alev].LocalAdd(cor[alev][0], 0, 0, ncomp, nghost);
+        LocalAdd(sol[alev], cor[alev][0], 0, 0, ncomp, nghost);
 
         if (alev != finest_amr_lev) {
-            cor[alev][0].LocalAdd(cor_hold[alev][0], 0, 0, ncomp, nghost);
+            LocalAdd(cor[alev][0], cor_hold[alev][0], 0, 0, ncomp, nghost);
         }
     }
 
@@ -1231,12 +1233,12 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
 
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   DN: Norm before smooth " << norm << "\n";
         }
 
-        cor[amrlev][mglev].setVal(RT(0.0));
+        setVal(cor[amrlev][mglev], RT(0.0));
         bool skip_fillboundary = true;
         for (int i = 0; i < nu1; ++i) {
             linop.smooth(amrlev, mglev, cor[amrlev][mglev], res[amrlev][mglev], skip_fillboundary);
@@ -1248,7 +1250,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
 
         if (verbose >= 4)
         {
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   DN: Norm after  smooth " << norm << "\n";
         }
@@ -1262,7 +1264,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
     {
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "   DN: Norm before bottom " << norm << "\n";
         }
@@ -1270,7 +1272,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev_bottom);
-            RT norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "   UP: Norm after  bottom " << norm << "\n";
         }
@@ -1279,11 +1281,11 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
     {
         if (verbose >= 4)
         {
-            RT norm = res[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(res[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev_bottom
                            << "       Norm before smooth " << norm << "\n";
         }
-        cor[amrlev][mglev_bottom].setVal(RT(0.0));
+        setVal(cor[amrlev][mglev_bottom], RT(0.0));
         bool skip_fillboundary = true;
         for (int i = 0; i < nu1; ++i) {
             linop.smooth(amrlev, mglev_bottom, cor[amrlev][mglev_bottom],
@@ -1293,7 +1295,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev_bottom);
-            RT norm = rescor[amrlev][mglev_bottom].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev_bottom],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev  << " " << mglev_bottom
                            << "       Norm after  smooth " << norm << "\n";
         }
@@ -1308,7 +1310,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev);
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   UP: Norm before smooth " << norm << "\n";
         }
@@ -1321,7 +1323,7 @@ MLMGT<MF>::mgVcycle (int amrlev, int mglev_top)
         if (verbose >= 4)
         {
             computeResOfCorrection(amrlev, mglev);
-            RT norm = rescor[amrlev][mglev].norminf(0,ncomp,IntVect(0));
+            RT norm = norminf(rescor[amrlev][mglev],0,ncomp,IntVect(0));
             amrex::Print() << "AT LEVEL "  << amrlev << " " << mglev
                            << "   UP: Norm after  smooth " << norm << "\n";
         }
@@ -1361,12 +1363,12 @@ MLMGT<MF>::mgFcycle ()
         // rescor = res - L(cor)
         computeResOfCorrection(amrlev, mglev);
         // res = rescor; this provides b to the vcycle below
-        res[amrlev][mglev].LocalCopy(rescor[amrlev][mglev], 0, 0, ncomp, nghost);
+        LocalCopy(res[amrlev][mglev], rescor[amrlev][mglev], 0, 0, ncomp, nghost);
 
         // save cor; do v-cycle; add the saved to cor
         std::swap(cor[amrlev][mglev], cor_hold[amrlev][mglev]);
         mgVcycle(amrlev, mglev);
-        cor[amrlev][mglev].LocalAdd(cor_hold[amrlev][mglev], 0, 0, ncomp, nghost);
+        LocalAdd(cor[amrlev][mglev], cor_hold[amrlev][mglev], 0, 0, ncomp, nghost);
     }
 }
 
@@ -1393,16 +1395,16 @@ MLMGT<MF>::NSolve (MLMGT<MF>& a_solver, MF& a_sol, MF& a_rhs)
 {
     BL_PROFILE("MLMG::NSolve()");
 
-    a_sol.setVal(RT(0.0));
+    setVal(a_sol, RT(0.0));
 
     MF const& res_bottom = res[0].back();
-    if (BoxArray::SameRefs(a_rhs.boxArray(),res_bottom.boxArray()) &&
-        DistributionMapping::SameRefs(a_rhs.DistributionMap(),res_bottom.DistributionMap()))
+    if (BoxArray::SameRefs(boxArray(a_rhs),boxArray(res_bottom)) &&
+        DistributionMapping::SameRefs(DistributionMap(a_rhs),DistributionMap(res_bottom)))
     {
-        a_rhs.LocalCopy(res_bottom, 0, 0, ncomp, IntVect(0));
+        LocalCopy(a_rhs, res_bottom, 0, 0, ncomp, IntVect(0));
     } else {
-        a_rhs.setVal(RT(0.0));
-        a_rhs.ParallelCopy(res_bottom);
+        setVal(a_rhs, RT(0.0));
+        ParallelCopy(a_rhs, res_bottom, 0, 0, ncomp);
     }
 
     a_solver.solve(Vector<MF*>{&a_sol}, Vector<MF const*>{&a_rhs},
@@ -1428,7 +1430,7 @@ MLMGT<MF>::actualBottomSolve ()
     auto& x = cor[amrlev][mglev];
     auto& b = res[amrlev][mglev];
 
-    x.setVal(RT(0.0));
+    setVal(x, RT(0.0));
 
     if (bottom_solver == BottomSolver::smoother)
     {
@@ -1444,9 +1446,9 @@ MLMGT<MF>::actualBottomSolve ()
         MF raii_b;
         if (linop.isBottomSingular() && linop.getEnforceSingularSolvable())
         {
-            const IntVect ng = b.nGrowVect();
+            const IntVect ng = nGrowVect(b);
             raii_b = linop.make(amrlev, mglev, ng);
-            raii_b.LocalCopy(b, 0, 0, ncomp, ng);
+            LocalCopy(raii_b, b, 0, 0, ncomp, ng);
             bottom_b = &raii_b;
 
             makeSolvable(amrlev,mglev,*bottom_b);
@@ -1486,7 +1488,7 @@ MLMGT<MF>::actualBottomSolve ()
             int ret = bottomSolveWithCG(x, *bottom_b, cg_type);
             // If the MLMG solve failed then set the correction to zero
             if (ret != 0) {
-                cor[amrlev][mglev].setVal(RT(0.0));
+                setVal(cor[amrlev][mglev], RT(0.0));
                 if (bottom_solver == BottomSolver::cgbicg ||
                     bottom_solver == BottomSolver::bicgcg) {
                     if (bottom_solver == BottomSolver::cgbicg) {
@@ -1496,7 +1498,7 @@ MLMGT<MF>::actualBottomSolve ()
                     }
                     ret = bottomSolveWithCG(x, *bottom_b, cg_type);
                     if (ret != 0) {
-                        cor[amrlev][mglev].setVal(RT(0.0));
+                        setVal(cor[amrlev][mglev], RT(0.0));
                     } else { // switch permanently
                         if (cg_type == MLCGSolverT<MF>::Type::CG) {
                             bottom_solver = BottomSolver::cg;
@@ -1591,7 +1593,7 @@ MLMGT<MF>::computeResWithCrseSolFineCor (int calev, int falev)
     linop.solutionResidual(calev, crse_res, crse_sol, crse_rhs, crse_bcdata);
 
     linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res, BCMode::Homogeneous);
-    fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost);
+    LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost);
 
     linop.reflux(calev, crse_res, crse_sol, crse_rhs, fine_res, fine_sol, fine_rhs);
 
@@ -1619,7 +1621,7 @@ MLMGT<MF>::computeResWithCrseCorFineCor (int falev)
     // fine_rescor = fine_res - L(fine_cor)
     linop.correctionResidual(falev, 0, fine_rescor, fine_cor, fine_res,
                              BCMode::Inhomogeneous, &crse_cor);
-    fine_res.LocalCopy(fine_rescor, 0, 0, ncomp, nghost);
+    LocalCopy(fine_res, fine_rescor, 0, 0, ncomp, nghost);
 }
 
 // Interpolate correction from coarse to fine AMR level.
@@ -1648,9 +1650,9 @@ MLMGT<MF>::interpCorrection (int alev)
     }
 
     MF cfine = linop.makeCoarseAmr(alev, IntVect(ng_dst));
-    cfine.setVal(RT(0.0));
-    cfine.ParallelCopy(crse_cor, 0, 0, ncomp,  IntVect(ng_src), IntVect(ng_dst),
-                       crse_geom.periodicity());
+    setVal(cfine, RT(0.0));
+    ParallelCopy(cfine, crse_cor, 0, 0, ncomp, IntVect(ng_src), IntVect(ng_dst),
+                 crse_geom.periodicity());
 
     linop.interpolationAmr(alev, fine_cor, cfine, nghost); // NOLINT(readability-suspicious-call-argument)
 }
@@ -1689,7 +1691,7 @@ MLMGT<MF>::addInterpCorrection (int alev, int mglev)
     else
     {
         cfine = linop.makeCoarseMG(alev, mglev, IntVect(0));
-        cfine.ParallelCopy(crse_cor,0,0,ncomp,IntVect(0),IntVect(0));
+        ParallelCopy(cfine, crse_cor, 0, 0, ncomp);
         cmf = &cfine;
     }
 

From a068330e6c66b5d9a7c6ca0e1c874f318e73f4cc Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Tue, 2 Jan 2024 07:42:38 -0800
Subject: [PATCH 14/15] Update CHANGES for 24.01 (#3686)

---
 CHANGES | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/CHANGES b/CHANGES
index 82b28a03e87..cad5363b99e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,53 @@
+# 24.01
+
+  -- MLMG: Use free functions instead of MF member functions (#3681)
+
+  -- Add a few free functions for MLMG (#3680)
+
+  -- Eliminating Matrix operations in MLMG CG bottom solver if initial vector is zero (#3668)
+
+  -- Add a for loop that is unrolled at compile time (#3674)
+
+  -- Add PTD version of getParticleCell (#3675)
+
+  -- Improve ParIter docs (#3676)
+
+  -- Fix CI for ROCm 6.0 (#3673)
+
+  -- PureSoA IdCpu fixes (#3671)
+
+  -- CMake: AMReX_PARALLEL_LINK_JOBS (#3628)
+
+  -- Clang-Tidy in CI: Keep Going after Errors (#3667)
+
+  -- Delete empty below comments on classes and functions (#3669)
+
+  -- Documentation for Profiling: Hot Spots and Load Balance (#3622)
+
+  -- Fix warnings in SortParticlesForDeposition (#3664)
+
+  -- Fix Resize Issue of Fab with the Async Arena (#3663)
+
+  -- Fix SuperParticle `push_back` (#3661)
+
+  -- Pure SoA Particle: Separate Array for IdCPU (#3585)
+
+  -- Limit the scope of gpu_rand_generator (#3659)
+
+  -- Fix a typo in doxygen for NonLocalBC::FillBoundary (#3658)
+
+  -- GNU Make: Fix name collision for aurora (#3656)
+
+  -- two separate fixes -- particle_compare and ref_ratio=1 (#3655)
+
+  -- Clarify documentation on setEBDirichlet() and fix link to AMReX-Hydro (#3652)
+
+  -- Robustify the Cache Cleanup Scripts (#3650)
+
+  -- Disable CodeQL scheduled jobs on forks (#3649)
+
+  -- Work around compiler bug in nvcc 12.2 by using functor instead of lambda (#3653)
+
 # 23.12
 
   -- solve_cg: avoid use of MF `z` (#3637)

From f1ec8df75c562d2a4822cea84d284cf8e72c2e14 Mon Sep 17 00:00:00 2001
From: Weiqun Zhang <WeiqunZhang@lbl.gov>
Date: Thu, 4 Jan 2024 08:58:46 -0800
Subject: [PATCH 15/15] Install Codeplay packages via APT (#3687)

Codeplay now provides packages via APT. The CODEPLAYTOKEN secret is no longer needed.
---
 .../dependencies/dependencies_codeplay.sh     | 26 ++++++++++++++-----
 .../dependencies/dependencies_hip.sh          |  6 +++--
 .github/workflows/intel.yml                   | 22 +++++-----------
 3 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/dependencies/dependencies_codeplay.sh b/.github/workflows/dependencies/dependencies_codeplay.sh
index dfd2e5a03f7..bf692d3c2c7 100755
--- a/.github/workflows/dependencies/dependencies_codeplay.sh
+++ b/.github/workflows/dependencies/dependencies_codeplay.sh
@@ -6,10 +6,24 @@
 
 set -eu -o pipefail
 
-curl -o oneapi_nvidia.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=nvidia&filters[]=linux&aat=$1"
-chmod +x oneapi_nvidia.sh
-sudo ./oneapi_nvidia.sh --yes
+# `man apt.conf`:
+#   Number of retries to perform. If this is non-zero APT will retry
+#   failed files the given number of times.
+echo 'Acquire::Retries "3";' | sudo tee /etc/apt/apt.conf.d/80-retries
 
-curl -o oneapi_amd.sh -L "https://developer.codeplay.com/api/v1/products/download?product=oneapi&variant=amd&filters[]=linux&aat=$1"
-chmod +x oneapi_amd.sh
-sudo ./oneapi_amd.sh --yes
+# https://developer.codeplay.com/apt/index.html
+sudo wget -qO - https://developer.codeplay.com/apt/public.key | gpg --dearmor | sudo tee /usr/share/keyrings/codeplay-keyring.gpg > /dev/null
+echo "deb [signed-by=/usr/share/keyrings/codeplay-keyring.gpg] https://developer.codeplay.com/apt all main" | sudo tee /etc/apt/sources.list.d/codeplay.list
+
+sudo apt-get update
+
+# try apt install up to five times, to tolerate transient connection failures
+status=1
+for itry in {1..5}
+do
+    sudo apt-get install -y --no-install-recommends \
+        $1 \
+        && { sudo apt-get clean; status=0; break; }  \
+        || { sleep 10; }
+done
+if [[ ${status} -ne 0 ]]; then exit 1; fi
diff --git a/.github/workflows/dependencies/dependencies_hip.sh b/.github/workflows/dependencies/dependencies_hip.sh
index 36df2f384ba..4673a7caed5 100755
--- a/.github/workflows/dependencies/dependencies_hip.sh
+++ b/.github/workflows/dependencies/dependencies_hip.sh
@@ -43,8 +43,10 @@ sudo apt-get install -y --no-install-recommends \
     roctracer-dev   \
     rocprofiler-dev \
     rocrand-dev     \
-    rocprim-dev     \
-    hiprand-dev
+    rocprim-dev
+
+# hiprand-dev is a new package that does not exist in old versions
+sudo apt-get install -y --no-install-recommends hiprand-dev || true
 
 # activate
 #
diff --git a/.github/workflows/intel.yml b/.github/workflows/intel.yml
index 6474214e0a2..aa1036ddf94 100644
--- a/.github/workflows/intel.yml
+++ b/.github/workflows/intel.yml
@@ -97,19 +97,16 @@ jobs:
   tests-oneapi-sycl-eb-nvidia:
     name: oneAPI SYCL for Nvidia GPUs [tests w/ EB]
     runs-on: ubuntu-latest
-    env:
-      CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }}
     steps:
     - uses: actions/checkout@v3
     - name: Dependencies
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       run: |
-        .github/workflows/dependencies/dependencies_nvcc.sh
+        .github/workflows/dependencies/ubuntu_free_disk_space.sh
         .github/workflows/dependencies/dependencies_dpcpp.sh
-        .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }}
+        .github/workflows/dependencies/dependencies_nvcc.sh 12.0
+        .github/workflows/dependencies/dependencies_codeplay.sh oneapi-nvidia-12.0
         .github/workflows/dependencies/dependencies_ccache.sh
     - name: Set Up Cache
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       uses: actions/cache@v3
       with:
         path: ~/.cache/ccache
@@ -117,9 +114,7 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      if: ${{ env.CODEPLAYTOKEN != '' }}
-      # clang currently supports CUDA up to version 11.5 and a warning is issued with newer versions
-      env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor -Wno-unknown-cuda-version"}
+      env: {CXXFLAGS: "-fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --offload-arch=sm_80 -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
       run: |
         export CCACHE_COMPRESS=1
         export CCACHE_COMPRESSLEVEL=10
@@ -148,20 +143,16 @@ jobs:
   no-tests-oneapi-sycl-amd:
     name: oneAPI SYCL for AMD GPUs
     runs-on: ubuntu-20.04
-    env:
-      CODEPLAYTOKEN: ${{ secrets.CODEPLAYTOKEN }}
     steps:
     - uses: actions/checkout@v3
     - name: Dependencies
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       run: |
         .github/workflows/dependencies/ubuntu_free_disk_space.sh
-        .github/workflows/dependencies/dependencies_hip.sh 5.4.6
         .github/workflows/dependencies/dependencies_dpcpp.sh
-        .github/workflows/dependencies/dependencies_codeplay.sh ${{ env.CODEPLAYTOKEN }}
+        .github/workflows/dependencies/dependencies_hip.sh 5.4.3
+        .github/workflows/dependencies/dependencies_codeplay.sh oneapi-amd-5.4.3
         .github/workflows/dependencies/dependencies_ccache.sh
     - name: Set Up Cache
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       uses: actions/cache@v3
       with:
         path: ~/.cache/ccache
@@ -169,7 +160,6 @@ jobs:
         restore-keys: |
              ccache-${{ github.workflow }}-${{ github.job }}-git-
     - name: Build & Install
-      if: ${{ env.CODEPLAYTOKEN != '' }}
       env: {CXXFLAGS: "-fsycl -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx90a -fno-operator-names -Werror -Wall -Wextra -Wpedantic -Wnull-dereference -Wfloat-conversion -Wshadow -Woverloaded-virtual -Wextra-semi -Wunreachable-code -Wnon-virtual-dtor"}
       run: |
         export CCACHE_COMPRESS=1