Update Monte Carlo Collisions (#2085)

* Update Monte Carlo Collisions This addresses a number of issues in the Monte Carlo collision code. * `MCCProcess` is not trivially copyable because it contains `ManagedVector`. Therefore, it cannot be captured by GPU device lambda. * The use of managed memory may have performance issues. * There are memory leaks because some raw pointers allocated by `new` are not `delete`d. * `BackgroundMCCCollision` derives from a virtual base class, but the compiler generated destructor is not automatically virtual. * Apply suggestions from code review Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja> * Apply the suggestion from @PhilMiller to get rid of unique_ptr Co-authored-by: Axel Huebl <axel.huebl@plasma.ninja>
ECP-WarpX · Jul 14, 2021 · 40e36e1 · 40e36e1
1 parent 521112f
commit 40e36e1
Show file tree

Hide file tree

Showing 7 changed files with 165 additions and 122 deletions.
diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml
@@ -78,6 +78,7 @@ jobs:
           -DWarpX_openpmd_internal=OFF \
           -DWarpX_PRECISION=SINGLE     \
           -DWarpX_PSATD=ON             \
+          -DAMReX_CUDA_ERROR_CROSS_EXECUTION_SPACE_CALL=ON \
           -DAMReX_CUDA_ERROR_CAPTURE_THIS=ON
         cmake --build build_sp -j 2
 

diff --git a/Source/Particles/Collision/BackgroundMCCCollision.H b/Source/Particles/Collision/BackgroundMCCCollision.H
@@ -12,20 +12,21 @@
 #include "MCCProcess.H"
 
 #include <AMReX_REAL.H>
+#include <AMReX_Vector.H>
 #include <AMReX_GpuContainers.H>
 
+#include <memory>
 #include <string>
 
-class BackgroundMCCCollision
+class BackgroundMCCCollision final
     : public CollisionBase
 {
 public:
-    template <typename T>
-    using VectorType = amrex::Gpu::ManagedVector<T>;
-
     BackgroundMCCCollision (std::string collision_name);
 
-    amrex::Real get_nu_max ( VectorType<MCCProcess*> const& mcc_processes );
+    virtual ~BackgroundMCCCollision () = default;
+
+    amrex::Real get_nu_max (amrex::Vector<MCCProcess> const& mcc_processes);
 
     /** Perform the collisions
      *
@@ -57,8 +58,10 @@ public:
 
 private:
 
-    VectorType< MCCProcess* > m_scattering_processes;
-    VectorType< MCCProcess* > m_ionization_processes;
+    amrex::Vector<MCCProcess> m_scattering_processes;
+    amrex::Vector<MCCProcess> m_ionization_processes;
+    amrex::Gpu::DeviceVector<MCCProcess::Executor> m_scattering_processes_exe;
+    amrex::Gpu::DeviceVector<MCCProcess::Executor> m_ionization_processes_exe;
 
     bool init_flag = false;
     bool ionization_flag = false;

diff --git a/Source/Particles/Collision/BackgroundMCCCollision.cpp b/Source/Particles/Collision/BackgroundMCCCollision.cpp
@@ -58,17 +58,17 @@ BackgroundMCCCollision::BackgroundMCCCollision (std::string const collision_name
             pp.get(kw_energy.c_str(), energy);
         }
 
-        auto process = new MCCProcess(scattering_process, cross_section_file, energy);
+        MCCProcess process(scattering_process, cross_section_file, energy);
 
-        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(process->m_type != MCCProcessType::INVALID,
+        AMREX_ALWAYS_ASSERT_WITH_MESSAGE(process.type() != MCCProcessType::INVALID,
                                          "Cannot add an unknown MCC process type");
 
         // if the scattering process is ionization get the secondary species
         // only one ionization process is supported, the vector
         // m_ionization_processes is only used to make it simple to calculate
         // the maximum collision frequency with the same function used for
         // particle conserving processes
-        if (process->m_type == MCCProcessType::IONIZATION) {
+        if (process.type() == MCCProcessType::IONIZATION) {
             AMREX_ALWAYS_ASSERT_WITH_MESSAGE(!ionization_flag,
                                              "Background MCC only supports a single ionization process");
             ionization_flag = true;
@@ -77,20 +77,43 @@ BackgroundMCCCollision::BackgroundMCCCollision (std::string const collision_name
             pp.get("ionization_species", secondary_species);
             m_species_names.push_back(secondary_species);
 
-            m_ionization_processes.push_back(process);
+            m_ionization_processes.push_back(std::move(process));
         } else {
-            m_scattering_processes.push_back(process);
+            m_scattering_processes.push_back(std::move(process));
         }
+    }
 
-        amrex::Gpu::synchronize();
+#ifdef AMREX_USE_GPU
+    amrex::Gpu::HostVector<MCCProcess::Executor> h_scattering_processes_exe;
+    amrex::Gpu::HostVector<MCCProcess::Executor> h_ionization_processes_exe;
+    for (auto const& p : m_scattering_processes) {
+        h_scattering_processes_exe.push_back(p.executor());
+    }
+    for (auto const& p : m_ionization_processes) {
+        h_ionization_processes_exe.push_back(p.executor());
+    }
+    m_scattering_processes_exe.resize(h_scattering_processes_exe.size());
+    m_ionization_processes_exe.resize(h_ionization_processes_exe.size());
+    amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, h_scattering_processes_exe.begin(),
+                          h_scattering_processes_exe.end(), m_scattering_processes_exe.begin());
+    amrex::Gpu::copyAsync(amrex::Gpu::hostToDevice, h_ionization_processes_exe.begin(),
+                          h_ionization_processes_exe.end(), m_ionization_processes_exe.begin());
+    amrex::Gpu::streamSynchronize();
+#else
+    for (auto const& p : m_scattering_processes) {
+        m_scattering_processes_exe.push_back(p.executor());
     }
+    for (auto const& p : m_ionization_processes) {
+        m_ionization_processes_exe.push_back(p.executor());
+    }
+#endif
 }
 
 /** Calculate the maximum collision frequency using a fixed energy grid that
  *  ranges from 1e-4 to 5000 eV in 0.2 eV increments
  */
 amrex::Real
-BackgroundMCCCollision::get_nu_max(amrex::Gpu::ManagedVector<MCCProcess*> const& mcc_processes)
+BackgroundMCCCollision::get_nu_max(amrex::Vector<MCCProcess> const& mcc_processes)
 {
     using namespace amrex::literals;
     amrex::Real nu, nu_max = 0.0;
@@ -101,7 +124,7 @@ BackgroundMCCCollision::get_nu_max(amrex::Gpu::ManagedVector<MCCProcess*> const&
         // loop through all collision pathways
         for (const auto &scattering_process : mcc_processes) {
             // get collision cross-section
-            sigma_E += scattering_process->getCrossSection(E);
+            sigma_E += scattering_process.getCrossSection(E);
         }
 
         // calculate collision frequency
@@ -229,8 +252,8 @@ void BackgroundMCCCollision::doBackgroundCollisionsWithinTile
     amrex::Real vel_std = sqrt(PhysConst::kb * T_a / mass_a);
 
     // get collision parameters
-    auto scattering_processes = m_scattering_processes.data();
-    auto process_count = m_scattering_processes.size();
+    auto scattering_processes = m_scattering_processes_exe.data();
+    int const process_count   = m_scattering_processes_exe.size();
 
     amrex::Real total_collision_prob = m_total_collision_prob;
     amrex::Real nu_max = m_nu_max;
@@ -288,8 +311,8 @@ void BackgroundMCCCollision::doBackgroundCollisionsWithinTile
                               v_coll = sqrt(v_coll2);
 
                               // loop through all collision pathways
-                              for (size_t i = 0; i < process_count; i++) {
-                                  auto const& scattering_process = **(scattering_processes + i);
+                              for (int i = 0; i < process_count; i++) {
+                                  auto const& scattering_process = *(scattering_processes + i);
 
                                   // get collision cross-section
                                   sigma_E = scattering_process.getCrossSection(E_coll);
@@ -348,7 +371,7 @@ void BackgroundMCCCollision::doBackgroundIonization
     const auto CopyIon = copy_factory_ion.getSmartCopy();
 
     const auto Filter = ImpactIonizationFilterFunc(
-                                                   *m_ionization_processes[0],
+                                                   m_ionization_processes[0],
                                                    m_mass1, m_total_collision_prob_ioniz,
                                                    m_nu_max_ioniz / m_background_density
                                                    );
@@ -368,7 +391,7 @@ void BackgroundMCCCollision::doBackgroundIonization
         const auto np_ion = ion_tile.numParticles();
 
         auto Transform = ImpactIonizationTransformFunc(
-                                                       m_ionization_processes[0]->m_energy_penalty, m_mass1, vel_std
+                                                       m_ionization_processes[0].getEnergyPenalty(), m_mass1, vel_std
                                                        );
 
         const auto num_added = filterCopyTransformParticles<1>(

diff --git a/Source/Particles/Collision/BinaryCollision.H b/Source/Particles/Collision/BinaryCollision.H
@@ -54,7 +54,7 @@
  *
  */
 template <typename FunctorType>
-class BinaryCollision
+class BinaryCollision final
     : public CollisionBase
 {
     // Define shortcuts for frequently-used type names
@@ -84,6 +84,8 @@ public:
             m_isSameSpecies = false;
     }
 
+    virtual ~BinaryCollision () = default;
+
     /** Perform the collisions
      *
      * @param lev AMR level of the tile
@@ -109,10 +111,10 @@ public:
         amrex::LayoutData<amrex::Real>* cost = WarpX::getCosts(lev);
 
             // Loop over all grids/tiles at this level
-    #ifdef AMREX_USE_OMP
+#ifdef AMREX_USE_OMP
             info.SetDynamic(true);
-    #pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
-    #endif
+#pragma omp parallel if (amrex::Gpu::notInLaunchRegion())
+#endif
             for (amrex::MFIter mfi = species1.MakeMFIter(lev, info); mfi.isValid(); ++mfi){
                 if (cost && WarpX::load_balance_costs_update_algo == LoadBalanceCostsUpdateAlgo::Timers)
                 {
@@ -178,18 +180,18 @@ public:
 
             const amrex::Real dt = WarpX::GetInstance().getdt(lev);
             amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
-    #if defined WARPX_DIM_XZ
+#if defined WARPX_DIM_XZ
             auto dV = geom.CellSize(0) * geom.CellSize(1);
-    #elif defined WARPX_DIM_RZ
+#elif defined WARPX_DIM_RZ
             amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
             const auto lo = lbound(cbx);
             const auto hi = ubound(cbx);
             int const nz = hi.y-lo.y+1;
             auto dr = geom.CellSize(0);
             auto dz = geom.CellSize(1);
-    #elif (AMREX_SPACEDIM == 3)
+#elif (AMREX_SPACEDIM == 3)
             auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
-    #endif
+#endif
 
             // Loop over cells
             amrex::ParallelForRNG( n_cells,
@@ -207,10 +209,10 @@ public:
                     // shuffle
                     ShuffleFisherYates(
                         indices_1, cell_start_1, cell_half_1, engine );
-    #if defined WARPX_DIM_RZ
+#if defined WARPX_DIM_RZ
                     int ri = (i_cell - i_cell%nz) / nz;
                     auto dV = MathConst::pi*(2.0_rt*ri+1.0_rt)*dr*dr*dz;
-    #endif
+#endif
                     // Call the function in order to perform collisions
                     binary_collision_functor(
                         cell_start_1, cell_half_1,
@@ -262,18 +264,18 @@ public:
 
             const amrex::Real dt = WarpX::GetInstance().getdt(lev);
             amrex::Geometry const& geom = WarpX::GetInstance().Geom(lev);
-    #if defined WARPX_DIM_XZ
+#if defined WARPX_DIM_XZ
             auto dV = geom.CellSize(0) * geom.CellSize(1);
-    #elif defined WARPX_DIM_RZ
+#elif defined WARPX_DIM_RZ
             amrex::Box const& cbx = mfi.tilebox(amrex::IntVect::TheZeroVector()); //Cell-centered box
             const auto lo = lbound(cbx);
             const auto hi = ubound(cbx);
             int nz = hi.y-lo.y+1;
             auto dr = geom.CellSize(0);
             auto dz = geom.CellSize(1);
-    #elif (AMREX_SPACEDIM == 3)
+#elif (AMREX_SPACEDIM == 3)
             auto dV = geom.CellSize(0) * geom.CellSize(1) * geom.CellSize(2);
-    #endif
+#endif
 
             // Loop over cells
             amrex::ParallelForRNG( n_cells,
@@ -298,10 +300,10 @@ public:
                     // shuffle
                     ShuffleFisherYates(indices_1, cell_start_1, cell_stop_1, engine);
                     ShuffleFisherYates(indices_2, cell_start_2, cell_stop_2, engine);
-    #if defined WARPX_DIM_RZ
+#if defined WARPX_DIM_RZ
                     int ri = (i_cell - i_cell%nz) / nz;
                     auto dV = MathConst::pi*(2.0_rt*ri+1.0_rt)*dr*dr*dz;
-    #endif
+#endif
                     // Call the function in order to perform collisions
                     binary_collision_functor(
                         cell_start_1, cell_stop_1, cell_start_2, cell_stop_2,