diff --git a/.github/workflows/ci-extended.yml b/.github/workflows/ci-extended.yml index 8ca646cfc2ebd..1dd39c782477c 100644 --- a/.github/workflows/ci-extended.yml +++ b/.github/workflows/ci-extended.yml @@ -21,6 +21,8 @@ env: CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build MACHINE_CFG: cmake/machinecfg/CI.cmake OMPI_MCA_mpi_common_cuda_event_max: 1000 + # Repeated use of CUDA IPC within docker seems to cause issues on the CI machine + OMPI_MCA_btl_smcuda_use_cuda_ipc: 0 # https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231 OMPI_MCA_btl_vader_single_copy_mechanism: none @@ -34,7 +36,7 @@ jobs: container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent # map to local user id on CI machine to allow writing to build cache - options: --user 1001 + options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728 steps: - uses: actions/checkout@v3 with: diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index ecb4052411eea..7e0fd8bf759ac 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -13,6 +13,8 @@ env: CMAKE_BUILD_PARALLEL_LEVEL: 5 # num threads for build MACHINE_CFG: cmake/machinecfg/CI.cmake OMPI_MCA_mpi_common_cuda_event_max: 1000 + # Repeated use of CUDA IPC within docker seems to cause issues on the CI machine + OMPI_MCA_btl_smcuda_use_cuda_ipc: 0 # https://github.com/open-mpi/ompi/issues/4948#issuecomment-395468231 OMPI_MCA_btl_vader_single_copy_mechanism: none @@ -22,7 +24,7 @@ jobs: container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent # map to local user id on CI machine to allow writing to build cache - options: --user 1001 + options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728 steps: - uses: actions/checkout@v3 with: @@ -47,7 +49,7 @@ jobs: container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent # map to local user id on CI machine to allow writing to build cache - options: --user 1001 + options: 
--user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728 steps: - uses: actions/checkout@v3 with: @@ -79,7 +81,7 @@ jobs: container: image: ghcr.io/parthenon-hpc-lab/cuda11.6-mpi-hdf5-ascent # map to local user id on CI machine to allow writing to build cache - options: --user 1001 + options: --user 1001 --cap-add CAP_SYS_PTRACE --shm-size="8g" --ulimit memlock=134217728 steps: - uses: actions/checkout@v3 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 17f8a009242d1..a33493866b25b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) -- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation +- [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185) Bugfix to particle defragmentation - [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D - [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data - [[PR 1179]](https://github.com/parthenon-hpc-lab/parthenon/pull/1179) Make a global variable for whether simulation is a restart @@ -11,10 +11,10 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) 
+- [[PR 1191]](https://github.com/parthenon-hpc-lab/parthenon/pull/1191) Update Kokkos version to 4.4.1 - [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image -- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag +- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize -- [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1 - [[PR 1171]](https://github.com/parthenon-hpc-lab/parthenon/pull/1171) Add PARTHENON_USE_SYSTEM_PACKAGES build option - [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls @@ -31,7 +31,7 @@ ### Incompatibilities (i.e. breaking changes) -- [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag +- [[PR 1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag ## Release 24.08 Date: 2024-08-30 @@ -155,12 +155,12 @@ Date: 2024-03-21 - [[PR 973]](https://github.com/parthenon-hpc-lab/parthenon/pull/973) Multigrid performance upgrades ### Fixed (not changing behavior/API/variables/...) 
-- [[PR1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool -- [[PR1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code -- [[PR992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools -- [[PR988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes -- [[PR986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo cacheing -- [[PR978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check +- [[PR 1023]](https://github.com/parthenon-hpc-lab/parthenon/pull/1023) Fix broken param of a scalar bool +- [[PR 1012]](https://github.com/parthenon-hpc-lab/parthenon/pull/1012) Remove accidentally duplicated code +- [[PR 992]](https://github.com/parthenon-hpc-lab/parthenon/pull/992) Allow custom PR ops with sparse pools +- [[PR 988]](https://github.com/parthenon-hpc-lab/parthenon/pull/988) Fix bug in neighbor finding routine for small, periodic, refined meshes +- [[PR 986]](https://github.com/parthenon-hpc-lab/parthenon/pull/986) Fix bug in sparse boundary communication BndInfo cacheing +- [[PR 978]](https://github.com/parthenon-hpc-lab/parthenon/pull/978) remove erroneous sparse check ### Infrastructure (changes irrelevant to downstream codes) - [[PR 1027]](https://github.com/parthenon-hpc-lab/parthenon/pull/1027) Refactor RestartReader as abstract class @@ -227,7 +227,7 @@ Date: 2023-11-16 - [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model ### Removed (removing behavior/API/varaibles/...) -- [[PR 930](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage. 
+- [[PR 930]](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage. ## Release 0.8.0 diff --git a/README.md b/README.md index ed6a1bb05a169..f049c44ecd7d5 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Parthenon -- a performance portable block-structured adaptive mesh refinement fr * CMake 3.16 or greater * C++17 compatible compiler -* Kokkos 4.0.1 or greater +* Kokkos 4.4.1 or greater ## Optional (enabling features) diff --git a/cmake/machinecfg/GitHubActions.cmake b/cmake/machinecfg/GitHubActions.cmake index 663dcb38d6821..1adba870bdb20 100644 --- a/cmake/machinecfg/GitHubActions.cmake +++ b/cmake/machinecfg/GitHubActions.cmake @@ -19,6 +19,7 @@ message(STATUS "Loading machine configuration for GitHub Actions CI. ") # common options set(NUM_MPI_PROC_TESTING "2" CACHE STRING "CI runs tests with 2 MPI ranks") +set(Kokkos_ENABLE_ROCTHRUST OFF CACHE BOOL "Temporarily disabled as the container needs to be updated to the `-complete` base image.") set(MACHINE_CXX_FLAGS "") if (${MACHINE_VARIANT} MATCHES "cuda") diff --git a/doc/sphinx/src/development.rst b/doc/sphinx/src/development.rst index dbab91d8d5ce6..98ac9cef90a85 100644 --- a/doc/sphinx/src/development.rst +++ b/doc/sphinx/src/development.rst @@ -62,6 +62,34 @@ parallelism interface that is needed for managing memory cached in tightly nested loops. The wrappers are documented :ref:`here `. +View of Views +------------- + +Special care needs to be taken when working with a ``View`` of ``View``. + +To repeat the Kokkos documentation: `Don't use them `__ + +But if you have to (which is the case in some places inside Parthenon) +then follow this pattern: + +.. 
code:: c++ + + Kokkos::View *> view_of_pararrays(parthenon::ViewOfViewAlloc("myname"), 10); + +The ``ViewOfViewAlloc`` ensures that the ``Kokkos::SequentialHostInit`` property is added, +which results in the (inner ``View`` ) deallocators being called on the host (rather than on +the device by default). + +Similarly, when you create a host mirror of said ``View`` of ``View`` add the additional +property for the same reason. + +.. code:: c++ + + auto view_of_pararrays_h = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), view_of_pararrays); + +Note that the ``SequentialHostInit`` was only added in Kokkos 4.4.1 (which is now the default in Parthenon). + The need for reductions within function handling ``MeshBlock`` data ------------------------------------------------------------------- diff --git a/external/Kokkos b/external/Kokkos index 62d2b6c879b74..15dc143e5f399 160000 --- a/external/Kokkos +++ b/external/Kokkos @@ -1 +1 @@ -Subproject commit 62d2b6c879b74b6ae7bd06eb3e5e80139c4708e6 +Subproject commit 15dc143e5f39949eece972a798e175c4b463d4b8 diff --git a/src/bvals/comms/bnd_info.cpp b/src/bvals/comms/bnd_info.cpp index 736992260913f..54a6ae2b50fd3 100644 --- a/src/bvals/comms/bnd_info.cpp +++ b/src/bvals/comms/bnd_info.cpp @@ -41,7 +41,8 @@ namespace parthenon { void ProResCache_t::Initialize(int n_regions, StateDescriptor *pkg) { prores_info = ParArray1D("prores_info", n_regions); - prores_info_h = Kokkos::create_mirror_view(prores_info); + prores_info_h = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), prores_info); int nref_funcs = pkg->NumRefinementFuncs(); // Note that assignment of Kokkos views resets them, but // buffer_subset_sizes is a std::vector. 
It must be cleared, then diff --git a/src/bvals/comms/bnd_info.hpp b/src/bvals/comms/bnd_info.hpp index e6214ceba3226..8800f6fd867f3 100644 --- a/src/bvals/comms/bnd_info.hpp +++ b/src/bvals/comms/bnd_info.hpp @@ -127,7 +127,7 @@ struct ProResInfo { int GetBufferSize(MeshBlock *pmb, const NeighborBlock &nb, std::shared_ptr> v); -using BndInfoArr_t = ParArray1D; +using BndInfoArr_t = Kokkos::View; using BndInfoArrHost_t = typename BndInfoArr_t::HostMirror; using ProResInfoArr_t = ParArray1D; diff --git a/src/bvals/comms/bvals_utils.hpp b/src/bvals/comms/bvals_utils.hpp index f185c1207747f..8a47c716aa22c 100644 --- a/src/bvals/comms/bvals_utils.hpp +++ b/src/bvals/comms/bvals_utils.hpp @@ -216,7 +216,8 @@ inline void RebuildBufferCache(std::shared_ptr> md, int nbound, using namespace loops::shorthands; BvarsSubCache_t &cache = md->GetBvarsCache().GetSubCache(BOUND_TYPE, SENDER); cache.bnd_info = BndInfoArr_t("bnd_info", nbound); - cache.bnd_info_h = Kokkos::create_mirror_view(cache.bnd_info); + cache.bnd_info_h = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), cache.bnd_info); // prolongation/restriction sub-sets // TODO(JMM): Right now I exclude fluxcorrection boundaries but if diff --git a/src/interface/mesh_data.hpp b/src/interface/mesh_data.hpp index 14ae3959c32ad..7d7d1cabcbe5a 100644 --- a/src/interface/mesh_data.hpp +++ b/src/interface/mesh_data.hpp @@ -150,7 +150,8 @@ const MeshBlockPack

&PackOnMesh(M &map, BlockDataList_t &block_data_, if (make_new_pack) { ParArray1D

packs("MeshData::PackVariables::packs", nblocks); - auto packs_host = Kokkos::create_mirror_view(packs); + auto packs_host = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), packs); for (size_t i = 0; i < nblocks; i++) { const auto &pack = packing_function(block_data_[i], this_map, this_key); diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index 2a7a5b70c41c2..4ea5a558c3f52 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -152,7 +152,8 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, leading_dim += 2; } pack.pack_ = pack_t("data_ptr", leading_dim, pack.nblocks_, max_size); - pack.pack_h_ = Kokkos::create_mirror_view(pack.pack_); + pack.pack_h_ = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), pack.pack_); // For non-flat packs, shape of pack is type x block x var x k x j x i // where type here might be a flux. @@ -168,7 +169,8 @@ SparsePackBase SparsePackBase::Build(T *pmd, const PackDescriptor &desc, pack.block_props_h_ = Kokkos::create_mirror_view(pack.block_props_); pack.coords_ = coords_t("coords", desc.flat ? 
max_size : nblocks); - auto coords_h = Kokkos::create_mirror_view(pack.coords_); + auto coords_h = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), pack.coords_); // Fill the views int idx = 0; diff --git a/src/interface/swarm_pack_base.hpp b/src/interface/swarm_pack_base.hpp index 0733aa51f329e..52a2c3c47fc7b 100644 --- a/src/interface/swarm_pack_base.hpp +++ b/src/interface/swarm_pack_base.hpp @@ -109,7 +109,8 @@ class SwarmPackBase { // Allocate the views int leading_dim = 1; pack.pack_ = pack_t("data_ptr", leading_dim, nblocks, max_size); - auto pack_h = Kokkos::create_mirror_view(pack.pack_); + auto pack_h = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), pack.pack_); pack.bounds_ = bounds_t("bounds", 2, nblocks, nvar); auto bounds_h = Kokkos::create_mirror_view(pack.bounds_); @@ -154,7 +155,8 @@ class SwarmPackBase { Kokkos::deep_copy(pack.bounds_, bounds_h); pack.contexts_ = contexts_t("contexts", nblocks); - pack.contexts_h_ = Kokkos::create_mirror_view(pack.contexts_); + pack.contexts_h_ = Kokkos::create_mirror_view( + Kokkos::view_alloc(Kokkos::SequentialHostInit), pack.contexts_); pack.max_active_indices_ = max_active_indices_t("max_active_indices", nblocks); pack.flat_index_map_ = max_active_indices_t("flat_index_map", nblocks + 1); BuildSupplemental(pmd, desc, pack); diff --git a/src/interface/variable_pack.hpp b/src/interface/variable_pack.hpp index 037731093ce13..76fce5f6b8616 100644 --- a/src/interface/variable_pack.hpp +++ b/src/interface/variable_pack.hpp @@ -244,10 +244,11 @@ class PackIndexMap { }; template -using ViewOfParArrays = ParArray1D>; +using ViewOfParArrays = + Kokkos::View *, LayoutWrapper, DevMemSpace>; template -using ViewOfParArrays1D = ParArray1D>; +using ViewOfParArrays1D = Kokkos::View *, LayoutWrapper, DevMemSpace>; // forward declaration template @@ -570,10 +571,11 @@ void FillVarView(const VariableVector &vars, int vsize, bool coarse, assert(vsize == 
sparse_id_out.size()); assert(vsize == vector_component_out.size()); - auto host_cv = Kokkos::create_mirror_view(Kokkos::HostSpace(), cv_out); - auto host_sp = Kokkos::create_mirror_view(Kokkos::HostSpace(), sparse_id_out); - auto host_vc = Kokkos::create_mirror_view(Kokkos::HostSpace(), vector_component_out); - auto host_al = Kokkos::create_mirror_view(Kokkos::HostSpace(), allocated_out); + auto host_cv = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), cv_out); + auto host_sp = Kokkos::create_mirror_view(sparse_id_out); + auto host_vc = Kokkos::create_mirror_view(vector_component_out); + auto host_al = Kokkos::create_mirror_view(allocated_out); int vindex = 0; for (const auto &v : vars) { @@ -634,7 +636,8 @@ void FillSwarmVarView(const vpack_types::SwarmVarList &vars, ViewOfParArrays1D &cv_out, PackIndexMap *pvmap) { using vpack_types::IndexPair; - auto host_cv = Kokkos::create_mirror_view(Kokkos::HostSpace(), cv_out); + auto host_cv = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), cv_out); int vindex = 0; for (const auto v : vars) { @@ -675,10 +678,13 @@ void FillFluxViews(const VariableVector &vars, const int ndim, PackIndexMap *pvmap) { using vpack_types::IndexPair; - auto host_f1 = Kokkos::create_mirror_view(Kokkos::HostSpace(), f1_out); - auto host_f2 = Kokkos::create_mirror_view(Kokkos::HostSpace(), f2_out); - auto host_f3 = Kokkos::create_mirror_view(Kokkos::HostSpace(), f3_out); - auto host_al = Kokkos::create_mirror_view(Kokkos::HostSpace(), flux_allocated_out); + auto host_f1 = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), f1_out); + auto host_f2 = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), f2_out); + auto host_f3 = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), f3_out); + auto host_al = Kokkos::create_mirror_view(flux_allocated_out); int vindex = 0; for (const auto &v : vars) { @@ -755,10 +761,11 @@ 
VariableFluxPack MakeFluxPack(const VarListWithKeys &var_list, } // make the outer view - ViewOfParArrays cv("MakeFluxPack::cv", vsize * (extra_components ? 3 : 1)); - ViewOfParArrays f1("MakeFluxPack::f1", fsize); - ViewOfParArrays f2("MakeFluxPack::f2", fsize); - ViewOfParArrays f3("MakeFluxPack::f3", fsize); + ViewOfParArrays cv(ViewOfViewAlloc("MakeFluxPack::cv"), + vsize * (extra_components ? 3 : 1)); + ViewOfParArrays f1(ViewOfViewAlloc("MakeFluxPack::f1"), fsize); + ViewOfParArrays f2(ViewOfViewAlloc("MakeFluxPack::f2"), fsize); + ViewOfParArrays f3(ViewOfViewAlloc("MakeFluxPack::f3"), fsize); ParArray1D flux_allocated("MakePack::allocated", fsize); ParArray1D sparse_id("MakeFluxPack::sparse_id", vsize); ParArray1D vector_component("MakeFluxPack::vector_component", vsize); @@ -809,7 +816,8 @@ VariablePack MakePack(const VarListWithKeys &var_list, bool coarse, } // make the outer view - ViewOfParArrays cv("MakePack::cv", vsize * (extra_components ? 3 : 1)); + ViewOfParArrays cv(ViewOfViewAlloc("MakePack::cv"), + vsize * (extra_components ? 3 : 1)); ParArray1D sparse_id("MakePack::sparse_id", vsize); ParArray1D vector_component("MakePack::vector_component", vsize); ParArray1D allocated("MakePack::allocated", vsize); @@ -842,7 +850,7 @@ SwarmVariablePack MakeSwarmPack(const vpack_types::SwarmVarList &vars, } // make the outer view - ViewOfParArrays1D cv("MakePack::cv", vsize); + ViewOfParArrays1D cv(ViewOfViewAlloc("MakePack::cv"), vsize); std::array cv_size{0, 0}; if (vsize > 0) { diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 8fa89f82e95e8..37262dd0356cc 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -1035,6 +1035,20 @@ par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int il, cons reduction); } +// For ViewOfView we need to call the destructor of the inner views on +// the host and not on the device (which would happen by default). 
+// Thus, we need to pass `SequentialHostInit` as allocator, but only if the ViewOfView is +// on the host. If the ViewOfView is on the device, then `SequentialHostInit` should be +// passed when calling `create_mirror_view`. +template +auto ViewOfViewAlloc(const std::string &label) { + if constexpr (std::is_same_v) { + return Kokkos::view_alloc(Kokkos::SequentialHostInit, label); + } else { + return Kokkos::view_alloc(label); + } +} + // reused from kokoks/core/perf_test/PerfTest_ExecSpacePartitioning.cpp // commit a0d011fb30022362c61b3bb000ae3de6906cb6a7 template diff --git a/src/parthenon_array_generic.hpp b/src/parthenon_array_generic.hpp index d527707f9070a..ac38b6cb5e5f3 100644 --- a/src/parthenon_array_generic.hpp +++ b/src/parthenon_array_generic.hpp @@ -221,6 +221,8 @@ class ParArrayGeneric : public State { // return GetDim(1) * GetDim(2) * GetDim(3) * GetDim(4) * GetDim(5) * GetDim(6); } + // TODO(PG?) Can we use concepts here to add a + // Kokkos::view_alloc(Kokkos::SequentialHostInit) when the original is a ViewOfView? template auto GetMirror(MemSpace const &memspace) { auto mirror = Kokkos::create_mirror_view(memspace, data_); @@ -333,6 +335,8 @@ inline auto subview(std::index_sequence, return parthenon::ParArrayGeneric(v, arr); } +// TODO(PG?) Can we use concepts here to add a +// Kokkos::view_alloc(Kokkos::SequentialHostInit) when the original is a ViewOfView? 
template inline auto create_mirror_view_and_copy(Space const &space, const parthenon::ParArrayGeneric &arr) { diff --git a/tst/unit/test_pararrays.cpp b/tst/unit/test_pararrays.cpp index e79927c13b20f..9e1816b0e6690 100644 --- a/tst/unit/test_pararrays.cpp +++ b/tst/unit/test_pararrays.cpp @@ -451,8 +451,9 @@ TEST_CASE("ParArray state", "[ParArrayND]") { } GIVEN("An array of ParArrays filled with the values contained in their state") { - parthenon::ParArray1D pack("test pack", NS); - auto pack_h = Kokkos::create_mirror_view(pack); + Kokkos::View pack(parthenon::ViewOfViewAlloc("test pack"), NS); + auto pack_h = + Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), pack); for (int b = 0; b < NS; ++b) { state_t state(static_cast(b)); @@ -544,7 +545,8 @@ TEST_CASE("Check registry pressure", "[ParArrayND][performance]") { new (&views[n]) view_3d_t(Kokkos::view_alloc(label, Kokkos::WithoutInitializing), N, N, N); auto a_h = arrays(n).GetHostMirror(); - auto v_h = Kokkos::create_mirror_view(views(n)); + auto v_h = Kokkos::create_mirror_view(Kokkos::view_alloc(Kokkos::SequentialHostInit), + views(n)); for (int k = 0; k < N; k++) { for (int j = 0; j < N; j++) { for (int i = 0; i < N; i++) {