diff --git a/CHANGELOG.md b/CHANGELOG.md index 36c2ffc5c16e..b4427452edd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ - [[PR 885]](https://github.com/parthenon-hpc-lab/parthenon/pull/885) Expose PackDescriptor and use uids in SparsePacks ### Fixed (not changing behavior/API/variables/...) +- [[PR 947]](https://github.com/parthenon-hpc-lab/parthenon/pull/947) Add missing ForceRemeshComm dependencies +- [[PR 928]](https://github.com/parthenon-hpc-lab/parthenon/pull/928) Fix boundary comms during refinement next to refined blocks - [[PR 937]](https://github.com/parthenon-hpc-lab/parthenon/pull/937) Fix multiple line continuations - [[PR 933]](https://github.com/parthenon-hpc-lab/parthenon/pull/933) Remove extraneous debug check - [[PR 917]](https://github.com/parthenon-hpc-lab/parthenon/pull/917) Update Iterative Tasking Infrastructure @@ -32,8 +34,9 @@ ### Infrastructure (changes irrelevant to downstream codes) - [[PR 938]](https://github.com/parthenon-hpc-lab/parthenon/pull/938) Restructure buffer packing/unpacking kernel hierarchical parallelism +- [[PR 944]](https://github.com/parthenon-hpc-lab/parthenon/pull/944) Move sparse pack identifier creation to descriptor - [[PR 904]](https://github.com/parthenon-hpc-lab/parthenon/pull/904) Move to prolongation/restriction in one for AMR and communicate non-cell centered fields -- [[PR 918]](https://github.com/parthenon-hpc-lab/parthenon/pull/918) Refactor RegionSize +- [[PR 918]](https://github.com/parthenon-hpc-lab/parthenon/pull/918) Refactor RegionSize - [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model ### Removed (removing behavior/API/varaibles/...) diff --git a/src/bvals/bvals.hpp b/src/bvals/bvals.hpp index 827abac792ca..718499a9f804 100644 --- a/src/bvals/bvals.hpp +++ b/src/bvals/bvals.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include "basic_types.hpp" @@ -78,7 +79,9 @@ class BoundaryBase { static int BufferID(int dim, bool multilevel); static int FindBufferID(int ox1, int ox2, int ox3, int fi1, int fi2); - void SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, int *nslist); + void + SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, int *nslist, + const std::unordered_set &newly_refined = {}); protected: // 1D refined or unrefined=2 @@ -90,7 +93,8 @@ class BoundaryBase { RegionSize block_size_; ParArrayND sarea_[2]; - void SetNeighborOwnership(); + void + SetNeighborOwnership(const std::unordered_set &newly_refined = {}); private: // calculate 3x shared static data members when constructing only the 1st class instance diff --git a/src/bvals/bvals_base.cpp b/src/bvals/bvals_base.cpp index d9ad5317ac93..057493f186e8 100644 --- a/src/bvals/bvals_base.cpp +++ b/src/bvals/bvals_base.cpp @@ -29,6 +29,7 @@ #include // c_str() #include "globals.hpp" +#include "mesh/logical_location.hpp" #include "mesh/mesh.hpp" #include "utils/buffer_utils.hpp" #include "utils/error_checking.hpp" @@ -300,8 +301,9 @@ int BoundaryBase::CreateBvalsMPITag(int lid, int bufid) { // TODO(felker): break-up this long function -void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, - int *nslist) { +void BoundaryBase::SearchAndSetNeighbors( + MeshBlockTree &tree, int *ranklist, int *nslist, + const std::unordered_set &newly_refined) { Kokkos::Profiling::pushRegion("SearchAndSetNeighbors"); MeshBlockTree *neibt; int myox1, myox2 = 0, myox3 = 0, myfx1, myfx2, myfx3; @@ -368,7 +370,7 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } } if (block_size_.nx(X2DIR) == 1) { - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors return; } @@ -503,7 +505,7 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } if (block_size_.nx(X3DIR) == 1) { - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors return; } @@ -626,11 +628,12 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } } - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors } -void BoundaryBase::SetNeighborOwnership() { +void BoundaryBase::SetNeighborOwnership( + const std::unordered_set &newly_refined) { // Set neighbor block ownership std::set allowed_neighbors; allowed_neighbors.insert(loc); // Insert the location of this block @@ -642,7 +645,7 @@ void BoundaryBase::SetNeighborOwnership() { RootGridInfo rg_info = pmy_mesh_->GetRootGridInfo(); for (int n = 0; n < nneighbor; ++n) { neighbor[n].ownership = - DetermineOwnership(neighbor[n].loc, allowed_neighbors, rg_info); + DetermineOwnership(neighbor[n].loc, allowed_neighbors, rg_info, newly_refined); neighbor[n].ownership.initialized = true; } } diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index 54fd5945028e..fc5231c2da79 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -157,7 +157,7 @@ template TaskStatus StartReceiveBoundBufs(std::shared_ptr> &md) { Kokkos::Profiling::pushRegion("Task_StartReceiveBoundBufs"); Mesh *pmesh = md->GetMeshPointer(); - auto &cache = md->GetBvarsCache().GetSubCache(BoundaryType::flxcor_send, false); + auto &cache = md->GetBvarsCache().GetSubCache(bound_type, false); if (cache.buf_vec.size() == 0) InitializeBufferCache(md, &(pmesh->boundary_comm_map), &cache, ReceiveKey, false); diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index 6ddc77194fba..d370c0f5f734 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -276,21 +276,28 @@ template SparsePackBase SparsePackBase::Build>(MeshData *, template SparsePackBase &SparsePackCache::Get(T *pmd, const PackDescriptor &desc, const std::vector &include_block) { - std::string ident = GetIdentifier(desc, include_block); - if (pack_map.count(ident) > 0) { - auto &pack = pack_map[ident].first; + if (pack_map.count(desc.identifier) > 0) { + auto &cache_tuple = pack_map[desc.identifier]; + auto &pack = std::get<0>(cache_tuple); auto alloc_status_in = SparsePackBase::GetAllocStatus(pmd, desc, include_block); - auto &alloc_status = pack_map[ident].second; + auto &alloc_status = std::get<1>(cache_tuple); if (alloc_status.size() != alloc_status_in.size()) - return BuildAndAdd(pmd, desc, ident, include_block); + return BuildAndAdd(pmd, desc, include_block); for (int i = 0; i < alloc_status_in.size(); ++i) { if (alloc_status[i] != alloc_status_in[i]) - return BuildAndAdd(pmd, desc, ident, include_block); + return BuildAndAdd(pmd, desc, include_block); + } + auto &include_status = std::get<2>(cache_tuple); + if (include_status.size() != include_block.size()) + return BuildAndAdd(pmd, desc, include_block); + for (int i = 0; i < include_block.size(); ++i) { + if (include_status[i] != include_block[i]) + return BuildAndAdd(pmd, desc, include_block); } // Cached version is not stale, so just return a reference to it - return pack_map[ident].first; + return std::get<0>(cache_tuple); } - return BuildAndAdd(pmd, desc, ident, include_block); + return BuildAndAdd(pmd, desc, include_block); } template SparsePackBase &SparsePackCache::Get>(MeshData *, const PackDescriptor &, @@ -301,37 +308,17 @@ SparsePackCache::Get>(MeshBlockData *, const PackDescr template SparsePackBase &SparsePackCache::BuildAndAdd(T *pmd, const PackDescriptor &desc, - const std::string &ident, const std::vector &include_block) { - if (pack_map.count(ident) > 0) pack_map.erase(ident); - pack_map[ident] = {SparsePackBase::Build(pmd, desc, include_block), - SparsePackBase::GetAllocStatus(pmd, desc, include_block)}; - return pack_map[ident].first; + if (pack_map.count(desc.identifier) > 0) pack_map.erase(desc.identifier); + pack_map[desc.identifier] = {SparsePackBase::Build(pmd, desc, include_block), + SparsePackBase::GetAllocStatus(pmd, desc, include_block), + include_block}; + return std::get<0>(pack_map[desc.identifier]); } template SparsePackBase & SparsePackCache::BuildAndAdd>(MeshData *, const PackDescriptor &, - const std::string &, const std::vector &); template SparsePackBase &SparsePackCache::BuildAndAdd>( - MeshBlockData *, const PackDescriptor &, const std::string &, - const std::vector &); - -std::string SparsePackCache::GetIdentifier(const PackDescriptor &desc, - const std::vector &include_block) const { - std::string identifier(""); - for (const auto &vgroup : desc.var_groups) { - for (const auto &[vid, uid] : vgroup) { - identifier += std::to_string(uid) + "_"; - } - identifier += "|"; - } - identifier += std::to_string(desc.with_fluxes); - identifier += std::to_string(desc.coarse); - identifier += std::to_string(desc.flat); - for (const auto b : include_block) { - identifier += std::to_string(b); - } - return identifier; -} + MeshBlockData *, const PackDescriptor &, const std::vector &); } // namespace parthenon diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index 7970d86f330a..26a5f3fd7b61 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -54,6 +54,7 @@ class SparsePackBase { friend class SparsePackCache; using alloc_t = std::vector; + using include_t = std::vector; using pack_t = ParArray3D>; using bounds_t = ParArray3D; using bounds_h_t = typename ParArray3D::HostMirror; @@ -114,12 +115,10 @@ class SparsePackCache { template SparsePackBase &BuildAndAdd(T *pmd, const impl::PackDescriptor &desc, - const std::string &ident, const std::vector &include_block); - std::string GetIdentifier(const impl::PackDescriptor &desc, - const std::vector &include_block) const; - std::unordered_map> + std::unordered_map> pack_map; friend class SparsePackBase; @@ -136,7 +135,7 @@ struct PackDescriptor { // default constructor needed for certain use cases PackDescriptor() : nvar_groups(0), var_group_names({}), var_groups({}), with_fluxes(false), - coarse(false), flat(false) {} + coarse(false), flat(false), identifier("") {} template PackDescriptor(StateDescriptor *psd, const std::vector &var_groups_in, @@ -144,7 +143,8 @@ struct PackDescriptor { : nvar_groups(var_groups_in.size()), var_group_names(MakeGroupNames(var_groups_in)), var_groups(BuildUids(var_groups_in.size(), psd, selector)), with_fluxes(options.count(PDOpt::WithFluxes)), - coarse(options.count(PDOpt::Coarse)), flat(options.count(PDOpt::Flatten)) { + coarse(options.count(PDOpt::Coarse)), flat(options.count(PDOpt::Flatten)), + identifier(GetIdentifier()) { PARTHENON_REQUIRE(!(with_fluxes && coarse), "Probably shouldn't be making a coarse pack with fine fluxes."); } @@ -155,8 +155,22 @@ struct PackDescriptor { const bool with_fluxes; const bool coarse; const bool flat; + const std::string identifier; private: + std::string GetIdentifier() { + std::string ident(""); + for (const auto &vgroup : var_groups) { + for (const auto &[vid, uid] : vgroup) { + ident += std::to_string(uid) + "_"; + } + ident += "|"; + } + ident += std::to_string(with_fluxes); + ident += std::to_string(coarse); + ident += std::to_string(flat); + return ident; + } template std::vector BuildUids(int nvgs, const StateDescriptor *const psd, const FUNC_t &selector) { diff --git a/src/interface/variable.cpp b/src/interface/variable.cpp index fd8b534c95e5..ce62354f0631 100644 --- a/src/interface/variable.cpp +++ b/src/interface/variable.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -81,7 +81,8 @@ void Variable::CopyFluxesAndBdryVar(const Variable *src) { } } - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { // no need to check mesh->multilevel, if false, we're just making a shallow copy of // an empty ParArrayND coarse_s = src->coarse_s; @@ -172,7 +173,8 @@ void Variable::AllocateFluxesAndCoarse(std::weak_ptr wpmb) { } // Create the boundary object - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { if (wpmb.expired()) return; std::shared_ptr pmb = wpmb.lock(); @@ -205,7 +207,8 @@ std::int64_t Variable::Deallocate() { } } - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { mem_size += coarse_s.size() * sizeof(T); coarse_s.Reset(); } diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index ed84bee4a65e..32ee6d398d37 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -702,6 +702,7 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput oldtonew[mb_idx] = ntot - 1; current_level = 0; + std::unordered_set newly_refined; for (int n = 0; n < ntot; n++) { // "on" = "old n" = "old gid" = "old global MeshBlock ID" int on = newtoold[n]; @@ -709,6 +710,10 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput current_level = newloc[n].level(); if (newloc[n].level() >= loclist[on].level()) { // same or refined newcost[n] = costlist[on]; + // Keep a list of all blocks refined for below + if (newloc[n].level() > loclist[on].level()) { + newly_refined.insert(newloc[n]); + } } else { double acost = 0.0; for (int l = 0; l < nleaf; l++) @@ -917,28 +922,49 @@ void Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput } } prolongation_cache.CopyToDevice(); + refinement::ProlongateShared(resolved_packages.get(), prolongation_cache, block_list[0]->cellbounds, block_list[0]->c_cellbounds); - refinement::ProlongateInternal(resolved_packages.get(), prolongation_cache, - block_list[0]->cellbounds, block_list[0]->c_cellbounds); + // update the lists + loclist = std::move(newloc); + ranklist = std::move(newrank); + costlist = std::move(newcost); + + // A block newly refined and prolongated may have neighbors which were + // already refined to the new level. + // If so, the prolongated versions of shared elements will not reflect + // the true, finer versions present in the neighbor block. + // We must create any new fine buffers and fill them from these neighbors + // in order to maintain a consistent global state. + // Thus we rebuild and synchronize the mesh now, but using a unique + // neighbor precedence favoring the "old" fine blocks over "new" ones + for (auto &pmb : block_list) { + pmb->pbval->SearchAndSetNeighbors(tree, ranklist.data(), nslist.data(), + newly_refined); + } + // Make sure all old sends/receives are done before we reconfigure the mesh #ifdef MPI_PARALLEL if (send_reqs.size() != 0) PARTHENON_MPI_CHECK( MPI_Waitall(send_reqs.size(), send_reqs.data(), MPI_STATUSES_IGNORE)); #endif - Kokkos::Profiling::popRegion(); // AMR: Recv data and unpack + // Re-initialize the mesh with our temporary ownership/neighbor configurations. + // No buffers are different when we switch to the final precedence order. + Initialize(false, pin, app_in); - // update the lists - loclist = std::move(newloc); - ranklist = std::move(newrank); - costlist = std::move(newcost); + // Internal refinement relies on the fine shared values, which are only consistent after + // being updated with any previously fine versions + refinement::ProlongateInternal(resolved_packages.get(), prolongation_cache, + block_list[0]->cellbounds, block_list[0]->c_cellbounds); - // re-initialize the MeshBlocks + // Rebuild just the ownership model, this time weighting the "new" fine blocks just like + // any other blocks at their level. for (auto &pmb : block_list) { pmb->pbval->SearchAndSetNeighbors(tree, ranklist.data(), nslist.data()); } - Initialize(false, pin, app_in); + + Kokkos::Profiling::popRegion(); // AMR: Recv data and unpack ResetLoadBalanceVariables(); diff --git a/src/mesh/logical_location.hpp b/src/mesh/logical_location.hpp index 21dd7a52f513..33abdebb96d8 100644 --- a/src/mesh/logical_location.hpp +++ b/src/mesh/logical_location.hpp @@ -23,12 +23,25 @@ #include #include #include +#include #include #include +#include "logical_location.hpp" #include "utils/error_checking.hpp" #include "utils/morton_number.hpp" +namespace parthenon { +class LogicalLocation; +} + +// This must be declared before an unordered_set of LogicalLocation is used +// below, but must be *implemented* after the class definition +template <> +struct std::hash { + std::size_t operator()(const parthenon::LogicalLocation &key) const noexcept; +}; + namespace parthenon { struct RootGridInfo { @@ -251,15 +264,25 @@ struct block_ownership_t { inline block_ownership_t DetermineOwnership(const LogicalLocation &main_block, const std::set &allowed_neighbors, - const RootGridInfo &rg_info = RootGridInfo()) { + const RootGridInfo &rg_info = RootGridInfo(), + const std::unordered_set &newly_refined = {}) { block_ownership_t main_owns; - auto ownership_less_than = [](const LogicalLocation &a, const LogicalLocation &b) { + auto ownership_level = [&](const LogicalLocation &a) { + // Newly-refined blocks are treated as higher-level than blocks at their + // parent level, but lower-level than previously-refined blocks at their + // current level. + if (newly_refined.count(a)) return 2 * a.level() - 1; + return 2 * a.level(); + }; + + auto ownership_less_than = [ownership_level](const LogicalLocation &a, + const LogicalLocation &b) { // Ownership is first determined by block with the highest level, then by maximum // Morton number this is reversed in precedence from the normal comparators where // Morton number takes precedence - if (a.level() == b.level()) return a.morton() < b.morton(); - return a.level() < b.level(); + if (ownership_level(a) == ownership_level(b)) return a.morton() < b.morton(); + return ownership_level(a) < ownership_level(b); }; for (int ox1 : {-1, 0, 1}) { @@ -346,14 +369,12 @@ inline auto GetIndexRangeMaskFromOwnership(TopologicalElement el, } // namespace parthenon -template <> -struct std::hash { - std::size_t operator()(const parthenon::LogicalLocation &key) const noexcept { - // TODO(LFR): Think more carefully about what the best choice for this key is, - // probably the least significant sizeof(size_t) * 8 bits of the morton number - // with 3 * (level - 21) trailing bits removed. - return key.morton().bits[0]; - } -}; +inline std::size_t std::hash::operator()( + const parthenon::LogicalLocation &key) const noexcept { + // TODO(LFR): Think more carefully about what the best choice for this key is, + // probably the least significant sizeof(size_t) * 8 bits of the morton number + // with 3 * (level - 21) trailing bits removed. + return key.morton().bits[0]; +} #endif // MESH_LOGICAL_LOCATION_HPP_ diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 32f4d1b3245a..ed81e3e4e219 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -1272,7 +1272,8 @@ void Mesh::SetupMPIComms() { auto &metadata = pair.second; // Create both boundary and flux communicators for everything with either FillGhost // or WithFluxes just to be safe - if (metadata.IsSet(Metadata::FillGhost) || metadata.IsSet(Metadata::WithFluxes)) { + if (metadata.IsSet(Metadata::FillGhost) || metadata.IsSet(Metadata::WithFluxes) || + metadata.IsSet(Metadata::ForceRemeshComm)) { MPI_Comm mpi_comm; PARTHENON_MPI_CHECK(MPI_Comm_dup(MPI_COMM_WORLD, &mpi_comm)); const auto ret = mpi_comm_map_.insert({pair.first.label(), mpi_comm});