From c3bd370cec7ad86bc00482f629a0c18cc990c6d6 Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 08:52:20 -0700
Subject: [PATCH 1/6] Missing send size init
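
Swarm::LoadBuffers_() only set vbswarm->send_size for a neighbor when this
rank actually loaded particles, so a block with nothing to send kept the
previous cycle's sizes. A sketch of the failure mode, using the names from
the hunks below (the printf/PARTHENON_FAIL instrumentation is temporary and
is removed again in PATCH 2/6):

    // A stale send_size[nb.bufid] larger than the current buffer extent
    // makes this send read past the end of the allocation:
    MPI_Isend(bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
              MPI_PARTHENON_REAL, nb.rank, send_tag[nb.bufid], swarm_comm,
              &(bd_var_.req_send[nb.bufid]));

The fix is the new else branch at the end of LoadBuffers_(), which zeroes
send_size for every neighbor buffer whenever no particles are loaded.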
---
 src/bvals/bvals.cpp           | 11 +++++++++++
 src/interface/swarm_comms.cpp | 15 +++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp
index 37a4d46899169..c6d8a571be30c 100644
--- a/src/bvals/bvals.cpp
+++ b/src/bvals/bvals.cpp
@@ -100,9 +100,20 @@ void BoundarySwarm::Send(BoundaryCommSubset phase) {
 #ifdef MPI_PARALLEL
     PARTHENON_REQUIRE(bd_var_.req_send[nb.bufid] == MPI_REQUEST_NULL,
                       "Trying to create a new send before previous send completes!");
+    // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__);
+    printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank,
+           bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
+           bd_var_.send[nb.bufid].span());
+    if (send_size[nb.bufid] > 1000) {
+      printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank,
+             bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
+             bd_var_.send[nb.bufid].span());
+      PARTHENON_FAIL("help!");
+    }
     PARTHENON_MPI_CHECK(MPI_Isend(bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
                                   MPI_PARTHENON_REAL, nb.rank, send_tag[nb.bufid],
                                   swarm_comm, &(bd_var_.req_send[nb.bufid])));
+    // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__);
 #endif // MPI_PARALLEL
   } else {
     MeshBlock &target_block = *pmy_mesh_->FindMeshBlock(nb.gid);
diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp
index 7b236b6e4e1f2..3ab3a367689eb 100644
--- a/src/interface/swarm_comms.cpp
+++ b/src/interface/swarm_comms.cpp
@@ -146,10 +146,13 @@ void Swarm::SetupPersistentMPI() {
   // Build device array mapping neighbor index to neighbor bufid
   if (pmb->neighbors.size() > 0) {
+    // printf("SANITIZING!\n");
     ParArrayND<int> neighbor_buffer_index("Neighbor buffer index",
                                           pmb->neighbors.size());
     auto neighbor_buffer_index_h = neighbor_buffer_index.GetHostMirror();
     for (int n = 0; n < pmb->neighbors.size(); n++) {
       neighbor_buffer_index_h(n) = pmb->neighbors[n].bufid;
+      // const int bufid = pmb->neighbors[n].bufid;
+      // vbswarm->bd_var_.send[bufid] = BufArray1D<Real>("Buffer", GetParticleDataSize());
     }
     neighbor_buffer_index.DeepCopy(neighbor_buffer_index_h);
     neighbor_buffer_index_ = neighbor_buffer_index;
@@ -229,6 +232,8 @@ void Swarm::LoadBuffers_() {
     auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy();
     auto buffer_start_h = buffer_start.GetHostMirrorAndCopy();
 
+    Kokkos::fence();
+
     // Resize send buffers if too small
     for (int n = 0; n < pmb->neighbors.size(); n++) {
       num_particles_to_send_h(n) -= buffer_start_h(n);
@@ -237,8 +242,13 @@ void Swarm::LoadBuffers_() {
       if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) {
         sendbuf = BufArray1D<Real>("Buffer", num_particles_to_send_h(n) * particle_size);
         vbswarm->bd_var_.send[bufid] = sendbuf;
+        printf("[%i] new buf %i! size: %i span: %i\n", Globals::my_rank, bufid,
+               num_particles_to_send_h(n) * particle_size,
+               vbswarm->bd_var_.send[bufid].span());
       }
       vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size;
+      printf("send size: %i (%i %i)\n", vbswarm->send_size[bufid],
+             num_particles_to_send_h(n), particle_size);
     }
 
     auto &bdvar = vbswarm->bd_var_;
@@ -269,6 +279,11 @@ void Swarm::LoadBuffers_() {
 
     // Remove particles that were loaded to send to another block from this block
     RemoveMarkedParticles();
+  } else {
+    for (int n = 0; n < pmb->neighbors.size(); n++) {
+      const int bufid = pmb->neighbors[n].bufid;
+      vbswarm->send_size[bufid] = 0;
+    }
   }
 }

From 50069a25b237a95de229f63d5ee6fa98d3dcd77c Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 09:14:11 -0700
Subject: [PATCH 2/6] cleanup, CHANGELOG

---
 CHANGELOG.md                  |  1 +
 src/bvals/bvals.cpp           | 11 -----------
 src/interface/swarm_comms.cpp | 10 ----------
 3 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 17f8a009242d1..c29941247e438 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades
 
 ### Changed (changing behavior/API/variables/...)
+- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix
 - [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image
 - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag
 - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize
diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp
index c6d8a571be30c..37a4d46899169 100644
--- a/src/bvals/bvals.cpp
+++ b/src/bvals/bvals.cpp
@@ -100,20 +100,9 @@ void BoundarySwarm::Send(BoundaryCommSubset phase) {
 #ifdef MPI_PARALLEL
     PARTHENON_REQUIRE(bd_var_.req_send[nb.bufid] == MPI_REQUEST_NULL,
                       "Trying to create a new send before previous send completes!");
-    // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__);
-    printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank,
-           bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
-           bd_var_.send[nb.bufid].span());
-    if (send_size[nb.bufid] > 1000) {
-      printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank,
-             bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
-             bd_var_.send[nb.bufid].span());
-      PARTHENON_FAIL("help!");
-    }
     PARTHENON_MPI_CHECK(MPI_Isend(bd_var_.send[nb.bufid].data(), send_size[nb.bufid],
                                   MPI_PARTHENON_REAL, nb.rank, send_tag[nb.bufid],
                                   swarm_comm, &(bd_var_.req_send[nb.bufid])));
-    // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__);
 #endif // MPI_PARALLEL
   } else {
     MeshBlock &target_block = *pmy_mesh_->FindMeshBlock(nb.gid);
diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp
index 3ab3a367689eb..054ded34fabb1 100644
--- a/src/interface/swarm_comms.cpp
+++ b/src/interface/swarm_comms.cpp
@@ -146,13 +146,10 @@ void Swarm::SetupPersistentMPI() {
   // Build device array mapping neighbor index to neighbor bufid
   if (pmb->neighbors.size() > 0) {
-    // printf("SANITIZING!\n");
     ParArrayND<int> neighbor_buffer_index("Neighbor buffer index",
                                           pmb->neighbors.size());
     auto neighbor_buffer_index_h = neighbor_buffer_index.GetHostMirror();
     for (int n = 0; n < pmb->neighbors.size(); n++) {
       neighbor_buffer_index_h(n) = pmb->neighbors[n].bufid;
-      // const int bufid = pmb->neighbors[n].bufid;
-      // vbswarm->bd_var_.send[bufid] = BufArray1D<Real>("Buffer", GetParticleDataSize());
     }
     neighbor_buffer_index.DeepCopy(neighbor_buffer_index_h);
     neighbor_buffer_index_ = neighbor_buffer_index;
@@ -232,8 +229,6 @@ void Swarm::LoadBuffers_() {
     auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy();
     auto buffer_start_h = buffer_start.GetHostMirrorAndCopy();
 
-    Kokkos::fence();
-
     // Resize send buffers if too small
     for (int n = 0; n < pmb->neighbors.size(); n++) {
       num_particles_to_send_h(n) -= buffer_start_h(n);
@@ -242,13 +237,8 @@ void Swarm::LoadBuffers_() {
       if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) {
         sendbuf = BufArray1D<Real>("Buffer", num_particles_to_send_h(n) * particle_size);
         vbswarm->bd_var_.send[bufid] = sendbuf;
-        printf("[%i] new buf %i! size: %i span: %i\n", Globals::my_rank, bufid,
-               num_particles_to_send_h(n) * particle_size,
-               vbswarm->bd_var_.send[bufid].span());
       }
       vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size;
-      printf("send size: %i (%i %i)\n", vbswarm->send_size[bufid],
-             num_particles_to_send_h(n), particle_size);
     }
 
     auto &bdvar = vbswarm->bd_var_;

From af087fdf98d822597fa9556f455e4b2e1dcdb788 Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 09:37:09 -0700
Subject: [PATCH 3/6] verbose CI

---
 .github/workflows/ci-short.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml
index ecb4052411eea..45617edcfde76 100644
--- a/.github/workflows/ci-short.yml
+++ b/.github/workflows/ci-short.yml
@@ -164,7 +164,7 @@ jobs:
       run: |
         cmake --build build -t particle-leapfrog
         cd build
-        ctest -R regression_mpi_test:particle_leapfrog
+        ctest -R regression_mpi_test:particle_leapfrog -V
 
     - uses: actions/upload-artifact@v3
       with:

From d6145e1055c136d126ecb554035118ba08580708 Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 09:47:12 -0700
Subject: [PATCH 4/6] further CI debugging
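
Temporarily disable the advection job to shorten the debug loop, and run the
particle-leapfrog binary directly under mpiexec before handing it to ctest,
so the raw per-rank output (and any MPI abort message) appears in the CI log
even if the test harness swallows it. The absolute paths match the GitHub
Actions workspace layout used by this workflow; the trailing "ls" just shows
what the run left behind in the build directory.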
---
 .github/workflows/ci-short.yml | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml
index 45617edcfde76..ec56441a564f4 100644
--- a/.github/workflows/ci-short.yml
+++ b/.github/workflows/ci-short.yml
@@ -154,16 +154,18 @@ jobs:
           -DMACHINE_VARIANT=hip-mpi \
           -DCMAKE_CXX_COMPILER=hipcc
     # Test example with "variables" and output
-    - name: advection
-      run: |
-        cmake --build build -t advection-example
-        cd build
-        ctest -R regression_mpi_test:output_hdf5
+    #- name: advection
+    #  run: |
+    #    cmake --build build -t advection-example
+    #    cd build
+    #    ctest -R regression_mpi_test:output_hdf5
     # Test example with swarms
     - name: particle-leapfrog
       run: |
         cmake --build build -t particle-leapfrog
         cd build
+        /usr/bin/mpiexec -n 2 --allow-run-as-root /__w/parthenon/parthenon/build/example/particle_leapfrog/particle-leapfrog -i /__w/parthenon/parthenon/tst/regression/test_suites/particle_leapfrog/parthinput.particle_leapfrog parthenon/job/problem_id=gold --kokkos-map-device-id-by=mpi_rank
+        ls
         ctest -R regression_mpi_test:particle_leapfrog -V
 
     - uses: actions/upload-artifact@v3
       with:

From 8c6f4d00b7458fe0cba56f1cc5543bdea606a4e2 Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 13:35:43 -0700
Subject: [PATCH 5/6] This should be working...
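
thrust is only available in CUDA builds, so give sort() a host-side fallback
for HIP: mirror the device array to host memory, std::sort it there, and
copy the result back. A minimal sketch of the pattern, as used by the
comparator overload in the hunks below:

    // Copy device data to a host mirror, sort on the host, copy back.
    auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data);
    std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator);
    Kokkos::deep_copy(data, data_h);

This round-trips through host memory on every call; the TODO notes that with
Kokkos 4.4 the whole thing can become a device-side Kokkos::sort on a
subview.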
---
 src/utils/sort.hpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp
index 97e9c77a88e43..a4ab8139dab35 100644
--- a/src/utils/sort.hpp
+++ b/src/utils/sort.hpp
@@ -61,7 +61,7 @@ void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
           size_t max_idx) {
   PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!");
   PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!");
-#ifdef KOKKOS_ENABLE_CUDA
+#if defined(KOKKOS_ENABLE_CUDA)
 #ifdef __clang__
   PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, "
                  "see https://github.com/lanl/parthenon/issues/647 for more details. We "
@@ -74,6 +74,13 @@ void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
   thrust::device_ptr<Key> last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1;
   thrust::sort(first_d, last_d, comparator);
 #endif
+#elif defined(KOKKOS_ENABLE_HIP)
+  auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data);
+  std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator);
+  Kokkos::deep_copy(data, data_h);
+  // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort
+  // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1));
+  // Kokkos::sort(sub_data, comparator);
 #else
   if (std::is_same<DevMemSpace, HostMemSpace>::value) {
     std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator);
@@ -89,7 +96,7 @@ template <class Key>
 void sort(ParArray1D<Key> data, size_t min_idx, size_t max_idx) {
   PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!");
   PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!");
-#ifdef KOKKOS_ENABLE_CUDA
+#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
 #ifdef __clang__
   PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, "
                  "see https://github.com/lanl/parthenon/issues/647 for more details. We "
@@ -102,6 +109,12 @@ void sort(ParArray1D<Key> data, size_t min_idx, size_t max_idx) {
   thrust::device_ptr<Key> last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1;
   thrust::sort(first_d, last_d);
 #endif
+  auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data);
+  std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1);
+  Kokkos::deep_copy(data, data_h);
+  // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort
+  // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1));
+  // Kokkos::sort(sub_data);
 #else
   if (std::is_same<DevMemSpace, HostMemSpace>::value) {
     std::sort(data.data() + min_idx, data.data() + max_idx + 1);

From 42a9356b4916d085d5a269cdb4cc439c8a87a7d5 Mon Sep 17 00:00:00 2001
From: Ben Ryan
Date: Thu, 7 Nov 2024 13:57:21 -0700
Subject: [PATCH 6/6] This should be fixed... but I get a segfault on GPU
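
Bracket the comparator overload of sort() with a fence and a file/line
marker to narrow down where the GPU run dies. Kokkos::fence() forces all
queued device work to complete first, so the last marker printed before the
crash points at the right region. The bracketing pattern added below:

    Kokkos::fence();                        // drain pending device work
    printf("%s:%i\n", __FILE__, __LINE__);  // last marker reached before a crash

If the segfault fires between the entry and exit markers, the host-side sort
(or the mirror copies around it) is the culprit.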
---
 src/utils/sort.hpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp
index a4ab8139dab35..3c91b1ec85ca6 100644
--- a/src/utils/sort.hpp
+++ b/src/utils/sort.hpp
@@ -59,6 +59,8 @@ KOKKOS_INLINE_FUNCTION int upper_bound(const T &arr, Real val) {
 template <class Key, class KeyComparator>
 void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
           size_t max_idx) {
+  Kokkos::fence();
+  printf("%s:%i\n", __FILE__, __LINE__);
   PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!");
   PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!");
 #if defined(KOKKOS_ENABLE_CUDA)
@@ -90,6 +92,8 @@ void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
                    "touch by opening an issue on the Parthenon GitHub.");
   }
 #endif // KOKKOS_ENABLE_CUDA
+  Kokkos::fence();
+  printf("%s:%i\n", __FILE__, __LINE__);
 }
 
 template <class Key>