Skip to content

Commit

Permalink
Address PR comments
Browse files Browse the repository at this point in the history
+ Call the member variable in SwarmKey sort_idx_
+ Remove CountParticlesToSend_ function
+ Add buffer_start and buffer_sorted as swarm member variables
  • Loading branch information
alexrlongne committed Oct 29, 2024
1 parent 433b63c commit d1e3e37
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 103 deletions.
23 changes: 14 additions & 9 deletions src/interface/swarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ SwarmDeviceContext Swarm::GetDeviceContext() const {
context.block_index_ = block_index_;
context.neighbor_indices_ = neighbor_indices_;
context.cell_sorted_ = cell_sorted_;
context.buffer_sorted_ = buffer_sorted_;
context.cell_sorted_begin_ = cell_sorted_begin_;
context.cell_sorted_number_ = cell_sorted_number_;

Expand Down Expand Up @@ -73,9 +74,10 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_
new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_),
scratch_b_("scratch_b_", nmax_pool_),
num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS),
buffer_counters_("buffer_counters_", NMAX_NEIGHBORS),
buffer_start_("buffer_start_", NMAX_NEIGHBORS),
neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS),
cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) {
cell_sorted_("cell_sorted_", nmax_pool_),
buffer_sorted_("buffer_sorted_", nmax_pool_), mpiStatus(true) {
PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian),
"SwarmDeviceContext only supports a uniform Cartesian mesh!");

Expand Down Expand Up @@ -209,6 +211,9 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) {
Kokkos::resize(cell_sorted_, nmax_pool);
pmb->LogMemUsage(n_new * sizeof(SwarmKey));

Kokkos::resize(buffer_sorted_, nmax_pool);
pmb->LogMemUsage(n_new * sizeof(SwarmKey));

block_index_.Resize(nmax_pool);
pmb->LogMemUsage(n_new * sizeof(int));

Expand Down Expand Up @@ -490,35 +495,35 @@ void Swarm::SortParticlesByCell() {
break;
}

if (cell_sorted(start_index).cell_idx_1d_ == cell_idx_1d) {
if (cell_sorted(start_index).sort_idx_ == cell_idx_1d) {
if (start_index == 0) {
break;
} else if (cell_sorted(start_index - 1).cell_idx_1d_ != cell_idx_1d) {
} else if (cell_sorted(start_index - 1).sort_idx_ != cell_idx_1d) {
break;
} else {
start_index--;
continue;
}
}
if (cell_sorted(start_index).cell_idx_1d_ >= cell_idx_1d) {
if (cell_sorted(start_index).sort_idx_ >= cell_idx_1d) {
start_index--;
if (start_index < 0) {
start_index = -1;
break;
}
if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) {
if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) {
start_index = -1;
break;
}
continue;
}
if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) {
if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) {
start_index++;
if (start_index > max_active_index) {
start_index = -1;
break;
}
if (cell_sorted(start_index).cell_idx_1d_ > cell_idx_1d) {
if (cell_sorted(start_index).sort_idx_ > cell_idx_1d) {
start_index = -1;
break;
}
Expand All @@ -532,7 +537,7 @@ void Swarm::SortParticlesByCell() {
int number = 0;
int current_index = start_index;
while (current_index <= max_active_index &&
cell_sorted(current_index).cell_idx_1d_ == cell_idx_1d) {
cell_sorted(current_index).sort_idx_ == cell_idx_1d) {
current_index++;
number++;
cell_sorted_number(k, j, i) = number;
Expand Down
5 changes: 4 additions & 1 deletion src/interface/swarm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ class Swarm {
constexpr static int unset_index_ = -1;

ParArray1D<int> num_particles_to_send_;
ParArray1D<int> buffer_counters_;
ParArray1D<int> buffer_start_;
ParArray1D<int> neighbor_received_particles_;
int total_received_particles_;

Expand All @@ -298,6 +298,9 @@ class Swarm {
ParArray1D<SwarmKey>
cell_sorted_; // 1D per-cell sorted array of key-value swarm memory indices

ParArray1D<SwarmKey>
buffer_sorted_; // 1D per-buffer sorted array of key-value swarm memory indices

ParArrayND<int>
cell_sorted_begin_; // Per-cell array of starting indices in cell_sorted_

Expand Down
112 changes: 22 additions & 90 deletions src/interface/swarm_comms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,67 +156,6 @@ void Swarm::SetupPersistentMPI() {
}
}

// Count, per neighbor, how many active particles are flagged for sending, store the
// counts in num_particles_to_send_, and make sure each neighbor's send buffer is large
// enough to hold that many particles.
//
// Side effects visible in this function:
//   - vbswarm->particle_size is set to GetParticleDataSize()
//   - num_particles_to_send_ is zeroed and then re-accumulated on device
//   - vbswarm->bd_var_.send[bufid] may be replaced by a larger, freshly allocated buffer
//   - vbswarm->send_size[bufid] is set for every entry of pmb->neighbors
void Swarm::CountParticlesToSend_() {
// NOTE(review): mask_h and nbmax are not referenced again in this function. The mirror
// copy may still matter for synchronization -- confirm before removing.
auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_);
auto swarm_d = GetDeviceContext();
auto pmb = GetBlockPointer();
const int nbmax = vbswarm->bd_var_.nbmax;

// Fence to make sure particles aren't currently being transported locally
// TODO(BRR) do this operation on device.
pmb->exec_space.fence();
// Record the per-particle data size; it is also used below to size the send buffers.
const int particle_size = GetParticleDataSize();
vbswarm->particle_size = particle_size;

// TODO(BRR) This kernel launch should be folded into the subsequent logic once we
// convert that to kernel-based reductions
auto &x = Get<Real>(swarm_position::x::name()).Get();
auto &y = Get<Real>(swarm_position::y::name()).Get();
auto &z = Get<Real>(swarm_position::z::name()).Get();
const int max_active_index = GetMaxActiveIndex();
// First pass over the particle pool: the return value of GetNeighborBlockIndex is
// discarded, so this launch is only useful for that call's side effects.
// NOTE(review): presumably it updates block_index_(n) -- confirm against
// SwarmDeviceContext. The same call is repeated in the accumulation kernel below.
pmb->par_for(
PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) {
if (swarm_d.IsActive(n)) {
bool on_current_mesh_block = true;
swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block);
}
});

// Facilitate lambda captures
auto &block_index = block_index_;
auto &num_particles_to_send = num_particles_to_send_;

// Zero out number of particles to send before accumulating
pmb->par_for(
PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1,
KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; });

// Accumulate the per-neighbor counts. Only particles with block_index(n) >= 0 are
// counted; a negative index is skipped (presumably meaning "not leaving this block" --
// confirm). The increment is atomic because several particles can map to the same
// neighbor counter.
// NOTE(review): this launch uses parthenon::par_for while the two above use
// pmb->par_for -- confirm this is intentional.
parthenon::par_for(
PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) {
if (swarm_d.IsActive(n)) {
bool on_current_mesh_block = true;
swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block);

if (block_index(n) >= 0) {
Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1);
}
}
});

// Bring the counts to the host so they can drive buffer sizing below.
auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy();

// Resize send buffers if too small
// Counts are indexed by position n in pmb->neighbors, while buffers and send sizes
// are indexed by that neighbor's bufid. A too-small buffer is replaced by a new
// allocation; the old contents are not copied over.
for (int n = 0; n < pmb->neighbors.size(); n++) {
const int bufid = pmb->neighbors[n].bufid;
auto sendbuf = vbswarm->bd_var_.send[bufid];
if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) {
sendbuf = BufArray1D<Real>("Buffer", num_particles_to_send_h(n) * particle_size);
vbswarm->bd_var_.send[bufid] = sendbuf;
}
// The send size is recorded for every neighbor, whether or not a resize happened.
vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size;
}
}

void Swarm::LoadBuffers_() {
auto swarm_d = GetDeviceContext();
auto pmb = GetBlockPointer();
Expand All @@ -240,25 +179,22 @@ void Swarm::LoadBuffers_() {
auto &y = Get<Real>(swarm_position::y::name()).Get();
auto &z = Get<Real>(swarm_position::z::name()).Get();

if(max_active_index_ >= 0) {
// Make an n particle sized array of index, buffer pairs (with SwarmKey struct)
ParArray1D<SwarmKey> buffer_sorted("buffer_sorted", max_active_index_+1);
ParArray1D<int> buffer_start("buffer_start", nneighbor);
if (max_active_index_ >= 0) {
auto &buffer_sorted = buffer_sorted_;
auto &buffer_start = buffer_start_;

pmb->par_for(
PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) {
if(swarm_d.IsActive(n)) {
if (swarm_d.IsActive(n)) {
bool on_current_mesh_block = true;
const int m =
swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block);
swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block);
buffer_sorted(n) = SwarmKey(m, n);
}
else {
buffer_sorted(n) = SwarmKey(-1, n);
} else {
buffer_sorted(n) = SwarmKey(this_block_, n);
}
});


// sort by buffer index
sort(buffer_sorted, SwarmKeyComparator(), 0, max_active_index_);

Expand All @@ -268,28 +204,27 @@ void Swarm::LoadBuffers_() {

// Zero out number of particles to send before accumulating
pmb->par_for(
PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1,
KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; });
PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) {
num_particles_to_send[n] = 0;
buffer_start[n] = 0;
});

pmb->par_for(
PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) {
auto m = buffer_sorted(n).cell_idx_1d_;
auto m = buffer_sorted(n).sort_idx_;
// start checks (used for index of particle in buffer)
if (m >= 0 && n ==0 ) {
if (m >= 0 && n == 0) {
buffer_start(m) = 0;
}
else if (m >= 0 && m != buffer_sorted(n-1).cell_idx_1d_) {
} else if (m >= 0 && m != buffer_sorted(n - 1).sort_idx_) {
buffer_start(m) = n;
}

// end checks (used to size particle buffers)
if (m >= 0 && n == max_active_index ) {
num_particles_to_send(m) = n +1;
if (m >= 0 && n == max_active_index) {
num_particles_to_send(m) = n + 1;
} else if (m >= 0 && m != buffer_sorted(n + 1).sort_idx_) {
num_particles_to_send(m) = n + 1;
}
else if (m >= 0 && m != buffer_sorted(n+1).cell_idx_1d_ ) {
num_particles_to_send(m) = n +1;
}
});
});

// copy values back to host for buffer sizing
auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy();
Expand All @@ -315,7 +250,7 @@ void Swarm::LoadBuffers_() {
PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) {
auto p_index = buffer_sorted(n).swarm_idx_;
if (swarm_d.IsActive(p_index)) {
const int m = buffer_sorted(n).cell_idx_1d_;
const int m = buffer_sorted(n).sort_idx_;
const int bufid = neighbor_buffer_index(m);
if (m >= 0) {
const int bid = n - buffer_start[m];
Expand Down Expand Up @@ -343,10 +278,8 @@ void Swarm::Send(BoundaryCommSubset phase) {
const int nneighbor = pmb->neighbors.size();
auto swarm_d = GetDeviceContext();

// Query particles for those to be sent
//CountParticlesToSend_();

// Prepare buffers for send operations
// Potentially resize buffer, get consistent index from particle array, get ready to
// send
LoadBuffers_();

// Send buffer data
Expand Down Expand Up @@ -507,4 +440,3 @@ void Swarm::AllocateComms(std::weak_ptr<MeshBlock> wpmb) {
}

} // namespace parthenon

7 changes: 4 additions & 3 deletions src/interface/swarm_device_context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,16 @@ struct SwarmKey {
SwarmKey() {}
KOKKOS_INLINE_FUNCTION
SwarmKey(const int cell_idx_1d, const int swarm_idx_1d)
: cell_idx_1d_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {}
: sort_idx_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {}

int cell_idx_1d_;
int sort_idx_;
int swarm_idx_;
};

struct SwarmKeyComparator {
KOKKOS_INLINE_FUNCTION
bool operator()(const SwarmKey &s1, const SwarmKey &s2) {
return s1.cell_idx_1d_ < s2.cell_idx_1d_;
return s1.sort_idx_ < s2.sort_idx_;
}
};

Expand Down Expand Up @@ -139,6 +139,7 @@ class SwarmDeviceContext {
ParArrayND<int> block_index_;
ParArrayND<int> neighbor_indices_; // 4x4x4 array of possible block AMR regions
ParArray1D<SwarmKey> cell_sorted_;
ParArray1D<SwarmKey> buffer_sorted_;
ParArrayND<int> cell_sorted_begin_;
ParArrayND<int> cell_sorted_number_;
int ndim_;
Expand Down

0 comments on commit d1e3e37

Please sign in to comment.