Skip to content

Commit

Permalink
Fix offset in send buffer of single precision particle communication
Browse files Browse the repository at this point in the history
The bug manifests when a process needs to send more than 2 GB of
single-precision data. The offset must guarantee that the send buffer's data
pointer, incremented by the offset, is suitably aligned for any MPI
data type. Thus we use `Arena::align_size` as the alignment
requirement.

Also remove an incorrect assertion. The confusion probably came from the
misnamed function ParallelDescriptor::alignof_comm_data. Thus we rename it
to sizeof_selected_comm_data_type.
  • Loading branch information
WeiqunZhang committed Feb 13, 2024
1 parent 68244ec commit 82413db
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 15 deletions.
8 changes: 4 additions & 4 deletions Src/Base/AMReX_FabArrayCommI.H
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ FabArray<FAB>::PrepareSendBuffers (const MapOfCopyComTagContainers& SndTags,
nbytes += cct.sbox.numPts() * ncomp * sizeof(BUF);
}

std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned

// Also need to align the offset properly
Expand Down Expand Up @@ -757,7 +757,7 @@ FabArray<FAB>::PostRcvs (const MapOfCopyComTagContainers& RcvTags,
nbytes += cct.dbox.numPts() * ncomp * sizeof(BUF);
}

std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned

// Also need to align the offset properly
Expand Down Expand Up @@ -992,7 +992,7 @@ FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp,
}
}
std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned
// Also need to align the offset properly
Expand Down Expand Up @@ -1065,7 +1065,7 @@ FillBoundary (Vector<MF*> const& mf, Vector<int> const& scomp,
}
}
std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned
// Also need to align the offset properly
Expand Down
4 changes: 2 additions & 2 deletions Src/Base/AMReX_MPMD.H
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ void Copier::send (FabArray<FAB> const& mf, int icomp, int ncomp) const
nbytes += cct.sbox.numPts() * ncomp * sizeof(typename FAB::value_type);
}

std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that bytes are aligned

// Also need to align the offset properly
Expand Down Expand Up @@ -128,7 +128,7 @@ void Copier::recv (FabArray<FAB>& mf, int icomp, int ncomp) const
nbytes += cct.dbox.numPts() * ncomp * sizeof(typename FAB::value_type);
}

std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned

// Also need to align the offset properly
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_NonLocalBC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ void PrepareCommBuffers(CommData& comm,
nbytes += cct.sbox.numPts() * object_size * n_components;
}

std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
nbytes = amrex::aligned_size(acd, nbytes); // so that nbytes are aligned

// Also need to align the offset properly
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_ParallelDescriptor.H
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ while ( false )

#ifdef BL_USE_MPI
int select_comm_data_type (std::size_t nbytes);
std::size_t alignof_comm_data (std::size_t nbytes);
std::size_t sizeof_selected_comm_data_type (std::size_t nbytes);
#endif
}
}
Expand Down
2 changes: 1 addition & 1 deletion Src/Base/AMReX_ParallelDescriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,7 @@ select_comm_data_type (std::size_t nbytes)
}

std::size_t
alignof_comm_data (std::size_t nbytes)
sizeof_selected_comm_data_type (std::size_t nbytes)
{
const int t = select_comm_data_type(nbytes);
if (t == 1) {
Expand Down
7 changes: 3 additions & 4 deletions Src/Particle/AMReX_ParticleCommunication.H
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndB
RcvProc.push_back(i);
rOffset.push_back(TotRcvBytes);
Long nbytes = plan.m_rcv_num_particles[i]*psize;
std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
TotRcvBytes = Long(amrex::aligned_size(acd, TotRcvBytes));
TotRcvBytes += Long(amrex::aligned_size(acd, nbytes));
plan.m_rcv_pad_correction_h.push_back(plan.m_rcv_pad_correction_h.back() + nbytes);
Expand Down Expand Up @@ -516,7 +516,7 @@ void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndB
const auto Who = RcvProc[i];
const auto offset = rOffset[i];
Long nbytes = plan.m_rcv_num_particles[Who]*psize;
std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
const auto Cnt = amrex::aligned_size(acd, nbytes);

AMREX_ASSERT(Cnt > 0);
Expand All @@ -538,9 +538,8 @@ void communicateParticlesStart (const PC& pc, ParticleCopyPlan& plan, const SndB
if (Cnt == 0) { continue; }

auto snd_offset = plan.m_snd_offsets[i];
AMREX_ASSERT(plan.m_snd_counts[i] % ParallelDescriptor::alignof_comm_data(plan.m_snd_num_particles[i]*psize) == 0);
AMREX_ASSERT(plan.m_snd_counts[i] % ParallelDescriptor::sizeof_selected_comm_data_type(plan.m_snd_num_particles[i]*psize) == 0);
AMREX_ASSERT(Who >= 0 && Who < NProcs);
AMREX_ASSERT(snd_offset % ParallelDescriptor::alignof_comm_data(plan.m_snd_num_particles[i]*psize) == 0);

ParallelDescriptor::Send((char const*)(snd_buffer.dataPtr()+snd_offset), Cnt, Who, SeqNum,
ParallelContext::CommunicatorSub());
Expand Down
5 changes: 3 additions & 2 deletions Src/Particle/AMReX_ParticleCommunication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,12 @@ void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize)
for (int i = 0; i < NProcs; ++i)
{
Long nbytes = m_snd_num_particles[i]*psize;
std::size_t acd = ParallelDescriptor::alignof_comm_data(nbytes);
std::size_t acd = ParallelDescriptor::sizeof_selected_comm_data_type(nbytes);
auto Cnt = static_cast<Long>(amrex::aligned_size(acd, nbytes));
Long bytes_to_send = (i == MyProc) ? 0 : Cnt;
m_snd_counts.push_back(bytes_to_send);
m_snd_offsets.push_back(amrex::aligned_size(acd, m_snd_offsets.back()) + Cnt);
m_snd_offsets.push_back(amrex::aligned_size(Arena::align_size,
m_snd_offsets.back() + Cnt));
m_snd_pad_correction_h.push_back(m_snd_pad_correction_h.back() + nbytes);
}

Expand Down

0 comments on commit 82413db

Please sign in to comment.