Particle: pack the particle id and cpu number into one 64-bit word

Review notes on this revision of the patch:
  * ParticleCPUWrapper / ConstParticleCPUWrapper: operator int is now const,
    matching the id wrappers, so a const wrapper object can be read.
  * ParticleCPUWrapper::operator=: the clear mask is built as a 64-bit value
    instead of relying on sign extension of a negative int, and the stored
    cpu is masked to 24 bits so it cannot overwrite the id bits.
  * Template argument lists that were lost from this text are restored only
    where the surrounding code determines them (static_cast<Long>,
    std::numeric_limits<int>). Elsewhere, including the first context line
    of the AMReX_ParticleContainerI.H hunk, the text is reproduced as found
    and must be checked against the target files before applying.
  * Whitespace-only context lines were inferred from the hunk line counts.

diff --git a/Src/Particle/AMReX_Particle.H b/Src/Particle/AMReX_Particle.H
index 974a2e77cd4..e1c37c68471 100644
--- a/Src/Particle/AMReX_Particle.H
+++ b/Src/Particle/AMReX_Particle.H
@@ -15,13 +15,146 @@ namespace amrex {
 
 namespace
 {
-    constexpr int GhostParticleID    = std::numeric_limits<int>::max();
-    constexpr int VirtualParticleID  = std::numeric_limits<int>::max()-1;
-    constexpr int LastParticleID     = std::numeric_limits<int>::max()-2;
-    constexpr int DoSplitParticleID  = std::numeric_limits<int>::max()-3;
-    constexpr int NoSplitParticleID  = std::numeric_limits<int>::max()-4;
+    constexpr Long GhostParticleID    = 549755813887L; // 2**39-1
+    constexpr Long VirtualParticleID  = GhostParticleID-1;
+    constexpr Long LastParticleID     = GhostParticleID-2;
+    constexpr Long DoSplitParticleID  = GhostParticleID-3;
+    constexpr Long NoSplitParticleID  = GhostParticleID-4;
 }
 
+/**
+ * \brief Read/write view of the particle id stored in a packed 64-bit word.
+ *
+ * Bit 63 is 1 when the id is non-negative, bits 24-62 hold the magnitude
+ * of the id, and the low 24 bits are left untouched.
+ */
+struct ParticleIDWrapper
+{
+    uint64_t& m_idata;
+
+    AMREX_GPU_HOST_DEVICE
+    ParticleIDWrapper (uint64_t& idata) noexcept
+        : m_idata(idata)
+    {}
+
+    AMREX_GPU_HOST_DEVICE
+    ParticleIDWrapper& operator= (const Long id) noexcept
+    {
+        // zero out the 40 leftmost bits, which store the sign and the abs of the id
+        m_idata &= 0x00FFFFFF;
+
+        uint64_t val;
+        uint64_t sign = id >= 0;
+        if (sign)
+        {
+            // 2**39-1, the max value representable in this fashion
+            AMREX_ASSERT(id <= 549755813887L);
+            val = id;
+        }
+        else
+        {
+            // -(2**39-1), the min value representable in this fashion
+            AMREX_ASSERT(id >= -549755813887L);
+            val = -id;
+        }
+
+        m_idata |= (sign << 63);  // put the sign in the leftmost bit
+        m_idata |= (val << 24);   // put the val in the next 39
+        return *this;
+    }
+
+    AMREX_GPU_HOST_DEVICE
+    operator Long () const noexcept
+    {
+        Long r = 0;
+
+        uint64_t sign = m_idata >> 63;  // extract leftmost sign bit
+        uint64_t val = ((m_idata >> 24) & 0x7FFFFFFFFF);  // extract next 39 id bits
+
+        Long lval = static_cast<Long>(val);  // signed copy, because it may be negated below
+        r = (sign) ? lval : -lval;
+        return r;
+    }
+};
+
+/**
+ * \brief Read/write view of the cpu number stored in a packed 64-bit word.
+ *
+ * The cpu number occupies the low 24 bits; the upper 40 bits are left
+ * untouched.
+ */
+struct ParticleCPUWrapper
+{
+    uint64_t& m_idata;
+
+    AMREX_GPU_HOST_DEVICE
+    ParticleCPUWrapper (uint64_t& idata) noexcept
+        : m_idata(idata)
+    {}
+
+    AMREX_GPU_HOST_DEVICE
+    ParticleCPUWrapper& operator= (const int cpu) noexcept
+    {
+        // zero out the low 24 bits, which are used to store the cpu number
+        m_idata &= ~static_cast<uint64_t>(0x00FFFFFF);
+
+        AMREX_ASSERT(cpu >= 0);
+        AMREX_ASSERT(cpu <= 16777215);  // 2**24-1, the max representable number
+
+        // mask so that an out-of-range value cannot spill into the upper 40 bits
+        m_idata |= (static_cast<uint64_t>(cpu) & 0x00FFFFFF);
+        return *this;
+    }
+
+    AMREX_GPU_HOST_DEVICE
+    operator int () const noexcept
+    {
+        return static_cast<int>(m_idata & 0x00FFFFFF);
+    }
+};
+
+/**
+ * \brief Read-only counterpart of ParticleIDWrapper.
+ */
+struct ConstParticleIDWrapper
+{
+    const uint64_t& m_idata;
+
+    AMREX_GPU_HOST_DEVICE
+    ConstParticleIDWrapper (const uint64_t& idata) noexcept
+        : m_idata(idata)
+    {}
+
+    AMREX_GPU_HOST_DEVICE
+    operator Long () const noexcept
+    {
+        Long r = 0;
+
+        uint64_t sign = m_idata >> 63;  // extract leftmost sign bit
+        uint64_t val = ((m_idata >> 24) & 0x7FFFFFFFFF);  // extract next 39 id bits
+
+        Long lval = static_cast<Long>(val);  // signed copy, because it may be negated below
+        r = (sign) ? lval : -lval;
+        return r;
+    }
+};
+
+/**
+ * \brief Read-only counterpart of ParticleCPUWrapper.
+ */
+struct ConstParticleCPUWrapper
+{
+    const uint64_t& m_idata;
+
+    AMREX_GPU_HOST_DEVICE
+    ConstParticleCPUWrapper (const uint64_t& idata) noexcept
+        : m_idata(idata)
+    {}
+
+    AMREX_GPU_HOST_DEVICE
+    operator int () const noexcept { return static_cast<int>(m_idata & 0x00FFFFFF); }
+};
+
 /** \brief The struct used to store particles.
  *
  * \tparam T_NReal The number of extra Real components
@@ -54,14 +187,21 @@ struct Particle
     * The integer data. We always have id and cpu, and optionally we
     * have NInt additional integer attributes.
     */
-    int m_idata[2+NInt];
+    // ids holds the packed id/cpu word (see id() and cpu()); arr[2..]
+    // holds the NInt additional integer components (see idata()).
+    union im_t
+    {
+        uint64_t ids = 0;
+        int arr[2+NInt];
+    };
+    im_t m_idata;
 
-    static int the_next_id;
+    static Long the_next_id;
 
-    AMREX_GPU_HOST_DEVICE int& id () & {return m_idata[0];}
-    AMREX_GPU_HOST_DEVICE int id () const & {return m_idata[0];}
-    AMREX_GPU_HOST_DEVICE int& cpu () & {return m_idata[1];}
-    AMREX_GPU_HOST_DEVICE int cpu () const & {return m_idata[1];}
+    AMREX_GPU_HOST_DEVICE ParticleCPUWrapper cpu () & { return ParticleCPUWrapper(m_idata.ids); }
+    AMREX_GPU_HOST_DEVICE ParticleIDWrapper id () & { return ParticleIDWrapper(m_idata.ids); }
+    AMREX_GPU_HOST_DEVICE ConstParticleCPUWrapper cpu () const & { return ConstParticleCPUWrapper(m_idata.ids); }
+    AMREX_GPU_HOST_DEVICE ConstParticleIDWrapper id () const & { return ConstParticleIDWrapper(m_idata.ids); }
 
     AMREX_GPU_HOST_DEVICE RealVect pos () const & {return RealVect(AMREX_D_DECL(m_rdata.pos[0], m_rdata.pos[1], m_rdata.pos[2]));}
 
@@ -105,12 +245,12 @@ struct Particle
 
     AMREX_GPU_HOST_DEVICE int& idata (int index) & {
         AMREX_ASSERT(index < NInt);
-        return m_idata[2 + index];
+        return m_idata.arr[2 + index];
     }
 
     AMREX_GPU_HOST_DEVICE int idata (int index) const & {
         AMREX_ASSERT(index < NInt);
-        return m_idata[2 + index];
+        return m_idata.arr[2 + index];
     }
 
     static Real InterpDoit (const FArrayBox& fab, const Real* fracs, const IntVect* cells, int comp);
@@ -138,13 +278,13 @@ struct Particle
     * across all processors must be checkpointed and then restored on restart
     * so that we don't reuse particle IDs.
     */
-    static int NextID ();
+    static Long NextID ();
 
 
     /**
     * \brief This version can only be used inside omp critical.
     */
-    static int UnprotectedNextID ();
+    static Long UnprotectedNextID ();
 
 
     /**
@@ -152,7 +292,7 @@ struct Particle
     *
     * \param nextid
     */
-    static void NextID (int nextid);
+    static void NextID (Long nextid);
 
     static void CIC_Fracs (const Real* frac, Real* fracs);
 
@@ -207,7 +347,7 @@ struct Particle
                                 Vector& fracs,
                                 Vector& cells);
 };
-template int Particle::the_next_id = 1;
+template Long Particle::the_next_id = 1;
 
 template
 inline
@@ -559,17 +699,17 @@ Particle::Version ()
 }
 
 template
-int
+Long
 Particle::NextID ()
 {
-    int next;
+    Long next;
 // we should be able to test on _OPENMP < 201107 for capture (version 3.1)
 // but we must work around a bug in gcc < 4.9
 #if defined(_OPENMP) && _OPENMP < 201307
 #pragma omp critical (amrex_particle_nextid)
 #elif defined(_OPENMP)
 #pragma omp atomic capture
-#endif 
+#endif
     next = the_next_id++;
 
     if (next > LastParticleID)
@@ -579,10 +719,10 @@ Particle::NextID ()
 }
 
 template
-int
+Long
 Particle::UnprotectedNextID ()
 {
-    int next = the_next_id++;
+    Long next = the_next_id++;
     if (next > LastParticleID)
         amrex::Abort("Particle::NextID() -- too many particles");
     return next;
@@ -590,7 +730,7 @@ Particle::UnprotectedNextID ()
 
 template
 void
-Particle::NextID (int nextid)
+Particle::NextID (Long nextid)
 {
     the_next_id = nextid;
 }
@@ -694,7 +834,7 @@ operator<< (std::ostream& os, const Particle& p)
         os << p.m_rdata.arr[i] << ' ';
 
     for (int i = 2; i < 2 + NInt; i++)
-        os << p.m_idata[i] << ' ';
+        os << p.m_idata.arr[i] << ' ';
 
     if (!os.good())
         amrex::Error("operator<<(ostream&,Particle&) failed");
diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H
index 70e7e91c886..1a889da7832 100644
--- a/Src/Particle/AMReX_ParticleContainerI.H
+++ b/Src/Particle/AMReX_ParticleContainerI.H
@@ -61,9 +61,10 @@ ParticleContainer :: Initialize
 for (int i=0; i::value
-                   && std::is_trivial::value,
-                      "Particle type must be standard layout and trivial.");
+        static_assert(std::is_standard_layout::value,
+                      "Particle type must be standard layout");
+        // NOTE(review): the std::is_trivial check was dropped here, presumably because
+        // the default member initializer in Particle::im_t makes the type non-trivial.
         pp.query("use_prepost", usePrePost);
         pp.query("do_unlink", doUnlink);