diff --git a/Src/Base/AMReX_GpuAllocators.H b/Src/Base/AMReX_GpuAllocators.H index 6e8fe639459..6f9377c1857 100644 --- a/Src/Base/AMReX_GpuAllocators.H +++ b/Src/Base/AMReX_GpuAllocators.H @@ -166,7 +166,6 @@ namespace amrex { }; - #ifdef AMREX_USE_GPU template using DefaultAllocator = amrex::ArenaAllocator; diff --git a/Src/Particle/AMReX_ParticleContainerI.H b/Src/Particle/AMReX_ParticleContainerI.H index a7ed5e9c06d..644d379f705 100644 --- a/Src/Particle/AMReX_ParticleContainerI.H +++ b/Src/Particle/AMReX_ParticleContainerI.H @@ -32,7 +32,6 @@ template void ParticleContainer :: Initialize () { - levelDirectoriesCreated = false; usePrePost = false; doUnlink = true; @@ -43,7 +42,7 @@ ParticleContainer :: Initialize { static_assert(sizeof(ParticleType)%sizeof(RealType) == 0, "sizeof ParticleType is not a multiple of sizeof RealType"); - + ParmParse pp("particles"); pp.query("do_tiling", do_tiling); Vector tilesize(AMREX_SPACEDIM); @@ -54,7 +53,7 @@ ParticleContainer :: Initialize static_assert(std::is_standard_layout::value && std::is_trivial::value, "Particle type must be standard layout and trivial."); - + pp.query("use_prepost", usePrePost); pp.query("do_unlink", doUnlink); @@ -909,7 +908,7 @@ ParticleContainer:: copyParticles (const ParticleContainerType& other, bool local) { BL_PROFILE("ParticleContainer::copyParticles"); - clearParticles(); + clearParticles(); addParticles(other, local); } @@ -926,7 +925,7 @@ addParticles (const ParticleContainerType& other, bool local) auto& plevel = GetParticles(lev); for(MFIter mfi = other.MakeMFIter(lev); mfi.isValid(); ++mfi) { - auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex()); + auto index = std::make_pair(mfi.index(), mfi.LocalTileIndex()); if(plevel_other.find(index) == plevel_other.end()) continue; auto& ptile = plevel[index]; @@ -937,10 +936,10 @@ addParticles (const ParticleContainerType& other, bool local) auto dst_index = ptile.numParticles(); ptile.resize(dst_index + np); - amrex::copyParticles(ptile, ptile_other, 0, dst_index, np); + amrex::copyParticles(ptile, ptile_other, 0, dst_index, np); } } - + if (not local) Redistribute(); } diff --git a/Src/Particle/AMReX_ParticleHDF5.H b/Src/Particle/AMReX_ParticleHDF5.H index 528f4fc35a6..359b0c34fb0 100644 --- a/Src/Particle/AMReX_ParticleHDF5.H +++ b/Src/Particle/AMReX_ParticleHDF5.H @@ -296,35 +296,31 @@ ParticleContainer std::string pdir = dir; if ( not pdir.empty() and pdir[pdir.size()-1] != '/') pdir += '/'; - - if ( ! levelDirectoriesCreated) - { - if (ParallelDescriptor::IOProcessor()) { - if ( ! amrex::UtilCreateDirectory(pdir, 0755)) - amrex::CreateDirectoryFailed(pdir); - - int set_stripe = 0; - char setstripe[1024]; - int stripe_count = 128; - int stripe_size = 1; - char *stripe_count_str = getenv("HDF5_STRIPE_COUNT"); - char *stripe_size_str = getenv("HDF5_STRIPE_SIZE"); - if (stripe_count_str) { - stripe_count = atoi(stripe_count_str); - set_stripe = 1; - } - if (stripe_size_str) { - stripe_size = atoi(stripe_size_str); - set_stripe = 1; - } - if (set_stripe == 1) { - sprintf(setstripe, "lfs setstripe -c %d -S %dm %s", stripe_count, stripe_size, pdir.c_str()); - std::cout << "Setting stripe parameters for HDF5 output: " << setstripe << std::endl; - amrex::ignore_unused(std::system(setstripe)); - } + if (ParallelDescriptor::IOProcessor()) { + if ( ! amrex::UtilCreateDirectory(pdir, 0755)) + amrex::CreateDirectoryFailed(pdir); + + int set_stripe = 0; + char setstripe[1024]; + int stripe_count = 128; + int stripe_size = 1; + char *stripe_count_str = getenv("HDF5_STRIPE_COUNT"); + char *stripe_size_str = getenv("HDF5_STRIPE_SIZE"); + if (stripe_count_str) { + stripe_count = atoi(stripe_count_str); + set_stripe = 1; + } + if (stripe_size_str) { + stripe_size = atoi(stripe_size_str); + set_stripe = 1; + } + if (set_stripe == 1) { + sprintf(setstripe, "lfs setstripe -c %d -S %dm %s", stripe_count, stripe_size, pdir.c_str()); + std::cout << "Setting stripe parameters for HDF5 output: " << setstripe << std::endl; + amrex::ignore_unused(std::system(setstripe)); } - ParallelDescriptor::Barrier(); } + ParallelDescriptor::Barrier(); Long nparticles = 0; int maxnextid; diff --git a/Src/Particle/AMReX_ParticleIO.H b/Src/Particle/AMReX_ParticleIO.H index e26e5f9cd92..c73d6edbbbb 100644 --- a/Src/Particle/AMReX_ParticleIO.H +++ b/Src/Particle/AMReX_ParticleIO.H @@ -1,6 +1,8 @@ #ifndef AMREX_PARTICLEIO_H #define AMREX_PARTICLEIO_H +#include + template void ParticleContainer @@ -51,7 +53,7 @@ ParticleContainer tmp_real_comp_names.push_back(real_comp_names[i]); } } - + Vector write_int_comp; Vector tmp_int_comp_names; for (int i = 0; i < NStructInt + NumIntComps(); ++i ) @@ -91,7 +93,7 @@ ParticleContainer ss << "real_comp" << i; real_comp_names.push_back(ss.str()); } - + Vector write_int_comp; Vector int_comp_names; for (int i = 0; i < NStructInt + NumIntComps(); ++i ) @@ -101,7 +103,7 @@ ParticleContainer ss << "int_comp" << i; int_comp_names.push_back(ss.str()); } - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, [=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p) -> int @@ -124,7 +126,7 @@ ParticleContainer ss << "real_comp" << i; real_comp_names.push_back(ss.str()); } - + Vector write_int_comp; Vector int_comp_names; for (int i = 0; i < NStructInt + NumIntComps(); ++i ) @@ -134,7 +136,7 @@ ParticleContainer ss << "int_comp" << i; int_comp_names.push_back(ss.str()); } - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, [=] AMREX_GPU_HOST_DEVICE (const SuperParticleType& p) @@ -149,7 +151,7 @@ ParticleContainer ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names) const -{ +{ AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); AMREX_ASSERT( int_comp_names.size() == NStructInt + NumIntComps() ); @@ -173,7 +175,7 @@ void ParticleContainer ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names) const -{ +{ AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); Vector write_real_comp; @@ -189,7 +191,7 @@ ParticleContainer ss << "int_comp" << i; int_comp_names.push_back(ss.str()); } - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, @@ -206,10 +208,10 @@ ParticleContainer const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp) const -{ +{ AMREX_ASSERT(write_real_comp.size() == NStructReal + NumRealComps()); AMREX_ASSERT(write_int_comp.size() == NStructInt + NArrayInt ); - + Vector real_comp_names; for (int i = 0; i < NStructReal + NumRealComps(); ++i ) { @@ -239,12 +241,12 @@ void ParticleContainer:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, - const Vector& write_int_comp, + const Vector& write_int_comp, const Vector& real_comp_names, const Vector& int_comp_names) const { BL_PROFILE("ParticleContainer::WritePlotFile()"); - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, @@ -269,7 +271,7 @@ ParticleContainer ss << "real_comp" << i; real_comp_names.push_back(ss.str()); } - + Vector write_int_comp; Vector int_comp_names; for (int i = 0; i < NStructInt + NumIntComps(); ++i ) @@ -279,7 +281,7 @@ ParticleContainer ss << "int_comp" << i; int_comp_names.push_back(ss.str()); } - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, std::forward(f)); @@ -292,7 +294,7 @@ ParticleContainer ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, const Vector& int_comp_names, F&& f) const -{ +{ AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); AMREX_ASSERT( int_comp_names.size() == NStructInt + NArrayInt ); @@ -314,7 +316,7 @@ void ParticleContainer ::WritePlotFile (const std::string& dir, const std::string& name, const Vector& real_comp_names, F&& f) const -{ +{ AMREX_ASSERT(real_comp_names.size() == NStructReal + NumRealComps()); Vector write_real_comp; @@ -330,7 +332,7 @@ ParticleContainer ss << "int_comp" << i; int_comp_names.push_back(ss.str()); } - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, @@ -345,10 +347,10 @@ ParticleContainer const std::string& name, const Vector& write_real_comp, const Vector& write_int_comp, F&& f) const -{ +{ AMREX_ASSERT(write_real_comp.size() == NStructReal + NumRealComps()); AMREX_ASSERT(write_int_comp.size() == NStructInt + NumIntComps() ); - + Vector real_comp_names; for (int i = 0; i < NStructReal + NumRealComps(); ++i ) { @@ -376,13 +378,13 @@ void ParticleContainer:: WritePlotFile (const std::string& dir, const std::string& name, const Vector& write_real_comp, - const Vector& write_int_comp, + const Vector& write_int_comp, const Vector& real_comp_names, const Vector& int_comp_names, F&& f) const { BL_PROFILE("ParticleContainer::WritePlotFile()"); - + WriteBinaryParticleData(dir, name, write_real_comp, write_int_comp, real_comp_names, int_comp_names, @@ -400,302 +402,19 @@ ParticleContainer const Vector& int_comp_names, F&& f) const { - BL_PROFILE("ParticleContainer::WriteBinaryParticleData()"); - AMREX_ASSERT(OK()); - - AMREX_ASSERT(sizeof(typename ParticleType::RealType) == 4 || - sizeof(typename ParticleType::RealType) == 8); - - const int NProcs = ParallelDescriptor::NProcs(); - const int IOProcNumber = ParallelDescriptor::IOProcessorNumber(); - const Real strttime = amrex::second(); - - AMREX_ALWAYS_ASSERT(real_comp_names.size() == NumRealComps() + NStructReal); - AMREX_ALWAYS_ASSERT( int_comp_names.size() == NumIntComps() + NStructInt); - - std::string pdir = dir; - if ( not pdir.empty() and pdir[pdir.size()-1] != '/') pdir += '/'; - pdir += name; - - if ( ! levelDirectoriesCreated) - { - if (ParallelDescriptor::IOProcessor()) - if ( ! amrex::UtilCreateDirectory(pdir, 0755)) - amrex::CreateDirectoryFailed(pdir); - ParallelDescriptor::Barrier(); - } - - std::ofstream HdrFile; - - Long nparticles = 0; - int maxnextid; - - // evaluate f for every particle to determine which ones to output - Vector, Gpu::DeviceVector > > particle_io_flags(m_particles.size()); - for (int lev = 0; lev < m_particles.size(); lev++) - { - const auto& pmap = m_particles[lev]; - for (const auto& kv : pmap) - { - const auto ptd = kv.second.getConstParticleTileData(); - const auto np = kv.second.numParticles(); - particle_io_flags[lev][kv.first].resize(np, 0); - auto pflags = particle_io_flags[lev][kv.first].data(); - AMREX_HOST_DEVICE_FOR_1D( np, k, - { - const auto p = ptd.getSuperParticle(k); - pflags[k] = f(p); - }); - } - } - - Gpu::Device::synchronize(); - - if(usePrePost) - { - nparticles = nparticlesPrePost; - maxnextid = maxnextidPrePost; - } - else - { - nparticles = 0; - maxnextid = ParticleType::NextID(); - - for (int lev = 0; lev < m_particles.size(); lev++) - { - const auto& pmap = m_particles[lev]; - for (const auto& kv : pmap) - { - const auto& pflags = particle_io_flags[lev][kv.first]; - for (int k = 0; k < kv.second.numParticles(); ++k) - { - if (pflags[k]) nparticles++; - } - } - } - - ParallelDescriptor::ReduceLongSum(nparticles, IOProcNumber); - ParticleType::NextID(maxnextid); - ParallelDescriptor::ReduceIntMax(maxnextid, IOProcNumber); - } - - if (ParallelDescriptor::IOProcessor()) - { - std::string HdrFileName = pdir; - - if ( ! HdrFileName.empty() && HdrFileName[HdrFileName.size()-1] != '/') - HdrFileName += '/'; - - HdrFileName += "Header"; - HdrFileNamePrePost = HdrFileName; - - HdrFile.open(HdrFileName.c_str(), std::ios::out|std::ios::trunc); - - if ( ! HdrFile.good()) amrex::FileOpenFailed(HdrFileName); - - // - // First thing written is our Checkpoint/Restart version string. - // We append "_single" or "_double" to the version string indicating - // whether we're using "float" or "double" floating point data in the - // particles so that we can Restart from the checkpoint files. - // - if (sizeof(typename ParticleType::RealType) == 4) - { - HdrFile << ParticleType::Version() << "_single" << '\n'; - } - else - { - HdrFile << ParticleType::Version() << "_double" << '\n'; - } - - int num_output_real = 0; - for (int i = 0; i < NumRealComps() + NStructReal; ++i) - if (write_real_comp[i]) ++num_output_real; - - int num_output_int = 0; - for (int i = 0; i < NumIntComps() + NStructInt; ++i) - if (write_int_comp[i]) ++num_output_int; - - // AMREX_SPACEDIM and N for sanity checking. - HdrFile << AMREX_SPACEDIM << '\n'; - - // The number of extra real parameters - HdrFile << num_output_real << '\n'; - - // Real component names - for (int i = 0; i < NStructReal + NumRealComps(); ++i ) - if (write_real_comp[i]) HdrFile << real_comp_names[i] << '\n'; - - // The number of extra int parameters - HdrFile << num_output_int << '\n'; - - // int component names - for (int i = 0; i < NStructInt + NumIntComps(); ++i ) - if (write_int_comp[i]) HdrFile << int_comp_names[i] << '\n'; - - bool is_checkpoint = true; // legacy - HdrFile << is_checkpoint << '\n'; - - // The total number of particles. - HdrFile << nparticles << '\n'; - - // The value of nextid that we need to restore on restart. - HdrFile << maxnextid << '\n'; - - // Then the finest level of the AMR hierarchy. - HdrFile << finestLevel() << '\n'; - - // Then the number of grids at each level. - for (int lev = 0; lev <= finestLevel(); lev++) - HdrFile << ParticleBoxArray(lev).size() << '\n'; - } - - // We want to write the data out in parallel. - // We'll allow up to nOutFiles active writers at a time. - int nOutFiles(256); - - ParmParse pp("particles"); - pp.query("particles_nfiles",nOutFiles); - if(nOutFiles == -1) nOutFiles = NProcs; - nOutFiles = std::max(1, std::min(nOutFiles,NProcs)); - nOutFilesPrePost = nOutFiles; - - for (int lev = 0; lev <= finestLevel(); lev++) - { - bool gotsome; - if(usePrePost) - { - gotsome = (nParticlesAtLevelPrePost[lev] > 0); - } - else - { - gotsome = (NumberOfParticlesAtLevel(lev) > 0); - } - - // We store the particles at each level in their own subdirectory. - std::string LevelDir = pdir; - - if (gotsome) - { - if ( ! LevelDir.empty() && LevelDir[LevelDir.size()-1] != '/') LevelDir += '/'; - - LevelDir = amrex::Concatenate(LevelDir + "Level_", lev, 1); - - if ( ! levelDirectoriesCreated) { - if (ParallelDescriptor::IOProcessor()) - if ( ! amrex::UtilCreateDirectory(LevelDir, 0755)) - amrex::CreateDirectoryFailed(LevelDir); - // - // Force other processors to wait until directory is built. - // - ParallelDescriptor::Barrier(); - } - } - - // Write out the header for each particle - if (gotsome and ParallelDescriptor::IOProcessor()) { - std::string HeaderFileName = LevelDir; - HeaderFileName += "/Particle_H"; - std::ofstream ParticleHeader(HeaderFileName); - - ParticleBoxArray(lev).writeOn(ParticleHeader); - ParticleHeader << '\n'; - - ParticleHeader.flush(); - ParticleHeader.close(); - } - - MFInfo info; - info.SetAlloc(false); - MultiFab state(ParticleBoxArray(lev), - ParticleDistributionMap(lev), - 1,0,info); - - // We eventually want to write out the file name and the offset - // into that file into which each grid of particles is written. - Vector which(state.size(),0); - Vector count(state.size(),0); - Vector where(state.size(),0); - - std::string filePrefix(LevelDir); - filePrefix += '/'; - filePrefix += ParticleType::DataPrefix(); - if(usePrePost) { - filePrefixPrePost[lev] = filePrefix; - } - bool groupSets(false), setBuf(true); - - if (gotsome) - { - for(NFilesIter nfi(nOutFiles, filePrefix, groupSets, setBuf); nfi.ReadyToWrite(); ++nfi) - { - std::ofstream& myStream = (std::ofstream&) nfi.Stream(); - WriteParticles(lev, myStream, nfi.FileNumber(), which, count, where, - write_real_comp, write_int_comp, particle_io_flags); - } - - if(usePrePost) { - whichPrePost[lev] = which; - countPrePost[lev] = count; - wherePrePost[lev] = where; - } else { - ParallelDescriptor::ReduceIntSum (which.dataPtr(), which.size(), IOProcNumber); - ParallelDescriptor::ReduceIntSum (count.dataPtr(), count.size(), IOProcNumber); - ParallelDescriptor::ReduceLongSum(where.dataPtr(), where.size(), IOProcNumber); - } - } - - if (ParallelDescriptor::IOProcessor()) - { - if(usePrePost) { - // ---- write to the header and unlink in CheckpointPost - } else { - for (int j = 0; j < state.size(); j++) - { - HdrFile << which[j] << ' ' << count[j] << ' ' << where[j] << '\n'; - } - - if (gotsome && doUnlink) - { - // Unlink any zero-length data files. - Vector cnt(nOutFiles,0); - - for (int i = 0, N=count.size(); i < N; i++) { - cnt[which[i]] += count[i]; - } - - for (int i = 0, N=cnt.size(); i < N; i++) - { - if (cnt[i] == 0) - { - std::string FullFileName = NFilesIter::FileName(i, filePrefix); - FileSystem::Remove(FullFileName); - } - } - } - } - } - } - - if (ParallelDescriptor::IOProcessor()) + if (AsyncOut::UseAsyncOut()) { + WriteBinaryParticleDataAsync(*this, dir, name, + write_real_comp, write_int_comp, + real_comp_names, int_comp_names); + } else { - HdrFile.flush(); - HdrFile.close(); - if ( ! HdrFile.good()) - { - amrex::Abort("ParticleContainer::Checkpoint(): problem writing HdrFile"); - } - } - - if (m_verbose > 1) - { - Real stoptime = amrex::second() - strttime; - ParallelDescriptor::ReduceRealMax(stoptime, IOProcNumber); - amrex::Print() << "ParticleContainer::Checkpoint() time: " << stoptime << '\n'; + WriteBinaryParticleDataSync(*this, dir, name, + write_real_comp, write_int_comp, + real_comp_names, int_comp_names, + std::forward(f)); } } - template void ParticleContainer @@ -704,13 +423,13 @@ ParticleContainer if( ! usePrePost) { return; } - + BL_PROFILE("ParticleContainer::CheckpointPre()"); - + const int IOProcNumber = ParallelDescriptor::IOProcessorNumber(); Long nparticles = 0; int maxnextid = ParticleType::NextID(); - + for (int lev = 0; lev < m_particles.size(); lev++) { const auto& pmap = m_particles[lev]; for (const auto& kv : pmap) { @@ -730,23 +449,23 @@ ParticleContainer ParticleType::NextID(maxnextid); ParallelDescriptor::ReduceIntMax(maxnextid, IOProcNumber); - + nparticlesPrePost = nparticles; maxnextidPrePost = maxnextid; - + nParticlesAtLevelPrePost.clear(); nParticlesAtLevelPrePost.resize(finestLevel() + 1, 0); for(int lev(0); lev <= finestLevel(); ++lev) { nParticlesAtLevelPrePost[lev] = NumberOfParticlesAtLevel(lev); } - + whichPrePost.clear(); whichPrePost.resize(finestLevel() + 1); countPrePost.clear(); countPrePost.resize(finestLevel() + 1); wherePrePost.clear(); wherePrePost.resize(finestLevel() + 1); - + filePrefixPrePost.clear(); filePrefixPrePost.resize(finestLevel() + 1); } @@ -760,34 +479,34 @@ ParticleContainer if( ! usePrePost) { return; } - + BL_PROFILE("ParticleContainer::CheckpointPost()"); - + const int IOProcNumber = ParallelDescriptor::IOProcessorNumber(); std::ofstream HdrFile; HdrFile.open(HdrFileNamePrePost.c_str(), std::ios::out | std::ios::app); - + for(int lev(0); lev <= finestLevel(); ++lev) { ParallelDescriptor::ReduceIntSum (whichPrePost[lev].dataPtr(), whichPrePost[lev].size(), IOProcNumber); ParallelDescriptor::ReduceIntSum (countPrePost[lev].dataPtr(), countPrePost[lev].size(), IOProcNumber); ParallelDescriptor::ReduceLongSum(wherePrePost[lev].dataPtr(), wherePrePost[lev].size(), IOProcNumber); - - + + if(ParallelDescriptor::IOProcessor()) { for(int j(0); j < whichPrePost[lev].size(); ++j) { HdrFile << whichPrePost[lev][j] << ' ' << countPrePost[lev][j] << ' ' << wherePrePost[lev][j] << '\n'; } - + const bool gotsome = (nParticlesAtLevelPrePost[lev] > 0); if(gotsome && doUnlink) { // BL_PROFILE_VAR("PC::Checkpoint:unlink", unlink_post); // Unlink any zero-length data files. Vector cnt(nOutFilesPrePost,0); - + for(int i(0), N = countPrePost[lev].size(); i < N; ++i) { cnt[whichPrePost[lev][i]] += countPrePost[lev][i]; } - + for(int i(0), N = cnt.size(); i < N; ++i) { if(cnt[i] == 0) { std::string FullFileName = NFilesIter::FileName(i, filePrefixPrePost[lev]); @@ -797,7 +516,7 @@ ParticleContainer } } } - + if(ParallelDescriptor::IOProcessor()) { HdrFile.flush(); HdrFile.close(); @@ -845,39 +564,39 @@ ParticleContainer const int tile = kv.first.second; tile_map[grid].push_back(tile); const auto& pflags = particle_io_flags[lev].at(kv.first); - + // Only write out valid particles. - int cnt = 0; + int cnt = 0; for (int k = 0; k < kv.second.GetArrayOfStructs().numParticles(); ++k) { if (pflags[k]) cnt++; } - + count[grid] += cnt; } - + MFInfo info; info.SetAlloc(false); MultiFab state(ParticleBoxArray(lev), ParticleDistributionMap(lev), 1,0,info); - + for (MFIter mfi(state); mfi.isValid(); ++mfi) { const int grid = mfi.index(); - + which[grid] = fnum; where[grid] = VisMF::FileOffset(ofs); - + if (count[grid] == 0) continue; - + // First write out the integer data in binary. int num_output_int = 0; for (int i = 0; i < NumIntComps() + NStructInt; ++i) if (write_int_comp[i]) ++num_output_int; - + const int iChunkSize = 2 + num_output_int; Vector istuff(count[grid]*iChunkSize); int* iptr = istuff.dataPtr(); - + for (unsigned i = 0; i < tile_map[grid].size(); i++) { auto ptile_index = std::make_pair(grid, tile_map[grid][i]); const auto& pbox = m_particles[lev].at(ptile_index); @@ -913,19 +632,19 @@ ParticleContainer } } } - + writeIntData(istuff.dataPtr(), istuff.size(), ofs); ofs.flush(); // Some systems require this flush() (probably due to a bug) - + // Write the Real data in binary. int num_output_real = 0; for (int i = 0; i < NumRealComps() + NStructReal; ++i) if (write_real_comp[i]) ++num_output_real; - + const int rChunkSize = AMREX_SPACEDIM + num_output_real; Vector rstuff(count[grid]*rChunkSize); typename ParticleType::RealType* rptr = rstuff.dataPtr(); - + for (unsigned i = 0; i < tile_map[grid].size(); i++) { auto ptile_index = std::make_pair(grid, tile_map[grid][i]); const auto& pbox = m_particles[lev].at(ptile_index); @@ -984,9 +703,9 @@ ParticleContainer BL_PROFILE("ParticleContainer::Restart()"); AMREX_ASSERT(!dir.empty()); AMREX_ASSERT(!file.empty()); - + const Real strttime = amrex::second(); - + int DATA_Digits_Read(5); ParmParse pp("particles"); pp.query("datadigits_read",DATA_Digits_Read); @@ -999,16 +718,16 @@ ParticleContainer if (!HdrFileName.empty() && HdrFileName[HdrFileName.size()-1] != '/') HdrFileName += '/'; HdrFileName += "Header"; - + Vector fileCharPtr; ParallelDescriptor::ReadAndBcastFile(HdrFileName, fileCharPtr); std::string fileCharPtrString(fileCharPtr.dataPtr()); std::istringstream HdrFile(fileCharPtrString, std::istringstream::in); - + std::string version; HdrFile >> version; AMREX_ASSERT(!version.empty()); - + // What do our version strings mean? // "Version_One_Dot_Zero" -- hard-wired to write out in double precision. // "Version_One_Dot_One" -- can write out either as either single or double precision. @@ -1038,45 +757,45 @@ ParticleContainer msg += version; amrex::Abort(msg.c_str()); } - + int dm; HdrFile >> dm; if (dm != AMREX_SPACEDIM) amrex::Abort("ParticleContainer::Restart(): dm != AMREX_SPACEDIM"); - + int nr; HdrFile >> nr; if (nr != NStructReal + NumRealComps()) amrex::Abort("ParticleContainer::Restart(): nr != NStructReal + NumRealComps()"); - + std::string comp_name; for (int i = 0; i < nr; ++i) HdrFile >> comp_name; - + int ni; HdrFile >> ni; if (ni != NStructInt + NumIntComps()) amrex::Abort("ParticleContainer::Restart(): ni != NStructInt"); - + for (int i = 0; i < ni; ++i) HdrFile >> comp_name; - + bool checkpoint; HdrFile >> checkpoint; - + Long nparticles; HdrFile >> nparticles; AMREX_ASSERT(nparticles >= 0); - + int maxnextid; HdrFile >> maxnextid; AMREX_ASSERT(maxnextid > 0); ParticleType::NextID(maxnextid); - + int finest_level_in_file; HdrFile >> finest_level_in_file; AMREX_ASSERT(finest_level_in_file >= 0); - + // Determine whether this is a dual-grid restart or not. Vector particle_box_arrays(finest_level_in_file + 1); bool dual_grid = false; @@ -1091,7 +810,7 @@ ParticleContainer if (amrex::FileExists(phdr_name)) { have_pheaders = true; break; - } + } } if (have_pheaders) @@ -1103,18 +822,18 @@ ParticleContainer phdr_name += "/Particle_H"; if (not amrex::FileExists(phdr_name)) continue; - + Vector phdr_chars; ParallelDescriptor::ReadAndBcastFile(phdr_name, phdr_chars); std::string phdr_string(phdr_chars.dataPtr()); std::istringstream phdr_file(phdr_string, std::istringstream::in); - + if (lev > finestLevel()) { dual_grid = true; break; } - + particle_box_arrays[lev].readFrom(phdr_file); if (not particle_box_arrays[lev].CellEqual(ParticleBoxArray(lev))) dual_grid = true; } @@ -1130,7 +849,7 @@ ParticleContainer SetParticleDistributionMap(lev, pdm); } } - + Vector ngrids(finest_level_in_file+1); for (int lev = 0; lev <= finest_level_in_file; lev++) { HdrFile >> ngrids[lev]; @@ -1139,13 +858,13 @@ ParticleContainer AMREX_ASSERT(ngrids[lev] == int(ParticleBoxArray(lev).size())); } } - + resizeData(); - + if (finest_level_in_file > finestLevel()) { m_particles.resize(finest_level_in_file+1); } - + for (int lev = 0; lev <= finest_level_in_file; lev++) { Vector which(ngrids[lev]); Vector count(ngrids[lev]); @@ -1153,65 +872,65 @@ ParticleContainer for (int i = 0; i < ngrids[lev]; i++) { HdrFile >> which[i] >> count[i] >> where[i]; } - + Vector grids_to_read; if (lev <= finestLevel()) { for (MFIter mfi(*m_dummy_mf[lev]); mfi.isValid(); ++mfi) { grids_to_read.push_back(mfi.index()); } } else { - + // we lost a level on restart. we still need to read in particles // on finer levels, and put them in the right place via Redistribute() - + const int rank = ParallelDescriptor::MyProc(); const int NReaders = ParticleType::MaxReaders(); if (rank >= NReaders) return; - + const int Navg = ngrids[lev] / NReaders; const int Nleft = ngrids[lev] - Navg * NReaders; - + int lo, hi; if (rank < Nleft) { lo = rank*(Navg + 1); hi = lo + Navg + 1; - } + } else { lo = rank * Navg + Nleft; hi = lo + Navg; } - + for (int i = lo; i < hi; ++i) { grids_to_read.push_back(i); } } - + for(int igrid = 0; igrid < static_cast(grids_to_read.size()); ++igrid) { const int grid = grids_to_read[igrid]; - + if (count[grid] <= 0) continue; - + // The file names in the header file are relative. std::string name = fullname; - + if (!name.empty() && name[name.size()-1] != '/') name += '/'; - + name += "Level_"; name += amrex::Concatenate("", lev, 1); name += '/'; name += ParticleType::DataPrefix(); name += amrex::Concatenate("", which[grid], DATA_Digits_Read); - + std::ifstream ParticleFile; - + ParticleFile.open(name.c_str(), std::ios::in | std::ios::binary); - + if (!ParticleFile.good()) amrex::FileOpenFailed(name); - + ParticleFile.seekg(where[grid], std::ios::beg); - + if (how == "single") { ReadParticles(count[grid], grid, lev, ParticleFile, finest_level_in_file); } @@ -1223,20 +942,20 @@ ParticleContainer msg += how; amrex::Error(msg.c_str()); } - + ParticleFile.close(); - + if (!ParticleFile.good()) amrex::Abort("ParticleContainer::Restart(): problem reading particles"); } } - + Redistribute(); - + AMREX_ASSERT(OK()); - + if (m_verbose > 1) { - Real stoptime = amrex::second() - strttime; + Real stoptime = amrex::second() - strttime; ParallelDescriptor::ReduceRealMax(stoptime, ParallelDescriptor::IOProcessorNumber()); amrex::Print() << "ParticleContainer::Restart() time: " << stoptime << '\n'; } @@ -1259,16 +978,16 @@ ParticleContainer const int iChunkSize = 2 + NStructInt + NumIntComps(); Vector istuff(cnt*iChunkSize); readIntData(istuff.dataPtr(), istuff.size(), ifs, FPC::NativeIntDescriptor()); - + // Then the real data in binary. const int rChunkSize = AMREX_SPACEDIM + NStructReal + NumRealComps(); Vector rstuff(cnt*rChunkSize); ReadParticleRealData(rstuff.dataPtr(), rstuff.size(), ifs); - + // Now reassemble the particles. int* iptr = istuff.dataPtr(); RTYPE* rptr = rstuff.dataPtr(); - + ParticleType p; ParticleLocData pld; @@ -1318,7 +1037,7 @@ ParticleContainer host_real_attribs[lev][ind].resize(NumRealComps()); host_int_attribs[lev][ind].resize(NumIntComps()); - + // add the struct host_particles[lev][ind].push_back(p); @@ -1327,12 +1046,12 @@ ParticleContainer host_real_attribs[lev][ind][icomp].push_back(*rptr); ++rptr; } - + // ... and int array data for (int icomp = 0; icomp < NumIntComps(); icomp++) { host_int_attribs[lev][ind][icomp].push_back(*iptr); ++iptr; - } + } } for (int host_lev = 0; host_lev < static_cast(host_particles.size()); ++host_lev) @@ -1341,22 +1060,22 @@ ParticleContainer auto grid = kv.first.first; auto tile = kv.first.second; const auto& src_tile = kv.second; - + auto& dst_tile = DefineAndReturnParticleTile(host_lev, grid, tile); auto old_size = dst_tile.GetArrayOfStructs().size(); auto new_size = old_size + src_tile.size(); dst_tile.resize(new_size); - + Gpu::copy(Gpu::hostToDevice, src_tile.begin(), src_tile.end(), dst_tile.GetArrayOfStructs().begin() + old_size); - + for (int i = 0; i < NumRealComps(); ++i) { Gpu::copy(Gpu::hostToDevice, host_real_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), host_real_attribs[host_lev][std::make_pair(grid,tile)][i].end(), dst_tile.GetStructOfArrays().GetRealData(i).begin() + old_size); } - + for (int i = 0; i < NumIntComps(); ++i) { Gpu::copy(Gpu::hostToDevice, host_int_attribs[host_lev][std::make_pair(grid,tile)][i].begin(), @@ -1365,7 +1084,7 @@ ParticleContainer } } } - + Gpu::streamSynchronize(); } @@ -1399,7 +1118,7 @@ ParticleContainer::WriteAsciiFil } } } - + // // And send count to I/O processor. // @@ -1422,7 +1141,7 @@ ParticleContainer::WriteAsciiFil File << NStructInt << '\n'; File << NumRealComps() << '\n'; File << NumIntComps() << '\n'; - + File.flush(); File.close(); diff --git a/Src/Particle/AMReX_ParticleReduce.H b/Src/Particle/AMReX_ParticleReduce.H index 0962746755a..f7b26e04b58 100644 --- a/Src/Particle/AMReX_ParticleReduce.H +++ b/Src/Particle/AMReX_ParticleReduce.H @@ -16,7 +16,7 @@ namespace amrex /** * \brief A general reduction method for the particles in a ParticleContainer that can run on either CPUs or GPUs. * This version operates over all particles on all levels. - * + * * This version uses "Sum" as the reduction operation. The quantity reduced over is an arbitrary function * of a "superparticle", which contains all the data in the particle type, whether it is stored in AoS or * SoA form. @@ -25,12 +25,12 @@ namespace amrex * call the MPI reduction operations described in ParallelDescriptor if they want that behavior. * * \tparam PC the ParticleContainer type - * \tparam F a function object + * \tparam F a function object * - * \param pc the ParticleContainer to operate on + * \param pc the ParticleContainer to operate on * \param f a function that takes a "superparticle" and returns the value to be reduced over all particles. * - */ + */ template ::value, int> foo = 0> auto ReduceSum (PC const& pc, F&& f) -> decltype(f(typename PC::SuperParticleType())) diff --git a/Src/Particle/AMReX_ParticleTransformation.H b/Src/Particle/AMReX_ParticleTransformation.H index 7f44453c9b0..efeb36da842 100644 --- a/Src/Particle/AMReX_ParticleTransformation.H +++ b/Src/Particle/AMReX_ParticleTransformation.H @@ -11,7 +11,7 @@ namespace amrex { /** - * \brief A general single particle copying routine that can run on the GPU. + * \brief A general single particle copying routine that can run on the GPU. * * \tparam NSR number of extra reals in the particle struct * \tparam NSI number of extra ints in the particle struct @@ -26,8 +26,8 @@ namespace amrex */ template AMREX_GPU_HOST_DEVICE AMREX_INLINE -void copyParticle (const ParticleTileData& dst, - const ConstParticleTileData& src, +void copyParticle (const ParticleTileData& dst, + const ConstParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); @@ -45,7 +45,7 @@ void copyParticle (const ParticleTileData& dst, } /** - * \brief A general single particle copying routine that can run on the GPU. + * \brief A general single particle copying routine that can run on the GPU. * * \tparam NSR number of extra reals in the particle struct * \tparam NSI number of extra ints in the particle struct @@ -60,8 +60,8 @@ void copyParticle (const ParticleTileData& dst, */ template AMREX_GPU_HOST_DEVICE AMREX_INLINE -void copyParticle (const ParticleTileData& dst, - const ParticleTileData& src, +void copyParticle (const ParticleTileData& dst, + const ParticleTileData& src, int src_i, int dst_i) noexcept { AMREX_ASSERT(dst.m_num_runtime_real == src.m_num_runtime_real); @@ -77,9 +77,9 @@ void copyParticle (const ParticleTileData& dst, for (int j = 0; j < dst.m_num_runtime_int; ++j) dst.m_runtime_idata[j][dst_i] = src.m_runtime_idata[j][src_i]; } - + /** - * \brief A general single particle swapping routine that can run on the GPU. + * \brief A general single particle swapping routine that can run on the GPU. * * \tparam NSR number of extra reals in the particle struct * \tparam NSI number of extra ints in the particle struct @@ -111,20 +111,21 @@ void swapParticle (const ParticleTileData& dst, for (int j = 0; j < dst.m_num_runtime_int; ++j) amrex::Swap(dst.m_runtime_idata[j][dst_i], src.m_runtime_idata[j][src_i]); } - + /** - * \brief Copy particles from src to dst. This version copies all the + * \brief Copy particles from src to dst. This version copies all the * particles, writing them to the beginning of dst. - * - * \tparam PTile the particle tile type + * + * \tparam DstTile the dst particle tile type + * \tparam SrcTile the src particle tile type * * \param dst the destination tile * \param src the source tile * \param f the function that will be applied to each particle * - */ -template -void copyParticles (PTile& dst, const PTile& src) noexcept + */ +template +void copyParticles (DstTile& dst, const SrcTile& src) noexcept { auto np = src.numParticles(); copyParticles(dst, src, 0, 0, np); @@ -133,8 +134,9 @@ void copyParticles (PTile& dst, const PTile& src) noexcept /** * \brief Copy particles from src to dst. This version copies n particles * starting at index src_start, writing the result starting at dst_start. - * - * \tparam PTile the particle tile type + * + * \tparam DstTile the dst particle tile type + * \tparam SrcTile the src particle tile type * \tparam Index the index type, e.g. unsigned int * \tparam N the size type, e.g. Long * @@ -144,9 +146,9 @@ void copyParticles (PTile& dst, const PTile& src) noexcept * \param dst_start the offset at which to start writing particles to dst * */ -template ::value, int> foo = 0> -void copyParticles (PTile& dst, const PTile& src, +void copyParticles (DstTile& dst, const SrcTile& src, Index src_start, Index dst_start, N n) noexcept { const auto src_data = src.getConstParticleTileData(); @@ -157,11 +159,11 @@ void copyParticles (PTile& dst, const PTile& src, copyParticle(dst_data, src_data, src_start+i, dst_start+i); }); } - + /** * \brief Apply the function f to all the particles in src, writing the * result to dst. This version does all the particles in src. - * + * * \tparam DstTile the dst particle tile type * \tparam SrcTile the src particle tile type * \tparam F a function object @@ -272,7 +274,8 @@ void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src, /** * \brief Conditionally copy particles from src to dst based on the value of mask. * - * \tparam PTile the particle tile type + * \tparam DstTile the dst particle tile type + * \tparam SrcTile the src particle tile type * \tparam Index the index type, e.g. unsigned int * * \param dst the destination tile @@ -280,9 +283,9 @@ void transformParticles (DstTile1& dst1, DstTile2& dst2, const SrcTile& src, * \param mask pointer to the mask - 1 means copy, 0 means don't copy * */ -template ::value, int> foo = 0> -Index filterParticles (PTile& dst, const PTile& src, const Index* mask) noexcept +Index filterParticles (DstTile& dst, const SrcTile& src, const Index* mask) noexcept { auto np = src.numParticles(); Gpu::DeviceVector offsets(np); @@ -309,7 +312,8 @@ Index filterParticles (PTile& dst, const PTile& src, const Index* mask) noexcept /** * \brief Conditionally copy particles from src to dst based on a predicate. * - * \tparam PTile the particle tile type + * \tparam DstTile the dst particle tile type + * \tparam SrcTile the src particle tile type * \tparam Pred a function object * * \param dst the destination tile @@ -317,11 +321,11 @@ Index filterParticles (PTile& dst, const PTile& src, const Index* mask) noexcept * \param p predicate function - particles will be copied if p returns true * */ -template -auto filterParticles (PTile& dst, const PTile& src, Pred&& p) noexcept - -> decltype(p(typename PTile::ConstParticleTileDataType(), 0)) +template +auto filterParticles (DstTile& dst, const SrcTile& src, Pred&& p) noexcept + -> decltype(p(typename SrcTile::ConstParticleTileDataType(), 0)) { - using Index = decltype(p(typename PTile::ConstParticleTileDataType(), 0)); + using Index = decltype(p(typename SrcTile::ConstParticleTileDataType(), 0)); auto np = src.numParticles(); Gpu::DeviceVector mask(np); diff --git a/Src/Particle/AMReX_Particles.H b/Src/Particle/AMReX_Particles.H index 54e4dee55c9..1c89f814622 100644 --- a/Src/Particle/AMReX_Particles.H +++ b/Src/Particle/AMReX_Particles.H @@ -113,9 +113,9 @@ struct ParticleInitType std::array real_array_data; std::array int_array_data; }; - + class ParticleContainerBase {}; - + /** * \brief A distributed container for Particles sorted onto the levels, grids, * and tiles of a block-structured AMR hierarachy. @@ -151,7 +151,7 @@ public: using ParticleType = Particle; //! \brief The type of the "SuperParticle" which stored all components in AoS form using SuperParticleType = Particle; - //! \brief The type of the Real data + //! \brief The type of the Real data using RealType = typename Particle::RealType; #ifdef AMREX_SINGLE_PRECISION_PARTICLES @@ -188,7 +188,7 @@ public: m_gdb(nullptr), m_runtime_comps_defined(false), m_num_runtime_real(0), - m_num_runtime_int(0) + m_num_runtime_int(0) { Initialize (); } @@ -199,7 +199,7 @@ public: //! \param gdb A pointer to a ParGDBBase, which contains pointers to the Geometry, //! DistributionMapping, and BoxArray objects that define the AMR hierarchy. Usually, //! this is generated by an AmrCore or AmrLevel object. - //! + //! ParticleContainer (ParGDBBase* gdb) : communicate_real_comp(NArrayReal, true), @@ -997,7 +997,7 @@ public: m_particles[lev][index].define(NumRuntimeRealComps(), NumRuntimeIntComps()); return ParticlesAt(lev, iter); } - + /** * \brief Functions depending the layout of the data. Use with caution. * @@ -1066,22 +1066,21 @@ public: static bool do_tiling; static IntVect tile_size; - void SetLevelDirectoriesCreated(bool tf) { - levelDirectoriesCreated = tf; - } - - void SetUsePrePost(bool tf) { + void SetUsePrePost (bool tf) const { usePrePost = tf; } - bool GetUsePrePost() { + bool GetUsePrePost() const { return usePrePost; } - void SetUseUnlink(bool tf) { + int GetMaxNextIDPrePost () const { return maxnextidPrePost; } + Long GetNParticlesPrePost () const { return nparticlesPrePost; } + + void SetUseUnlink (bool tf) const { doUnlink = tf; } - bool GetUseUnlink() { + bool GetUseUnlink() const { return doUnlink; } @@ -1126,6 +1125,25 @@ public: bool OnSameGrids (int level, const MultiFab& mf) const { return m_gdb->OnSameGrids(level, mf); } + int NumRuntimeRealComps () const { return m_num_runtime_real; } + int NumRuntimeIntComps () const { return m_num_runtime_int; } + + int NumRealComps () const { return NArrayReal + NumRuntimeRealComps(); } + int NumIntComps () const { return NArrayInt + NumRuntimeIntComps() ; } + + //! ---- variables for i/o optimization saved for pre and post checkpoint + mutable bool usePrePost; + mutable bool doUnlink; + int maxnextidPrePost; + mutable int nOutFilesPrePost; + Long nparticlesPrePost; + Vector nParticlesAtLevelPrePost; //!< ---- [level] + mutable Vector> whichPrePost; //!< ---- [level] + mutable Vector> countPrePost; //!< ---- [level] + mutable Vector> wherePrePost; //!< ---- [level] + mutable std::string HdrFileNamePrePost; + mutable Vector filePrefixPrePost; + protected: mutable amrex::Vector neighbor_procs; @@ -1160,11 +1178,14 @@ protected: bool EnforcePeriodicWhere (ParticleType& prt, ParticleLocData& pld, int lev_min = 0, int lev_max = -1, int local_grid=-1) const; +public: void - WriteParticles (int level, std::ofstream& ofs, int fnum, - Vector& which, Vector& count, Vector& where, - const Vector& write_real_comp, const Vector& write_int_comp, - const Vector, Gpu::DeviceVector>>& particle_io_flags) const; + WriteParticles (int level, std::ofstream& ofs, int fnum, + Vector& which, Vector& count, Vector& where, + const Vector& write_real_comp, const Vector& write_int_comp, + const Vector, Gpu::DeviceVector>>& particle_io_flags) const; +protected: + #ifdef AMREX_USE_HDF5 void WriteParticlesHDF5 ( hid_t grp, int level, Vector& count, Vector& where ) const; @@ -1174,7 +1195,7 @@ void ReadParticlesHDF5 (hsize_t offset, hsize_t cnt, int grd, int lev, hid_t int template void ReadParticles (int cnt, int grd, int lev, std::ifstream& ifs, int finest_level_in_file); - + void SetParticleSize (); void BuildRedistributeMask(int lev, int nghost=1) const; @@ -1189,32 +1210,12 @@ void ReadParticlesHDF5 (hsize_t offset, hsize_t cnt, int grd, int lev, hid_t int ParGDBBase* m_gdb; ParGDB m_gdb_object; - //! ---- variables for i/o optimization saved for pre and post checkpoint - bool levelDirectoriesCreated; - bool usePrePost; - bool doUnlink; - int maxnextidPrePost; - mutable int nOutFilesPrePost; - Long nparticlesPrePost; - Vector nParticlesAtLevelPrePost; //!< ---- [level] - mutable Vector> whichPrePost; //!< ---- [level] - mutable Vector> countPrePost; //!< ---- [level] - mutable Vector> wherePrePost; //!< ---- [level] - mutable std::string HdrFileNamePrePost; - mutable Vector filePrefixPrePost; - DenseBins m_bins; - + #ifdef AMREX_USE_GPU mutable AmrParticleLocator > m_particle_locator; #endif - int NumRuntimeRealComps () const { return m_num_runtime_real; } - int NumRuntimeIntComps () const { return m_num_runtime_int; } - - int NumRealComps () const { return NArrayReal + NumRuntimeRealComps(); } - int NumIntComps () const { return NArrayInt + NumRuntimeIntComps() ; } - private: virtual void particlePostLocate(ParticleType& /*p*/, const ParticleLocData& /*pld*/, @@ -1234,7 +1235,7 @@ private: bool m_runtime_comps_defined; int m_num_runtime_real; int m_num_runtime_int; - + size_t particle_size, superparticle_size; int num_real_comm_comps, num_int_comm_comps; Vector m_particles; diff --git a/Src/Particle/AMReX_WriteBinaryParticleData.H b/Src/Particle/AMReX_WriteBinaryParticleData.H new file mode 100644 index 00000000000..7b61438870b --- /dev/null +++ b/Src/Particle/AMReX_WriteBinaryParticleData.H @@ -0,0 +1,720 @@ +#ifndef AMREX_WRITE_BINARY_PARTICLE_DATA_H +#define AMREX_WRITE_BINARY_PARTICLE_DATA_H + +#include +#include + +// note - namespace + +struct KeepValidFilter +{ + template + AMREX_GPU_HOST_DEVICE + int operator() (const SrcData& src, int i) const noexcept + { + return (src.m_aos[i].id() > 0); + } +}; + +template +std::size_t PSizeInFile (const Vector& wrc, const Vector& wic) +{ + std::size_t rsize = sizeof(ParticleReal)*std::accumulate(wrc.begin(), wrc.end(), 0); + std::size_t isize = sizeof(int)*std::accumulate(wic.begin(), wic.end(), 0); + return rsize + isize + AMREX_SPACEDIM*sizeof(ParticleReal) + 2*sizeof(int); +} + +template ::value, int> foo = 0> +void WriteBinaryParticleDataSync (PC const& pc, + const std::string& dir, const std::string& name, + const Vector& write_real_comp, + const Vector& write_int_comp, + const Vector& real_comp_names, + const Vector& int_comp_names, + F&& f) +{ + BL_PROFILE("WriteBinaryParticleData()"); + AMREX_ASSERT(pc.OK()); + + AMREX_ASSERT(sizeof(typename PC::ParticleType::RealType) == 4 || + sizeof(typename PC::ParticleType::RealType) == 8); + + constexpr int NStructReal = PC::NStructReal; + constexpr int NStructInt = PC::NStructInt; + + const int NProcs = ParallelDescriptor::NProcs(); + const int IOProcNumber = ParallelDescriptor::IOProcessorNumber(); + + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + AMREX_ALWAYS_ASSERT( int_comp_names.size() == pc.NumIntComps() + NStructInt); + + std::string pdir = dir; + if ( not pdir.empty() and pdir[pdir.size()-1] != '/') pdir += '/'; + pdir += name; + + if (ParallelDescriptor::IOProcessor()) + { + if ( ! amrex::UtilCreateDirectory(pdir, 0755)) + { + amrex::CreateDirectoryFailed(pdir); + } + } + ParallelDescriptor::Barrier(); + + std::ofstream HdrFile; + + Long nparticles = 0; + int maxnextid; + + // evaluate f for every particle to determine which ones to output + Vector, Gpu::DeviceVector > > particle_io_flags(pc.GetParticles().size()); + for (int lev = 0; lev < pc.GetParticles().size(); lev++) + { + const auto& pmap = pc.GetParticles(lev); + for (const auto& kv : pmap) + { + const auto ptd = kv.second.getConstParticleTileData(); + const auto np = kv.second.numParticles(); + particle_io_flags[lev][kv.first].resize(np, 0); + auto pflags = particle_io_flags[lev][kv.first].data(); + AMREX_HOST_DEVICE_FOR_1D( np, k, + { + const auto p = ptd.getSuperParticle(k); + pflags[k] = f(p); + }); + } + } + + Gpu::Device::synchronize(); + + if(pc.GetUsePrePost()) + { + nparticles = pc.GetNParticlesPrePost(); + maxnextid = pc.GetMaxNextIDPrePost(); + } + else + { + nparticles = 0; + maxnextid = PC::ParticleType::NextID(); + + for (int lev = 0; lev < pc.GetParticles().size(); lev++) + { + const auto& pmap = pc.GetParticles(lev); + for (const auto& kv : pmap) + { + const auto& pflags = particle_io_flags[lev][kv.first]; + for (int k = 0; k < kv.second.numParticles(); ++k) + { + if (pflags[k]) nparticles++; + } + } + } + + ParallelDescriptor::ReduceLongSum(nparticles, IOProcNumber); + PC::ParticleType::NextID(maxnextid); + ParallelDescriptor::ReduceIntMax(maxnextid, IOProcNumber); + } + + if (ParallelDescriptor::IOProcessor()) + { + std::string HdrFileName = pdir; + + if ( ! HdrFileName.empty() && HdrFileName[HdrFileName.size()-1] != '/') + HdrFileName += '/'; + + HdrFileName += "Header"; + pc.HdrFileNamePrePost = HdrFileName; + + HdrFile.open(HdrFileName.c_str(), std::ios::out|std::ios::trunc); + + if ( ! HdrFile.good()) amrex::FileOpenFailed(HdrFileName); + + // + // First thing written is our Checkpoint/Restart version string. + // We append "_single" or "_double" to the version string indicating + // whether we're using "float" or "double" floating point data in the + // particles so that we can Restart from the checkpoint files. + // + if (sizeof(typename PC::ParticleType::RealType) == 4) + { + HdrFile << PC::ParticleType::Version() << "_single" << '\n'; + } + else + { + HdrFile << PC::ParticleType::Version() << "_double" << '\n'; + } + + int num_output_real = 0; + for (int i = 0; i < pc.NumRealComps() + NStructReal; ++i) + if (write_real_comp[i]) ++num_output_real; + + int num_output_int = 0; + for (int i = 0; i < pc.NumIntComps() + NStructInt; ++i) + if (write_int_comp[i]) ++num_output_int; + + // AMREX_SPACEDIM and N for sanity checking. + HdrFile << AMREX_SPACEDIM << '\n'; + + // The number of extra real parameters + HdrFile << num_output_real << '\n'; + + // Real component names + for (int i = 0; i < NStructReal + pc.NumRealComps(); ++i ) + if (write_real_comp[i]) HdrFile << real_comp_names[i] << '\n'; + + // The number of extra int parameters + HdrFile << num_output_int << '\n'; + + // int component names + for (int i = 0; i < NStructInt + pc.NumIntComps(); ++i ) + if (write_int_comp[i]) HdrFile << int_comp_names[i] << '\n'; + + bool is_checkpoint = true; // legacy + HdrFile << is_checkpoint << '\n'; + + // The total number of particles. + HdrFile << nparticles << '\n'; + + // The value of nextid that we need to restore on restart. + HdrFile << maxnextid << '\n'; + + // Then the finest level of the AMR hierarchy. + HdrFile << pc.finestLevel() << '\n'; + + // Then the number of grids at each level. + for (int lev = 0; lev <= pc.finestLevel(); lev++) + HdrFile << pc.ParticleBoxArray(lev).size() << '\n'; + } + + // We want to write the data out in parallel. + // We'll allow up to nOutFiles active writers at a time. + int nOutFiles(256); + + ParmParse pp("particles"); + pp.query("particles_nfiles",nOutFiles); + if(nOutFiles == -1) nOutFiles = NProcs; + nOutFiles = std::max(1, std::min(nOutFiles,NProcs)); + pc.nOutFilesPrePost = nOutFiles; + + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + bool gotsome; + if(pc.usePrePost) + { + gotsome = (pc.nParticlesAtLevelPrePost[lev] > 0); + } + else + { + gotsome = (pc.NumberOfParticlesAtLevel(lev) > 0); + } + + // We store the particles at each level in their own subdirectory. + std::string LevelDir = pdir; + + if (gotsome) + { + if ( ! LevelDir.empty() && LevelDir[LevelDir.size()-1] != '/') LevelDir += '/'; + + LevelDir = amrex::Concatenate(LevelDir + "Level_", lev, 1); + + if (ParallelDescriptor::IOProcessor()) + if ( ! amrex::UtilCreateDirectory(LevelDir, 0755)) + amrex::CreateDirectoryFailed(LevelDir); + ParallelDescriptor::Barrier(); + } + + // Write out the header for each particle + if (gotsome and ParallelDescriptor::IOProcessor()) { + std::string HeaderFileName = LevelDir; + HeaderFileName += "/Particle_H"; + std::ofstream ParticleHeader(HeaderFileName); + + pc.ParticleBoxArray(lev).writeOn(ParticleHeader); + ParticleHeader << '\n'; + + ParticleHeader.flush(); + ParticleHeader.close(); + } + + MFInfo info; + info.SetAlloc(false); + MultiFab state(pc.ParticleBoxArray(lev), + pc.ParticleDistributionMap(lev), + 1,0,info); + + // We eventually want to write out the file name and the offset + // into that file into which each grid of particles is written. + Vector which(state.size(),0); + Vector count(state.size(),0); + Vector where(state.size(),0); + + std::string filePrefix(LevelDir); + filePrefix += '/'; + filePrefix += PC::ParticleType::DataPrefix(); + if(pc.usePrePost) { + pc.filePrefixPrePost[lev] = filePrefix; + } + bool groupSets(false), setBuf(true); + + if (gotsome) + { + for(NFilesIter nfi(nOutFiles, filePrefix, groupSets, setBuf); nfi.ReadyToWrite(); ++nfi) + { + std::ofstream& myStream = (std::ofstream&) nfi.Stream(); + pc.WriteParticles(lev, myStream, nfi.FileNumber(), which, count, where, + write_real_comp, write_int_comp, particle_io_flags); + } + + if(pc.usePrePost) { + pc.whichPrePost[lev] = which; + pc.countPrePost[lev] = count; + pc.wherePrePost[lev] = where; + } else { + ParallelDescriptor::ReduceIntSum (which.dataPtr(), which.size(), IOProcNumber); + ParallelDescriptor::ReduceIntSum (count.dataPtr(), count.size(), IOProcNumber); + ParallelDescriptor::ReduceLongSum(where.dataPtr(), where.size(), IOProcNumber); + } + } + + if (ParallelDescriptor::IOProcessor()) + { + if(pc.GetUsePrePost()) { + // ---- write to the header and unlink in CheckpointPost + } else { + for (int j = 0; j < state.size(); j++) + { + HdrFile << which[j] << ' ' << count[j] << ' ' << where[j] << '\n'; + } + + if (gotsome && pc.doUnlink) + { + // Unlink any zero-length data files. + Vector cnt(nOutFiles,0); + + for (int i = 0, N=count.size(); i < N; i++) { + cnt[which[i]] += count[i]; + } + + for (int i = 0, N=cnt.size(); i < N; i++) + { + if (cnt[i] == 0) + { + std::string FullFileName = NFilesIter::FileName(i, filePrefix); + FileSystem::Remove(FullFileName); + } + } + } + } + } + } + + if (ParallelDescriptor::IOProcessor()) + { + HdrFile.flush(); + HdrFile.close(); + if ( ! HdrFile.good()) + { + amrex::Abort("ParticleContainer::Checkpoint(): problem writing HdrFile"); + } + } +} + +template ::value, int> foo = 0> +void WriteBinaryParticleDataAsync (PC const& pc, + const std::string& dir, const std::string& name, + const Vector& write_real_comp, + const Vector& write_int_comp, + const Vector& real_comp_names, + const Vector& int_comp_names) +{ + BL_PROFILE("WriteBinaryParticleDataAsync"); + AMREX_ASSERT(pc.OK()); + + AMREX_ASSERT(sizeof(typename PC::ParticleType::RealType) == 4 || + sizeof(typename PC::ParticleType::RealType) == 8); + + constexpr int NStructReal = PC::NStructReal; + constexpr int NStructInt = PC::NStructInt; + constexpr int NArrayReal = PC::NArrayReal; + constexpr int NArrayInt = PC::NArrayInt; + + const int MyProc = ParallelDescriptor::MyProc(); + const int NProcs = ParallelDescriptor::NProcs(); + const int IOProcNumber = NProcs - 1; + + AMREX_ALWAYS_ASSERT(real_comp_names.size() == pc.NumRealComps() + NStructReal); + AMREX_ALWAYS_ASSERT( int_comp_names.size() == pc.NumIntComps() + NStructInt); + + Vector > np_per_grid_local(pc.finestLevel()+1); + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + np_per_grid_local[lev].define(pc.ParticleBoxArray(lev), pc.ParticleDistributionMap(lev)); + for (MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + const auto& ptile = pc.ParticlesAt(lev, mfi); + const auto& aos = ptile.GetArrayOfStructs(); + const auto pstruct = aos().dataPtr(); + const int np = ptile.numParticles(); + + ReduceOps reduce_op; + ReduceData reduce_data(reduce_op); + using ReduceTuple = typename decltype(reduce_data)::Type; + + reduce_op.eval(np, reduce_data, + [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple + { + return (pstruct[i].id() > 0) ? 1 : 0; + }); + + int np_valid = amrex::get<0>(reduce_data.value()); + np_per_grid_local[lev][gid] += np_valid; + } + } + + Vector > np_per_grid_global(pc.finestLevel()+1); + Long total_np = 0; + Vector np_per_level(pc.finestLevel()+1); + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + np_per_grid_global[lev].resize(np_per_grid_local[lev].size()); + ParallelDescriptor::GatherLayoutDataToVector(np_per_grid_local[lev], + np_per_grid_global[lev], + IOProcNumber); + np_per_level[lev] = std::accumulate(np_per_grid_global[lev].begin(), + np_per_grid_global[lev].end(), 0L); + total_np += np_per_level[lev]; + } + + std::string pdir = dir; + if ( not pdir.empty() and pdir[pdir.size()-1] != '/') pdir += '/'; + pdir += name; + + if (MyProc == IOProcNumber) + { + if ( ! amrex::UtilCreateDirectory(pdir, 0755)) + { + amrex::CreateDirectoryFailed(pdir); + } + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + std::string LevelDir = pdir; + bool gotsome = np_per_level[lev]; + + if (gotsome) + { + if ( ! LevelDir.empty() && LevelDir[LevelDir.size()-1] != '/') LevelDir += '/'; + + LevelDir = amrex::Concatenate(LevelDir + "Level_", lev, 1); + + if ( ! amrex::UtilCreateDirectory(LevelDir, 0755)) + { + amrex::CreateDirectoryFailed(LevelDir); + } + + std::string HeaderFileName = LevelDir; + HeaderFileName += "/Particle_H"; + std::ofstream ParticleHeader(HeaderFileName); + + pc.ParticleBoxArray(lev).writeOn(ParticleHeader); + ParticleHeader << '\n'; + + ParticleHeader.flush(); + ParticleHeader.close(); + } + } + } + ParallelDescriptor::Barrier(); + + int maxnextid = PC::ParticleType::NextID(); + ParallelDescriptor::ReduceIntMax(maxnextid, IOProcNumber); + + Vector np_on_rank(NProcs, 0L); + std::size_t psize = PSizeInFile(write_real_comp, write_int_comp); + Vector rank_start_offset(NProcs); + if (MyProc == IOProcNumber) + { + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + for (int k = 0; k < pc.ParticleBoxArray(lev).size(); ++k) + { + int rank = pc.ParticleDistributionMap(lev)[k]; + np_on_rank[rank] += np_per_grid_global[lev][k]; + } + } + + for (int ip = 0; ip < NProcs; ++ip) + { + auto info = AsyncOut::GetWriteInfo(ip); + rank_start_offset[ip] = (info.ispot == 0) ? 0 : rank_start_offset[ip-1] + np_on_rank[ip-1]*psize; + } + } + + // make tmp particle tiles in pinned memory to write + using PinnedPTile = ParticleTile; + auto myptiles = std::make_shared,PinnedPTile> > >(); + myptiles->resize(pc.finestLevel()+1); + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + for (MFIter mfi = pc.MakeMFIter(lev); mfi.isValid(); ++mfi) + { + auto& new_ptile = (*myptiles)[lev][std::make_pair(mfi.index(), + mfi.LocalTileIndex())]; + + if (np_per_grid_local[lev][mfi.index()] > 0) + { + const auto& ptile = pc.ParticlesAt(lev, mfi); + const auto& aos = ptile.GetArrayOfStructs(); + const auto pstruct = aos().dataPtr(); + new_ptile.resize(np_per_grid_local[lev][mfi.index()]); + amrex::filterParticles(new_ptile, ptile, KeepValidFilter()); + } + } + } + + int finest_level = pc.finestLevel(); + Vector bas; + Vector dms; + for (int lev = 0; lev <= pc.finestLevel(); lev++) + { + bas.push_back(pc.ParticleBoxArray(lev)); + dms.push_back(pc.ParticleDistributionMap(lev)); + } + + int nrc = pc.NumRealComps(); + int nic = pc.NumIntComps(); + + auto RD = pc.ParticleRealDescriptor; + + AsyncOut::Submit([=] () + { + if (MyProc == IOProcNumber) + { + std::string HdrFileName = pdir; + std::ofstream HdrFile; + + if ( ! HdrFileName.empty() && HdrFileName[HdrFileName.size()-1] != '/') + HdrFileName += '/'; + + HdrFileName += "Header"; + + HdrFile.open(HdrFileName.c_str(), std::ios::out|std::ios::trunc); + + if ( ! HdrFile.good()) amrex::FileOpenFailed(HdrFileName); + + if (sizeof(typename PC::ParticleType) == 4) + { + HdrFile << PC::ParticleType::Version() << "_single" << '\n'; + } + else + { + HdrFile << PC::ParticleType::Version() << "_double" << '\n'; + } + + int num_output_real = 0; + for (int i = 0; i < nrc + NStructReal; ++i) + if (write_real_comp[i]) ++num_output_real; + + int num_output_int = 0; + for (int i = 0; i < nic + NStructInt; ++i) + if (write_int_comp[i]) ++num_output_int; + + // AMREX_SPACEDIM and N for sanity checking. + HdrFile << AMREX_SPACEDIM << '\n'; + + // The number of extra real parameters + HdrFile << num_output_real << '\n'; + + // Real component names + for (int i = 0; i < NStructReal + nrc; ++i ) + if (write_real_comp[i]) HdrFile << real_comp_names[i] << '\n'; + + // The number of extra int parameters + HdrFile << num_output_int << '\n'; + + // int component names + for (int i = 0; i < NStructInt + nic; ++i ) + if (write_int_comp[i]) HdrFile << int_comp_names[i] << '\n'; + + bool is_checkpoint = true; // legacy + HdrFile << is_checkpoint << '\n'; + + // The total number of particles. + HdrFile << total_np << '\n'; + + // The value of nextid that we need to restore on restart. + HdrFile << maxnextid << '\n'; + + // Then the finest level of the AMR hierarchy. + HdrFile << finest_level << '\n'; + + // Then the number of grids at each level. + for (int lev = 0; lev <= finest_level; lev++) + HdrFile << dms[lev].size() << '\n'; + + for (int lev = 0; lev <= finest_level; lev++) + { + Vector grid_offset(NProcs, 0); + for (int k = 0; k < bas[lev].size(); ++k) + { + int rank = dms[lev][k]; + auto info = AsyncOut::GetWriteInfo(rank); + HdrFile << info.ifile << ' ' + << np_per_grid_global[lev][k] << ' ' + << grid_offset[rank] + rank_start_offset[rank] << '\n'; + grid_offset[rank] += np_per_grid_global[lev][k]*psize; + } + } + + HdrFile.flush(); + HdrFile.close(); + if ( ! HdrFile.good()) + { + amrex::Abort("ParticleContainer::Checkpoint(): problem writing HdrFile"); + } + } + + AsyncOut::Wait(); // Wait for my turn + + for (int lev = 0; lev <= finest_level; lev++) + { + // For a each grid, the tiles it contains + std::map > tile_map; + + for (const auto& kv : (*myptiles)[lev]) + { + const int grid = kv.first.first; + const int tile = kv.first.second; + tile_map[grid].push_back(tile); + } + + std::string LevelDir = pdir; + if ( ! LevelDir.empty() && LevelDir[LevelDir.size()-1] != '/') LevelDir += '/'; + LevelDir = amrex::Concatenate(LevelDir + "Level_", lev, 1); + std::string filePrefix(LevelDir); + filePrefix += '/'; + filePrefix += PC::ParticleType::DataPrefix(); + auto info = AsyncOut::GetWriteInfo(MyProc); + std::string file_name = amrex::Concatenate(filePrefix, info.ifile, 5); + std::ofstream ofs; + ofs.open(file_name.c_str(), (info.ispot == 0) ? (std::ios::binary | std::ios::trunc) + : (std::ios::binary | std::ios::app)); + + for (int k = 0; k < bas[lev].size(); ++k) + { + int rank = dms[lev][k]; + if (rank != MyProc) continue; + const int grid = k; + if (np_per_grid_local[lev][grid] == 0) continue; + + // First write out the integer data in binary. + int num_output_int = 0; + for (int i = 0; i < nic + NStructInt; ++i) + if (write_int_comp[i]) ++num_output_int; + + const int iChunkSize = 2 + num_output_int; + Vector istuff(np_per_grid_local[lev][grid]*iChunkSize); + int* iptr = istuff.dataPtr(); + + for (unsigned i = 0; i < tile_map[grid].size(); i++) { + auto ptile_index = std::make_pair(grid, tile_map[grid][i]); + const auto& pbox = (*myptiles)[lev][ptile_index]; + for (int pindex = 0; + pindex < pbox.GetArrayOfStructs().numParticles(); ++pindex) + { + const auto& aos = pbox.GetArrayOfStructs(); + const auto& p = aos[pindex]; + + if (p.id() <= 0) continue; + + // always write these + *iptr = p.id(); ++iptr; + *iptr = p.cpu(); ++iptr; + + // optionally write these + for (int j = 0; j < NStructInt; j++) + { + if (write_int_comp[j]) + { + *iptr = p.idata(j); + ++iptr; + } + } + + const auto& soa = pbox.GetStructOfArrays(); + for (int j = 0; j < nic; j++) + { + if (write_int_comp[NStructInt+j]) + { + *iptr = soa.GetIntData(j)[pindex]; + ++iptr; + } + } + } + } + + writeIntData(istuff.dataPtr(), istuff.size(), ofs); + ofs.flush(); // Some systems require this flush() (probably due to a bug) + + // Write the Real data in binary. + int num_output_real = 0; + for (int i = 0; i < nrc + NStructReal; ++i) + if (write_real_comp[i]) ++num_output_real; + + const int rChunkSize = AMREX_SPACEDIM + num_output_real; + Vector rstuff(np_per_grid_local[lev][grid]*rChunkSize); + typename PC::ParticleType::RealType* rptr = rstuff.dataPtr(); + + for (unsigned i = 0; i < tile_map[grid].size(); i++) { + auto ptile_index = std::make_pair(grid, tile_map[grid][i]); + const auto& pbox = (*myptiles)[lev][ptile_index]; + for (int pindex = 0; + pindex < pbox.GetArrayOfStructs().numParticles(); ++pindex) + { + const auto& aos = pbox.GetArrayOfStructs(); + const auto& p = aos[pindex]; + + if (p.id() <= 0) continue; + + // always write these + for (int j = 0; j < AMREX_SPACEDIM; j++) rptr[j] = p.pos(j); + rptr += AMREX_SPACEDIM; + + // optionally write these + for (int j = 0; j < NStructReal; j++) + { + if (write_real_comp[j]) + { + *rptr = p.rdata(j); + ++rptr; + } + } + + const auto& soa = pbox.GetStructOfArrays(); + for (int j = 0; j < nrc; j++) + { + if (write_real_comp[NStructReal+j]) + { + *rptr = (typename PC::ParticleType::RealType) soa.GetRealData(j)[pindex]; + ++rptr; + } + } + } + } + + if (sizeof(typename PC::ParticleType::RealType) == 4) { + writeFloatData((float*) rstuff.dataPtr(), rstuff.size(), ofs, RD); + } + else if (sizeof(typename PC::ParticleType::RealType) == 8) { + writeDoubleData((double*) rstuff.dataPtr(), rstuff.size(), ofs, RD); + } + + ofs.flush(); // Some systems require this flush() (probably due to a bug) + } + } + AsyncOut::Notify(); // Notify others I am done + }); +} + +#endif diff --git a/Src/Particle/CMakeLists.txt b/Src/Particle/CMakeLists.txt index 8a220e4208c..31315e27ff1 100644 --- a/Src/Particle/CMakeLists.txt +++ b/Src/Particle/CMakeLists.txt @@ -42,4 +42,5 @@ target_sources( amrex AMReX_DenseBins.H AMReX_BinIterator.H AMReX_ParticleTransformation.H + AMReX_WriteBinaryParticleData.H ) diff --git a/Src/Particle/Make.package b/Src/Particle/Make.package index 1987f9a2042..0646d5e07ae 100644 --- a/Src/Particle/Make.package +++ b/Src/Particle/Make.package @@ -10,6 +10,7 @@ C$(AMREX_PARTICLE)_headers += AMReX_ParIter.H AMReX_ParticleMPIUtil.H AMReX_Stru C$(AMREX_PARTICLE)_headers += AMReX_ParticleUtil.H AMReX_NeighborList.H AMReX_ParticleBufferMap.H AMReX_ParticleCommunication.H AMReX_ParticleReduce.H AMReX_ParticleLocator.H C$(AMREX_PARTICLE)_headers += AMReX_NeighborParticlesCPUImpl.H AMReX_NeighborParticlesGPUImpl.H C$(AMREX_PARTICLE)_headers += AMReX_Particle_mod_K.H AMReX_TracerParticle_mod_K.H AMReX_ParticleMesh.H AMReX_ParticleIO.H AMReX_ParticleHDF5.H AMReX_DenseBins.H AMReX_ParticleTransformation.H AMReX_SparseBins.H AMReX_BinIterator.H +C$(AMREX_PARTICLE)_headers += AMReX_WriteBinaryParticleData.H VPATH_LOCATIONS += $(AMREX_HOME)/Src/Particle INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Particle diff --git a/Tests/Particles/AsyncIO/GNUmakefile b/Tests/Particles/AsyncIO/GNUmakefile new file mode 100644 index 00000000000..d227a7b2656 --- /dev/null +++ b/Tests/Particles/AsyncIO/GNUmakefile @@ -0,0 +1,32 @@ +AMREX_HOME ?= ../../../ + +DEBUG = TRUE +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +TINY_PROFILE = TRUE +USE_PARTICLES = TRUE + +PRECISION = DOUBLE + +USE_MPI = TRUE +USE_OMP = FALSE + +MPI_THREAD_MULTIPLE = TRUE + +################################################### + +EBASE = main + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Boundary/Make.package +include $(AMREX_HOME)/Src/AmrCore/Make.package +include $(AMREX_HOME)/Src/Particle/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Particles/AsyncIO/Make.package b/Tests/Particles/AsyncIO/Make.package new file mode 100644 index 00000000000..7f43e5e87cb --- /dev/null +++ b/Tests/Particles/AsyncIO/Make.package @@ -0,0 +1,2 @@ +CEXE_sources += main.cpp + diff --git a/Tests/Particles/AsyncIO/inputs b/Tests/Particles/AsyncIO/inputs new file mode 100644 index 00000000000..9d81c23f8a3 --- /dev/null +++ b/Tests/Particles/AsyncIO/inputs @@ -0,0 +1,29 @@ + +# Domain size + +#nx = 32 # number of grid points along the x axis +#ny = 32 # number of grid points along the y axis +#nz = 32 # number of grid points along the z axis + +#nx = 64 # number of grid points along the x axis +#ny = 64 # number of grid points along the y axis +#nz = 64 # number of grid points along the z axis + +nx = 128 # number of grid points along the x axis +ny = 128 # number of grid points along the y axis +nz = 128 # number of grid points along the z axis + +# Maximum allowable size of each subdomain in the problem domain; +# this is used to decompose the domain for parallel calculations. +max_grid_size = 16 + +# Number of particles per cell +nppc = 10 + +# Verbosity +verbose = true # set to true to get more verbosity + +# Number of levels +nlevs = 2 + +amrex.async_io=1 \ No newline at end of file diff --git a/Tests/Particles/AsyncIO/main.cpp b/Tests/Particles/AsyncIO/main.cpp new file mode 100644 index 00000000000..7fffa7a9f3b --- /dev/null +++ b/Tests/Particles/AsyncIO/main.cpp @@ -0,0 +1,312 @@ +#include + +#include +#include +#include +#include +#include + +#include + +using namespace amrex; + +static constexpr int NSR = 1 + AMREX_SPACEDIM; +static constexpr int NSI = 0; +static constexpr int NAR = 0; +static constexpr int NAI = 0; + +struct TestParams { + int nx; + int ny; + int nz; + int max_grid_size; + int nppc; + int nlevs; + bool verbose; +}; + +void get_position_unit_cell(Real* r, const IntVect& nppc, int i_part) +{ + int nx = nppc[0]; +#if AMREX_SPACEDIM > 1 + int ny = nppc[1]; +#else + int ny = 1; +#endif +#if AMREX_SPACEDIM > 2 + int nz = nppc[2]; +#else + int nz = 1; +#endif + + int ix_part = i_part/(ny * nz); + int iy_part = (i_part % (ny * nz)) % ny; + int iz_part = (i_part % (ny * nz)) / ny; + + r[0] = (0.5+ix_part)/nx; + r[1] = (0.5+iy_part)/ny; + r[2] = (0.5+iz_part)/nz; +} + +class MyParticleContainer + : public amrex::ParticleContainer +{ + +public: + + MyParticleContainer (const Vector & a_geom, + const Vector & a_dmap, + const Vector & a_ba, + const Vector & a_rr) + : amrex::ParticleContainer(a_geom, a_dmap, a_ba, a_rr) + {} + + void InitParticles (const amrex::IntVect& a_num_particles_per_cell) + { + BL_PROFILE("InitParticles"); + + const int lev = 0; // only add particles on level 0 + const Real* dx = Geom(lev).CellSize(); + const Real* plo = Geom(lev).ProbLo(); + + const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0], + *a_num_particles_per_cell[1], + *a_num_particles_per_cell[2]); + + for(MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + const Box& tile_box = mfi.tilebox(); + + Gpu::HostVector host_particles; + std::array, NAR> host_real; + std::array, NAI> host_int; + + std::vector > host_runtime_real(NumRuntimeRealComps()); + std::vector > host_runtime_int(NumRuntimeIntComps()); + + for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv)) + { + for (int i_part=0; i_part 1 + p.pos(1) = plo[1] + (iv[1] + r[1])*dx[1]; +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) = plo[2] + (iv[2] + r[2])*dx[2]; +#endif + + for (int i = 0; i < NSR; ++i) p.rdata(i) = p.id(); + for (int i = 0; i < NSI; ++i) p.idata(i) = p.id(); + + host_particles.push_back(p); + for (int i = 0; i < NAR; ++i) + host_real[i].push_back(p.id()); + for (int i = 0; i < NAI; ++i) + host_int[i].push_back(p.id()); + for (int i = 0; i < NumRuntimeRealComps(); ++i) + host_runtime_real[i].push_back(p.id()); + for (int i = 0; i < NumRuntimeIntComps(); ++i) + host_runtime_int[i].push_back(p.id()); + } + } + + auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex()); + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + host_particles.size(); + particle_tile.resize(new_size); + + Gpu::copy(Gpu::hostToDevice, + host_particles.begin(), + host_particles.end(), + particle_tile.GetArrayOfStructs().begin() + old_size); + + auto& soa = particle_tile.GetStructOfArrays(); + for (int i = 0; i < NAR; ++i) + { + Gpu::copy(Gpu::hostToDevice, + host_real[i].begin(), + host_real[i].end(), + soa.GetRealData(i).begin() + old_size); + } + + for (int i = 0; i < NAI; ++i) + { + Gpu::copy(Gpu::hostToDevice, + host_int[i].begin(), + host_int[i].end(), + soa.GetIntData(i).begin() + old_size); + } + for (int i = 0; i < NumRuntimeRealComps(); ++i) + { + Gpu::copy(Gpu::hostToDevice, + host_runtime_real[i].begin(), + host_runtime_real[i].end(), + soa.GetRealData(NAR+i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeIntComps(); ++i) + { + Gpu::copy(Gpu::hostToDevice, + host_runtime_int[i].begin(), + host_runtime_int[i].end(), + soa.GetIntData(NAI+i).begin() + old_size); + } + } + + Redistribute(); + } +}; + +void test_async_io(TestParams& parms) +{ + int nlevs = parms.nlevs; + + RealBox real_box; + for (int n = 0; n < BL_SPACEDIM; n++) { + real_box.setLo(n, 0.0); + real_box.setHi(n, 1.0); + } + + RealBox fine_box; + for (int n = 0; n < BL_SPACEDIM; n++) + { + fine_box.setLo(n,0.25); + fine_box.setHi(n,0.75); + } + + IntVect domain_lo(D_DECL(0 , 0, 0)); + IntVect domain_hi(D_DECL(parms.nx - 1, parms.ny - 1, parms.nz-1)); + const Box domain(domain_lo, domain_hi); + + // Define the refinement ratio + Vector rr(nlevs-1); + for (int lev = 1; lev < nlevs; lev++) + rr[lev-1] = IntVect(AMREX_D_DECL(2, 2, 2)); + + // This sets the boundary conditions to be doubly or triply periodic + int is_per[BL_SPACEDIM]; + for (int i = 0; i < BL_SPACEDIM; i++) + is_per[i] = 1; + + // This defines a Geometry object which is useful for writing the plotfiles + Vector geom(nlevs); + geom[0].define(domain, &real_box, CoordSys::cartesian, is_per); + for (int lev = 1; lev < nlevs; lev++) { + geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]), + &real_box, CoordSys::cartesian, is_per); + } + + Vector ba(nlevs); + ba[0].define(domain); + + if (nlevs > 1) { + int n_fine = parms.nx*rr[0][0]; + IntVect refined_lo(D_DECL(n_fine/4,n_fine/4,n_fine/4)); + IntVect refined_hi(D_DECL(3*n_fine/4-1,3*n_fine/4-1,3*n_fine/4-1)); + + // Build a box for the level 1 domain + Box refined_patch(refined_lo, refined_hi); + ba[1].define(refined_patch); + } + + // break the BoxArrays at both levels into max_grid_size^3 boxes + for (int lev = 0; lev < nlevs; lev++) { + ba[lev].maxSize(parms.max_grid_size); + } + + Vector dmap(nlevs); + + Vector > partMF(nlevs); + Vector > density(nlevs); + Vector > acceleration(nlevs); + for (int lev = 0; lev < nlevs; lev++) { + dmap[lev] = DistributionMapping{ba[lev]}; + density[lev].reset(new MultiFab(ba[lev], dmap[lev], 1, 0)); + density[lev]->setVal(0.0); + acceleration[lev].reset(new MultiFab(ba[lev], dmap[lev], 3, 1)); + acceleration[lev]->setVal(5.0, 1); + } + + MyParticleContainer myPC(geom, dmap, ba, rr); + myPC.SetVerbose(false); + + myPC.InitParticles(IntVect(2, 2, 2)); + + for (int step = 0; step < 4000; ++step) + { + myPC.AssignDensity(0, partMF, 0, 1, nlevs-1); + + for (int lev = 0; lev < nlevs; ++lev) { + MultiFab::Copy(*density[lev], *partMF[lev], 0, 0, 1, 0); + } + + if (step % 1000 == 0) { + Vector varnames; + varnames.push_back("density"); + + Vector particle_varnames; + particle_varnames.push_back("mass"); + + Vector level_steps; + level_steps.push_back(0); + level_steps.push_back(0); + + int output_levs = nlevs; + + Vector outputMF(output_levs); + Vector outputRR(output_levs); + for (int lev = 0; lev < output_levs; ++lev) { + outputMF[lev] = density[lev].get(); + outputRR[lev] = IntVect(D_DECL(2, 2, 2)); + } + + std::string fn = amrex::Concatenate("plt", step, 5); + + WriteMultiLevelPlotfile(fn, output_levs, outputMF, + varnames, geom, 0.0, level_steps, outputRR); + + myPC.WritePlotFile(fn, "particle0"); + } + } +} + +int main(int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + ParmParse pp; + + TestParams parms; + + pp.get("nx", parms.nx); + pp.get("ny", parms.ny); + pp.get("nz", parms.nz); + pp.get("max_grid_size", parms.max_grid_size); + pp.get("nlevs", parms.nlevs); + pp.get("nppc", parms.nppc); + if (parms.nppc < 1 && ParallelDescriptor::IOProcessor()) + amrex::Abort("Must specify at least one particle per cell"); + + parms.verbose = false; + pp.query("verbose", parms.verbose); + + if (parms.verbose && ParallelDescriptor::IOProcessor()) { + std::cout << std::endl; + std::cout << "Number of particles per cell : "; + std::cout << parms.nppc << std::endl; + std::cout << "Size of domain : "; + std::cout << "Num levels: "; + std::cout << parms.nlevs << std::endl; + std::cout << parms.nx << " " << parms.ny << " " << parms.nz << std::endl; + } + + test_async_io(parms); + + amrex::Finalize(); +}