diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index a9a475101c..dca96f4155 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -166,7 +166,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release ` -DAMReX_GPU_BACKEND=CUDA ` -DAMReX_CUDA_ARCH="8.0" ` - -DAMReX_ENABLE_TESTS=OFF ` + -DAMReX_ENABLE_TESTS=ON ` -DAMReX_EB=OFF ` -DAMReX_FFT=OFF ` -DAMReX_LINEAR_SOLVERS=OFF ` diff --git a/Src/AmrCore/AMReX_FillPatchUtil_I.H b/Src/AmrCore/AMReX_FillPatchUtil_I.H index 0c4f333935..bf6684bda1 100644 --- a/Src/AmrCore/AMReX_FillPatchUtil_I.H +++ b/Src/AmrCore/AMReX_FillPatchUtil_I.H @@ -142,22 +142,14 @@ FillPatchSingleLevel (MF& mf, IntVect const& nghost, Real time, if (time == t0) { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( bx, ncomp, i, j, k, n, -#endif { dfab(i,j,k,n+destcomp) = sfab0(i,j,k,n+scomp); }); } else if (time == t1) { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( bx, ncomp, i, j, k, n, -#endif { dfab(i,j,k,n+destcomp) = sfab1(i,j,k,n+scomp); }); @@ -166,11 +158,7 @@ FillPatchSingleLevel (MF& mf, IntVect const& nghost, Real time, { Real alpha = (t1-time)/(t1-t0); Real beta = (time-t0)/(t1-t0); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( bx, ncomp, i, j, k, n, -#endif { dfab(i,j,k,n+destcomp) = alpha*sfab0(i,j,k,n+scomp) + beta*sfab1(i,j,k,n+scomp); @@ -178,11 +166,7 @@ FillPatchSingleLevel (MF& mf, IntVect const& nghost, Real time, } else { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D ( bx, ncomp, i, j, k, n, -#endif { dfab(i,j,k,n+destcomp) = sfab0(i,j,k,n+scomp); }); diff --git a/Src/AmrCore/AMReX_TagBox.cpp b/Src/AmrCore/AMReX_TagBox.cpp index b3071c7e29..8f69b54a6a 100644 --- a/Src/AmrCore/AMReX_TagBox.cpp +++ b/Src/AmrCore/AMReX_TagBox.cpp @@ -43,11 +43,7 @@ TagBox::coarsen (const IntVect& ratio, const Box& cbox) noexcept Dim3 r{1,1,1}; AMREX_D_TERM(r.x = ratio[0];, r.y = ratio[1];, r.z = ratio[2]); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(cbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_FOR_3D(cbox, i, j, k, -#endif { TagType t = TagBox::CLEAR; for (int koff = 0; koff < r.z; ++koff) { @@ -87,11 +83,7 @@ TagBox::buffer (const IntVect& a_nbuff, const IntVect& a_nwid) noexcept Box const& interiorplusbuf = amrex::grow(interior, a_nbuff); const auto lo = amrex::lbound(interiorplusbuf); const auto hi = amrex::ubound(interiorplusbuf); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(interiorplusbuf, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_FOR_3D(interiorplusbuf, i, j, k, -#endif { if (a(i,j,k) == TagBox::CLEAR) { bool to_buf = false; diff --git a/Src/Base/AMReX_FBI.H b/Src/Base/AMReX_FBI.H index 71576bcb8c..e20f8bb4b7 100644 --- a/Src/Base/AMReX_FBI.H +++ b/Src/Base/AMReX_FBI.H @@ -422,11 +422,7 @@ FabArray::FB_local_copy_cuda_graph_1 (const FB& TheFB, int scomp, int ncomp { const auto offset = tag.offset.dim3(); CopyMemory* cmem = TheFB.m_localCopy.getDevicePtr(idx++); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(tag.dbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_FOR_3D (tag.dbox, i, j, k, -#endif { // Build the Array4's. auto const dst = cmem->getDst(); @@ -511,11 +507,7 @@ FabArray::FB_local_copy_cuda_graph_n (const FB& TheFB, int scomp, int ncomp const Dim3 offset = tag.offset.dim3(); CopyMemory* cmem = TheFB.m_localCopy.getDevicePtr(idx++); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(tag.dbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_FOR_3D(tag.dbox, i, j, k, -#endif { auto const dst = cmem->getDst(); auto const src = cmem->getSrc(); @@ -601,11 +593,7 @@ FabArray::FB_pack_send_buffer_cuda_graph (const FB& TheFB, int scomp, int n { const Box& bx = tag.sbox; CopyMemory* cmem = TheFB.m_copyToBuffer.getDevicePtr(idx++); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int ii, int jj, int kk) -#else AMREX_HOST_DEVICE_FOR_3D (bx, ii, jj, kk, -#endif { auto const pfab = cmem->getDst(); auto const sfab = cmem->getSrc(); @@ -697,11 +685,7 @@ FabArray::FB_unpack_recv_buffer_cuda_graph (const FB& TheFB, int dcomp, int for (auto const & tag : tags) { CopyMemory* cmem = TheFB.m_copyFromBuffer.getDevicePtr(idx++); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(tag.dbox, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_FOR_3D (tag.dbox, i, j, k, -#endif { auto const pfab = cmem->getSrc(); auto const dfab = cmem->getDst(); diff --git a/Src/Base/AMReX_FabArrayUtility.H b/Src/Base/AMReX_FabArrayUtility.H index cdccef8d2a..bfac76c471 100644 --- a/Src/Base/AMReX_FabArrayUtility.H +++ b/Src/Base/AMReX_FabArrayUtility.H @@ -1136,11 +1136,7 @@ Subtract (FabArray& dst, FabArray const& src, int srccomp, int dstcomp { auto const srcFab = src.array(mfi); auto dstFab = dst.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n, -#endif { dstFab(i,j,k,n+dstcomp) -= srcFab(i,j,k,n+srccomp); }); @@ -1188,11 +1184,7 @@ Multiply (FabArray& dst, FabArray const& src, int srccomp, int dstcomp { auto const srcFab = src.array(mfi); auto dstFab = dst.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n, -#endif { dstFab(i,j,k,n+dstcomp) *= srcFab(i,j,k,n+srccomp); }); @@ -1240,11 +1232,7 @@ Divide (FabArray& dst, FabArray const& src, int srccomp, int dstcomp, { auto const srcFab = src.array(mfi); auto dstFab = dst.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n, -#endif { dstFab(i,j,k,n+dstcomp) /= srcFab(i,j,k,n+srccomp); }); @@ -1289,11 +1277,7 @@ Abs (FabArray& fa, int icomp, int numcomp, const IntVect& nghost) if (bx.ok()) { auto const& fab = fa.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n, -#endif { fab(i,j,k,n+icomp) = std::abs(fab(i,j,k,n+icomp)); }); @@ -1380,11 +1364,7 @@ OverrideSync_nowait (FabArray & fa, FabArray const& msk, const Period const Box& bx = mfi.tilebox(); auto fab = fa.array(mfi); auto const ifab = msk.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n, -#endif { if (!ifab(i,j,k)) { fab(i,j,k,n) = 0; } }); diff --git a/Src/Base/AMReX_MultiFabUtil.H b/Src/Base/AMReX_MultiFabUtil.H index e1eaa49e23..8c14adb24f 100644 --- a/Src/Base/AMReX_MultiFabUtil.H +++ b/Src/Base/AMReX_MultiFabUtil.H @@ -251,11 +251,7 @@ namespace amrex const Long n = mfi.fabbox().numPts() * mf_in.nComp(); auto * pdst = mf_out[mfi].dataPtr(); auto const* psrc = mf_in [mfi].dataPtr(); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_1D ( n, i, -#endif { pdst[i] = static_cast(psrc[i]); // NOLINT(bugprone-signed-char-misuse) }); @@ -580,20 +576,12 @@ void average_down (const FabArray& S_fine, FabArray& S_crse, Array4 const& finearr = S_fine.const_array(mfi); if (is_cell_centered) { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, -#endif { amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio); }); } else { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, -#endif { amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio); }); @@ -643,20 +631,12 @@ void average_down (const FabArray& S_fine, FabArray& S_crse, // not part of the actual crse multifab which came in. if (is_cell_centered) { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, -#endif { amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio); }); } else { -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, -#endif { amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio); }); @@ -851,22 +831,14 @@ void average_face_to_cellcenter (CMF& cc, int dcomp, auto const& fzarr = fc[2]->const_array(mfi);); #if (AMREX_SPACEDIM == 1) -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k, -#endif { GeometryData gd; gd.coord = 0; amrex_avg_fc_to_cc(i,j,k, ccarr, fxarr, dcomp, gd); }); -#else -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) #else AMREX_HOST_DEVICE_PARALLEL_FOR_3D( bx, i, j, k, -#endif { amrex_avg_fc_to_cc(i,j,k, ccarr, AMREX_D_DECL(fxarr,fyarr,fzarr), dcomp); }); @@ -957,11 +929,7 @@ void average_down_faces (const FabArray& fine, FabArray& crse, const Box& bx = mfi.growntilebox(ngcrse); auto const& crsearr = crse.array(mfi); auto const& finearr = fine.const_array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n, -#endif { amrex_avgdown_faces(i,j,k,n, crsearr, finearr, 0, 0, ratio, dir); }); diff --git a/Src/Base/AMReX_NonLocalBCImpl.H b/Src/Base/AMReX_NonLocalBCImpl.H index 9adf95c0c8..d2bf545eab 100644 --- a/Src/Base/AMReX_NonLocalBCImpl.H +++ b/Src/Base/AMReX_NonLocalBCImpl.H @@ -550,11 +550,7 @@ Rotate90 (FabArray& mf, int scomp, int ncomp, IntVect const& nghost, Box co Box const& bx = corner & mfi.fabbox(); if (bx.ok()) { auto const& fab = mf.array(mfi); -#if defined(AMREX_USE_GPU) && defined(_WIN32) - amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) -#else AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx,ncomp,i,j,k,n, -#endif { fab(i,j,k,n) = fab(-i-1,-j-1,k,n); });