Skip to content

Commit

Permalink
rework the macros
Browse files (browse the repository at this point in the history)
  • Loading branch information
WeiqunZhang committed Jan 23, 2025
1 parent 323d997 commit 9d3beb4
Show file tree
Hide file tree
Showing 6 changed files with 116 additions and 121 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,12 @@ jobs:
-DCMAKE_BUILD_TYPE=Release `
-DAMReX_GPU_BACKEND=CUDA `
-DAMReX_CUDA_ARCH="8.0" `
-DAMReX_ENABLE_TESTS=ON `
-DAMReX_ENABLE_TESTS=OFF `
-DAMReX_EB=OFF `
-DAMReX_FFT=OFF `
-DAMReX_LINEAR_SOLVERS=OFF `
-DAMReX_PARTICLES=OFF `
-DAMReX_FORTRAN=OFF `
-DAMReX_MPI=OFF
cmake --build build --config Release -j 4
cmake --build build --config Release --target install
#cmake --build build --config Release --target install
8 changes: 0 additions & 8 deletions Src/Base/AMReX_BaseFab.H
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,7 @@ template <typename T>
std::enable_if_t<!std::is_trivially_default_constructible_v<T>>
placementNew (T* const ptr, Long n)
{
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i)
#else
AMREX_HOST_DEVICE_FOR_1D ( n, i,
#endif
{
new (ptr+i) T;
});
Expand All @@ -127,11 +123,7 @@ template <typename T>
std::enable_if_t<!std::is_trivially_destructible_v<T>>
placementDelete (T* const ptr, Long n)
{
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (Long i)
#else
AMREX_HOST_DEVICE_FOR_1D (n, i,
#endif
{
(ptr+i)->~T();
});
Expand Down
60 changes: 0 additions & 60 deletions Src/Base/AMReX_FabArray.H
Original file line number Diff line number Diff line change
Expand Up @@ -225,11 +225,7 @@ Copy (FabArray<DFAB>& dst, FabArray<SFAB> const& src, int srccomp, int dstcomp,
{
auto const& srcFab = src.const_array(mfi);
auto const& dstFab = dst.array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
#endif
{
dstFab(i,j,k,dstcomp+n) = DT(srcFab(i,j,k,srccomp+n));
});
Expand Down Expand Up @@ -278,11 +274,7 @@ Add (FabArray<FAB>& dst, FabArray<FAB> const& src, int srccomp, int dstcomp, int
{
auto const srcFab = src.array(mfi);
auto dstFab = dst.array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
#endif
{
dstFab(i,j,k,n+dstcomp) += srcFab(i,j,k,n+srccomp);
});
Expand Down Expand Up @@ -2476,11 +2468,7 @@ FabArray<FAB>::setVal (value_type val,
{
const Box& bx = fai.growntilebox(nghost);
auto fab = this->array(fai);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) = val;
});
Expand Down Expand Up @@ -2540,11 +2528,7 @@ FabArray<FAB>::setVal (value_type val,

if (b.ok()) {
auto fab = this->array(fai);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(b, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( b, ncomp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) = val;
});
Expand Down Expand Up @@ -2591,11 +2575,7 @@ FabArray<FAB>::abs (int comp, int ncomp, const IntVect& nghost)
{
const Box& bx = mfi.growntilebox(nghost);
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) = std::abs(fab(i,j,k,n+comp));
});
Expand Down Expand Up @@ -2631,11 +2611,7 @@ FabArray<FAB>::plus (value_type val, int comp, int num_comp, int nghost)
{
const Box& bx = mfi.growntilebox(nghost);
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) += val;
});
Expand Down Expand Up @@ -2674,11 +2650,7 @@ FabArray<FAB>::plus (value_type val, const Box& region, int comp, int num_comp,
const Box& bx = mfi.growntilebox(nghost) & region;
if (bx.ok()) {
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) += val;
});
Expand Down Expand Up @@ -2715,11 +2687,7 @@ FabArray<FAB>::mult (value_type val, int comp, int num_comp, int nghost)
{
const Box& bx = mfi.growntilebox(nghost);
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) *= val;
});
Expand Down Expand Up @@ -2758,11 +2726,7 @@ FabArray<FAB>::mult (value_type val, const Box& region, int comp, int num_comp,
const Box& bx = mfi.growntilebox(nghost) & region;
if (bx.ok()) {
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) *= val;
});
Expand Down Expand Up @@ -2799,11 +2763,7 @@ FabArray<FAB>::invert (value_type numerator, int comp, int num_comp, int nghost)
{
const Box& bx = mfi.growntilebox(nghost);
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
});
Expand Down Expand Up @@ -2842,11 +2802,7 @@ FabArray<FAB>::invert (value_type numerator, const Box& region, int comp, int nu
const Box& bx = mfi.growntilebox(nghost) & region;
if (bx.ok()) {
auto fab = this->array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, num_comp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, num_comp, i, j, k, n,
#endif
{
fab(i,j,k,n+comp) = numerator / fab(i,j,k,n+comp);
});
Expand Down Expand Up @@ -2908,11 +2864,7 @@ void FabArray<FAB>::Saxpy (FabArray<FAB>& y, value_type a, FabArray<FAB> const&
if (bx.ok()) {
auto const& xfab = x.const_array(mfi);
auto const& yfab = y.array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
#endif
{
yfab(i,j,k,ycomp+n) += a * xfab(i,j,k,xcomp+n);
});
Expand Down Expand Up @@ -2957,11 +2909,7 @@ FabArray<FAB>::Xpay (FabArray<FAB>& y, value_type a, FabArray<FAB> const& x,
const Box& bx = mfi.growntilebox(nghost);
auto const& xFab = x.const_array(mfi);
auto const& yFab = y.array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, ncomp, i, j, k, n,
#endif
{
yFab(i,j,k,n+ycomp) = xFab(i,j,k,n+xcomp)
+ a * yFab(i,j,k,n+ycomp);
Expand Down Expand Up @@ -3012,11 +2960,7 @@ FabArray<FAB>::LinComb (FabArray<FAB>& dst,
auto const& xfab = x.const_array(mfi);
auto const& yfab = y.const_array(mfi);
auto const& dfab = dst.array(mfi);
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(bx, numcomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_PARALLEL_FOR_4D( bx, numcomp, i, j, k, n,
#endif
{
dfab(i,j,k,dstcomp+n) = a*xfab(i,j,k,xcomp+n) + b*yfab(i,j,k,ycomp+n);
});
Expand Down Expand Up @@ -3412,11 +3356,7 @@ FabArray<FAB>::BuildMask (const Box& phys_domain, const Periodicity& period,
Box const& fbx = mfi.growntilebox();
Box const& gbx = fbx & domain;
Box const& vbx = mfi.validbox();
#if defined(AMREX_USE_GPU) && defined(_WIN32)
amrex::ParallelFor(fbx, ncomp, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n)
#else
AMREX_HOST_DEVICE_FOR_4D(fbx, ncomp, i, j, k, n,
#endif
{
if (vbx.contains(i,j,k)) {
fab(i,j,k,n) = interior;
Expand Down
16 changes: 0 additions & 16 deletions Src/Base/AMReX_GpuLaunch.nolint.H
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,6 @@
#define AMREX_LAUNCH_HOST_DEVICE_LAMBDA_DIM_FLAG(...) AMREX_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3_FLAG(__VA_ARGS__)
#endif

#define AMREX_FOR_1D(...) AMREX_GPU_DEVICE_FOR_1D(__VA_ARGS__)
#define AMREX_FOR_3D(...) AMREX_GPU_DEVICE_FOR_3D(__VA_ARGS__)
#define AMREX_FOR_4D(...) AMREX_GPU_DEVICE_FOR_4D(__VA_ARGS__)

#define AMREX_PARALLEL_FOR_1D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
#define AMREX_PARALLEL_FOR_3D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
#define AMREX_PARALLEL_FOR_4D(...) AMREX_GPU_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)

#define AMREX_HOST_DEVICE_FOR_1D(...) AMREX_GPU_HOST_DEVICE_FOR_1D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_FOR_3D(...) AMREX_GPU_HOST_DEVICE_FOR_3D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_FOR_4D(...) AMREX_GPU_HOST_DEVICE_FOR_4D(__VA_ARGS__)

#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(__VA_ARGS__)
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(__VA_ARGS__)

#ifdef AMREX_USE_GPU

#ifndef AMREX_USE_SYCL
Expand Down
37 changes: 18 additions & 19 deletions Src/Base/AMReX_GpuLaunchMacrosC.nolint.H
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,18 @@
#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_2(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_2(__VA_ARGS__)
#define AMREX_GPU_LAUNCH_DEVICE_LAMBDA_RANGE_3(...) AMREX_GPU_LAUNCH_HOST_DEVICE_LAMBDA_RANGE_3(__VA_ARGS__)

#define AMREX_GPU_FOR_1D_IMPL(n,i,block) \
#define AMREX_FOR_1D_IMPL(n,i,block) \
for (auto i = decltype(n){0}; i < n; ++i) { \
block \
}

#define AMREX_GPU_PARALLEL_FOR_1D_IMPL(n,i,block) \
#define AMREX_PARALLEL_FOR_1D_IMPL(n,i,block) \
AMREX_PRAGMA_SIMD \
for (auto i = decltype(n){0}; i < n; ++i) { \
block \
}

#define AMREX_GPU_FOR_3D_IMPL(box,i,j,k,block) \
#define AMREX_FOR_3D_IMPL(box,i,j,k,block) \
{ \
const auto amrex_i_lo = amrex::lbound(box); \
const auto amrex_i_hi = amrex::ubound(box); \
Expand All @@ -57,7 +57,7 @@
}}} \
}

#define AMREX_GPU_PARALLEL_FOR_3D_IMPL(box,i,j,k,block) \
#define AMREX_PARALLEL_FOR_3D_IMPL(box,i,j,k,block) \
{ \
const auto amrex_i_lo = amrex::lbound(box); \
const auto amrex_i_hi = amrex::ubound(box); \
Expand All @@ -69,7 +69,7 @@
}}} \
}

#define AMREX_GPU_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
#define AMREX_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
{ \
const auto amrex_i_lo = amrex::lbound(box); \
const auto amrex_i_hi = amrex::ubound(box); \
Expand All @@ -81,7 +81,7 @@
}}}} \
}

#define AMREX_GPU_PARALLEL_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
#define AMREX_PARALLEL_FOR_4D_IMPL(box,ncomp,i,j,k,n,block) \
{ \
const auto amrex_i_lo = amrex::lbound(box); \
const auto amrex_i_hi = amrex::ubound(box); \
Expand All @@ -94,19 +94,18 @@
}}}} \
}

#define AMREX_GPU_HOST_DEVICE_FOR_1D(...) AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_GPU_DEVICE_FOR_1D(...) AMREX_GPU_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_FOR_1D(...) AMREX_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_FOR_3D(...) AMREX_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_FOR_4D(...) AMREX_FOR_4D_IMPL(__VA_ARGS__);

#define AMREX_GPU_HOST_DEVICE_FOR_3D(...) AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_GPU_DEVICE_FOR_3D(...) AMREX_GPU_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_PARALLEL_FOR_1D(...) AMREX_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_PARALLEL_FOR_3D(...) AMREX_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_PARALLEL_FOR_4D(...) AMREX_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);

#define AMREX_GPU_HOST_DEVICE_FOR_4D(...) AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
#define AMREX_GPU_DEVICE_FOR_4D(...) AMREX_GPU_FOR_4D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_FOR_1D(...) AMREX_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_FOR_3D(...) AMREX_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_FOR_4D(...) AMREX_FOR_4D_IMPL(__VA_ARGS__);

#define AMREX_GPU_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_GPU_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_GPU_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);

#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_GPU_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_GPU_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_GPU_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_GPU_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_PARALLEL_FOR_1D(...) AMREX_PARALLEL_FOR_1D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_PARALLEL_FOR_3D(...) AMREX_PARALLEL_FOR_3D_IMPL(__VA_ARGS__);
#define AMREX_HOST_DEVICE_PARALLEL_FOR_4D(...) AMREX_PARALLEL_FOR_4D_IMPL(__VA_ARGS__);
Loading

0 comments on commit 9d3beb4

Please sign in to comment.