Skip to content

Commit

Permalink
Guard some stream syncs
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlexanderSinn committed Feb 26, 2025
1 parent 06b4a5b commit 056f54a
Show file tree
Hide file tree
Showing 10 changed files with 105 additions and 35 deletions.
20 changes: 15 additions & 5 deletions Src/AmrCore/AMReX_MFInterpolater.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ MFPCInterp::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int fco
fine[box_no](i,j,k,n+fcomp) = crse[box_no](ic,jc,kc,n+ccomp);)
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -202,7 +204,9 @@ MFCellConsLinInterp::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf
});
}

- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -373,7 +377,9 @@ MFCellConsLinMinmaxLimitInterp::interp (MultiFab const& crsemf, int ccomp, Multi
}
});

- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -457,7 +463,9 @@ MFCellBilinear::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int
mf_cell_bilin_interp(i,j,k,n, fine[box_no], fcomp, crse[box_no], ccomp, ratio);
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -512,7 +520,9 @@ MFNodeBilinear::interp (MultiFab const& crsemf, int ccomp, MultiFab& finemf, int
mf_nodebilin_interp(i,j,k,n, fine[box_no], fcomp, crse[box_no], ccomp, ratio);
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down
20 changes: 15 additions & 5 deletions Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H
Original file line number Diff line number Diff line change
Expand Up @@ -841,7 +841,9 @@ MLABecLaplacianT<MF>::Fapply (int amrlev, int mglev, MF& out, const MF& in) cons
dxinv, ascalar, bscalar);
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -1031,7 +1033,9 @@ MLABecLaplacianT<MF>::Fsmooth (int amrlev, int mglev, MF& sol, const MF& rhs, in
});
}
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -1271,7 +1275,9 @@ MLABecLaplacianT<MF>::normalize (int amrlev, int mglev, MF& mf) const
AMREX_D_DECL(bxma[box_no],byma[box_no],bzma[box_no]),
dxinv, ascalar, bscalar);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -1383,7 +1389,9 @@ MLABecLaplacianT<MF>::makeNLinOp (int /*grid_size*/) const
ama[box_no](i,j,k,n) = huge_alpha;
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -1434,7 +1442,9 @@ MLABecLaplacianT<MF>::copyNSolveSolution (MF& dst, MF const& src) const
dstma[box_no](i,j,k,n) = RT(0.0);
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down
12 changes: 9 additions & 3 deletions Src/LinearSolvers/MLMG/AMReX_MLCellABecLap.H
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ MLCellABecLapT<MF>::define (const Vector<Geometry>& a_geom,
{
coarsen_overset_mask(i,j,k, crsema[box_no], finema[box_no]);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -263,7 +265,9 @@ MLCellABecLapT<MF>::setDirichletNodesToZero (int amrlev, int mglev, MF& mf) cons
{
if (mskma[bno](i,j,k) == 0) { ma[bno](i,j,k,n) = RT(0.0); }
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}
}

Expand Down Expand Up @@ -639,7 +643,9 @@ MLCellABecLapT<MF>::applyOverset (int amrlev, MF& rhs) const
rhsa[box_no](i,j,k,n) = RT(0.0);
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down
20 changes: 15 additions & 5 deletions Src/LinearSolvers/MLMG/AMReX_MLCurlCurl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ void MLCurlCurl::restriction (int amrlev, int cmglev, MF& crse, MF& fine) const
{
mlcurlcurl_restriction(idim,i,j,k,crsema[bno],finema[bno],dinfo);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }

if (need_parallel_copy) {
crse[idim].ParallelCopy(cfine);
Expand Down Expand Up @@ -228,7 +230,9 @@ void MLCurlCurl::interpolation (int amrlev, int fmglev, MF& fine,
}
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}

void
Expand Down Expand Up @@ -377,15 +381,19 @@ void MLCurlCurl::smooth1D (int amrlev, int mglev, MF& sol, MF const& rhs,
bcx[bno],bcy[bno],bcz[bno],
adxinv,color,dinfo);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else {
ParallelFor( nmf, [=] AMREX_GPU_DEVICE(int bno, int i, int j, int k)
{
mlcurlcurl_1D(i,j,k,ex[bno],ey[bno],ez[bno],
rhsx[bno],rhsy[bno],rhsz[bno],
b,adxinv,color,dinfo);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}
}
#endif
Expand Down Expand Up @@ -450,7 +458,9 @@ void MLCurlCurl::smooth4 (int amrlev, int mglev, MF& sol, MF const& rhs,
});
}
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}
#endif

Expand Down
20 changes: 15 additions & 5 deletions Src/LinearSolvers/MLMG/AMReX_MLNodeABecLaplacian.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ MLNodeABecLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFa
yarr_ma[box_no](i,j,k) = (dmskarr_ma[box_no](i,j,k)) ? Real(0.0)
: alpha*acoef_ma[box_no](i,j,k)*xarr_ma[box_no](i,j,k) - beta*lap;
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}

void
Expand Down Expand Up @@ -145,7 +147,9 @@ MLNodeABecLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiF
acoef_ma[box_no], bcoef_ma[box_no],
dmskarr_ma[box_no], dxinvarr);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
if (m_smooth_num_sweeps > 1) { nodalSync(amrlev, mglev, sol); }
}
#else
Expand Down Expand Up @@ -193,7 +197,9 @@ MLNodeABecLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiF
{
mlndlap_restriction(i,j,k,pcrse_ma[box_no],fine_ma[box_no],msk_ma[box_no]);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }

if (need_parallel_copy) {
crse.ParallelCopy(cfine);
Expand Down Expand Up @@ -225,7 +231,9 @@ MLNodeABecLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, cons
mlndlap_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no],
sig_ma[box_no], msk_ma[box_no]);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}

void
Expand Down Expand Up @@ -280,7 +288,9 @@ MLNodeABecLaplacian::fixUpResidualMask (int amrlev, iMultiFab& resmsk)
{
if (fmsk[bno](i,j,k) == nodelap_detail::crse_fine_node) { rmsk[bno](i,j,k) = 1; }
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}

void
Expand Down
16 changes: 12 additions & 4 deletions Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,9 @@ MLNodeLaplacian::fixSolvabilityByOffset (int amrlev, int mglev, MultiFab& rhs,
rhs_ma[box_no](i,j,k) -= offset * scale;
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else {
rhs.plus(-offset, 0, 1);
}
Expand Down Expand Up @@ -532,7 +534,9 @@ MLNodeLaplacian::restriction (int amrlev, int cmglev, MultiFab& crse, MultiFab&
mlndlap_restriction_rap(i,j,k,pcrse_ma[box_no],fine_ma[box_no],st_ma[box_no],msk_ma[box_no]);
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -665,7 +669,9 @@ MLNodeLaplacian::interpolation (int amrlev, int fmglev, MultiFab& fine, const Mu
mlndlap_semi_interpadd_aa(i, j, k, fine_ma[box_no], crse_ma[box_no], sig_ma[box_no], msk_ma[box_no], idir);
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -883,7 +889,9 @@ MLNodeLaplacian::normalize (int amrlev, int mglev, MultiFab& mf) const
mlndlap_normalize_aa(i,j,k,ma[box_no],sx_ma[box_no],dmsk_ma[box_no],dxinv);
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down
12 changes: 9 additions & 3 deletions Src/LinearSolvers/MLMG/AMReX_MLNodeLaplacian_misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,9 @@ MLNodeLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const MultiFab& i
#endif
});
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -558,7 +560,9 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab&
}
}

- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
nodalSync(amrlev, mglev, sol);
}
else
Expand Down Expand Up @@ -701,7 +705,9 @@ MLNodeLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const MultiFab&
}
}

- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
}
}

Expand Down
4 changes: 3 additions & 1 deletion Src/LinearSolvers/MLMG/AMReX_MLNodeLinOp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,9 @@ MLNodeLinOp::setDirichletNodesToZero (int amrlev, int mglev, MultiFab& mf) const
{
if (maskma[bno](i,j,k)) { ma[bno](i,j,k,n) = RT(0.0); }
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
#ifdef AMREX_USE_EB
EB_set_covered(mf, 0, ncomp, 0, RT(0.0));
#endif
Expand Down
8 changes: 6 additions & 2 deletions Src/LinearSolvers/MLMG/AMReX_MLNodeTensorLaplacian.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,9 @@ MLNodeTensorLaplacian::Fapply (int amrlev, int mglev, MultiFab& out, const Multi
{
mlndtslap_adotx(i,j,k, out_a[box_no], in_a[box_no], dmsk_a[box_no], s);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
#endif
}

Expand Down Expand Up @@ -261,7 +263,9 @@ MLNodeTensorLaplacian::Fsmooth (int amrlev, int mglev, MultiFab& sol, const Mult
mlndtslap_gauss_seidel(i, j, k, sol_a[box_no], rhs_a[box_no], dmsk_a[box_no], s);
}
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
#endif
}

Expand Down
8 changes: 6 additions & 2 deletions Src/LinearSolvers/MLMG/AMReX_MLPoisson.H
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,9 @@ MLPoissonT<MF>::Fapply (int amrlev, int mglev, MF& out, const MF& in) const
});
}
}
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down Expand Up @@ -333,7 +335,9 @@ MLPoissonT<MF>::normalize (int amrlev, int mglev, MF& mf) const
{
mlpoisson_normalize(i,j,k, ma[box_no], AMREX_D_DECL(dhx,dhy,dhz), dx, probxlo);
});
- Gpu::streamSynchronize();
+ if (!Gpu::inNoSyncRegion()) {
+     Gpu::streamSynchronize();
+ }
} else
#endif
{
Expand Down

0 comments on commit 056f54a

Please sign in to comment.