diff --git a/Tests/GPU/CNS/CMakeLists.txt b/Tests/GPU/CNS/CMakeLists.txt index d7644c36743..6f13c092659 100644 --- a/Tests/GPU/CNS/CMakeLists.txt +++ b/Tests/GPU/CNS/CMakeLists.txt @@ -10,7 +10,7 @@ endif () # set(_sources main.cpp CNS_advance.cpp CNS_bcfill.cpp CNSBld.cpp CNS.cpp CNS_derive.cpp CNS_derive.H CNS.H CNS_index_macros.H CNS_io.cpp CNS_K.H CNS_parm.cpp CNS_parm.H CNS_setup.cpp CNS_tagging.H - hydro/CNS_hydro_K.H) + hydro/CNS_hydro_K.H diffusion/CNS_diffusion_K.H) list(TRANSFORM _sources PREPEND Source/) ########################################################################################## diff --git a/Tests/GPU/CNS/Exec/RT/inputs b/Tests/GPU/CNS/Exec/RT/inputs index ac3bbcb7130..e4c8588bf48 100644 --- a/Tests/GPU/CNS/Exec/RT/inputs +++ b/Tests/GPU/CNS/Exec/RT/inputs @@ -20,6 +20,8 @@ cns.hi_bc = 0 0 4 cns.gravity = -1.0 cns.eos_gamma = 1.6666666666 +cns.do_visc = false + cns.cfl = 0.3 # cfl number for hyperbolic system cns.refine_max_dengrad_lev = 10 diff --git a/Tests/GPU/CNS/Exec/RT/inputs-rt b/Tests/GPU/CNS/Exec/RT/inputs-rt index a0c3b458acf..cb8fb58b88e 100644 --- a/Tests/GPU/CNS/Exec/RT/inputs-rt +++ b/Tests/GPU/CNS/Exec/RT/inputs-rt @@ -20,6 +20,8 @@ cns.hi_bc = 0 0 4 cns.gravity = -1.0 cns.eos_gamma = 1.6666666666 +cns.do_visc = false + cns.cfl = 0.3 # cfl number for hyperbolic system cns.refine_max_dengrad_lev = 10 diff --git a/Tests/GPU/CNS/Exec/Sod/inputs b/Tests/GPU/CNS/Exec/Sod/inputs index aa246fcf872..31171261b0b 100644 --- a/Tests/GPU/CNS/Exec/Sod/inputs +++ b/Tests/GPU/CNS/Exec/Sod/inputs @@ -19,6 +19,8 @@ cns.hi_bc = 2 0 0 cns.cfl = 0.3 # cfl number for hyperbolic system +cns.do_visc = false + cns.v = 2 amr.v = 1 diff --git a/Tests/GPU/CNS/Exec/Sod/inputs-rt b/Tests/GPU/CNS/Exec/Sod/inputs-rt index 2e64a224a6c..e90ecf7f04c 100644 --- a/Tests/GPU/CNS/Exec/Sod/inputs-rt +++ b/Tests/GPU/CNS/Exec/Sod/inputs-rt @@ -19,6 +19,8 @@ cns.hi_bc = 2 0 0 cns.cfl = 0.3 # cfl number for hyperbolic system +cns.do_visc = false + cns.v = 2 amr.v = 1 diff --git a/Tests/GPU/CNS/Source/CNS.H b/Tests/GPU/CNS/Source/CNS.H index 69bb07b6287..104e84a181f 100644 --- a/Tests/GPU/CNS/Source/CNS.H +++ b/Tests/GPU/CNS/Source/CNS.H @@ -156,6 +156,9 @@ protected: static int do_reflux; + static bool do_visc; + static bool use_const_visc; + static int refine_max_dengrad_lev; static amrex::Real refine_dengrad; diff --git a/Tests/GPU/CNS/Source/CNS.cpp b/Tests/GPU/CNS/Source/CNS.cpp index aec9b58f0a2..c3b5e2fb600 100644 --- a/Tests/GPU/CNS/Source/CNS.cpp +++ b/Tests/GPU/CNS/Source/CNS.cpp @@ -22,6 +22,9 @@ int CNS::do_reflux = 1; int CNS::refine_max_dengrad_lev = -1; Real CNS::refine_dengrad = 1.0e10; +bool CNS::do_visc = true; // diffusion is on by default +bool CNS::use_const_visc = false; // diffusion does not use constant viscosity by default + Real CNS::gravity = 0.0; CNS::CNS () @@ -98,6 +101,9 @@ CNS::initData () cns_initdata(i, j, k, sfab, geomdata, *lparm, *lprobparm); }); } + + // Compute the initial temperature (will override what was set in initdata) + computeTemp(S_new,0); } void @@ -349,12 +355,37 @@ CNS::read_params () pp.query("do_reflux", do_reflux); + pp.query("do_visc", do_visc); + + if (do_visc) + { + pp.query("use_const_visc",use_const_visc); + if (use_const_visc) + { + pp.get("const_visc_mu",h_parm->const_visc_mu); + pp.get("const_visc_ki",h_parm->const_visc_ki); + pp.get("const_lambda" ,h_parm->const_lambda); + } + } else { + use_const_visc = true; + h_parm->const_visc_mu = 0.0; + h_parm->const_visc_ki = 0.0; + h_parm->const_lambda = 0.0; + } + pp.query("refine_max_dengrad_lev", refine_max_dengrad_lev); pp.query("refine_dengrad", refine_dengrad); pp.query("gravity", gravity); pp.query("eos_gamma", h_parm->eos_gamma); + pp.query("eos_mu" , h_parm->eos_mu); + + pp.query("eos_gamma", h_parm->eos_gamma); + pp.query("eos_mu" , h_parm->eos_mu); + pp.query("Pr" , h_parm->Pr); + pp.query("C_S" , h_parm->C_S); + pp.query("T_S" , h_parm->T_S); h_parm->Initialize(); amrex::Gpu::copy(amrex::Gpu::hostToDevice, h_parm, h_parm+1, d_parm); diff --git a/Tests/GPU/CNS/Source/CNS_advance.cpp b/Tests/GPU/CNS/Source/CNS_advance.cpp index f25083f4e28..c086cac0e9f 100644 --- a/Tests/GPU/CNS/Source/CNS_advance.cpp +++ b/Tests/GPU/CNS/Source/CNS_advance.cpp @@ -1,6 +1,7 @@ #include "CNS.H" #include "CNS_hydro_K.H" +#include "CNS_diffusion_K.H" #include "CNS_K.H" using namespace amrex; @@ -68,6 +69,7 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, const int neqns = 5; const int ncons = 7; const int nprim = 8; + const int ncoef = 3; Array fluxes; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { @@ -77,7 +79,7 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, Parm const* lparm = d_parm; - FArrayBox qtmp, slopetmp; + FArrayBox qtmp, slopetmp, diff_coeff; for (MFIter mfi(S); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); @@ -91,14 +93,40 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, const Box& bxg2 = amrex::grow(bx,2); qtmp.resize(bxg2, nprim); Elixir qeli = qtmp.elixir(); + Elixir dcoeff_eli; auto const& q = qtmp.array(); + if (do_visc) + { + diff_coeff.resize(bxg2, ncoef); + dcoeff_eli = diff_coeff.elixir(); + } + amrex::ParallelFor(bxg2, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { cns_ctoprim(i, j, k, sfab, q, *lparm); }); + if (do_visc) + { + auto const& coefs = diff_coeff.array(); + if(use_const_visc == 1 ) { + amrex::ParallelFor(bxg2, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cns_constcoef(i, j, k, q, coefs, *lparm); + }); + } else { + amrex::ParallelFor(bxg2, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cns_diffcoef(i, j, k, q, coefs, *lparm); + }); + } + } + + const Box& bxg1 = amrex::grow(bx,1); slopetmp.resize(bxg1,neqns); Elixir slopeeli = slopetmp.elixir(); @@ -120,6 +148,16 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, for (int n = neqns; n < ncons; ++n) fxfab(i,j,k,n) = Real(0.0); }); + if (do_visc) + { + auto const& coefs = diff_coeff.array(); + amrex::ParallelFor(xflxbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cns_diff_x(i, j, k, q, coefs, dxinv, fxfab, *lparm); + }); + } + // y-direction cdir = 1; const Box& yslpbx = amrex::grow(bx, cdir, 1); @@ -136,6 +174,16 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, for (int n = neqns; n < ncons; ++n) fyfab(i,j,k,n) = Real(0.0); }); + if (do_visc) + { + auto const& coefs = diff_coeff.array(); + amrex::ParallelFor(yflxbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cns_diff_y(i, j, k, q, coefs, dxinv, fyfab, *lparm); + }); + } + // z-direction cdir = 2; const Box& zslpbx = amrex::grow(bx, cdir, 1); @@ -152,10 +200,24 @@ CNS::compute_dSdt (const MultiFab& S, MultiFab& dSdt, Real dt, for (int n = neqns; n < ncons; ++n) fzfab(i,j,k,n) = Real(0.0); }); + if (do_visc) + { + auto const& coefs = diff_coeff.array(); + amrex::ParallelFor(zflxbx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + cns_diff_z(i, j, k, q, coefs, dxinv, fzfab, *lparm); + }); + } + // don't have to do this, but we could qeli.clear(); // don't need them anymore slopeeli.clear(); + if (do_visc) { + dcoeff_eli.clear(); + } + amrex::ParallelFor(bx, ncons, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { diff --git a/Tests/GPU/CNS/Source/CNS_index_macros.H b/Tests/GPU/CNS/Source/CNS_index_macros.H index e36fe682cad..bff05848c22 100644 --- a/Tests/GPU/CNS/Source/CNS_index_macros.H +++ b/Tests/GPU/CNS/Source/CNS_index_macros.H @@ -20,4 +20,8 @@ #define QTEMP 7 #define NPRIM 8 +#define CETA 0 +#define CXI 1 +#define CLAM 2 + #endif diff --git a/Tests/GPU/CNS/Source/CNS_parm.H b/Tests/GPU/CNS/Source/CNS_parm.H index 0b1676f2fc4..cdfea7e34ed 100644 --- a/Tests/GPU/CNS/Source/CNS_parm.H +++ b/Tests/GPU/CNS/Source/CNS_parm.H @@ -19,6 +19,10 @@ struct Parm amrex::Real smallr = 1.e-19; amrex::Real smallp = 1.e-10; + amrex::Real const_visc_mu = -1.0; + amrex::Real const_visc_ki = -1.0; + amrex::Real const_lambda = -1.0; + void Initialize (); }; diff --git a/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H new file mode 100644 index 00000000000..b9bf5a18f78 --- /dev/null +++ b/Tests/GPU/CNS/Source/diffusion/CNS_diffusion_K.H @@ -0,0 +1,150 @@ +#ifndef CNS_DIFFUSION_K_H_ +#define CNS_DIFFUSION_K_H_ + +#include "CNS_index_macros.H" +#include "CNS_parm.H" +#include +#include +#include + +AMREX_GPU_DEVICE +inline +void +cns_diffcoef (int i, int j, int k, + amrex::Array4 const& q, + amrex::Array4 const& coefs, + Parm const& parm) noexcept +{ + using amrex::Real; + + coefs(i,j,k,CETA) = parm.C_S * std::sqrt(q(i,j,k,QTEMP)) * q(i,j,k,QTEMP) / (q(i,j,k,QTEMP)+parm.T_S); + coefs(i,j,k,CXI) = Real(0.0); + coefs(i,j,k,CLAM) = coefs(i,j,k,CETA)*parm.cp/parm.Pr; +} + +AMREX_GPU_DEVICE +inline +void +cns_constcoef (int i, int j, int k, + amrex::Array4 const& q, + amrex::Array4 const& coefs, + Parm const& parm) noexcept +{ + using amrex::Real; + + coefs(i,j,k,CETA) = parm.const_visc_mu; + coefs(i,j,k,CXI) = parm.const_visc_ki; + coefs(i,j,k,CLAM) = parm.const_lambda; +} + +AMREX_GPU_DEVICE +inline +void +cns_diff_x (int i, int j, int k, + amrex::Array4 const& q, + amrex::Array4 const& coeffs, + amrex::GpuArray const& dxinv, + amrex::Array4 const& fx, + Parm const& parm) noexcept +{ + using amrex::Real; + + Real dTdx = (q(i,j,k,QTEMP)-q(i-1,j,k,QTEMP))*dxinv[0]; + Real dudx = (q(i,j,k,QU)-q(i-1,j,k,QU))*dxinv[0]; + Real dvdx = (q(i,j,k,QV)-q(i-1,j,k,QV))*dxinv[0]; + Real dwdx = (q(i,j,k,QW)-q(i-1,j,k,QW))*dxinv[0]; + Real dudy = (q(i,j+1,k,QU)+q(i-1,j+1,k,QU)-q(i,j-1,k,QU)-q(i-1,j-1,k,QU))*(0.25*dxinv[1]); + Real dvdy = (q(i,j+1,k,QV)+q(i-1,j+1,k,QV)-q(i,j-1,k,QV)-q(i-1,j-1,k,QV))*(0.25*dxinv[1]); + Real dudz = (q(i,j,k+1,QU)+q(i-1,j,k+1,QU)-q(i,j,k-1,QU)-q(i-1,j,k-1,QU))*(0.25*dxinv[2]); + Real dwdz = (q(i,j,k+1,QW)+q(i-1,j,k+1,QW)-q(i,j,k-1,QW)-q(i-1,j,k-1,QW))*(0.250*dxinv[2]); + Real divu = dudx + dvdy + dwdz; + Real etaf = 0.5*(coeffs(i,j,k,CETA)+coeffs(i-1,j,k,CETA)); + Real xif = 0.5*(coeffs(i,j,k,CXI)+coeffs(i-1,j,k,CXI)); + Real tauxx = etaf*(2.0*dudx-(2.0/3.0)*divu) + xif*divu; + Real tauxy = etaf*(dudy+dvdx); + Real tauxz = etaf*(dudz+dwdx); + + fx(i,j,k,UMX) += -tauxx; + fx(i,j,k,UMY) += -tauxy; + fx(i,j,k,UMZ) += -tauxz; + fx(i,j,k,UEDEN) += -0.5*( (q(i,j,k,QU)+q(i-1,j,k,QU))*tauxx+ + (q(i,j,k,QV)+q(i-1,j,k,QV))*tauxy+ + (q(i,j,k,QW)+q(i-1,j,k,QW))*tauxz+ + (coeffs(i,j,k,CLAM) +coeffs(i-1,j,k,CLAM))*dTdx ); + +} + +AMREX_GPU_DEVICE +inline +void +cns_diff_y (int i, int j, int k, amrex::Array4 const& q, + amrex::Array4 const& coeffs, + amrex::GpuArray const& dxinv, + amrex::Array4 const& fy, + Parm const& parm) noexcept +{ + using amrex::Real; + + Real dTdy = (q(i,j,k,QTEMP)-q(i,j-1,k,QTEMP))*dxinv[1]; + Real dudy = (q(i,j,k,QU)-q(i,j-1,k,QU))*dxinv[1]; + Real dvdy = (q(i,j,k,QV)-q(i,j-1,k,QV))*dxinv[1]; + Real dwdy = (q(i,j,k,QW)-q(i,j-1,k,QW))*dxinv[1]; + Real dudx = (q(i+1,j,k,QU)+q(i+1,j-1,k,QU)-q(i-1,j,k,QU)-q(i-1,j-1,k,QU))*(0.25*dxinv[0]); + Real dvdx = (q(i+1,j,k,QV)+q(i+1,j-1,k,QV)-q(i-1,j,k,QV)-q(i-1,j-1,k,QV))*(0.25*dxinv[0]); + Real dvdz = (q(i,j,k+1,QV)+q(i,j-1,k+1,QV)-q(i,j,k-1,QV)-q(i,j-1,k-1,QV))*(0.25*dxinv[2]); + Real dwdz = (q(i,j,k+1,QW)+q(i,j-1,k+1,QW)-q(i,j,k-1,QW)-q(i,j-1,k-1,QW))*(0.25*dxinv[2]); + Real divu = dudx + dvdy + dwdz; + Real etaf = 0.5*(coeffs(i,j,k,CETA)+coeffs(i,j-1,k,CETA)); + Real xif = 0.5*(coeffs(i,j,k,CXI)+coeffs(i,j-1,k,CXI)); + Real tauyy = etaf*(2.0*dvdy-(2.0/3.0)*divu) + xif*divu; + Real tauxy = etaf*(dudy+dvdx); + Real tauyz = etaf*(dwdy+dvdz); + + + fy(i,j,k,UMX) += -tauxy; + fy(i,j,k,UMY) += -tauyy; + fy(i,j,k,UMZ) += -tauyz; + fy(i,j,k,UEDEN) += -0.5*( (q(i,j,k,QU)+q(i,j-1,k,QU))*tauxy + +(q(i,j,k,QV)+q(i,j-1,k,QV))*tauyy + +(q(i,j,k,QW)+q(i,j-1,k,QW))*tauyz + +(coeffs(i,j,k,CLAM) +coeffs(i,j-1,k,CLAM))*dTdy ); + +} + +AMREX_GPU_DEVICE +inline +void +cns_diff_z (int i, int j, int k, + amrex::Array4 const& q, + amrex::Array4 const& coeffs, + amrex::GpuArray const& dxinv, + amrex::Array4 const& fz, + Parm const& parm) noexcept +{ + using amrex::Real; + + Real dTdz = (q(i,j,k,QTEMP)-q(i,j,k-1,QTEMP))*dxinv[2]; + Real dudz = (q(i,j,k,QU)-q(i,j,k-1,QU))*dxinv[2]; + Real dvdz = (q(i,j,k,QV)-q(i,j,k-1,QV))*dxinv[2]; + Real dwdz = (q(i,j,k,QW)-q(i,j,k-1,QW))*dxinv[2]; + Real dudx = (q(i+1,j,k,QU)+q(i+1,j,k-1,QU)-q(i-1,j,k,QU)-q(i-1,j,k-1,QU))*(0.25*dxinv[0]); + Real dwdx = (q(i+1,j,k,QW)+q(i+1,j,k-1,QW)-q(i-1,j,k,QW)-q(i-1,j,k-1,QW))*(0.25*dxinv[0]); + Real dvdy = (q(i,j+1,k,QV)+q(i,j+1,k-1,QV)-q(i,j-1,k,QV)-q(i,j-1,k-1,QV))*(0.25*dxinv[1]); + Real dwdy = (q(i,j+1,k,QW)+q(i,j+1,k-1,QW)-q(i,j-1,k,QW)-q(i,j-1,k-1,QW))*(0.25*dxinv[1]); + Real divu = dudx + dvdy + dwdz; + Real etaf = 0.5*(coeffs(i,j,k,CETA)+coeffs(i,j,k-1,CETA)); + Real xif = 0.5*(coeffs(i,j,k,CXI)+coeffs(i,j,k-1,CXI)); + Real tauxz = etaf*(dudz+dwdx); + Real tauyz = etaf*(dvdz+dwdy); + Real tauzz = etaf*(2.0*dwdz-(2.0/3.0)*divu) + xif*divu; + + fz(i,j,k,UMX) += -tauxz; + fz(i,j,k,UMY) += -tauyz; + fz(i,j,k,UMZ) += -tauzz; + fz(i,j,k,UEDEN) += -0.5*( (q(i,j,k,QU)+q(i,j,k-1,QU))*tauxz + +(q(i,j,k,QV)+q(i,j,k-1,QV))*tauyz + +(q(i,j,k,QW)+q(i,j,k-1,QW))*tauzz + +(coeffs(i,j,k,CLAM) +coeffs(i,j,k-1,CLAM))*dTdz ); +} + +#endif diff --git a/Tests/GPU/CNS/Source/diffusion/Make.package b/Tests/GPU/CNS/Source/diffusion/Make.package index 8b137891791..4b778f38904 100644 --- a/Tests/GPU/CNS/Source/diffusion/Make.package +++ b/Tests/GPU/CNS/Source/diffusion/Make.package @@ -1 +1 @@ - +CEXE_headers += CNS_diffusion_K.H diff --git a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H index ff81a9a4325..a570444f2c0 100644 --- a/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H +++ b/Tests/GPU/CNS/Source/hydro/CNS_hydro_K.H @@ -25,6 +25,8 @@ cns_ctoprim (int i, int j, int k, Real ei = u(i,j,k,UEDEN) - kineng; if (ei <= Real(0.0)) ei = u(i,j,k,UEINT); Real p = amrex::max((parm.eos_gamma-Real(1.0))*ei,parm.smallp); + + // q(i,j,k,QEINT) is e, not (rho e) ei *= rhoinv; q(i,j,k,QRHO) = rho; @@ -34,7 +36,7 @@ cns_ctoprim (int i, int j, int k, q(i,j,k,QEINT) = ei; q(i,j,k,QPRES) = p; q(i,j,k,QCS) = std::sqrt(parm.eos_gamma*p*rhoinv); - q(i,j,k,QTEMP) = Real(0.0); + q(i,j,k,QTEMP) = ei / parm.cv; } AMREX_GPU_DEVICE