-
Notifications
You must be signed in to change notification settings - Fork 849
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hybrid parallel (OpenMP) support for incompressible solvers #1178
Changes from 9 commits
9c958b7
203369d
f6bc631
d858f70
bbfd9c0
fc4cfd1
cc2946e
ce04458
5bad047
dec8abf
396e8d5
7bd274a
6118dff
30184d3
726f4e7
afe4928
a7e6695
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -125,6 +125,8 @@ class CFVMFlowSolverBase : public CSolver { | |
AeroCoeffsArray SurfaceCoeff; /*!< \brief Totals for each monitoring surface. */ | ||
AeroCoeffs TotalCoeff; /*!< \brief Totals for all boundaries. */ | ||
|
||
su2double AeroCoeffForceRef = 1.0; /*!< \brief Reference force for aerodynamic coefficients. */ | ||
|
||
su2double InverseDesign = 0.0; /*!< \brief Inverse design functional for each boundary. */ | ||
su2double Total_ComboObj = 0.0; /*!< \brief Total 'combo' objective for all monitored boundaries */ | ||
su2double Total_Custom_ObjFunc = 0.0; /*!< \brief Total custom objective function for all the boundaries. */ | ||
|
@@ -249,6 +251,22 @@ class CFVMFlowSolverBase : public CSolver { | |
*/ | ||
inline virtual void InstantiateEdgeNumerics(const CSolver* const* solvers, const CConfig* config) {} | ||
|
||
/*! | ||
* \brief Compute the viscous contribution for a particular edge. | ||
* \note The convective residual methods include a call to this for each edge, | ||
* this allows convective and viscous loops to be "fused". | ||
* \param[in] iEdge - Edge for which the flux and Jacobians are to be computed. | ||
* \param[in] geometry - Geometrical definition of the problem. | ||
* \param[in] solver_container - Container vector with all the solutions. | ||
* \param[in] numerics - Description of the numerical method. | ||
* \param[in] config - Definition of the particular problem. | ||
*/ | ||
inline virtual void Viscous_Residual(unsigned long iEdge, CGeometry *geometry, CSolver **solver_container, | ||
CNumerics *numerics, CConfig *config) { } | ||
void Viscous_Residual_impl(unsigned long iEdge, CGeometry *geometry, CSolver **solver_container, | ||
CNumerics *numerics, CConfig *config); | ||
using CSolver::Viscous_Residual; /*--- Silence warning ---*/ | ||
Comment on lines
+264
to
+268
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea behind this to be able to 'fake' override Viscous_Residual for viscous flows with just calling Viscous_Residual_impl because for inviscid flow the empty implementation from here is taken. This trick has to be used because FVMBase -> Euler -> NS is the inheritance and Viscous_Residual itself cannot be overloaded because Euler is the middleman? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, in Cintegration the empty impl of CSolver is used and in the centered/upwind residual the correct Viscous_Residual impl with the Viscous_Residual_impl init ... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep that's all correct. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Except the self flagellation part, no need for that xD |
||
|
||
/*! | ||
* \brief Generic implementation to compute the time step based on CFL and conv/visc eigenvalues. | ||
* \param[in] geometry - Geometrical definition of the problem. | ||
|
@@ -949,36 +967,90 @@ class CFVMFlowSolverBase : public CSolver { | |
|
||
/*! | ||
* \brief Evaluate the vorticity and strain rate magnitude. | ||
* \tparam VelocityOffset: Index in the primitive variables where the velocity starts. | ||
*/ | ||
inline void ComputeVorticityAndStrainMag(const CConfig& config, unsigned short iMesh) { | ||
template<size_t VelocityOffset> | ||
void ComputeVorticityAndStrainMag(const CConfig& config, unsigned short iMesh) { | ||
|
||
const auto& Gradient_Primitive = nodes->GetGradient_Primitive(); | ||
auto& StrainMag = nodes->GetStrainMag(); | ||
|
||
SU2_OMP_MASTER { | ||
StrainMag_Max = 0.0; | ||
Omega_Max = 0.0; | ||
} | ||
SU2_OMP_BARRIER | ||
|
||
nodes->SetVorticity_StrainMag(); | ||
su2double strainMax = 0.0, omegaMax = 0.0; | ||
|
||
/*--- Min and Max are not really differentiable ---*/ | ||
const bool wasActive = AD::BeginPassive(); | ||
SU2_OMP_FOR_STAT(omp_chunk_size) | ||
for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint) { | ||
|
||
su2double strainMax = 0.0, omegaMax = 0.0; | ||
constexpr size_t u = VelocityOffset; | ||
constexpr size_t v = VelocityOffset+1; | ||
constexpr size_t w = VelocityOffset+2; | ||
|
||
SU2_OMP(for schedule(static,omp_chunk_size) nowait) | ||
for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) { | ||
strainMax = max(strainMax, nodes->GetStrainMag(iPoint)); | ||
omegaMax = max(omegaMax, GeometryToolbox::Norm(3, nodes->GetVorticity(iPoint))); | ||
} | ||
SU2_OMP_CRITICAL { | ||
StrainMag_Max = max(StrainMag_Max, strainMax); | ||
Omega_Max = max(Omega_Max, omegaMax); | ||
/*--- Vorticity ---*/ | ||
|
||
su2double* Vorticity = nodes->GetVorticity(iPoint); | ||
|
||
Vorticity[0] = 0.0; Vorticity[1] = 0.0; | ||
|
||
Vorticity[2] = Gradient_Primitive(iPoint,v,0)-Gradient_Primitive(iPoint,u,1); | ||
|
||
if (nDim == 3) { | ||
Vorticity[0] = Gradient_Primitive(iPoint,w,1)-Gradient_Primitive(iPoint,v,2); | ||
Vorticity[1] = -(Gradient_Primitive(iPoint,w,0)-Gradient_Primitive(iPoint,u,2)); | ||
} | ||
|
||
/*--- Strain Magnitude ---*/ | ||
|
||
AD::StartPreacc(); | ||
AD::SetPreaccIn(&Gradient_Primitive[iPoint][VelocityOffset], nDim, nDim); | ||
|
||
su2double Div = 0.0; | ||
for (unsigned long iDim = 0; iDim < nDim; iDim++) | ||
Div += Gradient_Primitive(iPoint, iDim+VelocityOffset, iDim); | ||
Div /= 3.0; | ||
|
||
StrainMag(iPoint) = 0.0; | ||
|
||
/*--- Add diagonal part ---*/ | ||
|
||
for (unsigned long iDim = 0; iDim < nDim; iDim++) { | ||
StrainMag(iPoint) += pow(Gradient_Primitive(iPoint, iDim+VelocityOffset, iDim) - Div, 2); | ||
} | ||
if (nDim == 2) { | ||
StrainMag(iPoint) += pow(Div, 2); | ||
} | ||
|
||
/*--- Add off diagonals ---*/ | ||
|
||
StrainMag(iPoint) += 2.0*pow(0.5*(Gradient_Primitive(iPoint,u,1) + Gradient_Primitive(iPoint,v,0)), 2); | ||
|
||
if (nDim == 3) { | ||
StrainMag(iPoint) += 2.0*pow(0.5*(Gradient_Primitive(iPoint,u,2) + Gradient_Primitive(iPoint,w,0)), 2); | ||
StrainMag(iPoint) += 2.0*pow(0.5*(Gradient_Primitive(iPoint,v,2) + Gradient_Primitive(iPoint,w,1)), 2); | ||
} | ||
|
||
StrainMag(iPoint) = sqrt(2.0*StrainMag(iPoint)); | ||
AD::SetPreaccOut(StrainMag(iPoint)); | ||
|
||
/*--- Max is not differentiable, we so not register for preacc. ---*/ | ||
strainMax = max(strainMax, StrainMag(iPoint)); | ||
omegaMax = max(omegaMax, GeometryToolbox::Norm(3, Vorticity)); | ||
|
||
AD::EndPreacc(); | ||
} | ||
|
||
if ((iMesh == MESH_0) && (config.GetComm_Level() == COMM_FULL)) { | ||
SU2_OMP_CRITICAL { | ||
StrainMag_Max = max(StrainMag_Max, strainMax); | ||
Omega_Max = max(Omega_Max, omegaMax); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suppose you do the reduction over the threads yourself because of efficiency, when the if statement is false, but wouldn't it be more readable to have a reduction clause in the OMP for loop? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for having a look at the code Edwin, the issues with these kind of reductions were:
For performance the reduction clause would be more efficient, as the compiler can make min/max atomic, however I do not know of a nice simple way to implement atomic min/max. |
||
SU2_OMP_BARRIER | ||
SU2_OMP_MASTER | ||
{ | ||
SU2_OMP_MASTER { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For efficiency reasons then the Allreduce could be done in a single call with two arguments, as for both Omega and the Strain you compute the maximum. |
||
su2double MyOmega_Max = Omega_Max; | ||
su2double MyStrainMag_Max = StrainMag_Max; | ||
|
||
|
@@ -988,7 +1060,6 @@ class CFVMFlowSolverBase : public CSolver { | |
SU2_OMP_BARRIER | ||
} | ||
|
||
AD::EndPassive(wasActive); | ||
} | ||
|
||
/*! | ||
|
@@ -1490,6 +1561,11 @@ class CFVMFlowSolverBase : public CSolver { | |
*/ | ||
inline su2double GetTotal_CEff() const final { return TotalCoeff.CEff; } | ||
|
||
/*! | ||
* \brief Get the reference force used to compute CL, CD, etc. | ||
*/ | ||
inline su2double GetAeroCoeffsReferenceForce() const final { return AeroCoeffForceRef; } | ||
|
||
/*! | ||
* \brief Provide the total (inviscid + viscous) non dimensional lift coefficient. | ||
* \return Value of the lift coefficient (inviscid + viscous contribution). | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a speed-up due to this? Or is this just for simplification?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fast code goes brrrrrrrrrrrrrrrrrrrr