Merge branch 'vectorize_when_possible' into improve_doxydocs

RichRoos · Sep 25, 2022 · 0c1b07b · 0c1b07b
2 parents e616fa1 + c9af050
commit 0c1b07b
Show file tree

Hide file tree

Showing 27 changed files with 119 additions and 125 deletions.
diff --git a/.github/workflows/release-management.yml b/.github/workflows/release-management.yml
@@ -29,7 +29,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Cache Object Files
-        uses: actions/cache@v1
+        uses: actions/cache@v3
         with:
           path: ccache
           key: ${{ matrix.os_bin }}-${{ github.sha }}
@@ -44,7 +44,7 @@ jobs:
           zip -r ../${{matrix.os_bin}}.zip bin/*
         # Uploads binaries as artifacts (just as a backup)
       - name: Upload Binaries
-        uses: actions/upload-artifact@v1
+        uses: actions/upload-artifact@v3
         with:
           name: ${{matrix.os_bin}}
           path: ${{matrix.os_bin}}.zip

diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp
@@ -114,6 +114,21 @@ namespace SU2_TYPE {
   FORCEINLINE void SetDerivative(su2double &, const passivedouble &) {}
 #endif
 
+  /*!
+   * \brief Get the passive value of any variable. For most types return directly,
+   * specialize for su2double to call GetValue.
+   * \note This is a struct instead of a function because the return type of the
+   * su2double specialization changes.
+   */
+  template <class T>
+  struct Passive {
+    FORCEINLINE static T Value(const T& val) {return val;}
+  };
+  template <>
+  struct Passive<su2double> {
+    FORCEINLINE static passivedouble Value(const su2double& val) {return GetValue(val);}
+  };
+
   /*!
    * \brief Casts the primitive value to int (uses GetValue, already implemented for each type).
    * \param[in] data - The non-primitive datatype.

diff --git a/Common/include/linear_algebra/vector_expressions.hpp b/Common/include/linear_algebra/vector_expressions.hpp
@@ -33,6 +33,7 @@
 #include <cassert>
 #include <cstdlib>
 #include <cmath>
+#include <cstdint>
 
 namespace VecExpr {
 
@@ -158,21 +159,28 @@ FORCEINLINE auto FUN(decay_t<S> u, const CVecExpr<V,S>& v)                    \
   RETURNS( EXPR<Bcast<S>,V,S>(Bcast<S>(u), v.derived())                       \
 )                                                                             \
 
-/*--- std::max/min have issues (maybe because they return by reference).
- * For AD codi::max/min need to be used to avoid issues in debug builds. ---*/
-
-#if defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)
-#define max_impl math::max
-#define min_impl math::min
-#else
-#define max_impl(a,b) a<b? Scalar(b) : Scalar(a)
-#define min_impl(a,b) b<a? Scalar(b) : Scalar(a)
-#endif
-MAKE_BINARY_FUN(fmax, max_, max_impl)
-MAKE_BINARY_FUN(fmin, min_, min_impl)
+/*--- std::max/min have issues (because they return by reference).
+ * fmin and fmax return by value and thus are fine, but they would force
+ * conversions to double, to avoid that we provide integer overloads.
+ * We use int32/64 instead of int/long to avoid issues with Windows,
+ * where long is 32 bits (instead of 64 bits). ---*/
+
+#define MAKE_FMINMAX_OVERLOADS(TYPE)                          \
+FORCEINLINE TYPE fmax(TYPE a, TYPE b) { return a<b? b : a; }  \
+FORCEINLINE TYPE fmin(TYPE a, TYPE b) { return a<b? a : b; }
+MAKE_FMINMAX_OVERLOADS(int32_t)
+MAKE_FMINMAX_OVERLOADS(int64_t)
+MAKE_FMINMAX_OVERLOADS(uint32_t)
+MAKE_FMINMAX_OVERLOADS(uint64_t)
+/*--- Make the float and double versions of fmin/max available in this
+ * namespace to avoid ambiguous overloads. ---*/
+using std::fmax;
+using std::fmin;
+#undef MAKE_FMINMAX_OVERLOADS
+
+MAKE_BINARY_FUN(fmax, max_, fmax)
+MAKE_BINARY_FUN(fmin, min_, fmin)
 MAKE_BINARY_FUN(pow, pow_, math::pow)
-#undef max_impl
-#undef min_impl
 
 /*--- sts::plus and co. were tried, the code was horrendous (due to the forced
  * conversion between different types) and creating functions for these ops
@@ -191,20 +199,25 @@ MAKE_BINARY_FUN(operator/, div_, div_impl)
 #undef mul_impl
 #undef div_impl
 
-/*--- Relational operators need to be cast to the scalar type to allow vectorization. ---*/
-
-#define le_impl(a,b) Scalar(a<=b)
-#define ge_impl(a,b) Scalar(a>=b)
-#define eq_impl(a,b) Scalar(a==b)
-#define ne_impl(a,b) Scalar(a!=b)
-#define lt_impl(a,b) Scalar(a<b)
-#define gt_impl(a,b) Scalar(a>b)
+/*--- Relational operators need to be cast to the scalar type to allow vectorization.
+ * TO_PASSIVE is used to convert active scalars to passive, which CoDi will then capture
+ * by value in its expressions, and thus dangling references are avoided. No AD info
+ * is lost since these operators are non-differentiable. ---*/
+
+#define TO_PASSIVE(IMPL) SU2_TYPE::Passive<Scalar>::Value(IMPL)
+#define le_impl(a,b) TO_PASSIVE(a<=b)
+#define ge_impl(a,b) TO_PASSIVE(a>=b)
+#define eq_impl(a,b) TO_PASSIVE(a==b)
+#define ne_impl(a,b) TO_PASSIVE(a!=b)
+#define lt_impl(a,b) TO_PASSIVE(a<b)
+#define gt_impl(a,b) TO_PASSIVE(a>b)
 MAKE_BINARY_FUN(operator<=, le_, le_impl)
 MAKE_BINARY_FUN(operator>=, ge_, ge_impl)
 MAKE_BINARY_FUN(operator==, eq_, eq_impl)
 MAKE_BINARY_FUN(operator!=, ne_, ne_impl)
 MAKE_BINARY_FUN(operator<, lt_, lt_impl)
 MAKE_BINARY_FUN(operator>, gt_, gt_impl)
+#undef TO_PASSIVE
 #undef le_impl
 #undef ge_impl
 #undef eq_impl

diff --git a/Common/src/grid_movement/CFreeFormDefBox.cpp b/Common/src/grid_movement/CFreeFormDefBox.cpp
@@ -102,19 +102,34 @@ CFreeFormDefBox::CFreeFormDefBox(const unsigned short Degree[], unsigned short B
 CFreeFormDefBox::~CFreeFormDefBox(void) {
   unsigned short iOrder, jOrder, kOrder, iCornerPoints, iDim;
 
-  for (iOrder = 0; iOrder < lOrder; iOrder++)
-    for (jOrder = 0; jOrder < mOrder; jOrder++)
+  for (iOrder = 0; iOrder < lOrder; iOrder++) {
+    for (jOrder = 0; jOrder < mOrder; jOrder++) {
       for (kOrder = 0; kOrder < nOrder; kOrder++) {
         delete [] Coord_Control_Points[iOrder][jOrder][kOrder];
         delete [] ParCoord_Control_Points[iOrder][jOrder][kOrder];
         delete [] Coord_Control_Points_Copy[iOrder][jOrder][kOrder];
+        if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder][kOrder];
       }
+      delete [] Coord_Control_Points[iOrder][jOrder];
+      delete [] ParCoord_Control_Points[iOrder][jOrder];
+      delete [] Coord_Control_Points_Copy[iOrder][jOrder];
+      if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder];
+    }
+    delete [] Coord_Control_Points[iOrder];
+    delete [] ParCoord_Control_Points[iOrder];
+    delete [] Coord_Control_Points_Copy[iOrder];
+    if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder];
+    }
+
   delete [] Coord_Control_Points;
   delete [] ParCoord_Control_Points;
   delete [] Coord_Control_Points_Copy;
+  delete [] Coord_SupportCP;
 
   delete [] ParamCoord;
+  delete [] ParamCoord_;
   delete [] cart_coord;
+  delete [] cart_coord_;
   delete [] Gradient;
 
   for (iCornerPoints = 0; iCornerPoints < nCornerPoints; iCornerPoints++)
@@ -969,26 +984,37 @@ su2double *CFreeFormDefBox::GetParametricCoord_Iterative(unsigned long iPoint, s
 }
 
 bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned long iPoint) const {
-  su2double Coord[3] = {0.0, 0.0, 0.0};
-  unsigned short iVar, jVar, iDim;
-  su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;
 
-  bool Inside = false;
+  bool Inside = true;
   bool cylindrical = (config->GetFFD_CoordSystem() == CYLINDRICAL);
   bool spherical = (config->GetFFD_CoordSystem() == SPHERICAL);
   bool polar = (config->GetFFD_CoordSystem() == POLAR);
 
-  unsigned short Index[5][7] = {
-    {0, 1, 2, 5, 0, 1, 2},
-    {0, 2, 7, 5, 0, 2, 7},
-    {0, 2, 3, 7, 0, 2, 3},
-    {0, 5, 7, 4, 0, 5, 7},
-    {2, 7, 5, 6, 2, 7, 5}};
+  /*--- indices of the FFD box. Note that the front face is labelled 0,1,2,3 and the back face is 4,5,6,7 ---*/
+
+  unsigned short Index[6][5] = {
+  {0,1,2,3,0},  // front side
+  {1,5,6,2,1},  // right side
+  {2,6,7,3,2},  // top side
+  {3,7,4,0,3},  // left side
+  {4,5,1,0,4},  // bottom side
+  {4,7,6,5,4}}; // back side
+
+  /*--- The current approach is to subdivide each of the 6 faces of the hexahedral FFD box into 4 triangles 
+      by defining a supporting middle point. This allows nonplanar FFD boxes.
+      Note that the definition of the FFD box is as follows: the FFD box is a 6-sided die and we are looking at the side "1".
+      The opposite side is side "6". 
+      If we are looking at side "1", we define the nodes counterclockwise. 
+      If we are looking at side "6", we define the face clockwise  ---*/
+
   unsigned short nDim = geometry->GetnDim();
 
-  for (iDim = 0; iDim < nDim; iDim++)
+  su2double Coord[3] = {0.0, 0.0, 0.0};
+  for (unsigned short iDim = 0; iDim < nDim; iDim++)
     Coord[iDim] = geometry->nodes->GetCoord(iPoint, iDim);
 
+  su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;
+
   if (cylindrical) {
 
     X_0 = config->GetFFD_Axis(0); Y_0 = config->GetFFD_Axis(1);  Z_0 = config->GetFFD_Axis(2);
@@ -1013,78 +1039,38 @@ bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned
 
   }
 
-  /*--- 1st tetrahedron {V0, V1, V2, V5}
-   2nd tetrahedron {V0, V2, V7, V5}
-   3th tetrahedron {V0, V2, V3, V7}
-   4th tetrahedron {V0, V5, V7, V4}
-   5th tetrahedron {V2, V7, V5, V6} ---*/
+  /*--- loop over the faces of the FFD box  ---*/
+
+  for (unsigned short iVar = 0; iVar < 6; iVar++) {
+
+    su2double P[3] = {0.0, 0.0, 0.0};
+
+    /*--- every face needs an interpolated middle point for the triangles ---*/
+
+    for (int p = 0; p < 4; p++){
+      P[0] += 0.25*Coord_Corner_Points[Index[iVar][p]][0];
+      P[1] += 0.25*Coord_Corner_Points[Index[iVar][p]][1];
+      P[2] += 0.25*Coord_Corner_Points[Index[iVar][p]][2];
+    }
+
+    /*--- loop over the 4 triangles making up the FFD box. The sign is equal for all distances ---*/
 
-  for (iVar = 0; iVar < 5; iVar++) {
-    Inside = true;
-    for (jVar = 0; jVar < 4; jVar++) {
+    for (unsigned short jVar = 0; jVar < 4; jVar++) {
       su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
+                                                                Coord_Corner_Points[Index[iVar][jVar]],
                                                                 Coord_Corner_Points[Index[iVar][jVar+1]],
-                                                                Coord_Corner_Points[Index[iVar][jVar+2]],
-                                                                Coord_Corner_Points[Index[iVar][jVar+3]]);
-
-      su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
-                                                                 Coord_Corner_Points[Index[iVar][jVar+1]],
-                                                                 Coord_Corner_Points[Index[iVar][jVar+2]],
-                                                                 Coord_Corner_Points[Index[iVar][jVar+3]]);
-      if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
+                                                                P);
+      if (Distance_Point < 0) {
+        Inside = false;
+        return Inside;
+      }
     }
-    if (Inside) break;
   }
 
   return Inside;
 
 }
 
-void CFreeFormDefBox::SetDeformationZone(CGeometry *geometry, CConfig *config, unsigned short iFFDBox) const {
-  su2double *Coord;
-  unsigned short iMarker, iVar, jVar;
-  unsigned long iVertex, iPoint;
-  bool Inside = false;
-
-  unsigned short Index[5][7] = {
-    {0, 1, 2, 5, 0, 1, 2},
-    {0, 2, 7, 5, 0, 2, 7},
-    {0, 2, 3, 7, 0, 2, 3},
-    {0, 5, 7, 4, 0, 5, 7},
-    {2, 7, 5, 6, 2, 7, 5}};
-
-  for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
-    if (config->GetMarker_All_DV(iMarker) == YES) {
-      for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
-        iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
-
-        Coord = geometry->nodes->GetCoord(iPoint);
-
-        /*--- 1st tetrahedron {V0, V1, V2, V5}
-         2nd tetrahedron {V0, V2, V7, V5}
-         3th tetrahedron {V0, V2, V3, V7}
-         4th tetrahedron {V0, V5, V7, V4}
-         5th tetrahedron {V2, V7, V5, V6} ---*/
-
-        for (iVar = 0; iVar < 5; iVar++) {
-          Inside = true;
-          for (jVar = 0; jVar < 4; jVar++) {
-            su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
-                                                                   Coord_Corner_Points[Index[iVar][jVar+1]],
-                                                                   Coord_Corner_Points[Index[iVar][jVar+2]],
-                                                                   Coord_Corner_Points[Index[iVar][jVar+3]]);
-            su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
-                                                                    Coord_Corner_Points[Index[iVar][jVar+1]],
-                                                                    Coord_Corner_Points[Index[iVar][jVar+2]],
-                                                                    Coord_Corner_Points[Index[iVar][jVar+3]]);
-            if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
-          }
-          if (Inside) break;
-        }
-      }
-    }
-  }
-}
 
 su2double CFreeFormDefBox::GetDerivative1(su2double *uvw, unsigned short val_diff, unsigned short *ijk, unsigned short *lmn) const {
 

diff --git a/SU2_CFD/include/numerics_simd/flow/convection/common.hpp b/SU2_CFD/include/numerics_simd/flow/convection/common.hpp
@@ -133,10 +133,8 @@ FORCEINLINE CPair<ReconVarType> reconstructPrimitives(Int iEdge, Int iPoint, Int
       break;
     }
     /*--- Detect a non-physical reconstruction based on negative pressure or density. ---*/
-    /*--- Some weird issues with forward AD if this is all done in one line. ---*/
-    const Double neg_p = fmin(V.i.pressure(), V.j.pressure()) < 0.0;
-    const Double neg_rho = fmin(V.i.density(), V.j.density()) < 0.0;
-    const Double neg_p_or_rho = fmax(neg_p, neg_rho);
+    const Double neg_p_or_rho = fmax(fmin(V.i.pressure(), V.j.pressure()) < 0.0,
+                                     fmin(V.i.density(), V.j.density()) < 0.0);
     /*--- Test the sign of the Roe-averaged speed of sound. ---*/
     const Double R = sqrt(abs(V.j.density() / V.i.density()));
     /*--- Delay dividing by R+1 until comparing enthalpy and velocity magnitude. ---*/

diff --git a/TestCases/disc_adj_rans/naca0012/turb_NACA0012_sst.cfg b/TestCases/disc_adj_rans/naca0012/turb_NACA0012_sst.cfg
@@ -143,7 +143,6 @@ MG_DAMP_PROLONGATION= 0.75
 % Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
 %                              TURKEL_PREC, MSW)
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 %
 % Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
 MUSCL_FLOW= YES

diff --git a/TestCases/navierstokes/flatplate/lam_flatplate.cfg b/TestCases/navierstokes/flatplate/lam_flatplate.cfg
@@ -69,7 +69,6 @@ MG_DAMP_PROLONGATION= 0.75
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 MUSCL_FLOW= YES
 SLOPE_LIMITER_FLOW= NONE
 TIME_DISCRE_FLOW= EULER_IMPLICIT

diff --git a/TestCases/navierstokes/flatplate/lam_flatplate_unst.cfg b/TestCases/navierstokes/flatplate/lam_flatplate_unst.cfg
@@ -69,7 +69,6 @@ LINEAR_SOLVER_ITER= 2
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 MUSCL_FLOW= YES
 SLOPE_LIMITER_FLOW= NONE
 

diff --git a/TestCases/navierstokes/periodic2D/config.cfg b/TestCases/navierstokes/periodic2D/config.cfg
@@ -55,7 +55,6 @@ LINEAR_SOLVER_ITER= 4
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 MUSCL_FLOW= YES
 SLOPE_LIMITER_FLOW= VENKATAKRISHNAN_WANG
 VENKAT_LIMITER_COEFF= 0.05

diff --git a/TestCases/py_wrapper/translating_NACA0012/config.cfg b/TestCases/py_wrapper/translating_NACA0012/config.cfg
@@ -50,7 +50,6 @@ VENKAT_LIMITER_COEFF= 0.1
 
 % SOLUTION ACCELERATION
 %
-USE_VECTORIZATION= YES
 CFL_NUMBER= 1e3
 CFL_ADAPT= NO
 %

diff --git a/TestCases/rans/naca0012/turb_NACA0012_sa.cfg b/TestCases/rans/naca0012/turb_NACA0012_sa.cfg
@@ -145,7 +145,6 @@ MG_DAMP_PROLONGATION= 0.75
 % Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
 %                              TURKEL_PREC, MSW)
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 %
 % Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
 MUSCL_FLOW= YES

diff --git a/TestCases/rans/naca0012/turb_NACA0012_sst.cfg b/TestCases/rans/naca0012/turb_NACA0012_sst.cfg
@@ -147,7 +147,6 @@ MG_DAMP_PROLONGATION= 0.75
 % Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
 %                              TURKEL_PREC, MSW)
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 %
 % Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
 MUSCL_FLOW= YES

diff --git a/TestCases/rans/naca0012/turb_NACA0012_sst_1994-KLm.cfg b/TestCases/rans/naca0012/turb_NACA0012_sst_1994-KLm.cfg
@@ -65,7 +65,6 @@ LINEAR_SOLVER_ITER= 20
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 MUSCL_FLOW= YES
 SLOPE_LIMITER_FLOW= NONE
 JST_SENSOR_COEFF= ( 0.5, 0.02 )

diff --git a/TestCases/rans/naca0012/turb_NACA0012_sst_2003-Vm.cfg b/TestCases/rans/naca0012/turb_NACA0012_sst_2003-Vm.cfg
@@ -65,7 +65,6 @@ LINEAR_SOLVER_ITER= 20
 % -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
 %
 CONV_NUM_METHOD_FLOW= ROE
-USE_VECTORIZATION= YES
 MUSCL_FLOW= YES
 SLOPE_LIMITER_FLOW= NONE
 JST_SENSOR_COEFF= ( 0.5, 0.02 )