Skip to content

Commit

Permalink
Merge branch 'vectorize_when_possible' into improve_doxydocs
Browse files Browse the repository at this point in the history
  • Loading branch information
pcarruscag committed Sep 25, 2022
2 parents e616fa1 + c9af050 commit 0c1b07b
Show file tree
Hide file tree
Showing 27 changed files with 119 additions and 125 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/release-management.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Cache Object Files
uses: actions/cache@v1
uses: actions/cache@v3
with:
path: ccache
key: ${{ matrix.os_bin }}-${{ github.sha }}
Expand All @@ -44,7 +44,7 @@ jobs:
zip -r ../${{matrix.os_bin}}.zip bin/*
# Uploads binaries as artifacts (just as a backup)
- name: Upload Binaries
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v3
with:
name: ${{matrix.os_bin}}
path: ${{matrix.os_bin}}.zip
Expand Down
15 changes: 15 additions & 0 deletions Common/include/basic_types/datatype_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,21 @@ namespace SU2_TYPE {
FORCEINLINE void SetDerivative(su2double &, const passivedouble &) {}
#endif

/*!
* \brief Get the passive value of any variable. For most types return directly,
* specialize for su2double to call GetValue.
* \note This is a struct instead of a function because the return type of the
* su2double specialization changes.
*/
template <class T>
struct Passive {
FORCEINLINE static T Value(const T& val) {return val;}
};
template <>
struct Passive<su2double> {
FORCEINLINE static passivedouble Value(const su2double& val) {return GetValue(val);}
};

/*!
* \brief Casts the primitive value to int (uses GetValue, already implemented for each type).
* \param[in] data - The non-primitive datatype.
Expand Down
57 changes: 35 additions & 22 deletions Common/include/linear_algebra/vector_expressions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <cassert>
#include <cstdlib>
#include <cmath>
#include <cstdint>

namespace VecExpr {

Expand Down Expand Up @@ -158,21 +159,28 @@ FORCEINLINE auto FUN(decay_t<S> u, const CVecExpr<V,S>& v) \
RETURNS( EXPR<Bcast<S>,V,S>(Bcast<S>(u), v.derived()) \
) \

/*--- std::max/min have issues (maybe because they return by reference).
* For AD codi::max/min need to be used to avoid issues in debug builds. ---*/

#if defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)
#define max_impl math::max
#define min_impl math::min
#else
#define max_impl(a,b) a<b? Scalar(b) : Scalar(a)
#define min_impl(a,b) b<a? Scalar(b) : Scalar(a)
#endif
MAKE_BINARY_FUN(fmax, max_, max_impl)
MAKE_BINARY_FUN(fmin, min_, min_impl)
/*--- std::max/min have issues (because they return by reference).
* fmin and fmax return by value and thus are fine, but they would force
* conversions to double, to avoid that we provide integer overloads.
* We use int32/64 instead of int/long to avoid issues with Windows,
* where long is 32 bits (instead of 64 bits). ---*/

#define MAKE_FMINMAX_OVERLOADS(TYPE) \
FORCEINLINE TYPE fmax(TYPE a, TYPE b) { return a<b? b : a; } \
FORCEINLINE TYPE fmin(TYPE a, TYPE b) { return a<b? a : b; }
MAKE_FMINMAX_OVERLOADS(int32_t)
MAKE_FMINMAX_OVERLOADS(int64_t)
MAKE_FMINMAX_OVERLOADS(uint32_t)
MAKE_FMINMAX_OVERLOADS(uint64_t)
/*--- Make the float and double versions of fmin/max available in this
* namespace to avoid ambiguous overloads. ---*/
using std::fmax;
using std::fmin;
#undef MAKE_FMINMAX_OVERLOADS

MAKE_BINARY_FUN(fmax, max_, fmax)
MAKE_BINARY_FUN(fmin, min_, fmin)
MAKE_BINARY_FUN(pow, pow_, math::pow)
#undef max_impl
#undef min_impl

/*--- sts::plus and co. were tried, the code was horrendous (due to the forced
* conversion between different types) and creating functions for these ops
Expand All @@ -191,20 +199,25 @@ MAKE_BINARY_FUN(operator/, div_, div_impl)
#undef mul_impl
#undef div_impl

/*--- Relational operators need to be cast to the scalar type to allow vectorization. ---*/

#define le_impl(a,b) Scalar(a<=b)
#define ge_impl(a,b) Scalar(a>=b)
#define eq_impl(a,b) Scalar(a==b)
#define ne_impl(a,b) Scalar(a!=b)
#define lt_impl(a,b) Scalar(a<b)
#define gt_impl(a,b) Scalar(a>b)
/*--- Relational operators need to be cast to the scalar type to allow vectorization.
* TO_PASSIVE is used to convert active scalars to passive, which CoDi will then capture
* by value in its expressions, and thus dangling references are avoided. No AD info
* is lost since these operators are non-differentiable. ---*/

#define TO_PASSIVE(IMPL) SU2_TYPE::Passive<Scalar>::Value(IMPL)
#define le_impl(a,b) TO_PASSIVE(a<=b)
#define ge_impl(a,b) TO_PASSIVE(a>=b)
#define eq_impl(a,b) TO_PASSIVE(a==b)
#define ne_impl(a,b) TO_PASSIVE(a!=b)
#define lt_impl(a,b) TO_PASSIVE(a<b)
#define gt_impl(a,b) TO_PASSIVE(a>b)
MAKE_BINARY_FUN(operator<=, le_, le_impl)
MAKE_BINARY_FUN(operator>=, ge_, ge_impl)
MAKE_BINARY_FUN(operator==, eq_, eq_impl)
MAKE_BINARY_FUN(operator!=, ne_, ne_impl)
MAKE_BINARY_FUN(operator<, lt_, lt_impl)
MAKE_BINARY_FUN(operator>, gt_, gt_impl)
#undef TO_PASSIVE
#undef le_impl
#undef ge_impl
#undef eq_impl
Expand Down
136 changes: 61 additions & 75 deletions Common/src/grid_movement/CFreeFormDefBox.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,19 +102,34 @@ CFreeFormDefBox::CFreeFormDefBox(const unsigned short Degree[], unsigned short B
CFreeFormDefBox::~CFreeFormDefBox(void) {
unsigned short iOrder, jOrder, kOrder, iCornerPoints, iDim;

for (iOrder = 0; iOrder < lOrder; iOrder++)
for (jOrder = 0; jOrder < mOrder; jOrder++)
for (iOrder = 0; iOrder < lOrder; iOrder++) {
for (jOrder = 0; jOrder < mOrder; jOrder++) {
for (kOrder = 0; kOrder < nOrder; kOrder++) {
delete [] Coord_Control_Points[iOrder][jOrder][kOrder];
delete [] ParCoord_Control_Points[iOrder][jOrder][kOrder];
delete [] Coord_Control_Points_Copy[iOrder][jOrder][kOrder];
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder][kOrder];
}
delete [] Coord_Control_Points[iOrder][jOrder];
delete [] ParCoord_Control_Points[iOrder][jOrder];
delete [] Coord_Control_Points_Copy[iOrder][jOrder];
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder][jOrder];
}
delete [] Coord_Control_Points[iOrder];
delete [] ParCoord_Control_Points[iOrder];
delete [] Coord_Control_Points_Copy[iOrder];
if (Coord_SupportCP != nullptr) delete [] Coord_SupportCP[iOrder];
}

delete [] Coord_Control_Points;
delete [] ParCoord_Control_Points;
delete [] Coord_Control_Points_Copy;
delete [] Coord_SupportCP;

delete [] ParamCoord;
delete [] ParamCoord_;
delete [] cart_coord;
delete [] cart_coord_;
delete [] Gradient;

for (iCornerPoints = 0; iCornerPoints < nCornerPoints; iCornerPoints++)
Expand Down Expand Up @@ -969,26 +984,37 @@ su2double *CFreeFormDefBox::GetParametricCoord_Iterative(unsigned long iPoint, s
}

bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned long iPoint) const {
su2double Coord[3] = {0.0, 0.0, 0.0};
unsigned short iVar, jVar, iDim;
su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;

bool Inside = false;
bool Inside = true;
bool cylindrical = (config->GetFFD_CoordSystem() == CYLINDRICAL);
bool spherical = (config->GetFFD_CoordSystem() == SPHERICAL);
bool polar = (config->GetFFD_CoordSystem() == POLAR);

unsigned short Index[5][7] = {
{0, 1, 2, 5, 0, 1, 2},
{0, 2, 7, 5, 0, 2, 7},
{0, 2, 3, 7, 0, 2, 3},
{0, 5, 7, 4, 0, 5, 7},
{2, 7, 5, 6, 2, 7, 5}};
/*--- indices of the FFD box. Note that the front face is labelled 0,1,2,3 and the back face is 4,5,6,7 ---*/

unsigned short Index[6][5] = {
{0,1,2,3,0}, // front side
{1,5,6,2,1}, // right side
{2,6,7,3,2}, // top side
{3,7,4,0,3}, // left side
{4,5,1,0,4}, // bottom side
{4,7,6,5,4}}; // back side

/*--- The current approach is to subdivide each of the 6 faces of the hexahedral FFD box into 4 triangles
by defining a supporting middle point. This allows nonplanar FFD boxes.
Note that the definition of the FFD box is as follows: the FFD box is a 6-sided die and we are looking at the side "1".
The opposite side is side "6".
If we are looking at side "1", we define the nodes counterclockwise.
If we are looking at side "6", we define the face clockwise ---*/

unsigned short nDim = geometry->GetnDim();

for (iDim = 0; iDim < nDim; iDim++)
su2double Coord[3] = {0.0, 0.0, 0.0};
for (unsigned short iDim = 0; iDim < nDim; iDim++)
Coord[iDim] = geometry->nodes->GetCoord(iPoint, iDim);

su2double X_0, Y_0, Z_0, Xbar, Ybar, Zbar;

if (cylindrical) {

X_0 = config->GetFFD_Axis(0); Y_0 = config->GetFFD_Axis(1); Z_0 = config->GetFFD_Axis(2);
Expand All @@ -1013,78 +1039,38 @@ bool CFreeFormDefBox::GetPointFFD(CGeometry *geometry, CConfig *config, unsigned

}

/*--- 1st tetrahedron {V0, V1, V2, V5}
2nd tetrahedron {V0, V2, V7, V5}
3th tetrahedron {V0, V2, V3, V7}
4th tetrahedron {V0, V5, V7, V4}
5th tetrahedron {V2, V7, V5, V6} ---*/
/*--- loop over the faces of the FFD box ---*/

for (unsigned short iVar = 0; iVar < 6; iVar++) {

su2double P[3] = {0.0, 0.0, 0.0};

/*--- every face needs an interpolated middle point for the triangles ---*/

for (int p = 0; p < 4; p++){
P[0] += 0.25*Coord_Corner_Points[Index[iVar][p]][0];
P[1] += 0.25*Coord_Corner_Points[Index[iVar][p]][1];
P[2] += 0.25*Coord_Corner_Points[Index[iVar][p]][2];
}

/*--- loop over the 4 triangles making up the FFD box. The sign is equal for all distances ---*/

for (iVar = 0; iVar < 5; iVar++) {
Inside = true;
for (jVar = 0; jVar < 4; jVar++) {
for (unsigned short jVar = 0; jVar < 4; jVar++) {
su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
Coord_Corner_Points[Index[iVar][jVar]],
Coord_Corner_Points[Index[iVar][jVar+1]],
Coord_Corner_Points[Index[iVar][jVar+2]],
Coord_Corner_Points[Index[iVar][jVar+3]]);

su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
Coord_Corner_Points[Index[iVar][jVar+1]],
Coord_Corner_Points[Index[iVar][jVar+2]],
Coord_Corner_Points[Index[iVar][jVar+3]]);
if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
P);
if (Distance_Point < 0) {
Inside = false;
return Inside;
}
}
if (Inside) break;
}

return Inside;

}

void CFreeFormDefBox::SetDeformationZone(CGeometry *geometry, CConfig *config, unsigned short iFFDBox) const {
su2double *Coord;
unsigned short iMarker, iVar, jVar;
unsigned long iVertex, iPoint;
bool Inside = false;

unsigned short Index[5][7] = {
{0, 1, 2, 5, 0, 1, 2},
{0, 2, 7, 5, 0, 2, 7},
{0, 2, 3, 7, 0, 2, 3},
{0, 5, 7, 4, 0, 5, 7},
{2, 7, 5, 6, 2, 7, 5}};

for (iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
if (config->GetMarker_All_DV(iMarker) == YES) {
for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
iPoint = geometry->vertex[iMarker][iVertex]->GetNode();

Coord = geometry->nodes->GetCoord(iPoint);

/*--- 1st tetrahedron {V0, V1, V2, V5}
2nd tetrahedron {V0, V2, V7, V5}
3th tetrahedron {V0, V2, V3, V7}
4th tetrahedron {V0, V5, V7, V4}
5th tetrahedron {V2, V7, V5, V6} ---*/

for (iVar = 0; iVar < 5; iVar++) {
Inside = true;
for (jVar = 0; jVar < 4; jVar++) {
su2double Distance_Point = geometry->Point2Plane_Distance(Coord,
Coord_Corner_Points[Index[iVar][jVar+1]],
Coord_Corner_Points[Index[iVar][jVar+2]],
Coord_Corner_Points[Index[iVar][jVar+3]]);
su2double Distance_Vertex = geometry->Point2Plane_Distance(Coord_Corner_Points[Index[iVar][jVar]],
Coord_Corner_Points[Index[iVar][jVar+1]],
Coord_Corner_Points[Index[iVar][jVar+2]],
Coord_Corner_Points[Index[iVar][jVar+3]]);
if (Distance_Point*Distance_Vertex < 0.0) Inside = false;
}
if (Inside) break;
}
}
}
}
}

su2double CFreeFormDefBox::GetDerivative1(su2double *uvw, unsigned short val_diff, unsigned short *ijk, unsigned short *lmn) const {

Expand Down
6 changes: 2 additions & 4 deletions SU2_CFD/include/numerics_simd/flow/convection/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,8 @@ FORCEINLINE CPair<ReconVarType> reconstructPrimitives(Int iEdge, Int iPoint, Int
break;
}
/*--- Detect a non-physical reconstruction based on negative pressure or density. ---*/
/*--- Some weird issues with forward AD if this is all done in one line. ---*/
const Double neg_p = fmin(V.i.pressure(), V.j.pressure()) < 0.0;
const Double neg_rho = fmin(V.i.density(), V.j.density()) < 0.0;
const Double neg_p_or_rho = fmax(neg_p, neg_rho);
const Double neg_p_or_rho = fmax(fmin(V.i.pressure(), V.j.pressure()) < 0.0,
fmin(V.i.density(), V.j.density()) < 0.0);
/*--- Test the sign of the Roe-averaged speed of sound. ---*/
const Double R = sqrt(abs(V.j.density() / V.i.density()));
/*--- Delay dividing by R+1 until comparing enthalpy and velocity magnitude. ---*/
Expand Down
1 change: 0 additions & 1 deletion TestCases/disc_adj_rans/naca0012/turb_NACA0012_sst.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ MG_DAMP_PROLONGATION= 0.75
% Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
% TURKEL_PREC, MSW)
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
%
% Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
MUSCL_FLOW= YES
Expand Down
1 change: 0 additions & 1 deletion TestCases/navierstokes/flatplate/lam_flatplate.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ MG_DAMP_PROLONGATION= 0.75
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
%
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE
TIME_DISCRE_FLOW= EULER_IMPLICIT
Expand Down
1 change: 0 additions & 1 deletion TestCases/navierstokes/flatplate/lam_flatplate_unst.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ LINEAR_SOLVER_ITER= 2
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
%
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE

Expand Down
1 change: 0 additions & 1 deletion TestCases/navierstokes/periodic2D/config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ LINEAR_SOLVER_ITER= 4
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
%
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= VENKATAKRISHNAN_WANG
VENKAT_LIMITER_COEFF= 0.05
Expand Down
1 change: 0 additions & 1 deletion TestCases/py_wrapper/translating_NACA0012/config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ VENKAT_LIMITER_COEFF= 0.1

% SOLUTION ACCELERATION
%
USE_VECTORIZATION= YES
CFL_NUMBER= 1e3
CFL_ADAPT= NO
%
Expand Down
1 change: 0 additions & 1 deletion TestCases/rans/naca0012/turb_NACA0012_sa.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,6 @@ MG_DAMP_PROLONGATION= 0.75
% Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
% TURKEL_PREC, MSW)
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
%
% Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
MUSCL_FLOW= YES
Expand Down
1 change: 0 additions & 1 deletion TestCases/rans/naca0012/turb_NACA0012_sst.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ MG_DAMP_PROLONGATION= 0.75
% Convective numerical method (JST, LAX-FRIEDRICH, CUSP, ROE, AUSM, HLLC,
% TURKEL_PREC, MSW)
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
%
% Spatial numerical order integration (1ST_ORDER, 2ND_ORDER, 2ND_ORDER_LIMITER)
MUSCL_FLOW= YES
Expand Down
1 change: 0 additions & 1 deletion TestCases/rans/naca0012/turb_NACA0012_sst_1994-KLm.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ LINEAR_SOLVER_ITER= 20
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
%
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE
JST_SENSOR_COEFF= ( 0.5, 0.02 )
Expand Down
1 change: 0 additions & 1 deletion TestCases/rans/naca0012/turb_NACA0012_sst_2003-Vm.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ LINEAR_SOLVER_ITER= 20
% -------------------- FLOW NUMERICAL METHOD DEFINITION -----------------------%
%
CONV_NUM_METHOD_FLOW= ROE
USE_VECTORIZATION= YES
MUSCL_FLOW= YES
SLOPE_LIMITER_FLOW= NONE
JST_SENSOR_COEFF= ( 0.5, 0.02 )
Expand Down
Loading

0 comments on commit 0c1b07b

Please sign in to comment.