diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index cea9c098ee4..3d6c7308431 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP] + config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP, ReverseOMP, ForwardOMP] include: - config_set: BaseMPI flags: '-Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror' @@ -32,6 +32,10 @@ jobs: flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror' - config_set: BaseOMP flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + - config_set: ReverseOMP + flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + - config_set: ForwardOMP + flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' runs-on: ubuntu-latest steps: - name: Cache Object Files diff --git a/.gitmodules b/.gitmodules index f160f2e549e..ae2967618b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -15,3 +15,6 @@ [submodule "subprojects/Mutationpp"] path = subprojects/Mutationpp url = https://github.com/mutationpp/Mutationpp.git +[submodule "externals/opdi"] + path = externals/opdi + url = https://github.com/SciCompKL/OpDiLib diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 0037465b0e4..620da3246f5 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -1,7 +1,7 @@ /*! * \file ad_structure.hpp * \brief Main routines for the algorithmic differentiation (AD) structure. - * \author T. Albring + * \author T. Albring, J. 
Blühdorn * \version 7.1.1 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -27,7 +27,8 @@ #pragma once -#include "datatype_structure.hpp" +#include "../code_config.hpp" +#include "../parallelization/omp_structure.hpp" /*! * \namespace AD @@ -278,19 +279,23 @@ namespace AD{ extern int adjointVectorPosition; - /*--- Reference to the tape ---*/ - - extern su2double::TapeType& globalTape; - extern bool Status; extern bool PreaccActive; extern bool PreaccEnabled; - extern su2double::TapeType::Position StartPosition, EndPosition; +#ifdef HAVE_OPDI + using CoDiTapePosition = su2double::TapeType::Position; + using OpDiState = void*; + using TapePosition = std::pair; +#else + using TapePosition = su2double::TapeType::Position; +#endif + + extern TapePosition StartPosition, EndPosition; - extern std::vector TapePositions; + extern std::vector TapePositions; extern std::vector localInputValues; @@ -298,42 +303,68 @@ namespace AD{ extern codi::PreaccumulationHelper PreaccHelper; + /*--- Reference to the tape. 
---*/ + + FORCEINLINE su2double::TapeType& getGlobalTape() { + return su2double::getGlobalTape(); + } + FORCEINLINE void RegisterInput(su2double &data, bool push_index = true) { - AD::globalTape.registerInput(data); + AD::getGlobalTape().registerInput(data); if (push_index) { inputValues.push_back(data.getGradientData()); } } - FORCEINLINE void RegisterOutput(su2double& data) {AD::globalTape.registerOutput(data);} + FORCEINLINE void RegisterOutput(su2double& data) {AD::getGlobalTape().registerOutput(data);} FORCEINLINE void ResetInput(su2double &data) {data.getGradientData() = su2double::GradientData();} - FORCEINLINE void StartRecording() {AD::globalTape.setActive();} + FORCEINLINE void StartRecording() {AD::getGlobalTape().setActive();} - FORCEINLINE void StopRecording() {AD::globalTape.setPassive();} + FORCEINLINE void StopRecording() {AD::getGlobalTape().setPassive();} - FORCEINLINE bool TapeActive() { return AD::globalTape.isActive(); } + FORCEINLINE bool TapeActive() { return AD::getGlobalTape().isActive(); } - FORCEINLINE void PrintStatistics() {AD::globalTape.printStatistics();} + FORCEINLINE void PrintStatistics() {AD::getGlobalTape().printStatistics();} - FORCEINLINE void ClearAdjoints() {AD::globalTape.clearAdjoints(); } + FORCEINLINE void ClearAdjoints() {AD::getGlobalTape().clearAdjoints(); } - FORCEINLINE void ComputeAdjoint() {AD::globalTape.evaluate(); adjointVectorPosition = 0;} + FORCEINLINE void ComputeAdjoint() { + #if defined(HAVE_OPDI) + opdi::logic->prepareEvaluate(); + #endif + AD::getGlobalTape().evaluate(); + adjointVectorPosition = 0; + } FORCEINLINE void ComputeAdjoint(unsigned short enter, unsigned short leave) { - AD::globalTape.evaluate(TapePositions[enter], TapePositions[leave]); + #if defined(HAVE_OPDI) + opdi::logic->recoverState(TapePositions[enter].second); + opdi::logic->prepareEvaluate(); + AD::getGlobalTape().evaluate(TapePositions[enter].first, TapePositions[leave].first); + #else + 
AD::getGlobalTape().evaluate(TapePositions[enter], TapePositions[leave]); + #endif if (leave == 0) adjointVectorPosition = 0; } FORCEINLINE void Reset() { - globalTape.reset(); + AD::getGlobalTape().reset(); + #if defined(HAVE_OPDI) + opdi::logic->reset(); + #endif if (inputValues.size() != 0) { adjointVectorPosition = 0; inputValues.clear(); } if (TapePositions.size() != 0) { + #if defined(HAVE_OPDI) + for (TapePosition& pos : TapePositions) { + opdi::logic->freeState(pos.second); + } + #endif TapePositions.clear(); } } @@ -343,11 +374,11 @@ namespace AD{ } FORCEINLINE void SetDerivative(int index, const double val) { - AD::globalTape.setGradient(index, val); + AD::getGlobalTape().setGradient(index, val); } FORCEINLINE double GetDerivative(int index) { - return AD::globalTape.getGradient(index); + return AD::getGlobalTape().getGradient(index); } /*--- Base case for parameter pack expansion. ---*/ @@ -361,6 +392,11 @@ namespace AD{ SetPreaccIn(moreData...); } + template::value> = 0> + FORCEINLINE void SetPreaccIn(T&& data, Ts&&... 
moreData) { + static_assert(!std::is_same::value, "rvalues cannot be registered"); + } + template FORCEINLINE void SetPreaccIn(const T& data, const int size) { if (PreaccActive) { @@ -384,20 +420,8 @@ namespace AD{ } } - template - FORCEINLINE void SetPreaccIn(const T& data, const int size_x, const int size_y, const int size_z) { - if (!PreaccActive) return; - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - for (int k = 0; k < size_z; k++) { - if (data[i][j][k].isActive()) PreaccHelper.addInput(data[i][j][k]); - } - } - } - } - FORCEINLINE void StartPreacc() { - if (globalTape.isActive() && PreaccEnabled) { + if (AD::getGlobalTape().isActive() && PreaccEnabled) { PreaccHelper.start(); PreaccActive = true; } @@ -438,7 +462,11 @@ namespace AD{ } FORCEINLINE void Push_TapePosition() { - TapePositions.push_back(AD::globalTape.getPosition()); + #if defined(HAVE_OPDI) + TapePositions.push_back({AD::getGlobalTape().getPosition(), opdi::logic->exportState()}); + #else + TapePositions.push_back(AD::getGlobalTape().getPosition()); + #endif } FORCEINLINE void EndPreacc(){ @@ -478,7 +506,7 @@ namespace AD{ } FORCEINLINE void SetExtFuncOut(su2double& data) { - if (globalTape.isActive()) { + if (AD::getGlobalTape().isActive()) { FuncHelper->addOutput(data); } } @@ -486,7 +514,7 @@ namespace AD{ template FORCEINLINE void SetExtFuncOut(T&& data, const int size) { for (int i = 0; i < size; i++) { - if (globalTape.isActive()) { + if (AD::getGlobalTape().isActive()) { FuncHelper->addOutput(data[i]); } } @@ -496,7 +524,7 @@ namespace AD{ FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) { for (int i = 0; i < size_x; i++) { for (int j = 0; j < size_y; j++) { - if (globalTape.isActive()) { + if (AD::getGlobalTape().isActive()) { FuncHelper->addOutput(data[i][j]); } } @@ -511,7 +539,7 @@ namespace AD{ FORCEINLINE void EndExtFunc() { delete FuncHelper; } FORCEINLINE bool BeginPassive() { - if(AD::globalTape.isActive()) { + 
if(AD::getGlobalTape().isActive()) { StopRecording(); return true; } diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp index 2c4c2bfa885..039df331200 100644 --- a/Common/include/basic_types/datatype_structure.hpp +++ b/Common/include/basic_types/datatype_structure.hpp @@ -30,87 +30,10 @@ #include #include #include -#include - -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) -#define FORCEINLINE inline __attribute__((always_inline)) -#else -#define FORCEINLINE inline -#endif - -#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) -#define NEVERINLINE inline __attribute__((noinline)) -#else -#define NEVERINLINE inline -#endif - -#if defined(__INTEL_COMPILER) -/*--- Disable warnings related to inline attributes. ---*/ -#pragma warning disable 2196 -#pragma warning disable 3415 -/*--- Disable warnings related to overloaded virtual. ---*/ -#pragma warning disable 654 -#pragma warning disable 1125 -#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE) -#pragma warning disable 1875 -#endif -#endif - -/*--- Convenience SFINAE typedef to conditionally - * enable/disable function template overloads. ---*/ -template -using su2enable_if = typename std::enable_if::type; - -/*--- Depending on the datatype defined during the configuration, - * include the correct definition, and create the main typedef. 
---*/ - -#if defined(CODI_REVERSE_TYPE) // reverse mode AD -#include "codi.hpp" -#include "codi/tools/dataStore.hpp" - -#ifndef CODI_INDEX_TAPE -#define CODI_INDEX_TAPE 0 -#endif -#ifndef CODI_PRIMAL_TAPE -#define CODI_PRIMAL_TAPE 0 -#endif -#ifndef CODI_PRIMAL_INDEX_TAPE -#define CODI_PRIMAL_INDEX_TAPE 0 -#endif - -#if CODI_INDEX_TAPE -using su2double = codi::RealReverseIndex; -#elif CODI_PRIMAL_TAPE -using su2double = codi::RealReversePrimal; -#elif CODI_PRIMAL_INDEX_TAPE -using su2double = codi::RealReversePrimalIndex; -#else -using su2double = codi::RealReverse; -#endif - -#elif defined(CODI_FORWARD_TYPE) // forward mode AD -#include "codi.hpp" -using su2double = codi::RealForward; - -#else // primal / direct / no AD -using su2double = double; -#endif +#include "../code_config.hpp" #include "ad_structure.hpp" -/*--- This type can be used for (rare) compatiblity cases or for - * computations that are intended to be (always) passive. ---*/ -using passivedouble = double; - -/*--- Define a type for potentially lower precision operations. ---*/ -#ifdef USE_MIXED_PRECISION -using su2mixedfloat = float; -#else -using su2mixedfloat = passivedouble; -#endif - /*! 
* \namespace SU2_TYPE * \brief Namespace for defining the datatype wrapper routines, this acts as a base @@ -174,11 +97,11 @@ namespace SU2_TYPE { #ifdef CODI_REVERSE_TYPE FORCEINLINE passivedouble GetSecondary(const su2double& data) { - return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]); + return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]); } FORCEINLINE passivedouble GetDerivative(const su2double& data) { - return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]); + return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]); } #else // forward FORCEINLINE passivedouble GetSecondary(const su2double& data) {return data.getGradient();} diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp new file mode 100644 index 00000000000..377432ee945 --- /dev/null +++ b/Common/include/code_config.hpp @@ -0,0 +1,124 @@ +/*! + * \file code_config.hpp + * \brief Header file for collecting common macros, definitions and type configurations. + * \author T. Albring, P. Gomes, J. Blühdorn + * \version 7.1.1 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#pragma once + +#include + +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +#define FORCEINLINE inline __attribute__((always_inline)) +#else +#define FORCEINLINE inline +#endif + +#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +#define NEVERINLINE inline __attribute__((noinline)) +#else +#define NEVERINLINE inline +#endif + +#if defined(__INTEL_COMPILER) +/*--- Disable warnings related to inline attributes. ---*/ +#pragma warning disable 2196 +#pragma warning disable 3415 +/*--- Disable warnings related to overloaded virtual. ---*/ +#pragma warning disable 654 +#pragma warning disable 1125 +#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE) +#pragma warning disable 1875 +#endif +#endif + +/*--- Convenience SFINAE typedef to conditionally + * enable/disable function template overloads. ---*/ +template +using su2enable_if = typename std::enable_if::type; + +/*--- Detect compilation with OpenMP. ---*/ +#if defined(_OPENMP) +#define HAVE_OMP +#endif + +/*--- Depending on the datatype defined during the configuration, + * include the correct definition, and create the main typedef. 
---*/ + +#if defined(CODI_REVERSE_TYPE) // reverse mode AD +#include "codi.hpp" +#include "codi/tools/dataStore.hpp" + +#ifndef CODI_INDEX_TAPE +#define CODI_INDEX_TAPE 0 +#endif +#ifndef CODI_PRIMAL_TAPE +#define CODI_PRIMAL_TAPE 0 +#endif +#ifndef CODI_PRIMAL_INDEX_TAPE +#define CODI_PRIMAL_INDEX_TAPE 0 +#endif + +#if defined(HAVE_OMP) +using su2double = codi::RealReverseIndexParallel; +#else +#if CODI_INDEX_TAPE +using su2double = codi::RealReverseIndex; +#elif CODI_PRIMAL_TAPE +using su2double = codi::RealReversePrimal; +#elif CODI_PRIMAL_INDEX_TAPE +using su2double = codi::RealReversePrimalIndex; +#else +using su2double = codi::RealReverse; +#endif +#endif +#elif defined(CODI_FORWARD_TYPE) // forward mode AD +#include "codi.hpp" +using su2double = codi::RealForward; + +#else // primal / direct / no AD +using su2double = double; +#endif + +/*--- This type can be used for (rare) compatibility cases or for + * computations that are intended to be (always) passive. ---*/ +using passivedouble = double; + +/*--- Define a type for potentially lower precision operations. ---*/ +#ifdef USE_MIXED_PRECISION +using su2mixedfloat = float; +#else +using su2mixedfloat = passivedouble; +#endif + +/*--- Detect if OpDiLib has to be used. ---*/ +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) +#define HAVE_OPDI +#endif + +#if (_OPENMP >= 201811 && !defined(FORCE_OPDI_MACRO_BACKEND)) || defined(FORCE_OPDI_OMPT_BACKEND) +#define HAVE_OMPT +#endif diff --git a/Common/include/containers/C2DContainer.hpp b/Common/include/containers/C2DContainer.hpp index 963cddc99fe..c2d08269294 100644 --- a/Common/include/containers/C2DContainer.hpp +++ b/Common/include/containers/C2DContainer.hpp @@ -77,12 +77,17 @@ class AccessorImpl * Static size specializations use this do-nothing allocation macro.
*/ #define DUMMY_ALLOCATOR \ - void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {} + void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}\ + void m_destroy() noexcept {} + /*! * Dynamic size specializations use this one, EXTRA is used to set some * runtime internal value that depend on the number of rows/columns. * What values need setting depends on the specialization as not all have * members for e.g. number of rows and cols (static size optimization). + * Because aligned allocation is used, "placement new" is used after to + * default construct the elements of non-trivial type. Such types also + * need to be destructed explicitly before freeing the memory. */ #define REAL_ALLOCATOR(EXTRA) \ static_assert(MemoryAllocation::is_power_of_two(AlignSize), \ @@ -91,6 +96,14 @@ class AccessorImpl void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept { \ EXTRA; \ m_data = MemoryAllocation::aligned_alloc(AlignSize,sz); \ + if (!std::is_trivial::value) \ + for (size_t i = 0; i < size(); ++i) new (m_data+i) Scalar_t(); \ + } \ + \ + void m_destroy() noexcept { \ + if (!std::is_trivial::value) \ + for (size_t i = 0; i < size(); ++i) m_data[i].~Scalar_t(); \ + MemoryAllocation::aligned_free(m_data); \ } DUMMY_ALLOCATOR @@ -114,15 +127,13 @@ class AccessorImpl \ AccessorImpl& operator= (AccessorImpl&& other) noexcept \ { \ - MemoryAllocation::aligned_free(m_data); \ + m_destroy(); \ MOVE; m_data=other.m_data; other.m_data=nullptr; \ return *this; \ } \ \ - ~AccessorImpl() noexcept \ - { \ - MemoryAllocation::aligned_free(m_data); \ - } + ~AccessorImpl() noexcept {m_destroy();} + /*! * Shorthand for when specialization has only one more member than m_data. 
*/ @@ -380,6 +391,7 @@ class C2DContainer : using Base = container_helpers::AccessorImpl; using Base::m_data; using Base::m_allocate; + using Base::m_destroy; public: using Base::size; using Base::rows; @@ -473,7 +485,7 @@ class C2DContainer : if(rows==this->rows() && cols==this->cols()) return reqSize; - MemoryAllocation::aligned_free(m_data); + m_destroy(); /*--- request actual allocation to base class as it needs specialization ---*/ size_t bytes = reqSize*sizeof(Scalar_t); diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp index 583ffab12fd..07dc11447d9 100644 --- a/Common/include/geometry/CGeometry.hpp +++ b/Common/include/geometry/CGeometry.hpp @@ -1242,13 +1242,7 @@ class CGeometry { * \brief Register the coordinates of the mesh nodes. * \param[in] config */ - void RegisterCoordinates(CConfig *config) const; - - /*! - * \brief Register the coordinates of the mesh nodes as output. - * \param[in] config - */ - void RegisterOutput_Coordinates(CConfig *config) const; + void RegisterCoordinates(const CConfig *config) const; /*! * \brief Update the multi-grid structure and the wall-distance. diff --git a/Common/include/geometry/dual_grid/CPoint.hpp b/Common/include/geometry/dual_grid/CPoint.hpp index 86ac53d4936..9db963524ad 100644 --- a/Common/include/geometry/dual_grid/CPoint.hpp +++ b/Common/include/geometry/dual_grid/CPoint.hpp @@ -423,7 +423,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Value of the distance to the nearest wall. */ - inline su2double GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); } + inline su2double& GetWall_Distance(unsigned long iPoint) { return Wall_Distance(iPoint); } + inline const su2double& GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); } /*! * \brief Set the value of the distance to the nearest wall. @@ -451,7 +452,8 @@ class CPoint { * \param[in] iPoint - Index of the point. 
* \return Value of the distance to the nearest wall. */ - inline su2double GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); } + inline su2double& GetSharpEdge_Distance(unsigned long iPoint) { return SharpEdge_Distance(iPoint); } + inline const su2double& GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); } /*! * \brief Set the value of the curvature at a surface node. @@ -486,7 +488,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Area or volume of the control volume. */ - inline su2double GetVolume(unsigned long iPoint) const { return Volume(iPoint); } + inline su2double& GetVolume(unsigned long iPoint) { return Volume(iPoint); } + inline const su2double& GetVolume(unsigned long iPoint) const { return Volume(iPoint); } /*! * \brief Set the volume of the control volume. @@ -507,7 +510,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Periodic component of area or volume for a control volume on a periodic marker. */ - inline su2double GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); } + inline su2double& GetPeriodicVolume(unsigned long iPoint) { return Periodic_Volume(iPoint); } + inline const su2double& GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); } /*! * \brief Set the missing component of area or volume for a control volume on a periodic marker. 
diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index d5e8dfb1dce..c69643cefe1 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -221,6 +221,7 @@ class CSysSolve { LinSysRes_ptr = &LinSysRes; LinSysSol_ptr = &LinSysSol; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -242,6 +243,7 @@ class CSysSolve { LinSysRes_ptr = &LinSysRes_tmp; LinSysSol_ptr = &LinSysSol_tmp; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -258,6 +260,7 @@ class CSysSolve { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -277,6 +280,7 @@ class CSysSolve { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index f64e2873e0f..59ee1304fe3 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -45,12 +45,14 @@ */ #ifdef HAVE_OMP #ifdef HAVE_OMP_SIMD -#define CSYSVEC_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait) +#define CSYSVEC_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT) #else -#define CSYSVEC_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait) +#define CSYSVEC_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) #endif +#define END_CSYSVEC_PARFOR END_SU2_OMP_FOR #else #define CSYSVEC_PARFOR SU2_OMP_SIMD +#define END_CSYSVEC_PARFOR #endif /*! @@ -186,10 +188,12 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> SU2_OMP_MASTER Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false); + END_SU2_OMP_MASTER SU2_OMP_BARRIER CSYSVEC_PARFOR for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]); + END_CSYSVEC_PARFOR } /*! 
@@ -250,6 +254,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> CSysVector& operator=(const CSysVector& other) { CSYSVEC_PARFOR for (auto i = 0ul; i < nElm; ++i) vec_val[i] = other.vec_val[i]; + END_CSYSVEC_PARFOR return *this; } @@ -261,12 +266,14 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> CSysVector& operator OP(ScalarType val) { \ CSYSVEC_PARFOR \ for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP val; \ + END_CSYSVEC_PARFOR \ return *this; \ } \ template \ CSysVector& operator OP(const VecExpr::CVecExpr& expr) { \ CSYSVEC_PARFOR \ for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP expr.derived()[i]; \ + END_CSYSVEC_PARFOR \ return *this; \ } MAKE_COMPOUND(=) @@ -293,6 +300,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> SU2_OMP_BARRIER SU2_OMP_MASTER dotRes = 0.0; + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Local dot product for each thread. ---*/ @@ -302,6 +310,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> for (auto i = 0ul; i < nElmDomain; ++i) { sum += vec_val[i] * expr.derived()[i]; } + END_CSYSVEC_PARFOR /*--- Update shared variable with "our" partial sum. ---*/ atomicAdd(sum, dotRes); @@ -314,6 +323,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE; SelectMPIWrapper::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER #endif /*--- Make view of result consistent across threads. ---*/ SU2_OMP_BARRIER @@ -440,3 +450,4 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> }; #undef CSYSVEC_PARFOR +#undef END_CSYSVEC_PARFOR diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp new file mode 100644 index 00000000000..6432b6bb482 --- /dev/null +++ b/Common/include/parallelization/omp_structure.cpp @@ -0,0 +1,61 @@ +/*! + * \file omp_structure.cpp + * \brief Source file counterpart for omp_structure.hpp. 
+ * \note Contains OpDiLib initialization, finalization and includes the OpDiLib source file. + * \author J. Blühdorn + * \version 7.1.1 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "omp_structure.hpp" + +void omp_initialize() { +#ifdef HAVE_OPDI +#if !defined(HAVE_OMPT) + opdi::backend = new opdi::MacroBackend; + opdi::backend->init(); +#endif + opdi::logic = new opdi::OmpLogic; + opdi::logic->init(); + su2double::getGlobalTape().initialize(); + opdi::tool = new CoDiOpDiTool; + opdi::tool->init(); +#endif +} + +void omp_finalize() { +#ifdef HAVE_OPDI + opdi::tool->finalize(); + su2double::getGlobalTape().finalize(); + opdi::logic->finalize(); + opdi::backend->finalize(); + delete opdi::tool; + delete opdi::logic; +#if !defined(HAVE_OMPT) + delete opdi::backend; +#endif +#endif +} + +#ifdef HAVE_OPDI +#include "opdi.cpp" +#endif diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index 42a96d5428f..c96d7383f94 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -12,7 +12,7 @@ * e.g. SU2_OMP_PARALLEL.
Exotic pragmas of limited portability should be * defined here with suitable fallback versions to limit the spread of * compiler tricks in other areas of the code. - * \author P. Gomes + * \author P. Gomes, J. Blühdorn * \version 7.1.1 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -38,7 +38,9 @@ #pragma once -#include "../basic_types/datatype_structure.hpp" +#include + +#include "../code_config.hpp" #if defined(_MSC_VER) #define PRAGMIZE(X) __pragma(X) @@ -46,12 +48,19 @@ #define PRAGMIZE(X) _Pragma(#X) #endif -/*--- Detect compilation with OpenMP support, protect agaisnt - * using OpenMP with Reverse AD (not supported yet). ---*/ -#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE) -#define HAVE_OMP +#if defined(HAVE_OMP) #include +#if defined(HAVE_OPDI) +#if defined(HAVE_OMPT) +#include "opdi/backend/ompt/omptBackend.hpp" +#else +#include "opdi/backend/macro/macroBackend.hpp" +#endif +#include "codi/externals/codiOpdiTool.hpp" +#include "opdi.hpp" +#endif + /*--- The generic start of OpenMP constructs. ---*/ #define SU2_OMP(ARGS) PRAGMIZE(omp ARGS) @@ -106,6 +115,11 @@ inline void omp_destroy_lock(omp_lock_t*){} #endif // end OpenMP detection +/*--- Initialization and finalization ---*/ + +void omp_initialize(); +void omp_finalize(); + /*--- Detect SIMD support (version 4+, after Jul 2013). ---*/ #ifdef _OPENMP #if _OPENMP >= 201307 @@ -125,8 +139,11 @@ inline void omp_destroy_lock(omp_lock_t*){} /*--- Convenience macros (do not use excessive nesting). 
---*/ -#define SU2_OMP_MASTER SU2_OMP(master) #define SU2_OMP_ATOMIC SU2_OMP(atomic) + +#ifndef HAVE_OPDI + +#define SU2_OMP_MASTER SU2_OMP(master) #define SU2_OMP_BARRIER SU2_OMP(barrier) #define SU2_OMP_CRITICAL SU2_OMP(critical) @@ -134,9 +151,40 @@ inline void omp_destroy_lock(omp_lock_t*){} #define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS) #define SU2_OMP_PARALLEL_ON(NTHREADS) SU2_OMP(parallel num_threads(NTHREADS)) +#define SU2_OMP_FOR_(ARGS) SU2_OMP(for ARGS) #define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK)) #define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK)) +#define SU2_NOWAIT nowait + +#define END_SU2_OMP_MASTER +#define END_SU2_OMP_CRITICAL +#define END_SU2_OMP_PARALLEL +#define END_SU2_OMP_FOR + +#else + +#define SU2_OMP_MASTER OPDI_MASTER() +#define SU2_OMP_BARRIER OPDI_BARRIER() +#define SU2_OMP_CRITICAL OPDI_CRITICAL() + +#define SU2_OMP_PARALLEL OPDI_PARALLEL() +#define SU2_OMP_PARALLEL_(ARGS) OPDI_PARALLEL(ARGS) +#define SU2_OMP_PARALLEL_ON(NTHREADS) OPDI_PARALLEL(num_threads(NTHREADS)) + +#define SU2_OMP_FOR_(ARGS) OPDI_FOR(ARGS) +#define SU2_OMP_FOR_DYN(CHUNK) OPDI_FOR(schedule(dynamic,CHUNK)) +#define SU2_OMP_FOR_STAT(CHUNK) OPDI_FOR(schedule(static,CHUNK)) + +#define SU2_NOWAIT OPDI_NOWAIT + +#define END_SU2_OMP_MASTER OPDI_END_MASTER +#define END_SU2_OMP_CRITICAL OPDI_END_CRITICAL +#define END_SU2_OMP_PARALLEL OPDI_END_PARALLEL +#define END_SU2_OMP_FOR OPDI_END_FOR + +#endif + /*--- Convenience functions (e.g. to compute chunk sizes). ---*/ /*! 
@@ -184,6 +232,7 @@ void parallelCopy(size_t size, const T* src, U* dst) { SU2_OMP_FOR_STAT(2048) for(size_t i=0; i::value> = 0> inline void atomicAdd(T rhs, T& lhs) diff --git a/Common/include/toolboxes/CLinearPartitioner.hpp b/Common/include/toolboxes/CLinearPartitioner.hpp index 4a86acebf68..5e2a4d24dea 100644 --- a/Common/include/toolboxes/CLinearPartitioner.hpp +++ b/Common/include/toolboxes/CLinearPartitioner.hpp @@ -52,63 +52,68 @@ class CLinearPartitioner { vector cumulativeSizeBeforeRank; /*!< \brief Vector containing the cumulative size of all linear partitions before the current rank. */ public: + CLinearPartitioner() = default; /*! - * \brief Constructor of the CLinearPartitioner class. - * \param[in] val_global_count - global count to be linearly partitioned. - * \param[in] val_offset - offset from 0 for the first index on rank 0 (typically 0). - * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false). + * \brief Constructor of the CLinearPartitioner class, see Initialize. */ - CLinearPartitioner(unsigned long val_global_count, - unsigned long val_offset, - bool isDisjoint = false); + CLinearPartitioner(unsigned long global_count, + unsigned long offset, + bool isDisjoint = false) { + Initialize(global_count, offset, isDisjoint); + } /*! - * \brief Destructor of the CLinearPartitioner class. + * \brief Initialize the CLinearPartitioner class. + * \param[in] global_count - global count to be linearly partitioned. + * \param[in] offset - offset from 0 for the first index on rank 0 (typically 0). + * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false). */ - ~CLinearPartitioner(void); + void Initialize(unsigned long global_count, + unsigned long offset, + bool isDisjoint = false); /*! * \brief Get the rank that owns the index based on the linear partitioning. - * \param[in] val_index - Current index. + * \param[in] index - Current index. 
* \returns Owning rank for the current index based on linear partitioning. */ - unsigned long GetRankContainingIndex(unsigned long val_index); + unsigned long GetRankContainingIndex(unsigned long index) const; /*! * \brief Get the first index of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns First index of the current rank's linear partition. */ - inline unsigned long GetFirstIndexOnRank(int val_rank) { - return firstIndex[val_rank]; + inline unsigned long GetFirstIndexOnRank(int rank) const { + return firstIndex[rank]; } /*! * \brief Get the last index of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Last index of the current rank's linear partition. */ - inline unsigned long GetLastIndexOnRank(int val_rank) { - return lastIndex[val_rank]; + inline unsigned long GetLastIndexOnRank(int rank) const { + return lastIndex[rank]; } /*! * \brief Get the total size of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Size of the current rank's linear partition. */ - inline unsigned long GetSizeOnRank(int val_rank) { - return sizeOnRank[val_rank]; + inline unsigned long GetSizeOnRank(int rank) const { + return sizeOnRank[rank]; } /*! * \brief Get the cumulative size of all linear partitions before the current rank. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Cumulative size of all linear partitions before the current rank. 
*/ - inline unsigned long GetCumulativeSizeBeforeRank(int val_rank) { - return cumulativeSizeBeforeRank[val_rank]; + inline unsigned long GetCumulativeSizeBeforeRank(int rank) const { + return cumulativeSizeBeforeRank[rank]; } }; diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp index f513752ba26..2d5d3bb4409 100644 --- a/Common/include/toolboxes/allocation_toolbox.hpp +++ b/Common/include/toolboxes/allocation_toolbox.hpp @@ -36,6 +36,8 @@ #include #endif +#include + #include namespace MemoryAllocation @@ -55,9 +57,10 @@ inline constexpr size_t round_up(size_t multiple, size_t x) * \brief Aligned memory allocation compatible across platforms. * \param[in] alignment, in bytes, of the memory being allocated. * \param[in] size, also in bytes. + * \tparam ZeroInit, initialize memory to 0. * \return Pointer to memory, always use su2::aligned_free to deallocate. */ -template +template inline T* aligned_alloc(size_t alignment, size_t size) noexcept { assert(is_power_of_two(alignment)); @@ -78,6 +81,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept #else ptr = ::aligned_alloc(alignment, size); #endif + if (ZeroInit) memset(ptr, 0, size); return static_cast(ptr); } diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp index d170cce5ae3..9dba7b4d955 100644 --- a/Common/include/toolboxes/graph_toolbox.hpp +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -166,6 +166,7 @@ class CCompressedSparsePattern { SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads()))) for(Index_t k = 0; k < getOuterSize(); ++k) m_diagPtr(k) = findInnerIdx(k,k); + END_SU2_OMP_PARALLEL } /*! @@ -184,6 +185,7 @@ class CCompressedSparsePattern { assert(m_innerIdxTransp(k) != m_innerIdx.size() && "The pattern is not symmetric."); } } + END_SU2_OMP_PARALLEL } /*! 
diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am index 2e698b72336..1e7a8761c6f 100644 --- a/Common/lib/Makefile.am +++ b/Common/lib/Makefile.am @@ -10,7 +10,7 @@ # The SU2 Project is maintained by the SU2 Foundation # (http://su2foundation.org) # -# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) +# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -63,6 +63,7 @@ lib_sources = \ ../src/grid_movement/CVolumetricMovement.cpp \ ../src/grid_movement/CSurfaceMovement.cpp \ ../include/parallelization/mpi_structure.cpp \ + ../include/parallelization/omp_structure.cpp \ ../src/basic_types/ad_structure.cpp \ ../src/fem/fem_gauss_jacobi_quadrature.cpp \ ../src/geometry/CGeometry.cpp \ diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp index f9df51d521a..14061dfbb86 100644 --- a/Common/src/CConfig.cpp +++ b/Common/src/CConfig.cpp @@ -4425,7 +4425,11 @@ void CConfig::SetPostprocessing(unsigned short val_software, unsigned short val_ #if defined CODI_REVERSE_TYPE AD_Mode = YES; +#if defined HAVE_OMP + AD::PreaccEnabled = false; +#else AD::PreaccEnabled = AD_Preaccumulation; +#endif #else if (AD_Mode == YES) { diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index 6739bbc1e73..18342e13a90 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -37,9 +37,8 @@ namespace AD { std::vector localInputValues; std::vector localOutputValues; - su2double::TapeType& globalTape = su2double::getGlobalTape(); - su2double::TapeType::Position StartPosition, EndPosition; - std::vector TapePositions; + TapePosition StartPosition, EndPosition; + std::vector TapePositions; bool PreaccActive = false; bool PreaccEnabled = true; diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index d8fb07b0eed..caa11e9239a 100644 --- 
a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -400,7 +400,9 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) { delete [] bufS_P2PRecv; bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] (); - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } @@ -504,6 +506,7 @@ void CGeometry::PostP2PRecvs(CGeometry *geometry, } } + END_SU2_OMP_MASTER } @@ -601,6 +604,7 @@ void CGeometry::PostP2PSends(CGeometry *geometry, } } + END_SU2_OMP_MASTER } @@ -736,6 +740,7 @@ void CGeometry::InitiateComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. ---*/ @@ -782,6 +787,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -839,6 +845,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. @@ -848,6 +855,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER @@ -1226,7 +1234,9 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) { delete [] bufS_PeriodicRecv; bufS_PeriodicRecv = new unsigned short[nRecv] (); - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CGeometry::PostPeriodicRecvs(CGeometry *geometry, @@ -1283,6 +1293,7 @@ void CGeometry::PostPeriodicRecvs(CGeometry *geometry, } } + END_SU2_OMP_MASTER #endif @@ -1337,7 +1348,8 @@ void CGeometry::PostPeriodicSends(CGeometry *geometry, CURRENT_FUNCTION); break; } - } // end master + } + END_SU2_OMP_MASTER #else /*--- Copy my own rank's data into the recv buffer directly in serial. 
---*/ @@ -2480,44 +2492,24 @@ void CGeometry::ComputeAirfoil_Section(su2double *Plane_P0, su2double *Plane_Nor } -void CGeometry::RegisterCoordinates(CConfig *config) const { - unsigned short iDim; - unsigned long iPoint; - bool input = true; - bool push_index = config->GetMultizone_Problem()? false : true; +void CGeometry::RegisterCoordinates(const CConfig *config) const { + const bool input = true; + const bool push_index = config->GetMultizone_Problem()? false : true; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iDim = 0; iDim < nDim; iDim++) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { + for (auto iDim = 0u; iDim < nDim; iDim++) { AD::RegisterInput(nodes->GetCoord(iPoint)[iDim], push_index); } if(!push_index) { nodes->SetIndex(iPoint, input); } } -} - -void CGeometry::RegisterOutput_Coordinates(CConfig *config) const{ - unsigned short iDim; - unsigned long iPoint; - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - if(config->GetMultizone_Problem()) { - for (iDim = 0; iDim < nDim; iDim++) { - AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]); - } - } - else { - for (iDim = 0; iDim < nDim; iDim++) { - AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]); - } - } - } + END_SU2_OMP_FOR } void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) { - unsigned short iMesh; - geometry_container[MESH_0]->InitiateComms(geometry_container[MESH_0], config, COORDINATES); geometry_container[MESH_0]->CompleteComms(geometry_container[MESH_0], config, COORDINATES); if (config->GetDynamic_Grid()){ @@ -2529,7 +2521,7 @@ void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) geometry_container[MESH_0]->SetBoundControlVolume(config, UPDATE); geometry_container[MESH_0]->SetMaxLength(config); - for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { + for (unsigned short iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { /*--- Update the control 
volume structures ---*/ geometry_container[iMesh]->SetControlVolume(config,geometry_container[iMesh-1], UPDATE); @@ -3159,6 +3151,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem+iDim] = 0.0; vol_elem[iElem] = 0.0; } + END_SU2_OMP_FOR /*--- Populate ---*/ SU2_OMP_FOR_STAT(256) @@ -3168,6 +3161,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem_global+iDim] = elem[iElem]->GetCG(iDim); vol_elem[iElem_global] = elem[iElem]->GetVolume(); } + END_SU2_OMP_FOR #ifdef HAVE_MPI /*--- Account for the duplication introduced by the halo elements and the @@ -3175,10 +3169,12 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex()] = 1; + END_SU2_OMP_FOR /*--- Share with all processors ---*/ SU2_OMP_MASTER @@ -3195,6 +3191,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm()); halo_detect.swap(char_buffer); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_FOR_STAT(256) @@ -3204,6 +3201,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem+iDim] /= numRepeat; vol_elem[iElem] /= numRepeat; } + END_SU2_OMP_FOR #endif /*--- SECOND: Each processor performs the average for its elements. 
For each @@ -3223,11 +3221,13 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex()] = values[iElem]; + END_SU2_OMP_FOR #ifdef HAVE_MPI /*--- Share with all processors ---*/ @@ -3237,6 +3237,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); swap(buffer, work_values); delete [] buffer; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Account for duplication ---*/ @@ -3245,6 +3246,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, su2double numRepeat = halo_detect[iElem]; work_values[iElem] /= numRepeat; } + END_SU2_OMP_FOR #endif /*--- Filter ---*/ @@ -3308,9 +3310,11 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_MPI::Error("Unknown type of filter kernel",CURRENT_FUNCTION); } } + END_SU2_OMP_FOR } - } // end OpenMP parallel section + } + END_SU2_OMP_PARALLEL limited_searches /= kernels.size(); @@ -3342,13 +3346,16 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector &neighbour SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex(); nFaces_elem[iElem_global] = elem[iElem]->GetnFaces(); } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL #ifdef HAVE_MPI /*--- Share with all processors ---*/ { @@ -3378,6 +3385,7 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector &neighbour /*--- Initialize ---*/ SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElem &neighbour } } } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL #ifdef HAVE_MPI /*--- Share with all processors ---*/ { @@ -3523,6 +3533,7 @@ void CGeometry::SetElemVolume() if(nDim==2) elem[iElem]->SetVolume(element->ComputeArea()); else elem[iElem]->SetVolume(element->ComputeVolume()); } + END_SU2_OMP_FOR delete elements[0]; delete elements[1]; @@ -3531,7 +3542,8 @@ void CGeometry::SetElemVolume() delete elements[3]; } - } // end SU2_OMP_PARALLEL + } + 
END_SU2_OMP_PARALLEL } void CGeometry::SetRotationalVelocity(CConfig *config, bool print) { diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp index d15a4d073a8..c027b51fcf8 100644 --- a/Common/src/geometry/CMultiGridGeometry.cpp +++ b/Common/src/geometry/CMultiGridGeometry.cpp @@ -1142,7 +1142,9 @@ void CMultiGridGeometry::SetControlVolume(CConfig *config, CGeometry *fine_grid, } } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_grid, unsigned short action) { @@ -1184,7 +1186,9 @@ void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_ if (Area == 0.0) for (iDim = 0; iDim < nDim; iDim++) NormalFace[iDim] = EPS*EPS; } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CMultiGridGeometry::SetCoord(CGeometry *geometry) { @@ -1202,6 +1206,7 @@ void CMultiGridGeometry::SetCoord(CGeometry *geometry) { } nodes->SetCoord(Point_Coarse, Coordinates); } + END_SU2_OMP_FOR } void CMultiGridGeometry::SetMultiGridWallHeatFlux(CGeometry *geometry, unsigned short val_marker){ @@ -1320,6 +1325,7 @@ void CMultiGridGeometry::SetRestricted_GridVelocity(CGeometry *fine_mesh, CConfi for (unsigned short iDim = 0; iDim < nDim; iDim++) nodes->SetGridVel(Point_Coarse, iDim, Grid_Vel[iDim]); } + END_SU2_OMP_FOR } diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 9496a093054..688972ce1f1 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -4375,7 +4375,10 @@ void CPhysicalGeometry::Check_IntElem_Orientation(const CConfig *config) { } } - }} // end SU2_OMP_PARALLEL + } + END_SU2_OMP_FOR + } + END_SU2_OMP_PARALLEL auto reduce = [](unsigned long& val) { unsigned long tmp = val; @@ -4522,7 +4525,10 @@ void CPhysicalGeometry::Check_BoundElem_Orientation(const CConfig *config) { } } } - }} // end 
SU2_OMP_PARALLEL + END_SU2_OMP_FOR + } + } + END_SU2_OMP_PARALLEL auto reduce = [](unsigned long& val) { unsigned long tmp = val; @@ -4698,6 +4704,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() { } nodes->SetElems(elems); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Loop over all the points ---*/ @@ -4734,11 +4741,14 @@ void CPhysicalGeometry::SetPoint_Connectivity() { /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/ nodes->SetnNeighbor(iPoint, points[iPoint].size()); } + END_SU2_OMP_FOR SU2_OMP_MASTER nodes->SetPoints(points); + END_SU2_OMP_MASTER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CPhysicalGeometry::SetRCM_Ordering(CConfig *config) { @@ -6635,6 +6645,7 @@ void CPhysicalGeometry::SetMaxLength(CConfig* config) { max_delta = GeometryToolbox::Distance(nDim, Coord_i, Coord_j); nodes->SetMaxLength(iPoint, max_delta); } + END_SU2_OMP_FOR InitiateComms(this, config, MAX_LENGTH); CompleteComms(this, config, MAX_LENGTH); @@ -7515,10 +7526,12 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) SU2_OMP_FOR_STAT(1024) for (auto iEdge = 0ul; iEdge < nEdge; iEdge++) edges->SetNormal(iEdge, ZeroArea); + END_SU2_OMP_FOR SU2_OMP_FOR_STAT(1024) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) nodes->SetVolume(iPoint, 0.0); + END_SU2_OMP_FOR } SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. 
---*/ @@ -7653,7 +7666,9 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) if (nDim == 3) cout <<"Volume of the computational grid: "<< DomainVolume <<"."<< endl; } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- Check if there is a normal with null area ---*/ SU2_OMP_FOR_STAT(1024) @@ -7662,6 +7677,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) su2double DefaultArea[MAXNDIM] = {EPS*EPS}; if (Area2 == 0.0) edges->SetNormal(iEdge, DefaultArea); } + END_SU2_OMP_FOR } void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned short action) { @@ -7673,6 +7689,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) for (unsigned long iVertex = 0; iVertex < nVertex[iMarker]; iVertex++) vertex[iMarker][iVertex]->SetZeroValues(); + END_SU2_OMP_FOR } /*--- Loop over all the boundary elements ---*/ @@ -7738,6 +7755,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh AD::EndPreacc(); } } + END_SU2_OMP_FOR /*--- Check if there is a normal with null area ---*/ @@ -7749,6 +7767,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh if (Area2 == 0.0) vertex[iMarker][iVertex]->SetNormal(DefaultArea); } } + END_SU2_OMP_FOR } void CPhysicalGeometry::VisualizeControlVolume(const CConfig *config) const { @@ -11030,9 +11049,10 @@ void CPhysicalGeometry::SetWallDistance(const CConfig *config, CADTElemClass *Wa nodes->SetRoughnessHeight(iPoint, localRoughness); } } + END_SU2_OMP_FOR } - // end SU2_OMP_PARALLEL + END_SU2_OMP_PARALLEL } void CPhysicalGeometry::SetGlobalMarkerRoughness(const CConfig* config) { diff --git a/Common/src/interface_interpolation/CIsoparametric.cpp b/Common/src/interface_interpolation/CIsoparametric.cpp index b1ad55af5aa..6cf27e002dd 100644 --- 
a/Common/src/interface_interpolation/CIsoparametric.cpp +++ b/Common/src/interface_interpolation/CIsoparametric.cpp @@ -253,13 +253,16 @@ void CIsoparametric::SetTransferCoeff(const CConfig* const* config) { } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxDistance = max(MaxDistance, maxDist); ErrorCounter += errorCount; nGlobalVertexTarget += totalCount; } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL } // end nMarkerInt loop diff --git a/Common/src/interface_interpolation/CMirror.cpp b/Common/src/interface_interpolation/CMirror.cpp index 079bc917bbd..ba805a58c89 100644 --- a/Common/src/interface_interpolation/CMirror.cpp +++ b/Common/src/interface_interpolation/CMirror.cpp @@ -231,7 +231,8 @@ void CMirror::SetTransferCoeff(const CConfig* const* config) { } } - } // end target loop + } + END_SU2_OMP_PARALLEL /*--- Free the heap allocations. ---*/ for (auto ptr : GlobalIndex) if (ptr != sendGlobalIndex.data()) delete [] ptr; diff --git a/Common/src/interface_interpolation/CNearestNeighbor.cpp b/Common/src/interface_interpolation/CNearestNeighbor.cpp index 5d5b00c30c3..0c292068101 100644 --- a/Common/src/interface_interpolation/CNearestNeighbor.cpp +++ b/Common/src/interface_interpolation/CNearestNeighbor.cpp @@ -158,13 +158,16 @@ void CNearestNeighbor::SetTransferCoeff(const CConfig* const* config) { target_vertex.coefficient[iDonor] = donorInfo[iDonor].dist/denom; } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { totalTargetPoints += numTarget; AvgDistance += avgDist; MaxDistance = max(MaxDistance, maxDist); } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL delete[] Buffer_Send_Coord; delete[] Buffer_Send_GlobalPoint; diff --git a/Common/src/interface_interpolation/CRadialBasisFunction.cpp b/Common/src/interface_interpolation/CRadialBasisFunction.cpp index 2a58e3c107a..8beb3483ecf 100644 --- a/Common/src/interface_interpolation/CRadialBasisFunction.cpp +++ b/Common/src/interface_interpolation/CRadialBasisFunction.cpp 
@@ -218,6 +218,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { keepPolynomialRowVec[iMarkerInt], CinvTrucVec[iMarkerInt]); } } + END_SU2_OMP_PARALLEL /*--- Final loop over interface markers to compute the interpolation coefficients. ---*/ @@ -381,7 +382,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { } } } // end target vertex loop - + END_SU2_OMP_FOR SU2_OMP_CRITICAL { totalDonorPoints += totalDonors; @@ -390,7 +391,9 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { AvgCorrection += sumCorr; MaxCorrection = max(MaxCorrection, maxCorr); } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL /*--- Free global data that will no longer be used. ---*/ donorCoord.resize(0,0); diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index fd574c9dd53..c8bd4164d32 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -94,12 +94,10 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi if(npoint == 0) return; if(matrix != nullptr) { - SU2_OMP_MASTER SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION); } if(nvar > MAXNVAR) { - SU2_OMP_MASTER SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION); } @@ -160,22 +158,17 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi } /*--- Allocate data. ---*/ -#define ALLOC_AND_INIT(ptr,num) {\ - ptr = MemoryAllocation::aligned_alloc(64,num*sizeof(ScalarType));\ - for(size_t k=0; k(64, num*sizeof(ScalarType)); + }; - ALLOC_AND_INIT(matrix, nnz*nVar*nEqn) + allocAndInit(matrix, nnz*nVar*nEqn); /*--- Preconditioners. 
---*/ - if (ilu_needed) { - ALLOC_AND_INIT(ILU_matrix, nnz_ilu*nVar*nEqn) - } + if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu*nVar*nEqn); - if (diag_needed) { - ALLOC_AND_INIT(invM, nPointDomain*nVar*nEqn); - } -#undef ALLOC_AND_INIT + if (diag_needed) allocAndInit(invM, nPointDomain*nVar*nEqn); /*--- Thread parallel initialization. ---*/ @@ -293,6 +286,7 @@ void CSysMatrixComms::Initiate(const CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) bufDSend[buf_offset+iVar] = x(iPoint,iVar); } + END_SU2_OMP_FOR break; } @@ -330,6 +324,7 @@ void CSysMatrixComms::Initiate(const CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) bufDSend[buf_offset+iVar] = x(iPoint,iVar); } + END_SU2_OMP_FOR break; } @@ -372,6 +367,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. 
---*/ @@ -411,6 +407,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) x(iPoint,iVar) = CSysMatrix::template ActiveAssign(bufDRecv[buf_offset+iVar]); } + END_SU2_OMP_FOR break; } @@ -450,6 +447,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) x(iPoint,iVar) += CSysMatrix::template ActiveAssign(bufDRecv[buf_offset+iVar]); } + END_SU2_OMP_FOR break; } @@ -466,6 +464,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER @@ -474,7 +473,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, template void CSysMatrix::SetValZero() { const auto size = nnz*nVar*nEqn; - const auto chunk = roundUpDiv(size,omp_get_max_threads()); + const auto chunk = roundUpDiv(size,omp_get_num_threads()); const auto begin = chunk * omp_get_thread_num(); const auto mySize = min(chunk, size-begin) * sizeof(ScalarType); memset(&matrix[begin], 0, mySize); @@ -487,6 +486,7 @@ void CSysMatrix::SetValDiagonalZero() { for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint) for (auto index = 0ul; index < nVar*nEqn; ++index) matrix[dia_ptr[iPoint]*nVar*nEqn + index] = 0.0; + END_SU2_OMP_FOR } template @@ -598,11 +598,9 @@ void CSysMatrix::MatrixVectorProduct(const CSysVector & /*--- Some checks for consistency between CSysMatrix and the CSysVectors ---*/ #ifndef NDEBUG if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) { - SU2_OMP_MASTER SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION); } if (nPoint != prod.GetNBlk()) { - SU2_OMP_MASTER SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION); } #endif @@ -617,6 +615,7 @@ void CSysMatrix::MatrixVectorProduct(const CSysVector & for (auto row_i = 0ul; row_i < nPointDomain; row_i++) { 
RowProduct(vec, row_i, &prod[row_i*nVar]); } + END_SU2_OMP_FOR /*--- MPI Parallelization. ---*/ @@ -629,9 +628,10 @@ template void CSysMatrix::BuildJacobiPreconditioner() { /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. ---*/ - SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait) + SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar])); + END_SU2_OMP_FOR } @@ -644,6 +644,7 @@ void CSysMatrix::ComputeJacobiPreconditioner(const CSysVector::BuildILUPreconditioner() { SU2_OMP_FOR_STAT(omp_light_size) for (auto iVar = 0ul; iVar < nnz*nVar*nVar; ++iVar) ILU_matrix[iVar] = matrix[iVar]; + END_SU2_OMP_FOR } else { /*--- ILUn clear the ILU matrix first. ---*/ SU2_OMP_FOR_STAT(omp_light_size) for (auto iVar = 0ul; iVar < nnz_ilu*nVar*nVar; iVar++) ILU_matrix[iVar] = 0.0; + END_SU2_OMP_FOR /*--- ILUn, traverse matrix to access its blocks * sequentially and set them in the ILU matrix. 
---*/ @@ -677,6 +680,7 @@ void CSysMatrix::BuildILUPreconditioner() { SetBlock_ILUMatrix(iPoint, jPoint, &matrix[index*nVar*nVar]); } } + END_SU2_OMP_FOR } /*--- Transform system in Upper Matrix ---*/ @@ -751,6 +755,7 @@ void CSysMatrix::BuildILUPreconditioner() { InverseDiagonalBlock_ILUMatrix(end-1, &invM[(end-1)*nVar*nVar]); } + END_SU2_OMP_FOR } @@ -804,6 +809,7 @@ void CSysMatrix::ComputeILUPreconditioner(const CSysVector::ComputeLU_SGSPreconditioner(const CSysVector::ComputeLU_SGSPreconditioner(const CSysVector::ComputeLineletPreconditioner(const CSysVector::ComputeLineletPreconditioner(const CSysVector::ComputeResidual(const CSysVector & sol, RowProduct(sol, iPoint, aux_vec); VectorSubtraction(aux_vec, &f[iPoint*nVar], &res[iPoint*nVar]); } + END_SU2_OMP_FOR } template @@ -1262,6 +1273,7 @@ void CSysMatrix::SetDiagonalAsColumnSum() { if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii); } } + END_SU2_OMP_FOR } template @@ -1287,13 +1299,14 @@ void CSysMatrix::TransposeInPlace() { if (edge_ptr) { /*--- The FV way. ---*/ - SU2_OMP_FOR_DYN(omp_light_size/2) + SU2_OMP_FOR_DYN(omp_heavy_size*2) for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) { auto bij = &matrix[edge_ptr(iEdge,0)*nVar*nVar]; auto bji = &matrix[edge_ptr(iEdge,1)*nVar*nVar]; swapAndTransp(nVar, bij, bji); } + END_SU2_OMP_FOR } else if (col_ptr) { /*--- If the column pointer was built. ---*/ @@ -1306,6 +1319,7 @@ void CSysMatrix::TransposeInPlace() { swapAndTransp(nVar, bij, bji); } } + END_SU2_OMP_FOR } else { /*--- Slow fallback, needs to search for ji. ---*/ @@ -1320,6 +1334,7 @@ void CSysMatrix::TransposeInPlace() { swapAndTransp(nVar, bij, bji); } } + END_SU2_OMP_FOR } /*--- Transpose the diagonal blocks. 
---*/ @@ -1331,9 +1346,12 @@ void CSysMatrix::TransposeInPlace() { for (auto j=0ul; j::MatrixMatrixAddition(ScalarType alpha, const CSysMa (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz); if (!ok) { - SU2_OMP_MASTER SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); } SU2_OMP_FOR_STAT(omp_light_size) for (auto i = 0ul; i < nnz*nVar*nEqn; ++i) matrix[i] += alpha*B.matrix[i]; + END_SU2_OMP_FOR } @@ -1366,9 +1384,9 @@ void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, cons pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix); pastix_wrapper.Factorize(geometry, config, kind_fact); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER #else - SU2_OMP_MASTER SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); #endif } @@ -1380,12 +1398,12 @@ void CSysMatrix::ComputePastixPreconditioner(const CSysVector::ModGramSchmidt(int i, su2matrix& Hsbg, if ((nrm <= 0.0) || (nrm != nrm)) { /*--- nrm is the result of a dot product, communications are implicitly handled. 
---*/ - SU2_OMP_MASTER SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION); } @@ -209,7 +208,6 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & /*--- Check the subspace size ---*/ if (m < 1) { - SU2_OMP_MASTER SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } @@ -230,6 +228,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & cg_ready = true; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -350,12 +349,10 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector 5000) { - SU2_OMP_MASTER SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION); } @@ -371,6 +368,7 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector::RFGMRES_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVectorGetLinear_Solver_Smoother_Relaxation()); if (m < 1) { - SU2_OMP_MASTER SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); } @@ -730,6 +728,7 @@ unsigned long CSysSolve::Smoother_LinSolver(const CSysVector::Solve(CSysMatrix & Jacobian, co if (config->GetDiscrete_Adjoint()) { #ifdef CODI_REVERSE_TYPE - TapeActive = AD::globalTape.isActive(); - - AD::StartExtFunc(false, false); + TapeActive = AD::getGlobalTape().isActive(); - AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); + SU2_OMP_MASTER { + AD::StartExtFunc(false, false); + AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER AD::StopRecording(); #endif @@ -920,6 +922,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co Residual = residual; Iterations = IterLinSol; } + END_SU2_OMP_MASTER HandleTemporariesOut(LinSysSol); @@ -933,22 +936,6 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co if (!mesh_deform) KindPrecond = config->GetKind_DiscAdj_Linear_Prec(); else KindPrecond = config->GetKind_Deform_Linear_Solver_Prec(); - 
/*--- Start recording if it was stopped for the linear solver ---*/ - - AD::StartRecording(); - - AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize()); - -#ifdef CODI_REVERSE_TYPE - AD::FuncHelper->addUserData(&LinSysRes); - AD::FuncHelper->addUserData(&LinSysSol); - AD::FuncHelper->addUserData(&Jacobian); - AD::FuncHelper->addUserData(geometry); - AD::FuncHelper->addUserData(config); - AD::FuncHelper->addUserData(this); - AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); -#endif - /*--- Build preconditioner for the transposed Jacobian ---*/ if (RequiresTranspose) Jacobian.TransposeInPlace(); @@ -972,7 +959,30 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co break; } + /*--- Start recording if it was stopped for the linear solver ---*/ +#ifdef CODI_REVERSE_TYPE + AD::StartRecording(); + SU2_OMP_BARRIER + + SU2_OMP_MASTER { + AD::SetExtFuncOut(&LinSysSol[0], LinSysSol.GetLocSize()); + AD::FuncHelper->addUserData(&LinSysRes); + AD::FuncHelper->addUserData(&LinSysSol); + AD::FuncHelper->addUserData(&Jacobian); + AD::FuncHelper->addUserData(geometry); + AD::FuncHelper->addUserData(config); + AD::FuncHelper->addUserData(this); + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + + AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); + SU2_OMP_BARRIER + + SU2_OMP_MASTER AD::EndExtFunc(); + END_SU2_OMP_MASTER +#endif } return IterLinSol; @@ -1059,7 +1069,10 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, delete precond; + SU2_OMP_MASTER Iterations = IterLinSol; + END_SU2_OMP_MASTER + return IterLinSol; } diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp index 4804fdfb2ed..a2d737a502c 100644 --- a/Common/src/linear_algebra/CSysSolve_b.cpp +++ b/Common/src/linear_algebra/CSysSolve_b.cpp @@ -1,7 +1,7 @@ /*! * \file CSysSolve_b.cpp * \brief Routines for the linear solver used in the reverse sweep of AD. - * \author T. Albring + * \author T. Albring, J. 
Blühdorn * \version 7.1.1 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -37,36 +37,40 @@ void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::Re codi::DataStore* d) { CSysVector* LinSysRes_b = nullptr; - d->getData(LinSysRes_b); + d->getDataByIndex(LinSysRes_b, 0); CSysVector* LinSysSol_b = nullptr; - d->getData(LinSysSol_b); + d->getDataByIndex(LinSysSol_b, 1); CSysMatrix* Jacobian = nullptr; - d->getData(Jacobian); + d->getDataByIndex(Jacobian, 2); CGeometry* geometry = nullptr; - d->getData(geometry); + d->getDataByIndex(geometry, 3); const CConfig* config = nullptr; - d->getData(config); + d->getDataByIndex(config, 4); CSysSolve* solver = nullptr; - d->getData(solver); + d->getDataByIndex(solver, 5); /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/ + SU2_OMP_BARRIER + SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads())) for (unsigned long i = 0; i < n; i++) { (*LinSysRes_b)[i] = y_b[i]; (*LinSysSol_b)[i] = 0.0; } + END_SU2_OMP_FOR solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config, false); + SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads())) for (unsigned long i = 0; i < n; i ++) { - x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i)); + x_b[i] = SU2_TYPE::GetValue((*LinSysSol_b)[i]); } - + END_SU2_OMP_FOR } template class CSysSolve_b; diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index 4477d8b3fe1..9cb66905fde 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -50,7 +50,7 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE); - if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, nElm * sizeof(ScalarType)); + if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, nElm*sizeof(ScalarType)); if (val != nullptr) { 
if (!valIsArray) { @@ -63,6 +63,8 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB template CSysVector::~CSysVector() { + if (!std::is_trivial::value) + for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType(); MemoryAllocation::aligned_free(vec_val); } diff --git a/Common/src/meson.build b/Common/src/meson.build index 5dcbb57c66f..b3e0726e70c 100644 --- a/Common/src/meson.build +++ b/Common/src/meson.build @@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp', 'CConfig.cpp', 'basic_types/ad_structure.cpp', 'wall_model.cpp', - '../include/parallelization/mpi_structure.cpp']) + '../include/parallelization/mpi_structure.cpp', + '../include/parallelization/omp_structure.cpp']) subdir('linear_algebra') subdir('toolboxes') diff --git a/Common/src/toolboxes/CLinearPartitioner.cpp b/Common/src/toolboxes/CLinearPartitioner.cpp index 6a45f4fb20f..16ac5373762 100644 --- a/Common/src/toolboxes/CLinearPartitioner.cpp +++ b/Common/src/toolboxes/CLinearPartitioner.cpp @@ -28,9 +28,9 @@ #include "../../include/toolboxes/CLinearPartitioner.hpp" -CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, - unsigned long val_offset, - bool isDisjoint) { +void CLinearPartitioner::Initialize(unsigned long global_count, + unsigned long offset, + bool isDisjoint) { /*--- Store MPI size ---*/ @@ -48,10 +48,10 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, balancing for any remainder points. 
---*/ unsigned long quotient = 0; - if (val_global_count >= (unsigned long)size) - quotient = val_global_count/size; + if (global_count >= (unsigned long)size) + quotient = global_count/size; - int remainder = int(val_global_count%size); + int remainder = int(global_count%size); for (int ii = 0; ii < size; ii++) { sizeOnRank[ii] = quotient + int(ii < remainder); } @@ -63,7 +63,7 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, unsigned long adjust = 0; if (isDisjoint) adjust = 1; - firstIndex[0] = val_offset; + firstIndex[0] = offset; lastIndex[0] = firstIndex[0] + sizeOnRank[0] - adjust; cumulativeSizeBeforeRank[0] = 0; for (int iProc = 1; iProc < size; iProc++) { @@ -72,17 +72,15 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, cumulativeSizeBeforeRank[iProc] = (cumulativeSizeBeforeRank[iProc-1] + sizeOnRank[iProc-1]); } - cumulativeSizeBeforeRank[size] = val_global_count; + cumulativeSizeBeforeRank[size] = global_count; } -CLinearPartitioner::~CLinearPartitioner(void) { } - -unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index) { +unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long index) const { /*--- Initial guess ---*/ - unsigned long iProcessor = val_index/sizeOnRank[0]; + unsigned long iProcessor = index/sizeOnRank[0]; /*--- Guard against going over size. ---*/ @@ -91,11 +89,11 @@ unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index /*--- Move up or down until we find the processor. 
---*/ - if (val_index >= cumulativeSizeBeforeRank[iProcessor]) - while(val_index >= cumulativeSizeBeforeRank[iProcessor+1]) + if (index >= cumulativeSizeBeforeRank[iProcessor]) + while(index >= cumulativeSizeBeforeRank[iProcessor+1]) iProcessor++; else - while(val_index < cumulativeSizeBeforeRank[iProcessor]) + while(index < cumulativeSizeBeforeRank[iProcessor]) iProcessor--; return iProcessor; diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py index cbf999f412e..1d4d187fb83 100755 --- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py +++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py @@ -6,22 +6,12 @@ # \author T. Economon # \version 7.1.1 "Blackbird" # -# The current SU2 release has been coordinated by the -# SU2 International Developers Society -# with selected contributions from the open-source community. +# SU2 Project Website: https://su2code.github.io # -# The main research teams contributing to the current release are: -# - Prof. Juan J. Alonso's group at Stanford University. -# - Prof. Piero Colonna's group at Delft University of Technology. -# - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology. -# - Prof. Alberto Guardone's group at Polytechnic University of Milan. -# - Prof. Rafael Palacios' group at Imperial College London. -# - Prof. Vincent Terrapon's group at the University of Liege. -# - Prof. Edwin van der Weide's group at the University of Twente. -# - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics. +# The SU2 Project is maintained by the SU2 Foundation +# (http://su2foundation.org) # -# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon, -# Tim Albring, and the SU2 contributors. +# Copyright 2012-2021, SU2 Contributors (cf. 
AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py index 614c458c103..c38335336aa 100755 --- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py +++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py @@ -6,22 +6,12 @@ # \author T. Economon # \version 7.1.1 "Blackbird" # -# The current SU2 release has been coordinated by the -# SU2 International Developers Society -# with selected contributions from the open-source community. +# SU2 Project Website: https://su2code.github.io # -# The main research teams contributing to the current release are: -# - Prof. Juan J. Alonso's group at Stanford University. -# - Prof. Piero Colonna's group at Delft University of Technology. -# - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology. -# - Prof. Alberto Guardone's group at Polytechnic University of Milan. -# - Prof. Rafael Palacios' group at Imperial College London. -# - Prof. Vincent Terrapon's group at the University of Liege. -# - Prof. Edwin van der Weide's group at the University of Twente. -# - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics. +# The SU2 Project is maintained by the SU2 Foundation +# (http://su2foundation.org) # -# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon, -# Tim Albring, and the SU2 contributors. +# Copyright 2012-2021, SU2 Contributors (cf. 
AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp index 9b69188b570..38934f8a2d9 100644 --- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp +++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp @@ -127,6 +127,7 @@ void computeGradientsGreenGauss(CSolver* solver, AD::EndPreacc(); } + END_SU2_OMP_FOR /*--- Add boundary fluxes. ---*/ @@ -160,6 +161,7 @@ void computeGradientsGreenGauss(CSolver* solver, gradient(iPoint, iVar, iDim) -= flux * area[iDim]; } } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp index 802de977dbf..dcd923901dc 100644 --- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp +++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp @@ -76,35 +76,35 @@ FORCEINLINE void solveLeastSquares(size_t iPoint, /*--- Entries of upper triangular matrix R. 
---*/ + if (periodic) { + AD::StartPreacc(); + AD::SetPreaccIn(Rmatrix(iPoint,0,0)); + AD::SetPreaccIn(Rmatrix(iPoint,0,1)); + AD::SetPreaccIn(Rmatrix(iPoint,1,1)); + } + su2double r11 = Rmatrix(iPoint,0,0); su2double r12 = Rmatrix(iPoint,0,1); su2double r22 = Rmatrix(iPoint,1,1); su2double r13 = 0.0, r23 = 0.0, r33 = 1.0; - if (periodic) { - AD::StartPreacc(); - AD::SetPreaccIn(r11); - AD::SetPreaccIn(r12); - AD::SetPreaccIn(r22); - } - r11 = sqrt(max(r11, eps)); r12 /= r11; r22 = sqrt(max(r22 - r12*r12, eps)); if (nDim == 3) { + if (periodic) { + AD::SetPreaccIn(Rmatrix(iPoint,0,2)); + AD::SetPreaccIn(Rmatrix(iPoint,1,2)); + AD::SetPreaccIn(Rmatrix(iPoint,2,1)); + AD::SetPreaccIn(Rmatrix(iPoint,2,2)); + } + r13 = Rmatrix(iPoint,0,2); r33 = Rmatrix(iPoint,2,2); const auto r23_a = Rmatrix(iPoint,1,2); const auto r23_b = Rmatrix(iPoint,2,1); - if (periodic) { - AD::SetPreaccIn(r13); - AD::SetPreaccIn(r23_a); - AD::SetPreaccIn(r23_b); - AD::SetPreaccIn(r33); - } - r13 /= r11; r23 = r23_a/r22 - r23_b*r12/(r11*r22); r33 = sqrt(max(r33 - r23*r23 - r13*r13, eps)); @@ -284,6 +284,7 @@ void computeGradientsLeastSquares(CSolver* solver, solveLeastSquares(iPoint, varBegin, varEnd, Rmatrix, gradient); } } + END_SU2_OMP_FOR /*--- Correct the gradient values across any periodic boundaries. 
---*/ @@ -300,6 +301,7 @@ void computeGradientsLeastSquares(CSolver* solver, SU2_OMP_FOR_DYN(chunkSize) for (size_t iPoint = 0; iPoint < nPointDomain; ++iPoint) solveLeastSquares(iPoint, varBegin, varEnd, Rmatrix, gradient); + END_SU2_OMP_FOR } /*--- If no solver was provided we do not communicate ---*/ diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp index 8d9b4bfc3b6..1149e704fb6 100644 --- a/SU2_CFD/include/integration/CNewtonIntegration.hpp +++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp @@ -33,12 +33,14 @@ #ifdef HAVE_OMP #ifdef HAVE_OMP_SIMD -#define CNEWTON_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait) +#define CNEWTON_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT) #else -#define CNEWTON_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait) +#define CNEWTON_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) #endif +#define END_CNEWTON_PARFOR END_SU2_OMP_FOR #else #define CNEWTON_PARFOR SU2_OMP_SIMD +#define END_CNEWTON_PARFOR #endif /*! @@ -114,6 +116,7 @@ class CNewtonIntegration final : public CIntegration { inline void SetSolutionResult(CSysVector& x) const { CNEWTON_PARFOR for (auto i = 0ul; i < x.GetLocSize(); ++i) x[i] = LinSysSol[i]; + END_CNEWTON_PARFOR } /*--- Preconditioner objects for each active solver. 
---*/ @@ -127,11 +130,13 @@ class CNewtonIntegration final : public CIntegration { unsigned long iters, Scalar& eps) const { CNEWTON_PARFOR for (auto i = 0ul; i < u.GetLocSize(); ++i) precondIn[i] = u[i]; + END_CNEWTON_PARFOR iters = Preconditioner_impl(precondIn, precondOut, iters, eps); CNEWTON_PARFOR for (auto i = 0ul; i < u.GetLocSize(); ++i) v[i] = precondOut[i]; + END_CNEWTON_PARFOR SU2_OMP_BARRIER return iters; @@ -212,3 +217,4 @@ class CNewtonIntegration final : public CIntegration { }; #undef CNEWTON_PARFOR +#undef END_CNEWTON_PARFOR diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp index 82b2485ffef..083222664b8 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp @@ -37,11 +37,23 @@ class CFEAIteration; * \brief Class for driving an iteration of the discrete adjoint FEM system. * \author R. Sanchez */ -class CDiscAdjFEAIteration : public CIteration { +class CDiscAdjFEAIteration final : public CIteration { private: CFEAIteration* fem_iteration; /*!< \brief Pointer to the primal iteration class. */ unsigned short CurrentRecording; /*!< \brief Stores the current status of the recording. */ + /*! + * \brief load solution for dynamic problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. 
@@ -92,25 +104,6 @@ class CDiscAdjFEAIteration : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Updates the containers for the discrete adjoint mean flow system. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - */ - void Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Monitors the convergence and other metrics for the discrete adjoint mean flow system. * \param[in] output - Pointer to the COutput class. @@ -180,26 +173,6 @@ class CDiscAdjFEAIteration : public CIteration { void InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst) override; - /*! - * \brief Record a single iteration of the direct FEM system. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. 
- * \param[in] solver - Container vector with all the solutions. - * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - * \param[in] kind_recording - The kind of recording (geometry or flow). - */ - void SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst, unsigned short kind_recording); - /*! * \brief Record a single iteration of the direct FEM system. * \param[in] solver - Container vector with all the solutions. @@ -209,7 +182,6 @@ class CDiscAdjFEAIteration : public CIteration { * \param[in] val_iInst - Index of the instance. * \param[in] kind_recording - The kind of recording (geometry or flow). */ - void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) override; @@ -226,15 +198,4 @@ class CDiscAdjFEAIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load solution for dynamic problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. 
- * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - * \param[in] val_DirectIter - Direct iteration to load. - */ - void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp index 93d1a9d2052..8647f709285 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp @@ -37,10 +37,22 @@ class CFluidIteration; * \brief Class for driving an iteration of the discrete adjoint fluid system. * \author T. Economon */ -class CDiscAdjFluidIteration : public CIteration { +class CDiscAdjFluidIteration final : public CIteration { private: const bool turbulent; /*!< \brief Stores the turbulent flag. */ + /*! + * \brief load unsteady solution for unsteady problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. @@ -126,25 +138,6 @@ class CDiscAdjFluidIteration : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Postprocess the discrete adjoint fluid iteration. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. 
- * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - */ - void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Registers all input variables of the fluid iteration. * \param[in] solver - Container vector with all the solutions. @@ -188,7 +181,6 @@ class CDiscAdjFluidIteration : public CIteration { * \param[in] val_iInst - Index of the instance. * \param[in] kind_recording - The kind of recording (geometry or flow). */ - void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) override; @@ -205,15 +197,4 @@ class CDiscAdjFluidIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load unsteady solution for unsteady problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] val_iZone - Index of the zone. 
- * \param[in] val_iInst - Index of the instance. - * \param[in] val_DirectIter - Direct iteration to load. - */ - void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp index 8c69d1162f8..ce981317897 100644 --- a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp @@ -35,7 +35,20 @@ * \brief Class for driving an iteration of the discrete adjoint heat equation. * \author O. Burghardt */ -class CDiscAdjHeatIteration : public CIteration { +class CDiscAdjHeatIteration final : public CIteration { + + /*! + * \brief load unsteady solution for unsteady problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance layer. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. @@ -108,31 +121,6 @@ class CDiscAdjHeatIteration : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Outputs desired files and quantities for the discrete adjoint fluid system. - */ - void Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned long InnerIter, - bool StopCalc, unsigned short val_iZone, unsigned short val_iInst); - - /*! - * \brief Perform a single iteration of the adjoint fluid system. 
- * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance layer. - */ - void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Registers all input variables of the fluid iteration. * \param[in] solver - Container vector with all the solutions. @@ -180,15 +168,4 @@ class CDiscAdjHeatIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load unsteady solution for unsteady problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance layer. - * \param[in] val_DirectIter - Direct iteration to load. 
- */ - void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CIteration.hpp b/SU2_CFD/include/iteration/CIteration.hpp index 961fdb9ed6a..05947c02402 100644 --- a/SU2_CFD/include/iteration/CIteration.hpp +++ b/SU2_CFD/include/iteration/CIteration.hpp @@ -280,12 +280,6 @@ class CIteration { virtual void RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) {} - virtual void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {} - - virtual void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {} - virtual void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) {} }; diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp index 24a38c5cc53..85fc7a1322e 100644 --- a/SU2_CFD/include/limiters/CLimiterDetails.hpp +++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp @@ -177,6 +177,7 @@ struct CLimiterDetails sharedMin.resize(varEnd) = largeNum; sharedMax.resize(varEnd) =-largeNum; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Per thread reduction. 
---*/ @@ -185,7 +186,7 @@ struct CLimiterDetails localMin = largeNum; localMax =-largeNum; - SU2_OMP(for schedule(static, 512) nowait) + SU2_OMP_FOR_(schedule(static, 512) SU2_NOWAIT) for(size_t iPoint = 0; iPoint < geometry.GetnPointDomain(); ++iPoint) { for(size_t iVar = varBegin; iVar < varEnd; ++iVar) @@ -194,6 +195,7 @@ struct CLimiterDetails localMax(iVar) = max(localMax(iVar), field(iPoint, iVar)); } } + END_SU2_OMP_FOR /*--- Per rank reduction. ---*/ @@ -203,6 +205,7 @@ struct CLimiterDetails sharedMin(iVar) = min(sharedMin(iVar), localMin(iVar)); sharedMax(iVar) = max(sharedMax(iVar), localMax(iVar)); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Global reduction. ---*/ @@ -215,6 +218,7 @@ struct CLimiterDetails localMax = sharedMax; SU2_MPI::Allreduce(localMax.data(), sharedMax.data(), varEnd, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Compute eps^2 (each thread has its own copy of it). ---*/ diff --git a/SU2_CFD/include/limiters/computeLimiters.hpp b/SU2_CFD/include/limiters/computeLimiters.hpp index e5324ecec27..e3016ab0d45 100644 --- a/SU2_CFD/include/limiters/computeLimiters.hpp +++ b/SU2_CFD/include/limiters/computeLimiters.hpp @@ -68,6 +68,7 @@ if (geometry.GetnDim() == 2) {\ for(size_t iPoint = 0; iPoint < geometry.GetnPoint(); ++iPoint) for(size_t iVar = varBegin; iVar < varEnd; ++iVar) limiter(iPoint, iVar) = 1.0; + END_SU2_OMP_FOR break; } case BARTH_JESPERSEN: diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp index b8725f355b8..2876c889f66 100644 --- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp +++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp @@ -115,6 +115,7 @@ void computeLimiters_impl(CSolver* solver, for (size_t iPoint = 0; iPoint < nPoint; ++iPoint) for (size_t iVar = varBegin; iVar < varEnd; ++iVar) fieldMax(iPoint,iVar) = fieldMin(iPoint,iVar) = field(iPoint,iVar); + END_SU2_OMP_FOR for (size_t iPeriodic = 1; 
iPeriodic <= config.GetnMarker_Periodic()/2; ++iPeriodic) { @@ -215,6 +216,7 @@ void computeLimiters_impl(CSolver* solver, AD::EndPreacc(); } + END_SU2_OMP_FOR /*--- Account for periodic effects, take the minimum limiter on each periodic pair. ---*/ if (periodic) diff --git a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp index 7e0bd6f8870..9b62a3a89db 100644 --- a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp +++ b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp @@ -77,7 +77,7 @@ class CCenteredBase : public Base { public: /*! - * \brief Implementation of the base Roe flux. + * \brief Implementation of the base centered flux. */ void ComputeFlux(Int iEdge, const CConfig& config, diff --git a/SU2_CFD/include/numerics_simd/util.hpp b/SU2_CFD/include/numerics_simd/util.hpp index 7127912329b..21c99c7e529 100644 --- a/SU2_CFD/include/numerics_simd/util.hpp +++ b/SU2_CFD/include/numerics_simd/util.hpp @@ -115,14 +115,13 @@ FORCEINLINE Double squaredNorm(const VectorDbl& vector) { template FORCEINLINE Double norm(const VectorDbl& vector) { return sqrt(squaredNorm(vector)); } +#ifndef CODI_REVERSE_TYPE /*! * \brief Gather a single variable from index iPoint of a 1D container. */ template FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { - auto x = *vars.innerIter(iPoint); - AD::SetPreaccIn(x, Double::Size); - return x; + return *vars.innerIter(iPoint); } /*! @@ -130,9 +129,7 @@ FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { */ template FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { - auto x = vars.template get >(iPoint); - AD::SetPreaccIn(x, nVar, Double::Size); - return x; + return vars.template get >(iPoint); } /*! 
@@ -140,10 +137,55 @@ FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { */ template FORCEINLINE MatrixDbl gatherVariables(Int iPoint, const Container& vars) { - auto x = vars.template get >(iPoint); - AD::SetPreaccIn(x, nRows, nCols, Double::Size); + return vars.template get >(iPoint); +} +#else + +namespace { + template = 0> + FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint); } + + /*--- When getting 1 variable from a matrix container, we assume it is the first. ---*/ + template = 0> + FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint,0); } +} + +template +FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { + Double x; + for (size_t k=0; k +FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { + VectorDbl x; + for (size_t i=0; i +FORCEINLINE MatrixDbl gatherVariables(Int iPoint, const Container& vars) { + MatrixDbl x; + for (size_t i=0; i &valFieldNames); - /*! - * \brief Destructor - */ - ~CFEMDataSorter() override; - /*! * \brief Sort the connectivities (volume and surface) into data structures used for output file writing. * \param[in] config - Definition of the particular problem. @@ -60,7 +55,7 @@ class CFEMDataSorter final: public CParallelDataSorter{ * \return Global index of a specific point. 
*/ unsigned long GetGlobalIndex(unsigned long iPoint) const override{ - return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint; + return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint; } private: diff --git a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp index f2f70e23a7e..cd561c6a7bf 100644 --- a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp @@ -34,10 +34,9 @@ class CFVMDataSorter final: public CParallelDataSorter{ private: - int* Local_Halo; //!< Array containing the flag whether a point is a halo node + vector Local_Halo; //!< Array containing the flag whether a point is a halo node public: - /*! * \brief Constructor * \param[in] config - Pointer to the current config structure @@ -46,11 +45,6 @@ class CFVMDataSorter final: public CParallelDataSorter{ */ CFVMDataSorter(CConfig *config, CGeometry *geometry, const vector &valFieldNames); - /*! - * \brief Destructor - */ - ~CFVMDataSorter() override; - /*! * \brief Sort the connectivities (volume and surface) into data structures used for output file writing. * \param[in] config - Definition of the particular problem. @@ -65,7 +59,7 @@ class CFVMDataSorter final: public CParallelDataSorter{ * \return Global index of a specific point. */ unsigned long GetGlobalIndex(unsigned long iPoint) const override { - return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint; + return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint; } /*! 
diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp index 5122eed672e..1a22dbda832 100644 --- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp @@ -31,6 +31,7 @@ #include "../../../../Common/include/option_structure.hpp" #include "../../../../Common/include/toolboxes/CLinearPartitioner.hpp" #include +#include class CGeometry; class CConfig; @@ -41,12 +42,12 @@ class CParallelDataSorter{ /*! * \brief The MPI rank */ - int rank; + const int rank; /*! * \brief The MPI size, aka the number of processors. */ - int size; + const int size; unsigned long nGlobalPointBeforeSort; //!< Global number of points without halos before sorting unsigned long nLocalPointsBeforeSort; //!< Local number of points without halos before sorting on this proc @@ -66,7 +67,20 @@ class CParallelDataSorter{ * \brief Map that stores the index for each GEO_TYPE type where to find information * in the element arrays. */ - static const map TypeMap; + struct { + static unsigned short at(unsigned short type) { + switch(type) { + case LINE: return 0; + case TRIANGLE: return 1; + case QUADRILATERAL: return 2; + case TETRAHEDRON: return 3; + case HEXAHEDRON: return 4; + case PRISM: return 5; + case PYRAMID: return 6; + default: assert(false); return 0; + }; + } + } TypeMap; unsigned long nPointsGlobal; //!< Global number of points without halos unsigned long nElemGlobal; //!< Global number of elems without halos @@ -75,7 +89,7 @@ class CParallelDataSorter{ unsigned long nElem; //!< Local number of elements unsigned long nConn; //!< Local size of the connectivity array - CLinearPartitioner* linearPartitioner; //!< Linear partitioner based on the global number of points. + CLinearPartitioner linearPartitioner; //!< Linear partitioner based on the global number of points. 
unsigned short GlobalField_Counter; //!< Number of output fields @@ -88,11 +102,8 @@ class CParallelDataSorter{ int *nElemConn_Send; //!< Number of element connectivity this processor has to send to other processors int *nElemConn_Cum; //!< Cumulative number of element connectivity entries unsigned long *Index; //!< Index each point has in the send buffer - su2double *connSend; //!< Send buffer holding the data that will be send to other processors - passivedouble *passiveDoubleBuffer; //!< Buffer holding the sorted, partitioned data as passivedouble types - su2double *doubleBuffer; //!< Buffer holding the sorted, partitioned data as su2double types - /// Pointer used to allocate the memory used for ::passiveDoubleBuffer and ::doubleBuffer. - char *dataBuffer; + passivedouble *connSend; //!< Send buffer holding the data that will be send to other processors + passivedouble *dataBuffer; //!< Buffer holding the sorted, partitioned data as passivedouble types unsigned long *idSend; //!< Send buffer holding global indices that will be send to other processors int nSends, //!< Number of sends nRecvs; //!< Number of receives @@ -243,7 +254,7 @@ class CParallelDataSorter{ * \return The beginning node ID. */ virtual unsigned long GetNodeBegin(unsigned short rank) const { - return linearPartitioner->GetFirstIndexOnRank(rank); + return linearPartitioner.GetFirstIndexOnRank(rank); } /*! @@ -252,7 +263,7 @@ class CParallelDataSorter{ * \return The ending node ID. */ unsigned long GetNodeEnd(unsigned short rank) const { - return linearPartitioner->GetLastIndexOnRank(rank); + return linearPartitioner.GetLastIndexOnRank(rank); } /*! @@ -261,13 +272,13 @@ class CParallelDataSorter{ * \input iPoint - the point ID. * \return the value of the data field at a point. 
*/ - passivedouble GetData(unsigned short iField, unsigned long iPoint) const {return passiveDoubleBuffer[iPoint*GlobalField_Counter + iField];} + passivedouble GetData(unsigned short iField, unsigned long iPoint) const {return dataBuffer[iPoint*GlobalField_Counter + iField];} /*! * \brief Get the pointer to the sorted linear partitioned data. * \return Pointer to the sorted data. */ - const passivedouble *GetData() const {return passiveDoubleBuffer;} + const passivedouble *GetData() const {return dataBuffer;} /*! * \brief Get the global index of a point. @@ -281,14 +292,14 @@ class CParallelDataSorter{ * \input rank - the processor rank. * \return The cumulated number of points up to certain processor rank. */ - virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner->GetCumulativeSizeBeforeRank(rank);} + virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner.GetCumulativeSizeBeforeRank(rank);} /*! * \brief Get the linear number of points * \input rank - the processor rank. * \return The linear number of points up to certain processor rank. */ - unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner->GetSizeOnRank(rank);} + unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner.GetSizeOnRank(rank);} /*! * \brief Check whether the current connectivity is sorted (i.e. 
if SortConnectivity has been called) @@ -305,10 +316,10 @@ class CParallelDataSorter{ * \param[in] data - Value of the field */ void SetUnsorted_Data(unsigned long iPoint, unsigned short iField, su2double data){ - connSend[Index[iPoint] + iField] = data; + connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data); } - su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const { + passivedouble GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const { return connSend[Index[iPoint] + iField]; } @@ -318,7 +329,7 @@ class CParallelDataSorter{ * \return The rank/processor number. */ virtual unsigned short FindProcessor(unsigned long iPoint) const { - return linearPartitioner->GetRankContainingIndex(iPoint); + return linearPartitioner.GetRankContainingIndex(iPoint); } /*! diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp index 7775761f81c..5d2e7481364 100644 --- a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp @@ -31,7 +31,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{ - CFEMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance + const CFEMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance vector globalSurfaceDOFIDs; //!< Structure to map the local sorted point ID to the global point ID vector nSurfaceDOFsRanks; //!< Number of points on each rank @@ -43,12 +43,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{ * \param[in] geometry - Pointer to the current geometry * \param[in] valVolumeSorter - The datasorter containing the volume data */ - CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter); - - /*! - * \brief Destructor - */ - ~CSurfaceFEMDataSorter() override; + CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter); /*! 
* \brief Sort the output data for each grid node into a linear partitioning across all processors. diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp index dd6132c4248..d65a2a03260 100644 --- a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp @@ -31,7 +31,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{ - CFVMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance + const CFVMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance map Renumber2Global; //! Structure to map the local sorted point ID to the global point ID public: @@ -41,12 +41,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{ * \param[in] geometry - Pointer to the current geometry * \param[in] valVolumeSorter - The datasorter containing the volume data */ - CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, CFVMDataSorter* valVolumeSorter); - - /*! - * \brief Destructor - */ - ~CSurfaceFVMDataSorter() override; + CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, const CFVMDataSorter* valVolumeSorter); /*! * \brief Sort the output data for each grid node into a linear partitioning across all processors. diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp index dec9c21e348..48c29f76dc7 100644 --- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp @@ -38,57 +38,84 @@ */ class CDiscAdjFEASolver final : public CSolver { private: + static constexpr size_t MAXNVAR = 3; /*!< \brief Max number of variables, for static arrays. */ + unsigned short KindDirect_Solver = 0; CSolver *direct_solver = nullptr; - su2double *Sens_E = nullptr, /*!< \brief Young modulus sensitivity coefficient for each boundary. 
*/ - *Sens_Nu = nullptr, /*!< \brief Poisson's ratio sensitivity coefficient for each boundary. */ - *Sens_nL = nullptr, /*!< \brief Normal pressure sensitivity coefficient for each boundary. */ - **CSensitivity = nullptr; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ - - su2double *Solution_Vel = nullptr, /*!< \brief Velocity componenent of the solution. */ - *Solution_Accel = nullptr; /*!< \brief Acceleration componenent of the solution. */ - - su2double *normalLoads = nullptr; /*!< \brief Values of the normal loads for each marker iMarker_nL. */ - - unsigned short nMPROP = 0; /*!< \brief Number of material properties */ - - su2double *E_i = nullptr, /*!< \brief Values of the Young's Modulus. */ - *Nu_i = nullptr, /*!< \brief Values of the Poisson's ratio. */ - *Rho_i = nullptr, /*!< \brief Values of the density (for inertial effects). */ - *Rho_DL_i = nullptr; /*!< \brief Values of the density (for volume loading). */ - int *AD_Idx_E_i = nullptr, /*!< \brief Derivative index of the Young's Modulus. */ - *AD_Idx_Nu_i = nullptr, /*!< \brief Derivative index of the Poisson's ratio. */ - *AD_Idx_Rho_i = nullptr, /*!< \brief Derivative index of the density (for inertial effects). */ - *AD_Idx_Rho_DL_i = nullptr; /*!< \brief Derivative index of the density (for volume loading). */ - - su2double *Local_Sens_E = nullptr, /*!< \brief Local sensitivity of the Young's modulus. */ - *Global_Sens_E = nullptr, /*!< \brief Global sensitivity of the Young's modulus. */ - *Total_Sens_E = nullptr; /*!< \brief Total sensitivity of the Young's modulus (time domain). */ - su2double *Local_Sens_Nu = nullptr, /*!< \brief Local sensitivity of the Poisson ratio. */ - *Global_Sens_Nu = nullptr, /*!< \brief Global sensitivity of the Poisson ratio. */ - *Total_Sens_Nu = nullptr; /*!< \brief Total sensitivity of the Poisson ratio (time domain). */ - su2double *Local_Sens_Rho = nullptr, /*!< \brief Local sensitivity of the density. 
*/ - *Global_Sens_Rho = nullptr, /*!< \brief Global sensitivity of the density. */ - *Total_Sens_Rho = nullptr; /*!< \brief Total sensitivity of the density (time domain). */ - su2double *Local_Sens_Rho_DL = nullptr, /*!< \brief Local sensitivity of the volume load. */ - *Global_Sens_Rho_DL = nullptr, /*!< \brief Global sensitivity of the volume load. */ - *Total_Sens_Rho_DL = nullptr; /*!< \brief Total sensitivity of the volume load (time domain). */ - - bool de_effects = false; /*!< \brief Determines if DE effects are considered. */ - unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */ - su2double *EField = nullptr; /*!< \brief Array that stores the electric field as design variables. */ - int *AD_Idx_EField = nullptr; /*!< \brief Derivative index of the electric field as design variables. */ - su2double *Local_Sens_EField = nullptr, /*!< \brief Local sensitivity of the Electric Field. */ - *Global_Sens_EField = nullptr, /*!< \brief Global sensitivity of the Electric Field. */ - *Total_Sens_EField = nullptr; /*!< \brief Total sensitivity of the Electric Field (time domain). */ - - bool fea_dv = false; /*!< \brief Determines if the design variable we study is a FEA parameter. */ - unsigned short nDV = 0; /*!< \brief Number of design variables in the problem. */ - su2double *DV_Val = nullptr; /*!< \brief Values of the design variables. */ - int *AD_Idx_DV_Val = nullptr; /*!< \brief Derivative index of the design variables. */ - su2double *Local_Sens_DV = nullptr, /*!< \brief Local sensitivity of the design variables. */ - *Global_Sens_DV = nullptr, /*!< \brief Global sensitivity of the design variables. */ - *Total_Sens_DV = nullptr; /*!< \brief Total sensitivity of the design variables (time domain). */ + + /*! + * \brief A type to manage sensitivities of design variables. + */ + struct SensData { + unsigned short size = 0; + su2double* val = nullptr; /*!< \brief Value of the variable. 
*/ + int* AD_Idx = nullptr; /*!< \brief Derivative index in the AD tape. */ + bool localIdx = false; + su2double* LocalSens = nullptr; /*!< \brief Local sensitivity (domain). */ + su2double* GlobalSens = nullptr; /*!< \brief Global sensitivity (mpi). */ + su2double* TotalSens = nullptr; /*!< \brief Total sensitivity (time domain). */ + + su2double& operator[] (unsigned short i) { return val[i]; } + const su2double& operator[] (unsigned short i) const { return val[i]; } + + void resize(unsigned short n) { + clear(); + size = n; + val = new su2double[n](); + AD_Idx = new int[n](); + LocalSens = new su2double[n](); + GlobalSens = new su2double[n](); + TotalSens = new su2double[n](); + } + + void clear() { + size = 0; + localIdx = false; + delete [] val; + delete [] AD_Idx; + delete [] LocalSens; + delete [] GlobalSens; + delete [] TotalSens; + } + + void Register(bool push_index) { + for (auto i = 0u; i < size; ++i) AD::RegisterInput(val[i], push_index); + } + + void SetIndex() { + for (auto i = 0u; i < size; ++i) AD::SetIndex(AD_Idx[i], val[i]); + localIdx = true; + } + + void GetDerivative() { + if (localIdx) + for (auto i = 0u; i < size; ++i) LocalSens[i] = AD::GetDerivative(AD_Idx[i]); + else + for (auto i = 0u; i < size; ++i) LocalSens[i] = SU2_TYPE::GetDerivative(val[i]); + + SU2_MPI::Allreduce(LocalSens, GlobalSens, size, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + } + + void UpdateTotal() { + for (auto i = 0u; i < size; ++i) TotalSens[i] += GlobalSens[i]; + } + + ~SensData() { clear(); } + }; + + unsigned short nMPROP = 0; /*!< \brief Number of material properties */ + SensData E; /*!< \brief Values of the Young's Modulus. */ + SensData Nu; /*!< \brief Values of the Poisson's ratio. */ + SensData Rho; /*!< \brief Values of the density (for inertial effects). */ + SensData Rho_DL; /*!< \brief Values of the density (for volume loading). */ + + bool de_effects = false; /*!< \brief Determines if DE effects are considered. 
*/ + unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */ + SensData EField; /*!< \brief Array that stores the electric field as design variables. */ + + bool fea_dv = false; /*!< \brief Determines if the design variable we study is a FEA parameter. */ + unsigned short nDV = 0; /*!< \brief Number of design variables in the problem. */ + SensData DV; /*!< \brief Values of the design variables. */ CDiscAdjFEABoundVariable* nodes = nullptr; /*!< \brief The highest level in the variable hierarchy this solver can safely use. */ @@ -97,20 +124,17 @@ class CDiscAdjFEASolver final : public CSolver { */ inline CVariable* GetBaseClassPointerToNodes() override { return nodes; } -public: - /*! - * \brief Constructor of the class. + * \brief Read the design variables for the adjoint solver */ - CDiscAdjFEASolver(void); + void ReadDV(const CConfig *config); + +public: /*! - * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. + * \brief Constructor of the class. */ - CDiscAdjFEASolver(CGeometry *geometry, CConfig *config); + CDiscAdjFEASolver() = default; /*! * \overload @@ -124,7 +148,7 @@ class CDiscAdjFEASolver final : public CSolver { /*! * \brief Destructor of the class. */ - ~CDiscAdjFEASolver(void) override; + ~CDiscAdjFEASolver() override; /*! * \brief Performs the preprocessing of the adjoint AD-based solver. @@ -159,13 +183,6 @@ class CDiscAdjFEASolver final : public CSolver { */ void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Set the surface sensitivity. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - */ - void SetSurface_Sensitivity(CGeometry *geometry, CConfig* config) override; - /*! * \brief Extract and set the geometrical sensitivity. 
* \param[in] geometry - Geometrical definition of the problem. @@ -178,97 +195,97 @@ class CDiscAdjFEASolver final : public CSolver { * \return Value of the total Young's modulus sensitivity * (inviscid + viscous contribution). */ - inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return Total_Sens_E[iVal]; } + inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return E.TotalSens[iVal]; } /*! * \brief Set the total Poisson's ratio sensitivity. * \return Value of the Poisson's ratio sensitivity */ - inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Total_Sens_Nu[iVal]; } + inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Nu.TotalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural density * \return Value of the structural density sensitivity */ - inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Total_Sens_Rho[iVal]; } + inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Rho.TotalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural weight * \return Value of the structural weight sensitivity */ - inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Total_Sens_Rho_DL[iVal]; } + inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.TotalSens[iVal]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the Electric Field in the region iEField (time averaged) */ - inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return Total_Sens_EField[iEField]; } + inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return EField.TotalSens[iEField]; } /*! * \brief A virtual member. 
* \return Value of the total sensitivity coefficient for the FEA DV in the region iDVFEA (time averaged) */ - inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return Total_Sens_DV[iDVFEA]; } + inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.TotalSens[iDVFEA]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the Young Modulus E */ - inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return Global_Sens_E[iVal]; } + inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return E.GlobalSens[iVal]; } /*! * \brief A virtual member. * \return Value of the Mach sensitivity for the Poisson's ratio Nu */ - inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Global_Sens_Nu[iVal]; } + inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Nu.GlobalSens[iVal]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the Electric Field in the region iEField */ - inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return Global_Sens_EField[iEField]; } + inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return EField.GlobalSens[iEField]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the FEA DV in the region iDVFEA */ - inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return Global_Sens_DV[iDVFEA]; } + inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.GlobalSens[iDVFEA]; } /*! 
* \brief Get the total sensitivity for the structural density * \return Value of the structural density sensitivity */ - inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Global_Sens_Rho[iVal]; } + inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Rho.GlobalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural weight * \return Value of the structural weight sensitivity */ - inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Global_Sens_Rho_DL[iVal]; } + inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.GlobalSens[iVal]; } /*! * \brief Get the value of the Young modulus from the adjoint solver * \return Value of the Young modulus from the adjoint solver */ - inline su2double GetVal_Young(unsigned short iVal) const override { return E_i[iVal]; } + inline su2double GetVal_Young(unsigned short iVal) const override { return E[iVal]; } /*! * \brief Get the value of the Poisson's ratio from the adjoint solver * \return Value of the Poisson's ratio from the adjoint solver */ - inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu_i[iVal]; } + inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu[iVal]; } /*! * \brief Get the value of the density from the adjoint solver, for inertial effects * \return Value of the density from the adjoint solver */ - inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho_i[iVal]; } + inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho[iVal]; } /*! * \brief Get the value of the density from the adjoint solver, for dead loads * \return Value of the density for dead loads, from the adjoint solver */ - inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL_i[iVal]; } + inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL[iVal]; } /*! 
* \brief Get the number of variables for the Electric Field from the adjoint solver @@ -276,11 +293,6 @@ class CDiscAdjFEASolver final : public CSolver { */ inline unsigned short GetnEField(void) const override { return nEField; } - /*! - * \brief Read the design variables for the adjoint solver - */ - void ReadDV(CConfig *config) override; - /*! * \brief Get the number of design variables from the adjoint solver, * \return Number of design variables from the adjoint solver @@ -297,7 +309,7 @@ class CDiscAdjFEASolver final : public CSolver { * \brief Get the value of the design variables from the adjoint solver * \return Pointer to the values of the design variables */ - inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV_Val[iVal]; } + inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV[iVal]; } /*! * \brief Prepare the solver for a new recording. diff --git a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp index 4caa7e597e2..d7ba9d80b75 100644 --- a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp @@ -39,6 +39,13 @@ */ class CDiscAdjMeshSolver final : public CSolver { private: + static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ + static constexpr size_t MAXNVAR = 3; /*!< \brief Max number of variables, for static arrays. */ + + static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */ + + unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */ + CSolver *direct_solver = nullptr; CDiscAdjMeshBoundVariable* nodes = nullptr; /*!< \brief Variables of the discrete adjoint mesh solver. */ @@ -53,15 +60,7 @@ class CDiscAdjMeshSolver final : public CSolver { /*! * \brief Constructor of the class. */ - CDiscAdjMeshSolver(void); - - /*! 
- * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. - */ - CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config); + CDiscAdjMeshSolver() = default; /*! * \overload @@ -75,7 +74,7 @@ class CDiscAdjMeshSolver final : public CSolver { /*! * \brief Destructor of the class. */ - ~CDiscAdjMeshSolver(void) override; + ~CDiscAdjMeshSolver() override; /*! * \brief Performs the preprocessing of the AD-based mesh adjoint solver. @@ -124,24 +123,6 @@ class CDiscAdjMeshSolver final : public CSolver { */ void ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Update the dual-time derivatives. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver_container - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. - * \param[in] iRKStep - Current step of the Runge-Kutta iteration. - * \param[in] RunTime_EqSystem - System of equations which is going to be solved. - * \param[in] Output - boolean to determine whether to print output. - */ - void Preprocessing(CGeometry *geometry, - CSolver **solver_container, - CConfig *config, - unsigned short iMesh, - unsigned short iRKStep, - unsigned short RunTime_EqSystem, - bool Output) override; - /*! * \brief Load a solution from a restart file. * \param[in] geometry - Geometrical definition of the problem. 
diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp index c14ed6ab149..cac68c1ab93 100644 --- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp @@ -38,9 +38,17 @@ */ class CDiscAdjSolver final : public CSolver { private: + static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ + static constexpr size_t MAXNVAR = 32; /*!< \brief Max number of variables, for static arrays. */ + + static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */ + + unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */ + unsigned short KindDirect_Solver; CSolver *direct_solver; - su2double **CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ + vector > CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ + vector Sens_Geo; /*!< \brief Total shape sensitivity for each monitored boundary. */ su2double Total_Sens_Mach; /*!< \brief Total mach sensitivity coefficient for all the boundaries. */ su2double Total_Sens_AoA; /*!< \brief Total angle of attack sensitivity coefficient for all the boundaries. */ su2double Total_Sens_Geo; /*!< \brief Total shape sensitivity coefficient for all the boundaries. */ @@ -52,8 +60,6 @@ class CDiscAdjSolver final : public CSolver { su2double Mach, Alpha, Beta, Pressure, Temperature, BPressure, ModVel; su2double TemperatureRad, Total_Sens_Temp_Rad; - su2double *Solution_Geometry; /*!< \brief Auxiliary vector for the geometry solution (dimension nDim instead of nVar). */ - CDiscAdjVariable* nodes = nullptr; /*!< \brief The highest level in the variable hierarchy this solver can safely use. */ /*! @@ -66,14 +72,7 @@ class CDiscAdjSolver final : public CSolver { /*! * \brief Constructor of the class. */ - CDiscAdjSolver(void); - - /*! 
- * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - */ - CDiscAdjSolver(CGeometry *geometry, CConfig *config); + CDiscAdjSolver() = default; /*! * \overload @@ -88,7 +87,7 @@ class CDiscAdjSolver final : public CSolver { /*! * \brief Destructor of the class. */ - ~CDiscAdjSolver(void) override; + ~CDiscAdjSolver() override; /*! * \brief Performs the preprocessing of the adjoint AD-based solver. @@ -115,14 +114,6 @@ class CDiscAdjSolver final : public CSolver { */ void SetAdjoint_Output(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Sets the adjoint values of the output of the mesh deformation iteration - * before evaluation of the tape. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] config - The particular config. - */ - void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) override; - /*! * \brief Sets the adjoint values of the input variables of the flow (+turb.) iteration * after tape has been evaluated. @@ -131,14 +122,6 @@ class CDiscAdjSolver final : public CSolver { */ void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override; - /*! - * \brief A virtual member. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] solver_container - The solver container holding all solutions. - * \param[in] config - The particular config. - */ - void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) override; - /*! * \brief Set the surface sensitivity. * \param[in] geometry - Geometrical definition of the problem. @@ -225,14 +208,6 @@ class CDiscAdjSolver final : public CSolver { */ void SetRecording(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Prepare the solver for a new recording. - * \param[in] kind_recording - Kind of AD recording. 
- */ - void SetMesh_Recording(CGeometry **geometry, - CVolumetricMovement *grid_movement, - CConfig *config) override; - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. @@ -282,4 +257,12 @@ class CDiscAdjSolver final : public CSolver { int val_iter, bool val_update_geo) override; + /*! + * \brief Depends on the direct solver. + */ + inline bool GetHasHybridParallel() const override { + if (direct_solver) return direct_solver->GetHasHybridParallel(); + return false; + } + }; diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index 0517f9f03b2..94afc9e227a 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -34,6 +34,15 @@ class CNumericsSIMD; template class CFVMFlowSolverBase : public CSolver { + private: + static void recursiveAssign() {} + + template + static void recursiveAssign(U& d, const V& s, Ts&&... otherPairs) { + d = s; + recursiveAssign(otherPairs...); + } + protected: static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ static constexpr size_t MAXNVAR = VariableType::MAXNVAR; /*!< \brief Max number of variables, for static arrays. */ @@ -43,6 +52,18 @@ class CFVMFlowSolverBase : public CSolver { unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */ + /*! + * \brief Utility to set the values of member variables safely, and so that the new values are seen by all threads. + * \param[in] lhsRhsPairs - Pairs of destination and source e.g. a,0,b,-1. + */ + template + static void ompMasterAssignBarrier(Ts&&... lhsRhsPairs) { + SU2_OMP_MASTER + recursiveAssign(lhsRhsPairs...); + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + } + su2double Mach_Inf = 0.0; /*!< \brief Mach number at the infinity. */ su2double Density_Inf = 0.0; /*!< \brief Density at the infinity.
*/ su2double Energy_Inf = 0.0; /*!< \brief Energy at the infinity. */ @@ -318,13 +339,7 @@ class CFVMFlowSolverBase : public CSolver { * Critical sections are used for this instead of reduction * clauses for compatibility with OpenMP 2.0 (Windows...). ---*/ - SU2_OMP_MASTER - { - Min_Delta_Time = 1e30; - Max_Delta_Time = 0.0; - Global_Delta_UnstTimeND = 1e30; - } - SU2_OMP_BARRIER + ompMasterAssignBarrier(Min_Delta_Time,1e30, Max_Delta_Time,0.0, Global_Delta_UnstTimeND,1e30); /*--- Loop domain points. ---*/ @@ -377,6 +392,7 @@ class CFVMFlowSolverBase : public CSolver { } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ -419,6 +435,7 @@ class CFVMFlowSolverBase : public CSolver { Lambda = lambdaVisc(*nodes,iPoint) * Area2; nodes->AddMax_Lambda_Visc(iPoint, Lambda); } + END_SU2_OMP_FOR } } @@ -427,7 +444,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Thread-local variables for min/max reduction. ---*/ su2double minDt = 1e30, maxDt = 0.0; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { su2double Vol = geometry->nodes->GetVolume(iPoint); @@ -449,6 +466,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetDelta_Time(iPoint,0.0); } } + END_SU2_OMP_FOR /*--- Min/max over threads. ---*/ SU2_OMP_CRITICAL { @@ -456,6 +474,7 @@ class CFVMFlowSolverBase : public CSolver { Max_Delta_Time = max(Max_Delta_Time, maxDt); Global_Delta_Time = Min_Delta_Time; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER } @@ -470,6 +489,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); Max_Delta_Time = rbuf_time; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- For exact time solution use the minimum delta time of the whole mesh. 
---*/ @@ -490,6 +510,7 @@ class CFVMFlowSolverBase : public CSolver { config->SetDelta_UnstTimeND(Global_Delta_Time); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Sets the regular CFL equal to the unsteady CFL. ---*/ @@ -499,6 +520,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetLocalCFL(iPoint, config->GetUnst_CFL()); nodes->SetDelta_Time(iPoint, Global_Delta_Time); } + END_SU2_OMP_FOR } @@ -509,12 +531,14 @@ class CFVMFlowSolverBase : public CSolver { /*--- Thread-local variable for reduction. ---*/ su2double glbDtND = 1e30; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint)); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL Global_Delta_UnstTimeND = min(Global_Delta_UnstTimeND, glbDtND); + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -524,6 +548,7 @@ class CFVMFlowSolverBase : public CSolver { config->SetDelta_UnstTimeND(Global_Delta_UnstTimeND); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -535,6 +560,7 @@ class CFVMFlowSolverBase : public CSolver { su2double dt = min((2.0/3.0)*config->GetDelta_UnstTimeND(), nodes->GetDelta_Time(iPoint)); nodes->SetDelta_Time(iPoint, dt); } + END_SU2_OMP_FOR } } @@ -585,6 +611,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed); } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ -619,6 +646,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed); } + END_SU2_OMP_FOR } } @@ -681,6 +709,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]); } } + END_SU2_OMP_FOR if (isPeriodic) { /*--- Correct the sensor values across any periodic boundaries. 
---*/ @@ -695,6 +724,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]); + END_SU2_OMP_FOR } /*--- MPI parallelization ---*/ @@ -739,7 +769,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Update the solution and residuals ---*/ if (!adjoint) { - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { su2double Vol = geometry->nodes->GetVolume(iPoint) + geometry->nodes->GetPeriodicVolume(iPoint); @@ -792,12 +822,14 @@ class CFVMFlowSolverBase : public CSolver { } } } + END_SU2_OMP_FOR /*--- Reduce residual information over all threads in this rank. ---*/ SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { Residual_RMS[iVar] += resRMS[iVar]; AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER } @@ -807,16 +839,11 @@ class CFVMFlowSolverBase : public CSolver { CompleteComms(geometry, config, SOLUTION); if (!adjoint) { - SU2_OMP_MASTER { - /*--- Compute the root mean square residual ---*/ - - SetResidual_RMS(geometry, config); + /*--- Compute the root mean square residual ---*/ + SetResidual_RMS(geometry, config); - /*--- For verification cases, compute the global error metrics. ---*/ - - ComputeVerificationError(geometry, config); - } - SU2_OMP_BARRIER + /*--- For verification cases, compute the global error metrics. ---*/ + ComputeVerificationError(geometry, config); } } @@ -859,7 +886,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Add pseudotime term to Jacobian. 
---*/ if (implicit) { - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /*--- Modify matrix diagonal to improve diagonal dominance. ---*/ @@ -879,11 +906,12 @@ class CFVMFlowSolverBase : public CSolver { Jacobian.SetVal2Diag(iPoint, 1.0); } } + END_SU2_OMP_FOR } /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/ - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /*--- Multigrid contribution to residual. ---*/ @@ -911,17 +939,17 @@ class CFVMFlowSolverBase : public CSolver { } } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { Residual_RMS[iVar] += resRMS[iVar]; AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - SU2_OMP_BARRIER } /*! @@ -942,6 +970,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->AddSolution(iPoint, iVar, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint*nVar+iVar]); } } + END_SU2_OMP_FOR } for (unsigned short iPeriodic = 1; iPeriodic <= config->GetnMarker_Periodic()/2; iPeriodic++) { @@ -953,9 +982,7 @@ class CFVMFlowSolverBase : public CSolver { CompleteComms(geometry, config, SOLUTION); /*--- For verification cases, compute the global error metrics. ---*/ - SU2_OMP_MASTER ComputeVerificationError(geometry, config); - SU2_OMP_BARRIER } /*! 
@@ -968,11 +995,7 @@ class CFVMFlowSolverBase : public CSolver { const auto& Gradient_Primitive = nodes->GetGradient_Primitive(); auto& StrainMag = nodes->GetStrainMag(); - SU2_OMP_MASTER { - StrainMag_Max = 0.0; - Omega_Max = 0.0; - } - SU2_OMP_BARRIER + ompMasterAssignBarrier(StrainMag_Max,0.0, Omega_Max,0.0); su2double strainMax = 0.0, omegaMax = 0.0; @@ -1035,12 +1058,14 @@ class CFVMFlowSolverBase : public CSolver { AD::EndPreacc(); } + END_SU2_OMP_FOR if ((iMesh == MESH_0) && (config.GetComm_Level() == COMM_FULL)) { SU2_OMP_CRITICAL { StrainMag_Max = max(StrainMag_Max, strainMax); Omega_Max = max(Omega_Max, omegaMax); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER { @@ -1050,6 +1075,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_MPI::Allreduce(&MyStrainMag_Max, &StrainMag_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&MyOmega_Max, &Omega_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index 948fb1da421..a668db4f00d 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -471,6 +471,7 @@ void CFVMFlowSolverBase::Viscous_Residual_impl(unsigned long iEdge, CGeome template void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CConfig* config) { + /*--- The errors only need to be computed on the finest grid. ---*/ if (MGLevel != MESH_0) return; @@ -485,6 +486,8 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo (config->GetInnerIter() == 1)); if (!write_heads) return; + SU2_OMP_MASTER { + /*--- Check if there actually is an exact solution for this verification case, if computed at all. 
---*/ if (VerificationSolution && VerificationSolution->ExactSolutionKnown()) { @@ -524,6 +527,10 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo PrintVerificationError(config); } + + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } template @@ -560,6 +567,7 @@ void CFVMFlowSolverBase::ComputeUnderRelaxationFactor(const CConfig* confi nodes->SetUnderRelaxation(iPoint, localUnderRelaxation); } + END_SU2_OMP_FOR } template @@ -569,11 +577,12 @@ void CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol /*--- Solve or smooth the linear system. ---*/ - SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait) + SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT) for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) { LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); } + END_SU2_OMP_FOR auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); @@ -581,6 +590,7 @@ void CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER CompleteImplicitIteration(geometry, nullptr, config); @@ -836,7 +846,8 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** SU2_MPI::Error(string("The solution file ") + restart_filename + string(" doesn't match with the mesh file!\n") + string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); } - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Update the geometry for flows on deforming meshes ---*/ @@ -906,6 +917,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** } solver[iMesh][FLOW_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse); } + END_SU2_OMP_FOR solver[iMesh][FLOW_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION); solver[iMesh][FLOW_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION); @@ -929,7 +941,8 @@ void 
CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** delete [] Restart_Vars; Restart_Vars = nullptr; delete [] Restart_Data; Restart_Data = nullptr; - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -975,6 +988,7 @@ void CFVMFlowSolverBase::SetInitialCondition(CGeometry **geometry, CSolver but this is not necessary. */ VerificationSolution->GetInitialCondition(coor, solDOF); } + END_SU2_OMP_FOR } } @@ -984,7 +998,8 @@ void CFVMFlowSolverBase::SetInitialCondition(CGeometry **geometry, CSolver PushSolutionBackInTime(TimeIter, restart, rans, solver_container, geometry, config); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1269,6 +1284,7 @@ void CFVMFlowSolverBase::BC_Sym_Plane(CGeometry* geometry, CSolver** solve } // if viscous } // if GetDomain } // for iVertex + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ for (iVar = 0; iVar < nPrimVarGrad; iVar++) delete[] Grad_Reflected[iVar]; @@ -1450,6 +1466,7 @@ void CFVMFlowSolverBase::BC_Fluid_Interface(CGeometry* geometry, } } } + END_SU2_OMP_FOR } } @@ -1512,6 +1529,7 @@ void CFVMFlowSolverBase::BC_Custom(CGeometry* geometry, CSolver** solver_c } } } + END_SU2_OMP_FOR } else { /* The user must specify the custom BC's here. 
*/ @@ -1546,6 +1564,7 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, edgeNumerics->ComputeFlux(iEdge, *config, *geometry, *nodes, UpdateType::COLORING, mask, LinSysRes, Jacobian); } } + END_SU2_OMP_FOR } if (ReducerStrategy) { @@ -1571,6 +1590,7 @@ void CFVMFlowSolverBase::SumEdgeFluxes(const CGeometry* geometry) { LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge)); } } + END_SU2_OMP_FOR } template @@ -1634,6 +1654,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } @@ -1672,6 +1693,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -1704,6 +1726,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry for (iVar = 0; iVar < nVar; iVar++) LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } + END_SU2_OMP_FOR } } @@ -1746,6 +1769,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp index 9110ec14344..e7656728c19 100644 --- a/SU2_CFD/include/solvers/CSolver.hpp +++ b/SU2_CFD/include/solvers/CSolver.hpp @@ -428,6 +428,7 @@ class CSolver { for (auto& r : Residual_Max) r = 0; for (auto& p : Point_Max) p = 0; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -3354,11 +3355,6 @@ class CSolver { */ inline virtual unsigned short GetnDVFEA(void) const { return 0; } - /*! - * \brief A virtual member. - */ - inline virtual void ReadDV(CConfig *config) { } - /*! * \brief A virtual member. 
* \return Pointer to the values of the Electric Field @@ -3464,6 +3460,18 @@ class CSolver { inline virtual void SetAitken_Relaxation(CGeometry *geometry, CConfig *config) { } + /*! + * \brief Loads the solution from the restart file. + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] config - Definition of the particular problem. + * \param[in] filename - Name of the restart file. + * \param[in] skipVars - Number of variables preceding the solution. + */ + void BasicLoadRestart(CGeometry *geometry, + const CConfig *config, + const string& filename, + unsigned long skipVars); + /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. @@ -3625,13 +3633,6 @@ class CSolver { */ inline virtual void SetAdjoint_Output(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] config - The particular config. - */ - inline virtual void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - The geometrical definition of the problem. @@ -3640,14 +3641,6 @@ class CSolver { */ inline virtual void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] solver_container - The solver container holding all solutions. - * \param[in] config - The particular config. - */ - inline virtual void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. @@ -3830,14 +3823,6 @@ class CSolver { */ inline virtual void SetRecording(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. - * \param[in] kind_recording - Kind of AD recording.
- */ - inline virtual void SetMesh_Recording(CGeometry **geometry, - CVolumetricMovement *grid_movement, - CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/include/solvers/CTurbSolver.hpp b/SU2_CFD/include/solvers/CTurbSolver.hpp index b157e3d3291..9e032a659f8 100644 --- a/SU2_CFD/include/solvers/CTurbSolver.hpp +++ b/SU2_CFD/include/solvers/CTurbSolver.hpp @@ -260,6 +260,7 @@ class CTurbSolver : public CSolver { for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){ nodes->SetSolution(iPoint, Solution_Inf); } + END_SU2_OMP_FOR } /*! diff --git a/SU2_CFD/obj/Makefile.am b/SU2_CFD/obj/Makefile.am index 30e7636a0e5..054df6fa267 100644 --- a/SU2_CFD/obj/Makefile.am +++ b/SU2_CFD/obj/Makefile.am @@ -10,7 +10,7 @@ # The SU2 Project is maintained by the SU2 Foundation # (http://su2foundation.org) # -# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) +# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index 8b19f5d6134..d9dfbbdc12f 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -56,6 +56,10 @@ int main(int argc, char *argv[]) { CLI11_PARSE(app, argc, argv) + /*--- OpenMP initialization ---*/ + + omp_initialize(); + omp_set_num_threads(num_threads); /*--- MPI initialization, and buffer setting ---*/ @@ -69,6 +73,11 @@ int main(int argc, char *argv[]) { #endif SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm(); + /*--- AD initialization ---*/ +#ifdef HAVE_OPDI + AD::getGlobalTape().initialize(); +#endif + /*--- Uncomment the following line if runtime NaN catching is desired. ---*/ // feenableexcept(FE_INVALID | FE_OVERFLOW); @@ -160,9 +169,17 @@ int main(int argc, char *argv[]) { libxsmm_finalize(); #endif + /*--- Finalize AD, if necessary. 
---*/ +#ifdef HAVE_OPDI + AD::getGlobalTape().finalize(); +#endif + /*--- Finalize MPI parallelization. ---*/ SU2_MPI::Finalize(); + /*--- Finalize OpenMP. ---*/ + omp_finalize(); + return EXIT_SUCCESS; } diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index a3912276f13..605378c96ae 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -639,7 +639,7 @@ void CDiscAdjMultizoneDriver::SetRecording(unsigned short kind_recording, Kind_T if (rank == MASTER_NODE) AD::PrintStatistics(); #ifdef CODI_REVERSE_TYPE if (size > SINGLE_NODE) { - su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0; + su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0; SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); if (rank == MASTER_NODE) { cout << "MPI\n"; diff --git a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp index 2806edd830a..208ce710e8c 100644 --- a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp @@ -295,7 +295,7 @@ void CDiscAdjSinglezoneDriver::SetRecording(unsigned short kind_recording){ if (rank == MASTER_NODE) AD::PrintStatistics(); #ifdef CODI_REVERSE_TYPE if (size > SINGLE_NODE) { - su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0; + su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0; SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); if (rank == MASTER_NODE) { cout << "MPI\n"; diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp index 7fc307204ab..214f83c5b64 100644 --- a/SU2_CFD/src/drivers/CDriver.cpp +++ b/SU2_CFD/src/drivers/CDriver.cpp @@ -815,6 +815,7 @@ void CDriver::Geometrical_Preprocessing_FVM(CConfig *config, 
CGeometry **&geomet geometry[MESH_0]->SetControlVolume(config, ALLOCATE); geometry[MESH_0]->SetBoundControlVolume(config, ALLOCATE); } + END_SU2_OMP_PARALLEL /*--- Visualize a dual control volume if requested ---*/ @@ -1280,6 +1281,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry, if (euler || ns) { SU2_OMP_PARALLEL_(if(solver[MESH_0][FLOW_SOL]->GetHasHybridParallel())) solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); + END_SU2_OMP_PARALLEL } if (NEMO_euler || NEMO_ns) { solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); @@ -1287,6 +1289,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry, if (turbulent) { SU2_OMP_PARALLEL_(if(solver[MESH_0][TURB_SOL]->GetHasHybridParallel())) solver[MESH_0][TURB_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); + END_SU2_OMP_PARALLEL } if (config->AddRadiation()) { solver[MESH_0][RAD_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); @@ -1598,6 +1601,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Convective scheme not implemented (template_solver).", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1624,6 +1628,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE : SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -1643,6 +1648,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid centered scheme or not implemented.\n Currently, only JST and LAX-FRIEDRICH are available for incompressible flows.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } for (iMGlevel = 1; iMGlevel <= config->GetnMGLevels(); iMGlevel++) @@ -1761,6 +1767,7 @@ void 
CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1777,6 +1784,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.\n Currently, only FDS is available for incompressible flows.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -1785,6 +1793,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the Euler / Navier-Stokes equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1885,6 +1894,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE : SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -1895,6 +1905,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid centered scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1948,6 +1959,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1957,6 +1969,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the NEMO Euler / Navier-Stokes equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2028,6 +2041,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Riemann solver not implemented.", CURRENT_FUNCTION); + 
END_SU2_OMP_MASTER break; } @@ -2043,6 +2057,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_UPWIND: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND : for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) { @@ -2055,6 +2070,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the turbulence equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2106,6 +2122,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_UPWIND: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND: for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) { @@ -2115,6 +2132,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the transition equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2159,6 +2177,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the heat transfer equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -2184,6 +2203,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol if (incompressible) SU2_OMP_MASTER SU2_MPI::Error("Convective schemes not implemented for incompressible continuous adjoint.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER /*--- Definition of the convective scheme for each equation and mesh level ---*/ @@ -2191,6 +2211,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing 
the CONV_NUM_METHOD_ADJFLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -2205,6 +2226,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Centered scheme not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2233,6 +2255,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Upwind scheme not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -2241,6 +2264,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the continuous adjoint Euler / Navier-Stokes equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2304,12 +2328,14 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol if (!spalart_allmaras) SU2_OMP_MASTER SU2_MPI::Error("Only the SA turbulence model can be used with the continuous adjoint solver.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER /*--- Definition of the convective scheme for each equation and mesh level ---*/ switch (config->GetKind_ConvNumScheme_AdjTurb()) { case NO_CONVECTIVE: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJTURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND : for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) @@ -2318,6 +2344,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Convective scheme not implemented (adjoint turbulence).", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2350,10 +2377,12 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NEO_HOOKEAN: SU2_OMP_MASTER SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION); + 
END_SU2_OMP_MASTER break; default: SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } break; @@ -2362,6 +2391,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case LINEAR_ELASTIC: SU2_OMP_MASTER SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case NEO_HOOKEAN: if (config->GetMaterialCompressibility() == COMPRESSIBLE_MAT) { @@ -2369,6 +2399,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; case KNOWLES: @@ -2377,6 +2408,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; case IDEAL_DE: @@ -2385,17 +2417,20 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; default: SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } break; default: SU2_OMP_MASTER SU2_MPI::Error("Solver not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } diff --git a/SU2_CFD/src/fluid/CPengRobinson.cpp b/SU2_CFD/src/fluid/CPengRobinson.cpp index 7dc56bd4d25..09d0e67a557 100644 --- a/SU2_CFD/src/fluid/CPengRobinson.cpp +++ b/SU2_CFD/src/fluid/CPengRobinson.cpp @@ -130,7 +130,7 @@ void CPengRobinson::SetTDState_rhoe(su2double rho, su2double e) { dTde_rho = 1 / Cv; - Zed = Pressure / (Gas_Constant * Temperature * Density); + Zed = Pressure / (Gas_Constant * Temperature * rho); AD::SetPreaccOut(Temperature); AD::SetPreaccOut(SoundSpeed2); diff --git a/SU2_CFD/src/integration/CIntegration.cpp 
b/SU2_CFD/src/integration/CIntegration.cpp index 47b1743ef4d..146c453988f 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -217,7 +217,8 @@ void CIntegration::SetDualTime_Geometry(CGeometry *geometry, CSolver *mesh_solve if ((iMesh==MESH_0) && config->GetDeform_Mesh()) mesh_solver->SetDualTime_Mesh(); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver, const CConfig *config, unsigned short iMesh) { @@ -230,6 +231,7 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver SU2_OMP_MASTER solver->ResetCFLAdapt(); + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPoint(), omp_get_num_threads())) @@ -241,6 +243,8 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver /*--- Initialize the local CFL number ---*/ solver->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(iMesh)); } + END_SU2_OMP_FOR - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp index 43ca2c9e9c3..f3ae3835121 100644 --- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp +++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp @@ -94,6 +94,7 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry, SU2_OMP_MASTER config[iZone]->SubtractFinestMesh(); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -120,7 +121,8 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry, numerics_container[iZone][iInst], config[iZone], FinestMesh, RunTime_EqSystem, &monitor); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -334,6 +336,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS for (iVar = 0; iVar < nVar; iVar++) sol_coarse->GetNodes()->SetSolution_Old(Point_Coarse,Solution); } + END_SU2_OMP_FOR delete [] 
Solution; @@ -354,6 +357,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS sol_coarse->GetNodes()->SetVelocity_Old(Point_Coarse, zero); } + END_SU2_OMP_FOR } } @@ -369,6 +373,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS sol_fine->LinSysRes.SetBlock(Point_Fine, sol_coarse->GetNodes()->GetSolution_Old(Point_Coarse)); } } + END_SU2_OMP_FOR } @@ -389,6 +394,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ Residual_Old = solver->LinSysRes.GetBlock(iPoint); solver->GetNodes()->SetResidual_Old(iPoint,Residual_Old); } + END_SU2_OMP_FOR /*--- Jacobi iterations. ---*/ @@ -408,6 +414,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ } } + END_SU2_OMP_FOR /*--- Loop over all mesh points (update residuals with the neighbor averages). ---*/ @@ -422,6 +429,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ for (iVar = 0; iVar < nVar; iVar++) solver->LinSysRes(iPoint,iVar) = (Residual_Old[iVar] + val_smooth_coeff*Residual_Sum[iVar])*factor; } + END_SU2_OMP_FOR /*--- Restore original residuals (without average) at boundary points. 
---*/ @@ -435,6 +443,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ Residual_Old = solver->GetNodes()->GetResidual_Old(iPoint); solver->LinSysRes.SetBlock(iPoint, Residual_Old); } + END_SU2_OMP_FOR } } @@ -462,6 +471,7 @@ void CMultiGridIntegration::SetProlongated_Correction(CSolver *sol_fine, CGeomet Solution_Fine[iVar] += factor*Residual_Fine[iVar]; } } + END_SU2_OMP_FOR /*--- MPI the new interpolated solution ---*/ @@ -482,6 +492,7 @@ void CMultiGridIntegration::SetProlongated_Solution(unsigned short RunTime_EqSys sol_fine->GetNodes()->SetSolution(Point_Fine, sol_coarse->GetNodes()->GetSolution(Point_Coarse)); } } + END_SU2_OMP_FOR } void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine, @@ -511,6 +522,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar } sol_coarse->GetNodes()->AddRes_TruncError(Point_Coarse, Residual); } + END_SU2_OMP_FOR delete [] Residual; @@ -521,6 +533,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode(); sol_coarse->GetNodes()->SetVel_ResTruncError_Zero(Point_Coarse); } + END_SU2_OMP_FOR } } @@ -528,6 +541,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar for (Point_Coarse = 0; Point_Coarse < geo_coarse->GetnPointDomain(); Point_Coarse++) { sol_coarse->GetNodes()->SubtractRes_TruncError(Point_Coarse, sol_coarse->LinSysRes.GetBlock(Point_Coarse)); } + END_SU2_OMP_FOR } @@ -536,6 +550,7 @@ void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solve SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++) solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint)); + END_SU2_OMP_FOR } @@ -575,6 +590,7 @@ void 
CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst sol_coarse->GetNodes()->SetSolution(Point_Coarse, Solution); } + END_SU2_OMP_FOR delete [] Solution; @@ -609,6 +625,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst } } + END_SU2_OMP_FOR } } @@ -652,6 +669,7 @@ void CMultiGridIntegration::SetRestricted_Gradient(unsigned short RunTime_EqSyst } sol_coarse->GetNodes()->SetGradient(Point_Coarse,Gradient); } + END_SU2_OMP_FOR for (iVar = 0; iVar < nVar; iVar++) delete [] Gradient[iVar]; @@ -693,6 +711,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol numerics_container[FinestMesh][ADJFLOW_SOL][CONV_BOUND_TERM], config); break; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -719,6 +738,7 @@ void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****s solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CT(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CT()); solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CQ(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CQ()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Restrict solution and gradients to the coarse levels ---*/ diff --git a/SU2_CFD/src/integration/CNewtonIntegration.cpp b/SU2_CFD/src/integration/CNewtonIntegration.cpp index 127640df26d..da3ec5e14e4 100644 --- a/SU2_CFD/src/integration/CNewtonIntegration.cpp +++ b/SU2_CFD/src/integration/CNewtonIntegration.cpp @@ -113,6 +113,7 @@ void CNewtonIntegration::PerturbSolution(const CSysVector& dir, Scalar m for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar) solvers[FLOW_SOL]->GetNodes()->AddSolution(iPoint,iVar, mag*dir(iPoint,iVar)); } + END_SU2_OMP_FOR } void CNewtonIntegration::ComputeResiduals(ResEvalType type) { @@ -122,6 +123,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { if (type == EXPLICIT) { SU2_OMP_MASTER config->SetKind_TimeIntScheme(EULER_EXPLICIT); + 
END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -133,6 +135,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { if (type == EXPLICIT) { SU2_OMP_MASTER config->SetKind_TimeIntScheme(TimeIntScheme); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -145,11 +148,13 @@ void CNewtonIntegration::ComputeFinDiffStep() { SU2_OMP_MASTER rmsSol = 0.0; + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar) rmsSol_loc += pow(solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint,iVar), 2); + END_SU2_OMP_FOR atomicAdd(rmsSol_loc, rmsSol); @@ -159,6 +164,7 @@ void CNewtonIntegration::ComputeFinDiffStep() { SU2_MPI::Allreduce(&t, &rmsSol, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); finDiffStep = finDiffStepND * max(1.0, sqrt(SU2_TYPE::GetValue(rmsSol) / geometry->GetGlobal_nPointDomain())); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -194,6 +200,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** SU2_OMP_FOR_STAT(omp_chunk_size) for (auto i = 0ul; i < LinSysRes.GetNElmDomain(); ++i) LinSysRes[i] = SU2_TYPE::GetValue(solvers[FLOW_SOL]->LinSysRes[i]); + END_SU2_OMP_FOR su2double residual = 0.0; for (auto iVar = 0ul; iVar < LinSysRes.GetNVar(); ++iVar) @@ -208,6 +215,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** firstResidual = max(firstResidual, residual); if (startupIters) startupIters -= 1; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER endStartup = (startupIters == 0) && (residual - firstResidual < startupResidual); } @@ -219,6 +227,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** if (!startupPeriod && tolRelaxFactor > 1 && fullTolResidual < 0.0) { SU2_OMP_MASTER firstResidual = max(firstResidual, residual); + END_SU2_OMP_MASTER SU2_OMP_BARRIER su2double x = (residual - firstResidual) / fullTolResidual; toleranceFactor = 1.0 + 
(tolRelaxFactor-1)*max(0.0, 1.0-SU2_TYPE::GetValue(x)); @@ -249,6 +258,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** solvers[FLOW_SOL]->SetIterLinSolver(iter); solvers[FLOW_SOL]->SetResLinSolver(eps); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /// TODO: Clever back-tracking and CFL adaptation based on residual reduction. @@ -268,6 +278,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** solvers[FLOW_SOL]->Momentum_Forces(geometry, config); solvers[FLOW_SOL]->Friction_Forces(geometry, config); } + END_SU2_OMP_MASTER /*--- At the end of the startup period the CFL is reset to the initial value. ---*/ @@ -276,12 +287,15 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** startupPeriod = false; firstResidual = residual; } + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < geometry->GetnPoint(); ++iPoint) solvers[FLOW_SOL]->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(MESH_0)); + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CNewtonIntegration::MatrixFreeProduct(const CSysVector& u, CSysVector& v) { @@ -310,6 +324,7 @@ void CNewtonIntegration::MatrixFreeProduct(const CSysVector& u, CSysVect v(iPoint,iVar) += SU2_TYPE::GetValue(delta) * u(iPoint,iVar); } } + END_SU2_OMP_FOR CSysMatrixComms::Initiate(v, geometry, config); CSysMatrixComms::Complete(v, geometry, config); @@ -332,6 +347,7 @@ void CNewtonIntegration::Preconditioner(const CSysVector& u, CSysVector< for (auto iVar = 0ul; iVar < u.GetNVar(); ++iVar) v(iPoint,iVar) = SU2_TYPE::GetValue(delta) * u(iPoint,iVar); } + END_SU2_OMP_FOR CSysMatrixComms::Initiate(v, geometry, config); CSysMatrixComms::Complete(v, geometry, config); diff --git a/SU2_CFD/src/integration/CSingleGridIntegration.cpp b/SU2_CFD/src/integration/CSingleGridIntegration.cpp index 6fff9748820..d20f2013820 100644 --- a/SU2_CFD/src/integration/CSingleGridIntegration.cpp +++ 
b/SU2_CFD/src/integration/CSingleGridIntegration.cpp @@ -79,6 +79,7 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve if (RunTime_EqSystem == RUNTIME_HEAT_SYS) { SU2_OMP_MASTER solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -105,8 +106,8 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve } - } // end SU2_OMP_PARALLEL - + } + END_SU2_OMP_PARALLEL } void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse, @@ -141,6 +142,7 @@ void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSys sol_coarse->GetNodes()->SetSolution(Point_Coarse,Solution); } + END_SU2_OMP_FOR delete [] Solution; @@ -177,6 +179,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys sol_coarse->GetNodes()->SetmuT(Point_Coarse,EddyVisc); } + END_SU2_OMP_FOR /*--- Update solution at the no slip wall boundary, only the first variable (nu_tilde -in SA and SA_NEG- and k -in SST-), to guarantee that the eddy viscoisty @@ -189,6 +192,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode(); sol_coarse->GetNodes()->SetmuT(Point_Coarse,0.0); } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index d4237ca4f13..a19210d468f 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -63,20 +63,22 @@ CDiscAdjFEAIteration::CDiscAdjFEAIteration(const CConfig *config) : CIteration(c } CDiscAdjFEAIteration::~CDiscAdjFEAIteration(void) {} + void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** 
surface_movement, CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { unsigned long iPoint; - unsigned short TimeIter = config[val_iZone]->GetTimeIter(); - bool dynamic = (config[val_iZone]->GetTime_Domain()); - - int Direct_Iter; + auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; + auto geometry0 = geometry[val_iZone][val_iInst][MESH_0]; + auto dirNodes = solvers0[FEA_SOL]->GetNodes(); + auto adjNodes = solvers0[ADJFEA_SOL]->GetNodes(); /*--- For the dynamic adjoint, load direct solutions from restart files. ---*/ - if (dynamic) { - Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1; + if (config[val_iZone]->GetTime_Domain()) { + const int TimeIter = config[val_iZone]->GetTimeIter(); + const int Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - TimeIter - 1; /*--- We want to load the already converged solution at timesteps n and n-1 ---*/ @@ -86,15 +88,15 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->Set_Solution_time_n(); + dirNodes->Set_Solution_time_n(); /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Accel_time_n(); + dirNodes->SetSolution_Accel_time_n(); /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Vel_time_n(); + dirNodes->SetSolution_Vel_time_n(); /*--- Load solution timestep n ---*/ @@ -102,33 +104,28 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/ - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - 
solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint)); } - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Accel_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Accel(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Accel_Direct(iPoint, dirNodes->GetSolution_Accel(iPoint)); } - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Vel_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Vel(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Vel_Direct(iPoint, dirNodes->GetSolution_Vel(iPoint)); } } else { /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/ - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint)); } } - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJFEA_SYS, false); + solvers0[ADJFEA_SOL]->Preprocessing(geometry0, solvers0, config[val_iZone], MESH_0, 0, RUNTIME_ADJFEA_SYS, false); + } 
void CDiscAdjFEAIteration::LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, @@ -162,7 +159,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - bool dynamic = (config[val_iZone]->GetTime_Domain()); /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/ @@ -171,92 +167,11 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->ExtractAdjoint_Variables(geometry[val_iZone][val_iInst][MESH_0], config[val_iZone]); - if (dynamic) { + if (config[val_iZone]->GetTime_Domain()) { integration[val_iZone][val_iInst][ADJFEA_SOL]->SetConvergence(false); } } -void CDiscAdjFEAIteration::SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst, - unsigned short kind_recording) { - unsigned long InnerIter = config[ZONE_0]->GetInnerIter(); - unsigned long TimeIter = config[val_iZone]->GetTimeIter(), DirectTimeIter; - bool dynamic = (config[val_iZone]->GetTime_Domain()); - - DirectTimeIter = 0; - if (dynamic) { - DirectTimeIter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1; - } - - /*--- Reset the tape ---*/ - - AD::Reset(); - - /*--- We only need to reset the indices if the current recording is different from the recording we want to have ---*/ - - if (CurrentRecording != kind_recording && (CurrentRecording != NONE)) { - 
solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0], - config[val_iZone]); - - /*--- Clear indices of coupling variables ---*/ - - SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, SOLUTION_AND_MESH); - - /*--- Run one iteration while tape is passive - this clears all indices ---*/ - - fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement, - FFDBox, val_iZone, val_iInst); - } - - /*--- Prepare for recording ---*/ - - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0], - config[val_iZone]); - - /*--- Start the recording of all operations ---*/ - - AD::StartRecording(); - - /*--- Register FEA variables ---*/ - - RegisterInput(solver, geometry, config, val_iZone, val_iInst, kind_recording); - - /*--- Compute coupling or update the geometry ---*/ - - SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, kind_recording); - - /*--- Set the correct direct iteration number ---*/ - - if (dynamic) { - config[val_iZone]->SetTimeIter(DirectTimeIter); - } - - /*--- Run the direct iteration ---*/ - - fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement, - FFDBox, val_iZone, val_iInst); - - config[val_iZone]->SetTimeIter(TimeIter); - - /*--- Register structural variables and objective function as output ---*/ - - RegisterOutput(solver, geometry, config, val_iZone, val_iInst); - - /*--- Stop the recording ---*/ - - AD::StopRecording(); - - /*--- Set the recording status ---*/ - - CurrentRecording = kind_recording; - - /* --- Reset the number of the internal iterations---*/ - - config[ZONE_0]->SetInnerIter(InnerIter); -} - void CDiscAdjFEAIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) { @@ -296,10 
+211,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge auto structural_numerics = numerics[iZone][iInst][MESH_0][FEA_SOL]; /*--- Some numerics are only instanciated under these conditions ---*/ - bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem(); - bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS; - bool de_effects = config[iZone]->GetDE_Effects() && nonlinear; - bool element_based = dir_solver->IsElementBased() && nonlinear; + const bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem(); + const bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS; + const bool de_effects = config[iZone]->GetDE_Effects() && nonlinear; + const bool element_based = dir_solver->IsElementBased() && nonlinear; + + SU2_OMP_PARALLEL + { + + const int thread = omp_get_thread_num(); + const int offset = thread*MAX_TERMS; + const int fea_term = FEA_TERM+offset; + const int mat_nhcomp = MAT_NHCOMP+offset; + const int mat_idealde = MAT_IDEALDE+offset; + const int mat_knowles = MAT_KNOWLES+offset; + const int de_term = DE_TERM+offset; for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) { su2double E = adj_solver->GetVal_Young(iProp); @@ -309,33 +235,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge /*--- Add dependencies for E and Nu ---*/ - structural_numerics[FEA_TERM]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[fea_term]->SetMaterial_Properties(iProp, E, nu); /*--- Add dependencies for Rho and Rho_DL ---*/ - structural_numerics[FEA_TERM]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[fea_term]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Add dependencies for element-based simulations. 
---*/ if (element_based) { /*--- Neo Hookean Compressible ---*/ - structural_numerics[MAT_NHCOMP]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_NHCOMP]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_nhcomp]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_nhcomp]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Ideal DE ---*/ - structural_numerics[MAT_IDEALDE]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_IDEALDE]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_idealde]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_idealde]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Knowles ---*/ - structural_numerics[MAT_KNOWLES]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_KNOWLES]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_knowles]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_knowles]->SetMaterial_Density(iProp, rho, rhoDL); } } if (de_effects) { for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) { - structural_numerics[FEA_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); - structural_numerics[DE_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[fea_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[de_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); } } @@ -351,14 +277,14 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) { su2double dvfea = adj_solver->GetVal_DVFEA(iDV); - structural_numerics[FEA_TERM]->Set_DV_Val(iDV, dvfea); + structural_numerics[fea_term]->Set_DV_Val(iDV, dvfea); - if (de_effects) structural_numerics[DE_TERM]->Set_DV_Val(iDV, dvfea); + if (de_effects) structural_numerics[de_term]->Set_DV_Val(iDV, dvfea); if (element_based) { - 
structural_numerics[MAT_NHCOMP]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_IDEALDE]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_KNOWLES]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_nhcomp]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_idealde]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_knowles]->Set_DV_Val(iDV, dvfea); } } break; @@ -374,6 +300,9 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge structural_geometry->CompleteComms(structural_geometry, config[iZone], COORDINATES); } + } + END_SU2_OMP_PARALLEL + /*--- FSI specific dependencies. ---*/ if (fsi) { /*--- Set relation between solution and predicted displacements, which are the transferred ones. ---*/ @@ -390,6 +319,7 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge /// making it a virtual method of CSolver does not feel "right" as its purpose could be confused. static_cast(dir_solver)->FilterElementDensities(structural_geometry, config[iZone]); } + } void CDiscAdjFEAIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, @@ -406,11 +336,6 @@ void CDiscAdjFEAIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } -void CDiscAdjFEAIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {} - bool CDiscAdjFEAIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, @@ -427,13 +352,14 @@ void CDiscAdjFEAIteration::Postprocess(COutput* 
output, CIntegration**** integra CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - bool dynamic = (config[val_iZone]->GetTime_Domain()); + const bool dynamic = (config[val_iZone]->GetTime_Domain()); + auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; // TEMPORARY output only for standalone structural problems if ((!config[val_iZone]->GetFSI_Simulation()) && (rank == MASTER_NODE)) { unsigned short iVar; - bool de_effects = config[val_iZone]->GetDE_Effects(); + const bool de_effects = config[val_iZone]->GetDE_Effects(); /*--- Header of the temporary output file ---*/ ofstream myfile_res; @@ -443,24 +369,23 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra myfile_res << config[val_iZone]->GetTimeIter() << "\t"; - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]); - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetTotal_ComboObj() << "\t"; + solvers0[FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]); + myfile_res << scientific << solvers0[FEA_SOL]->GetTotal_ComboObj() << "\t"; for (iVar = 0; iVar < config[val_iZone]->GetnElasticityMod(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t"; for (iVar = 0; iVar < config[val_iZone]->GetnPoissonRatio(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t"; if (dynamic) { for (iVar = 0; iVar < config[val_iZone]->GetnMaterialDensity(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t"; + myfile_res 
<< scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t"; } if (de_effects) { for (iVar = 0; iVar < config[val_iZone]->GetnElectric_Field(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_EField(iVar) - << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_EField(iVar) << "\t"; } - for (iVar = 0; iVar < solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); iVar++) { - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t"; + for (iVar = 0; iVar < solvers0[ADJFEA_SOL]->GetnDVFEA(); iVar++) { + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t"; } myfile_res << endl; @@ -499,7 +424,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra if (outputDVFEA) { unsigned short iDV; - unsigned short nDV = solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); + unsigned short nDV = solvers0[ADJFEA_SOL]->GetnDVFEA(); myfile_res << "INDEX" << "\t" @@ -510,7 +435,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra for (iDV = 0; iDV < nDV; iDV++) { myfile_res << iDV; myfile_res << "\t"; - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV); + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV); myfile_res << endl; } diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 59b212c8b9e..dbf2a950dfb 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -31,91 +31,90 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - 
CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { StartTime = SU2_MPI::Wtime(); - unsigned long iPoint; - unsigned short TimeIter = config[val_iZone]->GetTimeIter(); - bool dual_time_1st = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST); - bool dual_time_2nd = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND); - bool dual_time = (dual_time_1st || dual_time_2nd); - unsigned short iMesh; - int Direct_Iter; - bool heat = config[val_iZone]->GetWeakly_Coupled_Heat(); - bool grid_IsMoving = config[val_iZone]->GetGrid_Movement(); + const auto TimeIter = config[iZone]->GetTimeIter(); + const bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST); + const bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND); + const bool dual_time = (dual_time_1st || dual_time_2nd); + const bool grid_IsMoving = config[iZone]->GetGrid_Movement(); + const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + auto solvers0 = solver[iZone][iInst][MESH_0]; + auto geometries = geometry[iZone][iInst]; // /*--- Read the target pressure for inverse design. 
---------------------------------------------*/ - // if (config[val_iZone]->GetInvDesign_Cp() == YES) - // output->SetCp_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL], - // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); + // if (config[iZone]->GetInvDesign_Cp() == YES) + // output->SetCp_InverseDesign(solvers0[FLOW_SOL], + // geometries[MESH_0], config[iZone], ExtIter); // /*--- Read the target heat flux ----------------------------------------------------------------*/ // if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES) - // output->SetHeatFlux_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL], - // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); + // output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL], + // geometries[MESH_0], config[iZone], ExtIter); /*--- For the unsteady adjoint, load direct solutions from restart files. ---*/ - if (config[val_iZone]->GetTime_Marching()) { - Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2; + if (config[iZone]->GetTime_Marching()) { + const int Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2 + dual_time; /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/ - if (dual_time) { - Direct_Iter += 1; - } - if (TimeIter == 0) { if (dual_time_2nd) { /*--- Load solution at timestep n-2 ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 2); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2); /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1(); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto 
solvers = solver[iZone][iInst][iMesh]; + + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1(); + geometries[iMesh]->nodes->SetCoord_n(); + geometries[iMesh]->nodes->SetCoord_n1(); } } } if (dual_time) { /*--- Load solution at timestep n-1 ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 1); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1); /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); 
+ geometries[iMesh]->nodes->SetCoord_n(); } } } /*--- Load solution timestep n ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter); - if (config[val_iZone]->GetDeform_Mesh()) { - solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); + if (config[iZone]->GetDeform_Mesh()) { + solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true); } } else if ((TimeIter > 0) && dual_time) { @@ -126,114 +125,120 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr Afterwards the GridVelocity is computed based on the Coordinates. ---*/ - if (config[val_iZone]->GetDeform_Mesh()) { - solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); + if (config[iZone]->GetDeform_Mesh()) { + solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true); } /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/ if (dual_time_1st) { - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 1); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1); } else { - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 2); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2); } /*--- Temporarily store the loaded solution in the Solution_Old array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Set_OldSolution(); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + +
solvers[FLOW_SOL]->Set_OldSolution(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Set_OldSolution(); + solvers[TURB_SOL]->Set_OldSolution(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Set_OldSolution(); + solvers[HEAT_SOL]->Set_OldSolution(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old(); + geometries[iMesh]->nodes->SetCoord_Old(); } } /*--- Set Solution at timestep n to solution at n-1 ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { + solvers[FLOW_SOL]->GetNodes()->SetSolution( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint)); + geometries[iMesh]->nodes->SetCoord( + iPoint, geometries[iMesh]->nodes->GetCoord_n(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + solvers[TURB_SOL]->GetNodes()->SetSolution( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + solvers[HEAT_SOL]->GetNodes()->SetSolution( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint)); } } } 
if (dual_time_1st) { /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); } } } } if (dual_time_2nd) { /*--- Set Solution at timestep n-1 to solution at n-2 ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, 
solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint)); + geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_n1(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); } } } /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1( + 
iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometries[iMesh]->nodes->SetCoord_n1(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); } } } @@ -243,97 +248,98 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Compute & set Grid Velocity via finite differences of the Coordinates. 
---*/ if (grid_IsMoving) - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) - geometry[val_iZone][val_iInst][iMesh]->SetGridVelocity(config[val_iZone], TimeIter); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) + geometries[iMesh]->SetGridVelocity(config[iZone], TimeIter); } // if unsteady + SU2_OMP_PARALLEL_(if(solvers0[ADJFLOW_SOL]->GetHasHybridParallel())) { + /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/ if (TimeIter == 0 || dual_time) { - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution(iPoint)); - } + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) + solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint)); + END_SU2_OMP_FOR } - if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->GetNodes()->GetSolution(iPoint)); - } + if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) + solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint)); + END_SU2_OMP_FOR } if (heat) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - 
solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->GetNodes()->GetSolution(iPoint)); - } + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) + solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint)); + END_SU2_OMP_FOR } - if (config[val_iZone]->AddRadiation()) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][RAD_SOL]->GetNodes()->GetSolution(iPoint)); - } + if (config[iZone]->AddRadiation()) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) + solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint)); + END_SU2_OMP_FOR } } - solver[val_iZone][val_iInst][MESH_0][ADJFLOW_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJFLOW_SYS, false); - if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { - solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJTURB_SYS, false); + solvers0[ADJFLOW_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], + MESH_0, 0, RUNTIME_ADJFLOW_SYS, false); + + if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { + solvers0[ADJTURB_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], + MESH_0, 0, RUNTIME_ADJTURB_SYS, false); } if (heat) { - solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 
0, - RUNTIME_ADJHEAT_SYS, false); + solvers0[ADJHEAT_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], + MESH_0, 0, RUNTIME_ADJHEAT_SYS, false); } - if (config[val_iZone]->AddRadiation()) { - solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJRAD_SYS, false); + if (config[iZone]->AddRadiation()) { + solvers0[ADJRAD_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], + MESH_0, 0, RUNTIME_ADJRAD_SYS, false); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, - int val_DirectIter) { + unsigned short iZone, unsigned short iInst, int DirectIter) { unsigned short iMesh; - bool heat = config[val_iZone]->GetWeakly_Coupled_Heat(); + bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + auto solvers = solver[iZone][iInst]; + + if (DirectIter >= 0) { + if (rank == MASTER_NODE && iZone == ZONE_0) + cout << " Loading flow solution from direct iteration " << DirectIter << "." << endl; + + solvers[MESH_0][FLOW_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, true); - if (val_DirectIter >= 0) { - if (rank == MASTER_NODE && val_iZone == ZONE_0) - cout << " Loading flow solution from direct iteration " << val_DirectIter << "." 
<< endl; - solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, true); if (turbulent) { - solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false); + solvers[MESH_0][TURB_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false); } if (heat) { - solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false); + solvers[MESH_0][HEAT_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false); } } else { /*--- If there is no solution file we set the freestream condition ---*/ - if (rank == MASTER_NODE && val_iZone == ZONE_0) - cout << " Setting freestream conditions at direct iteration " << val_DirectIter << "." << endl; - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh, - val_DirectIter, RUNTIME_FLOW_SYS, false); + if (rank == MASTER_NODE && iZone == ZONE_0) + cout << " Setting freestream conditions at direct iteration " << DirectIter << "." 
<< endl; + + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + solvers[iMesh][FLOW_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh, + DirectIter, RUNTIME_FLOW_SYS, false); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Postprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh); + solvers[iMesh][TURB_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][TURB_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Postprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh); + solvers[iMesh][HEAT_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][HEAT_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh); } } } @@ -343,8 +349,11 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement, CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); + const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/ @@ -364,12 
+373,17 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati solver[iZone][iInst][MESH_0][ADJRAD_SOL]->ExtractAdjoint_Variables(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); /*--- Initialize the adjoints the conservative variables ---*/ @@ -381,7 +395,7 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry*** solver[iZone][iInst][MESH_0][ADJTURB_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } @@ -392,12 +406,17 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry*** if (config[iZone]->GetFluidProblem()) { solver[iZone][iInst][MESH_0][FLOW_SOL]->SetVertexTractionsAdjoint(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) { /*--- Register flow and turbulent variables as 
input ---*/ @@ -411,7 +430,7 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge if (turbulent && !frozen_visc) { solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]); } if (config[iZone]->AddRadiation()) { @@ -435,17 +454,19 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge /*--- Boundary displacements ---*/ solver[iZone][iInst][MESH_0][ADJMESH_SOL]->RegisterVariables(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/ + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { - if (solver[iZone][iInst][MESH_0][ADJFEA_SOL]) { - solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[iZone][iInst][MESH_0], config[iZone]); - } + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); + + /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/ for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { solver[iZone][iInst][iMesh][ADJFLOW_SOL]->SetRecording(geometry[iZone][iInst][iMesh], config[iZone]); @@ -459,13 +480,16 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo if (config[iZone]->AddRadiation()) { solver[iZone][INST_0][MESH_0][ADJRAD_SOL]->SetRecording(geometry[iZone][INST_0][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void 
CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) { /*--- Update geometry to get the influence on other geometry variables (normals, volume etc) ---*/ @@ -475,6 +499,8 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** CGeometry::ComputeWallDistance(config, geometry); } + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + /*--- Compute coupling between flow and turbulent equations ---*/ solver[iZone][iInst][MESH_0][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0], config[iZone], MESH_0, NO_RK_ITER, RUNTIME_FLOW_SYS, true); @@ -488,7 +514,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** solver[iZone][iInst][MESH_0][TURB_SOL]->CompleteComms(geometry[iZone][iInst][MESH_0], config[iZone], SOLUTION); } - if (heat) { + } + END_SU2_OMP_PARALLEL + + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][HEAT_SOL]->Set_Heatflux_Areas(geometry[iZone][iInst][MESH_0], config[iZone]); solver[iZone][iInst][MESH_0][HEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0], config[iZone], MESH_0, NO_RK_ITER, RUNTIME_HEAT_SYS, true); @@ -507,8 +536,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) { - bool frozen_visc = 
config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); /*--- Register conservative variables as output of the iteration ---*/ @@ -518,7 +549,7 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g if (turbulent && !frozen_visc) { solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } if (config[iZone]->AddRadiation()) { @@ -527,20 +558,21 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g if (config[iZone]->GetFluidProblem()) { solver[iZone][iInst][MESH_0][FLOW_SOL]->RegisterVertexTractions(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - unsigned short iMesh; - + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { /*--- Dual time stepping strategy ---*/ - if ((config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST) || - (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND)) { - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - integration[val_iZone][val_iInst][ADJFLOW_SOL]->SetConvergence(false); + if ((config[iZone]->GetTime_Marching() == DT_STEPPING_1ST) || + (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND)) { + for (unsigned short iMesh = 0; iMesh <= 
config[iZone]->GetnMGLevels(); iMesh++) { + integration[iZone][iInst][ADJFLOW_SOL]->SetConvergence(false); } } } @@ -548,21 +580,16 @@ void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integratio bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { StopTime = SU2_MPI::Wtime(); UsedTime = StopTime - StartTime; /*--- Write the convergence history for the fluid (only screen output) ---*/ - output->SetHistory_Output(geometry[val_iZone][INST_0][MESH_0], solver[val_iZone][INST_0][MESH_0], config[val_iZone], - config[val_iZone]->GetTimeIter(), config[val_iZone]->GetOuterIter(), - config[val_iZone]->GetInnerIter()); + output->SetHistory_Output(geometry[iZone][INST_0][MESH_0], solver[iZone][INST_0][MESH_0], config[iZone], + config[iZone]->GetTimeIter(), config[iZone]->GetOuterIter(), + config[iZone]->GetInnerIter()); return output->GetConvergence(); } -void CDiscAdjFluidIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) {} diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp index 4258a631a52..5d278eb97f9 100644 --- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp @@ -227,9 +227,8 @@ void CDiscAdjHeatIteration::SetDependencies(CSolver***** solver, CGeometry**** g void CDiscAdjHeatIteration::RegisterOutput(CSolver***** solver, 
CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) { - solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); - geometry[iZone][iInst][MESH_0]->RegisterOutput_Coordinates(config[iZone]); + solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } void CDiscAdjHeatIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, @@ -258,13 +257,3 @@ bool CDiscAdjHeatIteration::Monitor(COutput* output, CIntegration**** integratio return output->GetConvergence(); } - -void CDiscAdjHeatIteration::Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned long InnerIter, bool StopCalc, unsigned short val_iZone, - unsigned short val_iInst) {} - -void CDiscAdjHeatIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { -} diff --git a/SU2_CFD/src/iteration/CFluidIteration.cpp b/SU2_CFD/src/iteration/CFluidIteration.cpp index 859a11cee03..478235d9973 100644 --- a/SU2_CFD/src/iteration/CFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CFluidIteration.cpp @@ -141,6 +141,7 @@ void CFluidIteration::Iterate(COutput* output, CIntegration**** integration, CGe SU2_OMP_PARALLEL solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->AdaptCFLNumber(geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone]); + END_SU2_OMP_PARALLEL } /*--- Call Dynamic mesh update if AEROELASTIC motion was specified ---*/ diff --git a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp index 4025f5b9d0d..c1d78ea11a7 100644 --- 
a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp +++ b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp @@ -333,6 +333,7 @@ void CFEAElasticity::ReadDV(const CConfig *config) { bool master_node = false; SU2_OMP_MASTER master_node = (rank == MASTER_NODE); + END_SU2_OMP_MASTER unsigned long index; diff --git a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp index ed75ae1e0d4..f9052653abb 100644 --- a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp @@ -66,7 +66,7 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto /*--- Create a linear partition --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); /*--- Prepare the send buffers ---*/ @@ -74,17 +74,6 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto } -CFEMDataSorter::~CFEMDataSorter(){ - - delete [] Index; - delete [] idSend; - delete linearPartitioner; - -} - - - - void CFEMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) { /*--- Sort connectivity for each type of element (excluding halos). 
Note diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp index 768438a9b39..c193c98c6a6 100644 --- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp @@ -39,7 +39,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto nGlobalPointBeforeSort = geometry->GetGlobal_nPointDomain(); nLocalPointsBeforeSort = geometry->GetnPointDomain(); - Local_Halo = new int[geometry->GetnPoint()](); + Local_Halo.resize(geometry->GetnPoint()); for (unsigned long iPoint = 0; iPoint < geometry->GetnPoint(); iPoint++){ @@ -60,7 +60,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); /*--- Prepare the send buffers ---*/ @@ -68,16 +68,6 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto } -CFVMDataSorter::~CFVMDataSorter(){ - - delete [] Local_Halo; - - delete [] Index; - delete [] idSend; - delete linearPartitioner; - -} - void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){ unsigned long iPoint, iVertex; @@ -107,9 +97,6 @@ void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){ } } - - - void CFVMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) { /*--- Sort connectivity for each type of element (excluding halos). Note @@ -218,7 +205,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, own elements into the connectivity data structure. 
---*/ if (val_sort) { - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); } else { iProcessor = rank; } @@ -262,14 +249,11 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - unsigned long *connSend = nullptr; - connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]](); + auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]](); /*--- Allocate arrays for storing halo flags. ---*/ - unsigned short *haloSend = new unsigned short[nElem_Send[size]](); - for (int ii = 0; ii < nElem_Send[size]; ii++) - haloSend[ii] = false; + auto haloSend = new unsigned short[nElem_Send[size]](); /*--- Create an index variable to keep track of our index position as we load up the send buffer. ---*/ @@ -308,7 +292,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, own elements into the connectivity data structure. ---*/ if (val_sort) { - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); } else { iProcessor = rank; } @@ -356,10 +340,9 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, we do not include our own rank in the communications. We will directly copy our own data later. 
---*/ - unsigned long *connRecv = nullptr; - connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]](); + auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]](); - unsigned short *haloRecv = new unsigned short[nElem_Cum[size]](); + auto haloRecv = new unsigned short[nElem_Cum[size]](); #ifdef HAVE_MPI diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp index d65e55317ce..e600566155f 100644 --- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp @@ -29,24 +29,12 @@ #include #include - -const map CParallelDataSorter::TypeMap = { - {LINE, 0}, - {TRIANGLE, 1}, - {QUADRILATERAL, 2}, - {TETRAHEDRON, 3}, - {HEXAHEDRON, 4}, - {PRISM, 5}, - {PYRAMID, 6} -}; - CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector &valFieldNames) : - fieldNames(std::move(valFieldNames)){ + rank(SU2_MPI::GetRank()), + size(SU2_MPI::GetSize()), + fieldNames(std::move(valFieldNames)) { - rank = SU2_MPI::GetRank(); - size = SU2_MPI::GetSize(); - - GlobalField_Counter = this->fieldNames.size(); + GlobalField_Counter = fieldNames.size(); Conn_Line_Par = nullptr; Conn_Hexa_Par = nullptr; @@ -56,13 +44,9 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector & Conn_Tria_Par = nullptr; Conn_Pyra_Par = nullptr; - nPoint_Send = nullptr; - nPoint_Recv = nullptr; Index = nullptr; connSend = nullptr; dataBuffer = nullptr; - passiveDoubleBuffer = nullptr; - doubleBuffer = nullptr; idSend = nullptr; nSends = 0; nRecvs = 0; @@ -77,8 +61,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector & nElemConn_Send = new int[size+1](); nElemConn_Cum = new int[size+1](); - linearPartitioner = nullptr; - nElemPerType.fill(0); nElemPerTypeGlobal.fill(0); @@ -104,34 +86,32 @@ CParallelDataSorter::~CParallelDataSorter(){ delete [] Conn_Pyra_Par; delete [] connSend; - delete [] dataBuffer; + delete [] Index; + 
delete [] idSend; + } void CParallelDataSorter::SortOutputData() { - int VARS_PER_POINT = GlobalField_Counter; - -#ifdef HAVE_MPI - SU2_MPI::Request *send_req, *recv_req; - SU2_MPI::Status status; - int ind; -#endif + const int VARS_PER_POINT = GlobalField_Counter; /*--- Allocate the memory that we need for receiving the conn values and then cue up the non-blocking receives. Note that we do not include our own rank in the communications. We will directly copy our own data later. ---*/ - - unsigned long *idRecv = new unsigned long[nPoint_Recv[size]](); + vector idRecv(nPoint_Recv[size], 0); #ifdef HAVE_MPI - /*--- We need double the number of messages to send both the conn. - and the global IDs. ---*/ + /*--- NOTE: This function calls MPI routines directly, instead of via SU2_MPI::, + * because it communicates passivedoubles and not AD types. This avoids some + * creative C++ to communicate AD types and then convert to passive. ---*/ + + /*--- We need double the number of messages to send both the conn. and the global IDs. 
---*/ - send_req = new SU2_MPI::Request[2*nSends]; - recv_req = new SU2_MPI::Request[2*nRecvs]; + auto send_req = new MPI_Request[2*nSends]; + auto recv_req = new MPI_Request[2*nRecvs]; unsigned long iMessage = 0; for (int ii=0; ii tmpBuffer(nPoint_Recv[size]); - if (!std::is_same::value){ - for (int jj = 0; jj < VARS_PER_POINT*nPoint_Recv[size]; jj++){ - const passivedouble tmpVal = SU2_TYPE::GetValue(doubleBuffer[jj]); - passiveDoubleBuffer[jj] = tmpVal; - /*--- For some AD datatypes a call of the destructor is - * necessary to properly delete the AD type ---*/ - doubleBuffer[jj].~su2double(); - } - } - - /*--- Step 2: Reorder the data in the buffer --- */ - - passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]]; for (int jj = 0; jj < VARS_PER_POINT; jj++){ for (int ii = 0; ii < nPoint_Recv[size]; ii++){ - tmpBuffer[idRecv[ii]] = passiveDoubleBuffer[ii*VARS_PER_POINT+jj]; + tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj]; } for (int ii = 0; ii < nPoint_Recv[size]; ii++){ - passiveDoubleBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii]; + dataBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii]; } } - delete [] tmpBuffer; - /*--- Store the total number of local points my rank has for the current section after completing the communications. ---*/ @@ -261,12 +228,8 @@ void CParallelDataSorter::SortOutputData() { /*--- Reduce the total number of points we will write in the output files. 
---*/ - SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, - MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); + SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); - /*--- Free temporary memory from communications ---*/ - - delete [] idRecv; } void CParallelDataSorter::PrepareSendBuffers(std::vector& globalID){ @@ -285,7 +248,7 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++ ) { - iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]); + iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]); /*--- If we have not visited this node yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -318,18 +281,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - connSend = nullptr; - connSend = new su2double[VARS_PER_POINT*nPoint_Send[size]](); + connSend = new passivedouble[VARS_PER_POINT*nPoint_Send[size]] (); /*--- Allocate the data buffer to hold the sorted data. We have to make it large enough * to hold passivedoubles and su2doubles ---*/ - unsigned short maxSize = max(sizeof(passivedouble), sizeof(su2double)); - dataBuffer = new char[VARS_PER_POINT*nPoint_Recv[size]*maxSize] {}; - - /*--- doubleBuffer and passiveDouble buffer use the same memory allocated above using the dataBuffer. ---*/ - doubleBuffer = reinterpret_cast(dataBuffer); - passiveDoubleBuffer = reinterpret_cast(dataBuffer); + dataBuffer = new passivedouble[VARS_PER_POINT*nPoint_Recv[size]] (); /*--- Allocate arrays for sending the global ID. ---*/ @@ -338,11 +295,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI /*--- Create an index variable to keep track of our index positions as we load up the send buffer. 
---*/ - unsigned long *index = new unsigned long[size](); - for (int ii=0; ii < size; ii++) index[ii] = VARS_PER_POINT*nPoint_Send[ii]; + vector index(size), idIndex(size); - unsigned long *idIndex = new unsigned long[size](); - for (int ii=0; ii < size; ii++) idIndex[ii] = nPoint_Send[ii]; + for (int ii=0; ii < size; ii++) { + index[ii] = VARS_PER_POINT*nPoint_Send[ii]; + idIndex[ii] = nPoint_Send[ii]; + } Index = new unsigned long[nLocalPointsBeforeSort](); @@ -351,13 +309,13 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++) { - iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]); + iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]); /*--- Load the global ID (minus offset) for sorting the points once they all reach the correct processor. ---*/ unsigned long nn = idIndex[iProcessor]; - idSend[nn] = globalID[iPoint] - linearPartitioner->GetFirstIndexOnRank(iProcessor); + idSend[nn] = globalID[iPoint] - linearPartitioner.GetFirstIndexOnRank(iProcessor); /*--- Store the index this point has in the send buffer ---*/ @@ -368,13 +326,8 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI index[iProcessor] += VARS_PER_POINT; idIndex[iProcessor]++; - } - /*--- Free memory after loading up the send buffer. 
---*/ - - delete [] index; - delete [] idIndex; } unsigned long CParallelDataSorter::GetElem_Connectivity(GEO_TYPE type, unsigned long iElem, unsigned long iNode) const { @@ -463,6 +416,4 @@ void CParallelDataSorter::SetTotalElements(){ nElemConn_Cum[ii+1] += nElemConn_Cum[ii]; } - } - diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp index 99c314f2e5c..9a7bc400418 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp @@ -29,12 +29,12 @@ #include "../../../../Common/include/fem/fem_geometry_structure.hpp" #include -CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter) : +CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter) : CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){ nDim = geometry->GetnDim(); - this->volumeSorter = valVolumeSorter; + volumeSorter = valVolumeSorter; connectivitySorted = false; @@ -62,19 +62,10 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); } -CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){ - - delete linearPartitioner; - delete [] passiveDoubleBuffer; - -} - - - void CSurfaceFEMDataSorter::SortOutputData() { if (!connectivitySorted){ @@ -136,7 +127,7 @@ void CSurfaceFEMDataSorter::SortOutputData() { for(unsigned long i=0; iGetRankContainingIndex(globalSurfaceDOFIDs[i]); + unsigned long iProcessor = linearPartitioner.GetRankContainingIndex(globalSurfaceDOFIDs[i]); /* Store the global ID in the send buffer for iProcessor. 
*/ sendBuf[iProcessor].push_back(globalSurfaceDOFIDs[i]); @@ -220,19 +211,16 @@ void CSurfaceFEMDataSorter::SortOutputData() { /* Allocate the memory for Parallel_Surf_Data. */ nPoints = globalSurfaceDOFIDs.size(); - - delete [] passiveDoubleBuffer; - - - passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT]; + delete [] dataBuffer; + dataBuffer = new passivedouble[nPoints*VARS_PER_POINT]; /* Determine the local index of the global surface DOFs and copy the data into Parallel_Surf_Data. */ for(unsigned long i=0; iGetCumulativeSizeBeforeRank(rank); + const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner.GetCumulativeSizeBeforeRank(rank); for(int jj=0; jjGetData(jj,ii); + dataBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii); } /*--- Reduce the total number of surf points we have. This will be diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp index a4dc31c32a5..f9b36ab1648 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp @@ -29,12 +29,12 @@ #include "../../../../Common/include/geometry/CGeometry.hpp" #include -CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, CFVMDataSorter* valVolumeSorter) : +CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, const CFVMDataSorter* valVolumeSorter) : CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){ nDim = geometry->GetnDim(); - this->volumeSorter = valVolumeSorter; + volumeSorter = valVolumeSorter; connectivitySorted = false; @@ -43,14 +43,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); - -} - -CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){ - - delete linearPartitioner; - delete [] passiveDoubleBuffer; + 
linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); } @@ -101,7 +94,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -129,7 +122,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -157,7 +150,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. 
---*/ @@ -216,7 +209,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -250,7 +243,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -284,7 +277,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -438,17 +431,14 @@ void CSurfaceFVMDataSorter::SortOutputData() { we can allocate the new data structure to hold these points alone. Here, we also copy the data for those points from our volume data structure. 
---*/ - - delete [] passiveDoubleBuffer; - - - passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT]; + delete [] dataBuffer; + dataBuffer = new passivedouble[nPoints*VARS_PER_POINT]; for (int jj = 0; jj < VARS_PER_POINT; jj++) { count = 0; for (int ii = 0; ii < (int)volumeSorter->GetnPoints(); ii++) { if (surfPoint[ii] !=-1) { - passiveDoubleBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii); + dataBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii); count++; } } @@ -507,7 +497,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -545,14 +535,12 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Allocate memory to hold the globals that we are sending. ---*/ - unsigned long *globalSend = nullptr; - globalSend = new unsigned long[nElem_Send[size]](); + auto globalSend = new unsigned long[nElem_Send[size]](); /*--- Allocate memory to hold the renumbering that we are sending. ---*/ - unsigned long *renumbSend = nullptr; - renumbSend = new unsigned long[nElem_Send[size]](); + auto renumbSend = new unsigned long[nElem_Send[size]](); /*--- Create an index variable to keep track of our index position as we load up the send buffer. ---*/ @@ -569,7 +557,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); if (nElem_Flag[iProcessor] != ii) { @@ -595,11 +583,8 @@ void CSurfaceFVMDataSorter::SortOutputData() { we do not include our own rank in the communications. We will directly copy our own data later. 
---*/ - unsigned long *globalRecv = nullptr; - globalRecv = new unsigned long[nElem_Recv[size]](); - - unsigned long *renumbRecv = nullptr; - renumbRecv = new unsigned long[nElem_Recv[size]](); + auto globalRecv = new unsigned long[nElem_Recv[size]](); + auto renumbRecv = new unsigned long[nElem_Recv[size]](); #ifdef HAVE_MPI /*--- We need double the number of messages to send both the conn. @@ -731,7 +716,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -752,7 +737,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -773,7 +758,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -808,7 +793,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. 
---*/ @@ -860,7 +845,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -1205,7 +1190,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -1247,16 +1232,11 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - unsigned long *connSend = nullptr; - connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]; - for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Send[size]; ii++) - connSend[ii] = 0; + auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]] (); /*--- Allocate arrays for storing halo flags. ---*/ - unsigned short *haloSend = new unsigned short[nElem_Send[size]]; - for (int ii = 0; ii < nElem_Send[size]; ii++) - haloSend[ii] = false; + auto haloSend = new unsigned short[nElem_Send[size]] (); /*--- Create an index variable to keep track of our index position as we load up the send buffer. 
---*/ @@ -1300,7 +1280,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load connectivity into the buffer for sending ---*/ @@ -1346,14 +1326,9 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * we do not include our own rank in the communications. We will directly copy our own data later. ---*/ - unsigned long *connRecv = nullptr; - connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]]; - for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Recv[size]; ii++) - connRecv[ii] = 0; + auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]] (); - unsigned short *haloRecv = new unsigned short[nElem_Recv[size]]; - for (int ii = 0; ii < nElem_Recv[size]; ii++) - haloRecv[ii] = false; + auto haloRecv = new unsigned short[nElem_Recv[size]] (); #ifdef HAVE_MPI /*--- We need double the number of messages to send both the conn. diff --git a/SU2_CFD/src/python_wrapper_structure.cpp b/SU2_CFD/src/python_wrapper_structure.cpp index 6125d80966e..62cfc4a23bd 100644 --- a/SU2_CFD/src/python_wrapper_structure.cpp +++ b/SU2_CFD/src/python_wrapper_structure.cpp @@ -602,6 +602,7 @@ void CSinglezoneDriver::SetInitialMesh() { /*--- Set the grid velocity for this coarse node. ---*/ geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetGridVel(iPoint, Grid_Vel); } + END_SU2_OMP_FOR /*--- Push back the volume. 
---*/ geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_n(); geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_nM1(); @@ -610,6 +611,7 @@ void CSinglezoneDriver::SetInitialMesh() { solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n(); solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n1(); } + END_SU2_OMP_PARALLEL } void CDriver::BoundaryConditionsUpdate(){ diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index 287e1c50eaa..9b377b54751 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -25,26 +25,17 @@ * License along with SU2. If not, see . */ - #include "../../include/solvers/CDiscAdjFEASolver.hpp" #include "../../include/variables/CDiscAdjFEAVariable.hpp" -CDiscAdjFEASolver::CDiscAdjFEASolver(void) : CSolver() { } - -CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config) : CSolver() { } - CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { adjoint = true; - unsigned short iVar, iMarker; - unsigned long iPoint; - string text_line, mesh_filename; - string filename, AdjExt; - bool dynamic = (config->GetTime_Domain()); + const bool dynamic = (config->GetTime_Domain()); nVar = direct_solver->GetnVar(); nDim = geometry->GetnDim(); @@ -59,8 +50,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv /*--- Define some auxiliary vectors related to the residual ---*/ - Residual = new su2double[nVar]; for (iVar = 0; iVar < nVar; iVar++) Residual[iVar] = 1.0; - Residual_RMS.resize(nVar,1.0); Residual_Max.resize(nVar,1.0); Point_Max.resize(nVar,0); @@ -76,32 +65,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some 
auxiliary vectors related to the solution ---*/ + /*--- Initialize the adjoint solution. ---*/ - Solution = new su2double[nVar]; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - - if (dynamic) { - Solution_Vel = new su2double[nVar]; - Solution_Accel = new su2double[nVar]; - - for (iVar = 0; iVar < nVar; iVar++) Solution_Vel[iVar] = 1e-16; - for (iVar = 0; iVar < nVar; iVar++) Solution_Accel[iVar] = 1e-16; - } - - /*--- Sensitivity definition and coefficient in all the markers ---*/ - - CSensitivity = new su2double* [nMarker]; - - for (iMarker = 0; iMarker < nMarker; iMarker++) { - CSensitivity[iMarker] = new su2double [geometry->nVertex[iMarker]](); - } - - Sens_E = new su2double[nMarker](); - Sens_Nu = new su2double[nMarker](); - Sens_nL = new su2double[nMarker](); - - nodes = new CDiscAdjFEABoundVariable(Solution, Solution_Accel, Solution_Vel, nPoint, nDim, nVar, dynamic, config); + vector init(nVar,1e-16); + nodes = new CDiscAdjFEABoundVariable(init.data(), init.data(), init.data(), nPoint, nDim, nVar, dynamic, config); SetBaseClassPointerToNodes(); /*--- Set which points are vertices and allocate boundary data. 
---*/ @@ -116,23 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv } nodes->AllocateBoundaryVariables(config); - - /*--- Store the direct solution ---*/ - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Direct(iPoint, direct_solver->GetNodes()->GetSolution(iPoint)); - } - - if (dynamic){ - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Accel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Accel(iPoint)); - } - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Vel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Vel(iPoint)); - } - } - /*--- Initialize vector structures for multiple material definition ---*/ nMPROP = config->GetnElasticityMod(); @@ -146,29 +96,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv SU2_MPI::Error("WARNING: For a material to be fully defined, E, Nu and Rho need to have the same dimensions.", CURRENT_FUNCTION); } - E_i = new su2double[nMPROP](); - Local_Sens_E = new su2double[nMPROP](); - Global_Sens_E = new su2double[nMPROP](); - Total_Sens_E = new su2double[nMPROP](); - AD_Idx_E_i = new int[nMPROP](); - - Nu_i = new su2double[nMPROP](); - Local_Sens_Nu = new su2double[nMPROP](); - Global_Sens_Nu = new su2double[nMPROP](); - Total_Sens_Nu = new su2double[nMPROP](); - AD_Idx_Nu_i = new int[nMPROP](); - - Rho_i = new su2double[nMPROP](); // For inertial effects - Local_Sens_Rho = new su2double[nMPROP](); - Global_Sens_Rho = new su2double[nMPROP](); - Total_Sens_Rho = new su2double[nMPROP](); - AD_Idx_Rho_i = new int[nMPROP](); - - Rho_DL_i = new su2double[nMPROP](); // For dead loads - Local_Sens_Rho_DL = new su2double[nMPROP](); - Global_Sens_Rho_DL = new su2double[nMPROP](); - Total_Sens_Rho_DL = new su2double[nMPROP](); - AD_Idx_Rho_DL_i = new int[nMPROP](); + E.resize(nMPROP); + Nu.resize(nMPROP); + Rho.resize(nMPROP); + Rho_DL.resize(nMPROP); /*--- Initialize vector structures for multiple 
electric regions ---*/ @@ -176,12 +107,7 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv if (de_effects) { nEField = config->GetnElectric_Field(); - - EField = new su2double[nEField](); - Local_Sens_EField = new su2double[nEField](); - Global_Sens_EField = new su2double[nEField](); - Total_Sens_EField = new su2double[nEField](); - AD_Idx_EField = new int[nEField](); + EField.resize(nEField); } /*--- Initialize vector structures for structural-based design variables ---*/ @@ -199,80 +125,14 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv break; } - if (fea_dv) { - ReadDV(config); - Local_Sens_DV = new su2double[nDV](); - Global_Sens_DV = new su2double[nDV](); - Total_Sens_DV = new su2double[nDV](); - AD_Idx_DV_Val = new int[nDV](); - } + if (fea_dv) ReadDV(config); } -CDiscAdjFEASolver::~CDiscAdjFEASolver(void){ - - unsigned short iMarker; - - if (CSensitivity != nullptr) { - for (iMarker = 0; iMarker < nMarker; iMarker++) { - delete [] CSensitivity[iMarker]; - } - delete [] CSensitivity; - } - - delete [] E_i; - delete [] Nu_i; - delete [] Rho_i; - delete [] Rho_DL_i; - - delete [] AD_Idx_E_i; - delete [] AD_Idx_Nu_i; - delete [] AD_Idx_Rho_i; - delete [] AD_Idx_Rho_DL_i; - - delete [] Local_Sens_E; - delete [] Local_Sens_Nu; - delete [] Local_Sens_Rho; - delete [] Local_Sens_Rho_DL; - - delete [] Global_Sens_E; - delete [] Global_Sens_Nu; - delete [] Global_Sens_Rho; - delete [] Global_Sens_Rho_DL; - - delete [] Total_Sens_E; - delete [] Total_Sens_Nu; - delete [] Total_Sens_Rho; - delete [] Total_Sens_Rho_DL; - - delete [] normalLoads; - delete [] Sens_E; - delete [] Sens_Nu; - delete [] Sens_nL; - - delete [] EField; - delete [] Local_Sens_EField; - delete [] Global_Sens_EField; - delete [] Total_Sens_EField; - delete [] AD_Idx_EField; - - delete [] DV_Val; - delete [] Local_Sens_DV; - delete [] Global_Sens_DV; - delete [] Total_Sens_DV; - delete [] AD_Idx_DV_Val; - - delete [] Solution_Vel; 
- delete [] Solution_Accel; - - delete nodes; -} +CDiscAdjFEASolver::~CDiscAdjFEASolver() { delete nodes; } void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ - - bool dynamic (config->GetTime_Domain()); - unsigned long iPoint; unsigned short iVar; @@ -282,7 +142,7 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint)); } - if (dynamic){ + if (config->GetTime_Domain()){ /*--- Reset the solution to the initial (converged) solution ---*/ for (iPoint = 0; iPoint < nPoint; iPoint++){ @@ -326,9 +186,9 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ void CDiscAdjFEASolver::RegisterSolution(CGeometry *geometry, CConfig *config){ - bool input = true; - bool dynamic = config->GetTime_Domain(); - bool push_index = !config->GetMultizone_Problem(); + const bool input = true; + const bool dynamic = config->GetTime_Domain(); + const bool push_index = !config->GetMultizone_Problem(); /*--- Register solution at all necessary time instances and other variables on the tape ---*/ @@ -359,13 +219,13 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, if (KindDirect_Solver == RUNTIME_FEA_SYS) { - bool pseudo_static = config->GetPseudoStatic(); + const bool pseudo_static = config->GetPseudoStatic(); for (iVar = 0; iVar < nMPROP; iVar++) { - E_i[iVar] = config->GetElasticyMod(iVar); - Nu_i[iVar] = config->GetPoissonRatio(iVar); - Rho_i[iVar] = pseudo_static? 0.0 : config->GetMaterialDensity(iVar); - Rho_DL_i[iVar] = config->GetMaterialDensity(iVar); + E[iVar] = config->GetElasticyMod(iVar); + Nu[iVar] = config->GetPoissonRatio(iVar); + Rho[iVar] = pseudo_static? 
0.0 : config->GetMaterialDensity(iVar); + Rho_DL[iVar] = config->GetMaterialDensity(iVar); } /*--- Read the values of the electric field ---*/ @@ -376,48 +236,28 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, /*--- Reset index, otherwise messes up other derivatives ---*/ if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV_Val[iVar]); + for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV[iVar]); } if (!reset) { - bool local_index = config->GetMultizone_Problem(); - bool push_index = !local_index; - - for (iVar = 0; iVar < nMPROP; iVar++) { - AD::RegisterInput(E_i[iVar], push_index); - AD::RegisterInput(Nu_i[iVar], push_index); - AD::RegisterInput(Rho_i[iVar], push_index); - AD::RegisterInput(Rho_DL_i[iVar], push_index); - } - - if(de_effects){ - for (iVar = 0; iVar < nEField; iVar++) - AD::RegisterInput(EField[iVar], push_index); - } + const bool local_index = config->GetMultizone_Problem(); + const bool push_index = !local_index; - if(fea_dv){ - for (iVar = 0; iVar < nDV; iVar++) - AD::RegisterInput(DV_Val[iVar], push_index); - } + E.Register(push_index); + Nu.Register(push_index); + Rho.Register(push_index); + Rho_DL.Register(push_index); + if (de_effects) EField.Register(push_index); + if (fea_dv) DV.Register(push_index); /*--- Explicitly store the tape indices for when we extract the derivatives ---*/ if (local_index) { - for (iVar = 0; iVar < nMPROP; iVar++) { - AD::SetIndex(AD_Idx_E_i[iVar], E_i[iVar]); - AD::SetIndex(AD_Idx_Nu_i[iVar], Nu_i[iVar]); - AD::SetIndex(AD_Idx_Rho_i[iVar], Rho_i[iVar]); - AD::SetIndex(AD_Idx_Rho_DL_i[iVar], Rho_DL_i[iVar]); - } - - if (de_effects) { - for (iVar = 0; iVar < nEField; iVar++) - AD::SetIndex(AD_Idx_EField[iVar], EField[iVar]); - } - - if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) - AD::SetIndex(AD_Idx_DV_Val[iVar], DV_Val[iVar]); - } + E.SetIndex(); + Nu.SetIndex(); + Rho.SetIndex(); + Rho_DL.SetIndex(); + if (de_effects) EField.SetIndex(); + if (fea_dv) 
DV.SetIndex(); } /*--- Register the flow tractions ---*/ @@ -427,16 +267,16 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, } - /*--- Here it is possible to register other variables as input that influence the flow solution - * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be - * extracted in the ExtractAdjointVariables routine. ---*/ + /*--- Here it is possible to register other variables as input that influence the flow solution + * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be + * extracted in the ExtractAdjointVariables routine. ---*/ } void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){ - bool input = false; - bool dynamic = config->GetTime_Domain(); - bool push_index = !config->GetMultizone_Problem(); + const bool input = false; + const bool dynamic = config->GetTime_Domain(); + const bool push_index = !config->GetMultizone_Problem(); /*--- Register variables as output of the solver iteration ---*/ @@ -452,13 +292,15 @@ void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){ - bool dynamic = config->GetTime_Domain(); - bool multizone = config->GetMultizone_Problem(); + const bool dynamic = config->GetTime_Domain(); + const bool multizone = config->GetMultizone_Problem(); unsigned short iVar; unsigned long iPoint; su2double residual; + su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0}; + /*--- Set Residuals to zero ---*/ SetResToZero(); @@ -600,78 +442,40 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co void CDiscAdjFEASolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config){ - unsigned short iVar; - bool local_index = config->GetMultizone_Problem(); - - /*--- Extract the adjoint values of the farfield values 
---*/ - - if (KindDirect_Solver == RUNTIME_FEA_SYS){ - - if (local_index) { - for (iVar = 0; iVar < nMPROP; iVar++) { - Local_Sens_E[iVar] = AD::GetDerivative(AD_Idx_E_i[iVar]); - Local_Sens_Nu[iVar] = AD::GetDerivative(AD_Idx_Nu_i[iVar]); - Local_Sens_Rho[iVar] = AD::GetDerivative(AD_Idx_Rho_i[iVar]); - Local_Sens_Rho_DL[iVar] = AD::GetDerivative(AD_Idx_Rho_DL_i[iVar]); - } - } - else { - for (iVar = 0; iVar < nMPROP; iVar++) { - Local_Sens_E[iVar] = SU2_TYPE::GetDerivative(E_i[iVar]); - Local_Sens_Nu[iVar] = SU2_TYPE::GetDerivative(Nu_i[iVar]); - Local_Sens_Rho[iVar] = SU2_TYPE::GetDerivative(Rho_i[iVar]); - Local_Sens_Rho_DL[iVar] = SU2_TYPE::GetDerivative(Rho_DL_i[iVar]); - } - } - - SU2_MPI::Allreduce(Local_Sens_E, Global_Sens_E, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Nu, Global_Sens_Nu, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Rho, Global_Sens_Rho, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Rho_DL, Global_Sens_Rho_DL, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - - /*--- Extract the adjoint values of the electric field in the case that it is a parameter of the problem. ---*/ + if (KindDirect_Solver != RUNTIME_FEA_SYS) return; - if (de_effects) { - for (iVar = 0; iVar < nEField; iVar++) { - if (local_index) Local_Sens_EField[iVar] = AD::GetDerivative(AD_Idx_EField[iVar]); - else Local_Sens_EField[iVar] = SU2_TYPE::GetDerivative(EField[iVar]); - } - SU2_MPI::Allreduce(Local_Sens_EField, Global_Sens_EField, nEField, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - } + /*--- Sensitivities of material properties and design variables. 
---*/ - if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) { - if (local_index) Local_Sens_DV[iVar] = AD::GetDerivative(AD_Idx_DV_Val[iVar]); - else Local_Sens_DV[iVar] = SU2_TYPE::GetDerivative(DV_Val[iVar]); - } - SU2_MPI::Allreduce(Local_Sens_DV, Global_Sens_DV, nDV, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - } + E.GetDerivative(); + Nu.GetDerivative(); + Rho.GetDerivative(); + Rho_DL.GetDerivative(); + if (de_effects) EField.GetDerivative(); + if (fea_dv) DV.GetDerivative(); - /*--- Extract the flow traction sensitivities ---*/ + /*--- Extract the flow traction sensitivities. ---*/ - if (config->GetnMarker_Fluid_Load() > 0){ - su2double val_sens; - for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){ - for (unsigned short iDim = 0; iDim < nDim; iDim++){ - val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim); - nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens); - } + if (config->GetnMarker_Fluid_Load() > 0) { + for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){ + for (unsigned short iDim = 0; iDim < nDim; iDim++){ + su2double val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim); + nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens); } } - } } void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){ - bool dynamic = (config->GetTime_Domain()); - bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0); + const bool dynamic = (config->GetTime_Domain()); + const bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0); + + su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0}; unsigned short iVar; - unsigned long iPoint; - for (iPoint = 0; iPoint < nPoint; iPoint++){ + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ for (iVar = 0; iVar < nVar; iVar++){ Solution[iVar] = nodes->GetSolution(iPoint,iVar); } @@ -681,20 +485,14 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig 
*config){ } } if (dynamic){ - for (iVar = 0; iVar < nVar; iVar++){ - Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar); - } - for (iVar = 0; iVar < nVar; iVar++){ - Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar); - } for (iVar = 0; iVar < nVar; iVar++){ Solution[iVar] += nodes->GetDynamic_Derivative_n(iPoint,iVar); } for (iVar = 0; iVar < nVar; iVar++){ - Solution_Accel[iVar] += nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar); + Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar) + nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar); } for (iVar = 0; iVar < nVar; iVar++){ - Solution_Vel[iVar] += nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar); + Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar) + nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar); } } direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution); @@ -709,12 +507,10 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){ - bool dynamic = (config_container->GetTime_Domain()); - unsigned long iPoint; unsigned short iVar; - if (dynamic){ - for (iPoint = 0; iPointGetnPoint(); iPoint++){ + if (config_container->GetTime_Domain()){ + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ for (iVar=0; iVar < nVar; iVar++){ nodes->SetDynamic_Derivative_n(iPoint, iVar, nodes->GetSolution_time_n(iPoint, iVar)); } @@ -731,26 +527,14 @@ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_cont void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*){ - unsigned short iVar; - - for (iVar = 0; iVar < nMPROP; iVar++){ - Total_Sens_E[iVar] += Global_Sens_E[iVar]; - Total_Sens_Nu[iVar] += Global_Sens_Nu[iVar]; - Total_Sens_Rho[iVar] += Global_Sens_Rho[iVar]; - Total_Sens_Rho_DL[iVar] += 
Global_Sens_Rho_DL[iVar]; - } - - if (de_effects){ - for (iVar = 0; iVar < nEField; iVar++) - Total_Sens_EField[iVar]+= Global_Sens_EField[iVar]; - } - - if (fea_dv){ - for (iVar = 0; iVar < nDV; iVar++) - Total_Sens_DV[iVar] += Global_Sens_DV[iVar]; - } + E.UpdateTotal(); + Nu.UpdateTotal(); + Rho.UpdateTotal(); + Rho_DL.UpdateTotal(); + if (de_effects) EField.UpdateTotal(); + if (fea_dv) DV.UpdateTotal(); - /*--- Extract the topology optimization density sensitivities ---*/ + /*--- Extract the topology optimization density sensitivities. ---*/ direct_solver->ExtractAdjoint_Variables(geometry, config); @@ -776,45 +560,34 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo nodes->SetSensitivity(iPoint, iDim, Sensitivity); } } - SetSurface_Sensitivity(geometry, config); -} - -void CDiscAdjFEASolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config){ - } -void CDiscAdjFEASolver::ReadDV(CConfig *config) { - - unsigned long index; +void CDiscAdjFEASolver::ReadDV(const CConfig *config) { string filename; ifstream properties_file; /*--- Choose the filename of the design variable ---*/ - string input_name; - switch (config->GetDV_FEA()) { case YOUNG_MODULUS: - input_name = "dv_young.opt"; + filename = "dv_young.opt"; break; case POISSON_RATIO: - input_name = "dv_poisson.opt"; + filename = "dv_poisson.opt"; break; case DENSITY_VAL: case DEAD_WEIGHT: - input_name = "dv_density.opt"; + filename = "dv_density.opt"; break; case ELECTRIC_FIELD: - input_name = "dv_efield.opt"; + filename = "dv_efield.opt"; break; default: - input_name = "dv.opt"; + filename = "dv.opt"; break; } - filename = input_name; - if (rank == MASTER_NODE) cout << "Filename: " << filename << "." << endl; properties_file.open(filename.data(), ios::in); @@ -826,55 +599,32 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) { if (rank == MASTER_NODE) cout << "There is no design variable file." 
<< endl; - nDV = 1; - DV_Val = new su2double[nDV]; - for (unsigned short iDV = 0; iDV < nDV; iDV++) - DV_Val[iDV] = 1.0; - + nDV = 1; + DV.resize(nDV); + DV[0] = 1.0; } else{ string text_line; - - /*--- First pass: determine number of design variables ---*/ - - unsigned short iDV = 0; - - /*--- Skip the first line: it is the header ---*/ - - getline (properties_file, text_line); - - while (getline (properties_file, text_line)) iDV++; - - /*--- Close the restart file ---*/ - - properties_file.close(); - - nDV = iDV; - DV_Val = new su2double[nDV]; - - /*--- Reopen the file (TODO: improve this) ---*/ - - properties_file.open(filename.data(), ios::in); + vector values; /*--- Skip the first line: it is the header ---*/ - getline (properties_file, text_line); - iDV = 0; while (getline (properties_file, text_line)) { - istringstream point_line(text_line); - point_line >> index >> DV_Val[iDV]; - - iDV++; + unsigned long index; + su2double value; + point_line >> index >> value; + values.push_back(value); } - /*--- Close the restart file ---*/ - - properties_file.close(); + nDV = values.size(); + DV.resize(nDV); + unsigned short iDV = 0; + for (auto x : values) DV[iDV++] = x; } @@ -882,72 +632,12 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) { void CDiscAdjFEASolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { - unsigned short iVar; - unsigned long index, counter; - string restart_filename, filename; - /*--- Restart the solution from file information ---*/ - filename = config->GetSolution_AdjFileName(); - restart_filename = config->GetObjFunc_Extension(filename); + auto filename = config->GetSolution_AdjFileName(); + auto restart_filename = config->GetObjFunc_Extension(filename); restart_filename = config->GetFilename(restart_filename, "", val_iter); - /*--- Read and store the restart metadata. 
---*/ - -// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename); - - /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ - - if (config->GetRead_Binary_Restart()) { - Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename); - } else { - Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename); - } - - /*--- Read all lines in the restart file ---*/ - - long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0; - - /*--- Skip coordinates ---*/ - - unsigned short skipVars = geometry[MESH_0]->GetnDim(); - - /*--- Load data from the restart into correct containers. ---*/ - - counter = 0; - for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { - - /*--- Retrieve local index. If this node from the restart file lives - on the current processor, we will load and instantiate the vars. ---*/ - - iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); - - if (iPoint_Local > -1) { - - /*--- We need to store this point's data, so jump to the correct - offset in the buffer of data from the restart file and load it. ---*/ - - index = counter*Restart_Vars[1] + skipVars; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar]; - nodes->SetSolution(iPoint_Local,Solution); - iPoint_Global_Local++; - - /*--- Increment the overall counter for how many points have been loaded. ---*/ - counter++; - } - - } - - /*--- Detect a wrong solution file ---*/ - - if (iPoint_Global_Local < nPointDomain) { - SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + - string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); - } - - /*--- Delete the class memory that is used to load the restart. 
---*/ - - delete [] Restart_Vars; Restart_Vars = nullptr; - delete [] Restart_Data; Restart_Data = nullptr; + BasicLoadRestart(geometry[MESH_0], config, restart_filename, geometry[MESH_0]->GetnDim()); } diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp index 249c59b4722..610d2026043 100644 --- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp @@ -28,15 +28,8 @@ #include "../../include/solvers/CDiscAdjMeshSolver.hpp" #include "../../include/variables/CDiscAdjMeshBoundVariable.hpp" - -CDiscAdjMeshSolver::CDiscAdjMeshSolver() : CSolver () {} - -CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config) : CSolver() {} - CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver) : CSolver() { - unsigned short iVar; - nVar = geometry->GetnDim(); nDim = geometry->GetnDim(); @@ -46,6 +39,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo nPoint = geometry->GetnPoint(); nPointDomain = geometry->GetnPointDomain(); + omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE); + /*--- Define some auxiliary vectors related to the residual ---*/ Residual_RMS.resize(nVar,1.0); @@ -63,20 +58,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some auxiliary vectors related to the solution ---*/ - - Solution = new su2double[nVar]; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - /*--- Initialize the node structure ---*/ nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config); SetBaseClassPointerToNodes(); /*--- Set which points are vertices and allocate boundary data. 
---*/ + vector Solution(nVar,1e-16); + for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) { - nodes->SetSolution(iPoint,Solution); + nodes->SetSolution(iPoint,Solution.data()); for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) { long iVertex = geometry->nodes->GetVertex(iPoint, iMarker); @@ -90,22 +82,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo } -CDiscAdjMeshSolver::~CDiscAdjMeshSolver(void){ - delete nodes; -} - - -void CDiscAdjMeshSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, - unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){ -} +CDiscAdjMeshSolver::~CDiscAdjMeshSolver() { delete nodes; } void CDiscAdjMeshSolver::SetRecording(CGeometry* geometry, CConfig *config){ /*--- Reset the solution to the initial (converged) solution ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { direct_solver->GetNodes()->SetBound_Disp(iPoint,nodes->GetBoundDisp_Direct(iPoint)); } + END_SU2_OMP_FOR /*--- Set indices to zero ---*/ @@ -123,20 +110,25 @@ void CDiscAdjMeshSolver::RegisterSolution(CGeometry *geometry, CConfig *config){ void CDiscAdjMeshSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset){ - /*--- Register boundary displacements as input ---*/ - bool input = true; - direct_solver->GetNodes()->Register_BoundDisp(input); - + SU2_OMP_MASTER { + /*--- Register boundary displacements as input ---*/ + bool input = true; + direct_solver->GetNodes()->Register_BoundDisp(input); + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){ /*--- Extract the sensitivities of the mesh coordinates ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ /*--- Extract the adjoint solution from the original mesh coordinates ---*/ + su2double 
Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjoint_MeshCoord(iPoint,Solution); /*--- Store the adjoint solution (the container is reused) ---*/ @@ -144,6 +136,7 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *c nodes->SetSolution(iPoint,Solution); } + END_SU2_OMP_FOR } @@ -151,10 +144,12 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig * /*--- Extract the sensitivities of the boundary displacements ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ /*--- Extract the adjoint solution of the boundary displacements ---*/ + su2double Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjoint_BoundDisp(iPoint,Solution); /*--- Store the sensitivities of the boundary displacements ---*/ @@ -162,11 +157,14 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig * nodes->SetBoundDisp_Sens(iPoint,Solution); } + END_SU2_OMP_FOR } void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver *solver) { + SU2_OMP_PARALLEL { + const bool time_stepping = (config->GetTime_Marching() != STEADY); const auto eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength(); @@ -177,6 +175,8 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS ExtractAdjoint_Variables(geometry, config); /*--- Store the sensitivities in the flow adjoint container ---*/ + + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { /*--- If sharp edge, set the sensitivity to 0 on that region ---*/ @@ -198,8 +198,12 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS } } } + END_SU2_OMP_FOR + solver->SetSurface_Sensitivity(geometry, config); + } + END_SU2_OMP_PARALLEL } void CDiscAdjMeshSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { diff --git 
a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index 77ba74ab257..39e0cf2001d 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -27,22 +27,10 @@ #include "../../include/solvers/CDiscAdjSolver.hpp" #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" +#include "../../../Common/include/parallelization/omp_structure.hpp" -CDiscAdjSolver::CDiscAdjSolver(void) : CSolver () { - -} - -CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config) : CSolver() { - -} - -CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { - - unsigned short iVar, iMarker, iDim; - unsigned long iVertex; - string text_line, mesh_filename; - ifstream restart_file; - string filename, AdjExt; +CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, + unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { adjoint = true; @@ -51,21 +39,17 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di /*--- Initialize arrays to NULL ---*/ - CSensitivity = nullptr; - /*-- Store some information about direct solver ---*/ this->KindDirect_Solver = Kind_Solver; this->direct_solver = direct_solver; - nMarker = config->GetnMarker_All(); nPoint = geometry->GetnPoint(); nPointDomain = geometry->GetnPointDomain(); - /*--- Define some auxiliary vectors related to the residual ---*/ + omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE); - Residual = new su2double[nVar]; for (iVar = 0; iVar < nVar; iVar++) Residual[iVar] = 1.0; - Solution_Geometry = new su2double[nDim]; for (iDim = 0; iDim < nDim; iDim++) Solution_Geometry[iDim] = 1.0; + /*--- Define some auxiliary vectors related to the residual ---*/ Residual_RMS.resize(nVar,1.0); Residual_Max.resize(nVar,1.0); @@ -82,27 +66,24 @@ 
CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some auxiliary vectors related to the solution ---*/ - - Solution = new su2double[nVar]; - - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - /*--- Sensitivity definition and coefficient in all the markers ---*/ - CSensitivity = new su2double* [nMarker]; - - for (iMarker = 0; iMarker < nMarker; iMarker++) { - unsigned long nVertex = geometry->nVertex[iMarker]; - CSensitivity[iMarker] = new su2double [nVertex]; - - for (iVertex = 0; iVertex < nVertex; iVertex++) - CSensitivity[iMarker][iVertex] = 0.0; + CSensitivity.resize(nMarker); + for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) { + const auto nVertex = geometry->nVertex[iMarker]; + CSensitivity[iMarker].resize(nVertex, 0.0); } + Sens_Geo.resize(config->GetnMarker_Monitoring(), 0.0); + /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/ - nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config); + if (nVar > MAXNVAR) { + SU2_MPI::Error("Oops! 
The CDiscAdjSolver static array sizes are not large enough.",CURRENT_FUNCTION); + } + + vector Solution(nVar,1e-16); + nodes = new CDiscAdjVariable(Solution.data(), nPoint, nDim, nVar, config); SetBaseClassPointerToNodes(); switch(KindDirect_Solver){ @@ -124,47 +105,41 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di } } -CDiscAdjSolver::~CDiscAdjSolver(void) { - - unsigned short iMarker; - - if (CSensitivity != nullptr) { - for (iMarker = 0; iMarker < nMarker; iMarker++) { - delete [] CSensitivity[iMarker]; - } - delete [] CSensitivity; - } - - delete nodes; -} +CDiscAdjSolver::~CDiscAdjSolver(void) { delete nodes; } void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){ - bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND; - bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; + const bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND; + const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; unsigned long iPoint; unsigned short iVar; /*--- Reset the solution to the initial (converged) solution ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint)); } + END_SU2_OMP_FOR if (time_n_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { for (iVar = 0; iVar < nVar; iVar++) { AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint)[iVar]); } } + END_SU2_OMP_FOR } if (time_n1_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { for (iVar = 0; iVar < nVar; iVar++) { AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint)[iVar]); } } + END_SU2_OMP_FOR } /*--- Set the Jacobian to zero since this is not done inside the fluid iteration @@ -178,64 +153,12 @@ void CDiscAdjSolver::SetRecording(CGeometry* 
geometry, CConfig *config){ } -void CDiscAdjSolver::SetMesh_Recording(CGeometry** geometry, CVolumetricMovement *grid_movement, CConfig *config) { - - -// bool time_n_needed = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) || -// (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)), -// time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND; - -// unsigned long ExtIter = config->GetExtIter(); - - unsigned long iPoint; - unsigned short iDim; - - /*--- Reset the solution to the initial (converged) position ---*/ - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - for (iDim = 0; iDim < nDim; iDim++){ - geometry[MESH_0]->nodes->SetCoord(iPoint, iDim,nodes->GetGeometry_Direct(iPoint,iDim)); - } - } - - /*--- After moving all nodes, update the dual mesh. Recompute the edges and - dual mesh control volumes in the domain and on the boundaries. ---*/ - - grid_movement->UpdateDualGrid(geometry[MESH_0], config); - - /*--- After updating the dual mesh, compute the grid velocities (only dynamic problems). ---*/ -// if (time_n_needed){ -// geometry[MESH_0]->SetGridVelocity(config, ExtIter); -// } - - /*--- Update the multigrid structure after moving the finest grid, - including computing the grid velocities on the coarser levels. 
---*/ - - grid_movement->UpdateMultiGrid(geometry, config); - -// if (time_n_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint,iVar)); -// } -// } -// } -// if (time_n1_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint,iVar)); -// } -// } -// } - -} - void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) { - bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND); - bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; - bool input = true; - bool push_index = !config->GetMultizone_Problem(); + const bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND); + const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; + const bool input = true; + const bool push_index = !config->GetMultizone_Problem(); /*--- Register solution at all necessary time instances and other variables on the tape ---*/ @@ -250,6 +173,8 @@ void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) { void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset) { + SU2_OMP_MASTER { + /*--- Register farfield values as input ---*/ if((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS && !config->GetBoolTurbomachinery())) { @@ -363,12 +288,16 @@ void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, boo /*--- Here it is possible to register other variables as input that influence the flow solution * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be * extracted in the ExtractAdjointVariables routine. 
---*/ + + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjSolver::RegisterOutput(CGeometry *geometry, CConfig *config) { - bool input = false; - bool push_index = !config->GetMultizone_Problem(); + const bool input = false; + const bool push_index = !config->GetMultizone_Problem(); /*--- Register variables as output of the solver iteration ---*/ @@ -383,18 +312,23 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi const su2double relax = (config->GetInnerIter()==0)? 1.0 : config->GetRelaxation_Factor_Adjoint(); + su2double Solution[MAXNVAR] = {0.0}; + /*--- Set Residuals to zero ---*/ SetResToZero(); + su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0}; + const su2double* coordMax[MAXNVAR] = {nullptr}; + unsigned long idxMax[MAXNVAR] = {0}; + /*--- Set the old solution and compute residuals. ---*/ if(!multizone) nodes->Set_OldSolution(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { - const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0; - /*--- Extract the adjoint solution ---*/ if(config->GetMultizone_Problem()) { @@ -410,18 +344,38 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi su2double residual = Solution[iVar]-nodes->GetSolution_Old(iPoint,iVar); nodes->AddSolution(iPoint, iVar, relax*residual); - residual *= isdomain; - Residual_RMS[iVar] += pow(residual,2); - AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint)); + if (iPoint < nPointDomain) { + /*--- Update residual information for current thread. ---*/ + resRMS[iVar] += residual*residual; + if (fabs(residual) > resMax[iVar]) { + resMax[iVar] = fabs(residual); + idxMax[iVar] = iPoint; + coordMax[iVar] = geometry->nodes->GetCoord(iPoint); + } + } } } + END_SU2_OMP_FOR + + /*--- Reduce residual information over all threads in this rank. 
---*/ + SU2_OMP_CRITICAL + for (auto iVar = 0u; iVar < nVar; iVar++) { + Residual_RMS[iVar] += resRMS[iVar]; + AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); + } + END_SU2_OMP_CRITICAL + SU2_OMP_BARRIER SetResidual_RMS(geometry, config); - SetIterLinSolver(direct_solver->System.GetIterations()); - SetResLinSolver(direct_solver->System.GetResidual()); + SU2_OMP_MASTER { + SetIterLinSolver(direct_solver->System.GetIterations()); + SetResLinSolver(direct_solver->System.GetResidual()); + } + END_SU2_OMP_MASTER if (time_n_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { /*--- Extract the adjoint solution at time n ---*/ @@ -432,9 +386,11 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n(iPoint,Solution); } + END_SU2_OMP_FOR } if (time_n1_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { /*--- Extract the adjoint solution at time n-1 ---*/ @@ -445,12 +401,15 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n1(iPoint,Solution); } + END_SU2_OMP_FOR } } void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) { + SU2_OMP_MASTER { + /*--- Extract the adjoint values of the farfield values ---*/ if ((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS) && !config->GetBoolTurbomachinery()) { @@ -508,98 +467,25 @@ void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *conf /*--- Extract here the adjoint values of everything else that is registered as input in RegisterInput. 
---*/ -} - - -void CDiscAdjSolver::ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) { - -// bool time_n_needed = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) || -// (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)); - -// bool time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND; - -// unsigned short iVar; - unsigned long iPoint; - - /*--- Set Residuals to zero ---*/ - -// for (iVar = 0; iVar < nVar; iVar++){ -// SetRes_RMS(iVar,0.0); -// SetRes_Max(iVar,0.0,0); -// } - - /*--- Set the old solution ---*/ - - nodes->Set_OldSolution_Geometry(); - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - - /*--- Extract the adjoint solution ---*/ - - if (config->GetMultizone_Problem()) - geometry->nodes->GetAdjointCoord_LocalIndex(iPoint, Solution_Geometry); - else - geometry->nodes->GetAdjointCoord(iPoint, Solution_Geometry); - - /*--- Store the adjoint solution ---*/ - - nodes->SetSolution_Geometry(iPoint,Solution_Geometry); - } - -// if (time_n_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// -// /*--- Extract the adjoint solution at time n ---*/ -// -// direct_solver->GetNodes()->GetAdjointSolution_time_n(iPoint,Solution); -// -// /*--- Store the adjoint solution at time n ---*/ -// -// nodes->Set_Solution_time_n(iPoint,Solution); -// } -// } -// if (time_n1_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// -// /*--- Extract the adjoint solution at time n-1 ---*/ -// -// direct_solver->GetNodes()->GetAdjointSolution_time_n1(iPoint,Solution); -// -// /*--- Store the adjoint solution at time n-1 ---*/ -// -// nodes->Set_Solution_time_n1(iPoint,Solution); -// } -// } - - /*--- Set the residuals ---*/ - -// for (iPoint = 0; iPoint < nPointDomain; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// residual = node[iPoint]->GetSolution_Geometry(iVar) - node[iPoint]->Get_OldSolution_Geometry(iVar); -// -// Residual_RMS[iVar] += residual*residual; -// 
AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint)); -// } -// } -// -// SetResidual_RMS(geometry, config); + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { - bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST || - config->GetTime_Marching() == DT_STEPPING_2ND); + const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST || + config->GetTime_Marching() == DT_STEPPING_2ND); - unsigned short iVar; - unsigned long iPoint; + su2double Solution[MAXNVAR] = {0.0}; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iVar = 0; iVar < nVar; iVar++) { + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { + for (auto iVar = 0u; iVar < nVar; iVar++) { Solution[iVar] = nodes->GetSolution(iPoint,iVar); } if (dual_time) { - for (iVar = 0; iVar < nVar; iVar++) { + for (auto iVar = 0u; iVar < nVar; iVar++) { Solution[iVar] += nodes->GetDual_Time_Derivative(iPoint,iVar); } } @@ -610,45 +496,24 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution); } } + END_SU2_OMP_FOR } -void CDiscAdjSolver::SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config){ - -// bool dual_time = (config->GetUnsteady_Simulation() == DT_STEPPING_1ST || -// config->GetUnsteady_Simulation() == DT_STEPPING_2ND); - - unsigned short iDim; - unsigned long iPoint; - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - for (iDim = 0; iDim < nDim; iDim++){ - Solution_Geometry[iDim] = 0.0; - } -// if (dual_time){ -// for (iDim = 0; iDim < nVar; iDim++){ -// Solution_Geometry[iDim] += nodes->GetDual_Time_Derivative_Geometry(iPoint,iDim); -// } -// } - for (iDim = 0; iDim < nDim; iDim++){ - nodes->SetSensitivity(iPoint,iDim, Solution_Geometry[iDim]); - } - geometry->nodes->SetAdjointCoord(iPoint, Solution_Geometry); - } +void 
CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) { -} + SU2_OMP_PARALLEL { -void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) { + const bool time_stepping = (config->GetTime_Marching() != STEADY); + const su2double eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength(); - unsigned long iPoint; - unsigned short iDim; - su2double *Coord, Sensitivity, eps; + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { - bool time_stepping = (config->GetTime_Marching() != STEADY); + auto Coord = geometry->nodes->GetCoord(iPoint); - for (iPoint = 0; iPoint < nPoint; iPoint++) { - Coord = geometry->nodes->GetCoord(iPoint); + for (auto iDim = 0u; iDim < nDim; iDim++) { - for (iDim = 0; iDim < nDim; iDim++) { + su2double Sensitivity = 0.0; if(config->GetMultizone_Problem()) { Sensitivity = geometry->nodes->GetAdjointSolution(iPoint, iDim); @@ -663,235 +528,151 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve /*--- If sharp edge, set the sensitivity to 0 on that region ---*/ - if (config->GetSens_Remove_Sharp()) { - eps = config->GetVenkat_LimiterCoeff()*config->GetRefElemLength(); - if ( geometry->nodes->GetSharpEdge_Distance(iPoint) < config->GetAdjSharp_LimiterCoeff()*eps ) - Sensitivity = 0.0; + if (config->GetSens_Remove_Sharp() && geometry->nodes->GetSharpEdge_Distance(iPoint) < eps) { + Sensitivity = 0.0; } + if (!time_stepping) { nodes->SetSensitivity(iPoint,iDim, Sensitivity); } else { - nodes->SetSensitivity(iPoint, iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity); + nodes->SetSensitivity(iPoint,iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity); } } } + END_SU2_OMP_FOR + SetSurface_Sensitivity(geometry, config); -} -void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) { - unsigned short iMarker, iDim, iMarker_Monitoring; - unsigned long iVertex, iPoint; - su2double *Normal, Prod, Sens = 
0.0, SensDim, Area, Sens_Vertex, *Sens_Geo; - Total_Sens_Geo = 0.0; - string Monitoring_Tag, Marker_Tag; - - Sens_Geo = new su2double[config->GetnMarker_Monitoring()]; - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - Sens_Geo[iMarker_Monitoring] = 0.0; } + END_SU2_OMP_PARALLEL +} - for (iMarker = 0; iMarker < nMarker; iMarker++) { +void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) { - /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/ + SU2_OMP_MASTER + for (auto& x : Sens_Geo) x = 0.0; + END_SU2_OMP_MASTER - if(config->GetSolid_Wall(iMarker)) { + /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/ - Sens = 0.0; + for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) { - for (iVertex = 0; iVertex < geometry->GetnVertex(iMarker); iVertex++) { + if (!config->GetSolid_Wall(iMarker)) continue; - iPoint = geometry->vertex[iMarker][iVertex]->GetNode(); - Normal = geometry->vertex[iMarker][iVertex]->GetNormal(); - Prod = 0.0; - for (iDim = 0; iDim < nDim; iDim++) { - /*--- retrieve the gradient calculated with AD -- */ - SensDim = nodes->GetSensitivity(iPoint,iDim); + su2double Sens = 0.0; - /*--- calculate scalar product for projection onto the normal vector ---*/ - Prod += Normal[iDim]*SensDim; + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) + for (auto iVertex = 0ul; iVertex < geometry->GetnVertex(iMarker); iVertex++) { - } + /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/ - Area = GeometryToolbox::Norm(nDim, Normal); + const auto iPoint = geometry->vertex[iMarker][iVertex]->GetNode(); + const auto Normal = geometry->vertex[iMarker][iVertex]->GetNormal(); + su2double Sens_Vertex = 0.0; + for (auto iDim = 0u; iDim < nDim; iDim++) { + Sens_Vertex += Normal[iDim] * nodes->GetSensitivity(iPoint,iDim); + } + Sens_Vertex /= GeometryToolbox::Norm(nDim, Normal); - /*--- Projection of the 
gradient calculated with AD onto the normal vector of the surface ---*/ + CSensitivity[iMarker][iVertex] = -Sens_Vertex; + Sens += pow(Sens_Vertex,2); + } + END_SU2_OMP_FOR - Sens_Vertex = Prod/Area; - CSensitivity[iMarker][iVertex] = -Sens_Vertex; - Sens += Sens_Vertex*Sens_Vertex; - } + if (config->GetMarker_All_Monitoring(iMarker) == NO) continue; - if (config->GetMarker_All_Monitoring(iMarker) == YES){ + /*--- Compute sensitivity for each surface point ---*/ - /*--- Compute sensitivity for each surface point ---*/ + const auto Marker_Tag = config->GetMarker_All_TagBound(iMarker); - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - Monitoring_Tag = config->GetMarker_Monitoring_TagBound(iMarker_Monitoring); - Marker_Tag = config->GetMarker_All_TagBound(iMarker); - if (Marker_Tag == Monitoring_Tag) { - Sens_Geo[iMarker_Monitoring] = Sens; - } - } + for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) { + if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) { + atomicAdd(Sens, Sens_Geo[iMarker_Mon]); + break; } } } -#ifdef HAVE_MPI - su2double *MySens_Geo; - MySens_Geo = new su2double[config->GetnMarker_Monitoring()]; + SU2_OMP_BARRIER + SU2_OMP_MASTER { + auto local = Sens_Geo; + SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - MySens_Geo[iMarker_Monitoring] = Sens_Geo[iMarker_Monitoring]; - Sens_Geo[iMarker_Monitoring] = 0.0; + Total_Sens_Geo = 0.0; + for (auto& x : Sens_Geo) { + x = sqrt(x); + Total_Sens_Geo += x; + } } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER - SU2_MPI::Allreduce(MySens_Geo, Sens_Geo, config->GetnMarker_Monitoring(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - delete [] MySens_Geo; -#endif +} - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); 
iMarker_Monitoring++) { - Sens_Geo[iMarker_Monitoring] = sqrt(Sens_Geo[iMarker_Monitoring]); - Total_Sens_Geo += Sens_Geo[iMarker_Monitoring]; - } +void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh, + unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - delete [] Sens_Geo; + const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST) || (config->GetTime_Marching() == DT_STEPPING_2ND); -} + if (!dual_time) return; -void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - const bool dual_time_1st = (config_container->GetTime_Marching() == DT_STEPPING_1ST); - const bool dual_time_2nd = (config_container->GetTime_Marching() == DT_STEPPING_2ND); - const bool dual_time = (dual_time_1st || dual_time_2nd); - su2double *solution_n, *solution_n1; - - if (dual_time) { - for (auto iPoint = 0ul; iPointGetnPoint(); iPoint++) { - solution_n = nodes->GetSolution_time_n(iPoint); - solution_n1 = nodes->GetSolution_time_n1(iPoint); - for (unsigned short iVar=0; iVar < nVar; iVar++) { - nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar)); - nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]); - } - } // for iPoint - } // if dual_time + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPointGetnPoint(); iPoint++) { + const auto solution_n = nodes->GetSolution_time_n(iPoint); + const auto solution_n1 = nodes->GetSolution_time_n1(iPoint); + + for (auto iVar = 0u; iVar < nVar; iVar++) { + nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar)); + nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]); + } + } + END_SU2_OMP_FOR } void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver 
***solver, CConfig *config, int val_iter, bool val_update_geo) { - unsigned short iVar, iMesh; - unsigned long iPoint, index, iChildren, Point_Fine, counter; - su2double Area_Children, Area_Parent, *Solution_Fine; - string restart_filename, filename; - - bool compressible = (config->GetKind_Regime() == COMPRESSIBLE); - bool incompressible = (config->GetKind_Regime() == INCOMPRESSIBLE); - bool rans = ((config->GetKind_Solver() == DISC_ADJ_RANS) || (config->GetKind_Solver() == DISC_ADJ_INC_RANS)) ; - /*--- Restart the solution from file information ---*/ - filename = config->GetSolution_AdjFileName(); - restart_filename = config->GetObjFunc_Extension(filename); - + auto filename = config->GetSolution_AdjFileName(); + auto restart_filename = config->GetObjFunc_Extension(filename); restart_filename = config->GetFilename(restart_filename, "", val_iter); - - /*--- Read and store the restart metadata. ---*/ - -// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename); - - /*--- Read the restart data from either an ASCII or binary SU2 file. 
---*/ - - if (config->GetRead_Binary_Restart()) { - Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename); - } else { - Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename); - } - - /*--- Read all lines in the restart file ---*/ - - long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0; - unsigned short rbuf_NotMatching = 0, sbuf_NotMatching = 0; + const bool rans = (config->GetKind_Turb_Model() != NONE); /*--- Skip coordinates ---*/ unsigned short skipVars = geometry[MESH_0]->GetnDim(); /*--- Skip flow adjoint variables ---*/ if (KindDirect_Solver== RUNTIME_TURB_SYS) { - if (compressible) { - skipVars += nDim + 2; - } - if (incompressible) { - skipVars += nDim + 2; - } + skipVars += nDim + 2; } /*--- Skip flow adjoint and turbulent variables ---*/ if (KindDirect_Solver == RUNTIME_RADIATION_SYS) { - if (compressible) skipVars += nDim + 2; - if (incompressible) skipVars += nDim + 2; + skipVars += nDim + 2; if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar(); } - /*--- Load data from the restart into correct containers. ---*/ - - counter = 0; - for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { - - /*--- Retrieve local index. If this node from the restart file lives - on the current processor, we will load and instantiate the vars. ---*/ + BasicLoadRestart(geometry[MESH_0], config, restart_filename, skipVars); - iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); + /*--- Interpolate solution on coarse grids ---*/ - if (iPoint_Local > -1) { + for (auto iMesh = 1u; iMesh <= config->GetnMGLevels(); iMesh++) { - /*--- We need to store this point's data, so jump to the correct - offset in the buffer of data from the restart file and load it. 
---*/ + const auto& fineSol = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(); - index = counter*Restart_Vars[1] + skipVars; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar]; - nodes->SetSolution(iPoint_Local,Solution); - iPoint_Global_Local++; - - /*--- Increment the overall counter for how many points have been loaded. ---*/ - counter++; - } - - } + for (auto iPoint = 0ul; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) { + su2double Solution[MAXNVAR] = {0.0}; + const su2double Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint); - /*--- Detect a wrong solution file ---*/ + for (auto iChildren = 0u; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) { + const auto Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren); + const su2double weight = geometry[iMesh-1]->nodes->GetVolume(Point_Fine) / Area_Parent; - if (iPoint_Global_Local < nPointDomain) { sbuf_NotMatching = 1; } - - SU2_MPI::Allreduce(&sbuf_NotMatching, &rbuf_NotMatching, 1, MPI_UNSIGNED_SHORT, MPI_SUM, SU2_MPI::GetComm()); - - if (rbuf_NotMatching != 0) { - SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + - string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); - } - - /*--- Communicate the loaded solution on the fine grid before we transfer - it down to the coarse levels. 
---*/ - - for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) { - Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint); - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 0.0; - for (iChildren = 0; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) { - Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren); - Area_Children = geometry[iMesh-1]->nodes->GetVolume(Point_Fine); - Solution_Fine = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(Point_Fine); - for (iVar = 0; iVar < nVar; iVar++) { - Solution[iVar] += Solution_Fine[iVar]*Area_Children/Area_Parent; - } + for (auto iVar = 0u; iVar < nVar; iVar++) Solution[iVar] += weight * fineSol(Point_Fine, iVar); } solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution); } } - - /*--- Delete the class memory that is used to load the restart. ---*/ - - delete [] Restart_Vars; Restart_Vars = nullptr; - delete [] Restart_Data; Restart_Data = nullptr; - } diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index fd0a6598fef..d220d815a4d 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -372,6 +372,7 @@ void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_containe "support vectorization.", CURRENT_FUNCTION); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1388,7 +1389,8 @@ void CEulerSolver::SetNondimensionalization(CConfig *config, unsigned short iMes GetFluidModel()->SetThermalConductivityModel(config); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL Energy_FreeStreamND = GetFluidModel()->GetStaticEnergy() + 0.5*ModVel_FreeStreamND*ModVel_FreeStreamND; @@ -1724,12 +1726,14 @@ void CEulerSolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_c } } + END_SU2_OMP_FOR FlowNodes->Set_OldSolution(); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1753,61 
+1757,47 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con config->GetKind_Upwind_Flow() == SLAU || config->GetKind_Upwind_Flow() == SLAU2); - /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/ - - if (fixed_cl && !disc_adjoint && !cont_adjoint) { - SU2_OMP_MASTER - SetFarfield_AoA(geometry, solver_container, config, iMesh, Output); - SU2_OMP_BARRIER - } - /*--- Set the primitive variables ---*/ - SU2_OMP_MASTER - ErrorCounter = 0; - SU2_OMP_BARRIER + ompMasterAssignBarrier(ErrorCounter, 0); SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config); + SU2_OMP_BARRIER - if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { - SU2_OMP_BARRIER - SU2_OMP_MASTER - { + SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/ + + if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { unsigned long tmp = ErrorCounter; SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } - SU2_OMP_BARRIER - } - /*--- Compute the engine properties ---*/ + /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). 
---*/ - if (engine) { - SU2_OMP_MASTER - GetPower_Properties(geometry, config, iMesh, Output); - SU2_OMP_BARRIER - } + if (fixed_cl && !disc_adjoint && !cont_adjoint) { + SetFarfield_AoA(geometry, solver_container, config, iMesh, Output); + } - /*--- Compute the actuator disk properties and distortion levels ---*/ + /*--- Compute the engine properties ---*/ - if (actuator_disk) { - SU2_OMP_MASTER - { + if (engine) GetPower_Properties(geometry, config, iMesh, Output); + + /*--- Compute the actuator disk properties and distortion levels ---*/ + + if (actuator_disk) { Set_MPI_ActDisk(solver_container, geometry, config); GetPower_Properties(geometry, config, iMesh, Output); SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output); } - SU2_OMP_BARRIER - } - /*--- Compute NearField MPI ---*/ + /*--- Compute NearField MPI ---*/ + + if (nearfield) Set_MPI_Nearfield(geometry, config); - if (nearfield) { - SU2_OMP_MASTER - Set_MPI_Nearfield(geometry, config); - SU2_OMP_BARRIER } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- Artificial dissipation ---*/ @@ -1891,6 +1881,7 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c if (!physical) nonPhysicalPoints++; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -1975,6 +1966,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain unsigned long counter_local = 0; SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER /*--- Pick one numerics object per thread. 
---*/ CNumerics* numerics = numerics_container[CONV_TERM + omp_get_thread_num()*MAX_TERMS]; @@ -2164,6 +2156,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -2186,6 +2179,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2290,6 +2284,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (rotating_frame) { @@ -2320,6 +2315,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (axisymmetric) { @@ -2396,6 +2392,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (gravity) { @@ -2417,6 +2414,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } @@ -2434,6 +2432,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes(iPoint,iVar) += Volume * nodes->GetHarmonicBalance_Source(iPoint,iVar); } } + END_SU2_OMP_FOR } if (windgust) { @@ -2464,6 +2463,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } /*--- Check if a verification solution is to be computed. 
---*/ @@ -2494,6 +2494,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes(iPoint,iVar) -= sourceMan[iVar]*Volume; } } + END_SU2_OMP_FOR } } @@ -2564,6 +2565,7 @@ void CEulerSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *co nodes->AddUnd_Lapl(iPoint, nVar-1, Pressure_j-Pressure_i); } } + END_SU2_OMP_FOR /*--- Correct the Laplacian across any periodic boundaries. ---*/ @@ -2636,6 +2638,7 @@ void CEulerSolver::SetUpwind_Ducros_Sensor(CGeometry *geometry, CConfig *config) nodes->SetSensor(iPoint, Ducros_i); } + END_SU2_OMP_FOR InitiateComms(geometry, config, SENSOR); CompleteComms(geometry, config, SENSOR); @@ -4816,6 +4819,7 @@ void CEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -5307,6 +5311,7 @@ void CEulerSolver::BC_Riemann(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -5823,7 +5828,8 @@ void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_contain } } } -} + END_SU2_OMP_FOR + } /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -6723,6 +6729,7 @@ void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, CNu } } + END_SU2_OMP_FOR } /*--- Free locally allocated memory ---*/ @@ -7048,6 +7055,7 @@ void CEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -7224,6 +7232,7 @@ void CEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -7371,6 +7380,7 @@ void CEulerSolver::BC_Supersonic_Inlet(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -7496,6 +7506,7 @@ void CEulerSolver::BC_Supersonic_Outlet(CGeometry 
*geometry, CSolver **solver_co } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -7720,6 +7731,7 @@ void CEulerSolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_contai } } + END_SU2_OMP_FOR delete [] Normal; @@ -7974,6 +7986,7 @@ void CEulerSolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_conta } } + END_SU2_OMP_FOR delete [] Normal; @@ -8033,6 +8046,7 @@ void CEulerSolver::BC_Interface_Boundary(CGeometry *geometry, CSolver **solver_c } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8096,6 +8110,7 @@ void CEulerSolver::BC_NearField_Boundary(CGeometry *geometry, CSolver **solver_c } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8539,6 +8554,7 @@ void CEulerSolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8777,6 +8793,7 @@ void CEulerSolver::BC_ActDisk_VariableLoad(CGeometry *geometry, CSolver **solver if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } } + END_SU2_OMP_FOR } void CEulerSolver::PrintVerificationError(const CConfig *config) const { @@ -8830,6 +8847,7 @@ void CEulerSolver::SetFreeStream_Solution(const CConfig *config) { } nodes->SetSolution(iPoint,nVar-1, Density_Inf*Energy_Inf); } + END_SU2_OMP_FOR } void CEulerSolver::SetFreeStream_TurboSolution(CConfig *config) { diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp index c96fef43746..79502c49923 100644 --- a/SU2_CFD/src/solvers/CFEASolver.cpp +++ b/SU2_CFD/src/solvers/CFEASolver.cpp @@ -114,6 +114,7 @@ CFEASolver::CFEASolver(CGeometry *geometry, CConfig *config) : CSolver() { } } } + END_SU2_OMP_PARALLEL /*--- Set element properties ---*/ Set_ElementProperties(geometry, config); @@ -669,6 +670,7 @@ void CFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, { LinSysSol.SetValZero(); } + END_SU2_OMP_PARALLEL /*--- Clear external forces. 
---*/ nodes->Clear_SurfaceLoad_Res(); @@ -687,13 +689,16 @@ void CFEASolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_con SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) nodes->SetSolution(iPoint, zeros); + END_SU2_OMP_FOR } else { SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) nodes->SetSolution(iPoint, nodes->GetPrestretch(iPoint)); + END_SU2_OMP_FOR } - } // end parallel + } + END_SU2_OMP_PARALLEL } void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, const CConfig *config) { @@ -778,10 +783,12 @@ void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -918,10 +925,12 @@ void CFEASolver::Compute_StiffMatrix_NodalStressRes(CGeometry *geometry, CNumeri } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1001,10 +1010,12 @@ void CFEASolver::Compute_MassMatrix(const CGeometry *geometry, CNumerics **numer } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL AD::EndPassive(wasActive); @@ -1080,6 +1091,7 @@ void CFEASolver::Compute_MassRes(const CGeometry *geometry, CNumerics **numerics } } // end iElem loop + END_SU2_OMP_FOR } // end color loop @@ -1169,10 +1181,12 @@ void CFEASolver::Compute_NodalStressRes(CGeometry *geometry, CNumerics **numeric } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1210,6 +1224,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, nodes->SetStress_FEM(iPoint,iStress, 0.0); } } + END_SU2_OMP_FOR AD::EndPassive(wasActive); for(auto color : ElemColoring) { @@ -1297,6 +1312,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics 
**numerics, AD::EndPassive(wasActive); } // end iElem loop + END_SU2_OMP_FOR atomicAdd(stressPen, StressPenalty); } // end color loop @@ -1306,7 +1322,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, /*--- Compute the von Misses stress at each point, and the maximum for the domain. ---*/ su2double maxVonMises = 0.0; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { const auto vms = CFEAElasticity::VonMisesStress(nDim, nodes->GetStress_FEM(iPoint)); @@ -1315,12 +1331,15 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, maxVonMises = max(maxVonMises, vms); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL MaxVonMises_Stress = max(MaxVonMises_Stress, maxVonMises); + END_SU2_OMP_CRITICAL AD::EndPassive(wasActive); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL /*--- Set the value of the MaxVonMises_Stress as the CFEA coeffient ---*/ SU2_MPI::Allreduce(&MaxVonMises_Stress, &Total_CFEA, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); @@ -1462,6 +1481,7 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) nodes->Clear_BodyForces_Res(iPoint); + END_SU2_OMP_FOR for(auto color : ElemColoring) { @@ -1519,11 +1539,13 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con if (LockStrategy) omp_unset_lock(&UpdateLocks[indexNode[iNode]]); } - } // end iElem loop + } + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1732,6 +1754,7 @@ CSysVector computeLinearResidual(const CSysMatrix& A, const CSysVector& b) { CSysVector r(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr); SU2_OMP_PARALLEL { A.ComputeResidual(x, b, r); } + END_SU2_OMP_PARALLEL return r; } @@ -1751,6 +1774,7 @@ CSysVector 
computeLinearResidual(const CSysMatrix& A, btmp.PassiveCopy(b); A.ComputeResidual(xtmp, btmp, r); } + END_SU2_OMP_PARALLEL return r; } @@ -1812,7 +1836,9 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics Conv_Check[1] = rtol; Conv_Check[2] = etol; } - } // end parallel + END_SU2_OMP_MASTER + } + END_SU2_OMP_PARALLEL } else { @@ -1845,18 +1871,20 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics } } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL for (auto iVar = 0ul; iVar < nVar; iVar++) { Residual_RMS[iVar] += resRMS[iVar]; AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual. ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2210,6 +2238,7 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics } } + END_SU2_OMP_FOR /*--- Dynamic contribution. ---*/ @@ -2241,13 +2270,15 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t) } } + END_SU2_OMP_FOR /*--- Add M*TimeRes_Aux to the residual. 
---*/ Compute_MassRes(geometry, numerics, config); LinSysRes += TimeRes; } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2268,6 +2299,7 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig for (iVar = 0; iVar < nVar; iVar++) nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar)); } + END_SU2_OMP_FOR if (dynamic) { SU2_OMP_FOR_STAT(omp_chunk_size) @@ -2294,8 +2326,10 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig nodes->SetSolution_Vel(iPoint, iVar, sol); } } + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CConfig *config) { @@ -2313,6 +2347,7 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo nodes->SetSolution(iPoint, nodes->GetSolution_Pred(iPoint)); nodes->SetSolution_Pred_Old(iPoint, nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR if (dynamic) { SU2_OMP_FOR_STAT(omp_chunk_size) @@ -2339,9 +2374,11 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo nodes->SetSolution_Vel(iPoint, iVar, sol); } } + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2392,6 +2429,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics } } + END_SU2_OMP_FOR } /*--- Loads for dynamic problems. ---*/ @@ -2416,6 +2454,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t) } } + END_SU2_OMP_FOR /*--- Add M*TimeRes_Aux to the residual. 
---*/ Compute_MassRes(geometry, numerics, config); @@ -2448,9 +2487,11 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics alpha_f * nodes->Get_FlowTraction_n(iPoint,iVar) ); } } + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2462,6 +2503,7 @@ void CFEASolver::GeneralizedAlpha_UpdateDisp(const CGeometry *geometry, const CC for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned short iVar = 0; iVar < nVar; iVar++) nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar)); + END_SU2_OMP_PARALLEL } @@ -2516,6 +2558,7 @@ void CFEASolver::GeneralizedAlpha_UpdateSolution(const CGeometry *geometry, cons } } + END_SU2_OMP_PARALLEL } @@ -2542,6 +2585,7 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) { /*--- This is required for the discrete adjoint. ---*/ SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (auto i = nPointDomain*nVar; i < nPoint*nVar; ++i) LinSysRes[i] = 0.0; + END_SU2_OMP_FOR /*--- Solve or smooth the linear system. 
---*/ @@ -2552,8 +2596,10 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) { SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER //SU2_OMP_BARRIER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2602,6 +2648,7 @@ void CFEASolver::PredictStruct_Displacement(CGeometry *geometry, CConfig *config } } + END_SU2_OMP_PARALLEL } @@ -2720,6 +2767,7 @@ void CFEASolver::SetAitken_Relaxation(CGeometry *geometry, CConfig *config) { nodes->SetSolution_Pred(iPoint, newDispPred); } + END_SU2_OMP_PARALLEL } @@ -2811,6 +2859,7 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){ for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR } else { for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) { @@ -2826,12 +2875,14 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){ if (geometry->nodes->GetDomain(iPoint)) obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR } } } atomicAdd(obj_fun_local, objective_function); atomicAdd(nSurf_local, nSurfPoints); } + END_SU2_OMP_PARALLEL SU2_MPI::Allreduce(&objective_function, &Total_OFRefGeom, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); unsigned long nPointsOF = geometry->GetGlobal_nPointDomain(); @@ -2926,10 +2977,13 @@ void CFEASolver::Compute_OFVolFrac(CGeometry *geometry, const CConfig *config) discrete_loc += volume*4.0*rho*(1.0-rho); } } + END_SU2_OMP_FOR + atomicAdd(tot_vol_loc, total_volume); atomicAdd(integral_loc, integral); atomicAdd(discrete_loc, discreteness); } + END_SU2_OMP_PARALLEL su2double tmp; SU2_MPI::Allreduce(&total_volume,&tmp,1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); @@ -2989,8 +3043,11 @@ void CFEASolver::Compute_OFCompliance(CGeometry *geometry, const CConfig 
*config for (iVar = 0; iVar < nVar; iVar++) comp_local += nodalForce[iVar]*nodes->GetSolution(iPoint,iVar); } + END_SU2_OMP_FOR + atomicAdd(comp_local, compliance); } + END_SU2_OMP_PARALLEL SU2_MPI::Allreduce(&compliance, &Total_OFCompliance, 1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); @@ -3059,9 +3116,12 @@ void CFEASolver::Stiffness_Penalty(CGeometry *geometry, CNumerics **numerics, CC } } + END_SU2_OMP_FOR + atomicAdd(totalVol_loc, totalVolume); atomicAdd(weighted_loc, weightedValue); } + END_SU2_OMP_PARALLEL // Reduce value across processors for parallelization @@ -3253,6 +3313,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf else if (rho < 0.0) physical_rho[iElem] = 0.0; else physical_rho[iElem] = rho; } + END_SU2_OMP_PARALLEL geometry->FilterValuesAtElementCG(filter_radius, kernels, search_lim, physical_rho); @@ -3265,15 +3326,18 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iElem=0ul; iElemSetPhysicalDensity(0.0); else element_properties[iElem]->SetPhysicalDensity(physical_rho[iElem]); } + END_SU2_OMP_FOR /*--- Compute nodal averages for output. 
---*/ SU2_OMP_FOR_STAT(omp_chunk_size) @@ -3297,7 +3362,9 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf } nodes->SetAuxVar(iPoint, 0, sum/vol); } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL delete [] physical_rho; diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index 79c8034b5ac..830cdd25b4d 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -825,9 +825,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ /*--- Set the primitive variables ---*/ - SU2_OMP_MASTER - ErrorCounter = 0; - SU2_OMP_BARRIER + ompMasterAssignBarrier(ErrorCounter, 0); SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config); @@ -840,6 +838,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -862,6 +861,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ if (outlet) { SU2_OMP_MASTER GetOutlet_Properties(geometry, config, iMesh, Output); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -924,6 +924,7 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container if (!physical) nonPhysicalPoints++; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -1053,6 +1054,7 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -1076,6 +1078,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER const bool implicit = (config->GetKind_TimeIntScheme() == 
EULER_IMPLICIT); const bool muscl = (config->GetMUSCL_Flow() && (iMesh == MESH_0)); @@ -1219,6 +1222,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -1241,6 +1245,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1298,6 +1303,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (boussinesq) { @@ -1330,6 +1336,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (rotating_frame) { @@ -1364,6 +1371,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (axisymmetric) { @@ -1388,6 +1396,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont nodes->SetAuxVar(iPoint, 0, AuxVar); } + END_SU2_OMP_FOR /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ @@ -1451,6 +1460,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (radiation) { @@ -1493,6 +1503,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } @@ -1506,6 +1517,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. 
---*/ nodes->SetAuxVar(iPoint, 0, nodes->GetEddyViscosity(iPoint)); } + END_SU2_OMP_FOR /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ if (config->GetKind_Gradient_Method() == GREEN_GAUSS) { @@ -1545,6 +1557,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } // for iPoint + END_SU2_OMP_FOR if(!streamwise_periodic_temperature && energy) { @@ -1584,6 +1597,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); }// for iVertex + END_SU2_OMP_FOR }// if periodic inlet boundary }// for iMarker @@ -1619,6 +1633,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } } @@ -1754,9 +1769,11 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) maxVel2 = max(maxVel2, nodes->GetVelocity2(iPoint)); + END_SU2_OMP_FOR SU2_OMP_CRITICAL MaxVel2 = max(MaxVel2, maxVel2); + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER @@ -1766,6 +1783,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co config->SetMax_Vel2(max(1e-10, MaxVel2)); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1776,6 +1794,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) nodes->SetBetaInc2(iPoint, BetaInc2); + END_SU2_OMP_FOR } @@ -2008,6 +2027,7 @@ void CIncEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_contain Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } @@ -2249,6 +2269,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } void 
CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, @@ -2446,6 +2467,7 @@ void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } @@ -2536,6 +2558,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver Jacobian.AddVal2Diag(iPoint, nDim+1, delta); } } + END_SU2_OMP_FOR } else { @@ -2579,6 +2602,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -2615,6 +2639,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver for (iVar = 0; iVar < nVar-!energy; iVar++) LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } + END_SU2_OMP_FOR } } @@ -2675,6 +2700,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver Jacobian.AddVal2Diag(iPoint, nDim+1, delta); } } + END_SU2_OMP_FOR } } @@ -2954,4 +2980,6 @@ void CIncEulerSolver::SetFreeStream_Solution(const CConfig *config){ } nodes->SetSolution(iPoint,nDim+1, Temperature_Inf); } + END_SU2_OMP_FOR + } diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp index 05b354f769b..92b0fdfc925 100644 --- a/SU2_CFD/src/solvers/CIncNSSolver.cpp +++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp @@ -290,10 +290,12 @@ void CIncNSSolver::Compute_Streamwise_Periodic_Recovered_Values(CConfig *config, nodes->SetStreamwise_Periodic_RecoveredTemperature(iPoint, Temperature_Recovered); } } // for iPoint + END_SU2_OMP_FOR /*--- Compute the integrated Heatflux Q into the domain, and massflow over periodic markers ---*/ SU2_OMP_MASTER GetStreamwise_Periodic_Properties(geometry, config, iMesh); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -338,6 +340,7 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c 
nodes->SetDES_LengthScale(iPoint,DES_LengthScale); } + END_SU2_OMP_FOR return nonPhysicalPoints; @@ -476,6 +479,7 @@ void CIncNSSolver::BC_Wall_Generic(const CGeometry *geometry, const CConfig *con } } } + END_SU2_OMP_FOR } void CIncNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver**, CNumerics*, @@ -585,4 +589,5 @@ void CIncNSSolver::BC_ConjugateHeat_Interface(CGeometry *geometry, CSolver **sol nodes->SetSolution_Old(iPoint, nDim+1, Twall); nodes->SetEnergy_ResTruncError_Zero(iPoint); } + END_SU2_OMP_FOR } diff --git a/SU2_CFD/src/solvers/CMeshSolver.cpp b/SU2_CFD/src/solvers/CMeshSolver.cpp index 49807e01908..e784308a743 100644 --- a/SU2_CFD/src/solvers/CMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CMeshSolver.cpp @@ -147,6 +147,7 @@ CMeshSolver::CMeshSolver(CGeometry *geometry, CConfig *config) : CFEASolver(true SU2_OMP_PARALLEL { SetMinMaxVolume(geometry, config, false); } + END_SU2_OMP_PARALLEL /*--- Compute the wall distance using the reference coordinates ---*/ SetWallDistance(geometry, config); @@ -177,6 +178,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd MaxVolume = -1E22; MinVolume = 1E22; ElemCounter = 0; } + END_SU2_OMP_MASTER /*--- Local min/max, final reduction outside loop. ---*/ su2double maxVol = -1E22, minVol = 1E22; @@ -228,12 +230,14 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd /*--- Count distorted elements. 
---*/ if (ElemVolume <= 0.0) elCount++; } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxVolume = max(MaxVolume, maxVol); MinVolume = min(MinVolume, minVol); ElemCounter += elCount; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -243,6 +247,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd SU2_MPI::Allreduce(&maxVol, &MaxVolume, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&minVol, &MinVolume, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Volume from 0 to 1 ---*/ @@ -258,6 +263,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd element[iElem].SetRef_Volume(ElemVolume); } } + END_SU2_OMP_FOR /*--- Store the maximum and minimum volume. ---*/ SU2_OMP_MASTER { @@ -273,7 +279,9 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd if ((ElemCounter != 0) && (rank == MASTER_NODE)) cout <<"There are " << ElemCounter << " elements with negative volume.\n" << endl; - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER AD::EndPassive(wasActive); } @@ -336,6 +344,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { nodes->SetWallDistance(iPoint, MaxDistance); } + END_SU2_OMP_FOR } else { su2double MaxDistance_Local = -1E22, MinDistance_Local = 1E22; @@ -358,11 +367,13 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { if (dist > EPS) MinDistance_Local = min(MinDistance_Local, dist); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxDistance = max(MaxDistance, MaxDistance_Local); MinDistance = min(MinDistance, MinDistance_Local); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -372,6 +383,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { SU2_MPI::Allreduce(&MaxDistance_Local, &MaxDistance, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); 
SU2_MPI::Allreduce(&MinDistance_Local, &MinDistance, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -381,6 +393,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { su2double nodeDist = nodes->GetWallDistance(iPoint)/MaxDistance; nodes->SetWallDistance(iPoint,nodeDist); } + END_SU2_OMP_FOR /*--- Compute the element distances ---*/ SU2_OMP_FOR_STAT(omp_chunk_size) @@ -401,8 +414,10 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { element[iElem].SetWallDistance(ElemDist); } + END_SU2_OMP_FOR - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, CConfig *config){ @@ -456,6 +471,8 @@ void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, break; } } + END_SU2_OMP_PARALLEL + stiffness_set = true; } @@ -486,6 +503,7 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig SU2_OMP_PARALLEL { LinSysRes.SetValZero(); } + END_SU2_OMP_PARALLEL /*--- Impose boundary conditions (all of them are ESSENTIAL BC's - displacements). ---*/ SetBoundaryDisplacements(geometry[MESH_0], numerics[FEA_TERM], config); @@ -511,7 +529,8 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig /*--- Check for failed deformation (negative volumes). ---*/ SetMinMaxVolume(geometry[MESH_0], config, true); - } // end parallel + } + END_SU2_OMP_PARALLEL } @@ -533,6 +552,7 @@ void CMeshSolver::UpdateGridCoord(CGeometry *geometry, CConfig *config){ geometry->nodes->SetCoord(iPoint, iDim, val_coord); } } + END_SU2_OMP_FOR /*--- Communicate the updated displacements and mesh coordinates. ---*/ geometry->InitiateComms(geometry, config, COORDINATES); @@ -590,6 +610,7 @@ void CMeshSolver::ComputeGridVelocity(CGeometry *geometry, CConfig *config){ } } + END_SU2_OMP_FOR /*--- The velocity was computed for nPointDomain, now we communicate it. 
---*/ geometry->InitiateComms(geometry, config, GRID_VELOCITY); diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp index 778de7cad45..879ee715952 100644 --- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp @@ -525,6 +525,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con unsigned long counter_local = 0; SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER /*--- Pick one numerics object per thread. ---*/ CNumerics* numerics = numerics_container[CONV_TERM]; @@ -697,6 +698,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } } @@ -985,6 +987,7 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con }else eAxi_local++; } + END_SU2_OMP_FOR } /*--- Checking for NaN ---*/ diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp index 7e143a369f5..2e561b70a37 100644 --- a/SU2_CFD/src/solvers/CNSSolver.cpp +++ b/SU2_CFD/src/solvers/CNSSolver.cpp @@ -86,12 +86,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C turbulence solver, and post) only temperature and velocity are needed ---*/ const auto nPrimVarGrad_bak = nPrimVarGrad; - if (Output) { - SU2_OMP_BARRIER - SU2_OMP_MASTER - nPrimVarGrad = 1+nDim; - SU2_OMP_BARRIER - } + if (Output) ompMasterAssignBarrier(nPrimVarGrad, 1+nDim); if (config->GetReconstructionGradientRequired() && muscl && !center) { switch (config->GetKind_Gradient_Method_Recon()) { @@ -113,11 +108,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C SetPrimitive_Gradient_LS(geometry, config); } - if (Output) { - SU2_OMP_MASTER - nPrimVarGrad = nPrimVarGrad_bak; - SU2_OMP_BARRIER - } + 
if (Output) ompMasterAssignBarrier(nPrimVarGrad, nPrimVarGrad_bak); /*--- Compute the limiters ---*/ @@ -171,6 +162,7 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons nonPhysicalPoints += !physical; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -316,6 +308,7 @@ void CNSSolver::SetRoe_Dissipation(CGeometry *geometry, CConfig *config){ nodes->SetRoe_Dissipation_NTS(iPoint, delta, config->GetConst_DES()); } } + END_SU2_OMP_FOR } @@ -520,6 +513,7 @@ void CNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_container } } } + END_SU2_OMP_FOR if (Jacobian_i) for (auto iVar = 0u; iVar < nVar; iVar++) @@ -717,6 +711,7 @@ void CNSSolver::BC_Isothermal_Wall_Generic(CGeometry *geometry, CSolver **solver } } } + END_SU2_OMP_FOR if (Jacobian_i) for (auto iVar = 0u; iVar < nVar; iVar++) @@ -914,6 +909,7 @@ void CNSSolver::SetTauWall_WF(CGeometry *geometry, CSolver **solver_container, c nodes->SetTauWall(iPoint, Tau_Wall); } + END_SU2_OMP_FOR } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index fa9aa896987..433e295132c 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -955,6 +955,7 @@ void CSolver::InitiatePeriodicComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. ---*/ @@ -1037,6 +1038,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, SU2_MPI::Waitany(geometry->nPeriodicRecv, geometry->req_PeriodicRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER source = status.MPI_SOURCE; #else @@ -1283,6 +1285,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, } } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. 
@@ -1294,6 +1297,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, SU2_MPI::Waitall(geometry->nPeriodicSend, geometry->req_PeriodicSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER } @@ -1520,6 +1524,7 @@ void CSolver::InitiateComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. ---*/ @@ -1572,6 +1577,7 @@ void CSolver::CompleteComms(CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -1669,6 +1675,7 @@ void CSolver::CompleteComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. @@ -1678,6 +1685,7 @@ void CSolver::CompleteComms(CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER } @@ -1804,6 +1812,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, } } } /* End SU2_OMP_MASTER, now all threads update the CFL number. */ + END_SU2_OMP_MASTER SU2_OMP_BARRIER /* Loop over all points on this grid and apply CFL adaption. */ @@ -1816,6 +1825,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, Max_CFL_Local = 0.0; Avg_CFL_Local = 0.0; } + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(roundUpDiv(geometry[iMesh]->GetnPointDomain(),omp_get_max_threads())) for (unsigned long iPoint = 0; iPoint < geometry[iMesh]->GetnPointDomain(); iPoint++) { @@ -1884,6 +1894,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, } } + END_SU2_OMP_FOR /* Reduce the min/max/avg local CFL numbers. 
*/ @@ -1894,6 +1905,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, Max_CFL_Local = max(Max_CFL_Local,myCFLMax); Avg_CFL_Local += myCFLSum; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -1904,6 +1916,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1915,6 +1928,8 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) if (geometry->GetMGLevel() != MESH_0) return; + SU2_OMP_MASTER { + /*--- Set the L2 Norm residual in all the processors. ---*/ vector rbuf_res(nVar); @@ -1947,30 +1962,36 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) /*--- Set the Maximum residual in all the processors. ---*/ - if (config->GetComm_Level() != COMM_FULL) return; + if (config->GetComm_Level() == COMM_FULL) { - const unsigned long nProcessor = size; + const unsigned long nProcessor = size; - su2activematrix rbuf_residual(nProcessor,nVar); - su2matrix rbuf_point(nProcessor,nVar); - su2activematrix rbuf_coord(nProcessor*nVar, nDim); + su2activematrix rbuf_residual(nProcessor,nVar); + su2matrix rbuf_point(nProcessor,nVar); + su2activematrix rbuf_coord(nProcessor*nVar, nDim); - SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); + 
SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); - for (unsigned short iVar = 0; iVar < nVar; iVar++) { - for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { - AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + for (unsigned short iVar = 0; iVar < nVar; iVar++) { + for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { + AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + } } } + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) { if (geometry->GetMGLevel() != MESH_0) return; + SU2_OMP_MASTER { + /*--- Set the L2 Norm residual in all the processors. ---*/ vector rbuf_res(nVar); @@ -1982,26 +2003,30 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) Residual_BGS[iVar] = max(EPS*EPS, sqrt(rbuf_res[iVar]/Global_nPointDomain)); } - if (config->GetComm_Level() != COMM_FULL) return; + if (config->GetComm_Level() == COMM_FULL) { - /*--- Set the Maximum residual in all the processors. ---*/ + /*--- Set the Maximum residual in all the processors. 
---*/ - const unsigned long nProcessor = size; + const unsigned long nProcessor = size; - su2activematrix rbuf_residual(nProcessor,nVar); - su2matrix rbuf_point(nProcessor,nVar); - su2activematrix rbuf_coord(nProcessor*nVar, nDim); + su2activematrix rbuf_residual(nProcessor,nVar); + su2matrix rbuf_point(nProcessor,nVar); + su2activematrix rbuf_coord(nProcessor*nVar, nDim); - SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); - for (unsigned short iVar = 0; iVar < nVar; iVar++) { - for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { - AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + for (unsigned short iVar = 0; iVar < nVar; iVar++) { + for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { + AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + } } } + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { @@ -2033,6 +2058,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { LinSysRes(iPoint,iVar) += Flux * Solution_i[iVar]; } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ 
-2058,6 +2084,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { for (auto iVar = 0u; iVar < nVar; iVar++) LinSysRes(iPoint,iVar) -= Flux * base_nodes->GetSolution(iPoint,iVar); } + END_SU2_OMP_FOR } } @@ -2145,6 +2172,7 @@ void CSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *config) } } } + END_SU2_OMP_FOR /*--- Correct the Laplacian across any periodic boundaries. ---*/ @@ -2686,7 +2714,9 @@ void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) { } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- It's necessary to communicate this information ---*/ @@ -3721,6 +3751,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config if (!config->GetSolid_Wall(iMarker)) continue; /*--- Loop over the vertices ---*/ + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) { /*--- Recover the point index ---*/ @@ -3734,6 +3765,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config AD::RegisterOutput(VertexTraction[iMarker][iVertex][iDim]); } } + END_SU2_OMP_FOR } } @@ -3750,6 +3782,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf if (!config->GetSolid_Wall(iMarker)) continue; /*--- Loop over the vertices ---*/ + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) { /*--- Recover the point index ---*/ @@ -3764,6 +3797,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf SU2_TYPE::GetValue(VertexTractionAdjoint[iMarker][iVertex][iDim])); } } + END_SU2_OMP_FOR } } @@ -3816,6 +3850,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig Residual_BGS[iVar] = 0.0; Residual_Max_BGS[iVar] = 0.0; } + END_SU2_OMP_MASTER vector resMax(nVar,0.0), resRMS(nVar,0.0); vector coordMax(nVar,nullptr); @@ -3839,6 +3874,7 @@ void 
CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig } } } + END_SU2_OMP_FOR /*--- Reduce residual information over all threads in this rank. ---*/ SU2_OMP_CRITICAL @@ -3846,11 +3882,65 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig Residual_BGS[iVar] += resRMS[iVar]; AddRes_Max_BGS(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER - SU2_OMP_MASTER SetResidual_BGS(geometry, config); - SU2_OMP_BARRIER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL +} + +void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const string& filename, unsigned long skipVars) { + + /*--- Read and store the restart metadata. ---*/ + +// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, filename); + + /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ + + if (config->GetRead_Binary_Restart()) { + Read_SU2_Restart_Binary(geometry, config, filename); + } else { + Read_SU2_Restart_ASCII(geometry, config, filename); + } + + /*--- Load data from the restart into correct containers. ---*/ + + unsigned long iPoint_Global_Local = 0; + + for (auto iPoint_Global = 0ul; iPoint_Global < geometry->GetGlobal_nPointDomain(); iPoint_Global++ ) { + + /*--- Retrieve local index. If this node from the restart file lives + on the current processor, we will load and instantiate the vars. ---*/ + + const auto iPoint_Local = geometry->GetGlobal_to_Local_Point(iPoint_Global); + + if (iPoint_Local > -1) { + + /*--- We need to store this point's data, so jump to the correct + offset in the buffer of data from the restart file and load it. 
---*/ + + const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars; + + for (auto iVar = 0u; iVar < nVar; iVar++) { + base_nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]); + } + + iPoint_Global_Local++; + } + + } + + /*--- Delete the class memory that is used to load the restart. ---*/ + + delete [] Restart_Vars; Restart_Vars = nullptr; + delete [] Restart_Data; Restart_Data = nullptr; + + /*--- Detect a wrong solution file ---*/ + + if (iPoint_Global_Local != nPointDomain) { + SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + + string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); + } } diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp index d72f80c8bdd..33690ea185f 100644 --- a/SU2_CFD/src/solvers/CTurbSASolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp @@ -236,6 +236,7 @@ void CTurbSASolver::Preprocessing(CGeometry *geometry, CSolver **solver_containe auto Laminar_Viscosity = solver_container[FLOW_SOL]->GetNodes()->GetLaminarViscosity(iPoint); nodes->SetVortex_Tilting(iPoint, PrimGrad_Flow, Vorticity, Laminar_Viscosity); } + END_SU2_OMP_FOR } /*--- Compute the DES length scale ---*/ @@ -281,6 +282,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain nodes->SetmuT(iPoint,muT); } + END_SU2_OMP_FOR } @@ -379,6 +381,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR if (harmonic_balance) { @@ -394,6 +397,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai LinSysRes(iPoint,iVar) += Source*Volume; } } + END_SU2_OMP_FOR } } @@ -410,6 +414,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta if (config->GetWall_Functions()) { SU2_OMP_MASTER SetNuTilde_WF(geometry, solver_container, conv_numerics, 
visc_numerics, config, val_marker); + END_SU2_OMP_MASTER SU2_OMP_BARRIER return; } @@ -475,6 +480,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta } } } + END_SU2_OMP_FOR } void CTurbSASolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, @@ -535,6 +541,7 @@ void CTurbSASolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container } } + END_SU2_OMP_FOR } @@ -622,6 +629,7 @@ void CTurbSASolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, CN } } + END_SU2_OMP_FOR } @@ -707,6 +715,7 @@ void CTurbSASolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR } @@ -795,6 +804,7 @@ void CTurbSASolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_conta } } + END_SU2_OMP_FOR } @@ -883,6 +893,7 @@ void CTurbSASolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } @@ -1032,6 +1043,7 @@ void CTurbSASolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, // Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } @@ -1125,6 +1137,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -1229,6 +1242,7 @@ void CTurbSASolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -1889,6 +1903,7 @@ void CTurbSASolver::SetDES_LengthScale(CSolver **solver, CGeometry *geometry, CC nodes->SetDES_LengthScale(iPoint, lengthScale); } + END_SU2_OMP_FOR } void CTurbSASolver::SetInletAtVertex(const su2double *val_inlet, diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp index 224df226e3b..0ba644fe1bc 100644 --- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp +++ 
b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp @@ -273,6 +273,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai nodes->SetmuT(iPoint,muT); } + END_SU2_OMP_FOR } @@ -347,6 +348,7 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } @@ -441,6 +443,7 @@ void CTurbSSTSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_cont } } } + END_SU2_OMP_FOR } void CTurbSSTSolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, @@ -501,6 +504,7 @@ void CTurbSSTSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_containe if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } } + END_SU2_OMP_FOR } @@ -594,6 +598,7 @@ void CTurbSSTSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR } @@ -684,6 +689,7 @@ void CTurbSSTSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR } @@ -775,6 +781,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_ if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -885,6 +892,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 82df13a563a..2ace5c5c495 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -229,6 +229,7 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -305,6 +306,7 @@ void 
CTurbSolver::SumEdgeFluxes(CGeometry* geometry) { LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge)); } } + END_SU2_OMP_FOR } @@ -503,6 +505,7 @@ void CTurbSolver::BC_Fluid_Interface(CGeometry *geometry, CSolver **solver_conta Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } delete [] PrimVar_j; @@ -540,6 +543,7 @@ void CTurbSolver::Impose_Fixed_Values(const CGeometry *geometry, const CConfig * } } } + END_SU2_OMP_FOR } } @@ -559,7 +563,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver /*--- Build implicit system ---*/ - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /// TODO: This could be the SetTime_Step of this solver. @@ -593,17 +597,17 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver } } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { Residual_RMS[iVar] += resRMS[iVar]; AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - SU2_OMP_BARRIER } void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) { @@ -628,6 +632,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { nodes->AddSolution(iPoint, 0, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint]); } + END_SU2_OMP_FOR break; case SST: case SST_SUST: @@ -647,6 +652,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve density, density_old, lowerlimit[iVar], upperlimit[iVar]); } } + END_SU2_OMP_FOR break; } @@ -668,11 +674,12 @@ void 
CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_ /*--- Solve or smooth the linear system. ---*/ - SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait) + SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT) for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) { LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); } + END_SU2_OMP_FOR auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); @@ -680,6 +687,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_ SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER CompleteImplicitIteration(geometry, solver_container, config); @@ -733,6 +741,7 @@ void CTurbSolver::ComputeUnderRelaxationFactor(const CConfig *config) { nodes->SetUnderRelaxation(iPoint, localUnderRelaxation); } + END_SU2_OMP_FOR } @@ -834,6 +843,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR } else { @@ -880,6 +890,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -927,6 +938,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR } } @@ -999,6 +1011,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } // end dynamic grid @@ -1081,6 +1094,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * } } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe. 
+ END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- MPI solution and compute the eddy viscosity ---*/ @@ -1108,6 +1122,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * } solver[iMesh][TURB_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse); } + END_SU2_OMP_FOR solver[iMesh][TURB_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION); solver[iMesh][TURB_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION); @@ -1124,7 +1139,8 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * delete [] Restart_Vars; Restart_Vars = nullptr; delete [] Restart_Data; Restart_Data = nullptr; - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/SU2_CFD/src/variables/CMeshVariable.cpp b/SU2_CFD/src/variables/CMeshVariable.cpp index 0f35b0fc442..d4786c3ed75 100644 --- a/SU2_CFD/src/variables/CMeshVariable.cpp +++ b/SU2_CFD/src/variables/CMeshVariable.cpp @@ -50,13 +50,17 @@ CMeshVariable::CMeshVariable(unsigned long npoint, unsigned long ndim, CConfig * void CMeshVariable::Register_MeshCoord(bool input) { if (input) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned long iDim = 0; iDim < nDim; iDim++) AD::RegisterInput(Mesh_Coord(iPoint,iDim)); + END_SU2_OMP_FOR } else { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned long iDim = 0; iDim < nDim; iDim++) AD::RegisterOutput(Mesh_Coord(iPoint,iDim)); + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/src/variables/CVariable.cpp b/SU2_CFD/src/variables/CVariable.cpp index 4d7b170bc47..ff9e9ef4eb3 100644 --- a/SU2_CFD/src/variables/CVariable.cpp +++ b/SU2_CFD/src/variables/CVariable.cpp @@ -113,6 +113,7 @@ void CVariable::Restore_BGSSolution_k() { void CVariable::SetExternalZero() { parallelSet(External.size(), 0.0, External.data()); } void CVariable::RegisterSolution(bool input, bool 
push_index) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint) { for(unsigned long iVar=0; iVar." + ], + "pairs": + { + "SU2_OMP_MASTER": "END_SU2_OMP_MASTER", + "SU2_OMP_CRITICAL": "END_SU2_OMP_CRITICAL", + "SU2_OMP_PARALLEL": "END_SU2_OMP_PARALLEL", + "SU2_OMP_PARALLEL_": "END_SU2_OMP_PARALLEL", + "SU2_OMP_PARALLEL_ON": "END_SU2_OMP_PARALLEL", + "SU2_OMP_FOR_": "END_SU2_OMP_FOR", + "SU2_OMP_FOR_DYN": "END_SU2_OMP_FOR", + "SU2_OMP_FOR_STAT": "END_SU2_OMP_FOR", + "CSYSVEC_PARFOR": "END_CSYSVEC_PARFOR", + "CNEWTON_PARFOR": "END_CNEWTON_PARFOR" + } +}