From 5fd72ca40624c430f2e35a7b6914346e599583a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Sun, 28 Feb 2021 20:02:24 +0100 Subject: [PATCH 01/57] Add OpDiLib submodule. --- .gitmodules | 3 +++ externals/opdi | 1 + 2 files changed, 4 insertions(+) create mode 160000 externals/opdi diff --git a/.gitmodules b/.gitmodules index f160f2e549e..ae2967618b2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -15,3 +15,6 @@ [submodule "subprojects/Mutationpp"] path = subprojects/Mutationpp url = https://github.com/mutationpp/Mutationpp.git +[submodule "externals/opdi"] + path = externals/opdi + url = https://github.com/SciCompKL/OpDiLib diff --git a/externals/opdi b/externals/opdi new file mode 160000 index 00000000000..3c4132bbf12 --- /dev/null +++ b/externals/opdi @@ -0,0 +1 @@ +Subproject commit 3c4132bbf1266b2e999d22212c8de88ec085a3e0 From 679e979ab0a0baa7359a92be6aa797992f4e4a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Sun, 28 Feb 2021 20:07:18 +0100 Subject: [PATCH 02/57] Update meson script. 
--- meson.build | 5 +++++ meson_scripts/init.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/meson.build b/meson.build index cb688126fbe..3399630f9c3 100644 --- a/meson.build +++ b/meson.build @@ -104,6 +104,11 @@ endif if omp # add OpenMP dependency su2_deps += omp_dep + + # add opdi dependency + if get_option('enable-autodiff') + codi_dep += declare_dependency(include_directories: 'externals/opdi/include') + endif endif if get_option('enable-tecio') diff --git a/meson_scripts/init.py b/meson_scripts/init.py index fe0cc063aa9..e34d786a04a 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,6 +48,8 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' + sha_version_opdi = '3c4132bbf1266b2e999d22212c8de88ec085a3e0' + github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' sha_version_ninja = '52649de2c56b63f42bc59513d51286531c595b44' @@ -57,12 +59,14 @@ def init_submodules(method = 'auto'): medi_name = 'MeDiPack' codi_name = 'CoDiPack' + opdi_name = 'OpDiLib' meson_name = 'meson' ninja_name= 'ninja' mpp_name= 'Mutationpp' base_path = cur_dir + os.path.sep + 'externals' + os.path.sep alt_name_medi = base_path + 'medi' alt_name_codi = base_path + 'codi' + alt_name_opdi = base_path + 'opdi' alt_name_meson = base_path + 'meson' alt_name_ninja = base_path + 'ninja' alt_name_mpp = cur_dir + os.path.sep + 'subprojects' + os.path.sep + 'Mutationpp' @@ -83,6 +87,7 @@ def init_submodules(method = 'auto'): if is_git: submodule_status(alt_name_codi, sha_version_codi) submodule_status(alt_name_medi, sha_version_medi) + submodule_status(alt_name_opdi, sha_version_opdi) submodule_status(alt_name_meson, sha_version_meson) submodule_status(alt_name_ninja, 
sha_version_ninja) submodule_status(alt_name_mpp, sha_version_mpp) @@ -90,6 +95,7 @@ def init_submodules(method = 'auto'): else: download_module(codi_name, alt_name_codi, github_repo_codi, sha_version_codi) download_module(medi_name, alt_name_medi, github_repo_medi, sha_version_medi) + download_module(opdi_name, alt_name_opdi, github_repo_opdi, sha_version_opdi) download_module(meson_name, alt_name_meson, github_repo_meson, sha_version_meson) download_module(ninja_name, alt_name_ninja, github_repo_ninja, sha_version_ninja) download_module(mpp_name, alt_name_mpp, github_repo_mpp, sha_version_mpp) From b4650ba8f1f3a412677e937eba2da4e302abb4e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Sun, 28 Feb 2021 20:55:08 +0100 Subject: [PATCH 03/57] Update to thread-safe version of CoDiPack. --- externals/codi | 2 +- meson_scripts/init.py | 2 +- preconfigure.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/externals/codi b/externals/codi index 1b8d3f5f03d..2a0dbdbed2f 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8 +Subproject commit 2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index e34d786a04a..717c644cb20 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,7 +44,7 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8' + sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' diff --git a/preconfigure.py b/preconfigure.py index 639740a54d8..94314b5da33 100755 --- a/preconfigure.py +++ 
b/preconfigure.py @@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8' + sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' From caa15426eb84ffc9f6e604407a6becdd281c5e2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 00:18:06 +0100 Subject: [PATCH 04/57] Add parallel AD type. --- Common/include/basic_types/datatype_structure.hpp | 4 ++++ Common/include/parallelization/omp_structure.hpp | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp index 58bc9920c3b..63965aa92ad 100644 --- a/Common/include/basic_types/datatype_structure.hpp +++ b/Common/include/basic_types/datatype_structure.hpp @@ -80,6 +80,9 @@ using su2enable_if = typename std::enable_if::type; #define CODI_PRIMAL_INDEX_TAPE 0 #endif +#if defined(_OPENMP) +using su2double = codi::RealReverseIndexParallel; +#else #if CODI_INDEX_TAPE using su2double = codi::RealReverseIndex; #elif CODI_PRIMAL_TAPE @@ -89,6 +92,7 @@ using su2double = codi::RealReversePrimalIndex; #else using su2double = codi::RealReverse; #endif +#endif #elif defined(CODI_FORWARD_TYPE) // forward mode AD #include "codi.hpp" diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index d12f450219b..a211664c549 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -46,9 +46,8 @@ #define PRAGMIZE(X) _Pragma(#X) #endif -/*--- 
Detect compilation with OpenMP support, protect agaisnt - * using OpenMP with Reverse AD (not supported yet). ---*/ -#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE) +/*--- Detect compilation with OpenMP support. ---*/ +#if defined(_OPENMP) #define HAVE_OMP #include From d153a000c928372578a8bc0000f2222d51efd951 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 00:58:28 +0100 Subject: [PATCH 05/57] Add OpDiLib bindings. --- .../include/parallelization/omp_structure.cpp | 50 +++++++++++++++++++ .../include/parallelization/omp_structure.hpp | 12 +++++ Common/src/meson.build | 3 +- SU2_CFD/src/SU2_CFD.cpp | 7 +++ 4 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 Common/include/parallelization/omp_structure.cpp diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp new file mode 100644 index 00000000000..933f154eea6 --- /dev/null +++ b/Common/include/parallelization/omp_structure.cpp @@ -0,0 +1,50 @@ +/*! + * \file omp_structure.cpp + * \brief Source file counterpart for omp_structure.hpp. + * \note Contains OpDiLib logic and includes the OpDiLib source file. + * \author J. Blühdorn + * \version 7.1.0 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "omp_structure.hpp" + +void omp_initialize() { +#ifdef HAVE_OPDI + opdi::logic = new opdi::OmpLogic; + opdi::logic->init(); + opdi::tool = new CoDiOpDiTool; +#endif +} + +void omp_finalize() { +#ifdef HAVE_OPDI + opdi::logic->finalize(); + opdi::backend->finalize(); + delete opdi::logic; + delete opdi::tool; +#endif +} + +#ifdef HAVE_OPDI +#include "opdi.cpp" +#endif diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index a211664c549..c74f2e434e0 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -51,6 +51,13 @@ #define HAVE_OMP #include <omp.h> +#if defined(CODI_REVERSE_TYPE) +#define HAVE_OPDI +#include "opdi/backend/ompt/omptBackend.hpp" +#include "codi/externals/codiOpdiTool.hpp" +#include "opdi.hpp" +#endif + /*--- The generic start of OpenMP constructs. ---*/ #define SU2_OMP(ARGS) PRAGMIZE(omp ARGS) @@ -105,6 +112,11 @@ inline void omp_destroy_lock(omp_lock_t*){} #endif // end OpenMP detection +/*--- Initialization and finalization ---*/ + +void omp_initialize(); +void omp_finalize(); + /*--- Detect SIMD support (version 4+, after Jul 2013). 
---*/ #ifdef _OPENMP #if _OPENMP >= 201307 diff --git a/Common/src/meson.build b/Common/src/meson.build index 5dcbb57c66f..b3e0726e70c 100644 --- a/Common/src/meson.build +++ b/Common/src/meson.build @@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp', 'CConfig.cpp', 'basic_types/ad_structure.cpp', 'wall_model.cpp', - '../include/parallelization/mpi_structure.cpp']) + '../include/parallelization/mpi_structure.cpp', + '../include/parallelization/omp_structure.cpp']) subdir('linear_algebra') subdir('toolboxes') diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index a73cb5126dc..0353d547eba 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -56,6 +56,10 @@ int main(int argc, char *argv[]) { CLI11_PARSE(app, argc, argv) + /*--- OpenMP initialization ---*/ + + omp_initialize(); + omp_set_num_threads(num_threads); /*--- MPI initialization, and buffer setting ---*/ @@ -173,6 +177,9 @@ int main(int argc, char *argv[]) { /*--- Finalize MPI parallelization. ---*/ SU2_MPI::Finalize(); + /*--- Finalize OpenMP. ---*/ + omp_finalize(); + return EXIT_SUCCESS; } From d9ce155649a05d2cfe6ff3ade526002884a9f140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 10:59:03 +0100 Subject: [PATCH 06/57] Update AD interface. 
--- Common/include/basic_types/ad_structure.hpp | 149 ++++++++++++------ .../basic_types/datatype_structure.hpp | 87 +--------- Common/include/code_config.hpp | 120 ++++++++++++++ .../include/parallelization/omp_structure.cpp | 2 +- .../include/parallelization/omp_structure.hpp | 11 +- Common/src/basic_types/ad_structure.cpp | 5 +- Common/src/linear_algebra/CSysSolve.cpp | 2 +- SU2_CFD/src/SU2_CFD.cpp | 10 ++ .../src/drivers/CDiscAdjMultizoneDriver.cpp | 2 +- .../src/drivers/CDiscAdjSinglezoneDriver.cpp | 2 +- 10 files changed, 248 insertions(+), 142 deletions(-) create mode 100644 Common/include/code_config.hpp diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 6353ec3046c..d93df0414fe 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -1,7 +1,7 @@ /*! * \file ad_structure.hpp * \brief Main routines for the algorithmic differentiation (AD) structure. - * \author T. Albring + * \author T. Albring, J. Blühdorn * \version 7.1.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -28,6 +28,7 @@ #pragma once #include "datatype_structure.hpp" +#include "../parallelization/omp_structure.hpp" /*! 
* \namespace AD @@ -278,19 +279,23 @@ namespace AD{ extern int adjointVectorPosition; - /*--- Reference to the tape ---*/ - - extern su2double::TapeType& globalTape; - extern bool Status; extern bool PreaccActive; extern bool PreaccEnabled; - extern su2double::TapeType::Position StartPosition, EndPosition; +#ifdef HAVE_OPDI + using CoDiTapePosition = su2double::TapeType::Position; + using OpDiState = void*; + using TapePosition = std::pair; +#else + using TapePosition = su2double::TapeType::Position; +#endif - extern std::vector TapePositions; + extern TapePosition StartPosition, EndPosition; + + extern std::vector TapePositions; extern std::vector localInputValues; @@ -298,42 +303,68 @@ namespace AD{ extern codi::PreaccumulationHelper PreaccHelper; + /*--- Reference to the tape. ---*/ + + FORCEINLINE su2double::TapeType& getGlobalTape() { + return su2double::getGlobalTape(); + } + FORCEINLINE void RegisterInput(su2double &data, bool push_index = true) { - AD::globalTape.registerInput(data); + AD::getGlobalTape().registerInput(data); if (push_index) { inputValues.push_back(data.getGradientData()); } } - FORCEINLINE void RegisterOutput(su2double& data) {AD::globalTape.registerOutput(data);} + FORCEINLINE void RegisterOutput(su2double& data) {AD::getGlobalTape().registerOutput(data);} FORCEINLINE void ResetInput(su2double &data) {data.getGradientData() = su2double::GradientData();} - FORCEINLINE void StartRecording() {AD::globalTape.setActive();} + FORCEINLINE void StartRecording() {AD::getGlobalTape().setActive();} - FORCEINLINE void StopRecording() {AD::globalTape.setPassive();} + FORCEINLINE void StopRecording() {AD::getGlobalTape().setPassive();} - FORCEINLINE bool TapeActive() { return AD::globalTape.isActive(); } + FORCEINLINE bool TapeActive() { return AD::getGlobalTape().isActive(); } - FORCEINLINE void PrintStatistics() {AD::globalTape.printStatistics();} + FORCEINLINE void PrintStatistics() {AD::getGlobalTape().printStatistics();} - FORCEINLINE void 
ClearAdjoints() {AD::globalTape.clearAdjoints(); } + FORCEINLINE void ClearAdjoints() {AD::getGlobalTape().clearAdjoints(); } - FORCEINLINE void ComputeAdjoint() {AD::globalTape.evaluate(); adjointVectorPosition = 0;} + FORCEINLINE void ComputeAdjoint() { + #if defined(HAVE_OPDI) + opdi::logic->prepareEvaluate(); + #endif + AD::getGlobalTape().evaluate(); + adjointVectorPosition = 0; + } FORCEINLINE void ComputeAdjoint(unsigned short enter, unsigned short leave) { - AD::globalTape.evaluate(TapePositions[enter], TapePositions[leave]); + #if defined(HAVE_OPDI) + opdi::logic->recoverState(TapePositions[enter].second); + opdi::logic->prepareEvaluate(); + AD::getGlobalTape().evaluate(TapePositions[enter].first, TapePositions[leave].first); + #else + AD::getGlobalTape().evaluate(TapePositions[enter], TapePositions[leave]); + #endif if (leave == 0) adjointVectorPosition = 0; } FORCEINLINE void Reset() { - globalTape.reset(); + AD::getGlobalTape().reset(); + #if defined(HAVE_OPDI) + opdi::logic->reset(); + #endif if (inputValues.size() != 0) { adjointVectorPosition = 0; inputValues.clear(); } if (TapePositions.size() != 0) { + #if defined(HAVE_OPDI) + for (TapePosition& pos : TapePositions) { + opdi::logic->freeState(pos.second); + } + #endif TapePositions.clear(); } } @@ -343,11 +374,11 @@ namespace AD{ } FORCEINLINE void SetDerivative(int index, const double val) { - AD::globalTape.setGradient(index, val); + AD::getGlobalTape().setGradient(index, val); } FORCEINLINE double GetDerivative(int index) { - return AD::globalTape.getGradient(index); + return AD::getGlobalTape().getGradient(index); } /*--- Base case for parameter pack expansion. 
---*/ @@ -397,7 +428,7 @@ namespace AD{ } FORCEINLINE void StartPreacc() { - if (globalTape.isActive() && PreaccEnabled) { + if (AD::getGlobalTape().isActive() && PreaccEnabled) { PreaccHelper.start(); PreaccActive = true; } @@ -438,7 +469,11 @@ namespace AD{ } FORCEINLINE void Push_TapePosition() { - TapePositions.push_back(AD::globalTape.getPosition()); + #if defined(HAVE_OPDI) + TapePositions.push_back({AD::getGlobalTape().getPosition(), opdi::logic->exportState()}); + #else + TapePositions.push_back(AD::getGlobalTape().getPosition()); + #endif } FORCEINLINE void EndPreacc(){ @@ -448,56 +483,77 @@ namespace AD{ } FORCEINLINE void StartExtFunc(bool storePrimalInput, bool storePrimalOutput){ - FuncHelper = new ExtFuncHelper(true); - if (!storePrimalInput){ - FuncHelper->disableInputPrimalStore(); - } - if (!storePrimalOutput){ - FuncHelper->disableOutputPrimalStore(); + SU2_OMP_MASTER + { + FuncHelper = new ExtFuncHelper(true); + if (!storePrimalInput){ + FuncHelper->disableInputPrimalStore(); + } + if (!storePrimalOutput){ + FuncHelper->disableOutputPrimalStore(); + } } } FORCEINLINE void SetExtFuncIn(const su2double &data) { - FuncHelper->addInput(data); + SU2_OMP_MASTER + { + FuncHelper->addInput(data); + } } template FORCEINLINE void SetExtFuncIn(const T& data, const int size) { - for (int i = 0; i < size; i++) { - FuncHelper->addInput(data[i]); + SU2_OMP_MASTER + { + for (int i = 0; i < size; i++) { + FuncHelper->addInput(data[i]); + } } } template FORCEINLINE void SetExtFuncIn(const T& data, const int size_x, const int size_y) { - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - FuncHelper->addInput(data[i][j]); + SU2_OMP_MASTER + { + for (int i = 0; i < size_x; i++) { + for (int j = 0; j < size_y; j++) { + FuncHelper->addInput(data[i][j]); + } } } } FORCEINLINE void SetExtFuncOut(su2double& data) { - if (globalTape.isActive()) { - FuncHelper->addOutput(data); + SU2_OMP_MASTER + { + if (AD::getGlobalTape().isActive()) { + 
FuncHelper->addOutput(data); + } } } template FORCEINLINE void SetExtFuncOut(T&& data, const int size) { - for (int i = 0; i < size; i++) { - if (globalTape.isActive()) { - FuncHelper->addOutput(data[i]); + SU2_OMP_MASTER + { + for (int i = 0; i < size; i++) { + if (AD::getGlobalTape().isActive()) { + FuncHelper->addOutput(data[i]); + } } } } template FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) { - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - if (globalTape.isActive()) { - FuncHelper->addOutput(data[i][j]); + SU2_OMP_MASTER + { + for (int i = 0; i < size_x; i++) { + for (int j = 0; j < size_y; j++) { + if (AD::getGlobalTape().isActive()) { + FuncHelper->addOutput(data[i][j]); + } } } } @@ -508,10 +564,15 @@ namespace AD{ checkpoint->clear(); } - FORCEINLINE void EndExtFunc() { delete FuncHelper; } + FORCEINLINE void EndExtFunc() { + SU2_OMP_MASTER + { + delete FuncHelper; + } + } FORCEINLINE bool BeginPassive() { - if(AD::globalTape.isActive()) { + if(AD::getGlobalTape().isActive()) { StopRecording(); return true; } diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp index 63965aa92ad..943e57a8fbf 100644 --- a/Common/include/basic_types/datatype_structure.hpp +++ b/Common/include/basic_types/datatype_structure.hpp @@ -30,91 +30,10 @@ #include #include #include -#include - -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) -#define FORCEINLINE inline __attribute__((always_inline)) -#else -#define FORCEINLINE inline -#endif - -#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) -#define NEVERINLINE inline __attribute__((noinline)) -#else -#define NEVERINLINE inline -#endif - -#if defined(__INTEL_COMPILER) -/*--- Disable warnings related to inline attributes. 
---*/ -#pragma warning disable 2196 -#pragma warning disable 3415 -/*--- Disable warnings related to overloaded virtual. ---*/ -#pragma warning disable 654 -#pragma warning disable 1125 -#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE) -#pragma warning disable 1875 -#endif -#endif - -/*--- Convenience SFINAE typedef to conditionally - * enable/disable function template overloads. ---*/ -template -using su2enable_if = typename std::enable_if::type; - -/*--- Depending on the datatype defined during the configuration, - * include the correct definition, and create the main typedef. ---*/ - -#if defined(CODI_REVERSE_TYPE) // reverse mode AD -#include "codi.hpp" -#include "codi/tools/dataStore.hpp" - -#ifndef CODI_INDEX_TAPE -#define CODI_INDEX_TAPE 0 -#endif -#ifndef CODI_PRIMAL_TAPE -#define CODI_PRIMAL_TAPE 0 -#endif -#ifndef CODI_PRIMAL_INDEX_TAPE -#define CODI_PRIMAL_INDEX_TAPE 0 -#endif - -#if defined(_OPENMP) -using su2double = codi::RealReverseIndexParallel; -#else -#if CODI_INDEX_TAPE -using su2double = codi::RealReverseIndex; -#elif CODI_PRIMAL_TAPE -using su2double = codi::RealReversePrimal; -#elif CODI_PRIMAL_INDEX_TAPE -using su2double = codi::RealReversePrimalIndex; -#else -using su2double = codi::RealReverse; -#endif -#endif - -#elif defined(CODI_FORWARD_TYPE) // forward mode AD -#include "codi.hpp" -using su2double = codi::RealForward; - -#else // primal / direct / no AD -using su2double = double; -#endif +#include "../code_config.hpp" #include "ad_structure.hpp" -/*--- This type can be used for (rare) compatiblity cases or for - * computations that are intended to be (always) passive. ---*/ -using passivedouble = double; - -/*--- Define a type for potentially lower precision operations. ---*/ -#ifdef USE_MIXED_PRECISION -using su2mixedfloat = float; -#else -using su2mixedfloat = passivedouble; -#endif - /*! 
* \namespace SU2_TYPE * \brief Namespace for defining the datatype wrapper routines, this acts as a base @@ -178,11 +97,11 @@ namespace SU2_TYPE { #ifdef CODI_REVERSE_TYPE FORCEINLINE passivedouble GetSecondary(const su2double& data) { - return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]); + return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]); } FORCEINLINE passivedouble GetDerivative(const su2double& data) { - return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]); + return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]); } #else // forward FORCEINLINE passivedouble GetSecondary(const su2double& data) {return data.getGradient();} diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp new file mode 100644 index 00000000000..0c017612a6b --- /dev/null +++ b/Common/include/code_config.hpp @@ -0,0 +1,120 @@ +/*! + * \file code_config.hpp + * \brief Header file for collecting common macros, definitions and type configurations. + * \author T. Albring, P. Gomes, J. Blühdorn + * \version 7.1.0 "Blackbird" + * + * SU2 Project Website: https://su2code.github.io + * + * The SU2 Project is maintained by the SU2 Foundation + * (http://su2foundation.org) + * + * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) + * + * SU2 is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * SU2 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with SU2. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include + +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +#define FORCEINLINE inline __attribute__((always_inline)) +#else +#define FORCEINLINE inline +#endif + +#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +#define NEVERINLINE inline __attribute__((noinline)) +#else +#define NEVERINLINE inline +#endif + +#if defined(__INTEL_COMPILER) +/*--- Disable warnings related to inline attributes. ---*/ +#pragma warning disable 2196 +#pragma warning disable 3415 +/*--- Disable warnings related to overloaded virtual. ---*/ +#pragma warning disable 654 +#pragma warning disable 1125 +#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE) +#pragma warning disable 1875 +#endif +#endif + +/*--- Convenience SFINAE typedef to conditionally + * enable/disable function template overloads. ---*/ +template +using su2enable_if = typename std::enable_if::type; + +/*--- Detect compilation with OpenMP. ---*/ +#if defined(_OPENMP) +#define HAVE_OMP +#endif + +/*--- Depending on the datatype defined during the configuration, + * include the correct definition, and create the main typedef. 
---*/ + +#if defined(CODI_REVERSE_TYPE) // reverse mode AD +#include "codi.hpp" +#include "codi/tools/dataStore.hpp" + +#ifndef CODI_INDEX_TAPE +#define CODI_INDEX_TAPE 0 +#endif +#ifndef CODI_PRIMAL_TAPE +#define CODI_PRIMAL_TAPE 0 +#endif +#ifndef CODI_PRIMAL_INDEX_TAPE +#define CODI_PRIMAL_INDEX_TAPE 0 +#endif + +#if defined(HAVE_OMP) +using su2double = codi::RealReverseIndexParallel; +#else +#if CODI_INDEX_TAPE +using su2double = codi::RealReverseIndex; +#elif CODI_PRIMAL_TAPE +using su2double = codi::RealReversePrimal; +#elif CODI_PRIMAL_INDEX_TAPE +using su2double = codi::RealReversePrimalIndex; +#else +using su2double = codi::RealReverse; +#endif +#endif +#elif defined(CODI_FORWARD_TYPE) // forward mode AD +#include "codi.hpp" +using su2double = codi::RealForward; + +#else // primal / direct / no AD +using su2double = double; +#endif + +/*--- This type can be used for (rare) compatiblity cases or for + * computations that are intended to be (always) passive. ---*/ +using passivedouble = double; + +/*--- Define a type for potentially lower precision operations. ---*/ +#ifdef USE_MIXED_PRECISION +using su2mixedfloat = float; +#else +using su2mixedfloat = passivedouble; +#endif + +/*--- Detect if OpDiLib has to be used. ---*/ +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) +#define HAVE_OPDI +#endif diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp index 933f154eea6..13183c9178a 100644 --- a/Common/include/parallelization/omp_structure.cpp +++ b/Common/include/parallelization/omp_structure.cpp @@ -1,7 +1,7 @@ /*! * \file omp_structure.cpp * \brief Source file counterpart for omp_structure.hpp. - * \note Contains OpDiLib logic and includes the OpDiLib source file. + * \note Contains OpDiLib initialization, finalization and includes the OpDiLib source file. * \author J. 
Blühdorn * \version 7.1.0 "Blackbird" * diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index c74f2e434e0..7a258881f86 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -12,7 +12,7 @@ * e.g. SU2_OMP_PARALLEL. Exotic pragmas of limited portability should be * defined here with suitable fallback versions to limit the spread of * compiler tricks in other areas of the code. - * \author P. Gomes + * \author P. Gomes, J. Blühdorn * \version 7.1.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -38,7 +38,7 @@ #pragma once -#include "../basic_types/datatype_structure.hpp" +#include "../code_config.hpp" #if defined(_MSC_VER) #define PRAGMIZE(X) __pragma(X) @@ -46,13 +46,10 @@ #define PRAGMIZE(X) _Pragma(#X) #endif -/*--- Detect compilation with OpenMP support. ---*/ -#if defined(_OPENMP) -#define HAVE_OMP +#if defined(HAVE_OMP) #include -#if defined(CODI_REVERSE_TYPE) -#define HAVE_OPDI +#if defined(HAVE_OPDI) #include "opdi/backend/ompt/omptBackend.hpp" #include "codi/externals/codiOpdiTool.hpp" #include "opdi.hpp" diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index 6d3a99c4375..f9fc9f460e2 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -37,9 +37,8 @@ namespace AD { std::vector localInputValues; std::vector localOutputValues; - su2double::TapeType& globalTape = su2double::getGlobalTape(); - su2double::TapeType::Position StartPosition, EndPosition; - std::vector TapePositions; + TapePosition StartPosition, EndPosition; + std::vector TapePositions; bool PreaccActive = false; bool PreaccEnabled = true; diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 2f4907f00cd..888cab7e41c 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp 
@@ -825,7 +825,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co if (config->GetDiscrete_Adjoint()) { #ifdef CODI_REVERSE_TYPE - TapeActive = AD::globalTape.isActive(); + TapeActive = AD::getGlobalTape().isActive(); AD::StartExtFunc(false, false); diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index 0353d547eba..7a3675026d9 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -73,6 +73,11 @@ int main(int argc, char *argv[]) { #endif SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm(); + /*--- AD initialization ---*/ +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) + AD::getGlobalTape().initialize(); +#endif + /*--- Uncomment the following line if runtime NaN catching is desired. ---*/ // feenableexcept(FE_INVALID | FE_OVERFLOW); @@ -174,6 +179,11 @@ int main(int argc, char *argv[]) { libxsmm_finalize(); #endif + /*--- Finalize AD, if necessary. ---*/ +#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) + AD::getGlobalTape().finalize(); +#endif + /*--- Finalize MPI parallelization. 
---*/ SU2_MPI::Finalize(); diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp index 296065b8668..e09af868ef9 100644 --- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp @@ -624,7 +624,7 @@ void CDiscAdjMultizoneDriver::SetRecording(unsigned short kind_recording, Kind_T if (rank == MASTER_NODE) AD::PrintStatistics(); #ifdef CODI_REVERSE_TYPE if (size > SINGLE_NODE) { - su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0; + su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0; SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); if (rank == MASTER_NODE) { cout << "MPI\n"; diff --git a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp index 48a9463e00d..429bafcd796 100644 --- a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp +++ b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp @@ -295,7 +295,7 @@ void CDiscAdjSinglezoneDriver::SetRecording(unsigned short kind_recording){ if (rank == MASTER_NODE) AD::PrintStatistics(); #ifdef CODI_REVERSE_TYPE if (size > SINGLE_NODE) { - su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0; + su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0; SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); if (rank == MASTER_NODE) { cout << "MPI\n"; From c9ac197daeb1e606392cf1378d01da8f3ba7818b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 12:46:51 +0100 Subject: [PATCH 07/57] Linear algebra updates. 
--- Common/src/linear_algebra/CSysMatrix.cpp | 11 ++++--- Common/src/linear_algebra/CSysSolve.cpp | 30 +++++++++++++++---- Common/src/linear_algebra/CSysSolve_b.cpp | 35 ++++++++++++++--------- 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index e71afd5144b..003413cc93c 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -477,7 +477,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, template void CSysMatrix::SetValZero() { const auto size = nnz*nVar*nEqn; - const auto chunk = roundUpDiv(size,omp_get_max_threads()); + const auto chunk = roundUpDiv(size,omp_get_num_threads()); const auto begin = chunk * omp_get_thread_num(); const auto mySize = min(chunk, size-begin) * sizeof(ScalarType); memset(&matrix[begin], 0, mySize); @@ -633,8 +633,6 @@ void CSysMatrix::MatrixVectorProductTransposed(const CSysVectors ---*/ #ifndef NDEBUG @@ -647,8 +645,13 @@ void CSysMatrix::MatrixVectorProductTransposed(const CSysVector::Solve(CSysMatrix & Jacobian, co AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); + SU2_OMP_BARRIER + AD::StopRecording(); #endif } @@ -924,16 +926,26 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co AD::StartRecording(); + SU2_OMP_BARRIER + AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize()); + SU2_OMP_BARRIER + #ifdef CODI_REVERSE_TYPE - AD::FuncHelper->addUserData(&LinSysRes); - AD::FuncHelper->addUserData(&LinSysSol); - AD::FuncHelper->addUserData(&Jacobian); - AD::FuncHelper->addUserData(geometry); - AD::FuncHelper->addUserData(config); - AD::FuncHelper->addUserData(this); + SU2_OMP_MASTER + { + AD::FuncHelper->addUserData(&LinSysRes); + AD::FuncHelper->addUserData(&LinSysSol); + AD::FuncHelper->addUserData(&Jacobian); + AD::FuncHelper->addUserData(geometry); + AD::FuncHelper->addUserData(config); + AD::FuncHelper->addUserData(this); + } + SU2_OMP_BARRIER 
+ AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); + SU2_OMP_BARRIER #endif /*--- Build preconditioner for the transposed Jacobian ---*/ @@ -953,7 +965,11 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co break; } + SU2_OMP_BARRIER + AD::EndExtFunc(); + + SU2_OMP_BARRIER } return IterLinSol; @@ -1055,7 +1071,9 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, delete precond; + SU2_OMP_MASTER Iterations = IterLinSol; + return IterLinSol; } diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp index 062708f9dc7..3953bd6cfb9 100644 --- a/Common/src/linear_algebra/CSysSolve_b.cpp +++ b/Common/src/linear_algebra/CSysSolve_b.cpp @@ -1,7 +1,7 @@ /*! * \file CSysSolve_b.cpp * \brief Routines for the linear solver used in the reverse sweep of AD. - * \author T. Albring + * \author T. Albring, J. Blühdorn * \version 7.1.0 "Blackbird" * * SU2 Project Website: https://su2code.github.io @@ -37,36 +37,45 @@ void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::Re codi::DataStore* d) { CSysVector* LinSysRes_b = nullptr; - d->getData(LinSysRes_b); + d->getDataByIndex(LinSysRes_b, 0); CSysVector* LinSysSol_b = nullptr; - d->getData(LinSysSol_b); + d->getDataByIndex(LinSysSol_b, 1); CSysMatrix* Jacobian = nullptr; - d->getData(Jacobian); + d->getDataByIndex(Jacobian, 2); CGeometry* geometry = nullptr; - d->getData(geometry); + d->getDataByIndex(geometry, 3); const CConfig* config = nullptr; - d->getData(config); + d->getDataByIndex(config, 4); CSysSolve* solver = nullptr; - d->getData(solver); + d->getDataByIndex(solver, 5); /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/ - for (unsigned long i = 0; i < n; i++) { - (*LinSysRes_b)[i] = y_b[i]; - (*LinSysSol_b)[i] = 0.0; + SU2_OMP_BARRIER + SU2_OMP_MASTER + { + for (unsigned long i = 0; i < n; i++) { + (*LinSysRes_b)[i] = y_b[i]; + (*LinSysSol_b)[i] = 0.0; + } } + SU2_OMP_BARRIER 
solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config); - for (unsigned long i = 0; i < n; i ++) { - x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i)); + SU2_OMP_BARRIER + SU2_OMP_MASTER + { + for (unsigned long i = 0; i < n; i ++) { + x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i)); + } } - + SU2_OMP_BARRIER } template class CSysSolve_b; From 5074ee34d2f66a0e826bdf02da552f7380f7d830 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 13:52:25 +0100 Subject: [PATCH 08/57] Zero-initialize memory. --- Common/include/toolboxes/allocation_toolbox.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp index 9754217d3a7..279e3d62a9b 100644 --- a/Common/include/toolboxes/allocation_toolbox.hpp +++ b/Common/include/toolboxes/allocation_toolbox.hpp @@ -36,6 +36,8 @@ #include #endif +#include + #include namespace MemoryAllocation @@ -78,6 +80,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept #else ptr = ::aligned_alloc(alignment, size); #endif + memset(ptr, 0, size); return static_cast(ptr); } From 33437ced739a9ed625e8232561a8cca8b5cf01b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 13:53:15 +0100 Subject: [PATCH 09/57] Fix CDiscAdjFEAIteration dependencies. 
--- .../src/iteration/CDiscAdjFEAIteration.cpp | 37 +++++++++++-------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index b0887c79a41..42fe51675c1 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -301,6 +301,11 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge bool de_effects = config[iZone]->GetDE_Effects() && nonlinear; bool element_based = dir_solver->IsElementBased() && nonlinear; + SU2_OMP_PARALLEL + { + + int thread = omp_get_thread_num(); + for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) { su2double E = adj_solver->GetVal_Young(iProp); su2double nu = adj_solver->GetVal_Poisson(iProp); @@ -309,33 +314,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge /*--- Add dependencies for E and Nu ---*/ - structural_numerics[FEA_TERM]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); /*--- Add dependencies for Rho and Rho_DL ---*/ - structural_numerics[FEA_TERM]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Add dependencies for element-based simulations. 
---*/ if (element_based) { /*--- Neo Hookean Compressible ---*/ - structural_numerics[MAT_NHCOMP]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_NHCOMP]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Ideal DE ---*/ - structural_numerics[MAT_IDEALDE]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_IDEALDE]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Knowles ---*/ - structural_numerics[MAT_KNOWLES]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_KNOWLES]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); } } if (de_effects) { for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) { - structural_numerics[FEA_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); - structural_numerics[DE_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); } } @@ -351,19 +356,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) { su2double dvfea = adj_solver->GetVal_DVFEA(iDV); - structural_numerics[FEA_TERM]->Set_DV_Val(iDV, dvfea); + structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, 
dvfea); - if (de_effects) structural_numerics[DE_TERM]->Set_DV_Val(iDV, dvfea); + if (de_effects) structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); if (element_based) { - structural_numerics[MAT_NHCOMP]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_IDEALDE]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_KNOWLES]->Set_DV_Val(iDV, dvfea); + structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); + structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); + structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); } } break; } + } // end SU2_OMP_PARALLEL + /*--- MPI dependencies. ---*/ dir_solver->InitiateComms(structural_geometry, config[iZone], SOLUTION_FEA); From 5735c0e898054934633a826e9b457e2dac94eb95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 1 Mar 2021 14:26:39 +0100 Subject: [PATCH 10/57] Disable preaccumulation for OpenMP. --- Common/src/CConfig.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp index eddd0d68918..d4cf8637e8c 100644 --- a/Common/src/CConfig.cpp +++ b/Common/src/CConfig.cpp @@ -4390,7 +4390,11 @@ void CConfig::SetPostprocessing(unsigned short val_software, unsigned short val_ #if defined CODI_REVERSE_TYPE AD_Mode = YES; +#if defined HAVE_OMP + AD::PreaccEnabled = false; +#else AD::PreaccEnabled = AD_Preaccumulation; +#endif #else if (AD_Mode == YES) { From 4a820f715c746108db6ab89bf6ee4f5bd0ee9465 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 2 Mar 2021 19:04:39 +0100 Subject: [PATCH 11/57] Fix python wrapper builds. 
--- SU2_PY/pySU2/pySU2.i | 1 + SU2_PY/pySU2/pySU2ad.i | 1 + 2 files changed, 2 insertions(+) diff --git a/SU2_PY/pySU2/pySU2.i b/SU2_PY/pySU2/pySU2.i index ae4307d4c22..7e16b15f7ca 100644 --- a/SU2_PY/pySU2/pySU2.i +++ b/SU2_PY/pySU2/pySU2.i @@ -46,6 +46,7 @@ threads="1" %} // ----------- USED MODULES ------------ +%import "../../Common/include/code_config.hpp" %import "../../Common/include/basic_types/datatype_structure.hpp" %import "../../Common/include/parallelization/mpi_structure.hpp" %include "std_string.i" diff --git a/SU2_PY/pySU2/pySU2ad.i b/SU2_PY/pySU2/pySU2ad.i index 9af6ac16fff..d0e6605f672 100644 --- a/SU2_PY/pySU2/pySU2ad.i +++ b/SU2_PY/pySU2/pySU2ad.i @@ -46,6 +46,7 @@ threads="1" %} // ----------- USED MODULES ------------ +%import "../../Common/include/code_config.hpp" %import "../../Common/include/basic_types/datatype_structure.hpp" %import "../../Common/include/parallelization/mpi_structure.hpp" %include "std_string.i" From a26e2be40c2904e6fdae73ea5df7d9db3941b8cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 2 Mar 2021 19:10:02 +0100 Subject: [PATCH 12/57] Fix missing definition of size_t. --- Common/include/parallelization/omp_structure.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index 7a258881f86..9fd13aa9afc 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -38,6 +38,8 @@ #pragma once +#include + #include "../code_config.hpp" #if defined(_MSC_VER) From 94ac52ef4e6691a1a6e467d11e3dff7beed3e7ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 3 Mar 2021 11:56:06 +0100 Subject: [PATCH 13/57] Check OMPT support. 
--- Common/include/code_config.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 0c017612a6b..a708e2d3cb6 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -61,7 +61,7 @@ template using su2enable_if = typename std::enable_if::type; /*--- Detect compilation with OpenMP. ---*/ -#if defined(_OPENMP) +#if defined(_OPENMP) && (!defined(CODI_REVERSE_TYPE) || _OPENMP >= 201811) #define HAVE_OMP #endif From 7bbb9cd8673c892ed92da63bf221d15f9a774466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Mon, 8 Mar 2021 12:35:56 +0100 Subject: [PATCH 14/57] CoDiPack update. --- externals/codi | 2 +- meson_scripts/init.py | 2 +- preconfigure.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/externals/codi b/externals/codi index 2a0dbdbed2f..89958053647 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87 +Subproject commit 899580536474003370a912234bec4e2b48de2bcc diff --git a/meson_scripts/init.py b/meson_scripts/init.py index abf9518175a..a2bf57623b3 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,7 +44,7 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87' + sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' diff --git a/preconfigure.py b/preconfigure.py index 0fc86eaa75a..ee0e86e03c5 100755 --- a/preconfigure.py +++ b/preconfigure.py @@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False): # This 
information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87' + sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' From cfb7285034065f7da4868c4101b512868c74155b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 11 Mar 2021 22:21:03 +0100 Subject: [PATCH 15/57] OpDiLib update. --- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index 3c4132bbf12..f14b42f1255 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit 3c4132bbf1266b2e999d22212c8de88ec085a3e0 +Subproject commit f14b42f1255674bb10db91e3f45ceb39c1bccd17 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index a2bf57623b3..a7e018a0a6b 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = '3c4132bbf1266b2e999d22212c8de88ec085a3e0' + sha_version_opdi = 'f14b42f1255674bb10db91e3f45ceb39c1bccd17' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From 8fc09412fb52deb665f06682ef6e3ad29ff2c790 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 11 Mar 2021 22:25:39 +0100 Subject: [PATCH 16/57] CoDiPack update. 
--- externals/codi | 2 +- meson_scripts/init.py | 2 +- preconfigure.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/externals/codi b/externals/codi index 89958053647..6a67202a388 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 899580536474003370a912234bec4e2b48de2bcc +Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index a7e018a0a6b..a42640f9fde 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -44,7 +44,7 @@ def init_submodules(method = 'auto'): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc' + sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' diff --git a/preconfigure.py b/preconfigure.py index ee0e86e03c5..ca8187afc2c 100755 --- a/preconfigure.py +++ b/preconfigure.py @@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False): # This information of the modules is used if projects was not cloned using git # The sha tag must be maintained manually to point to the correct commit - sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc' + sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692' github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' From e04f931abc10bab7db19e21a9f2bd26dca972ac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 11 Mar 2021 23:35:29 +0100 Subject: [PATCH 17/57] Enable OpDiLib macro backend. 
--- Common/include/code_config.hpp | 6 +++++- Common/include/parallelization/omp_structure.cpp | 13 ++++++++++++- Common/include/parallelization/omp_structure.hpp | 4 ++++ Common/src/basic_types/ad_structure.cpp | 2 ++ Common/src/meson.build | 3 +-- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 11fb69bd296..a9aabf17bca 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -61,7 +61,7 @@ template using su2enable_if = typename std::enable_if::type; /*--- Detect compilation with OpenMP. ---*/ -#if defined(_OPENMP) && (!defined(CODI_REVERSE_TYPE) || _OPENMP >= 201811) +#if defined(_OPENMP) #define HAVE_OMP #endif @@ -118,3 +118,7 @@ using su2mixedfloat = passivedouble; #if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) #define HAVE_OPDI #endif + +#if _OPENMP >= 201811 +#define HAVE_OMPT +#endif diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp index 069be89f250..0daca1ca021 100644 --- a/Common/include/parallelization/omp_structure.cpp +++ b/Common/include/parallelization/omp_structure.cpp @@ -30,18 +30,29 @@ void omp_initialize() { #ifdef HAVE_OPDI +#if !defined(HAVE_OMPT) + opdi::backend = new opdi::MacroBackend; + opdi::backend->init(); +#endif opdi::logic = new opdi::OmpLogic; opdi::logic->init(); + su2double::getGlobalTape().initialize(); opdi::tool = new CoDiOpDiTool; + opdi::tool->init(); #endif } void omp_finalize() { #ifdef HAVE_OPDI + opdi::tool->finalize(); + su2double::getGlobalTape().finalize(); opdi::logic->finalize(); opdi::backend->finalize(); - delete opdi::logic; delete opdi::tool; + delete opdi::logic; +#if !defined(HAVE_OMPT) + delete opdi::backend; +#endif #endif } diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index 1f01245017b..bbf976f76f6 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ 
b/Common/include/parallelization/omp_structure.hpp @@ -52,7 +52,11 @@ #include #if defined(HAVE_OPDI) +#if defined(HAVE_OMPT) #include "opdi/backend/ompt/omptBackend.hpp" +#else +#include "opdi/backend/macro/macroBackend.hpp" +#endif #include "codi/externals/codiOpdiTool.hpp" #include "opdi.hpp" #endif diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index becea08156a..20bfe3e6d74 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -49,3 +49,5 @@ namespace AD { #endif } + +#include "../../include/parallelization/omp_structure.cpp" diff --git a/Common/src/meson.build b/Common/src/meson.build index b3e0726e70c..5dcbb57c66f 100644 --- a/Common/src/meson.build +++ b/Common/src/meson.build @@ -3,8 +3,7 @@ common_src =files(['graph_coloring_structure.cpp', 'CConfig.cpp', 'basic_types/ad_structure.cpp', 'wall_model.cpp', - '../include/parallelization/mpi_structure.cpp', - '../include/parallelization/omp_structure.cpp']) + '../include/parallelization/mpi_structure.cpp']) subdir('linear_algebra') subdir('toolboxes') From 1351c797cb535ab37aa38e1caf08b40c88dda381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 11 Mar 2021 23:38:37 +0100 Subject: [PATCH 18/57] Update SU2_OMP macros and introduce END macros. --- .../include/parallelization/omp_structure.hpp | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index bbf976f76f6..7b688d3f388 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -139,8 +139,11 @@ void omp_finalize(); /*--- Convenience macros (do not use excessive nesting). 
---*/ -#define SU2_OMP_MASTER SU2_OMP(master) #define SU2_OMP_ATOMIC SU2_OMP(atomic) + +#ifndef HAVE_OPDI + +#define SU2_OMP_MASTER SU2_OMP(master) #define SU2_OMP_BARRIER SU2_OMP(barrier) #define SU2_OMP_CRITICAL SU2_OMP(critical) @@ -148,9 +151,40 @@ void omp_finalize(); #define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS) #define SU2_OMP_PARALLEL_ON(NTHREADS) SU2_OMP(parallel num_threads(NTHREADS)) +#define SU2_OMP_FOR_(ARGS) SU2_OMP(for ARGS) #define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK)) #define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK)) +#define SU2_NOWAIT nowait + +#define END_SU2_OMP_MASTER +#define END_SU2_OMP_CRITICAL +#define END_SU2_OMP_PARALLEL +#define END_SU2_OMP_FOR + +#else + +#define SU2_OMP_MASTER OPDI_MASTER() +#define SU2_OMP_BARRIER OPDI_BARRIER() +#define SU2_OMP_CRITICAL OPDI_CRITICAL() + +#define SU2_OMP_PARALLEL OPDI_PARALLEL() +#define SU2_OMP_PARALLEL_(ARGS) OPDI_PARALLEL(ARGS) +#define SU2_OMP_PARALLEL_ON(NTHREADS) OPDI_PARALLEL(num_threads(NTHREADS)) + +#define SU2_OMP_FOR_(ARGS) OPDI_FOR(ARGS) +#define SU2_OMP_FOR_DYN(CHUNK) OPDI_FOR(schedule(dynamic,CHUNK)) +#define SU2_OMP_FOR_STAT(CHUNK) OPDI_FOR(schedule(static,CHUNK)) + +#define SU2_NOWAIT OPDI_NOWAIT + +#define END_SU2_OMP_MASTER OPDI_END_MASTER +#define END_SU2_OMP_CRITICAL OPDI_END_CRITICAL +#define END_SU2_OMP_PARALLEL OPDI_END_PARALLEL +#define END_SU2_OMP_FOR OPDI_END_FOR + +#endif + /*--- Convenience functions (e.g. to compute chunk sizes). ---*/ /*! From 6bf97a252d01bcad24b01e42d308478a886448df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 11 Mar 2021 23:42:24 +0100 Subject: [PATCH 19/57] Update specialized macros. 
--- Common/include/linear_algebra/CSysVector.hpp | 7 +++++-- SU2_CFD/include/integration/CNewtonIntegration.hpp | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index 6a54b84efa1..dae407c8973 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -45,12 +45,14 @@ */ #ifdef HAVE_OMP #ifdef HAVE_OMP_SIMD -#define CSYSVEC_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait) +#define CSYSVEC_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT) #else -#define CSYSVEC_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait) +#define CSYSVEC_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) #endif +#define END_CSYSVEC_PARFOR END_SU2_OMP_FOR #else #define CSYSVEC_PARFOR SU2_OMP_SIMD +#define END_CSYSVEC_PARFOR #endif /*! @@ -443,3 +445,4 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> }; #undef CSYSVEC_PARFOR +#undef END_CSYSVEC_PARFOR diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp index 86626b7629e..4450a71898c 100644 --- a/SU2_CFD/include/integration/CNewtonIntegration.hpp +++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp @@ -33,12 +33,14 @@ #ifdef HAVE_OMP #ifdef HAVE_OMP_SIMD -#define CNEWTON_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait) +#define CNEWTON_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT) #else -#define CNEWTON_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait) +#define CNEWTON_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) #endif +#define END_CNEWTON_PARFOR END_SU2_OMP_FOR #else #define CNEWTON_PARFOR SU2_OMP_SIMD +#define END_CNEWTON_PARFOR #endif /*! 
@@ -212,3 +214,4 @@ class CNewtonIntegration final : public CIntegration { }; #undef CNEWTON_PARFOR +#undef END_CNEWTON_PARFOR From aeaf25141fed3662c4782552954075f5628ccf0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 12 Mar 2021 00:09:29 +0100 Subject: [PATCH 20/57] Update macros throughout the code. --- Common/src/linear_algebra/CSysMatrix.cpp | 4 ++-- SU2_CFD/include/limiters/CLimiterDetails.hpp | 2 +- SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp | 10 +++++----- SU2_CFD/include/solvers/CFVMFlowSolverBase.inl | 2 +- SU2_CFD/src/solvers/CFEASolver.cpp | 2 +- SU2_CFD/src/solvers/CTurbSolver.cpp | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 5f523daf0cb..909b9542de8 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -675,7 +675,7 @@ template void CSysMatrix::BuildJacobiPreconditioner(bool transpose) { /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. 
---*/ - SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait) + SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]), transpose); @@ -1105,7 +1105,7 @@ void CSysMatrix::ComputeLineletPreconditioner(const CSysVector localMin = largeNum; localMax =-largeNum; - SU2_OMP(for schedule(static, 512) nowait) + SU2_OMP_FOR_(schedule(static, 512) SU2_NOWAIT) for(size_t iPoint = 0; iPoint < geometry.GetnPointDomain(); ++iPoint) { for(size_t iVar = varBegin; iVar < varEnd; ++iVar) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index acb1135c426..251f494f8ed 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -427,7 +427,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Thread-local variables for min/max reduction. ---*/ su2double minDt = 1e30, maxDt = 0.0; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { su2double Vol = geometry->nodes->GetVolume(iPoint); @@ -509,7 +509,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Thread-local variable for reduction. 
---*/ su2double glbDtND = 1e30; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint)); } @@ -744,7 +744,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Update the solution and residuals ---*/ if (!adjoint) { - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { su2double Vol = geometry->nodes->GetVolume(iPoint) + geometry->nodes->GetPeriodicVolume(iPoint); @@ -869,7 +869,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Add pseudotime term to Jacobian. ---*/ if (implicit) { - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /*--- Modify matrix diagonal to improve diagonal dominance. ---*/ @@ -893,7 +893,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/ - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /*--- Multigrid contribution to residual. ---*/ diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index 5cc95853861..adaba33241d 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -581,7 +581,7 @@ void CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol /*--- Solve or smooth the linear system. 
---*/ - SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait) + SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT) for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) { LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp index 23a47c217f2..d36e156a250 100644 --- a/SU2_CFD/src/solvers/CFEASolver.cpp +++ b/SU2_CFD/src/solvers/CFEASolver.cpp @@ -1317,7 +1317,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, /*--- Compute the von Misses stress at each point, and the maximum for the domain. ---*/ su2double maxVonMises = 0.0; - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { const auto vms = CFEAElasticity::VonMisesStress(nDim, nodes->GetStress_FEM(iPoint)); diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 66d9a7c8308..4428f5a5f72 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -528,7 +528,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver /*--- Build implicit system ---*/ - SU2_OMP(for schedule(static,omp_chunk_size) nowait) + SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { /// TODO: This could be the SetTime_Step of this solver. @@ -637,7 +637,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_ /*--- Solve or smooth the linear system. 
---*/ - SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait) + SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT) for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) { LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); From 5cea3861e1e1e97d54796ce34ac16de4811da89d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 12 Mar 2021 00:22:02 +0100 Subject: [PATCH 21/57] Introduce END macros throughout the code. --- Common/include/basic_types/ad_structure.hpp | 8 ++ Common/include/linear_algebra/CSysSolve.hpp | 4 + Common/include/linear_algebra/CSysVector.hpp | 8 ++ .../include/parallelization/omp_structure.hpp | 3 + Common/include/toolboxes/graph_toolbox.hpp | 2 + Common/src/geometry/CGeometry.cpp | 42 +++++++- Common/src/geometry/CMultiGridGeometry.cpp | 10 +- Common/src/geometry/CPhysicalGeometry.cpp | 30 +++++- .../CIsoparametric.cpp | 5 +- .../src/interface_interpolation/CMirror.cpp | 3 +- .../CNearestNeighbor.cpp | 5 +- .../CRadialBasisFunction.cpp | 7 +- Common/src/linear_algebra/CSysMatrix.cpp | 35 ++++++- Common/src/linear_algebra/CSysSolve.cpp | 13 +++ Common/src/linear_algebra/CSysSolve_b.cpp | 2 + .../gradients/computeGradientsGreenGauss.hpp | 2 + .../computeGradientsLeastSquares.hpp | 2 + .../integration/CNewtonIntegration.hpp | 3 + SU2_CFD/include/limiters/CLimiterDetails.hpp | 4 + SU2_CFD/include/limiters/computeLimiters.hpp | 1 + .../include/limiters/computeLimiters_impl.hpp | 2 + .../include/solvers/CFVMFlowSolverBase.hpp | 31 ++++++ .../include/solvers/CFVMFlowSolverBase.inl | 23 ++++- SU2_CFD/include/solvers/CTurbSASolver.hpp | 1 + SU2_CFD/include/solvers/CTurbSSTSolver.hpp | 1 + SU2_CFD/src/drivers/CDriver.cpp | 35 +++++++ SU2_CFD/src/integration/CIntegration.cpp | 8 +- .../src/integration/CMultiGridIntegration.cpp | 22 ++++- .../src/integration/CNewtonIntegration.cpp | 18 +++- .../integration/CSingleGridIntegration.cpp | 8 +- .../src/iteration/CDiscAdjFEAIteration.cpp | 3 +- 
SU2_CFD/src/iteration/CFluidIteration.cpp | 1 + .../numerics/elasticity/CFEAElasticity.cpp | 1 + SU2_CFD/src/python_wrapper_structure.cpp | 2 + SU2_CFD/src/solvers/CEulerSolver.cpp | 44 ++++++++- SU2_CFD/src/solvers/CFEASolver.cpp | 99 ++++++++++++++++--- SU2_CFD/src/solvers/CIncEulerSolver.cpp | 31 ++++++ SU2_CFD/src/solvers/CIncNSSolver.cpp | 5 + SU2_CFD/src/solvers/CMeshSolver.cpp | 27 ++++- SU2_CFD/src/solvers/CNEMOEulerSolver.cpp | 3 + SU2_CFD/src/solvers/CNSSolver.cpp | 7 ++ SU2_CFD/src/solvers/CSolver.cpp | 20 +++- SU2_CFD/src/solvers/CTurbSASolver.cpp | 15 +++ SU2_CFD/src/solvers/CTurbSSTSolver.cpp | 8 ++ SU2_CFD/src/solvers/CTurbSolver.cpp | 21 +++- 45 files changed, 574 insertions(+), 51 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index a40bf482c2f..1699534828b 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -493,6 +493,7 @@ namespace AD{ FuncHelper->disableOutputPrimalStore(); } } + END_SU2_OMP_MASTER } FORCEINLINE void SetExtFuncIn(const su2double &data) { @@ -500,6 +501,7 @@ namespace AD{ { FuncHelper->addInput(data); } + END_SU2_OMP_MASTER } template @@ -510,6 +512,7 @@ namespace AD{ FuncHelper->addInput(data[i]); } } + END_SU2_OMP_MASTER } template @@ -522,6 +525,7 @@ namespace AD{ } } } + END_SU2_OMP_MASTER } FORCEINLINE void SetExtFuncOut(su2double& data) { @@ -531,6 +535,7 @@ namespace AD{ FuncHelper->addOutput(data); } } + END_SU2_OMP_MASTER } template @@ -543,6 +548,7 @@ namespace AD{ } } } + END_SU2_OMP_MASTER } template @@ -557,6 +563,7 @@ namespace AD{ } } } + END_SU2_OMP_MASTER } FORCEINLINE void delete_handler(void *handler) { @@ -569,6 +576,7 @@ namespace AD{ { delete FuncHelper; } + END_SU2_OMP_MASTER } FORCEINLINE bool BeginPassive() { diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index ecfb4a3a789..548e1986bac 100644 --- 
a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -221,6 +221,7 @@ class CSysSolve { LinSysRes_ptr = &LinSysRes; LinSysSol_ptr = &LinSysSol; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -242,6 +243,7 @@ class CSysSolve { LinSysRes_ptr = &LinSysRes_tmp; LinSysSol_ptr = &LinSysSol_tmp; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -258,6 +260,7 @@ class CSysSolve { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -277,6 +280,7 @@ class CSysSolve { LinSysRes_ptr = nullptr; LinSysSol_ptr = nullptr; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp index dae407c8973..9101eeae083 100644 --- a/Common/include/linear_algebra/CSysVector.hpp +++ b/Common/include/linear_algebra/CSysVector.hpp @@ -188,10 +188,12 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> SU2_OMP_MASTER Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false); + END_SU2_OMP_MASTER SU2_OMP_BARRIER CSYSVEC_PARFOR for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]); + END_CSYSVEC_PARFOR } /*! 
@@ -252,6 +254,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> CSysVector& operator=(const CSysVector& other) { CSYSVEC_PARFOR for (auto i = 0ul; i < nElm; ++i) vec_val[i] = other.vec_val[i]; + END_CSYSVEC_PARFOR return *this; } @@ -263,12 +266,14 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> CSysVector& operator OP(ScalarType val) { \ CSYSVEC_PARFOR \ for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP val; \ + END_CSYSVEC_PARFOR \ return *this; \ } \ template \ CSysVector& operator OP(const VecExpr::CVecExpr& expr) { \ CSYSVEC_PARFOR \ for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP expr.derived()[i]; \ + END_CSYSVEC_PARFOR \ return *this; \ } MAKE_COMPOUND(=) @@ -295,6 +300,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> SU2_OMP_BARRIER SU2_OMP_MASTER dotRes = 0.0; + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Local dot product for each thread. ---*/ @@ -304,6 +310,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> for (auto i = 0ul; i < nElmDomain; ++i) { sum += vec_val[i] * expr.derived()[i]; } + END_CSYSVEC_PARFOR /*--- Update shared variable with "our" partial sum. ---*/ atomicAdd(sum, dotRes); @@ -318,6 +325,7 @@ class CSysVector : public VecExpr::CVecExpr, ScalarType> const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE; SelectMPIWrapper::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER } #endif /*--- Make view of result consistent across threads. 
---*/ diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp index 7b688d3f388..987e87dbd86 100644 --- a/Common/include/parallelization/omp_structure.hpp +++ b/Common/include/parallelization/omp_structure.hpp @@ -232,6 +232,7 @@ void parallelCopy(size_t size, const T* src, U* dst) { SU2_OMP_FOR_STAT(2048) for(size_t i=0; i::value> = 0> inline void atomicAdd(T rhs, T& lhs) diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp index a33e7d43117..c013d6d5b06 100644 --- a/Common/include/toolboxes/graph_toolbox.hpp +++ b/Common/include/toolboxes/graph_toolbox.hpp @@ -166,6 +166,7 @@ class CCompressedSparsePattern { SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads()))) for(Index_t k = 0; k < getOuterSize(); ++k) m_diagPtr(k) = findInnerIdx(k,k); + END_SU2_OMP_PARALLEL } /*! @@ -184,6 +185,7 @@ class CCompressedSparsePattern { assert(m_innerIdxTransp(k) != m_innerIdx.size() && "The pattern is not symmetric."); } } + END_SU2_OMP_PARALLEL } /*! diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index 69f337368de..7c62cdd38c4 100644 --- a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -400,7 +400,9 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) { delete [] bufS_P2PRecv; bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] (); - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } @@ -504,6 +506,7 @@ void CGeometry::PostP2PRecvs(CGeometry *geometry, } } + END_SU2_OMP_MASTER } @@ -601,6 +604,7 @@ void CGeometry::PostP2PSends(CGeometry *geometry, } } + END_SU2_OMP_MASTER } @@ -736,6 +740,7 @@ void CGeometry::InitiateComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. 
---*/ @@ -782,6 +787,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -839,6 +845,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. @@ -848,6 +855,7 @@ void CGeometry::CompleteComms(CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER @@ -1226,7 +1234,9 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) { delete [] bufS_PeriodicRecv; bufS_PeriodicRecv = new unsigned short[nRecv] (); - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CGeometry::PostPeriodicRecvs(CGeometry *geometry, @@ -1283,6 +1293,7 @@ void CGeometry::PostPeriodicRecvs(CGeometry *geometry, } } + END_SU2_OMP_MASTER #endif @@ -1337,7 +1348,8 @@ void CGeometry::PostPeriodicSends(CGeometry *geometry, CURRENT_FUNCTION); break; } - } // end master + } + END_SU2_OMP_MASTER #else /*--- Copy my own rank's data into the recv buffer directly in serial. 
---*/ @@ -3159,6 +3171,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem+iDim] = 0.0; vol_elem[iElem] = 0.0; } + END_SU2_OMP_FOR /*--- Populate ---*/ SU2_OMP_FOR_STAT(256) @@ -3168,6 +3181,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem_global+iDim] = elem[iElem]->GetCG(iDim); vol_elem[iElem_global] = elem[iElem]->GetVolume(); } + END_SU2_OMP_FOR #ifdef HAVE_MPI /*--- Account for the duplication introduced by the halo elements and the @@ -3175,10 +3189,12 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex()] = 1; + END_SU2_OMP_FOR /*--- Share with all processors ---*/ SU2_OMP_MASTER @@ -3195,6 +3211,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm()); halo_detect.swap(char_buffer); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_FOR_STAT(256) @@ -3204,6 +3221,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, cg_elem[nDim*iElem+iDim] /= numRepeat; vol_elem[iElem] /= numRepeat; } + END_SU2_OMP_FOR #endif /*--- SECOND: Each processor performs the average for its elements. 
For each @@ -3223,11 +3241,13 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex()] = values[iElem]; + END_SU2_OMP_FOR #ifdef HAVE_MPI /*--- Share with all processors ---*/ @@ -3237,6 +3257,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); swap(buffer, work_values); delete [] buffer; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Account for duplication ---*/ @@ -3245,6 +3266,7 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, su2double numRepeat = halo_detect[iElem]; work_values[iElem] /= numRepeat; } + END_SU2_OMP_FOR #endif /*--- Filter ---*/ @@ -3308,9 +3330,11 @@ void CGeometry::FilterValuesAtElementCG(const vector &filter_radius, SU2_MPI::Error("Unknown type of filter kernel",CURRENT_FUNCTION); } } + END_SU2_OMP_FOR } - } // end OpenMP parallel section + } + END_SU2_OMP_PARALLEL limited_searches /= kernels.size(); @@ -3342,13 +3366,16 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector &neighbour SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElemGetGlobalIndex(); nFaces_elem[iElem_global] = elem[iElem]->GetnFaces(); } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL #ifdef HAVE_MPI /*--- Share with all processors ---*/ { @@ -3378,6 +3405,7 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector &neighbour /*--- Initialize ---*/ SU2_OMP_FOR_STAT(256) for(auto iElem=0ul; iElem &neighbour } } } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL #ifdef HAVE_MPI /*--- Share with all processors ---*/ { @@ -3523,6 +3553,7 @@ void CGeometry::SetElemVolume() if(nDim==2) elem[iElem]->SetVolume(element->ComputeArea()); else elem[iElem]->SetVolume(element->ComputeVolume()); } + END_SU2_OMP_FOR delete elements[0]; delete elements[1]; @@ -3531,7 +3562,8 @@ void CGeometry::SetElemVolume() delete elements[3]; } - } // end SU2_OMP_PARALLEL + } + 
END_SU2_OMP_PARALLEL } void CGeometry::SetGeometryPlanes(CConfig *config) { diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp index f6ba9222211..341d8cac8b2 100644 --- a/Common/src/geometry/CMultiGridGeometry.cpp +++ b/Common/src/geometry/CMultiGridGeometry.cpp @@ -1142,7 +1142,9 @@ void CMultiGridGeometry::SetControlVolume(CConfig *config, CGeometry *fine_grid, } } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_grid, unsigned short action) { @@ -1184,7 +1186,9 @@ void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_ if (Area == 0.0) for (iDim = 0; iDim < nDim; iDim++) NormalFace[iDim] = EPS*EPS; } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CMultiGridGeometry::SetCoord(CGeometry *geometry) { @@ -1202,6 +1206,7 @@ void CMultiGridGeometry::SetCoord(CGeometry *geometry) { } nodes->SetCoord(Point_Coarse, Coordinates); } + END_SU2_OMP_FOR } void CMultiGridGeometry::SetMultiGridWallHeatFlux(CGeometry *geometry, unsigned short val_marker){ @@ -1320,6 +1325,7 @@ void CMultiGridGeometry::SetRestricted_GridVelocity(CGeometry *fine_mesh, CConfi for (unsigned short iDim = 0; iDim < nDim; iDim++) nodes->SetGridVel(Point_Coarse, iDim, Grid_Vel[iDim]); } + END_SU2_OMP_FOR } diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 6ede288f986..3b37d54f1cc 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -4375,7 +4375,10 @@ void CPhysicalGeometry::Check_IntElem_Orientation(const CConfig *config) { } } - }} // end SU2_OMP_PARALLEL + } + END_SU2_OMP_FOR + } + END_SU2_OMP_PARALLEL auto reduce = [](unsigned long& val) { unsigned long tmp = val; @@ -4522,7 +4525,10 @@ void CPhysicalGeometry::Check_BoundElem_Orientation(const CConfig *config) { } } } - }} // end SU2_OMP_PARALLEL + 
END_SU2_OMP_FOR + } + } + END_SU2_OMP_PARALLEL auto reduce = [](unsigned long& val) { unsigned long tmp = val; @@ -4698,6 +4704,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() { } nodes->SetElems(elems); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Loop over all the points ---*/ @@ -4734,11 +4741,14 @@ void CPhysicalGeometry::SetPoint_Connectivity() { /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/ nodes->SetnNeighbor(iPoint, points[iPoint].size()); } + END_SU2_OMP_FOR SU2_OMP_MASTER nodes->SetPoints(points); + END_SU2_OMP_MASTER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CPhysicalGeometry::SetRCM_Ordering(CConfig *config) { @@ -6681,6 +6691,7 @@ void CPhysicalGeometry::SetMaxLength(CConfig* config) { max_delta = GeometryToolbox::Distance(nDim, Coord_i, Coord_j); nodes->SetMaxLength(iPoint, max_delta); } + END_SU2_OMP_FOR InitiateComms(this, config, MAX_LENGTH); CompleteComms(this, config, MAX_LENGTH); @@ -7561,10 +7572,12 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) SU2_OMP_FOR_STAT(1024) for (auto iEdge = 0ul; iEdge < nEdge; iEdge++) edges->SetNormal(iEdge, ZeroArea); + END_SU2_OMP_FOR SU2_OMP_FOR_STAT(1024) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) nodes->SetVolume(iPoint, 0.0); + END_SU2_OMP_FOR } SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. 
---*/ @@ -7699,7 +7712,9 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) if (nDim == 3) cout <<"Volume of the computational grid: "<< DomainVolume <<"."<< endl; } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- Check if there is a normal with null area ---*/ SU2_OMP_FOR_STAT(1024) @@ -7708,6 +7723,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) su2double DefaultArea[MAXNDIM] = {EPS*EPS}; if (Area2 == 0.0) edges->SetNormal(iEdge, DefaultArea); } + END_SU2_OMP_FOR } void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned short action) { @@ -7719,6 +7735,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++) for (unsigned long iVertex = 0; iVertex < nVertex[iMarker]; iVertex++) vertex[iMarker][iVertex]->SetZeroValues(); + END_SU2_OMP_FOR } /*--- Loop over all the boundary elements ---*/ @@ -7784,6 +7801,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh AD::EndPreacc(); } } + END_SU2_OMP_FOR /*--- Check if there is a normal with null area ---*/ @@ -7795,6 +7813,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh if (Area2 == 0.0) vertex[iMarker][iVertex]->SetNormal(DefaultArea); } } + END_SU2_OMP_FOR } void CPhysicalGeometry::VisualizeControlVolume(const CConfig *config) const { @@ -11076,9 +11095,10 @@ void CPhysicalGeometry::SetWallDistance(const CConfig *config, CADTElemClass *Wa nodes->SetRoughnessHeight(iPoint, localRoughness); } } + END_SU2_OMP_FOR } - // end SU2_OMP_PARALLEL + END_SU2_OMP_PARALLEL } void CPhysicalGeometry::SetGlobalMarkerRoughness(const CConfig* config) { diff --git a/Common/src/interface_interpolation/CIsoparametric.cpp b/Common/src/interface_interpolation/CIsoparametric.cpp index c0589a2a88e..c60144035c2 100644 --- 
a/Common/src/interface_interpolation/CIsoparametric.cpp +++ b/Common/src/interface_interpolation/CIsoparametric.cpp @@ -253,13 +253,16 @@ void CIsoparametric::SetTransferCoeff(const CConfig* const* config) { } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxDistance = max(MaxDistance, maxDist); ErrorCounter += errorCount; nGlobalVertexTarget += totalCount; } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL } // end nMarkerInt loop diff --git a/Common/src/interface_interpolation/CMirror.cpp b/Common/src/interface_interpolation/CMirror.cpp index 0ef8a55d751..e008720639f 100644 --- a/Common/src/interface_interpolation/CMirror.cpp +++ b/Common/src/interface_interpolation/CMirror.cpp @@ -231,7 +231,8 @@ void CMirror::SetTransferCoeff(const CConfig* const* config) { } } - } // end target loop + } + END_SU2_OMP_PARALLEL /*--- Free the heap allocations. ---*/ for (auto ptr : GlobalIndex) if (ptr != sendGlobalIndex.data()) delete [] ptr; diff --git a/Common/src/interface_interpolation/CNearestNeighbor.cpp b/Common/src/interface_interpolation/CNearestNeighbor.cpp index e29d893c1fe..91e25e75b89 100644 --- a/Common/src/interface_interpolation/CNearestNeighbor.cpp +++ b/Common/src/interface_interpolation/CNearestNeighbor.cpp @@ -158,13 +158,16 @@ void CNearestNeighbor::SetTransferCoeff(const CConfig* const* config) { target_vertex.coefficient[iDonor] = donorInfo[iDonor].dist/denom; } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { totalTargetPoints += numTarget; AvgDistance += avgDist; MaxDistance = max(MaxDistance, maxDist); } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL delete[] Buffer_Send_Coord; delete[] Buffer_Send_GlobalPoint; diff --git a/Common/src/interface_interpolation/CRadialBasisFunction.cpp b/Common/src/interface_interpolation/CRadialBasisFunction.cpp index 2b88464dea7..2c318414eb6 100644 --- a/Common/src/interface_interpolation/CRadialBasisFunction.cpp +++ b/Common/src/interface_interpolation/CRadialBasisFunction.cpp 
@@ -218,6 +218,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { keepPolynomialRowVec[iMarkerInt], CinvTrucVec[iMarkerInt]); } } + END_SU2_OMP_PARALLEL /*--- Final loop over interface markers to compute the interpolation coefficients. ---*/ @@ -381,7 +382,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { } } } // end target vertex loop - + END_SU2_OMP_FOR SU2_OMP_CRITICAL { totalDonorPoints += totalDonors; @@ -390,7 +391,9 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) { AvgCorrection += sumCorr; MaxCorrection = max(MaxCorrection, maxCorr); } - } // end SU2_OMP_PARALLEL + END_SU2_OMP_CRITICAL + } + END_SU2_OMP_PARALLEL /*--- Free global data that will no longer be used. ---*/ donorCoord.resize(0,0); diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 909b9542de8..1181b5b7212 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -97,11 +97,13 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi if(matrix != nullptr) { SU2_OMP_MASTER SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } if(nvar > MAXNVAR) { SU2_OMP_MASTER SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } /*--- Application of this matrix, FVM or FEM. 
---*/ @@ -296,6 +298,7 @@ void CSysMatrixComms::Initiate(const CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) bufDSend[buf_offset+iVar] = x(iPoint,iVar); } + END_SU2_OMP_FOR break; } @@ -333,6 +336,7 @@ void CSysMatrixComms::Initiate(const CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) bufDSend[buf_offset+iVar] = x(iPoint,iVar); } + END_SU2_OMP_FOR break; } @@ -375,6 +379,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. ---*/ @@ -414,6 +419,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) x(iPoint,iVar) = CSysMatrix::template ActiveAssign(bufDRecv[buf_offset+iVar]); } + END_SU2_OMP_FOR break; } @@ -453,6 +459,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++) x(iPoint,iVar) += CSysMatrix::template ActiveAssign(bufDRecv[buf_offset+iVar]); } + END_SU2_OMP_FOR break; } @@ -469,6 +476,7 @@ void CSysMatrixComms::Complete(CSysVector& x, CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER @@ -490,6 +498,7 @@ void CSysMatrix::SetValDiagonalZero() { for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint) for (auto index = 0ul; index < nVar*nEqn; ++index) matrix[dia_ptr[iPoint]*nVar*nEqn + index] = 0.0; + END_SU2_OMP_FOR } template @@ -603,10 +612,12 @@ void CSysMatrix::MatrixVectorProduct(const CSysVector & if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) { SU2_OMP_MASTER SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } if (nPoint != prod.GetNBlk()) { SU2_OMP_MASTER 
SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } #endif @@ -620,6 +631,7 @@ void CSysMatrix::MatrixVectorProduct(const CSysVector & for (auto row_i = 0ul; row_i < nPointDomain; row_i++) { RowProduct(vec, row_i, &prod[row_i*nVar]); } + END_SU2_OMP_FOR /*--- MPI Parallelization. ---*/ @@ -639,10 +651,12 @@ void CSysMatrix::MatrixVectorProductTransposed(const CSysVector::MatrixVectorProductTransposed(const CSysVector::BuildJacobiPreconditioner(bool transpose) { SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]), transpose); + END_SU2_OMP_FOR } @@ -690,6 +705,7 @@ void CSysMatrix::ComputeJacobiPreconditioner(const CSysVector::BuildILUPreconditioner(bool transposed) { SU2_OMP_FOR_STAT(omp_light_size) for (auto iVar = 0ul; iVar < nnz*nVar*nVar; ++iVar) ILU_matrix[iVar] = matrix[iVar]; + END_SU2_OMP_FOR } else { /*--- ILUn clear the ILU matrix first, for ILU0^T @@ -715,6 +732,7 @@ void CSysMatrix::BuildILUPreconditioner(bool transposed) { SU2_OMP_FOR_STAT(omp_light_size) for (auto iVar = 0ul; iVar < nnz_ilu*nVar*nVar; iVar++) ILU_matrix[iVar] = 0.0; + END_SU2_OMP_FOR } /*--- Transposed or ILUn, traverse matrix to access its blocks @@ -730,6 +748,7 @@ void CSysMatrix::BuildILUPreconditioner(bool transposed) { } } } + END_SU2_OMP_FOR } /*--- Transform system in Upper Matrix ---*/ @@ -804,6 +823,7 @@ void CSysMatrix::BuildILUPreconditioner(bool transposed) { InverseDiagonalBlock_ILUMatrix(end-1, &invM[(end-1)*nVar*nVar]); } + END_SU2_OMP_FOR } @@ -857,6 +877,7 @@ void CSysMatrix::ComputeILUPreconditioner(const CSysVector::ComputeLU_SGSPreconditioner(const CSysVector::ComputeLU_SGSPreconditioner(const CSysVector::ComputeLineletPreconditioner(const CSysVector::ComputeLineletPreconditioner(const CSysVector::ComputeResidual(const CSysVector & sol, RowProduct(sol, iPoint, aux_vec); VectorSubtraction(aux_vec, 
&f[iPoint*nVar], &res[iPoint*nVar]); } + END_SU2_OMP_FOR } template @@ -1315,6 +1341,7 @@ void CSysMatrix::SetDiagonalAsColumnSum() { if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii); } } + END_SU2_OMP_FOR } template @@ -1328,11 +1355,13 @@ void CSysMatrix::MatrixMatrixAddition(ScalarType alpha, const CSysMa if (!ok) { SU2_OMP_MASTER SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } SU2_OMP_FOR_STAT(omp_light_size) for (auto i = 0ul; i < nnz*nVar*nEqn; ++i) matrix[i] += alpha*B.matrix[i]; + END_SU2_OMP_FOR } @@ -1346,10 +1375,12 @@ void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, cons pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix); pastix_wrapper.Factorize(geometry, config, kind_fact, transposed); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER #else SU2_OMP_MASTER SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); + END_SU2_OMP_MASTER #endif } @@ -1360,6 +1391,7 @@ void CSysMatrix::ComputePastixPreconditioner(const CSysVector::ComputePastixPreconditioner(const CSysVector::ModGramSchmidt(int i, su2matrix& Hsbg, /*--- nrm is the result of a dot product, communications are implicitly handled. 
---*/ SU2_OMP_MASTER SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } /*--- Begin main Gram-Schmidt loop ---*/ @@ -211,6 +212,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & if (m < 1) { SU2_OMP_MASTER SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } /*--- Allocate if not allocated yet, only one thread can @@ -230,6 +232,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & cg_ready = true; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -351,11 +354,13 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector 5000) { SU2_OMP_MASTER SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } /*--- Allocate if not allocated yet ---*/ @@ -368,6 +373,7 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::BCGSTAB_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVector::Solve(CSysMatrix & Jacobian, co Residual = residual; Iterations = IterLinSol; } + END_SU2_OMP_MASTER HandleTemporariesOut(LinSysSol); @@ -942,6 +953,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co AD::FuncHelper->addUserData(config); AD::FuncHelper->addUserData(this); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); @@ -1073,6 +1085,7 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, SU2_OMP_MASTER Iterations = IterLinSol; + END_SU2_OMP_MASTER return IterLinSol; diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp index 39eb711d3ad..33f4ae14c78 100644 --- a/Common/src/linear_algebra/CSysSolve_b.cpp +++ b/Common/src/linear_algebra/CSysSolve_b.cpp @@ -64,6 +64,7 @@ void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::Re (*LinSysSol_b)[i] = 0.0; } } + END_SU2_OMP_MASTER SU2_OMP_BARRIER solver->Solve_b(*Jacobian, 
*LinSysRes_b, *LinSysSol_b, geometry, config); @@ -75,6 +76,7 @@ void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::Re x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i)); } } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp index 0233daa886c..1522b49f208 100644 --- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp +++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp @@ -127,6 +127,7 @@ void computeGradientsGreenGauss(CSolver* solver, AD::EndPreacc(); } + END_SU2_OMP_FOR /*--- Add boundary fluxes. ---*/ @@ -160,6 +161,7 @@ void computeGradientsGreenGauss(CSolver* solver, gradient(iPoint, iVar, iDim) -= flux * area[iDim]; } } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp index 6cf2e739a7b..bd32f9b9542 100644 --- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp +++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp @@ -284,6 +284,7 @@ void computeGradientsLeastSquares(CSolver* solver, solveLeastSquares(iPoint, varBegin, varEnd, Rmatrix, gradient); } } + END_SU2_OMP_FOR /*--- Correct the gradient values across any periodic boundaries. 
---*/ @@ -300,6 +301,7 @@ void computeGradientsLeastSquares(CSolver* solver, SU2_OMP_FOR_DYN(chunkSize) for (size_t iPoint = 0; iPoint < nPointDomain; ++iPoint) solveLeastSquares(iPoint, varBegin, varEnd, Rmatrix, gradient); + END_SU2_OMP_FOR } /*--- If no solver was provided we do not communicate ---*/ diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp index 4450a71898c..3f2582aae84 100644 --- a/SU2_CFD/include/integration/CNewtonIntegration.hpp +++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp @@ -116,6 +116,7 @@ class CNewtonIntegration final : public CIntegration { inline void SetSolutionResult(CSysVector& x) const { CNEWTON_PARFOR for (auto i = 0ul; i < x.GetLocSize(); ++i) x[i] = LinSysSol[i]; + END_CNEWTON_PARFOR } /*--- Preconditioner objects for each active solver. ---*/ @@ -129,11 +130,13 @@ class CNewtonIntegration final : public CIntegration { unsigned long iters, Scalar& eps) const { CNEWTON_PARFOR for (auto i = 0ul; i < u.GetLocSize(); ++i) precondIn[i] = u[i]; + END_CNEWTON_PARFOR iters = Preconditioner_impl(precondIn, precondOut, iters, eps); CNEWTON_PARFOR for (auto i = 0ul; i < u.GetLocSize(); ++i) v[i] = precondOut[i]; + END_CNEWTON_PARFOR SU2_OMP_BARRIER return iters; diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp index d605c668b83..3cfa4f3febf 100644 --- a/SU2_CFD/include/limiters/CLimiterDetails.hpp +++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp @@ -177,6 +177,7 @@ struct CLimiterDetails sharedMin.resize(varEnd) = largeNum; sharedMax.resize(varEnd) =-largeNum; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Per thread reduction. ---*/ @@ -194,6 +195,7 @@ struct CLimiterDetails localMax(iVar) = max(localMax(iVar), field(iPoint, iVar)); } } + END_SU2_OMP_FOR /*--- Per rank reduction. 
---*/ @@ -203,6 +205,7 @@ struct CLimiterDetails sharedMin(iVar) = min(sharedMin(iVar), localMin(iVar)); sharedMax(iVar) = max(sharedMax(iVar), localMax(iVar)); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Global reduction. ---*/ @@ -215,6 +218,7 @@ struct CLimiterDetails localMax = sharedMax; SU2_MPI::Allreduce(localMax.data(), sharedMax.data(), varEnd, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Compute eps^2 (each thread has its own copy of it). ---*/ diff --git a/SU2_CFD/include/limiters/computeLimiters.hpp b/SU2_CFD/include/limiters/computeLimiters.hpp index a54832af6b9..f7d55cef0e7 100644 --- a/SU2_CFD/include/limiters/computeLimiters.hpp +++ b/SU2_CFD/include/limiters/computeLimiters.hpp @@ -68,6 +68,7 @@ if (geometry.GetnDim() == 2) {\ for(size_t iPoint = 0; iPoint < geometry.GetnPoint(); ++iPoint) for(size_t iVar = varBegin; iVar < varEnd; ++iVar) limiter(iPoint, iVar) = 1.0; + END_SU2_OMP_FOR break; } case BARTH_JESPERSEN: diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp index ae43d10e07a..2a68406a1f8 100644 --- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp +++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp @@ -115,6 +115,7 @@ void computeLimiters_impl(CSolver* solver, for (size_t iPoint = 0; iPoint < nPoint; ++iPoint) for (size_t iVar = varBegin; iVar < varEnd; ++iVar) fieldMax(iPoint,iVar) = fieldMin(iPoint,iVar) = field(iPoint,iVar); + END_SU2_OMP_FOR for (size_t iPeriodic = 1; iPeriodic <= config.GetnMarker_Periodic()/2; ++iPeriodic) { @@ -215,6 +216,7 @@ void computeLimiters_impl(CSolver* solver, AD::EndPreacc(); } + END_SU2_OMP_FOR /*--- Account for periodic effects, take the minimum limiter on each periodic pair. 
---*/ if (periodic) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index 251f494f8ed..dde3885682b 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -324,6 +324,7 @@ class CFVMFlowSolverBase : public CSolver { Max_Delta_Time = 0.0; Global_Delta_UnstTimeND = 1e30; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Loop domain points. ---*/ @@ -377,6 +378,7 @@ class CFVMFlowSolverBase : public CSolver { } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ -419,6 +421,7 @@ class CFVMFlowSolverBase : public CSolver { Lambda = lambdaVisc(*nodes,iPoint) * Area2; nodes->AddMax_Lambda_Visc(iPoint, Lambda); } + END_SU2_OMP_FOR } } @@ -449,6 +452,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetDelta_Time(iPoint,0.0); } } + END_SU2_OMP_FOR /*--- Min/max over threads. ---*/ SU2_OMP_CRITICAL { @@ -456,6 +460,7 @@ class CFVMFlowSolverBase : public CSolver { Max_Delta_Time = max(Max_Delta_Time, maxDt); Global_Delta_Time = Min_Delta_Time; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER } @@ -470,6 +475,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); Max_Delta_Time = rbuf_time; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- For exact time solution use the minimum delta time of the whole mesh. ---*/ @@ -490,6 +496,7 @@ class CFVMFlowSolverBase : public CSolver { config->SetDelta_UnstTimeND(Global_Delta_Time); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Sets the regular CFL equal to the unsteady CFL. 
---*/ @@ -499,6 +506,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetLocalCFL(iPoint, config->GetUnst_CFL()); nodes->SetDelta_Time(iPoint, Global_Delta_Time); } + END_SU2_OMP_FOR } @@ -513,8 +521,10 @@ class CFVMFlowSolverBase : public CSolver { for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) { glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint)); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL Global_Delta_UnstTimeND = min(Global_Delta_UnstTimeND, glbDtND); + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -524,6 +534,7 @@ class CFVMFlowSolverBase : public CSolver { config->SetDelta_UnstTimeND(Global_Delta_UnstTimeND); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -535,6 +546,7 @@ class CFVMFlowSolverBase : public CSolver { su2double dt = min((2.0/3.0)*config->GetDelta_UnstTimeND(), nodes->GetDelta_Time(iPoint)); nodes->SetDelta_Time(iPoint, dt); } + END_SU2_OMP_FOR } } @@ -585,6 +597,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed); } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ -619,6 +632,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed); } + END_SU2_OMP_FOR } } @@ -681,6 +695,7 @@ class CFVMFlowSolverBase : public CSolver { nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]); } } + END_SU2_OMP_FOR if (isPeriodic) { /*--- Correct the sensor values across any periodic boundaries. 
---*/ @@ -695,6 +710,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]); + END_SU2_OMP_FOR } /*--- MPI parallelization ---*/ @@ -735,6 +751,7 @@ class CFVMFlowSolverBase : public CSolver { SetRes_RMS(iVar, 0.0); SetRes_Max(iVar, 0.0, 0); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0}; @@ -797,12 +814,14 @@ class CFVMFlowSolverBase : public CSolver { } } } + END_SU2_OMP_FOR /*--- Reduce residual information over all threads in this rank. ---*/ SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { AddRes_RMS(iVar, resRMS[iVar]); AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER } @@ -821,6 +840,7 @@ class CFVMFlowSolverBase : public CSolver { ComputeVerificationError(geometry, config); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -860,6 +880,7 @@ class CFVMFlowSolverBase : public CSolver { SetRes_RMS(iVar, 0.0); SetRes_Max(iVar, 0.0, 0); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0}; @@ -889,6 +910,7 @@ class CFVMFlowSolverBase : public CSolver { Jacobian.SetVal2Diag(iPoint, 1.0); } } + END_SU2_OMP_FOR } /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/ @@ -921,16 +943,19 @@ class CFVMFlowSolverBase : public CSolver { } } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { AddRes_RMS(iVar, resRMS[iVar]); AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ SU2_OMP_MASTER SetResidual_RMS(geometry, config); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -952,6 +977,7 @@ class CFVMFlowSolverBase : public 
CSolver { nodes->AddSolution(iPoint, iVar, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint*nVar+iVar]); } } + END_SU2_OMP_FOR } for (unsigned short iPeriodic = 1; iPeriodic <= config->GetnMarker_Periodic()/2; iPeriodic++) { @@ -965,6 +991,7 @@ class CFVMFlowSolverBase : public CSolver { /*--- For verification cases, compute the global error metrics. ---*/ SU2_OMP_MASTER ComputeVerificationError(geometry, config); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -982,6 +1009,7 @@ class CFVMFlowSolverBase : public CSolver { StrainMag_Max = 0.0; Omega_Max = 0.0; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER su2double strainMax = 0.0, omegaMax = 0.0; @@ -1045,12 +1073,14 @@ class CFVMFlowSolverBase : public CSolver { AD::EndPreacc(); } + END_SU2_OMP_FOR if ((iMesh == MESH_0) && (config.GetComm_Level() == COMM_FULL)) { SU2_OMP_CRITICAL { StrainMag_Max = max(StrainMag_Max, strainMax); Omega_Max = max(Omega_Max, omegaMax); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER { @@ -1060,6 +1090,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_MPI::Allreduce(&MyStrainMag_Max, &StrainMag_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&MyOmega_Max, &Omega_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index adaba33241d..d62706df237 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -572,6 +572,7 @@ void CFVMFlowSolverBase::ComputeUnderRelaxationFactor(const CConfig* confi nodes->SetUnderRelaxation(iPoint, localUnderRelaxation); } + END_SU2_OMP_FOR } template @@ -586,6 +587,7 @@ void CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); } + END_SU2_OMP_FOR auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); @@ -593,6 +595,7 @@ void 
CFVMFlowSolverBase::ImplicitEuler_Iteration(CGeometry *geometry, CSol SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER CompleteImplicitIteration(geometry, nullptr, config); @@ -848,7 +851,8 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** SU2_MPI::Error(string("The solution file ") + restart_filename + string(" doesn't match with the mesh file!\n") + string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); } - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Update the geometry for flows on deforming meshes ---*/ @@ -918,6 +922,7 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** } solver[iMesh][FLOW_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse); } + END_SU2_OMP_FOR solver[iMesh][FLOW_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION); solver[iMesh][FLOW_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION); @@ -941,7 +946,8 @@ void CFVMFlowSolverBase::LoadRestart_impl(CGeometry **geometry, CSolver ** delete [] Restart_Vars; Restart_Vars = nullptr; delete [] Restart_Data; Restart_Data = nullptr; - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -987,6 +993,7 @@ void CFVMFlowSolverBase::SetInitialCondition(CGeometry **geometry, CSolver but this is not necessary. 
*/ VerificationSolution->GetInitialCondition(coor, solDOF); } + END_SU2_OMP_FOR } } @@ -996,7 +1003,8 @@ void CFVMFlowSolverBase::SetInitialCondition(CGeometry **geometry, CSolver PushSolutionBackInTime(TimeIter, restart, rans, solver_container, geometry, config); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1281,6 +1289,7 @@ void CFVMFlowSolverBase::BC_Sym_Plane(CGeometry* geometry, CSolver** solve } // if viscous } // if GetDomain } // for iVertex + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ for (iVar = 0; iVar < nPrimVarGrad; iVar++) delete[] Grad_Reflected[iVar]; @@ -1462,6 +1471,7 @@ void CFVMFlowSolverBase::BC_Fluid_Interface(CGeometry* geometry, } } } + END_SU2_OMP_FOR } } @@ -1524,6 +1534,7 @@ void CFVMFlowSolverBase::BC_Custom(CGeometry* geometry, CSolver** solver_c } } } + END_SU2_OMP_FOR } else { /* The user must specify the custom BC's here. */ @@ -1558,6 +1569,7 @@ void CFVMFlowSolverBase::EdgeFluxResidual(const CGeometry *geometry, edgeNumerics->ComputeFlux(iEdge, *config, *geometry, *nodes, UpdateType::COLORING, mask, LinSysRes, Jacobian); } } + END_SU2_OMP_FOR } if (ReducerStrategy) { @@ -1583,6 +1595,7 @@ void CFVMFlowSolverBase::SumEdgeFluxes(const CGeometry* geometry) { LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge)); } } + END_SU2_OMP_FOR } template @@ -1646,6 +1659,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } @@ -1684,6 +1698,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -1716,6 +1731,7 @@ void CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry for (iVar = 0; iVar < nVar; iVar++) LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } + END_SU2_OMP_FOR } } @@ -1758,6 +1774,7 @@ void 
CFVMFlowSolverBase::SetResidual_DualTime(CGeometry *geometry if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/include/solvers/CTurbSASolver.hpp b/SU2_CFD/include/solvers/CTurbSASolver.hpp index 320cc555715..2cca80813c0 100644 --- a/SU2_CFD/include/solvers/CTurbSASolver.hpp +++ b/SU2_CFD/include/solvers/CTurbSASolver.hpp @@ -373,6 +373,7 @@ class CTurbSASolver final : public CTurbSolver { SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) nodes->SetSolution(iPoint, 0, nu_tilde_Inf); + END_SU2_OMP_FOR } /*! diff --git a/SU2_CFD/include/solvers/CTurbSSTSolver.hpp b/SU2_CFD/include/solvers/CTurbSSTSolver.hpp index 7d38a8050aa..8fe2dd73401 100644 --- a/SU2_CFD/include/solvers/CTurbSSTSolver.hpp +++ b/SU2_CFD/include/solvers/CTurbSSTSolver.hpp @@ -245,6 +245,7 @@ class CTurbSSTSolver final : public CTurbSolver { nodes->SetSolution(iPoint, 0, kine_Inf); nodes->SetSolution(iPoint, 1, omega_Inf); } + END_SU2_OMP_FOR } /*! 
diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp index 32b0e871c8e..7b7e35042c7 100644 --- a/SU2_CFD/src/drivers/CDriver.cpp +++ b/SU2_CFD/src/drivers/CDriver.cpp @@ -815,6 +815,7 @@ void CDriver::Geometrical_Preprocessing_FVM(CConfig *config, CGeometry **&geomet geometry[MESH_0]->SetControlVolume(config, ALLOCATE); geometry[MESH_0]->SetBoundControlVolume(config, ALLOCATE); } + END_SU2_OMP_PARALLEL /*--- Visualize a dual control volume if requested ---*/ @@ -1280,6 +1281,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry, if (euler || ns) { SU2_OMP_PARALLEL_(if(solver[MESH_0][FLOW_SOL]->GetHasHybridParallel())) solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); + END_SU2_OMP_PARALLEL } if (NEMO_euler || NEMO_ns) { solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); @@ -1287,6 +1289,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry, if (turbulent) { SU2_OMP_PARALLEL_(if(solver[MESH_0][TURB_SOL]->GetHasHybridParallel())) solver[MESH_0][TURB_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); + END_SU2_OMP_PARALLEL } if (config->AddRadiation()) { solver[MESH_0][RAD_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo); @@ -1598,6 +1601,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Convective scheme not implemented (template_solver).", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1624,6 +1628,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE : SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -1643,6 +1648,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid centered 
scheme or not implemented.\n Currently, only JST and LAX-FRIEDRICH are available for incompressible flows.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } for (iMGlevel = 1; iMGlevel <= config->GetnMGLevels(); iMGlevel++) @@ -1761,6 +1767,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1777,6 +1784,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.\n Currently, only FDS is available for incompressible flows.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -1785,6 +1793,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the Euler / Navier-Stokes equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1885,6 +1894,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE : SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -1895,6 +1905,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid centered scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1948,6 +1959,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -1957,6 +1969,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the NEMO Euler / Navier-Stokes equations.", 
CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2028,6 +2041,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Riemann solver not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2043,6 +2057,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_UPWIND: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND : for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) { @@ -2055,6 +2070,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the turbulence equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2106,6 +2122,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_UPWIND: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND: for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) { @@ -2115,6 +2132,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the transition equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2159,6 +2177,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the heat transfer equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -2184,6 +2203,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol if (incompressible) SU2_OMP_MASTER SU2_MPI::Error("Convective schemes not implemented for incompressible continuous adjoint.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER /*--- 
Definition of the convective scheme for each equation and mesh level ---*/ @@ -2191,6 +2211,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NO_CONVECTIVE: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJFLOW option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_CENTERED : @@ -2205,6 +2226,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Centered scheme not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2233,6 +2255,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Upwind scheme not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } } @@ -2241,6 +2264,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Invalid convective scheme for the continuous adjoint Euler / Navier-Stokes equations.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } @@ -2304,12 +2328,14 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol if (!spalart_allmaras) SU2_OMP_MASTER SU2_MPI::Error("Only the SA turbulence model can be used with the continuous adjoint solver.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER /*--- Definition of the convective scheme for each equation and mesh level ---*/ switch (config->GetKind_ConvNumScheme_AdjTurb()) { case NO_CONVECTIVE: SU2_OMP_MASTER SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJTURB option.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case SPACE_UPWIND : for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) @@ -2318,6 +2344,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol default: SU2_OMP_MASTER SU2_MPI::Error("Convective scheme not implemented (adjoint turbulence).", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } 
@@ -2350,10 +2377,12 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case NEO_HOOKEAN: SU2_OMP_MASTER SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; default: SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } break; @@ -2362,6 +2391,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol case LINEAR_ELASTIC: SU2_OMP_MASTER SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; case NEO_HOOKEAN: if (config->GetMaterialCompressibility() == COMPRESSIBLE_MAT) { @@ -2369,6 +2399,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; case KNOWLES: @@ -2377,6 +2408,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; case IDEAL_DE: @@ -2385,17 +2417,20 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol } else { SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER } break; default: SU2_OMP_MASTER SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } break; default: SU2_OMP_MASTER SU2_MPI::Error("Solver not implemented.", CURRENT_FUNCTION); + END_SU2_OMP_MASTER break; } diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp index 729eb41958b..5c6d7647ede 100644 --- a/SU2_CFD/src/integration/CIntegration.cpp +++ b/SU2_CFD/src/integration/CIntegration.cpp @@ -223,7 +223,8 @@ void CIntegration::SetDualTime_Geometry(CGeometry 
*geometry, CSolver *mesh_solve if ((iMesh==MESH_0) && config->GetDeform_Mesh()) mesh_solver->SetDualTime_Mesh(); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver, const CConfig *config, unsigned short iMesh) { @@ -236,6 +237,7 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver SU2_OMP_MASTER solver->ResetCFLAdapt(); + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPoint(), omp_get_num_threads())) @@ -247,6 +249,8 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver /*--- Initialize the local CFL number ---*/ solver->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(iMesh)); } + END_SU2_OMP_FOR - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp index 3441fe82737..d5062c8c0cc 100644 --- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp +++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp @@ -94,6 +94,7 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry, SU2_OMP_MASTER config[iZone]->SubtractFinestMesh(); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -120,7 +121,8 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry, numerics_container[iZone][iInst], config[iZone], FinestMesh, RunTime_EqSystem, &monitor); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -334,6 +336,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS for (iVar = 0; iVar < nVar; iVar++) sol_coarse->GetNodes()->SetSolution_Old(Point_Coarse,Solution); } + END_SU2_OMP_FOR delete [] Solution; @@ -354,6 +357,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS sol_coarse->GetNodes()->SetVelocity_Old(Point_Coarse, zero); } + END_SU2_OMP_FOR } } @@ -369,6 +373,7 @@ void 
CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS sol_fine->LinSysRes.SetBlock(Point_Fine, sol_coarse->GetNodes()->GetSolution_Old(Point_Coarse)); } } + END_SU2_OMP_FOR } @@ -389,6 +394,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ Residual_Old = solver->LinSysRes.GetBlock(iPoint); solver->GetNodes()->SetResidual_Old(iPoint,Residual_Old); } + END_SU2_OMP_FOR /*--- Jacobi iterations. ---*/ @@ -408,6 +414,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ } } + END_SU2_OMP_FOR /*--- Loop over all mesh points (update residuals with the neighbor averages). ---*/ @@ -422,6 +429,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ for (iVar = 0; iVar < nVar; iVar++) solver->LinSysRes(iPoint,iVar) = (Residual_Old[iVar] + val_smooth_coeff*Residual_Sum[iVar])*factor; } + END_SU2_OMP_FOR /*--- Restore original residuals (without average) at boundary points. ---*/ @@ -435,6 +443,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_ Residual_Old = solver->GetNodes()->GetResidual_Old(iPoint); solver->LinSysRes.SetBlock(iPoint, Residual_Old); } + END_SU2_OMP_FOR } } @@ -462,6 +471,7 @@ void CMultiGridIntegration::SetProlongated_Correction(CSolver *sol_fine, CGeomet Solution_Fine[iVar] += factor*Residual_Fine[iVar]; } } + END_SU2_OMP_FOR /*--- MPI the new interpolated solution ---*/ @@ -482,6 +492,7 @@ void CMultiGridIntegration::SetProlongated_Solution(unsigned short RunTime_EqSys sol_fine->GetNodes()->SetSolution(Point_Fine, sol_coarse->GetNodes()->GetSolution(Point_Coarse)); } } + END_SU2_OMP_FOR } void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine, @@ -511,6 +522,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar } sol_coarse->GetNodes()->AddRes_TruncError(Point_Coarse, Residual); } + END_SU2_OMP_FOR delete [] Residual; @@ 
-521,6 +533,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode(); sol_coarse->GetNodes()->SetVel_ResTruncError_Zero(Point_Coarse); } + END_SU2_OMP_FOR } } @@ -528,6 +541,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar for (Point_Coarse = 0; Point_Coarse < geo_coarse->GetnPointDomain(); Point_Coarse++) { sol_coarse->GetNodes()->SubtractRes_TruncError(Point_Coarse, sol_coarse->LinSysRes.GetBlock(Point_Coarse)); } + END_SU2_OMP_FOR } @@ -536,6 +550,7 @@ void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solve SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++) solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint)); + END_SU2_OMP_FOR } @@ -575,6 +590,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst sol_coarse->GetNodes()->SetSolution(Point_Coarse, Solution); } + END_SU2_OMP_FOR delete [] Solution; @@ -609,6 +625,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst } } + END_SU2_OMP_FOR } } @@ -652,6 +669,7 @@ void CMultiGridIntegration::SetRestricted_Gradient(unsigned short RunTime_EqSyst } sol_coarse->GetNodes()->SetGradient(Point_Coarse,Gradient); } + END_SU2_OMP_FOR for (iVar = 0; iVar < nVar; iVar++) delete [] Gradient[iVar]; @@ -693,6 +711,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol numerics_container[FinestMesh][ADJFLOW_SOL][CONV_BOUND_TERM], config); break; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -719,6 +738,7 @@ void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****s solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CT(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CT()); 
solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CQ(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CQ()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Restrict solution and gradients to the coarse levels ---*/ diff --git a/SU2_CFD/src/integration/CNewtonIntegration.cpp b/SU2_CFD/src/integration/CNewtonIntegration.cpp index a9f3a804c61..c8d9c56d4e3 100644 --- a/SU2_CFD/src/integration/CNewtonIntegration.cpp +++ b/SU2_CFD/src/integration/CNewtonIntegration.cpp @@ -131,6 +131,7 @@ void CNewtonIntegration::PerturbSolution(const CSysVector& dir, Scalar m for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar) solvers[FLOW_SOL]->GetNodes()->AddSolution(iPoint,iVar, mag*dir(iPoint,iVar)); } + END_SU2_OMP_FOR } void CNewtonIntegration::ComputeResiduals(ResEvalType type) { @@ -140,6 +141,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { if (type == EXPLICIT) { SU2_OMP_MASTER config->SetKind_TimeIntScheme(EULER_EXPLICIT); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -151,6 +153,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) { if (type == EXPLICIT) { SU2_OMP_MASTER config->SetKind_TimeIntScheme(TimeIntScheme); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -163,11 +166,13 @@ void CNewtonIntegration::ComputeFinDiffStep() { SU2_OMP_MASTER rmsSol = 0.0; + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint) for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar) rmsSol_loc += pow(solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint,iVar), 2); + END_SU2_OMP_FOR atomicAdd(rmsSol_loc, rmsSol); @@ -177,6 +182,7 @@ void CNewtonIntegration::ComputeFinDiffStep() { SU2_MPI::Allreduce(&t, &rmsSol, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); finDiffStep = finDiffStepND * max(1.0, sqrt(SU2_TYPE::GetValue(rmsSol) / geometry->GetGlobal_nPointDomain())); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -212,6 +218,7 @@ void 
CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** SU2_OMP_FOR_STAT(omp_chunk_size) for (auto i = 0ul; i < LinSysRes.GetNElmDomain(); ++i) LinSysRes[i] = SU2_TYPE::GetValue(solvers[FLOW_SOL]->LinSysRes[i]); + END_SU2_OMP_FOR su2double residual = 0.0; for (auto iVar = 0ul; iVar < LinSysRes.GetNVar(); ++iVar) @@ -226,6 +233,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** firstResidual = max(firstResidual, residual); if (startupIters) startupIters -= 1; } + END_SU2_OMP_MASTER SU2_OMP_BARRIER endStartup = (startupIters == 0) && (residual - firstResidual < startupResidual); } @@ -237,6 +245,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** if (!startupPeriod && tolRelaxFactor > 1 && fullTolResidual < 0.0) { SU2_OMP_MASTER firstResidual = max(firstResidual, residual); + END_SU2_OMP_MASTER SU2_OMP_BARRIER su2double x = (residual - firstResidual) / fullTolResidual; toleranceFactor = 1.0 + (tolRelaxFactor-1)*max(0.0, 1.0-SU2_TYPE::GetValue(x)); @@ -267,6 +276,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** solvers[FLOW_SOL]->SetIterLinSolver(iter); solvers[FLOW_SOL]->SetResLinSolver(eps); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /// TODO: Clever back-tracking and CFL adaptation based on residual reduction. @@ -286,6 +296,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** solvers[FLOW_SOL]->Momentum_Forces(geometry, config); solvers[FLOW_SOL]->Friction_Forces(geometry, config); } + END_SU2_OMP_MASTER /*--- At the end of the startup period the CFL is reset to the initial value. 
---*/ @@ -294,12 +305,15 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver ** startupPeriod = false; firstResidual = residual; } + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < geometry->GetnPoint(); ++iPoint) solvers[FLOW_SOL]->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(MESH_0)); + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CNewtonIntegration::MatrixFreeProduct(const CSysVector& u, CSysVector& v) { @@ -328,6 +342,7 @@ void CNewtonIntegration::MatrixFreeProduct(const CSysVector& u, CSysVect v(iPoint,iVar) += SU2_TYPE::GetValue(delta) * u(iPoint,iVar); } } + END_SU2_OMP_FOR CSysMatrixComms::Initiate(v, geometry, config); CSysMatrixComms::Complete(v, geometry, config); @@ -350,6 +365,7 @@ void CNewtonIntegration::Preconditioner(const CSysVector& u, CSysVector< for (auto iVar = 0ul; iVar < u.GetNVar(); ++iVar) v(iPoint,iVar) = SU2_TYPE::GetValue(delta) * u(iPoint,iVar); } + END_SU2_OMP_FOR CSysMatrixComms::Initiate(v, geometry, config); CSysMatrixComms::Complete(v, geometry, config); diff --git a/SU2_CFD/src/integration/CSingleGridIntegration.cpp b/SU2_CFD/src/integration/CSingleGridIntegration.cpp index 88da5a71ef9..d2f0d4de49e 100644 --- a/SU2_CFD/src/integration/CSingleGridIntegration.cpp +++ b/SU2_CFD/src/integration/CSingleGridIntegration.cpp @@ -79,6 +79,7 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve if (RunTime_EqSystem == RUNTIME_HEAT_SYS) { SU2_OMP_MASTER solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -105,8 +106,8 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve } - } // end SU2_OMP_PARALLEL - + } + END_SU2_OMP_PARALLEL } void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse, @@ -141,6 +142,7 @@ void 
CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSys sol_coarse->GetNodes()->SetSolution(Point_Coarse,Solution); } + END_SU2_OMP_FOR delete [] Solution; @@ -177,6 +179,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys sol_coarse->GetNodes()->SetmuT(Point_Coarse,EddyVisc); } + END_SU2_OMP_FOR /*--- Update solution at the no slip wall boundary, only the first variable (nu_tilde -in SA and SA_NEG- and k -in SST-), to guarantee that the eddy viscoisty @@ -192,6 +195,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode(); sol_coarse->GetNodes()->SetmuT(Point_Coarse,0.0); } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index 2842768efef..d4fe65a2822 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -369,7 +369,8 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge break; } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL /*--- MPI dependencies. 
---*/ diff --git a/SU2_CFD/src/iteration/CFluidIteration.cpp b/SU2_CFD/src/iteration/CFluidIteration.cpp index aeaf77f9621..7f67cea5cc5 100644 --- a/SU2_CFD/src/iteration/CFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CFluidIteration.cpp @@ -141,6 +141,7 @@ void CFluidIteration::Iterate(COutput* output, CIntegration**** integration, CGe SU2_OMP_PARALLEL solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->AdaptCFLNumber(geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone]); + END_SU2_OMP_PARALLEL } /*--- Call Dynamic mesh update if AEROELASTIC motion was specified ---*/ diff --git a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp index 4c47c6f3847..643bd7e5b94 100644 --- a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp +++ b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp @@ -333,6 +333,7 @@ void CFEAElasticity::ReadDV(const CConfig *config) { bool master_node = false; SU2_OMP_MASTER master_node = (rank == MASTER_NODE); + END_SU2_OMP_MASTER unsigned long index; diff --git a/SU2_CFD/src/python_wrapper_structure.cpp b/SU2_CFD/src/python_wrapper_structure.cpp index e324046844e..166a0c66c1a 100644 --- a/SU2_CFD/src/python_wrapper_structure.cpp +++ b/SU2_CFD/src/python_wrapper_structure.cpp @@ -602,6 +602,7 @@ void CSinglezoneDriver::SetInitialMesh() { /*--- Set the grid velocity for this coarse node. ---*/ geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetGridVel(iPoint, Grid_Vel); } + END_SU2_OMP_FOR /*--- Push back the volume. 
---*/ geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_n(); geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_nM1(); @@ -610,6 +611,7 @@ void CSinglezoneDriver::SetInitialMesh() { solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n(); solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n1(); } + END_SU2_OMP_PARALLEL } void CDriver::BoundaryConditionsUpdate(){ diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 7cd1a186cb1..038b4ebd70d 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -617,6 +617,7 @@ void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_containe "support vectorization.", CURRENT_FUNCTION); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1689,7 +1690,8 @@ void CEulerSolver::SetNondimensionalization(CConfig *config, unsigned short iMes GetFluidModel()->SetThermalConductivityModel(config); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL Energy_FreeStreamND = GetFluidModel()->GetStaticEnergy() + 0.5*ModVel_FreeStreamND*ModVel_FreeStreamND; @@ -2025,12 +2027,14 @@ void CEulerSolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_c } } + END_SU2_OMP_FOR FlowNodes->Set_OldSolution(); } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2059,6 +2063,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con if (fixed_cl && !disc_adjoint && !cont_adjoint) { SU2_OMP_MASTER SetFarfield_AoA(geometry, solver_container, config, iMesh, Output); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2066,6 +2071,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_ATOMIC @@ -2079,6 +2085,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, 
MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2087,6 +2094,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con if (engine) { SU2_OMP_MASTER GetPower_Properties(geometry, config, iMesh, Output); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2099,6 +2107,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con GetPower_Properties(geometry, config, iMesh, Output); SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2107,6 +2116,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con if (nearfield) { SU2_OMP_MASTER Set_MPI_Nearfield(geometry, config); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2192,6 +2202,7 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c if (!physical) nonPhysicalPoints++; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -2276,6 +2287,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain unsigned long counter_local = 0; SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER /*--- Pick one numerics object per thread. 
---*/ CNumerics* numerics = numerics_container[CONV_TERM + omp_get_thread_num()*MAX_TERMS]; @@ -2465,6 +2477,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -2487,6 +2500,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2591,6 +2605,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (rotating_frame) { @@ -2621,6 +2636,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (axisymmetric) { @@ -2697,6 +2713,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (gravity) { @@ -2718,6 +2735,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } @@ -2735,6 +2753,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes(iPoint,iVar) += Volume * nodes->GetHarmonicBalance_Source(iPoint,iVar); } } + END_SU2_OMP_FOR } if (windgust) { @@ -2765,6 +2784,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } /*--- Check if a verification solution is to be computed. 
---*/ @@ -2795,6 +2815,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain LinSysRes(iPoint,iVar) -= sourceMan[iVar]*Volume; } } + END_SU2_OMP_FOR } } @@ -2865,6 +2886,7 @@ void CEulerSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *co nodes->AddUnd_Lapl(iPoint, nVar-1, Pressure_j-Pressure_i); } } + END_SU2_OMP_FOR /*--- Correct the Laplacian across any periodic boundaries. ---*/ @@ -2937,6 +2959,7 @@ void CEulerSolver::SetUpwind_Ducros_Sensor(CGeometry *geometry, CConfig *config) nodes->SetSensor(iPoint, Ducros_i); } + END_SU2_OMP_FOR InitiateComms(geometry, config, SENSOR); CompleteComms(geometry, config, SENSOR); @@ -5117,6 +5140,7 @@ void CEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -5608,6 +5632,7 @@ void CEulerSolver::BC_Riemann(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -6124,7 +6149,8 @@ void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_contain } } } -} + END_SU2_OMP_FOR + } /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -7024,6 +7050,7 @@ void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, CNu } } + END_SU2_OMP_FOR } /*--- Free locally allocated memory ---*/ @@ -7349,6 +7376,7 @@ void CEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -7525,6 +7553,7 @@ void CEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ delete [] Normal; @@ -7672,6 +7701,7 @@ void CEulerSolver::BC_Supersonic_Inlet(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -7797,6 +7827,7 @@ void CEulerSolver::BC_Supersonic_Outlet(CGeometry 
*geometry, CSolver **solver_co } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8021,6 +8052,7 @@ void CEulerSolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_contai } } + END_SU2_OMP_FOR delete [] Normal; @@ -8275,6 +8307,7 @@ void CEulerSolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_conta } } + END_SU2_OMP_FOR delete [] Normal; @@ -8334,6 +8367,7 @@ void CEulerSolver::BC_Interface_Boundary(CGeometry *geometry, CSolver **solver_c } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8397,6 +8431,7 @@ void CEulerSolver::BC_NearField_Boundary(CGeometry *geometry, CSolver **solver_c } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -8840,6 +8875,7 @@ void CEulerSolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR /*--- Free locally allocated memory ---*/ @@ -9078,6 +9114,7 @@ void CEulerSolver::BC_ActDisk_VariableLoad(CGeometry *geometry, CSolver **solver if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } } + END_SU2_OMP_FOR } void CEulerSolver::PrintVerificationError(const CConfig *config) const { @@ -9131,6 +9168,7 @@ void CEulerSolver::SetFreeStream_Solution(const CConfig *config) { } nodes->SetSolution(iPoint,nVar-1, Density_Inf*Energy_Inf); } + END_SU2_OMP_FOR } void CEulerSolver::SetFreeStream_TurboSolution(CConfig *config) { diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp index d36e156a250..70bf7f110e4 100644 --- a/SU2_CFD/src/solvers/CFEASolver.cpp +++ b/SU2_CFD/src/solvers/CFEASolver.cpp @@ -116,6 +116,7 @@ CFEASolver::CFEASolver(CGeometry *geometry, CConfig *config) : CSolver() { } } } + END_SU2_OMP_PARALLEL /*--- Set element properties ---*/ Set_ElementProperties(geometry, config); @@ -680,6 +681,7 @@ void CFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, { LinSysSol.SetValZero(); } + END_SU2_OMP_PARALLEL /*--- Clear external forces. 
---*/ nodes->Clear_SurfaceLoad_Res(); @@ -698,13 +700,16 @@ void CFEASolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_con SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) nodes->SetSolution(iPoint, zeros); + END_SU2_OMP_FOR } else { SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) nodes->SetSolution(iPoint, nodes->GetPrestretch(iPoint)); + END_SU2_OMP_FOR } - } // end parallel + } + END_SU2_OMP_PARALLEL } void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, const CConfig *config) { @@ -789,10 +794,12 @@ void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -929,10 +936,12 @@ void CFEASolver::Compute_StiffMatrix_NodalStressRes(CGeometry *geometry, CNumeri } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1012,10 +1021,12 @@ void CFEASolver::Compute_MassMatrix(const CGeometry *geometry, CNumerics **numer } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL AD::EndPassive(wasActive); @@ -1091,6 +1102,7 @@ void CFEASolver::Compute_MassRes(const CGeometry *geometry, CNumerics **numerics } } // end iElem loop + END_SU2_OMP_FOR } // end color loop @@ -1180,10 +1192,12 @@ void CFEASolver::Compute_NodalStressRes(CGeometry *geometry, CNumerics **numeric } } // end iElem loop + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1221,6 +1235,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, nodes->SetStress_FEM(iPoint,iStress, 0.0); } } + END_SU2_OMP_FOR AD::EndPassive(wasActive); for(auto color : ElemColoring) { @@ -1308,6 +1323,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics 
**numerics, AD::EndPassive(wasActive); } // end iElem loop + END_SU2_OMP_FOR atomicAdd(stressPen, StressPenalty); } // end color loop @@ -1326,12 +1342,15 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics, maxVonMises = max(maxVonMises, vms); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL MaxVonMises_Stress = max(MaxVonMises_Stress, maxVonMises); + END_SU2_OMP_CRITICAL AD::EndPassive(wasActive); - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL /*--- Set the value of the MaxVonMises_Stress as the CFEA coeffient ---*/ SU2_MPI::Allreduce(&MaxVonMises_Stress, &Total_CFEA, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); @@ -1473,6 +1492,7 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con SU2_OMP_FOR_STAT(omp_chunk_size) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) nodes->Clear_BodyForces_Res(iPoint); + END_SU2_OMP_FOR for(auto color : ElemColoring) { @@ -1530,11 +1550,13 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con if (LockStrategy) omp_unset_lock(&UpdateLocks[indexNode[iNode]]); } - } // end iElem loop + } + END_SU2_OMP_FOR } // end color loop - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -1743,6 +1765,7 @@ CSysVector computeLinearResidual(const CSysMatrix& A, const CSysVector& b) { CSysVector r(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr); SU2_OMP_PARALLEL { A.ComputeResidual(x, b, r); } + END_SU2_OMP_PARALLEL return r; } @@ -1762,6 +1785,7 @@ CSysVector computeLinearResidual(const CSysMatrix& A, btmp.PassiveCopy(b); A.ComputeResidual(xtmp, btmp, r); } + END_SU2_OMP_PARALLEL return r; } @@ -1823,7 +1847,9 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics Conv_Check[1] = rtol; Conv_Check[2] = etol; } - } // end parallel + END_SU2_OMP_MASTER + } + END_SU2_OMP_PARALLEL } else { @@ -1859,18 +1885,22 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics } } } + END_SU2_OMP_FOR 
SU2_OMP_CRITICAL for (auto iVar = 0ul; iVar < nVar; iVar++) { AddRes_RMS(iVar, resRMS[iVar]); AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual. ---*/ SU2_OMP_MASTER SetResidual_RMS(geometry, config); + END_SU2_OMP_MASTER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2224,6 +2254,7 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics } } + END_SU2_OMP_FOR /*--- Dynamic contribution. ---*/ @@ -2255,13 +2286,15 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t) } } + END_SU2_OMP_FOR /*--- Add M*TimeRes_Aux to the residual. ---*/ Compute_MassRes(geometry, numerics, config); LinSysRes += TimeRes; } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2282,6 +2315,7 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig for (iVar = 0; iVar < nVar; iVar++) nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar)); } + END_SU2_OMP_FOR if (dynamic) { SU2_OMP_FOR_STAT(omp_chunk_size) @@ -2308,8 +2342,10 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig nodes->SetSolution_Vel(iPoint, iVar, sol); } } + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CConfig *config) { @@ -2327,6 +2363,7 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo nodes->SetSolution(iPoint, nodes->GetSolution_Pred(iPoint)); nodes->SetSolution_Pred_Old(iPoint, nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR if (dynamic) { SU2_OMP_FOR_STAT(omp_chunk_size) @@ -2353,9 +2390,11 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo nodes->SetSolution_Vel(iPoint, iVar, sol); } } + END_SU2_OMP_FOR } - } 
// end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2406,6 +2445,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics } } + END_SU2_OMP_FOR } /*--- Loads for dynamic problems. ---*/ @@ -2430,6 +2470,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t) } } + END_SU2_OMP_FOR /*--- Add M*TimeRes_Aux to the residual. ---*/ Compute_MassRes(geometry, numerics, config); @@ -2462,9 +2503,11 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics alpha_f * nodes->Get_FlowTraction_n(iPoint,iVar) ); } } + END_SU2_OMP_FOR } - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2476,6 +2519,7 @@ void CFEASolver::GeneralizedAlpha_UpdateDisp(const CGeometry *geometry, const CC for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned short iVar = 0; iVar < nVar; iVar++) nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar)); + END_SU2_OMP_PARALLEL } @@ -2530,6 +2574,7 @@ void CFEASolver::GeneralizedAlpha_UpdateSolution(const CGeometry *geometry, cons } } + END_SU2_OMP_PARALLEL } @@ -2556,6 +2601,7 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) { /*--- This is required for the discrete adjoint. ---*/ SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (auto i = nPointDomain*nVar; i < nPoint*nVar; ++i) LinSysRes[i] = 0.0; + END_SU2_OMP_FOR /*--- Solve or smooth the linear system. 
---*/ @@ -2566,8 +2612,10 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) { SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER //SU2_OMP_BARRIER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } @@ -2616,6 +2664,7 @@ void CFEASolver::PredictStruct_Displacement(CGeometry *geometry, CConfig *config } } + END_SU2_OMP_PARALLEL } @@ -2734,6 +2783,7 @@ void CFEASolver::SetAitken_Relaxation(CGeometry *geometry, CConfig *config) { nodes->SetSolution_Pred(iPoint, newDispPred); } + END_SU2_OMP_PARALLEL } @@ -2825,6 +2875,7 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){ for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR } else { for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) { @@ -2840,12 +2891,14 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){ if (geometry->nodes->GetDomain(iPoint)) obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint)); } + END_SU2_OMP_FOR } } } atomicAdd(obj_fun_local, objective_function); atomicAdd(nSurf_local, nSurfPoints); } + END_SU2_OMP_PARALLEL SU2_MPI::Allreduce(&objective_function, &Total_OFRefGeom, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); unsigned long nPointsOF = geometry->GetGlobal_nPointDomain(); @@ -2940,10 +2993,13 @@ void CFEASolver::Compute_OFVolFrac(CGeometry *geometry, const CConfig *config) discrete_loc += volume*4.0*rho*(1.0-rho); } } + END_SU2_OMP_FOR + atomicAdd(tot_vol_loc, total_volume); atomicAdd(integral_loc, integral); atomicAdd(discrete_loc, discreteness); } + END_SU2_OMP_PARALLEL su2double tmp; SU2_MPI::Allreduce(&total_volume,&tmp,1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); @@ -3003,8 +3059,11 @@ void CFEASolver::Compute_OFCompliance(CGeometry *geometry, const CConfig 
*config for (iVar = 0; iVar < nVar; iVar++) comp_local += nodalForce[iVar]*nodes->GetSolution(iPoint,iVar); } + END_SU2_OMP_FOR + atomicAdd(comp_local, compliance); } + END_SU2_OMP_PARALLEL SU2_MPI::Allreduce(&compliance, &Total_OFCompliance, 1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm()); @@ -3073,9 +3132,12 @@ void CFEASolver::Stiffness_Penalty(CGeometry *geometry, CNumerics **numerics, CC } } + END_SU2_OMP_FOR + atomicAdd(totalVol_loc, totalVolume); atomicAdd(weighted_loc, weightedValue); } + END_SU2_OMP_PARALLEL // Reduce value across processors for parallelization @@ -3267,6 +3329,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf else if (rho < 0.0) physical_rho[iElem] = 0.0; else physical_rho[iElem] = rho; } + END_SU2_OMP_PARALLEL geometry->FilterValuesAtElementCG(filter_radius, kernels, search_lim, physical_rho); @@ -3279,15 +3342,18 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iElem=0ul; iElemSetPhysicalDensity(0.0); else element_properties[iElem]->SetPhysicalDensity(physical_rho[iElem]); } + END_SU2_OMP_FOR /*--- Compute nodal averages for output. 
---*/ SU2_OMP_FOR_STAT(omp_chunk_size) @@ -3311,7 +3378,9 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf } nodes->SetAuxVar(iPoint, 0, sum/vol); } + END_SU2_OMP_FOR } + END_SU2_OMP_PARALLEL delete [] physical_rho; diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index e02b8fd2504..c5cd8a789af 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -827,6 +827,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER SU2_OMP_BARRIER SU2_OMP_ATOMIC @@ -840,6 +841,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -862,6 +864,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ if (outlet) { SU2_OMP_MASTER GetOutlet_Properties(geometry, config, iMesh, Output); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -924,6 +927,7 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container if (!physical) nonPhysicalPoints++; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -1053,6 +1057,7 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -1076,6 +1081,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER const bool implicit = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT); const bool muscl = (config->GetMUSCL_Flow() && (iMesh == MESH_0)); @@ -1219,6 +1225,7 @@ void 
CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -1241,6 +1248,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1298,6 +1306,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (boussinesq) { @@ -1330,6 +1339,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); } + END_SU2_OMP_FOR } if (rotating_frame) { @@ -1364,6 +1374,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (axisymmetric) { @@ -1388,6 +1399,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont nodes->SetAuxVar(iPoint, 0, AuxVar); } + END_SU2_OMP_FOR /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ @@ -1451,6 +1463,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } if (radiation) { @@ -1493,6 +1506,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } @@ -1506,6 +1520,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. 
---*/ nodes->SetAuxVar(iPoint, 0, nodes->GetEddyViscosity(iPoint)); } + END_SU2_OMP_FOR /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/ if (config->GetKind_Gradient_Method() == GREEN_GAUSS) { @@ -1545,6 +1560,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } // for iPoint + END_SU2_OMP_FOR if(!streamwise_periodic_temperature && energy) { @@ -1584,6 +1600,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont LinSysRes.AddBlock(iPoint, residual); }// for iVertex + END_SU2_OMP_FOR }// if periodic inlet boundary }// for iMarker @@ -1619,6 +1636,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } } @@ -1754,9 +1772,11 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) maxVel2 = max(maxVel2, nodes->GetVelocity2(iPoint)); + END_SU2_OMP_FOR SU2_OMP_CRITICAL MaxVel2 = max(MaxVel2, maxVel2); + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER @@ -1766,6 +1786,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co config->SetMax_Vel2(max(1e-10, MaxVel2)); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -1776,6 +1797,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) nodes->SetBetaInc2(iPoint, BetaInc2); + END_SU2_OMP_FOR } @@ -2008,6 +2030,7 @@ void CIncEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_contain Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } @@ -2249,6 +2272,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } void 
CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, @@ -2446,6 +2470,7 @@ void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i); } + END_SU2_OMP_FOR } @@ -2536,6 +2561,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver Jacobian.AddVal2Diag(iPoint, nDim+1, delta); } } + END_SU2_OMP_FOR } else { @@ -2579,6 +2605,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -2615,6 +2642,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver for (iVar = 0; iVar < nVar-!energy; iVar++) LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } + END_SU2_OMP_FOR } } @@ -2675,6 +2703,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver Jacobian.AddVal2Diag(iPoint, nDim+1, delta); } } + END_SU2_OMP_FOR } } @@ -2954,4 +2983,6 @@ void CIncEulerSolver::SetFreeStream_Solution(const CConfig *config){ } nodes->SetSolution(iPoint,nDim+1, Temperature_Inf); } + END_SU2_OMP_FOR + } diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp index 2eb8acbd679..f105e7f64f8 100644 --- a/SU2_CFD/src/solvers/CIncNSSolver.cpp +++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp @@ -290,10 +290,12 @@ void CIncNSSolver::Compute_Streamwise_Periodic_Recovered_Values(CConfig *config, nodes->SetStreamwise_Periodic_RecoveredTemperature(iPoint, Temperature_Recovered); } } // for iPoint + END_SU2_OMP_FOR /*--- Compute the integrated Heatflux Q into the domain, and massflow over periodic markers ---*/ SU2_OMP_MASTER GetStreamwise_Periodic_Properties(geometry, config, iMesh); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -338,6 +340,7 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c 
nodes->SetDES_LengthScale(iPoint,DES_LengthScale); } + END_SU2_OMP_FOR return nonPhysicalPoints; @@ -476,6 +479,7 @@ void CIncNSSolver::BC_Wall_Generic(const CGeometry *geometry, const CConfig *con } } } + END_SU2_OMP_FOR } void CIncNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver**, CNumerics*, @@ -585,4 +589,5 @@ void CIncNSSolver::BC_ConjugateHeat_Interface(CGeometry *geometry, CSolver **sol nodes->SetSolution_Old(iPoint, nDim+1, Twall); nodes->SetEnergy_ResTruncError_Zero(iPoint); } + END_SU2_OMP_FOR } diff --git a/SU2_CFD/src/solvers/CMeshSolver.cpp b/SU2_CFD/src/solvers/CMeshSolver.cpp index f008ad0e812..314ba5e1b36 100644 --- a/SU2_CFD/src/solvers/CMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CMeshSolver.cpp @@ -157,6 +157,7 @@ CMeshSolver::CMeshSolver(CGeometry *geometry, CConfig *config) : CFEASolver(true SU2_OMP_PARALLEL { SetMinMaxVolume(geometry, config, false); } + END_SU2_OMP_PARALLEL /*--- Compute the wall distance using the reference coordinates ---*/ SetWallDistance(geometry, config); @@ -187,6 +188,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd MaxVolume = -1E22; MinVolume = 1E22; ElemCounter = 0; } + END_SU2_OMP_MASTER /*--- Local min/max, final reduction outside loop. ---*/ su2double maxVol = -1E22, minVol = 1E22; @@ -238,12 +240,14 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd /*--- Count distorted elements. 
---*/ if (ElemVolume <= 0.0) elCount++; } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxVolume = max(MaxVolume, maxVol); MinVolume = min(MinVolume, minVol); ElemCounter += elCount; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -253,6 +257,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd SU2_MPI::Allreduce(&maxVol, &MaxVolume, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); SU2_MPI::Allreduce(&minVol, &MinVolume, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Volume from 0 to 1 ---*/ @@ -268,6 +273,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd element[iElem].SetRef_Volume(ElemVolume); } } + END_SU2_OMP_FOR /*--- Store the maximum and minimum volume. ---*/ SU2_OMP_MASTER { @@ -283,7 +289,9 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd if ((ElemCounter != 0) && (rank == MASTER_NODE)) cout <<"There are " << ElemCounter << " elements with negative volume.\n" << endl; - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER AD::EndPassive(wasActive); } @@ -346,6 +354,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) { nodes->SetWallDistance(iPoint, MaxDistance); } + END_SU2_OMP_FOR } else { su2double MaxDistance_Local = -1E22, MinDistance_Local = 1E22; @@ -368,11 +377,13 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { if (dist > EPS) MinDistance_Local = min(MinDistance_Local, dist); } + END_SU2_OMP_FOR SU2_OMP_CRITICAL { MaxDistance = max(MaxDistance, MaxDistance_Local); MinDistance = min(MinDistance, MinDistance_Local); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -382,6 +393,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { SU2_MPI::Allreduce(&MaxDistance_Local, &MaxDistance, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm()); 
SU2_MPI::Allreduce(&MinDistance_Local, &MinDistance, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -391,6 +403,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { su2double nodeDist = nodes->GetWallDistance(iPoint)/MaxDistance; nodes->SetWallDistance(iPoint,nodeDist); } + END_SU2_OMP_FOR /*--- Compute the element distances ---*/ SU2_OMP_FOR_STAT(omp_chunk_size) @@ -411,8 +424,10 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) { element[iElem].SetWallDistance(ElemDist); } + END_SU2_OMP_FOR - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, CConfig *config){ @@ -466,6 +481,8 @@ void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, break; } } + END_SU2_OMP_PARALLEL + stiffness_set = true; } @@ -496,6 +513,7 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig SU2_OMP_PARALLEL { LinSysRes.SetValZero(); } + END_SU2_OMP_PARALLEL /*--- Impose boundary conditions (all of them are ESSENTIAL BC's - displacements). ---*/ SetBoundaryDisplacements(geometry[MESH_0], numerics[FEA_TERM], config); @@ -521,7 +539,8 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig /*--- Check for failed deformation (negative volumes). ---*/ SetMinMaxVolume(geometry[MESH_0], config, true); - } // end parallel + } + END_SU2_OMP_PARALLEL } @@ -543,6 +562,7 @@ void CMeshSolver::UpdateGridCoord(CGeometry *geometry, CConfig *config){ geometry->nodes->SetCoord(iPoint, iDim, val_coord); } } + END_SU2_OMP_FOR /*--- Communicate the updated displacements and mesh coordinates. ---*/ geometry->InitiateComms(geometry, config, COORDINATES); @@ -600,6 +620,7 @@ void CMeshSolver::ComputeGridVelocity(CGeometry *geometry, CConfig *config){ } } + END_SU2_OMP_FOR /*--- The velocity was computed for nPointDomain, now we communicate it. 
---*/ geometry->InitiateComms(geometry, config, GRID_VELOCITY); diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp index aae061ef04d..824e2ccdae9 100644 --- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp @@ -525,6 +525,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con unsigned long counter_local = 0; SU2_OMP_MASTER ErrorCounter = 0; + END_SU2_OMP_MASTER /*--- Pick one numerics object per thread. ---*/ CNumerics* numerics = numerics_container[CONV_TERM]; @@ -697,6 +698,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm()); config->SetNonphysical_Reconstr(ErrorCounter); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } } @@ -985,6 +987,7 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con }else eAxi_local++; } + END_SU2_OMP_FOR } /*--- Checking for NaN ---*/ diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp index 87df3fb2c72..09e78f646ea 100644 --- a/SU2_CFD/src/solvers/CNSSolver.cpp +++ b/SU2_CFD/src/solvers/CNSSolver.cpp @@ -90,6 +90,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C SU2_OMP_BARRIER SU2_OMP_MASTER nPrimVarGrad = 1+nDim; + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -116,6 +117,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C if (Output) { SU2_OMP_MASTER nPrimVarGrad = nPrimVarGrad_bak; + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -171,6 +173,7 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons nonPhysicalPoints += !physical; } + END_SU2_OMP_FOR return nonPhysicalPoints; } @@ -316,6 +319,7 @@ void CNSSolver::SetRoe_Dissipation(CGeometry *geometry, CConfig *config){ nodes->SetRoe_Dissipation_NTS(iPoint, delta, config->GetConst_DES()); } 
} + END_SU2_OMP_FOR } @@ -520,6 +524,7 @@ void CNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_container } } } + END_SU2_OMP_FOR if (Jacobian_i) for (auto iVar = 0u; iVar < nVar; iVar++) @@ -717,6 +722,7 @@ void CNSSolver::BC_Isothermal_Wall_Generic(CGeometry *geometry, CSolver **solver } } } + END_SU2_OMP_FOR if (Jacobian_i) for (auto iVar = 0u; iVar < nVar; iVar++) @@ -914,6 +920,7 @@ void CNSSolver::SetTauWall_WF(CGeometry *geometry, CSolver **solver_container, c nodes->SetTauWall(iPoint, Tau_Wall); } + END_SU2_OMP_FOR } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 312e5b39d24..dfaf6d2d4f6 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -1155,6 +1155,7 @@ void CSolver::InitiatePeriodicComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. ---*/ @@ -1232,6 +1233,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, SU2_MPI::Waitany(geometry->nPeriodicRecv, geometry->req_PeriodicRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER source = status.MPI_SOURCE; #else @@ -1543,6 +1545,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, } } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. @@ -1554,6 +1557,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry, SU2_MPI::Waitall(geometry->nPeriodicSend, geometry->req_PeriodicSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER } @@ -1772,6 +1776,7 @@ void CSolver::InitiateComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR /*--- Launch the point-to-point MPI send for this message. ---*/ @@ -1818,6 +1823,7 @@ void CSolver::CompleteComms(CGeometry *geometry, SU2_OMP_MASTER SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status); + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- Once we have recv'd a message, get the source rank. 
---*/ @@ -1932,6 +1938,7 @@ void CSolver::CompleteComms(CGeometry *geometry, break; } } + END_SU2_OMP_FOR } /*--- Verify that all non-blocking point-to-point sends have finished. @@ -1941,6 +1948,7 @@ void CSolver::CompleteComms(CGeometry *geometry, #ifdef HAVE_MPI SU2_OMP_MASTER SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE); + END_SU2_OMP_MASTER #endif SU2_OMP_BARRIER } @@ -2067,6 +2075,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, } } } /* End SU2_OMP_MASTER, now all threads update the CFL number. */ + END_SU2_OMP_MASTER SU2_OMP_BARRIER /* Loop over all points on this grid and apply CFL adaption. */ @@ -2079,6 +2088,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, Max_CFL_Local = 0.0; Avg_CFL_Local = 0.0; } + END_SU2_OMP_MASTER SU2_OMP_FOR_STAT(roundUpDiv(geometry[iMesh]->GetnPointDomain(),omp_get_max_threads())) for (unsigned long iPoint = 0; iPoint < geometry[iMesh]->GetnPointDomain(); iPoint++) { @@ -2147,6 +2157,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, } } + END_SU2_OMP_FOR /* Reduce the min/max/avg local CFL numbers. 
*/ @@ -2157,6 +2168,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, Max_CFL_Local = max(Max_CFL_Local,myCFLMax); Avg_CFL_Local += myCFLSum; } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER @@ -2167,6 +2179,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry, SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -2401,6 +2414,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { LinSysRes(iPoint,iVar) += Flux * Solution_i[iVar]; } } + END_SU2_OMP_FOR /*--- Loop boundary edges ---*/ @@ -2426,6 +2440,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { for (auto iVar = 0u; iVar < nVar; iVar++) LinSysRes(iPoint,iVar) -= Flux * base_nodes->GetSolution(iPoint,iVar); } + END_SU2_OMP_FOR } } @@ -2508,6 +2523,7 @@ void CSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *config) } } } + END_SU2_OMP_FOR /*--- Correct the Laplacian across any periodic boundaries. 
---*/ @@ -3049,7 +3065,9 @@ void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) { } - } SU2_OMP_BARRIER + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- It's necessary to communicate this information ---*/ diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp index a33a4795822..fb774ed82b1 100644 --- a/SU2_CFD/src/solvers/CTurbSASolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp @@ -246,6 +246,7 @@ void CTurbSASolver::Preprocessing(CGeometry *geometry, CSolver **solver_containe auto Laminar_Viscosity = solver_container[FLOW_SOL]->GetNodes()->GetLaminarViscosity(iPoint); nodes->SetVortex_Tilting(iPoint, PrimGrad_Flow, Vorticity, Laminar_Viscosity); } + END_SU2_OMP_FOR } /*--- Compute the DES length scale ---*/ @@ -291,6 +292,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain nodes->SetmuT(iPoint,muT); } + END_SU2_OMP_FOR } @@ -389,6 +391,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR if (harmonic_balance) { @@ -404,6 +407,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai LinSysRes(iPoint,iVar) += Source*Volume; } } + END_SU2_OMP_FOR } } @@ -420,6 +424,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta if (config->GetWall_Functions()) { SU2_OMP_MASTER SetNuTilde_WF(geometry, solver_container, conv_numerics, visc_numerics, config, val_marker); + END_SU2_OMP_MASTER SU2_OMP_BARRIER return; } @@ -485,6 +490,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta } } } + END_SU2_OMP_FOR } void CTurbSASolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, @@ -545,6 +551,7 @@ void CTurbSASolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container } } + END_SU2_OMP_FOR } @@ -632,6 +639,7 @@ void 
CTurbSASolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, CN } } + END_SU2_OMP_FOR } @@ -717,6 +725,7 @@ void CTurbSASolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR } @@ -805,6 +814,7 @@ void CTurbSASolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_conta } } + END_SU2_OMP_FOR } @@ -893,6 +903,7 @@ void CTurbSASolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_cont } } + END_SU2_OMP_FOR } @@ -1042,6 +1053,7 @@ void CTurbSASolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, // Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } @@ -1135,6 +1147,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -1239,6 +1252,7 @@ void CTurbSASolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contain if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -1899,6 +1913,7 @@ void CTurbSASolver::SetDES_LengthScale(CSolver **solver, CGeometry *geometry, CC nodes->SetDES_LengthScale(iPoint, lengthScale); } + END_SU2_OMP_FOR } void CTurbSASolver::SetInletAtVertex(const su2double *val_inlet, diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp index c03e2295c19..cc9ed8c8013 100644 --- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp @@ -282,6 +282,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai nodes->SetmuT(iPoint,muT); } + END_SU2_OMP_FOR } @@ -356,6 +357,7 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } @@ -450,6 +452,7 @@ void CTurbSSTSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_cont } } } + END_SU2_OMP_FOR } 
void CTurbSSTSolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics, @@ -512,6 +515,7 @@ void CTurbSSTSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_containe if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i); } } + END_SU2_OMP_FOR } @@ -605,6 +609,7 @@ void CTurbSSTSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, C } } + END_SU2_OMP_FOR } @@ -695,6 +700,7 @@ void CTurbSSTSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, } } + END_SU2_OMP_FOR } @@ -786,6 +792,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_ if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } @@ -896,6 +903,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i); } + END_SU2_OMP_FOR } } diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 4428f5a5f72..1df93e60466 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -228,6 +228,7 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe Viscous_Residual(iEdge, geometry, solver_container, numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config); } + END_SU2_OMP_FOR } // end color loop if (ReducerStrategy) { @@ -304,6 +305,7 @@ void CTurbSolver::SumEdgeFluxes(CGeometry* geometry) { LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge)); } } + END_SU2_OMP_FOR } @@ -502,6 +504,7 @@ void CTurbSolver::BC_Fluid_Interface(CGeometry *geometry, CSolver **solver_conta Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i); } + END_SU2_OMP_FOR } delete [] PrimVar_j; @@ -520,6 +523,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver SetRes_RMS(iVar, 0.0); SetRes_Max(iVar, 0.0, 0); } + END_SU2_OMP_MASTER 
SU2_OMP_BARRIER su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0}; @@ -562,16 +566,19 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver } } } + END_SU2_OMP_FOR SU2_OMP_CRITICAL for (unsigned short iVar = 0; iVar < nVar; iVar++) { AddRes_RMS(iVar, resRMS[iVar]); AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ SU2_OMP_MASTER SetResidual_RMS(geometry, config); + END_SU2_OMP_MASTER SU2_OMP_BARRIER } @@ -597,6 +604,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) { nodes->AddSolution(iPoint, 0, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint]); } + END_SU2_OMP_FOR break; case SST: case SST_SUST: @@ -616,6 +624,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve density, density_old, lowerlimit[iVar], upperlimit[iVar]); } } + END_SU2_OMP_FOR break; } @@ -642,6 +651,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_ LinSysRes.SetBlock_Zero(iPoint); LinSysSol.SetBlock_Zero(iPoint); } + END_SU2_OMP_FOR auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config); @@ -649,6 +659,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_ SetIterLinSolver(iter); SetResLinSolver(System.GetResidual()); } + END_SU2_OMP_MASTER SU2_OMP_BARRIER CompleteImplicitIteration(geometry, solver_container, config); @@ -702,6 +713,7 @@ void CTurbSolver::ComputeUnderRelaxationFactor(const CConfig *config) { nodes->SetUnderRelaxation(iPoint, localUnderRelaxation); } + END_SU2_OMP_FOR } @@ -803,6 +815,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR } else { @@ -849,6 +862,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver 
**solver_con LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL; } } + END_SU2_OMP_FOR /*--- Loop over the boundary edges ---*/ @@ -896,6 +910,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con } } + END_SU2_OMP_FOR } } @@ -968,6 +983,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep)); } } + END_SU2_OMP_FOR } // end dynamic grid @@ -1050,6 +1066,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * } } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe. + END_SU2_OMP_MASTER SU2_OMP_BARRIER /*--- MPI solution and compute the eddy viscosity ---*/ @@ -1077,6 +1094,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * } solver[iMesh][TURB_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse); } + END_SU2_OMP_FOR solver[iMesh][TURB_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION); solver[iMesh][TURB_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION); @@ -1093,7 +1111,8 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig * delete [] Restart_Vars; Restart_Vars = nullptr; delete [] Restart_Data; Restart_Data = nullptr; - } // end SU2_OMP_MASTER + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER } From 223c10d34febb8ef27d55d75421b2b5397557f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 17 Mar 2021 15:47:57 +0100 Subject: [PATCH 22/57] Recover CoDiPack version. 
--- externals/codi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/codi b/externals/codi index 1b8d3f5f03d..6a67202a388 160000 --- a/externals/codi +++ b/externals/codi @@ -1 +1 @@ -Subproject commit 1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8 +Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692 From 6775b29ced94423de19fb31541b2e9c3e0525b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 17 Mar 2021 15:50:23 +0100 Subject: [PATCH 23/57] OpDiLib update. --- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index f14b42f1255..a1210cc3d2f 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit f14b42f1255674bb10db91e3f45ceb39c1bccd17 +Subproject commit a1210cc3d2f58fa4652c70000920ff2e76896cf6 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index a42640f9fde..bbcd1b2ab4d 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = 'f14b42f1255674bb10db91e3f45ceb39c1bccd17' + sha_version_opdi = 'a1210cc3d2f58fa4652c70000920ff2e76896cf6' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From 6aaebca2b7c3273ef365b57d8352a7339f9f6bfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 17 Mar 2021 15:51:00 +0100 Subject: [PATCH 24/57] Add syntax file. 
--- su2omp.syntax.json | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 su2omp.syntax.json diff --git a/su2omp.syntax.json b/su2omp.syntax.json new file mode 100644 index 00000000000..5a524950142 --- /dev/null +++ b/su2omp.syntax.json @@ -0,0 +1,42 @@ +{ + "this file's header": + [ + "\\file su2omp.syntax.json", + "\\brief Definitions for the OpDiLib syntax checker", + "\\author J. Blühdorn", + "\\version 7.1.1 \"Blackbird\"", + + "SU2 Project Website: https://su2code.github.io", + + "The SU2 Project is maintained by the SU2 Foundation ", + "(http://su2foundation.org)", + + "Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)", + + "SU2 is free software; you can redistribute it and/or", + "modify it under the terms of the GNU Lesser General Public", + "License as published by the Free Software Foundation; either", + "version 2.1 of the License, or (at your option) any later version.", + + "SU2 is distributed in the hope that it will be useful,", + "but WITHOUT ANY WARRANTY; without even the implied warranty of", + "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU", + "Lesser General Public License for more details.", + + "You should have received a copy of the GNU Lesser General Public", + "License along with SU2. If not, see <http://www.gnu.org/licenses/>." + ], + "pairs": + { + "SU2_OMP_MASTER": "END_SU2_OMP_MASTER", + "SU2_OMP_CRITICAL": "END_SU2_OMP_CRITICAL", + "SU2_OMP_PARALLEL": "END_SU2_OMP_PARALLEL", + "SU2_OMP_PARALLEL_": "END_SU2_OMP_PARALLEL", + "SU2_OMP_PARALLEL_ON": "END_SU2_OMP_PARALLEL", + "SU2_OMP_FOR_": "END_SU2_OMP_FOR", + "SU2_OMP_FOR_DYN": "END_SU2_OMP_FOR", + "SU2_OMP_FOR_STAT": "END_SU2_OMP_FOR", + "CSYSVEC_PARFOR": "END_CSYSVEC_PARFOR", + "CNEWTON_PARFOR": "END_CNEWTON_PARFOR" + } +} From ce44cac16305c9d6c96c2c2424e151fc3d04972b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Wed, 17 Mar 2021 16:35:24 +0100 Subject: [PATCH 25/57] Fix missing END macros. 
--- Common/src/linear_algebra/CSysSolve.cpp | 2 ++ SU2_CFD/src/solvers/CSolver.cpp | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 25e0294cd10..650e3f728c8 100644 --- a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -852,6 +852,7 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co xIsZero = false; tol_type = LinearToleranceType::ABSOLUTE; } + END_SU2_OMP_MASTER /*--- Create matrix-vector product, preconditioner, and solve the linear system ---*/ @@ -1058,6 +1059,7 @@ unsigned long CSysSolve::Solve_b(CSysMatrix & Jacobian, xIsZero = false; tol_type = LinearToleranceType::ABSOLUTE; } + END_SU2_OMP_MASTER HandleTemporariesIn(LinSysRes, LinSysSol); diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 9daad35cf84..ba62e146500 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -4054,6 +4054,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig Residual_BGS[iVar] = 0.0; Residual_Max_BGS[iVar] = 0.0; } + END_SU2_OMP_MASTER vector resMax(nVar,0.0), resRMS(nVar,0.0); vector coordMax(nVar,nullptr); @@ -4077,6 +4078,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig } } } + END_SU2_OMP_FOR /*--- Reduce residual information over all threads in this rank. 
---*/ SU2_OMP_CRITICAL @@ -4084,11 +4086,14 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig Residual_BGS[iVar] += resRMS[iVar]; AddRes_Max_BGS(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); } + END_SU2_OMP_CRITICAL SU2_OMP_BARRIER SU2_OMP_MASTER SetResidual_BGS(geometry, config); + END_SU2_OMP_MASTER SU2_OMP_BARRIER - } // end SU2_OMP_PARALLEL + } + END_SU2_OMP_PARALLEL } From f093b3501cbe2f0a66a3b1c1bd8e47806abf4f6c Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 17 Mar 2021 17:16:34 +0000 Subject: [PATCH 26/57] move MASTER out of ExtFunc functions, parallel copy in CSysSolve_b --- Common/include/basic_types/ad_structure.hpp | 78 ++++++--------------- Common/src/linear_algebra/CSysSolve.cpp | 58 +++++++-------- Common/src/linear_algebra/CSysSolve_b.cpp | 25 +++---- 3 files changed, 57 insertions(+), 104 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 1699534828b..185ee136350 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -483,87 +483,59 @@ namespace AD{ } FORCEINLINE void StartExtFunc(bool storePrimalInput, bool storePrimalOutput){ - SU2_OMP_MASTER - { - FuncHelper = new ExtFuncHelper(true); - if (!storePrimalInput){ - FuncHelper->disableInputPrimalStore(); - } - if (!storePrimalOutput){ - FuncHelper->disableOutputPrimalStore(); - } + FuncHelper = new ExtFuncHelper(true); + if (!storePrimalInput){ + FuncHelper->disableInputPrimalStore(); + } + if (!storePrimalOutput){ + FuncHelper->disableOutputPrimalStore(); } - END_SU2_OMP_MASTER } FORCEINLINE void SetExtFuncIn(const su2double &data) { - SU2_OMP_MASTER - { - FuncHelper->addInput(data); - } - END_SU2_OMP_MASTER + FuncHelper->addInput(data); } template FORCEINLINE void SetExtFuncIn(const T& data, const int size) { - SU2_OMP_MASTER - { - for (int i = 0; i < size; i++) { - FuncHelper->addInput(data[i]); 
- } + for (int i = 0; i < size; i++) { + FuncHelper->addInput(data[i]); } - END_SU2_OMP_MASTER } template FORCEINLINE void SetExtFuncIn(const T& data, const int size_x, const int size_y) { - SU2_OMP_MASTER - { - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - FuncHelper->addInput(data[i][j]); - } + for (int i = 0; i < size_x; i++) { + for (int j = 0; j < size_y; j++) { + FuncHelper->addInput(data[i][j]); } } - END_SU2_OMP_MASTER } FORCEINLINE void SetExtFuncOut(su2double& data) { - SU2_OMP_MASTER - { - if (AD::getGlobalTape().isActive()) { - FuncHelper->addOutput(data); - } + if (AD::getGlobalTape().isActive()) { + FuncHelper->addOutput(data); } - END_SU2_OMP_MASTER } template FORCEINLINE void SetExtFuncOut(T&& data, const int size) { - SU2_OMP_MASTER - { - for (int i = 0; i < size; i++) { - if (AD::getGlobalTape().isActive()) { - FuncHelper->addOutput(data[i]); - } + for (int i = 0; i < size; i++) { + if (AD::getGlobalTape().isActive()) { + FuncHelper->addOutput(data[i]); } } - END_SU2_OMP_MASTER } template FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) { - SU2_OMP_MASTER - { - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - if (AD::getGlobalTape().isActive()) { - FuncHelper->addOutput(data[i][j]); - } + for (int i = 0; i < size_x; i++) { + for (int j = 0; j < size_y; j++) { + if (AD::getGlobalTape().isActive()) { + FuncHelper->addOutput(data[i][j]); } } } - END_SU2_OMP_MASTER } FORCEINLINE void delete_handler(void *handler) { @@ -571,13 +543,7 @@ namespace AD{ checkpoint->clear(); } - FORCEINLINE void EndExtFunc() { - SU2_OMP_MASTER - { - delete FuncHelper; - } - END_SU2_OMP_MASTER - } + FORCEINLINE void EndExtFunc() { delete FuncHelper; } FORCEINLINE bool BeginPassive() { if(AD::getGlobalTape().isActive()) { diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp index 333104600ae..9321b4eecb3 100644 --- 
a/Common/src/linear_algebra/CSysSolve.cpp +++ b/Common/src/linear_algebra/CSysSolve.cpp @@ -859,10 +859,11 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co TapeActive = AD::getGlobalTape().isActive(); - AD::StartExtFunc(false, false); - - AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); - + SU2_OMP_MASTER { + AD::StartExtFunc(false, false); + AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize()); + } + END_SU2_OMP_MASTER SU2_OMP_BARRIER AD::StopRecording(); @@ -933,33 +934,6 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co if (!mesh_deform) KindPrecond = config->GetKind_DiscAdj_Linear_Prec(); else KindPrecond = config->GetKind_Deform_Linear_Solver_Prec(); - /*--- Start recording if it was stopped for the linear solver ---*/ - - AD::StartRecording(); - - SU2_OMP_BARRIER - - AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize()); - - SU2_OMP_BARRIER - -#ifdef CODI_REVERSE_TYPE - SU2_OMP_MASTER - { - AD::FuncHelper->addUserData(&LinSysRes); - AD::FuncHelper->addUserData(&LinSysSol); - AD::FuncHelper->addUserData(&Jacobian); - AD::FuncHelper->addUserData(geometry); - AD::FuncHelper->addUserData(config); - AD::FuncHelper->addUserData(this); - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - - AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); - SU2_OMP_BARRIER -#endif - /*--- Build preconditioner for the transposed Jacobian ---*/ if (RequiresTranspose) Jacobian.TransposeInPlace(); @@ -983,11 +957,31 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co break; } + /*--- Start recording if it was stopped for the linear solver ---*/ +#ifdef CODI_REVERSE_TYPE + AD::StartRecording(); SU2_OMP_BARRIER - AD::EndExtFunc(); + SU2_OMP_MASTER { + AD::SetExtFuncOut(&LinSysSol[0], LinSysSol.GetLocSize()); + AD::FuncHelper->addUserData(&LinSysRes); + AD::FuncHelper->addUserData(&LinSysSol); + AD::FuncHelper->addUserData(&Jacobian); + AD::FuncHelper->addUserData(geometry); + AD::FuncHelper->addUserData(config); + AD::FuncHelper->addUserData(this); + 
} + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + + AD::FuncHelper->addToTape(CSysSolve_b::Solve_b); + SU2_OMP_BARRIER + SU2_OMP_MASTER + AD::EndExtFunc(); + END_SU2_OMP_MASTER SU2_OMP_BARRIER +#endif } return IterLinSol; diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp index 709bb251d70..d80eb4b306a 100644 --- a/Common/src/linear_algebra/CSysSolve_b.cpp +++ b/Common/src/linear_algebra/CSysSolve_b.cpp @@ -57,27 +57,20 @@ void CSysSolve_b::Solve_b(const codi::RealReverse::Real* x, codi::Re /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/ SU2_OMP_BARRIER - SU2_OMP_MASTER - { - for (unsigned long i = 0; i < n; i++) { - (*LinSysRes_b)[i] = y_b[i]; - (*LinSysSol_b)[i] = 0.0; - } + SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads())) + for (unsigned long i = 0; i < n; i++) { + (*LinSysRes_b)[i] = y_b[i]; + (*LinSysSol_b)[i] = 0.0; } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_FOR solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config, false); - SU2_OMP_BARRIER - SU2_OMP_MASTER - { - for (unsigned long i = 0; i < n; i ++) { - x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i)); - } + SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads())) + for (unsigned long i = 0; i < n; i ++) { + x_b[i] = SU2_TYPE::GetValue((*LinSysSol_b)[i]); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + END_SU2_OMP_FOR } template class CSysSolve_b; From e174bacf79664e043eb17852cc991667a90d9f88 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 17 Mar 2021 17:36:57 +0000 Subject: [PATCH 27/57] move master into some solver methods --- .../include/solvers/CFVMFlowSolverBase.hpp | 20 ++---- .../include/solvers/CFVMFlowSolverBase.inl | 7 +++ SU2_CFD/src/solvers/CFEASolver.cpp | 2 - SU2_CFD/src/solvers/CSolver.cpp | 61 +++++++++++-------- SU2_CFD/src/solvers/CTurbSolver.cpp | 3 - 5 files changed, 46 insertions(+), 47 deletions(-) diff --git 
a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index 017a46340ee..2525256259b 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -825,17 +825,11 @@ class CFVMFlowSolverBase : public CSolver { CompleteComms(geometry, config, SOLUTION); if (!adjoint) { - SU2_OMP_MASTER { - /*--- Compute the root mean square residual ---*/ - - SetResidual_RMS(geometry, config); - - /*--- For verification cases, compute the global error metrics. ---*/ + /*--- Compute the root mean square residual ---*/ + SetResidual_RMS(geometry, config); - ComputeVerificationError(geometry, config); - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + /*--- For verification cases, compute the global error metrics. ---*/ + ComputeVerificationError(geometry, config); } } @@ -941,10 +935,7 @@ class CFVMFlowSolverBase : public CSolver { SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER } /*! @@ -977,10 +968,7 @@ class CFVMFlowSolverBase : public CSolver { CompleteComms(geometry, config, SOLUTION); /*--- For verification cases, compute the global error metrics. ---*/ - SU2_OMP_MASTER ComputeVerificationError(geometry, config); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER } /*! diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl index f66d4b7da15..db290675173 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl @@ -465,6 +465,7 @@ void CFVMFlowSolverBase::Viscous_Residual_impl(unsigned long iEdge, CGeome template void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CConfig* config) { + /*--- The errors only need to be computed on the finest grid. 
---*/ if (MGLevel != MESH_0) return; @@ -479,6 +480,8 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo (config->GetInnerIter() == 1)); if (!write_heads) return; + SU2_OMP_MASTER { + /*--- Check if there actually is an exact solution for this verification case, if computed at all. ---*/ if (VerificationSolution && VerificationSolution->ExactSolutionKnown()) { @@ -518,6 +521,10 @@ void CFVMFlowSolverBase::ComputeVerificationError(CGeometry* geometry, CCo PrintVerificationError(config); } + + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } template diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp index 6d1675e1f2d..2145310cb77 100644 --- a/SU2_CFD/src/solvers/CFEASolver.cpp +++ b/SU2_CFD/src/solvers/CFEASolver.cpp @@ -1881,9 +1881,7 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics SU2_OMP_BARRIER /*--- Compute the root mean square residual. ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - END_SU2_OMP_MASTER } END_SU2_OMP_PARALLEL diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index ba62e146500..6b8361ea23d 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -2153,6 +2153,8 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) if (geometry->GetMGLevel() != MESH_0) return; + SU2_OMP_MASTER { + /*--- Set the L2 Norm residual in all the processors. ---*/ vector rbuf_res(nVar); @@ -2185,30 +2187,36 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config) /*--- Set the Maximum residual in all the processors. 
---*/ - if (config->GetComm_Level() != COMM_FULL) return; + if (config->GetComm_Level() == COMM_FULL) { - const unsigned long nProcessor = size; + const unsigned long nProcessor = size; - su2activematrix rbuf_residual(nProcessor,nVar); - su2matrix rbuf_point(nProcessor,nVar); - su2activematrix rbuf_coord(nProcessor*nVar, nDim); + su2activematrix rbuf_residual(nProcessor,nVar); + su2matrix rbuf_point(nProcessor,nVar); + su2activematrix rbuf_coord(nProcessor*nVar, nDim); - SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); - for (unsigned short iVar = 0; iVar < nVar; iVar++) { - for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { - AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + for (unsigned short iVar = 0; iVar < nVar; iVar++) { + for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { + AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + } } } + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) { if (geometry->GetMGLevel() != MESH_0) return; + SU2_OMP_MASTER { + /*--- Set the L2 Norm residual in all the processors. 
---*/ vector rbuf_res(nVar); @@ -2220,26 +2228,30 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) Residual_BGS[iVar] = max(EPS*EPS, sqrt(rbuf_res[iVar]/Global_nPointDomain)); } - if (config->GetComm_Level() != COMM_FULL) return; + if (config->GetComm_Level() == COMM_FULL) { - /*--- Set the Maximum residual in all the processors. ---*/ + /*--- Set the Maximum residual in all the processors. ---*/ - const unsigned long nProcessor = size; + const unsigned long nProcessor = size; - su2activematrix rbuf_residual(nProcessor,nVar); - su2matrix rbuf_point(nProcessor,nVar); - su2activematrix rbuf_coord(nProcessor*nVar, nDim); + su2activematrix rbuf_residual(nProcessor,nVar); + su2matrix rbuf_point(nProcessor,nVar); + su2activematrix rbuf_coord(nProcessor*nVar, nDim); - SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); - SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm()); + SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm()); - for (unsigned short iVar = 0; iVar < nVar; iVar++) { - for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { - AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + for (unsigned short iVar = 0; iVar < nVar; iVar++) { + for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) { + AddRes_Max_BGS(iVar, 
rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]); + } } } + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) { @@ -4089,10 +4101,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig END_SU2_OMP_CRITICAL SU2_OMP_BARRIER - SU2_OMP_MASTER SetResidual_BGS(geometry, config); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER } END_SU2_OMP_PARALLEL diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp index 5fa99ec3dd5..acf72dae705 100644 --- a/SU2_CFD/src/solvers/CTurbSolver.cpp +++ b/SU2_CFD/src/solvers/CTurbSolver.cpp @@ -570,10 +570,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver SU2_OMP_BARRIER /*--- Compute the root mean square residual ---*/ - SU2_OMP_MASTER SetResidual_RMS(geometry, config); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER } void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) { From 218262253bf762f80ab0725d819022357ec5ccb5 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 17 Mar 2021 19:03:22 +0000 Subject: [PATCH 28/57] try to have less "end master" --- .../include/solvers/CFVMFlowSolverBase.hpp | 37 +++++++----- SU2_CFD/src/solvers/CEulerSolver.cpp | 60 +++++++------------ SU2_CFD/src/solvers/CIncEulerSolver.cpp | 5 +- SU2_CFD/src/solvers/CNSSolver.cpp | 15 +---- 4 files changed, 46 insertions(+), 71 deletions(-) diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp index 2525256259b..0cddca7b081 100644 --- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp +++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp @@ -34,6 +34,15 @@ class CNumericsSIMD; template class CFVMFlowSolverBase : public CSolver { + private: + static void recursiveAssign() {} + + template + static void recursiveAssign(U& d, const V& s, Ts&&... 
otherPairs) { + d = s; + recursiveAssign(otherPairs...); + } + protected: static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ static constexpr size_t MAXNVAR = VariableType::MAXNVAR; /*!< \brief Max number of variables, for static arrays. */ @@ -43,6 +52,18 @@ class CFVMFlowSolverBase : public CSolver { unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */ + /*! + * \brief Utility to set the value of a member variables safely, and so that the new values are seen by all threads. + * \param[in] lhsRhsPairs - Pairs of destination and source e.g. a,0,b,-1. + */ + template + static void ompMasterAssignBarrier(Ts&&... lhsRhsPairs) { + SU2_OMP_MASTER + recursiveAssign(lhsRhsPairs...); + END_SU2_OMP_MASTER + SU2_OMP_BARRIER + } + su2double Mach_Inf = 0.0; /*!< \brief Mach number at the infinity. */ su2double Density_Inf = 0.0; /*!< \brief Density at the infinity. */ su2double Energy_Inf = 0.0; /*!< \brief Energy at the infinity. */ @@ -318,14 +339,7 @@ class CFVMFlowSolverBase : public CSolver { * Critical sections are used for this instead of reduction * clauses for compatibility with OpenMP 2.0 (Windows...). ---*/ - SU2_OMP_MASTER - { - Min_Delta_Time = 1e30; - Max_Delta_Time = 0.0; - Global_Delta_UnstTimeND = 1e30; - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + ompMasterAssignBarrier(Min_Delta_Time,1e30, Max_Delta_Time,0.0, Global_Delta_UnstTimeND,1e30); /*--- Loop domain points. 
---*/ @@ -981,12 +995,7 @@ class CFVMFlowSolverBase : public CSolver { const auto& Gradient_Primitive = nodes->GetGradient_Primitive(); auto& StrainMag = nodes->GetStrainMag(); - SU2_OMP_MASTER { - StrainMag_Max = 0.0; - Omega_Max = 0.0; - } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + ompMasterAssignBarrier(StrainMag_Max,0.0, Omega_Max,0.0); su2double strainMax = 0.0, omegaMax = 0.0; diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp index 0ad59dfa87d..b14f0e2e0e9 100644 --- a/SU2_CFD/src/solvers/CEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CEulerSolver.cpp @@ -1757,67 +1757,47 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con config->GetKind_Upwind_Flow() == SLAU || config->GetKind_Upwind_Flow() == SLAU2); - /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/ - - if (fixed_cl && !disc_adjoint && !cont_adjoint) { - SU2_OMP_MASTER - SetFarfield_AoA(geometry, solver_container, config, iMesh, Output); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } - /*--- Set the primitive variables ---*/ - SU2_OMP_MASTER - ErrorCounter = 0; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + ompMasterAssignBarrier(ErrorCounter, 0); SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config); + SU2_OMP_BARRIER - if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { - SU2_OMP_BARRIER - SU2_OMP_MASTER - { + SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/ + + if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) { unsigned long tmp = ErrorCounter; SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); config->SetNonphysical_Points(ErrorCounter); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } - /*--- Compute the engine properties ---*/ + /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). 
---*/ - if (engine) { - SU2_OMP_MASTER - GetPower_Properties(geometry, config, iMesh, Output); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } + if (fixed_cl && !disc_adjoint && !cont_adjoint) { + SetFarfield_AoA(geometry, solver_container, config, iMesh, Output); + } + + /*--- Compute the engine properties ---*/ - /*--- Compute the actuator disk properties and distortion levels ---*/ + if (engine) GetPower_Properties(geometry, config, iMesh, Output); - if (actuator_disk) { - SU2_OMP_MASTER - { + /*--- Compute the actuator disk properties and distortion levels ---*/ + + if (actuator_disk) { Set_MPI_ActDisk(solver_container, geometry, config); GetPower_Properties(geometry, config, iMesh, Output); SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output); } - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } - /*--- Compute NearField MPI ---*/ + /*--- Compute NearField MPI ---*/ + + if (nearfield) Set_MPI_Nearfield(geometry, config); - if (nearfield) { - SU2_OMP_MASTER - Set_MPI_Nearfield(geometry, config); - END_SU2_OMP_MASTER - SU2_OMP_BARRIER } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER /*--- Artificial dissipation ---*/ diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp index 9d57320430d..8f12221be2e 100644 --- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp +++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp @@ -825,10 +825,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_ /*--- Set the primitive variables ---*/ - SU2_OMP_MASTER - ErrorCounter = 0; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER + ompMasterAssignBarrier(ErrorCounter, 0); SU2_OMP_ATOMIC ErrorCounter += SetPrimitive_Variables(solver_container, config); diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp index 09e78f646ea..4ae2992dd4a 100644 --- a/SU2_CFD/src/solvers/CNSSolver.cpp +++ b/SU2_CFD/src/solvers/CNSSolver.cpp @@ -86,13 +86,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver 
**solver_container, C turbulence solver, and post) only temperature and velocity are needed ---*/ const auto nPrimVarGrad_bak = nPrimVarGrad; - if (Output) { - SU2_OMP_BARRIER - SU2_OMP_MASTER - nPrimVarGrad = 1+nDim; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } + if (Output) ompMasterAssignBarrier(nPrimVarGrad, 1+nDim); if (config->GetReconstructionGradientRequired() && muscl && !center) { switch (config->GetKind_Gradient_Method_Recon()) { @@ -114,12 +108,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C SetPrimitive_Gradient_LS(geometry, config); } - if (Output) { - SU2_OMP_MASTER - nPrimVarGrad = nPrimVarGrad_bak; - END_SU2_OMP_MASTER - SU2_OMP_BARRIER - } + if (Output) ompMasterAssignBarrier(nPrimVarGrad, nPrimVarGrad_bak); /*--- Compute the limiters ---*/ From 94dafb4f1137c554e221db3e812ee909bcadc1e3 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 01:00:05 +0000 Subject: [PATCH 29/57] omp directives in DiscAdjSolver --- SU2_CFD/include/solvers/CDiscAdjSolver.hpp | 40 +- SU2_CFD/include/solvers/CSolver.hpp | 23 -- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 447 +++++++-------------- SU2_CFD/src/variables/CVariable.cpp | 6 + 4 files changed, 160 insertions(+), 356 deletions(-) diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp index c5b5dcd4138..7379bf3120f 100644 --- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp @@ -38,9 +38,17 @@ */ class CDiscAdjSolver final : public CSolver { private: + static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ + static constexpr size_t MAXNVAR = 32; /*!< \brief Max number of variables, for static arrays. */ + + static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */ + + unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. 
*/ + unsigned short KindDirect_Solver; CSolver *direct_solver; - su2double **CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ + vector<vector<su2double> > CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ + vector<su2double> Sens_Geo; /*!< \brief Total shape sensitivity for each monitored boundary. */ su2double Total_Sens_Mach; /*!< \brief Total mach sensitivity coefficient for all the boundaries. */ su2double Total_Sens_AoA; /*!< \brief Total angle of attack sensitivity coefficient for all the boundaries. */ su2double Total_Sens_Geo; /*!< \brief Total shape sensitivity coefficient for all the boundaries. */ @@ -52,8 +60,6 @@ class CDiscAdjSolver final : public CSolver { su2double Mach, Alpha, Beta, Pressure, Temperature, BPressure, ModVel; su2double TemperatureRad, Total_Sens_Temp_Rad; - su2double *Solution_Geometry; /*!< \brief Auxiliary vector for the geometry solution (dimension nDim instead of nVar). */ - CDiscAdjVariable* nodes = nullptr; /*!< \brief The highest level in the variable hierarchy this solver can safely use. */ /*! @@ -66,7 +72,7 @@ class CDiscAdjSolver final : public CSolver { /*! * \brief Constructor of the class. */ - CDiscAdjSolver(void); + CDiscAdjSolver() = default; /*! * \overload @@ -88,7 +94,7 @@ class CDiscAdjSolver final : public CSolver { /*! * \brief Destructor of the class. */ - ~CDiscAdjSolver(void) override; + ~CDiscAdjSolver() override; /*! * \brief Performs the preprocessing of the adjoint AD-based solver. @@ -115,14 +121,6 @@ class CDiscAdjSolver final : public CSolver { */ void SetAdjoint_Output(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Sets the adjoint values of the output of the mesh deformation iteration - * before evaluation of the tape. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] config - The particular config. - */ - void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) override; - /*! 
* \brief Sets the adjoint values of the input variables of the flow (+turb.) iteration * after tape has been evaluated. @@ -131,14 +129,6 @@ class CDiscAdjSolver final : public CSolver { */ void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override; - /*! - * \brief A virtual member. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] solver_container - The solver container holding all solutions. - * \param[in] config - The particular config. - */ - void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) override; - /*! * \brief Set the surface sensitivity. * \param[in] geometry - Geometrical definition of the problem. @@ -225,14 +215,6 @@ class CDiscAdjSolver final : public CSolver { */ void SetRecording(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Prepare the solver for a new recording. - * \param[in] kind_recording - Kind of AD recording. - */ - void SetMesh_Recording(CGeometry **geometry, - CVolumetricMovement *grid_movement, - CConfig *config) override; - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp index 7071aae9ae8..57b01c685b8 100644 --- a/SU2_CFD/include/solvers/CSolver.hpp +++ b/SU2_CFD/include/solvers/CSolver.hpp @@ -3655,13 +3655,6 @@ class CSolver { */ inline virtual void SetAdjoint_Output(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. - * \param[in] geometry - The geometrical definition of the problem. - * \param[in] config - The particular config. - */ - inline virtual void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - The geometrical definition of the problem. @@ -3670,14 +3663,6 @@ class CSolver { */ inline virtual void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. 
- * \param[in] geometry - The geometrical definition of the problem. - * \param[in] solver_container - The solver container holding all solutions. - * \param[in] config - The particular config. - */ - inline virtual void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. @@ -3860,14 +3845,6 @@ class CSolver { */ inline virtual void SetRecording(CGeometry *geometry, CConfig *config){} - /*! - * \brief A virtual member. - * \param[in] kind_recording - Kind of AD recording. - */ - inline virtual void SetMesh_Recording(CGeometry **geometry, - CVolumetricMovement *grid_movement, - CConfig *config) {} - /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index cb65dae84be..a2e305aa621 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -27,23 +27,12 @@ #include "../../include/solvers/CDiscAdjSolver.hpp" #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" +#include "../../../Common/include/parallelization/omp_structure.hpp" -CDiscAdjSolver::CDiscAdjSolver(void) : CSolver () { - -} - -CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config) : CSolver() { - -} +CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config) : CSolver() {} CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { - unsigned short iVar, iMarker, iDim; - unsigned long iVertex; - string text_line, mesh_filename; - ifstream restart_file; - string filename, AdjExt; - adjoint = true; nVar = direct_solver->GetnVar(); @@ -51,21 +40,17 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di /*--- Initialize arrays to NULL ---*/ - CSensitivity = nullptr; - 
/*-- Store some information about direct solver ---*/ this->KindDirect_Solver = Kind_Solver; this->direct_solver = direct_solver; - nMarker = config->GetnMarker_All(); nPoint = geometry->GetnPoint(); nPointDomain = geometry->GetnPointDomain(); - /*--- Define some auxiliary vectors related to the residual ---*/ + omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE); - Residual = new su2double[nVar]; for (iVar = 0; iVar < nVar; iVar++) Residual[iVar] = 1.0; - Solution_Geometry = new su2double[nDim]; for (iDim = 0; iDim < nDim; iDim++) Solution_Geometry[iDim] = 1.0; + /*--- Define some auxiliary vectors related to the residual ---*/ Residual_RMS.resize(nVar,1.0); Residual_Max.resize(nVar,1.0); @@ -82,24 +67,16 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some auxiliary vectors related to the solution ---*/ - - Solution = new su2double[nVar]; - - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - /*--- Sensitivity definition and coefficient in all the markers ---*/ - CSensitivity = new su2double* [nMarker]; - - for (iMarker = 0; iMarker < nMarker; iMarker++) { - unsigned long nVertex = geometry->nVertex[iMarker]; - CSensitivity[iMarker] = new su2double [nVertex]; - - for (iVertex = 0; iVertex < nVertex; iVertex++) - CSensitivity[iMarker][iVertex] = 0.0; + CSensitivity.resize(nMarker); + for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) { + const auto nVertex = geometry->nVertex[iMarker]; + CSensitivity[iMarker].resize(nVertex, 0.0); } + Sens_Geo.resize(config->GetnMarker_Monitoring(), 0.0); + /*--- Initialize the discrete adjoint solution to zero everywhere. 
---*/ nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config); @@ -124,47 +101,41 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di } } -CDiscAdjSolver::~CDiscAdjSolver(void) { - - unsigned short iMarker; - - if (CSensitivity != nullptr) { - for (iMarker = 0; iMarker < nMarker; iMarker++) { - delete [] CSensitivity[iMarker]; - } - delete [] CSensitivity; - } - - delete nodes; -} +CDiscAdjSolver::~CDiscAdjSolver(void) { delete nodes; } void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){ - bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND; - bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; + const bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND; + const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; unsigned long iPoint; unsigned short iVar; /*--- Reset the solution to the initial (converged) solution ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint)); } + END_SU2_OMP_FOR if (time_n_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { for (iVar = 0; iVar < nVar; iVar++) { AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint)[iVar]); } } + END_SU2_OMP_FOR } if (time_n1_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (iPoint = 0; iPoint < nPoint; iPoint++) { for (iVar = 0; iVar < nVar; iVar++) { AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint)[iVar]); } } + END_SU2_OMP_FOR } /*--- Set the Jacobian to zero since this is not done inside the fluid iteration @@ -178,64 +149,12 @@ void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){ } -void CDiscAdjSolver::SetMesh_Recording(CGeometry** geometry, CVolumetricMovement *grid_movement, CConfig *config) { - - -// bool time_n_needed = 
((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) || -// (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)), -// time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND; - -// unsigned long ExtIter = config->GetExtIter(); - - unsigned long iPoint; - unsigned short iDim; - - /*--- Reset the solution to the initial (converged) position ---*/ - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - for (iDim = 0; iDim < nDim; iDim++){ - geometry[MESH_0]->nodes->SetCoord(iPoint, iDim,nodes->GetGeometry_Direct(iPoint,iDim)); - } - } - - /*--- After moving all nodes, update the dual mesh. Recompute the edges and - dual mesh control volumes in the domain and on the boundaries. ---*/ - - grid_movement->UpdateDualGrid(geometry[MESH_0], config); - - /*--- After updating the dual mesh, compute the grid velocities (only dynamic problems). ---*/ -// if (time_n_needed){ -// geometry[MESH_0]->SetGridVelocity(config, ExtIter); -// } - - /*--- Update the multigrid structure after moving the finest grid, - including computing the grid velocities on the coarser levels. 
---*/ - - grid_movement->UpdateMultiGrid(geometry, config); - -// if (time_n_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint,iVar)); -// } -// } -// } -// if (time_n1_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint,iVar)); -// } -// } -// } - -} - void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) { - bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND); - bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; - bool input = true; - bool push_index = !config->GetMultizone_Problem(); + const bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND); + const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed; + const bool input = true; + const bool push_index = !config->GetMultizone_Problem(); /*--- Register solution at all necessary time instances and other variables on the tape ---*/ @@ -250,6 +169,8 @@ void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) { void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset) { + SU2_OMP_MASTER { + /*--- Register farfield values as input ---*/ if((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS && !config->GetBoolTurbomachinery())) { @@ -363,12 +284,16 @@ void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, boo /*--- Here it is possible to register other variables as input that influence the flow solution * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be * extracted in the ExtractAdjointVariables routine. 
---*/ + + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjSolver::RegisterOutput(CGeometry *geometry, CConfig *config) { - bool input = false; - bool push_index = !config->GetMultizone_Problem(); + const bool input = false; + const bool push_index = !config->GetMultizone_Problem(); /*--- Register variables as output of the solver iteration ---*/ @@ -383,14 +308,21 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi const su2double relax = (config->GetInnerIter()==0)? 1.0 : config->GetRelaxation_Factor_Adjoint(); + su2double Solution[MAXNVAR] = {0.0}; + /*--- Set Residuals to zero ---*/ SetResToZero(); + su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0}; + const su2double* coordMax[MAXNVAR] = {nullptr}; + unsigned long idxMax[MAXNVAR] = {0}; + /*--- Set the old solution and compute residuals. ---*/ if(!multizone) nodes->Set_OldSolution(); + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0; @@ -413,15 +345,37 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi residual *= isdomain; Residual_RMS[iVar] += pow(residual,2); AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint)); + + /*--- Update residual information for current thread. ---*/ + resRMS[iVar] += residual*residual; + if (fabs(residual) > resMax[iVar]) { + resMax[iVar] = fabs(residual); + idxMax[iVar] = iPoint; + coordMax[iVar] = geometry->nodes->GetCoord(iPoint); + } } } + END_SU2_OMP_FOR + + /*--- Reduce residual information over all threads in this rank. 
---*/ + SU2_OMP_CRITICAL + for (auto iVar = 0u; iVar < nVar; iVar++) { + Residual_RMS[iVar] += resRMS[iVar]; + AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]); + } + END_SU2_OMP_CRITICAL + SU2_OMP_BARRIER SetResidual_RMS(geometry, config); - SetIterLinSolver(direct_solver->System.GetIterations()); - SetResLinSolver(direct_solver->System.GetResidual()); + SU2_OMP_MASTER { + SetIterLinSolver(direct_solver->System.GetIterations()); + SetResLinSolver(direct_solver->System.GetResidual()); + } + END_SU2_OMP_MASTER if (time_n_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { /*--- Extract the adjoint solution at time n ---*/ @@ -432,9 +386,11 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n(iPoint,Solution); } + END_SU2_OMP_FOR } if (time_n1_needed) { + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { /*--- Extract the adjoint solution at time n-1 ---*/ @@ -445,12 +401,15 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi nodes->Set_Solution_time_n1(iPoint,Solution); } + END_SU2_OMP_FOR } } void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) { + SU2_OMP_MASTER { + /*--- Extract the adjoint values of the farfield values ---*/ if ((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS) && !config->GetBoolTurbomachinery()) { @@ -508,98 +467,25 @@ void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *conf /*--- Extract here the adjoint values of everything else that is registered as input in RegisterInput. 
---*/ -} - - -void CDiscAdjSolver::ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) { - -// bool time_n_needed = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) || -// (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)); - -// bool time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND; - -// unsigned short iVar; - unsigned long iPoint; - - /*--- Set Residuals to zero ---*/ - -// for (iVar = 0; iVar < nVar; iVar++){ -// SetRes_RMS(iVar,0.0); -// SetRes_Max(iVar,0.0,0); -// } - - /*--- Set the old solution ---*/ - - nodes->Set_OldSolution_Geometry(); - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - - /*--- Extract the adjoint solution ---*/ - - if (config->GetMultizone_Problem()) - geometry->nodes->GetAdjointCoord_LocalIndex(iPoint, Solution_Geometry); - else - geometry->nodes->GetAdjointCoord(iPoint, Solution_Geometry); - - /*--- Store the adjoint solution ---*/ - - nodes->SetSolution_Geometry(iPoint,Solution_Geometry); - - } - -// if (time_n_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// -// /*--- Extract the adjoint solution at time n ---*/ -// -// direct_solver->GetNodes()->GetAdjointSolution_time_n(iPoint,Solution); -// -// /*--- Store the adjoint solution at time n ---*/ -// -// nodes->Set_Solution_time_n(iPoint,Solution); -// } -// } -// if (time_n1_needed){ -// for (iPoint = 0; iPoint < nPoint; iPoint++){ -// -// /*--- Extract the adjoint solution at time n-1 ---*/ -// -// direct_solver->GetNodes()->GetAdjointSolution_time_n1(iPoint,Solution); -// -// /*--- Store the adjoint solution at time n-1 ---*/ -// -// nodes->Set_Solution_time_n1(iPoint,Solution); -// } -// } - - /*--- Set the residuals ---*/ - -// for (iPoint = 0; iPoint < nPointDomain; iPoint++){ -// for (iVar = 0; iVar < nVar; iVar++){ -// residual = node[iPoint]->GetSolution_Geometry(iVar) - node[iPoint]->Get_OldSolution_Geometry(iVar); -// -// Residual_RMS[iVar] += residual*residual; -// 
AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint)); -// } -// } -// -// SetResidual_RMS(geometry, config); + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { - bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST || - config->GetTime_Marching() == DT_STEPPING_2ND); + const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST || + config->GetTime_Marching() == DT_STEPPING_2ND); - unsigned short iVar; - unsigned long iPoint; + su2double Solution[MAXNVAR] = {0.0}; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iVar = 0; iVar < nVar; iVar++) { + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { + for (auto iVar = 0u; iVar < nVar; iVar++) { Solution[iVar] = nodes->GetSolution(iPoint,iVar); } if (dual_time) { - for (iVar = 0; iVar < nVar; iVar++) { + for (auto iVar = 0u; iVar < nVar; iVar++) { Solution[iVar] += nodes->GetDual_Time_Derivative(iPoint,iVar); } } @@ -610,45 +496,22 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution); } } -} - -void CDiscAdjSolver::SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config){ - -// bool dual_time = (config->GetUnsteady_Simulation() == DT_STEPPING_1ST || -// config->GetUnsteady_Simulation() == DT_STEPPING_2ND); - - unsigned short iDim; - unsigned long iPoint; - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - for (iDim = 0; iDim < nDim; iDim++){ - Solution_Geometry[iDim] = 0.0; - } -// if (dual_time){ -// for (iDim = 0; iDim < nVar; iDim++){ -// Solution_Geometry[iDim] += nodes->GetDual_Time_Derivative_Geometry(iPoint,iDim); -// } -// } - for (iDim = 0; iDim < nDim; iDim++){ - nodes->SetSensitivity(iPoint,iDim, Solution_Geometry[iDim]); - } - geometry->nodes->SetAdjointCoord(iPoint, Solution_Geometry); - } - + END_SU2_OMP_FOR } void 
CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) { - unsigned long iPoint; - unsigned short iDim; - su2double *Coord, Sensitivity, eps; + const bool time_stepping = (config->GetTime_Marching() != STEADY); + const su2double eps = config->GetVenkat_LimiterCoeff()*config->GetAdjSharp_LimiterCoeff(); - bool time_stepping = (config->GetTime_Marching() != STEADY); + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { - for (iPoint = 0; iPoint < nPoint; iPoint++) { - Coord = geometry->nodes->GetCoord(iPoint); + auto Coord = geometry->nodes->GetCoord(iPoint); + + for (auto iDim = 0u; iDim < nDim; iDim++) { - for (iDim = 0; iDim < nDim; iDim++) { + su2double Sensitivity = 0.0; if(config->GetMultizone_Problem()) { Sensitivity = geometry->nodes->GetAdjointSolution(iPoint, iDim); @@ -663,119 +526,100 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve /*--- If sharp edge, set the sensitivity to 0 on that region ---*/ - if (config->GetSens_Remove_Sharp()) { - eps = config->GetVenkat_LimiterCoeff()*config->GetRefElemLength(); - if ( geometry->nodes->GetSharpEdge_Distance(iPoint) < config->GetAdjSharp_LimiterCoeff()*eps ) - Sensitivity = 0.0; + if (config->GetSens_Remove_Sharp() && geometry->nodes->GetSharpEdge_Distance(iPoint) < eps) { + Sensitivity = 0.0; } + if (!time_stepping) { nodes->SetSensitivity(iPoint,iDim, Sensitivity); } else { - nodes->SetSensitivity(iPoint, iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity); + nodes->SetSensitivity(iPoint,iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity); } } } + END_SU2_OMP_FOR + SetSurface_Sensitivity(geometry, config); + } void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) { - unsigned short iMarker, iDim, iMarker_Monitoring; - unsigned long iVertex, iPoint; - su2double *Normal, Prod, Sens = 0.0, SensDim, Area, Sens_Vertex, *Sens_Geo; - Total_Sens_Geo = 0.0; - string Monitoring_Tag, 
Marker_Tag; - Sens_Geo = new su2double[config->GetnMarker_Monitoring()]; - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - Sens_Geo[iMarker_Monitoring] = 0.0; - } + SU2_OMP_MASTER + for (auto& x : Sens_Geo) x = 0.0; + END_SU2_OMP_MASTER - for (iMarker = 0; iMarker < nMarker; iMarker++) { + /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/ - /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/ + for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) { - if(config->GetSolid_Wall(iMarker)) { + if (!config->GetSolid_Wall(iMarker)) continue; - Sens = 0.0; + su2double Sens = 0.0; - for (iVertex = 0; iVertex < geometry->GetnVertex(iMarker); iVertex++) { + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) + for (auto iVertex = 0ul; iVertex < geometry->GetnVertex(iMarker); iVertex++) { - iPoint = geometry->vertex[iMarker][iVertex]->GetNode(); - Normal = geometry->vertex[iMarker][iVertex]->GetNormal(); - Prod = 0.0; - for (iDim = 0; iDim < nDim; iDim++) { - /*--- retrieve the gradient calculated with AD -- */ - SensDim = nodes->GetSensitivity(iPoint,iDim); + /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/ - /*--- calculate scalar product for projection onto the normal vector ---*/ - Prod += Normal[iDim]*SensDim; - - } - - Area = GeometryToolbox::Norm(nDim, Normal); + const auto iPoint = geometry->vertex[iMarker][iVertex]->GetNode(); + const auto Normal = geometry->vertex[iMarker][iVertex]->GetNormal(); + su2double Sens_Vertex = 0.0; + for (auto iDim = 0u; iDim < nDim; iDim++) { + Sens_Vertex += Normal[iDim] * nodes->GetSensitivity(iPoint,iDim); + } + Sens_Vertex /= GeometryToolbox::Norm(nDim, Normal); - /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/ + CSensitivity[iMarker][iVertex] = -Sens_Vertex; + Sens += pow(Sens_Vertex,2); + } + END_SU2_OMP_FOR - Sens_Vertex = Prod/Area; 
- CSensitivity[iMarker][iVertex] = -Sens_Vertex; - Sens += Sens_Vertex*Sens_Vertex; - } + if (config->GetMarker_All_Monitoring(iMarker) == NO) continue; - if (config->GetMarker_All_Monitoring(iMarker) == YES){ + /*--- Compute sensitivity for each surface point ---*/ - /*--- Compute sensitivity for each surface point ---*/ + const auto Marker_Tag = config->GetMarker_All_TagBound(iMarker); - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - Monitoring_Tag = config->GetMarker_Monitoring_TagBound(iMarker_Monitoring); - Marker_Tag = config->GetMarker_All_TagBound(iMarker); - if (Marker_Tag == Monitoring_Tag) { - Sens_Geo[iMarker_Monitoring] = Sens; - } - } + for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) { + if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) { + atomicAdd(Sens_Geo[iMarker_Mon], Sens); + break; } } } -#ifdef HAVE_MPI - su2double *MySens_Geo; - MySens_Geo = new su2double[config->GetnMarker_Monitoring()]; + SU2_OMP_MASTER { + auto local = Sens_Geo; + SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - MySens_Geo[iMarker_Monitoring] = Sens_Geo[iMarker_Monitoring]; - Sens_Geo[iMarker_Monitoring] = 0.0; + Total_Sens_Geo = 0.0; + for (auto x : Sens_Geo) Total_Sens_Geo += x; } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER - SU2_MPI::Allreduce(MySens_Geo, Sens_Geo, config->GetnMarker_Monitoring(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - delete [] MySens_Geo; -#endif +} - for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) { - Sens_Geo[iMarker_Monitoring] = sqrt(Sens_Geo[iMarker_Monitoring]); - Total_Sens_Geo += Sens_Geo[iMarker_Monitoring]; - } +void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, 
unsigned short iMesh, + unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - delete [] Sens_Geo; + const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST) || (config->GetTime_Marching() == DT_STEPPING_2ND); -} + if (!dual_time) return; -void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) { - bool dual_time_1st = (config_container->GetTime_Marching() == DT_STEPPING_1ST); - bool dual_time_2nd = (config_container->GetTime_Marching() == DT_STEPPING_2ND); - bool dual_time = (dual_time_1st || dual_time_2nd); - su2double *solution_n, *solution_n1; - unsigned long iPoint; - unsigned short iVar; - if (dual_time) { - for (iPoint = 0; iPoint<geometry->GetnPoint(); iPoint++) { - solution_n = nodes->GetSolution_time_n(iPoint); - solution_n1 = nodes->GetSolution_time_n1(iPoint); - for (iVar=0; iVar < nVar; iVar++) { - nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar)); - nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]); - } + SU2_OMP_FOR_STAT(omp_chunk_size) + for (auto iPoint = 0ul; iPoint<geometry->GetnPoint(); iPoint++) { + const auto solution_n = nodes->GetSolution_time_n(iPoint); + const auto solution_n1 = nodes->GetSolution_time_n1(iPoint); + + for (auto iVar = 0u; iVar < nVar; iVar++) { + nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar)); + nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]); } } + END_SU2_OMP_FOR } void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { @@ -812,7 +656,6 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi /*--- Read all lines in the restart file ---*/ long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long
iPoint_Global_Local = 0; - unsigned short rbuf_NotMatching = 0, sbuf_NotMatching = 0; /*--- Skip coordinates ---*/ unsigned short skipVars = geometry[MESH_0]->GetnDim(); @@ -862,11 +705,7 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi /*--- Detect a wrong solution file ---*/ - if (iPoint_Global_Local < nPointDomain) { sbuf_NotMatching = 1; } - - SU2_MPI::Allreduce(&sbuf_NotMatching, &rbuf_NotMatching, 1, MPI_UNSIGNED_SHORT, MPI_SUM, SU2_MPI::GetComm()); - - if (rbuf_NotMatching != 0) { + if (iPoint_Global_Local != nPointDomain) { SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); } diff --git a/SU2_CFD/src/variables/CVariable.cpp b/SU2_CFD/src/variables/CVariable.cpp index 5d16271961d..7da4faee991 100644 --- a/SU2_CFD/src/variables/CVariable.cpp +++ b/SU2_CFD/src/variables/CVariable.cpp @@ -113,6 +113,7 @@ void CVariable::Restore_BGSSolution_k() { void CVariable::SetExternalZero() { parallelSet(External.size(), 0.0, External.data()); } void CVariable::RegisterSolution(bool input, bool push_index) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint) { for(unsigned long iVar=0; iVar Date: Fri, 19 Mar 2021 10:56:10 +0000 Subject: [PATCH 30/57] fixes --- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 78 +++++++++++--------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index a2e305aa621..2300e521a71 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -79,6 +79,8 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di /*--- Initialize the discrete adjoint solution to zero everywhere. 
---*/ + su2double Solution[MAXNVAR] = {1e-16}; + nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config); SetBaseClassPointerToNodes(); @@ -624,20 +626,12 @@ void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_contain void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { - unsigned short iVar, iMesh; - unsigned long iPoint, index, iChildren, Point_Fine, counter; - su2double Area_Children, Area_Parent, *Solution_Fine; - string restart_filename, filename; - - bool compressible = (config->GetKind_Regime() == COMPRESSIBLE); - bool incompressible = (config->GetKind_Regime() == INCOMPRESSIBLE); - bool rans = ((config->GetKind_Solver() == DISC_ADJ_RANS) || (config->GetKind_Solver() == DISC_ADJ_INC_RANS)) ; + const bool rans = (config->GetKind_Turb_Model() != NONE); /*--- Restart the solution from file information ---*/ - filename = config->GetSolution_AdjFileName(); - restart_filename = config->GetObjFunc_Extension(filename); - + auto filename = config->GetSolution_AdjFileName(); + auto restart_filename = config->GetObjFunc_Extension(filename); restart_filename = config->GetFilename(restart_filename, "", val_iter); @@ -653,52 +647,43 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename); } - /*--- Read all lines in the restart file ---*/ - - long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0; - /*--- Skip coordinates ---*/ unsigned short skipVars = geometry[MESH_0]->GetnDim(); /*--- Skip flow adjoint variables ---*/ if (KindDirect_Solver== RUNTIME_TURB_SYS) { - if (compressible) { - skipVars += nDim + 2; - } - if (incompressible) { - skipVars += nDim + 2; - } + skipVars += nDim + 2; } /*--- Skip flow adjoint and turbulent variables ---*/ if (KindDirect_Solver == RUNTIME_RADIATION_SYS) { - if (compressible) skipVars += nDim + 2; - if 
(incompressible) skipVars += nDim + 2; + skipVars += nDim + 2; if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar(); } /*--- Load data from the restart into correct containers. ---*/ - counter = 0; - for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { + unsigned long iPoint_Global_Local = 0; + + for (auto iPoint_Global = 0ul; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { /*--- Retrieve local index. If this node from the restart file lives on the current processor, we will load and instantiate the vars. ---*/ - iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); + const auto iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); if (iPoint_Local > -1) { /*--- We need to store this point's data, so jump to the correct offset in the buffer of data from the restart file and load it. ---*/ - index = counter*Restart_Vars[1] + skipVars; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar]; - nodes->SetSolution(iPoint_Local,Solution); - iPoint_Global_Local++; + const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars; - /*--- Increment the overall counter for how many points have been loaded. ---*/ - counter++; + for (auto iVar = 0u; iVar < nVar; iVar++) { + nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]); + } + + iPoint_Global_Local++; } } @@ -710,20 +695,21 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); } - /*--- Communicate the loaded solution on the fine grid before we transfer - it down to the coarse levels. 
---*/ - - for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { - for (iPoint = 0; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) { - Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint); - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 0.0; - for (iChildren = 0; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) { - Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren); - Area_Children = geometry[iMesh-1]->nodes->GetVolume(Point_Fine); - Solution_Fine = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(Point_Fine); - for (iVar = 0; iVar < nVar; iVar++) { - Solution[iVar] += Solution_Fine[iVar]*Area_Children/Area_Parent; - } + /*--- Interpolate solution on coarse grids ---*/ + + for (auto iMesh = 1u; iMesh <= config->GetnMGLevels(); iMesh++) { + + const auto& fineSol = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(); + + for (auto iPoint = 0ul; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) { + su2double Solution[MAXNVAR] = {0.0}; + const su2double Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint); + + for (auto iChildren = 0u; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) { + const auto Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren); + const su2double weight = geometry[iMesh-1]->nodes->GetVolume(Point_Fine) / Area_Parent; + + for (auto iVar = 0u; iVar < nVar; iVar++) Solution[iVar] += weight * fineSol(Point_Fine, iVar); } solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution); } From a7fbcd648a4bf8f15e86881f81b4af731afd5bb4 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 11:02:42 +0000 Subject: [PATCH 31/57] more year updates --- Common/include/code_config.hpp | 2 +- .../include/parallelization/omp_structure.cpp | 2 +- Common/lib/Makefile.am | 2 +- .../CMMSIncEulerSolution.py | 18 ++++-------------- .../CreateMMSSourceTerms/CMMSIncNSSolution.py | 18 ++++-------------- SU2_CFD/obj/Makefile.am | 2 +- 6 
files changed, 12 insertions(+), 32 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index a9aabf17bca..3cbad21f08f 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -9,7 +9,7 @@ * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * - * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) + * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) * * SU2 is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp index 0daca1ca021..6432b6bb482 100644 --- a/Common/include/parallelization/omp_structure.cpp +++ b/Common/include/parallelization/omp_structure.cpp @@ -10,7 +10,7 @@ * The SU2 Project is maintained by the SU2 Foundation * (http://su2foundation.org) * - * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) + * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) * * SU2 is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am index 2e698b72336..813299d0f77 100644 --- a/Common/lib/Makefile.am +++ b/Common/lib/Makefile.am @@ -10,7 +10,7 @@ # The SU2 Project is maintained by the SU2 Foundation # (http://su2foundation.org) # -# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) +# Copyright 2012-2021, SU2 Contributors (cf. 
AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py index cbf999f412e..1d4d187fb83 100755 --- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py +++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py @@ -6,22 +6,12 @@ # \author T. Economon # \version 7.1.1 "Blackbird" # -# The current SU2 release has been coordinated by the -# SU2 International Developers Society -# with selected contributions from the open-source community. +# SU2 Project Website: https://su2code.github.io # -# The main research teams contributing to the current release are: -# - Prof. Juan J. Alonso's group at Stanford University. -# - Prof. Piero Colonna's group at Delft University of Technology. -# - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology. -# - Prof. Alberto Guardone's group at Polytechnic University of Milan. -# - Prof. Rafael Palacios' group at Imperial College London. -# - Prof. Vincent Terrapon's group at the University of Liege. -# - Prof. Edwin van der Weide's group at the University of Twente. -# - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics. +# The SU2 Project is maintained by the SU2 Foundation +# (http://su2foundation.org) # -# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon, -# Tim Albring, and the SU2 contributors. +# Copyright 2012-2021, SU2 Contributors (cf. 
AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py index 614c458c103..c38335336aa 100755 --- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py +++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py @@ -6,22 +6,12 @@ # \author T. Economon # \version 7.1.1 "Blackbird" # -# The current SU2 release has been coordinated by the -# SU2 International Developers Society -# with selected contributions from the open-source community. +# SU2 Project Website: https://su2code.github.io # -# The main research teams contributing to the current release are: -# - Prof. Juan J. Alonso's group at Stanford University. -# - Prof. Piero Colonna's group at Delft University of Technology. -# - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology. -# - Prof. Alberto Guardone's group at Polytechnic University of Milan. -# - Prof. Rafael Palacios' group at Imperial College London. -# - Prof. Vincent Terrapon's group at the University of Liege. -# - Prof. Edwin van der Weide's group at the University of Twente. -# - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics. +# The SU2 Project is maintained by the SU2 Foundation +# (http://su2foundation.org) # -# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon, -# Tim Albring, and the SU2 contributors. +# Copyright 2012-2021, SU2 Contributors (cf. 
AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public diff --git a/SU2_CFD/obj/Makefile.am b/SU2_CFD/obj/Makefile.am index 30e7636a0e5..054df6fa267 100644 --- a/SU2_CFD/obj/Makefile.am +++ b/SU2_CFD/obj/Makefile.am @@ -10,7 +10,7 @@ # The SU2 Project is maintained by the SU2 Foundation # (http://su2foundation.org) # -# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md) +# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md) # # SU2 is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public From 2776775bb64c68ffdc3ad935beac5de338a19627 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 11:35:25 +0000 Subject: [PATCH 32/57] dead code --- .../iteration/CDiscAdjFEAIteration.hpp | 20 ----- SU2_CFD/include/solvers/CDiscAdjSolver.hpp | 7 -- .../src/iteration/CDiscAdjFEAIteration.cpp | 81 ------------------- .../src/iteration/CDiscAdjFluidIteration.cpp | 4 - SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 2 - 5 files changed, 114 deletions(-) diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp index 82b2485ffef..d17797c4f41 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp @@ -180,26 +180,6 @@ class CDiscAdjFEAIteration : public CIteration { void InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst) override; - /*! - * \brief Record a single iteration of the direct FEM system. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. 
- * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - * \param[in] kind_recording - The kind of recording (geometry or flow). - */ - void SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst, unsigned short kind_recording); - /*! * \brief Record a single iteration of the direct FEM system. * \param[in] solver - Container vector with all the solutions. diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp index 7c68619a8d8..d7f94a6d316 100644 --- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp @@ -74,13 +74,6 @@ class CDiscAdjSolver final : public CSolver { */ CDiscAdjSolver() = default; - /*! - * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - */ - CDiscAdjSolver(CGeometry *geometry, CConfig *config); - /*! * \overload * \param[in] geometry - Geometrical definition of the problem. 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index 1d2af031c25..fa753a598a7 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -176,87 +176,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration } } -void CDiscAdjFEAIteration::SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst, - unsigned short kind_recording) { - unsigned long InnerIter = config[ZONE_0]->GetInnerIter(); - unsigned long TimeIter = config[val_iZone]->GetTimeIter(), DirectTimeIter; - bool dynamic = (config[val_iZone]->GetTime_Domain()); - - DirectTimeIter = 0; - if (dynamic) { - DirectTimeIter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1; - } - - /*--- Reset the tape ---*/ - - AD::Reset(); - - /*--- We only need to reset the indices if the current recording is different from the recording we want to have ---*/ - - if (CurrentRecording != kind_recording && (CurrentRecording != NONE)) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0], - config[val_iZone]); - - /*--- Clear indices of coupling variables ---*/ - - SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, SOLUTION_AND_MESH); - - /*--- Run one iteration while tape is passive - this clears all indices ---*/ - - fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement, - FFDBox, val_iZone, val_iInst); - } - - /*--- Prepare for recording ---*/ - - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0], - 
config[val_iZone]); - - /*--- Start the recording of all operations ---*/ - - AD::StartRecording(); - - /*--- Register FEA variables ---*/ - - RegisterInput(solver, geometry, config, val_iZone, val_iInst, kind_recording); - - /*--- Compute coupling or update the geometry ---*/ - - SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, kind_recording); - - /*--- Set the correct direct iteration number ---*/ - - if (dynamic) { - config[val_iZone]->SetTimeIter(DirectTimeIter); - } - - /*--- Run the direct iteration ---*/ - - fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement, - FFDBox, val_iZone, val_iInst); - - config[val_iZone]->SetTimeIter(TimeIter); - - /*--- Register structural variables and objective function as output ---*/ - - RegisterOutput(solver, geometry, config, val_iZone, val_iInst); - - /*--- Stop the recording ---*/ - - AD::StopRecording(); - - /*--- Set the recording status ---*/ - - CurrentRecording = kind_recording; - - /* --- Reset the number of the internal iterations---*/ - - config[ZONE_0]->SetInnerIter(InnerIter); -} - void CDiscAdjFEAIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) { diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 59b212c8b9e..5eb54162866 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -443,10 +443,6 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/ - if (solver[iZone][iInst][MESH_0][ADJFEA_SOL]) { - solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[iZone][iInst][MESH_0], config[iZone]); - } - for (auto iMesh = 0u; iMesh <= 
config[iZone]->GetnMGLevels(); iMesh++) { solver[iZone][iInst][iMesh][ADJFLOW_SOL]->SetRecording(geometry[iZone][iInst][iMesh], config[iZone]); } diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index 09fd7c2cabf..f96765a9216 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -29,8 +29,6 @@ #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" #include "../../../Common/include/parallelization/omp_structure.hpp" -CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config) : CSolver() {} - CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { adjoint = true; From 74f20c479afc301a6ca090b8f188e2e986f1c18e Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 12:02:41 +0000 Subject: [PATCH 33/57] more cleanup --- .../iteration/CDiscAdjFEAIteration.hpp | 26 ++-- .../iteration/CDiscAdjFluidIteration.hpp | 45 ++---- .../iteration/CDiscAdjHeatIteration.hpp | 51 ++----- SU2_CFD/include/iteration/CIteration.hpp | 6 - SU2_CFD/include/solvers/CDiscAdjSolver.hpp | 8 ++ .../src/iteration/CDiscAdjFluidIteration.cpp | 135 +++++++++--------- .../src/iteration/CDiscAdjHeatIteration.cpp | 10 -- 7 files changed, 118 insertions(+), 163 deletions(-) diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp index d17797c4f41..c613d449bfc 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp @@ -37,11 +37,23 @@ class CFEAIteration; * \brief Class for driving an iteration of the discrete adjoint FEM system. * \author R. Sanchez */ -class CDiscAdjFEAIteration : public CIteration { +class CDiscAdjFEAIteration final : public CIteration { private: CFEAIteration* fem_iteration; /*!< \brief Pointer to the primal iteration class. 
*/ unsigned short CurrentRecording; /*!< \brief Stores the current status of the recording. */ + /*! + * \brief load solution for dynamic problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. @@ -189,7 +201,6 @@ class CDiscAdjFEAIteration : public CIteration { * \param[in] val_iInst - Index of the instance. * \param[in] kind_recording - The kind of recording (geometry or flow). */ - void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) override; @@ -206,15 +217,4 @@ class CDiscAdjFEAIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load solution for dynamic problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - * \param[in] val_DirectIter - Direct iteration to load. 
- */ - void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp index 93d1a9d2052..8647f709285 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp @@ -37,10 +37,22 @@ class CFluidIteration; * \brief Class for driving an iteration of the discrete adjoint fluid system. * \author T. Economon */ -class CDiscAdjFluidIteration : public CIteration { +class CDiscAdjFluidIteration final : public CIteration { private: const bool turbulent; /*!< \brief Stores the turbulent flag. */ + /*! + * \brief load unsteady solution for unsteady problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. @@ -126,25 +138,6 @@ class CDiscAdjFluidIteration : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Postprocess the discrete adjoint fluid iteration. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. 
- * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - */ - void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Registers all input variables of the fluid iteration. * \param[in] solver - Container vector with all the solutions. @@ -188,7 +181,6 @@ class CDiscAdjFluidIteration : public CIteration { * \param[in] val_iInst - Index of the instance. * \param[in] kind_recording - The kind of recording (geometry or flow). */ - void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) override; @@ -205,15 +197,4 @@ class CDiscAdjFluidIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load unsteady solution for unsteady problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - * \param[in] val_DirectIter - Direct iteration to load. 
- */ - void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp index 8c69d1162f8..ce981317897 100644 --- a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp @@ -35,7 +35,20 @@ * \brief Class for driving an iteration of the discrete adjoint heat equation. * \author O. Burghardt */ -class CDiscAdjHeatIteration : public CIteration { +class CDiscAdjHeatIteration final : public CIteration { + + /*! + * \brief load unsteady solution for unsteady problems + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] solver - Container vector with all the solutions. + * \param[in] config - Definition of the particular problem. + * \param[in] val_iZone - Index of the zone. + * \param[in] val_iInst - Index of the instance layer. + * \param[in] val_DirectIter - Direct iteration to load. + */ + void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, + unsigned short val_iInst, int val_DirectIter); + public: /*! * \brief Constructor of the class. @@ -108,31 +121,6 @@ class CDiscAdjHeatIteration : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Outputs desired files and quantities for the discrete adjoint fluid system. - */ - void Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned long InnerIter, - bool StopCalc, unsigned short val_iZone, unsigned short val_iInst); - - /*! - * \brief Perform a single iteration of the adjoint fluid system. - * \param[in] output - Pointer to the COutput class. 
- * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance layer. - */ - void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Registers all input variables of the fluid iteration. * \param[in] solver - Container vector with all the solutions. @@ -180,15 +168,4 @@ class CDiscAdjHeatIteration : public CIteration { void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override; - /*! - * \brief load unsteady solution for unsteady problems - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance layer. - * \param[in] val_DirectIter - Direct iteration to load. 
- */ - void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone, - unsigned short val_iInst, int val_DirectIter) override; }; diff --git a/SU2_CFD/include/iteration/CIteration.hpp b/SU2_CFD/include/iteration/CIteration.hpp index 961fdb9ed6a..05947c02402 100644 --- a/SU2_CFD/include/iteration/CIteration.hpp +++ b/SU2_CFD/include/iteration/CIteration.hpp @@ -280,12 +280,6 @@ class CIteration { virtual void RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) {} - virtual void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {} - - virtual void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {} - virtual void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone, unsigned short val_iInst, unsigned short kind_recording) {} }; diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp index d7f94a6d316..cac68c1ab93 100644 --- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp @@ -257,4 +257,12 @@ class CDiscAdjSolver final : public CSolver { int val_iter, bool val_update_geo) override; + /*! + * \brief Depends on the direct solver. 
+ */ + inline bool GetHasHybridParallel() const override { + if (direct_solver) return direct_solver->GetHasHybridParallel(); + return false; + } + }; diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 5eb54162866..5e09591f572 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -44,14 +44,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr bool heat = config[val_iZone]->GetWeakly_Coupled_Heat(); bool grid_IsMoving = config[val_iZone]->GetGrid_Movement(); + auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; + // /*--- Read the target pressure for inverse design. ---------------------------------------------*/ // if (config[val_iZone]->GetInvDesign_Cp() == YES) - // output->SetCp_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL], + // output->SetCp_InverseDesign(solvers0[FLOW_SOL], // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); // /*--- Read the target heat flux ----------------------------------------------------------------*/ // if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES) - // output->SetHeatFlux_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL], + // output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL], // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); /*--- For the unsteady adjoint, load direct solutions from restart files. 
---*/ @@ -73,15 +75,17 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Push solution back to correct array ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1(); + auto solvers = solver[val_iZone][val_iInst][iMesh]; + + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(); - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); } if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); @@ -96,12 +100,14 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Push solution back to correct array ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(); + auto solvers = solver[val_iZone][val_iInst][iMesh]; + + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); } if (grid_IsMoving) { 
geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); @@ -114,7 +120,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter); if (config[val_iZone]->GetDeform_Mesh()) { - solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart( + solvers0[MESH_SOL]->LoadRestart( geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); } @@ -127,7 +133,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr ---*/ if (config[val_iZone]->GetDeform_Mesh()) { - solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart( + solvers0[MESH_SOL]->LoadRestart( geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); } @@ -141,12 +147,14 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Temporarily store the loaded solution in the Solution_Old array ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Set_OldSolution(); + auto solvers = solver[val_iZone][val_iInst][iMesh]; + + solvers[FLOW_SOL]->Set_OldSolution(); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Set_OldSolution(); + solvers[TURB_SOL]->Set_OldSolution(); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Set_OldSolution(); + solvers[HEAT_SOL]->Set_OldSolution(); } if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old(); @@ -156,42 +164,46 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Set Solution at timestep n to solution at n-1 ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - 
solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + solvers[FLOW_SOL]->GetNodes()->SetSolution( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord( iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + solvers[TURB_SOL]->GetNodes()->SetSolution( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->SetSolution( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint)); + solvers[HEAT_SOL]->GetNodes()->SetSolution( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint)); } } } if (dual_time_1st) { /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, 
solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); } } } @@ -199,41 +211,45 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr if (dual_time_2nd) { /*--- Set Solution at timestep n-1 to solution at n-2 ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n( + iPoint, 
solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); } } } /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/ for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1( + iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1( iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1( + iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint)); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1( - iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); + solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1( + iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint)); } } } @@ -252,48 +268,42 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr if (TimeIter == 0 || dual_time) { for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[val_iZone][val_iInst][iMesh]; for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution(iPoint)); + 
solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint)); } } if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->GetNodes()->GetSolution(iPoint)); + solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint)); } } if (heat) { for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->GetNodes()->GetSolution(iPoint)); + solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint)); } } if (config[val_iZone]->AddRadiation()) { for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][RAD_SOL]->GetNodes()->GetSolution(iPoint)); + solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint)); } } } - solver[val_iZone][val_iInst][MESH_0][ADJFLOW_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJFLOW_SYS, false); + solvers0[ADJFLOW_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + MESH_0, 0, RUNTIME_ADJFLOW_SYS, false); + if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { - solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 
0, - RUNTIME_ADJTURB_SYS, false); + solvers0[ADJTURB_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + MESH_0, 0, RUNTIME_ADJTURB_SYS, false); } if (heat) { - solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJHEAT_SYS, false); + solvers0[ADJHEAT_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + MESH_0, 0, RUNTIME_ADJHEAT_SYS, false); } if (config[val_iZone]->AddRadiation()) { - solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJRAD_SYS, false); + solvers0[ADJRAD_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + MESH_0, 0, RUNTIME_ADJRAD_SYS, false); } } @@ -557,8 +567,3 @@ bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integrati return output->GetConvergence(); } -void CDiscAdjFluidIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) {} diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp index 4258a631a52..8c5ff48afa8 100644 --- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp @@ -258,13 +258,3 @@ bool CDiscAdjHeatIteration::Monitor(COutput* output, CIntegration**** integratio return output->GetConvergence(); } - -void CDiscAdjHeatIteration::Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned long InnerIter, bool StopCalc, 
unsigned short val_iZone, - unsigned short val_iInst) {} - -void CDiscAdjHeatIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { -} From 63003eea499ce3f480b699dad4d7d823f5df3ff7 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 12:24:54 +0000 Subject: [PATCH 34/57] val_ never made anything better, parallel dependencies, fix adjoint residual calc --- .../src/iteration/CDiscAdjFluidIteration.cpp | 225 +++++++++--------- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 20 +- 2 files changed, 124 insertions(+), 121 deletions(-) diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 5e09591f572..192bc5c7a4d 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -31,35 +31,35 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { StartTime = SU2_MPI::Wtime(); unsigned long iPoint; - unsigned short TimeIter = config[val_iZone]->GetTimeIter(); - bool dual_time_1st = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST); - bool dual_time_2nd = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND); + unsigned short TimeIter = config[iZone]->GetTimeIter(); + bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST); + bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND); bool 
dual_time = (dual_time_1st || dual_time_2nd); unsigned short iMesh; int Direct_Iter; - bool heat = config[val_iZone]->GetWeakly_Coupled_Heat(); - bool grid_IsMoving = config[val_iZone]->GetGrid_Movement(); + bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + bool grid_IsMoving = config[iZone]->GetGrid_Movement(); - auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; + auto solvers0 = solver[iZone][iInst][MESH_0]; // /*--- Read the target pressure for inverse design. ---------------------------------------------*/ - // if (config[val_iZone]->GetInvDesign_Cp() == YES) + // if (config[iZone]->GetInvDesign_Cp() == YES) // output->SetCp_InverseDesign(solvers0[FLOW_SOL], - // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); + // geometry[iZone][iInst][MESH_0], config[iZone], ExtIter); // /*--- Read the target heat flux ----------------------------------------------------------------*/ // if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES) // output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL], - // geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter); + // geometry[iZone][iInst][MESH_0], config[iZone], ExtIter); /*--- For the unsteady adjoint, load direct solutions from restart files. 
---*/ - if (config[val_iZone]->GetTime_Marching()) { - Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2; + if (config[iZone]->GetTime_Marching()) { + Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2; /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/ @@ -70,12 +70,12 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr if (TimeIter == 0) { if (dual_time_2nd) { /*--- Load solution at timestep n-2 ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 2); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2); /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(); @@ -88,19 +88,19 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1(); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1(); } } } if (dual_time) { /*--- Load solution at timestep n-1 ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 1); + LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1); /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh 
<= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); if (turbulent) { @@ -110,18 +110,18 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(); } } } /*--- Load solution timestep n ---*/ - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter); + LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter); - if (config[val_iZone]->GetDeform_Mesh()) { + if (config[iZone]->GetDeform_Mesh()) { solvers0[MESH_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); + geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true); } } else if ((TimeIter > 0) && dual_time) { @@ -132,22 +132,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr Afterwards the GridVelocity is computed based on the Coordinates. 
---*/ - if (config[val_iZone]->GetDeform_Mesh()) { + if (config[iZone]->GetDeform_Mesh()) { solvers0[MESH_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true); + geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true); } /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/ if (dual_time_1st) { - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 1); + LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter - 1); } else { - LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 2); + LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter - 2); } /*--- Temporarily store the loaded solution in the Solution_Old array ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->Set_OldSolution(); if (turbulent) { @@ -157,22 +157,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->Set_OldSolution(); } if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old(); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_Old(); } } /*--- Set Solution at timestep n to solution at n-1 ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->SetSolution( iPoint, 
solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint)); + geometry[iZone][iInst][iMesh]->nodes->SetCoord( + iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->SetSolution( @@ -186,16 +186,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } if (dual_time_1st) { /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n( + iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( @@ -210,16 +210,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } if (dual_time_2nd) { /*--- Set Solution at timestep n-1 to solution at n-2 ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); 
iPoint++) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint)); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n( + iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n1(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( @@ -232,16 +232,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } } /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1( - iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1( + iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1( @@ -259,91 +259,89 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Compute & set Grid Velocity via finite differences of the Coordinates. 
---*/ if (grid_IsMoving) - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) - geometry[val_iZone][val_iInst][iMesh]->SetGridVelocity(config[val_iZone], TimeIter); + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) + geometry[iZone][iInst][iMesh]->SetGridVelocity(config[iZone], TimeIter); } // if unsteady /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/ if (TimeIter == 0 || dual_time) { - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - auto solvers = solver[val_iZone][val_iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) { + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + auto solvers = solver[iZone][iInst][iMesh]; + for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint)); } } - if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { + if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) { solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint)); } } if (heat) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) { solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint)); } } - if (config[val_iZone]->AddRadiation()) { - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { + if (config[iZone]->AddRadiation()) { + for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) 
{ solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint)); } } } - solvers0[ADJFLOW_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + solvers0[ADJFLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJFLOW_SYS, false); - if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) { - solvers0[ADJTURB_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { + solvers0[ADJTURB_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJTURB_SYS, false); } if (heat) { - solvers0[ADJHEAT_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + solvers0[ADJHEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJHEAT_SYS, false); } - if (config[val_iZone]->AddRadiation()) { - solvers0[ADJRAD_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone], + if (config[iZone]->AddRadiation()) { + solvers0[ADJRAD_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJRAD_SYS, false); } } void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, - unsigned short val_iZone, unsigned short val_iInst, - int val_DirectIter) { + unsigned short iZone, unsigned short iInst, int DirectIter) { unsigned short iMesh; - bool heat = config[val_iZone]->GetWeakly_Coupled_Heat(); + bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + auto solvers = solver[iZone][iInst]; + + if (DirectIter >= 0) { + if (rank == MASTER_NODE && iZone == ZONE_0) + cout << " Loading flow solution from direct iteration " << DirectIter << "." 
<< endl; + + solvers[MESH_0][FLOW_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, true); - if (val_DirectIter >= 0) { - if (rank == MASTER_NODE && val_iZone == ZONE_0) - cout << " Loading flow solution from direct iteration " << val_DirectIter << "." << endl; - solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, true); if (turbulent) { - solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false); + solvers[MESH_0][TURB_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false); } if (heat) { - solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->LoadRestart( - geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false); + solvers[MESH_0][HEAT_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false); } } else { /*--- If there is no solution file we set the freestream condition ---*/ - if (rank == MASTER_NODE && val_iZone == ZONE_0) - cout << " Setting freestream conditions at direct iteration " << val_DirectIter << "." << endl; - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh, - val_DirectIter, RUNTIME_FLOW_SYS, false); + if (rank == MASTER_NODE && iZone == ZONE_0) + cout << " Setting freestream conditions at direct iteration " << DirectIter << "." 
<< endl; + + for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + solvers[iMesh][FLOW_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh, + DirectIter, RUNTIME_FLOW_SYS, false); if (turbulent) { - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Postprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh); + solvers[iMesh][TURB_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][TURB_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh); } if (heat) { - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->SetFreeStream_Solution(config[val_iZone]); - solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Postprocessing( - geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh); + solvers[iMesh][HEAT_SOL]->SetFreeStream_Solution(config[iZone]); + solvers[iMesh][HEAT_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh); } } } @@ -406,8 +404,9 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry*** void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); + const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) { /*--- Register flow and turbulent variables as input ---*/ @@ -449,7 +448,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge void 
CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); + SU2_OMP_PARALLEL { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/ @@ -465,13 +466,18 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo if (config[iZone]->AddRadiation()) { solver[iZone][INST_0][MESH_0][ADJRAD_SOL]->SetRecording(geometry[iZone][INST_0][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][FLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) { /*--- Update geometry to get the influence on other geometry variables (normals, volume etc) ---*/ @@ -494,7 +500,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** solver[iZone][iInst][MESH_0][TURB_SOL]->CompleteComms(geometry[iZone][iInst][MESH_0], config[iZone], SOLUTION); } - if (heat) { + } + END_SU2_OMP_PARALLEL + + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][HEAT_SOL]->Set_Heatflux_Areas(geometry[iZone][iInst][MESH_0], config[iZone]); solver[iZone][iInst][MESH_0][HEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0], config[iZone], MESH_0, NO_RK_ITER, RUNTIME_HEAT_SYS, true); @@ -538,15 +547,13 @@ 
void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - unsigned short iMesh; - + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { /*--- Dual time stepping strategy ---*/ - if ((config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST) || - (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND)) { - for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) { - integration[val_iZone][val_iInst][ADJFLOW_SOL]->SetConvergence(false); + if ((config[iZone]->GetTime_Marching() == DT_STEPPING_1ST) || + (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND)) { + for (unsigned short iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + integration[iZone][iInst][ADJFLOW_SOL]->SetConvergence(false); } } } @@ -554,16 +561,16 @@ void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integratio bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { + CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { StopTime = SU2_MPI::Wtime(); UsedTime = StopTime - StartTime; /*--- Write the convergence history for the fluid (only screen output) ---*/ - output->SetHistory_Output(geometry[val_iZone][INST_0][MESH_0], solver[val_iZone][INST_0][MESH_0], config[val_iZone], - config[val_iZone]->GetTimeIter(), config[val_iZone]->GetOuterIter(), - config[val_iZone]->GetInnerIter()); + 
output->SetHistory_Output(geometry[iZone][INST_0][MESH_0], solver[iZone][INST_0][MESH_0], config[iZone], + config[iZone]->GetTimeIter(), config[iZone]->GetOuterIter(), + config[iZone]->GetInnerIter()); return output->GetConvergence(); } diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index f96765a9216..b2a8fea1ea9 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -325,8 +325,6 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0u; iPoint < nPoint; iPoint++) { - const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0; - /*--- Extract the adjoint solution ---*/ if(config->GetMultizone_Problem()) { @@ -342,16 +340,14 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi su2double residual = Solution[iVar]-nodes->GetSolution_Old(iPoint,iVar); nodes->AddSolution(iPoint, iVar, relax*residual); - residual *= isdomain; - Residual_RMS[iVar] += pow(residual,2); - AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint)); - - /*--- Update residual information for current thread. ---*/ - resRMS[iVar] += residual*residual; - if (fabs(residual) > resMax[iVar]) { - resMax[iVar] = fabs(residual); - idxMax[iVar] = iPoint; - coordMax[iVar] = geometry->nodes->GetCoord(iPoint); + if (iPoint < nPointDomain) { + /*--- Update residual information for current thread. 
---*/ + resRMS[iVar] += residual*residual; + if (fabs(residual) > resMax[iVar]) { + resMax[iVar] = fabs(residual); + idxMax[iVar] = iPoint; + coordMax[iVar] = geometry->nodes->GetCoord(iPoint); + } } } } From b329cb62fb9cb1540aa0307db16e0c76ee0e8fa5 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 15:39:26 +0000 Subject: [PATCH 35/57] more parallel, fix SensGeo output --- .../src/iteration/CDiscAdjFluidIteration.cpp | 45 ++++++++++++++----- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 8 +++- SU2_CFD/src/solvers/CSolver.cpp | 4 ++ 3 files changed, 43 insertions(+), 14 deletions(-) diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 192bc5c7a4d..5cb17308905 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -351,8 +351,11 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement, CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); + const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/ @@ -372,12 +375,17 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati solver[iZone][iInst][MESH_0][ADJRAD_SOL]->ExtractAdjoint_Variables(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned 
short iZone, unsigned short iInst) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); /*--- Initialize the adjoints the conservative variables ---*/ @@ -389,7 +397,7 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry*** solver[iZone][iInst][MESH_0][ADJTURB_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } @@ -400,13 +408,17 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry*** if (config[iZone]->GetFluidProblem()) { solver[iZone][iInst][MESH_0][FLOW_SOL]->SetVertexTractionsAdjoint(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) { /*--- Register flow and turbulent variables as input ---*/ @@ -420,7 +432,7 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge if (turbulent && !frozen_visc) { solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], 
config[iZone]); } if (config[iZone]->AddRadiation()) { @@ -430,6 +442,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge } } + } + END_SU2_OMP_PARALLEL + if (kind_recording == MESH_COORDS) { /*--- Register node coordinates as input ---*/ @@ -448,7 +463,8 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - SU2_OMP_PARALLEL { + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); @@ -475,7 +491,7 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][FLOW_SOL]->GetHasHybridParallel())) { + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); @@ -522,8 +538,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) { - bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); + + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + + const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); /*--- Register conservative variables as output of the iteration ---*/ @@ -533,7 +551,7 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g if (turbulent && !frozen_visc) { 
solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } - if (heat) { + if (config[iZone]->GetWeakly_Coupled_Heat()) { solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } if (config[iZone]->AddRadiation()) { @@ -542,6 +560,9 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g if (config[iZone]->GetFluidProblem()) { solver[iZone][iInst][MESH_0][FLOW_SOL]->RegisterVertexTractions(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index b2a8fea1ea9..44343c87e9c 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -580,18 +580,22 @@ void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) { if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) { - atomicAdd(Sens_Geo[iMarker_Mon], Sens); + atomicAdd(Sens, Sens_Geo[iMarker_Mon]); break; } } } + SU2_OMP_BARRIER SU2_OMP_MASTER { auto local = Sens_Geo; SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); Total_Sens_Geo = 0.0; - for (auto x : Sens_Geo) Total_Sens_Geo += x; + for (auto& x : Sens_Geo) { + x = sqrt(x); + Total_Sens_Geo += x; + } } END_SU2_OMP_MASTER SU2_OMP_BARRIER diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 1b717c09c9c..5e9cd83515e 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -3971,6 +3971,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config if (!config->GetSolid_Wall(iMarker)) continue; /*--- Loop over the vertices 
---*/ + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) { /*--- Recover the point index ---*/ @@ -3984,6 +3985,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config AD::RegisterOutput(VertexTraction[iMarker][iVertex][iDim]); } } + END_SU2_OMP_FOR } } @@ -4000,6 +4002,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf if (!config->GetSolid_Wall(iMarker)) continue; /*--- Loop over the vertices ---*/ + SU2_OMP_FOR_STAT(OMP_MIN_SIZE) for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) { /*--- Recover the point index ---*/ @@ -4014,6 +4017,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf SU2_TYPE::GetValue(VertexTractionAdjoint[iMarker][iVertex][iDim])); } } + END_SU2_OMP_FOR } } From 02c9c8e20ca203cbed10aab203e5770f3ca0e715 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 19 Mar 2021 16:31:55 +0000 Subject: [PATCH 36/57] mesh solver, plus some cleanup --- Common/include/geometry/CGeometry.hpp | 8 +-- Common/src/geometry/CGeometry.cpp | 36 +++---------- .../include/solvers/CDiscAdjMeshSolver.hpp | 37 ++++---------- .../src/iteration/CDiscAdjFluidIteration.cpp | 6 +-- .../src/iteration/CDiscAdjHeatIteration.cpp | 3 +- SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp | 51 ++++++++++--------- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 6 ++- SU2_CFD/src/variables/CMeshVariable.cpp | 4 ++ 8 files changed, 58 insertions(+), 93 deletions(-) diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp index 4941e62d219..fccbc5e1d11 100644 --- a/Common/include/geometry/CGeometry.hpp +++ b/Common/include/geometry/CGeometry.hpp @@ -1252,13 +1252,7 @@ class CGeometry { * \brief Register the coordinates of the mesh nodes. * \param[in] config */ - void RegisterCoordinates(CConfig *config) const; - - /*! - * \brief Register the coordinates of the mesh nodes as output. 
- * \param[in] config - */ - void RegisterOutput_Coordinates(CConfig *config) const; + void RegisterCoordinates(const CConfig *config) const; /*! * \brief Update the multi-grid structure and the wall-distance. diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp index 535a8e42c5c..85ca9c72f78 100644 --- a/Common/src/geometry/CGeometry.cpp +++ b/Common/src/geometry/CGeometry.cpp @@ -2492,44 +2492,24 @@ void CGeometry::ComputeAirfoil_Section(su2double *Plane_P0, su2double *Plane_Nor } -void CGeometry::RegisterCoordinates(CConfig *config) const { - unsigned short iDim; - unsigned long iPoint; - bool input = true; - bool push_index = config->GetMultizone_Problem()? false : true; +void CGeometry::RegisterCoordinates(const CConfig *config) const { + const bool input = true; + const bool push_index = config->GetMultizone_Problem()? false : true; - for (iPoint = 0; iPoint < nPoint; iPoint++) { - for (iDim = 0; iDim < nDim; iDim++) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { + for (auto iDim = 0u; iDim < nDim; iDim++) { AD::RegisterInput(nodes->GetCoord(iPoint)[iDim], push_index); } if(!push_index) { nodes->SetIndex(iPoint, input); } } -} - -void CGeometry::RegisterOutput_Coordinates(CConfig *config) const{ - unsigned short iDim; - unsigned long iPoint; - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - if(config->GetMultizone_Problem()) { - for (iDim = 0; iDim < nDim; iDim++) { - AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]); - } - } - else { - for (iDim = 0; iDim < nDim; iDim++) { - AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]); - } - } - } + END_SU2_OMP_FOR } void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) { - unsigned short iMesh; - geometry_container[MESH_0]->InitiateComms(geometry_container[MESH_0], config, COORDINATES); geometry_container[MESH_0]->CompleteComms(geometry_container[MESH_0], config, COORDINATES); if 
(config->GetDynamic_Grid()){ @@ -2541,7 +2521,7 @@ void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) geometry_container[MESH_0]->SetBoundControlVolume(config, UPDATE); geometry_container[MESH_0]->SetMaxLength(config); - for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { + for (unsigned short iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) { /*--- Update the control volume structures ---*/ geometry_container[iMesh]->SetControlVolume(config,geometry_container[iMesh-1], UPDATE); diff --git a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp index 4caa7e597e2..d7ba9d80b75 100644 --- a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp @@ -39,6 +39,13 @@ */ class CDiscAdjMeshSolver final : public CSolver { private: + static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */ + static constexpr size_t MAXNVAR = 3; /*!< \brief Max number of variables, for static arrays. */ + + static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */ + + unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */ + CSolver *direct_solver = nullptr; CDiscAdjMeshBoundVariable* nodes = nullptr; /*!< \brief Variables of the discrete adjoint mesh solver. */ @@ -53,15 +60,7 @@ class CDiscAdjMeshSolver final : public CSolver { /*! * \brief Constructor of the class. */ - CDiscAdjMeshSolver(void); - - /*! - * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. - */ - CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config); + CDiscAdjMeshSolver() = default; /*! * \overload @@ -75,7 +74,7 @@ class CDiscAdjMeshSolver final : public CSolver { /*! * \brief Destructor of the class. 
*/ - ~CDiscAdjMeshSolver(void) override; + ~CDiscAdjMeshSolver() override; /*! * \brief Performs the preprocessing of the AD-based mesh adjoint solver. @@ -124,24 +123,6 @@ class CDiscAdjMeshSolver final : public CSolver { */ void ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Update the dual-time derivatives. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver_container - Container vector with all the solutions. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. - * \param[in] iRKStep - Current step of the Runge-Kutta iteration. - * \param[in] RunTime_EqSystem - System of equations which is going to be solved. - * \param[in] Output - boolean to determine whether to print output. - */ - void Preprocessing(CGeometry *geometry, - CSolver **solver_container, - CConfig *config, - unsigned short iMesh, - unsigned short iRKStep, - unsigned short RunTime_EqSystem, - bool Output) override; - /*! * \brief Load a solution from a restart file. * \param[in] geometry - Geometrical definition of the problem. 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 5cb17308905..4bf2a9d9235 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -442,9 +442,6 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge } } - } - END_SU2_OMP_PARALLEL - if (kind_recording == MESH_COORDS) { /*--- Register node coordinates as input ---*/ @@ -459,6 +456,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge /*--- Boundary displacements ---*/ solver[iZone][iInst][MESH_0][ADJMESH_SOL]->RegisterVariables(geometry[iZone][iInst][MESH_0], config[iZone]); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp index 8c5ff48afa8..5d278eb97f9 100644 --- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp @@ -227,9 +227,8 @@ void CDiscAdjHeatIteration::SetDependencies(CSolver***** solver, CGeometry**** g void CDiscAdjHeatIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output, unsigned short iZone, unsigned short iInst) { - solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); - geometry[iZone][iInst][MESH_0]->RegisterOutput_Coordinates(config[iZone]); + solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]); } void CDiscAdjHeatIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp index 249c59b4722..9cb4950870b 100644 --- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp +++ 
b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp @@ -28,15 +28,8 @@ #include "../../include/solvers/CDiscAdjMeshSolver.hpp" #include "../../include/variables/CDiscAdjMeshBoundVariable.hpp" - -CDiscAdjMeshSolver::CDiscAdjMeshSolver() : CSolver () {} - -CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config) : CSolver() {} - CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver) : CSolver() { - unsigned short iVar; - nVar = geometry->GetnDim(); nDim = geometry->GetnDim(); @@ -46,6 +39,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo nPoint = geometry->GetnPoint(); nPointDomain = geometry->GetnPointDomain(); + omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE); + /*--- Define some auxiliary vectors related to the residual ---*/ Residual_RMS.resize(nVar,1.0); @@ -63,12 +58,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some auxiliary vectors related to the solution ---*/ - - Solution = new su2double[nVar]; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - /*--- Initialize the node structure ---*/ + su2double Solution[MAXNVAR] = {1e-16}; nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config); SetBaseClassPointerToNodes(); @@ -90,22 +81,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo } -CDiscAdjMeshSolver::~CDiscAdjMeshSolver(void){ - delete nodes; -} - - -void CDiscAdjMeshSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, - unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){ -} +CDiscAdjMeshSolver::~CDiscAdjMeshSolver() { delete nodes; } void CDiscAdjMeshSolver::SetRecording(CGeometry* geometry, CConfig *config){ /*--- Reset the solution to the initial (converged) solution ---*/ + 
SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { direct_solver->GetNodes()->SetBound_Disp(iPoint,nodes->GetBoundDisp_Direct(iPoint)); } + END_SU2_OMP_FOR /*--- Set indices to zero ---*/ @@ -123,20 +109,25 @@ void CDiscAdjMeshSolver::RegisterSolution(CGeometry *geometry, CConfig *config){ void CDiscAdjMeshSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset){ - /*--- Register boundary displacements as input ---*/ - bool input = true; - direct_solver->GetNodes()->Register_BoundDisp(input); - + SU2_OMP_MASTER { + /*--- Register boundary displacements as input ---*/ + bool input = true; + direct_solver->GetNodes()->Register_BoundDisp(input); + } + END_SU2_OMP_MASTER + SU2_OMP_BARRIER } void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){ /*--- Extract the sensitivities of the mesh coordinates ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ /*--- Extract the adjoint solution from the original mesh coordinates ---*/ + su2double Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjoint_MeshCoord(iPoint,Solution); /*--- Store the adjoint solution (the container is reused) ---*/ @@ -144,6 +135,7 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *c nodes->SetSolution(iPoint,Solution); } + END_SU2_OMP_FOR } @@ -151,10 +143,12 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig * /*--- Extract the sensitivities of the boundary displacements ---*/ + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ /*--- Extract the adjoint solution of the boundary displacements ---*/ + su2double Solution[MAXNVAR] = {0.0}; direct_solver->GetNodes()->GetAdjoint_BoundDisp(iPoint,Solution); /*--- Store the sensitivities of the boundary displacements ---*/ @@ -162,11 +156,14 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig * 
nodes->SetBoundDisp_Sens(iPoint,Solution); } + END_SU2_OMP_FOR } void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver *solver) { + SU2_OMP_PARALLEL { + const bool time_stepping = (config->GetTime_Marching() != STEADY); const auto eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength(); @@ -177,6 +174,8 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS ExtractAdjoint_Variables(geometry, config); /*--- Store the sensitivities in the flow adjoint container ---*/ + + SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { /*--- If sharp edge, set the sensitivity to 0 on that region ---*/ @@ -198,8 +197,12 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS } } } + END_SU2_OMP_FOR + solver->SetSurface_Sensitivity(geometry, config); + } + END_SU2_OMP_PARALLEL } void CDiscAdjMeshSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index 44343c87e9c..30249c14a5d 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -497,8 +497,10 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) { void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) { + SU2_OMP_PARALLEL { + const bool time_stepping = (config->GetTime_Marching() != STEADY); - const su2double eps = config->GetVenkat_LimiterCoeff()*config->GetAdjSharp_LimiterCoeff(); + const su2double eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength(); SU2_OMP_FOR_STAT(omp_chunk_size) for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) { @@ -537,6 +539,8 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve SetSurface_Sensitivity(geometry, config); + } + END_SU2_OMP_PARALLEL } void 
CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) { diff --git a/SU2_CFD/src/variables/CMeshVariable.cpp b/SU2_CFD/src/variables/CMeshVariable.cpp index 0f35b0fc442..d4786c3ed75 100644 --- a/SU2_CFD/src/variables/CMeshVariable.cpp +++ b/SU2_CFD/src/variables/CMeshVariable.cpp @@ -50,13 +50,17 @@ CMeshVariable::CMeshVariable(unsigned long npoint, unsigned long ndim, CConfig * void CMeshVariable::Register_MeshCoord(bool input) { if (input) { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned long iDim = 0; iDim < nDim; iDim++) AD::RegisterInput(Mesh_Coord(iPoint,iDim)); + END_SU2_OMP_FOR } else { + SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads())) for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) for (unsigned long iDim = 0; iDim < nDim; iDim++) AD::RegisterOutput(Mesh_Coord(iPoint,iDim)); + END_SU2_OMP_FOR } } From ac5c581a28ffb30fe1d6e0bf64499b1f628abd78 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Sat, 20 Mar 2021 23:08:12 +0000 Subject: [PATCH 37/57] no include of cpp --- Common/lib/Makefile.am | 1 + Common/src/basic_types/ad_structure.cpp | 2 -- Common/src/meson.build | 3 ++- SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp | 19 ++----------------- SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp | 10 ---------- 5 files changed, 5 insertions(+), 30 deletions(-) diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am index 813299d0f77..1e7a8761c6f 100644 --- a/Common/lib/Makefile.am +++ b/Common/lib/Makefile.am @@ -63,6 +63,7 @@ lib_sources = \ ../src/grid_movement/CVolumetricMovement.cpp \ ../src/grid_movement/CSurfaceMovement.cpp \ ../include/parallelization/mpi_structure.cpp \ + ../include/parallelization/omp_structure.cpp \ ../src/basic_types/ad_structure.cpp \ ../src/fem/fem_gauss_jacobi_quadrature.cpp \ ../src/geometry/CGeometry.cpp \ diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp index 
efa0cacf401..18342e13a90 100644 --- a/Common/src/basic_types/ad_structure.cpp +++ b/Common/src/basic_types/ad_structure.cpp @@ -49,5 +49,3 @@ namespace AD { #endif } - -#include "../../include/parallelization/omp_structure.cpp" diff --git a/Common/src/meson.build b/Common/src/meson.build index 5dcbb57c66f..b3e0726e70c 100644 --- a/Common/src/meson.build +++ b/Common/src/meson.build @@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp', 'CConfig.cpp', 'basic_types/ad_structure.cpp', 'wall_model.cpp', - '../include/parallelization/mpi_structure.cpp']) + '../include/parallelization/mpi_structure.cpp', + '../include/parallelization/omp_structure.cpp']) subdir('linear_algebra') subdir('toolboxes') diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp index dec9c21e348..c85ea4e8653 100644 --- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp @@ -102,15 +102,7 @@ class CDiscAdjFEASolver final : public CSolver { /*! * \brief Constructor of the class. */ - CDiscAdjFEASolver(void); - - /*! - * \overload - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] config - Definition of the particular problem. - * \param[in] iMesh - Index of the mesh in multigrid computations. - */ - CDiscAdjFEASolver(CGeometry *geometry, CConfig *config); + CDiscAdjFEASolver() = default; /*! * \overload @@ -124,7 +116,7 @@ class CDiscAdjFEASolver final : public CSolver { /*! * \brief Destructor of the class. */ - ~CDiscAdjFEASolver(void) override; + ~CDiscAdjFEASolver() override; /*! * \brief Performs the preprocessing of the adjoint AD-based solver. @@ -159,13 +151,6 @@ class CDiscAdjFEASolver final : public CSolver { */ void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override; - /*! - * \brief Set the surface sensitivity. - * \param[in] geometry - Geometrical definition of the problem. 
- * \param[in] config - Definition of the particular problem. - */ - void SetSurface_Sensitivity(CGeometry *geometry, CConfig* config) override; - /*! * \brief Extract and set the geometrical sensitivity. * \param[in] geometry - Geometrical definition of the problem. diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index 287e1c50eaa..bc72135c545 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -25,14 +25,9 @@ * License along with SU2. If not, see . */ - #include "../../include/solvers/CDiscAdjFEASolver.hpp" #include "../../include/variables/CDiscAdjFEAVariable.hpp" -CDiscAdjFEASolver::CDiscAdjFEASolver(void) : CSolver() { } - -CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config) : CSolver() { } - CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { @@ -776,11 +771,6 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo nodes->SetSensitivity(iPoint, iDim, Sensitivity); } } - SetSurface_Sensitivity(geometry, config); -} - -void CDiscAdjFEASolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config){ - } void CDiscAdjFEASolver::ReadDV(CConfig *config) { From 3527c2809a341ee27c7597e0915add33fb889365 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 22 Mar 2021 12:04:29 +0000 Subject: [PATCH 38/57] fix bug from nested parallel region --- SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 4bf2a9d9235..626d625d7aa 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -491,8 +491,6 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, 
CGeometry**** CConfig** config, unsigned short iZone, unsigned short iInst, unsigned short kind_recording) { - SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { - const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc(); if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) { @@ -503,6 +501,8 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** CGeometry::ComputeWallDistance(config, geometry); } + SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) { + /*--- Compute coupling between flow and turbulent equations ---*/ solver[iZone][iInst][MESH_0][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0], config[iZone], MESH_0, NO_RK_ITER, RUNTIME_FLOW_SYS, true); From 9efa995aa1e22cc93ea5833395ccd9b2a82134e7 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Tue, 23 Mar 2021 00:14:49 +0000 Subject: [PATCH 39/57] less boilerplate, more boilerplate, fix merge, try to fix failed regressions --- Common/include/linear_algebra/CSysSolve.hpp | 4 - Common/src/linear_algebra/CSysMatrix.cpp | 91 +------------- Common/src/linear_algebra/CSysSolve.cpp | 13 -- .../src/iteration/CDiscAdjFluidIteration.cpp | 114 +++++++++--------- SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 10 +- 5 files changed, 66 insertions(+), 166 deletions(-) diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp index 826e779beea..c69643cefe1 100644 --- a/Common/include/linear_algebra/CSysSolve.hpp +++ b/Common/include/linear_algebra/CSysSolve.hpp @@ -323,10 +323,6 @@ class CSysSolve { const PrecondType & precond, ScalarType tol, unsigned long m, ScalarType & residual, bool monitoring, const CConfig *config) const; - unsigned long RFGMRES_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec, - const PrecondType & precond, ScalarType tol, unsigned long m, 
- ScalarType & residual, bool monitoring, const CConfig *config); - /*! * \brief Flexible Generalized Minimal Residual method with restarts (frequency comes from config). */ diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index 398aea672ad..a9ee199f3e6 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -94,15 +94,11 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi if(npoint == 0) return; if(matrix != nullptr) { - SU2_OMP_MASTER SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } if(nvar > MAXNVAR) { - SU2_OMP_MASTER SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } /*--- Application of this matrix, FVM or FEM. ---*/ @@ -607,14 +603,10 @@ void CSysMatrix::MatrixVectorProduct(const CSysVector & /*--- Some checks for consistency between CSysMatrix and the CSysVectors ---*/ #ifndef NDEBUG if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) { - SU2_OMP_MASTER SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } if (nPoint != prod.GetNBlk()) { - SU2_OMP_MASTER SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } #endif @@ -1312,7 +1304,7 @@ void CSysMatrix::TransposeInPlace() { if (edge_ptr) { /*--- The FV way. 
---*/ - SU2_OMP_FOR_DYN(omp_light_size/2) + SU2_OMP_FOR_DYN(omp_heavy_size*2) for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) { auto bij = &matrix[edge_ptr(iEdge,0)*nVar*nVar]; auto bji = &matrix[edge_ptr(iEdge,1)*nVar*nVar]; @@ -1362,80 +1354,9 @@ void CSysMatrix::TransposeInPlace() { END_SU2_OMP_FOR #ifdef HAVE_PASTIX + SU2_OMP_MASTER pastix_wrapper.SetTransposedSolve(); -#endif -} - -template -void CSysMatrix::TransposeInPlace() { - - assert(nVar==nEqn && "Cannot transpose with nVar != nEqn."); - - auto swapAndTransp = [](unsigned long n, ScalarType* a, ScalarType* b) { - assert(a!=b); - /*--- a=b', b=a' ---*/ - for (auto i=0ul; i::MatrixMatrixAddition(ScalarType alpha, const CSysMa (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz); if (!ok) { - SU2_OMP_MASTER SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } SU2_OMP_FOR_STAT(omp_light_size) @@ -1473,9 +1392,7 @@ void CSysMatrix::BuildPastixPreconditioner(CGeometry *geometry, cons END_SU2_OMP_MASTER SU2_OMP_BARRIER #else - SU2_OMP_MASTER SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION); - END_SU2_OMP_MASTER #endif } @@ -1492,9 +1409,7 @@ void CSysMatrix::ComputePastixPreconditioner(const CSysVector::ModGramSchmidt(int i, su2matrix& Hsbg, if ((nrm <= 0.0) || (nrm != nrm)) { /*--- nrm is the result of a dot product, communications are implicitly handled. 
---*/ - SU2_OMP_MASTER SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } /*--- Begin main Gram-Schmidt loop ---*/ @@ -210,9 +208,7 @@ unsigned long CSysSolve::CG_LinSolver(const CSysVector & /*--- Check the subspace size ---*/ if (m < 1) { - SU2_OMP_MASTER SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } /*--- Allocate if not allocated yet, only one thread can @@ -353,15 +349,11 @@ unsigned long CSysSolve::FGMRES_LinSolver(const CSysVector 5000) { - SU2_OMP_MASTER SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } /*--- Allocate if not allocated yet ---*/ @@ -552,9 +544,7 @@ unsigned long CSysSolve::BCGSTAB_LinSolver(const CSysVector::Smoother_LinSolver(const CSysVectorGetLinear_Solver_Smoother_Relaxation()); if (m < 1) { - SU2_OMP_MASTER SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION); - END_SU2_OMP_MASTER } /*--- Allocate vectors for residual (r), solution increment (z), and matrix-vector @@ -994,7 +982,6 @@ unsigned long CSysSolve::Solve(CSysMatrix & Jacobian, co SU2_OMP_MASTER AD::EndExtFunc(); END_SU2_OMP_MASTER - SU2_OMP_BARRIER #endif } diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp index 626d625d7aa..dbf2a950dfb 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp @@ -34,39 +34,33 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) { StartTime = SU2_MPI::Wtime(); - unsigned long iPoint; - unsigned short TimeIter = config[iZone]->GetTimeIter(); - bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST); - bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND); - bool 
dual_time = (dual_time_1st || dual_time_2nd); - unsigned short iMesh; - int Direct_Iter; - bool heat = config[iZone]->GetWeakly_Coupled_Heat(); - bool grid_IsMoving = config[iZone]->GetGrid_Movement(); + const auto TimeIter = config[iZone]->GetTimeIter(); + const bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST); + const bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND); + const bool dual_time = (dual_time_1st || dual_time_2nd); + const bool grid_IsMoving = config[iZone]->GetGrid_Movement(); + const bool heat = config[iZone]->GetWeakly_Coupled_Heat(); auto solvers0 = solver[iZone][iInst][MESH_0]; + auto geometries = geometry[iZone][iInst]; // /*--- Read the target pressure for inverse design. ---------------------------------------------*/ // if (config[iZone]->GetInvDesign_Cp() == YES) // output->SetCp_InverseDesign(solvers0[FLOW_SOL], - // geometry[iZone][iInst][MESH_0], config[iZone], ExtIter); + // geometries[MESH_0], config[iZone], ExtIter); // /*--- Read the target heat flux ----------------------------------------------------------------*/ // if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES) // output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL], - // geometry[iZone][iInst][MESH_0], config[iZone], ExtIter); + // geometries[MESH_0], config[iZone], ExtIter); /*--- For the unsteady adjoint, load direct solutions from restart files. 
---*/ if (config[iZone]->GetTime_Marching()) { - Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2; + const int Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2 + dual_time; /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/ - if (dual_time) { - Direct_Iter += 1; - } - if (TimeIter == 0) { if (dual_time_2nd) { /*--- Load solution at timestep n-2 ---*/ @@ -74,7 +68,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); @@ -88,8 +82,8 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(); } if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(); - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1(); + geometries[iMesh]->nodes->SetCoord_n(); + geometries[iMesh]->nodes->SetCoord_n1(); } } } @@ -99,7 +93,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Push solution back to correct array ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(); @@ -110,7 +104,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(); } if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(); + geometries[iMesh]->nodes->SetCoord_n(); } } } @@ -120,8 +114,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* 
output, CIntegration**** integr LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter); if (config[iZone]->GetDeform_Mesh()) { - solvers0[MESH_SOL]->LoadRestart( - geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true); + solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true); } } else if ((TimeIter > 0) && dual_time) { @@ -133,8 +126,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr ---*/ if (config[iZone]->GetDeform_Mesh()) { - solvers0[MESH_SOL]->LoadRestart( - geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true); + solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true); } /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/ @@ -146,7 +138,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Temporarily store the loaded solution in the Solution_Old array ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; solvers[FLOW_SOL]->Set_OldSolution(); @@ -157,22 +149,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr solvers[HEAT_SOL]->Set_OldSolution(); } if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_Old(); + geometries[iMesh]->nodes->SetCoord_Old(); } } /*--- Set Solution at timestep n to solution at n-1 ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->SetSolution( iPoint, 
solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint)); if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord( - iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n(iPoint)); + geometries[iMesh]->nodes->SetCoord( + iPoint, geometries[iMesh]->nodes->GetCoord_n(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->SetSolution( @@ -186,16 +178,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } if (dual_time_1st) { /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( @@ -210,16 +201,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } if (dual_time_2nd) { /*--- Set Solution at timestep n-1 to solution at n-2 ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint)); if 
(grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n( - iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n1(iPoint)); + geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_n1(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n( @@ -232,16 +222,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr } } /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/ - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) { solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1( iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint)); if (grid_IsMoving) { - geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1( - iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint)); + geometries[iMesh]->nodes->SetCoord_n1(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint)); } if (turbulent) { solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1( @@ -259,52 +248,61 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr /*--- Compute & set Grid Velocity via finite differences of the Coordinates. 
---*/ if (grid_IsMoving) - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) - geometry[iZone][iInst][iMesh]->SetGridVelocity(config[iZone], TimeIter); + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) + geometries[iMesh]->SetGridVelocity(config[iZone], TimeIter); } // if unsteady + SU2_OMP_PARALLEL_(if(solvers0[ADJFLOW_SOL]->GetHasHybridParallel())) { + /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/ if (TimeIter == 0 || dual_time) { - for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { + for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) { auto solvers = solver[iZone][iInst][iMesh]; - for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint)); - } + END_SU2_OMP_FOR } if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { - for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint)); - } + END_SU2_OMP_FOR } if (heat) { - for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint)); - } + END_SU2_OMP_FOR } if (config[iZone]->AddRadiation()) { - for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) { + SU2_OMP_FOR_STAT(1024) + for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++) 
solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint)); - } + END_SU2_OMP_FOR } } - solvers0[ADJFLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], + solvers0[ADJFLOW_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJFLOW_SYS, false); if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) { - solvers0[ADJTURB_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], + solvers0[ADJTURB_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJTURB_SYS, false); } if (heat) { - solvers0[ADJHEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], + solvers0[ADJHEAT_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJHEAT_SYS, false); } if (config[iZone]->AddRadiation()) { - solvers0[ADJRAD_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone], + solvers0[ADJRAD_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone], MESH_0, 0, RUNTIME_ADJRAD_SYS, false); } + + } + END_SU2_OMP_PARALLEL } void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index 30249c14a5d..b3df2767365 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -29,7 +29,8 @@ #include "../../../Common/include/toolboxes/geometry_toolbox.hpp" #include "../../../Common/include/parallelization/omp_structure.hpp" -CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { +CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, + unsigned short Kind_Solver, unsigned short iMesh) : CSolver() { adjoint = true; @@ -77,9 +78,12 
@@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/ - su2double Solution[MAXNVAR] = {1e-16}; + if (nVar > MAXNVAR) { + SU2_MPI::Error("Oops! The CDiscAdjSolver static array sizes are not large enough.",CURRENT_FUNCTION); + } - nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config); + vector Solution(nVar,1e-16); + nodes = new CDiscAdjVariable(Solution.data(), nPoint, nDim, nVar, config); SetBaseClassPointerToNodes(); switch(KindDirect_Solver){ From 83b032bda257f4008a4985430d17a44ce2e24f57 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Tue, 23 Mar 2021 16:26:10 +0000 Subject: [PATCH 40/57] prepare CDiscAdjFEASolver --- .../iteration/CDiscAdjFEAIteration.hpp | 19 - SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp | 169 +++++--- SU2_CFD/include/solvers/CSolver.hpp | 5 - .../src/iteration/CDiscAdjFEAIteration.cpp | 61 +-- SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp | 410 ++++-------------- SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp | 5 +- 6 files changed, 225 insertions(+), 444 deletions(-) diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp index c613d449bfc..083222664b8 100644 --- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp +++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp @@ -104,25 +104,6 @@ class CDiscAdjFEAIteration final : public CIteration { CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) override; - /*! - * \brief Updates the containers for the discrete adjoint mean flow system. - * \param[in] output - Pointer to the COutput class. - * \param[in] integration - Container vector with all the integration methods. - * \param[in] geometry - Geometrical definition of the problem. - * \param[in] solver - Container vector with all the solutions. 
- * \param[in] numerics - Description of the numerical method (the way in which the equations are solved). - * \param[in] config - Definition of the particular problem. - * \param[in] surface_movement - Surface movement classes of the problem. - * \param[in] grid_movement - Volume grid movement classes of the problem. - * \param[in] FFDBox - FFD FFDBoxes of the problem. - * \param[in] val_iZone - Index of the zone. - * \param[in] val_iInst - Index of the instance. - */ - void Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, - CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, - CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, - unsigned short val_iInst) override; - /*! * \brief Monitors the convergence and other metrics for the discrete adjoint mean flow system. * \param[in] output - Pointer to the COutput class. diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp index c85ea4e8653..48c29f76dc7 100644 --- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp +++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp @@ -38,57 +38,84 @@ */ class CDiscAdjFEASolver final : public CSolver { private: + static constexpr size_t MAXNVAR = 3; /*!< \brief Max number of variables, for static arrays. */ + unsigned short KindDirect_Solver = 0; CSolver *direct_solver = nullptr; - su2double *Sens_E = nullptr, /*!< \brief Young modulus sensitivity coefficient for each boundary. */ - *Sens_Nu = nullptr, /*!< \brief Poisson's ratio sensitivity coefficient for each boundary. */ - *Sens_nL = nullptr, /*!< \brief Normal pressure sensitivity coefficient for each boundary. */ - **CSensitivity = nullptr; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */ - - su2double *Solution_Vel = nullptr, /*!< \brief Velocity componenent of the solution. 
*/ - *Solution_Accel = nullptr; /*!< \brief Acceleration componenent of the solution. */ - - su2double *normalLoads = nullptr; /*!< \brief Values of the normal loads for each marker iMarker_nL. */ - - unsigned short nMPROP = 0; /*!< \brief Number of material properties */ - - su2double *E_i = nullptr, /*!< \brief Values of the Young's Modulus. */ - *Nu_i = nullptr, /*!< \brief Values of the Poisson's ratio. */ - *Rho_i = nullptr, /*!< \brief Values of the density (for inertial effects). */ - *Rho_DL_i = nullptr; /*!< \brief Values of the density (for volume loading). */ - int *AD_Idx_E_i = nullptr, /*!< \brief Derivative index of the Young's Modulus. */ - *AD_Idx_Nu_i = nullptr, /*!< \brief Derivative index of the Poisson's ratio. */ - *AD_Idx_Rho_i = nullptr, /*!< \brief Derivative index of the density (for inertial effects). */ - *AD_Idx_Rho_DL_i = nullptr; /*!< \brief Derivative index of the density (for volume loading). */ - - su2double *Local_Sens_E = nullptr, /*!< \brief Local sensitivity of the Young's modulus. */ - *Global_Sens_E = nullptr, /*!< \brief Global sensitivity of the Young's modulus. */ - *Total_Sens_E = nullptr; /*!< \brief Total sensitivity of the Young's modulus (time domain). */ - su2double *Local_Sens_Nu = nullptr, /*!< \brief Local sensitivity of the Poisson ratio. */ - *Global_Sens_Nu = nullptr, /*!< \brief Global sensitivity of the Poisson ratio. */ - *Total_Sens_Nu = nullptr; /*!< \brief Total sensitivity of the Poisson ratio (time domain). */ - su2double *Local_Sens_Rho = nullptr, /*!< \brief Local sensitivity of the density. */ - *Global_Sens_Rho = nullptr, /*!< \brief Global sensitivity of the density. */ - *Total_Sens_Rho = nullptr; /*!< \brief Total sensitivity of the density (time domain). */ - su2double *Local_Sens_Rho_DL = nullptr, /*!< \brief Local sensitivity of the volume load. */ - *Global_Sens_Rho_DL = nullptr, /*!< \brief Global sensitivity of the volume load. 
*/ - *Total_Sens_Rho_DL = nullptr; /*!< \brief Total sensitivity of the volume load (time domain). */ - - bool de_effects = false; /*!< \brief Determines if DE effects are considered. */ - unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */ - su2double *EField = nullptr; /*!< \brief Array that stores the electric field as design variables. */ - int *AD_Idx_EField = nullptr; /*!< \brief Derivative index of the electric field as design variables. */ - su2double *Local_Sens_EField = nullptr, /*!< \brief Local sensitivity of the Electric Field. */ - *Global_Sens_EField = nullptr, /*!< \brief Global sensitivity of the Electric Field. */ - *Total_Sens_EField = nullptr; /*!< \brief Total sensitivity of the Electric Field (time domain). */ - - bool fea_dv = false; /*!< \brief Determines if the design variable we study is a FEA parameter. */ - unsigned short nDV = 0; /*!< \brief Number of design variables in the problem. */ - su2double *DV_Val = nullptr; /*!< \brief Values of the design variables. */ - int *AD_Idx_DV_Val = nullptr; /*!< \brief Derivative index of the design variables. */ - su2double *Local_Sens_DV = nullptr, /*!< \brief Local sensitivity of the design variables. */ - *Global_Sens_DV = nullptr, /*!< \brief Global sensitivity of the design variables. */ - *Total_Sens_DV = nullptr; /*!< \brief Total sensitivity of the design variables (time domain). */ + + /*! + * \brief A type to manage sensitivities of design variables. + */ + struct SensData { + unsigned short size = 0; + su2double* val = nullptr; /*!< \brief Value of the variable. */ + int* AD_Idx = nullptr; /*!< \brief Derivative index in the AD tape. */ + bool localIdx = false; + su2double* LocalSens = nullptr; /*!< \brief Local sensitivity (domain). */ + su2double* GlobalSens = nullptr; /*!< \brief Global sensitivity (mpi). */ + su2double* TotalSens = nullptr; /*!< \brief Total sensitivity (time domain). 
*/ + + su2double& operator[] (unsigned short i) { return val[i]; } + const su2double& operator[] (unsigned short i) const { return val[i]; } + + void resize(unsigned short n) { + clear(); + size = n; + val = new su2double[n](); + AD_Idx = new int[n](); + LocalSens = new su2double[n](); + GlobalSens = new su2double[n](); + TotalSens = new su2double[n](); + } + + void clear() { + size = 0; + localIdx = false; + delete [] val; + delete [] AD_Idx; + delete [] LocalSens; + delete [] GlobalSens; + delete [] TotalSens; + } + + void Register(bool push_index) { + for (auto i = 0u; i < size; ++i) AD::RegisterInput(val[i], push_index); + } + + void SetIndex() { + for (auto i = 0u; i < size; ++i) AD::SetIndex(AD_Idx[i], val[i]); + localIdx = true; + } + + void GetDerivative() { + if (localIdx) + for (auto i = 0u; i < size; ++i) LocalSens[i] = AD::GetDerivative(AD_Idx[i]); + else + for (auto i = 0u; i < size; ++i) LocalSens[i] = SU2_TYPE::GetDerivative(val[i]); + + SU2_MPI::Allreduce(LocalSens, GlobalSens, size, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); + } + + void UpdateTotal() { + for (auto i = 0u; i < size; ++i) TotalSens[i] += GlobalSens[i]; + } + + ~SensData() { clear(); } + }; + + unsigned short nMPROP = 0; /*!< \brief Number of material properties */ + SensData E; /*!< \brief Values of the Young's Modulus. */ + SensData Nu; /*!< \brief Values of the Poisson's ratio. */ + SensData Rho; /*!< \brief Values of the density (for inertial effects). */ + SensData Rho_DL; /*!< \brief Values of the density (for volume loading). */ + + bool de_effects = false; /*!< \brief Determines if DE effects are considered. */ + unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */ + SensData EField; /*!< \brief Array that stores the electric field as design variables. */ + + bool fea_dv = false; /*!< \brief Determines if the design variable we study is a FEA parameter. */ + unsigned short nDV = 0; /*!< \brief Number of design variables in the problem. 
*/ + SensData DV; /*!< \brief Values of the design variables. */ CDiscAdjFEABoundVariable* nodes = nullptr; /*!< \brief The highest level in the variable hierarchy this solver can safely use. */ @@ -97,6 +124,11 @@ class CDiscAdjFEASolver final : public CSolver { */ inline CVariable* GetBaseClassPointerToNodes() override { return nodes; } + /*! + * \brief Read the design variables for the adjoint solver + */ + void ReadDV(const CConfig *config); + public: /*! @@ -163,97 +195,97 @@ class CDiscAdjFEASolver final : public CSolver { * \return Value of the total Young's modulus sensitivity * (inviscid + viscous contribution). */ - inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return Total_Sens_E[iVal]; } + inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return E.TotalSens[iVal]; } /*! * \brief Set the total Poisson's ratio sensitivity. * \return Value of the Poisson's ratio sensitivity */ - inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Total_Sens_Nu[iVal]; } + inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Nu.TotalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural density * \return Value of the structural density sensitivity */ - inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Total_Sens_Rho[iVal]; } + inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Rho.TotalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural weight * \return Value of the structural weight sensitivity */ - inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Total_Sens_Rho_DL[iVal]; } + inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.TotalSens[iVal]; } /*! * \brief A virtual member. 
* \return Value of the sensitivity coefficient for the Electric Field in the region iEField (time averaged) */ - inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return Total_Sens_EField[iEField]; } + inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return EField.TotalSens[iEField]; } /*! * \brief A virtual member. * \return Value of the total sensitivity coefficient for the FEA DV in the region iDVFEA (time averaged) */ - inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return Total_Sens_DV[iDVFEA]; } + inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.TotalSens[iDVFEA]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the Young Modulus E */ - inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return Global_Sens_E[iVal]; } + inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return E.GlobalSens[iVal]; } /*! * \brief A virtual member. * \return Value of the Mach sensitivity for the Poisson's ratio Nu */ - inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Global_Sens_Nu[iVal]; } + inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Nu.GlobalSens[iVal]; } /*! * \brief A virtual member. * \return Value of the sensitivity coefficient for the Electric Field in the region iEField */ - inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return Global_Sens_EField[iEField]; } + inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return EField.GlobalSens[iEField]; } /*! * \brief A virtual member. 
* \return Value of the sensitivity coefficient for the FEA DV in the region iDVFEA */ - inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return Global_Sens_DV[iDVFEA]; } + inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.GlobalSens[iDVFEA]; } /*! * \brief Get the total sensitivity for the structural density * \return Value of the structural density sensitivity */ - inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Global_Sens_Rho[iVal]; } + inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Rho.GlobalSens[iVal]; } /*! * \brief Get the total sensitivity for the structural weight * \return Value of the structural weight sensitivity */ - inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Global_Sens_Rho_DL[iVal]; } + inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.GlobalSens[iVal]; } /*! * \brief Get the value of the Young modulus from the adjoint solver * \return Value of the Young modulus from the adjoint solver */ - inline su2double GetVal_Young(unsigned short iVal) const override { return E_i[iVal]; } + inline su2double GetVal_Young(unsigned short iVal) const override { return E[iVal]; } /*! * \brief Get the value of the Poisson's ratio from the adjoint solver * \return Value of the Poisson's ratio from the adjoint solver */ - inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu_i[iVal]; } + inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu[iVal]; } /*! * \brief Get the value of the density from the adjoint solver, for inertial effects * \return Value of the density from the adjoint solver */ - inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho_i[iVal]; } + inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho[iVal]; } /*! 
* \brief Get the value of the density from the adjoint solver, for dead loads * \return Value of the density for dead loads, from the adjoint solver */ - inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL_i[iVal]; } + inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL[iVal]; } /*! * \brief Get the number of variables for the Electric Field from the adjoint solver @@ -261,11 +293,6 @@ class CDiscAdjFEASolver final : public CSolver { */ inline unsigned short GetnEField(void) const override { return nEField; } - /*! - * \brief Read the design variables for the adjoint solver - */ - void ReadDV(CConfig *config) override; - /*! * \brief Get the number of design variables from the adjoint solver, * \return Number of design variables from the adjoint solver @@ -282,7 +309,7 @@ class CDiscAdjFEASolver final : public CSolver { * \brief Get the value of the design variables from the adjoint solver * \return Pointer to the values of the design variables */ - inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV_Val[iVal]; } + inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV[iVal]; } /*! * \brief Prepare the solver for a new recording. diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp index 641c57e716f..e41dcf871ef 100644 --- a/SU2_CFD/include/solvers/CSolver.hpp +++ b/SU2_CFD/include/solvers/CSolver.hpp @@ -3383,11 +3383,6 @@ class CSolver { */ inline virtual unsigned short GetnDVFEA(void) const { return 0; } - /*! - * \brief A virtual member. - */ - inline virtual void ReadDV(CConfig *config) { } - /*! * \brief A virtual member. 
* \return Pointer to the values of the Electric Field diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index fa753a598a7..23cb1ee1b75 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -162,7 +162,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - bool dynamic = (config[val_iZone]->GetTime_Domain()); /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/ @@ -171,7 +170,7 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->ExtractAdjoint_Variables(geometry[val_iZone][val_iInst][MESH_0], config[val_iZone]); - if (dynamic) { + if (config[val_iZone]->GetTime_Domain()) { integration[val_iZone][val_iInst][ADJFEA_SOL]->SetConvergence(false); } } @@ -215,15 +214,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge auto structural_numerics = numerics[iZone][iInst][MESH_0][FEA_SOL]; /*--- Some numerics are only instanciated under these conditions ---*/ - bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem(); - bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS; - bool de_effects = config[iZone]->GetDE_Effects() && nonlinear; - bool element_based = dir_solver->IsElementBased() && nonlinear; + const bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem(); + const bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS; + const bool de_effects = config[iZone]->GetDE_Effects() && nonlinear; + const bool element_based = 
dir_solver->IsElementBased() && nonlinear; SU2_OMP_PARALLEL { - int thread = omp_get_thread_num(); + const int thread = omp_get_thread_num(); + const int offset = thread*MAX_TERMS; + const int fea_term = FEA_TERM+offset; + const int mat_nhcomp = MAT_NHCOMP+offset; + const int mat_idealde = MAT_IDEALDE+offset; + const int mat_knowles = MAT_KNOWLES+offset; + const int de_term = DE_TERM+offset; for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) { su2double E = adj_solver->GetVal_Young(iProp); @@ -233,33 +238,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge /*--- Add dependencies for E and Nu ---*/ - structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[fea_term]->SetMaterial_Properties(iProp, E, nu); /*--- Add dependencies for Rho and Rho_DL ---*/ - structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[fea_term]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Add dependencies for element-based simulations. 
---*/ if (element_based) { /*--- Neo Hookean Compressible ---*/ - structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_nhcomp]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_nhcomp]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Ideal DE ---*/ - structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_idealde]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_idealde]->SetMaterial_Density(iProp, rho, rhoDL); /*--- Knowles ---*/ - structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu); - structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL); + structural_numerics[mat_knowles]->SetMaterial_Properties(iProp, E, nu); + structural_numerics[mat_knowles]->SetMaterial_Density(iProp, rho, rhoDL); } } if (de_effects) { for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) { - structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); - structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[fea_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); + structural_numerics[de_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField)); } } @@ -275,22 +280,19 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) { su2double dvfea = adj_solver->GetVal_DVFEA(iDV); - structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); + structural_numerics[fea_term]->Set_DV_Val(iDV, 
dvfea); - if (de_effects) structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); + if (de_effects) structural_numerics[de_term]->Set_DV_Val(iDV, dvfea); if (element_based) { - structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); - structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_nhcomp]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_idealde]->Set_DV_Val(iDV, dvfea); + structural_numerics[mat_knowles]->Set_DV_Val(iDV, dvfea); } } break; } - } - END_SU2_OMP_PARALLEL - /*--- MPI dependencies. ---*/ dir_solver->InitiateComms(structural_geometry, config[iZone], SOLUTION_FEA); @@ -301,6 +303,9 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge structural_geometry->CompleteComms(structural_geometry, config[iZone], COORDINATES); } + } + END_SU2_OMP_PARALLEL + /*--- FSI specific dependencies. ---*/ if (fsi) { /*--- Set relation between solution and predicted displacements, which are the transferred ones. ---*/ @@ -317,6 +322,7 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge /// making it a virtual method of CSolver does not feel "right" as its purpose could be confused. 
static_cast(dir_solver)->FilterElementDensities(structural_geometry, config[iZone]); } + } void CDiscAdjFEAIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, @@ -333,11 +339,6 @@ void CDiscAdjFEAIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]); } -void CDiscAdjFEAIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, - CSolver***** solver, CNumerics****** numerics, CConfig** config, - CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, - CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {} - bool CDiscAdjFEAIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index bc72135c545..ea7b08a1d47 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -33,13 +33,9 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv adjoint = true; - unsigned short iVar, iMarker; - unsigned long iPoint; - string text_line, mesh_filename; - string filename, AdjExt; - bool dynamic = (config->GetTime_Domain()); + const bool dynamic = (config->GetTime_Domain()); nVar = direct_solver->GetnVar(); nDim = geometry->GetnDim(); @@ -54,8 +50,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv /*--- Define some auxiliary vectors related to the residual ---*/ - Residual = new su2double[nVar]; for (iVar = 0; iVar < nVar; iVar++) Residual[iVar] = 1.0; - Residual_RMS.resize(nVar,1.0); Residual_Max.resize(nVar,1.0); Point_Max.resize(nVar,0); @@ -71,32 +65,10 @@ 
CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0); } - /*--- Define some auxiliary vectors related to the solution ---*/ - - Solution = new su2double[nVar]; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16; - - if (dynamic) { - Solution_Vel = new su2double[nVar]; - Solution_Accel = new su2double[nVar]; - - for (iVar = 0; iVar < nVar; iVar++) Solution_Vel[iVar] = 1e-16; - for (iVar = 0; iVar < nVar; iVar++) Solution_Accel[iVar] = 1e-16; - } - - /*--- Sensitivity definition and coefficient in all the markers ---*/ - - CSensitivity = new su2double* [nMarker]; - - for (iMarker = 0; iMarker < nMarker; iMarker++) { - CSensitivity[iMarker] = new su2double [geometry->nVertex[iMarker]](); - } - - Sens_E = new su2double[nMarker](); - Sens_Nu = new su2double[nMarker](); - Sens_nL = new su2double[nMarker](); + /*--- Initialize the adjoint solution. ---*/ - nodes = new CDiscAdjFEABoundVariable(Solution, Solution_Accel, Solution_Vel, nPoint, nDim, nVar, dynamic, config); + vector init(nVar,1e-16); + nodes = new CDiscAdjFEABoundVariable(init.data(), init.data(), init.data(), nPoint, nDim, nVar, dynamic, config); SetBaseClassPointerToNodes(); /*--- Set which points are vertices and allocate boundary data. 
---*/ @@ -111,7 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv } nodes->AllocateBoundaryVariables(config); - /*--- Store the direct solution ---*/ for (iPoint = 0; iPoint < nPoint; iPoint++){ @@ -141,29 +112,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv SU2_MPI::Error("WARNING: For a material to be fully defined, E, Nu and Rho need to have the same dimensions.", CURRENT_FUNCTION); } - E_i = new su2double[nMPROP](); - Local_Sens_E = new su2double[nMPROP](); - Global_Sens_E = new su2double[nMPROP](); - Total_Sens_E = new su2double[nMPROP](); - AD_Idx_E_i = new int[nMPROP](); - - Nu_i = new su2double[nMPROP](); - Local_Sens_Nu = new su2double[nMPROP](); - Global_Sens_Nu = new su2double[nMPROP](); - Total_Sens_Nu = new su2double[nMPROP](); - AD_Idx_Nu_i = new int[nMPROP](); - - Rho_i = new su2double[nMPROP](); // For inertial effects - Local_Sens_Rho = new su2double[nMPROP](); - Global_Sens_Rho = new su2double[nMPROP](); - Total_Sens_Rho = new su2double[nMPROP](); - AD_Idx_Rho_i = new int[nMPROP](); - - Rho_DL_i = new su2double[nMPROP](); // For dead loads - Local_Sens_Rho_DL = new su2double[nMPROP](); - Global_Sens_Rho_DL = new su2double[nMPROP](); - Total_Sens_Rho_DL = new su2double[nMPROP](); - AD_Idx_Rho_DL_i = new int[nMPROP](); + E.resize(nMPROP); + Nu.resize(nMPROP); + Rho.resize(nMPROP); + Rho_DL.resize(nMPROP); /*--- Initialize vector structures for multiple electric regions ---*/ @@ -171,12 +123,7 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv if (de_effects) { nEField = config->GetnElectric_Field(); - - EField = new su2double[nEField](); - Local_Sens_EField = new su2double[nEField](); - Global_Sens_EField = new su2double[nEField](); - Total_Sens_EField = new su2double[nEField](); - AD_Idx_EField = new int[nEField](); + EField.resize(nEField); } /*--- Initialize vector structures for structural-based design variables ---*/ @@ -194,80 
+141,14 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv break; } - if (fea_dv) { - ReadDV(config); - Local_Sens_DV = new su2double[nDV](); - Global_Sens_DV = new su2double[nDV](); - Total_Sens_DV = new su2double[nDV](); - AD_Idx_DV_Val = new int[nDV](); - } + if (fea_dv) ReadDV(config); } -CDiscAdjFEASolver::~CDiscAdjFEASolver(void){ - - unsigned short iMarker; - - if (CSensitivity != nullptr) { - for (iMarker = 0; iMarker < nMarker; iMarker++) { - delete [] CSensitivity[iMarker]; - } - delete [] CSensitivity; - } - - delete [] E_i; - delete [] Nu_i; - delete [] Rho_i; - delete [] Rho_DL_i; - - delete [] AD_Idx_E_i; - delete [] AD_Idx_Nu_i; - delete [] AD_Idx_Rho_i; - delete [] AD_Idx_Rho_DL_i; - - delete [] Local_Sens_E; - delete [] Local_Sens_Nu; - delete [] Local_Sens_Rho; - delete [] Local_Sens_Rho_DL; - - delete [] Global_Sens_E; - delete [] Global_Sens_Nu; - delete [] Global_Sens_Rho; - delete [] Global_Sens_Rho_DL; - - delete [] Total_Sens_E; - delete [] Total_Sens_Nu; - delete [] Total_Sens_Rho; - delete [] Total_Sens_Rho_DL; - - delete [] normalLoads; - delete [] Sens_E; - delete [] Sens_Nu; - delete [] Sens_nL; - - delete [] EField; - delete [] Local_Sens_EField; - delete [] Global_Sens_EField; - delete [] Total_Sens_EField; - delete [] AD_Idx_EField; - - delete [] DV_Val; - delete [] Local_Sens_DV; - delete [] Global_Sens_DV; - delete [] Total_Sens_DV; - delete [] AD_Idx_DV_Val; - - delete [] Solution_Vel; - delete [] Solution_Accel; - - delete nodes; -} +CDiscAdjFEASolver::~CDiscAdjFEASolver() { delete nodes; } void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ - - bool dynamic (config->GetTime_Domain()); - unsigned long iPoint; unsigned short iVar; @@ -277,7 +158,7 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint)); } - if (dynamic){ + if (config->GetTime_Domain()){ /*--- Reset the 
solution to the initial (converged) solution ---*/ for (iPoint = 0; iPoint < nPoint; iPoint++){ @@ -321,9 +202,9 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){ void CDiscAdjFEASolver::RegisterSolution(CGeometry *geometry, CConfig *config){ - bool input = true; - bool dynamic = config->GetTime_Domain(); - bool push_index = !config->GetMultizone_Problem(); + const bool input = true; + const bool dynamic = config->GetTime_Domain(); + const bool push_index = !config->GetMultizone_Problem(); /*--- Register solution at all necessary time instances and other variables on the tape ---*/ @@ -354,13 +235,13 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, if (KindDirect_Solver == RUNTIME_FEA_SYS) { - bool pseudo_static = config->GetPseudoStatic(); + const bool pseudo_static = config->GetPseudoStatic(); for (iVar = 0; iVar < nMPROP; iVar++) { - E_i[iVar] = config->GetElasticyMod(iVar); - Nu_i[iVar] = config->GetPoissonRatio(iVar); - Rho_i[iVar] = pseudo_static? 0.0 : config->GetMaterialDensity(iVar); - Rho_DL_i[iVar] = config->GetMaterialDensity(iVar); + E[iVar] = config->GetElasticyMod(iVar); + Nu[iVar] = config->GetPoissonRatio(iVar); + Rho[iVar] = pseudo_static? 
0.0 : config->GetMaterialDensity(iVar); + Rho_DL[iVar] = config->GetMaterialDensity(iVar); } /*--- Read the values of the electric field ---*/ @@ -371,48 +252,28 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, /*--- Reset index, otherwise messes up other derivatives ---*/ if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV_Val[iVar]); + for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV[iVar]); } if (!reset) { - bool local_index = config->GetMultizone_Problem(); - bool push_index = !local_index; - - for (iVar = 0; iVar < nMPROP; iVar++) { - AD::RegisterInput(E_i[iVar], push_index); - AD::RegisterInput(Nu_i[iVar], push_index); - AD::RegisterInput(Rho_i[iVar], push_index); - AD::RegisterInput(Rho_DL_i[iVar], push_index); - } + const bool local_index = config->GetMultizone_Problem(); + const bool push_index = !local_index; - if(de_effects){ - for (iVar = 0; iVar < nEField; iVar++) - AD::RegisterInput(EField[iVar], push_index); - } - - if(fea_dv){ - for (iVar = 0; iVar < nDV; iVar++) - AD::RegisterInput(DV_Val[iVar], push_index); - } + E.Register(push_index); + Nu.Register(push_index); + Rho.Register(push_index); + Rho_DL.Register(push_index); + if (de_effects) EField.Register(push_index); + if (fea_dv) DV.Register(push_index); /*--- Explicitly store the tape indices for when we extract the derivatives ---*/ if (local_index) { - for (iVar = 0; iVar < nMPROP; iVar++) { - AD::SetIndex(AD_Idx_E_i[iVar], E_i[iVar]); - AD::SetIndex(AD_Idx_Nu_i[iVar], Nu_i[iVar]); - AD::SetIndex(AD_Idx_Rho_i[iVar], Rho_i[iVar]); - AD::SetIndex(AD_Idx_Rho_DL_i[iVar], Rho_DL_i[iVar]); - } - - if (de_effects) { - for (iVar = 0; iVar < nEField; iVar++) - AD::SetIndex(AD_Idx_EField[iVar], EField[iVar]); - } - - if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) - AD::SetIndex(AD_Idx_DV_Val[iVar], DV_Val[iVar]); - } + E.SetIndex(); + Nu.SetIndex(); + Rho.SetIndex(); + Rho_DL.SetIndex(); + if (de_effects) EField.SetIndex(); + if (fea_dv) 
DV.SetIndex(); } /*--- Register the flow tractions ---*/ @@ -422,16 +283,16 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config, } - /*--- Here it is possible to register other variables as input that influence the flow solution - * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be - * extracted in the ExtractAdjointVariables routine. ---*/ + /*--- Here it is possible to register other variables as input that influence the flow solution + * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be + * extracted in the ExtractAdjointVariables routine. ---*/ } void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){ - bool input = false; - bool dynamic = config->GetTime_Domain(); - bool push_index = !config->GetMultizone_Problem(); + const bool input = false; + const bool dynamic = config->GetTime_Domain(); + const bool push_index = !config->GetMultizone_Problem(); /*--- Register variables as output of the solver iteration ---*/ @@ -447,13 +308,15 @@ void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){ - bool dynamic = config->GetTime_Domain(); - bool multizone = config->GetMultizone_Problem(); + const bool dynamic = config->GetTime_Domain(); + const bool multizone = config->GetMultizone_Problem(); unsigned short iVar; unsigned long iPoint; su2double residual; + su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0}; + /*--- Set Residuals to zero ---*/ SetResToZero(); @@ -595,78 +458,40 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co void CDiscAdjFEASolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config){ - unsigned short iVar; - bool local_index = config->GetMultizone_Problem(); + if (KindDirect_Solver != RUNTIME_FEA_SYS) return; - /*--- 
Extract the adjoint values of the farfield values ---*/ + /*--- Sensitivities of material properties and design variables. ---*/ - if (KindDirect_Solver == RUNTIME_FEA_SYS){ + E.GetDerivative(); + Nu.GetDerivative(); + Rho.GetDerivative(); + Rho_DL.GetDerivative(); + if (de_effects) EField.GetDerivative(); + if (fea_dv) DV.GetDerivative(); - if (local_index) { - for (iVar = 0; iVar < nMPROP; iVar++) { - Local_Sens_E[iVar] = AD::GetDerivative(AD_Idx_E_i[iVar]); - Local_Sens_Nu[iVar] = AD::GetDerivative(AD_Idx_Nu_i[iVar]); - Local_Sens_Rho[iVar] = AD::GetDerivative(AD_Idx_Rho_i[iVar]); - Local_Sens_Rho_DL[iVar] = AD::GetDerivative(AD_Idx_Rho_DL_i[iVar]); - } - } - else { - for (iVar = 0; iVar < nMPROP; iVar++) { - Local_Sens_E[iVar] = SU2_TYPE::GetDerivative(E_i[iVar]); - Local_Sens_Nu[iVar] = SU2_TYPE::GetDerivative(Nu_i[iVar]); - Local_Sens_Rho[iVar] = SU2_TYPE::GetDerivative(Rho_i[iVar]); - Local_Sens_Rho_DL[iVar] = SU2_TYPE::GetDerivative(Rho_DL_i[iVar]); - } - } - - SU2_MPI::Allreduce(Local_Sens_E, Global_Sens_E, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Nu, Global_Sens_Nu, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Rho, Global_Sens_Rho, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - SU2_MPI::Allreduce(Local_Sens_Rho_DL, Global_Sens_Rho_DL, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - - /*--- Extract the adjoint values of the electric field in the case that it is a parameter of the problem. ---*/ - - if (de_effects) { - for (iVar = 0; iVar < nEField; iVar++) { - if (local_index) Local_Sens_EField[iVar] = AD::GetDerivative(AD_Idx_EField[iVar]); - else Local_Sens_EField[iVar] = SU2_TYPE::GetDerivative(EField[iVar]); - } - SU2_MPI::Allreduce(Local_Sens_EField, Global_Sens_EField, nEField, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); - } + /*--- Extract the flow traction sensitivities. 
---*/ - if (fea_dv) { - for (iVar = 0; iVar < nDV; iVar++) { - if (local_index) Local_Sens_DV[iVar] = AD::GetDerivative(AD_Idx_DV_Val[iVar]); - else Local_Sens_DV[iVar] = SU2_TYPE::GetDerivative(DV_Val[iVar]); + if (config->GetnMarker_Fluid_Load() > 0) { + for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){ + for (unsigned short iDim = 0; iDim < nDim; iDim++){ + su2double val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim); + nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens); } - SU2_MPI::Allreduce(Local_Sens_DV, Global_Sens_DV, nDV, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm()); } - - /*--- Extract the flow traction sensitivities ---*/ - - if (config->GetnMarker_Fluid_Load() > 0){ - su2double val_sens; - for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){ - for (unsigned short iDim = 0; iDim < nDim; iDim++){ - val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim); - nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens); - } - } - } - } } void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){ - bool dynamic = (config->GetTime_Domain()); - bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0); + const bool dynamic = (config->GetTime_Domain()); + const bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0); + + su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0}; unsigned short iVar; - unsigned long iPoint; - for (iPoint = 0; iPoint < nPoint; iPoint++){ + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ for (iVar = 0; iVar < nVar; iVar++){ Solution[iVar] = nodes->GetSolution(iPoint,iVar); } @@ -676,20 +501,14 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){ } } if (dynamic){ - for (iVar = 0; iVar < nVar; iVar++){ - Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar); - } - for (iVar = 0; iVar < nVar; iVar++){ - Solution_Vel[iVar] = 
nodes->GetSolution_Vel(iPoint,iVar); - } for (iVar = 0; iVar < nVar; iVar++){ Solution[iVar] += nodes->GetDynamic_Derivative_n(iPoint,iVar); } for (iVar = 0; iVar < nVar; iVar++){ - Solution_Accel[iVar] += nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar); + Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar) + nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar); } for (iVar = 0; iVar < nVar; iVar++){ - Solution_Vel[iVar] += nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar); + Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar) + nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar); } } direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution); @@ -704,12 +523,10 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){ - bool dynamic = (config_container->GetTime_Domain()); - unsigned long iPoint; unsigned short iVar; - if (dynamic){ - for (iPoint = 0; iPointGetnPoint(); iPoint++){ + if (config_container->GetTime_Domain()){ + for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){ for (iVar=0; iVar < nVar; iVar++){ nodes->SetDynamic_Derivative_n(iPoint, iVar, nodes->GetSolution_time_n(iPoint, iVar)); } @@ -726,26 +543,14 @@ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_cont void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*){ - unsigned short iVar; - - for (iVar = 0; iVar < nMPROP; iVar++){ - Total_Sens_E[iVar] += Global_Sens_E[iVar]; - Total_Sens_Nu[iVar] += Global_Sens_Nu[iVar]; - Total_Sens_Rho[iVar] += Global_Sens_Rho[iVar]; - Total_Sens_Rho_DL[iVar] += Global_Sens_Rho_DL[iVar]; - } - - if (de_effects){ - for (iVar = 0; iVar < nEField; iVar++) - Total_Sens_EField[iVar]+= Global_Sens_EField[iVar]; - } + E.UpdateTotal(); + Nu.UpdateTotal(); + Rho.UpdateTotal(); + 
Rho_DL.UpdateTotal(); + if (de_effects) EField.UpdateTotal(); + if (fea_dv) DV.UpdateTotal(); - if (fea_dv){ - for (iVar = 0; iVar < nDV; iVar++) - Total_Sens_DV[iVar] += Global_Sens_DV[iVar]; - } - - /*--- Extract the topology optimization density sensitivities ---*/ + /*--- Extract the topology optimization density sensitivities. ---*/ direct_solver->ExtractAdjoint_Variables(geometry, config); @@ -773,38 +578,32 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo } } -void CDiscAdjFEASolver::ReadDV(CConfig *config) { - - unsigned long index; +void CDiscAdjFEASolver::ReadDV(const CConfig *config) { string filename; ifstream properties_file; /*--- Choose the filename of the design variable ---*/ - string input_name; - switch (config->GetDV_FEA()) { case YOUNG_MODULUS: - input_name = "dv_young.opt"; + filename = "dv_young.opt"; break; case POISSON_RATIO: - input_name = "dv_poisson.opt"; + filename = "dv_poisson.opt"; break; case DENSITY_VAL: case DEAD_WEIGHT: - input_name = "dv_density.opt"; + filename = "dv_density.opt"; break; case ELECTRIC_FIELD: - input_name = "dv_efield.opt"; + filename = "dv_efield.opt"; break; default: - input_name = "dv.opt"; + filename = "dv.opt"; break; } - filename = input_name; - if (rank == MASTER_NODE) cout << "Filename: " << filename << "." << endl; properties_file.open(filename.data(), ios::in); @@ -816,55 +615,32 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) { if (rank == MASTER_NODE) cout << "There is no design variable file." 
<< endl; - nDV = 1; - DV_Val = new su2double[nDV]; - for (unsigned short iDV = 0; iDV < nDV; iDV++) - DV_Val[iDV] = 1.0; - + nDV = 1; + DV.resize(nDV); + DV[0] = 1.0; } else{ string text_line; - - /*--- First pass: determine number of design variables ---*/ - - unsigned short iDV = 0; + vector values; /*--- Skip the first line: it is the header ---*/ - getline (properties_file, text_line); - while (getline (properties_file, text_line)) iDV++; - - /*--- Close the restart file ---*/ - - properties_file.close(); - - nDV = iDV; - DV_Val = new su2double[nDV]; - - /*--- Reopen the file (TODO: improve this) ---*/ - - properties_file.open(filename.data(), ios::in); - - /*--- Skip the first line: it is the header ---*/ - - getline (properties_file, text_line); - - iDV = 0; while (getline (properties_file, text_line)) { - istringstream point_line(text_line); - point_line >> index >> DV_Val[iDV]; - - iDV++; + unsigned long index; + su2double value; + point_line >> index >> value; + values.push_back(value); } - /*--- Close the restart file ---*/ - - properties_file.close(); + nDV = values.size(); + DV.resize(nDV); + unsigned short iDV = 0; + for (auto x : values) DV[iDV++] = x; } diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp index 9cb4950870b..610d2026043 100644 --- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp @@ -59,15 +59,16 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo } /*--- Initialize the node structure ---*/ - su2double Solution[MAXNVAR] = {1e-16}; nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config); SetBaseClassPointerToNodes(); /*--- Set which points are vertices and allocate boundary data. 
---*/ + vector Solution(nVar,1e-16); + for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) { - nodes->SetSolution(iPoint,Solution); + nodes->SetSolution(iPoint,Solution.data()); for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) { long iVertex = geometry->nodes->GetVertex(iPoint, iMarker); From 746587100127dfb4f4f9fd64f134b1ca431db570 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Tue, 23 Mar 2021 17:07:05 +0000 Subject: [PATCH 41/57] simplify --- SU2_CFD/include/solvers/CSolver.hpp | 12 +++ .../src/iteration/CDiscAdjFEAIteration.cpp | 71 ++++++++-------- SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp | 82 +------------------ SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 55 +------------ SU2_CFD/src/solvers/CSolver.cpp | 54 ++++++++++++ 5 files changed, 105 insertions(+), 169 deletions(-) diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp index e41dcf871ef..a2e29b4c055 100644 --- a/SU2_CFD/include/solvers/CSolver.hpp +++ b/SU2_CFD/include/solvers/CSolver.hpp @@ -3488,6 +3488,18 @@ class CSolver { inline virtual void SetAitken_Relaxation(CGeometry *geometry, CConfig *config) { } + /*! + * \brief Loads the solution from the restart file. + * \param[in] geometry - Geometrical definition of the problem. + * \param[in] config - Definition of the particular problem. + * \param[in] filename - Name of the restart file. + * \param[in] skipVars - Number of variables preceeding the solution. + */ + void BasicLoadRestart(CGeometry *geometry, + const CConfig *config, + const string& filename, + unsigned long skipVars); + /*! * \brief A virtual member. * \param[in] geometry - Geometrical definition of the problem. 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp index 23cb1ee1b75..a19210d468f 100644 --- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp +++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp @@ -63,20 +63,22 @@ CDiscAdjFEAIteration::CDiscAdjFEAIteration(const CConfig *config) : CIteration(c } CDiscAdjFEAIteration::~CDiscAdjFEAIteration(void) {} + void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { unsigned long iPoint; - unsigned short TimeIter = config[val_iZone]->GetTimeIter(); - bool dynamic = (config[val_iZone]->GetTime_Domain()); - - int Direct_Iter; + auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; + auto geometry0 = geometry[val_iZone][val_iInst][MESH_0]; + auto dirNodes = solvers0[FEA_SOL]->GetNodes(); + auto adjNodes = solvers0[ADJFEA_SOL]->GetNodes(); /*--- For the dynamic adjoint, load direct solutions from restart files. 
---*/ - if (dynamic) { - Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1; + if (config[val_iZone]->GetTime_Domain()) { + const int TimeIter = config[val_iZone]->GetTimeIter(); + const int Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - TimeIter - 1; /*--- We want to load the already converged solution at timesteps n and n-1 ---*/ @@ -86,15 +88,15 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->Set_Solution_time_n(); + dirNodes->Set_Solution_time_n(); /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Accel_time_n(); + dirNodes->SetSolution_Accel_time_n(); /*--- Push solution back to correct array ---*/ - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Vel_time_n(); + dirNodes->SetSolution_Vel_time_n(); /*--- Load solution timestep n ---*/ @@ -102,33 +104,28 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/ - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint)); } - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Accel_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Accel(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); 
iPoint++) { + adjNodes->SetSolution_Accel_Direct(iPoint, dirNodes->GetSolution_Accel(iPoint)); } - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Vel_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Vel(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Vel_Direct(iPoint, dirNodes->GetSolution_Vel(iPoint)); } } else { /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/ - for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) { - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct( - iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint)); + for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) { + adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint)); } } - solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->Preprocessing( - geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0, - RUNTIME_ADJFEA_SYS, false); + solvers0[ADJFEA_SOL]->Preprocessing(geometry0, solvers0, config[val_iZone], MESH_0, 0, RUNTIME_ADJFEA_SYS, false); + } void CDiscAdjFEAIteration::LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, @@ -355,13 +352,14 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra CSolver***** solver, CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) { - bool dynamic = (config[val_iZone]->GetTime_Domain()); + const bool dynamic = (config[val_iZone]->GetTime_Domain()); + auto solvers0 = solver[val_iZone][val_iInst][MESH_0]; // TEMPORARY output only for 
standalone structural problems if ((!config[val_iZone]->GetFSI_Simulation()) && (rank == MASTER_NODE)) { unsigned short iVar; - bool de_effects = config[val_iZone]->GetDE_Effects(); + const bool de_effects = config[val_iZone]->GetDE_Effects(); /*--- Header of the temporary output file ---*/ ofstream myfile_res; @@ -371,24 +369,23 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra myfile_res << config[val_iZone]->GetTimeIter() << "\t"; - solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]); - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetTotal_ComboObj() << "\t"; + solvers0[FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]); + myfile_res << scientific << solvers0[FEA_SOL]->GetTotal_ComboObj() << "\t"; for (iVar = 0; iVar < config[val_iZone]->GetnElasticityMod(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t"; for (iVar = 0; iVar < config[val_iZone]->GetnPoissonRatio(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t"; if (dynamic) { for (iVar = 0; iVar < config[val_iZone]->GetnMaterialDensity(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t"; } if (de_effects) { for (iVar = 0; iVar < config[val_iZone]->GetnElectric_Field(); iVar++) - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_EField(iVar) - << "\t"; + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_EField(iVar) << "\t"; } - for (iVar = 0; iVar < solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); 
iVar++) { - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t"; + for (iVar = 0; iVar < solvers0[ADJFEA_SOL]->GetnDVFEA(); iVar++) { + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t"; } myfile_res << endl; @@ -427,7 +424,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra if (outputDVFEA) { unsigned short iDV; - unsigned short nDV = solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); + unsigned short nDV = solvers0[ADJFEA_SOL]->GetnDVFEA(); myfile_res << "INDEX" << "\t" @@ -438,7 +435,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra for (iDV = 0; iDV < nDV; iDV++) { myfile_res << iDV; myfile_res << "\t"; - myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV); + myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV); myfile_res << endl; } diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp index ea7b08a1d47..9b377b54751 100644 --- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp @@ -83,22 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv } nodes->AllocateBoundaryVariables(config); - /*--- Store the direct solution ---*/ - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Direct(iPoint, direct_solver->GetNodes()->GetSolution(iPoint)); - } - - if (dynamic){ - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Accel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Accel(iPoint)); - } - - for (iPoint = 0; iPoint < nPoint; iPoint++){ - nodes->SetSolution_Vel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Vel(iPoint)); - } - } - /*--- Initialize vector structures for multiple material definition ---*/ nMPROP = config->GetnElasticityMod(); @@ -648,72 
+632,12 @@ void CDiscAdjFEASolver::ReadDV(const CConfig *config) { void CDiscAdjFEASolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { - unsigned short iVar; - unsigned long index, counter; - string restart_filename, filename; - /*--- Restart the solution from file information ---*/ - filename = config->GetSolution_AdjFileName(); - restart_filename = config->GetObjFunc_Extension(filename); + auto filename = config->GetSolution_AdjFileName(); + auto restart_filename = config->GetObjFunc_Extension(filename); restart_filename = config->GetFilename(restart_filename, "", val_iter); - /*--- Read and store the restart metadata. ---*/ - -// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename); - - /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ - - if (config->GetRead_Binary_Restart()) { - Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename); - } else { - Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename); - } - - /*--- Read all lines in the restart file ---*/ - - long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0; - - /*--- Skip coordinates ---*/ - - unsigned short skipVars = geometry[MESH_0]->GetnDim(); - - /*--- Load data from the restart into correct containers. ---*/ - - counter = 0; - for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { - - /*--- Retrieve local index. If this node from the restart file lives - on the current processor, we will load and instantiate the vars. ---*/ - - iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); - - if (iPoint_Local > -1) { - - /*--- We need to store this point's data, so jump to the correct - offset in the buffer of data from the restart file and load it. 
---*/ - - index = counter*Restart_Vars[1] + skipVars; - for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar]; - nodes->SetSolution(iPoint_Local,Solution); - iPoint_Global_Local++; - - /*--- Increment the overall counter for how many points have been loaded. ---*/ - counter++; - } - - } - - /*--- Detect a wrong solution file ---*/ - - if (iPoint_Global_Local < nPointDomain) { - SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + - string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); - } - - /*--- Delete the class memory that is used to load the restart. ---*/ - - delete [] Restart_Vars; Restart_Vars = nullptr; - delete [] Restart_Data; Restart_Data = nullptr; + BasicLoadRestart(geometry[MESH_0], config, restart_filename, geometry[MESH_0]->GetnDim()); } diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp index b3df2767365..39e0cf2001d 100644 --- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp +++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp @@ -632,26 +632,13 @@ void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_contain void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) { - const bool rans = (config->GetKind_Turb_Model() != NONE); - /*--- Restart the solution from file information ---*/ auto filename = config->GetSolution_AdjFileName(); auto restart_filename = config->GetObjFunc_Extension(filename); restart_filename = config->GetFilename(restart_filename, "", val_iter); - - /*--- Read and store the restart metadata. ---*/ - -// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename); - - /*--- Read the restart data from either an ASCII or binary SU2 file. 
---*/ - - if (config->GetRead_Binary_Restart()) { - Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename); - } else { - Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename); - } + const bool rans = (config->GetKind_Turb_Model() != NONE); /*--- Skip coordinates ---*/ unsigned short skipVars = geometry[MESH_0]->GetnDim(); @@ -667,39 +654,7 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar(); } - /*--- Load data from the restart into correct containers. ---*/ - - unsigned long iPoint_Global_Local = 0; - - for (auto iPoint_Global = 0ul; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) { - - /*--- Retrieve local index. If this node from the restart file lives - on the current processor, we will load and instantiate the vars. ---*/ - - const auto iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global); - - if (iPoint_Local > -1) { - - /*--- We need to store this point's data, so jump to the correct - offset in the buffer of data from the restart file and load it. 
---*/ - - const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars; - - for (auto iVar = 0u; iVar < nVar; iVar++) { - nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]); - } - - iPoint_Global_Local++; - } - - } - - /*--- Detect a wrong solution file ---*/ - - if (iPoint_Global_Local != nPointDomain) { - SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + - string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); - } + BasicLoadRestart(geometry[MESH_0], config, restart_filename, skipVars); /*--- Interpolate solution on coarse grids ---*/ @@ -720,10 +675,4 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution); } } - - /*--- Delete the class memory that is used to load the restart. ---*/ - - delete [] Restart_Vars; Restart_Vars = nullptr; - delete [] Restart_Data; Restart_Data = nullptr; - } diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp index 5e9cd83515e..b28fa751c08 100644 --- a/SU2_CFD/src/solvers/CSolver.cpp +++ b/SU2_CFD/src/solvers/CSolver.cpp @@ -4110,3 +4110,57 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig } END_SU2_OMP_PARALLEL } + +void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const string& filename, unsigned long skipVars) { + + /*--- Read and store the restart metadata. ---*/ + +// Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, filename); + + /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/ + + if (config->GetRead_Binary_Restart()) { + Read_SU2_Restart_Binary(geometry, config, filename); + } else { + Read_SU2_Restart_ASCII(geometry, config, filename); + } + + /*--- Load data from the restart into correct containers. 
---*/ + + unsigned long iPoint_Global_Local = 0; + + for (auto iPoint_Global = 0ul; iPoint_Global < geometry->GetGlobal_nPointDomain(); iPoint_Global++ ) { + + /*--- Retrieve local index. If this node from the restart file lives + on the current processor, we will load and instantiate the vars. ---*/ + + const auto iPoint_Local = geometry->GetGlobal_to_Local_Point(iPoint_Global); + + if (iPoint_Local > -1) { + + /*--- We need to store this point's data, so jump to the correct + offset in the buffer of data from the restart file and load it. ---*/ + + const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars; + + for (auto iVar = 0u; iVar < nVar; iVar++) { + base_nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]); + } + + iPoint_Global_Local++; + } + + } + + /*--- Delete the class memory that is used to load the restart. ---*/ + + delete [] Restart_Vars; Restart_Vars = nullptr; + delete [] Restart_Data; Restart_Data = nullptr; + + /*--- Detect a wrong solution file ---*/ + + if (iPoint_Global_Local != nPointDomain) { + SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") + + string("It could be empty lines at the end of the file."), CURRENT_FUNCTION); + } +} From ecb64d02dd147292929614a8ce2e11d772caca2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Tue, 23 Mar 2021 23:11:04 +0100 Subject: [PATCH 42/57] Allow OpDiLib backend choice. 
--- Common/include/code_config.hpp | 2 +- meson.build | 6 ++++++ meson_options.txt | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 3cbad21f08f..377432ee945 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -119,6 +119,6 @@ using su2mixedfloat = passivedouble; #define HAVE_OPDI #endif -#if _OPENMP >= 201811 +#if (_OPENMP >= 201811 && !defined(FORCE_OPDI_MACRO_BACKEND)) || defined(FORCE_OPDI_OMPT_BACKEND) #define HAVE_OMPT #endif diff --git a/meson.build b/meson.build index 20f71ee2853..1fb2325c979 100644 --- a/meson.build +++ b/meson.build @@ -108,6 +108,12 @@ if omp # add opdi dependency if get_option('enable-autodiff') codi_dep += declare_dependency(include_directories: 'externals/opdi/include') + + if get_option('opdi-backend') == 'macro' + su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND' + elif get_option('opdi-backend') == 'ompt' + su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND' + endif endif endif diff --git a/meson_options.txt b/meson_options.txt index fd354b12276..b5d9ccdddc8 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -18,3 +18,4 @@ option('enable-tests', type : 'boolean', value : false, description: 'compile U option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra') option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation') option('enable-mpp', type : 'boolean', value : false, description: 'enable Mutation++ support') +option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice') From 8b4a89c0bbc9f68044fb93b426740c806af7a281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 25 Mar 2021 14:22:47 +0100 Subject: [PATCH 43/57] OpDiLib update. 
--- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index a1210cc3d2f..55b33398d18 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit a1210cc3d2f58fa4652c70000920ff2e76896cf6 +Subproject commit 55b33398d18cbf977545a2dba2008201616664e0 diff --git a/meson_scripts/init.py b/meson_scripts/init.py index 7b15f8440b5..ec7fa1fc33c 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = 'a1210cc3d2f58fa4652c70000920ff2e76896cf6' + sha_version_opdi = '55b33398d18cbf977545a2dba2008201616664e0' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From 165a52b7d40dd8304f4c4a07cb1a0b166c6dda5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 25 Mar 2021 15:47:39 +0100 Subject: [PATCH 44/57] Add AD build tests. 
--- .github/workflows/regression.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index cea9c098ee4..8238f1af131 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP] + config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP, ReverseOMP, ForwardOMP] include: - config_set: BaseMPI flags: '-Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror' @@ -32,6 +32,10 @@ jobs: flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror' - config_set: BaseOMP flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + - config_set: ReverseOMP + flags: '-Denable-autodiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + - config_set: ForwardOMP + flags: '-Denable-directdiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' runs-on: ubuntu-latest steps: - name: Cache Object Files From 60792dc926951332ce00d079357475c655ccc6dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 25 Mar 2021 18:43:52 +0100 Subject: [PATCH 45/57] Disable normal builds in AD builds tests. 
--- .github/workflows/regression.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml index 8238f1af131..3d6c7308431 100644 --- a/.github/workflows/regression.yml +++ b/.github/workflows/regression.yml @@ -33,9 +33,9 @@ jobs: - config_set: BaseOMP flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' - config_set: ReverseOMP - flags: '-Denable-autodiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' - config_set: ForwardOMP - flags: '-Denable-directdiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' + flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror' runs-on: ubuntu-latest steps: - name: Cache Object Files From 3b0854b017d7940a84dc6b1f4ff91f7f852d660c Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Thu, 25 Mar 2021 18:13:06 +0000 Subject: [PATCH 46/57] add syntax check to meson for OpenMP+AD builds --- meson.build | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/meson.build b/meson.build index 1fb2325c979..500ebd87fad 100644 --- a/meson.build +++ b/meson.build @@ -186,6 +186,14 @@ if get_option('enable-mpp') su2_cpp_args += '-DHAVE_MPP' endif +if omp and get_option('enable-autodiff') + py = find_program('python3','python') + p = run_command(py, 'externals/opdi/syntax/check.py', 'su2omp.syntax.json', 'Common', 'SU2_CFD', '-p', '*.hpp', '*.cpp', '*.inl', '-r', '-q') + if p.returncode() != 0 + error(p.stdout()) + endif +endif + # compile common library subdir('Common/src') # compile SU2_CFD executable From 4738e29cee34ab30410bf5f12b835405e48401c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Thu, 
25 Mar 2021 19:52:49 +0100 Subject: [PATCH 47/57] OpDiLib update. --- externals/opdi | 2 +- meson_scripts/init.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/externals/opdi b/externals/opdi index 55b33398d18..e56f79cada2 160000 --- a/externals/opdi +++ b/externals/opdi @@ -1 +1 @@ -Subproject commit 55b33398d18cbf977545a2dba2008201616664e0 +Subproject commit e56f79cada202d21e7425f5d5cfd5b1153f2465e diff --git a/meson_scripts/init.py b/meson_scripts/init.py index ec7fa1fc33c..4d9a4e35ac3 100755 --- a/meson_scripts/init.py +++ b/meson_scripts/init.py @@ -48,7 +48,7 @@ def init_submodules(method = 'auto'): github_repo_codi = 'https://github.com/scicompkl/CoDiPack' sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da' github_repo_medi = 'https://github.com/SciCompKL/MeDiPack' - sha_version_opdi = '55b33398d18cbf977545a2dba2008201616664e0' + sha_version_opdi = 'e56f79cada202d21e7425f5d5cfd5b1153f2465e' github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib' sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2' github_repo_meson = 'https://github.com/mesonbuild/meson' From 7e0bc678ee917f0b004116cacf90bab8c0cf3a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?= Date: Fri, 26 Mar 2021 16:37:31 +0100 Subject: [PATCH 48/57] Fix include. --- Common/include/basic_types/ad_structure.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 3246845cd94..76705c0b242 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -27,7 +27,7 @@ #pragma once -#include "datatype_structure.hpp" +#include "../code_config.hpp" #include "../parallelization/omp_structure.hpp" /*! 
From 3e82662e56111cddf72391c1f993d6f8d5cdc233 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Fri, 26 Mar 2021 22:29:23 +0000 Subject: [PATCH 49/57] explicit construction and destruction of non trivial types in C2DContainer --- Common/include/containers/C2DContainer.hpp | 26 ++++++++++++++----- .../include/toolboxes/allocation_toolbox.hpp | 5 ++-- Common/src/linear_algebra/CSysMatrix.cpp | 17 +++++------- Common/src/linear_algebra/CSysVector.cpp | 2 +- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/Common/include/containers/C2DContainer.hpp b/Common/include/containers/C2DContainer.hpp index 963cddc99fe..c2d08269294 100644 --- a/Common/include/containers/C2DContainer.hpp +++ b/Common/include/containers/C2DContainer.hpp @@ -77,12 +77,17 @@ class AccessorImpl * Static size specializations use this do-nothing allocation macro. */ #define DUMMY_ALLOCATOR \ - void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {} + void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}\ + void m_destroy() noexcept {} + /*! * Dynamic size specializations use this one, EXTRA is used to set some * runtime internal value that depend on the number of rows/columns. * What values need setting depends on the specialization as not all have * members for e.g. number of rows and cols (static size optimization). + * Because aligned allocation is used, "placement new" is used after to + * default construct the elements of non-trivial type. Such types also + * need to be destructed explicitly before freeing the memory. 
*/ #define REAL_ALLOCATOR(EXTRA) \ static_assert(MemoryAllocation::is_power_of_two(AlignSize), \ @@ -91,6 +96,14 @@ class AccessorImpl void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept { \ EXTRA; \ m_data = MemoryAllocation::aligned_alloc(AlignSize,sz); \ + if (!std::is_trivial::value) \ + for (size_t i = 0; i < size(); ++i) new (m_data+i) Scalar_t(); \ + } \ + \ + void m_destroy() noexcept { \ + if (!std::is_trivial::value) \ + for (size_t i = 0; i < size(); ++i) m_data[i].~Scalar_t(); \ + MemoryAllocation::aligned_free(m_data); \ } DUMMY_ALLOCATOR @@ -114,15 +127,13 @@ class AccessorImpl \ AccessorImpl& operator= (AccessorImpl&& other) noexcept \ { \ - MemoryAllocation::aligned_free(m_data); \ + m_destroy(); \ MOVE; m_data=other.m_data; other.m_data=nullptr; \ return *this; \ } \ \ - ~AccessorImpl() noexcept \ - { \ - MemoryAllocation::aligned_free(m_data); \ - } + ~AccessorImpl() noexcept {m_destroy();} + /*! * Shorthand for when specialization has only one more member than m_data. */ @@ -380,6 +391,7 @@ class C2DContainer : using Base = container_helpers::AccessorImpl; using Base::m_data; using Base::m_allocate; + using Base::m_destroy; public: using Base::size; using Base::rows; @@ -473,7 +485,7 @@ class C2DContainer : if(rows==this->rows() && cols==this->cols()) return reqSize; - MemoryAllocation::aligned_free(m_data); + m_destroy(); /*--- request actual allocation to base class as it needs specialization ---*/ size_t bytes = reqSize*sizeof(Scalar_t); diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp index 1ba281413a3..2d5d3bb4409 100644 --- a/Common/include/toolboxes/allocation_toolbox.hpp +++ b/Common/include/toolboxes/allocation_toolbox.hpp @@ -57,9 +57,10 @@ inline constexpr size_t round_up(size_t multiple, size_t x) * \brief Aligned memory allocation compatible across platforms. * \param[in] alignment, in bytes, of the memory being allocated. * \param[in] size, also in bytes. 
+ * \tparam ZeroInit, initialize memory to 0. * \return Pointer to memory, always use su2::aligned_free to deallocate. */ -template +template inline T* aligned_alloc(size_t alignment, size_t size) noexcept { assert(is_power_of_two(alignment)); @@ -80,7 +81,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept #else ptr = ::aligned_alloc(alignment, size); #endif - memset(ptr, 0, size); + if (ZeroInit) memset(ptr, 0, size); return static_cast(ptr); } diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp index a9ee199f3e6..c8bd4164d32 100644 --- a/Common/src/linear_algebra/CSysMatrix.cpp +++ b/Common/src/linear_algebra/CSysMatrix.cpp @@ -158,22 +158,17 @@ void CSysMatrix::Initialize(unsigned long npoint, unsigned long npoi } /*--- Allocate data. ---*/ -#define ALLOC_AND_INIT(ptr,num) {\ - ptr = MemoryAllocation::aligned_alloc(64,num*sizeof(ScalarType));\ - for(size_t k=0; k(64, num*sizeof(ScalarType)); + }; - ALLOC_AND_INIT(matrix, nnz*nVar*nEqn) + allocAndInit(matrix, nnz*nVar*nEqn); /*--- Preconditioners. ---*/ - if (ilu_needed) { - ALLOC_AND_INIT(ILU_matrix, nnz_ilu*nVar*nEqn) - } + if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu*nVar*nEqn); - if (diag_needed) { - ALLOC_AND_INIT(invM, nPointDomain*nVar*nEqn); - } -#undef ALLOC_AND_INIT + if (diag_needed) allocAndInit(invM, nPointDomain*nVar*nEqn); /*--- Thread parallel initialization. 
---*/ diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index 4477d8b3fe1..8eedbf0e254 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -50,7 +50,7 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE); - if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, nElm * sizeof(ScalarType)); + if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc(64, nElm*sizeof(ScalarType)); if (val != nullptr) { if (!valIsArray) { From c82f3c72d8ba52553e90bab182a37cc8ebc8794b Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Sun, 28 Mar 2021 14:57:40 +0100 Subject: [PATCH 50/57] test preaccumulation with RealReverseIndex --- Common/include/basic_types/ad_structure.hpp | 17 ++---- Common/include/code_config.hpp | 3 +- Common/include/geometry/dual_grid/CPoint.hpp | 12 ++-- Common/src/geometry/CPhysicalGeometry.cpp | 8 +-- .../computeGradientsLeastSquares.hpp | 28 ++++----- .../flow/convection/centered.hpp | 2 +- SU2_CFD/include/numerics_simd/util.hpp | 58 ++++++++++++++++--- SU2_CFD/src/fluid/CPengRobinson.cpp | 2 +- 8 files changed, 85 insertions(+), 45 deletions(-) diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp index 76705c0b242..620da3246f5 100644 --- a/Common/include/basic_types/ad_structure.hpp +++ b/Common/include/basic_types/ad_structure.hpp @@ -392,6 +392,11 @@ namespace AD{ SetPreaccIn(moreData...); } + template::value> = 0> + FORCEINLINE void SetPreaccIn(T&& data, Ts&&... 
moreData) { + static_assert(!std::is_same::value, "rvalues cannot be registered"); + } + template FORCEINLINE void SetPreaccIn(const T& data, const int size) { if (PreaccActive) { @@ -415,18 +420,6 @@ namespace AD{ } } - template - FORCEINLINE void SetPreaccIn(const T& data, const int size_x, const int size_y, const int size_z) { - if (!PreaccActive) return; - for (int i = 0; i < size_x; i++) { - for (int j = 0; j < size_y; j++) { - for (int k = 0; k < size_z; k++) { - if (data[i][j][k].isActive()) PreaccHelper.addInput(data[i][j][k]); - } - } - } - } - FORCEINLINE void StartPreacc() { if (AD::getGlobalTape().isActive() && PreaccEnabled) { PreaccHelper.start(); diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 377432ee945..904805dc870 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -92,7 +92,8 @@ using su2double = codi::RealReversePrimal; #elif CODI_PRIMAL_INDEX_TAPE using su2double = codi::RealReversePrimalIndex; #else -using su2double = codi::RealReverse; +//using su2double = codi::RealReverse; +using su2double = codi::RealReverseIndex; #endif #endif #elif defined(CODI_FORWARD_TYPE) // forward mode AD diff --git a/Common/include/geometry/dual_grid/CPoint.hpp b/Common/include/geometry/dual_grid/CPoint.hpp index 86ac53d4936..9db963524ad 100644 --- a/Common/include/geometry/dual_grid/CPoint.hpp +++ b/Common/include/geometry/dual_grid/CPoint.hpp @@ -423,7 +423,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Value of the distance to the nearest wall. */ - inline su2double GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); } + inline su2double& GetWall_Distance(unsigned long iPoint) { return Wall_Distance(iPoint); } + inline const su2double& GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); } /*! * \brief Set the value of the distance to the nearest wall. 
@@ -451,7 +452,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Value of the distance to the nearest wall. */ - inline su2double GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); } + inline su2double& GetSharpEdge_Distance(unsigned long iPoint) { return SharpEdge_Distance(iPoint); } + inline const su2double& GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); } /*! * \brief Set the value of the curvature at a surface node. @@ -486,7 +488,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Area or volume of the control volume. */ - inline su2double GetVolume(unsigned long iPoint) const { return Volume(iPoint); } + inline su2double& GetVolume(unsigned long iPoint) { return Volume(iPoint); } + inline const su2double& GetVolume(unsigned long iPoint) const { return Volume(iPoint); } /*! * \brief Set the volume of the control volume. @@ -507,7 +510,8 @@ class CPoint { * \param[in] iPoint - Index of the point. * \return Periodic component of area or volume for a control volume on a periodic marker. */ - inline su2double GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); } + inline su2double& GetPeriodicVolume(unsigned long iPoint) { return Periodic_Volume(iPoint); } + inline const su2double& GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); } /*! * \brief Set the missing component of area or volume for a control volume on a periodic marker. diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 688972ce1f1..72e26928e03 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -7543,7 +7543,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) /*--- To make preaccumulation more effective, use as few inputs as possible, recomputing intermediate quantities as needed. 
---*/ - AD::StartPreacc(); +// AD::StartPreacc(); /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; @@ -7654,7 +7654,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) } } #endif - AD::EndPreacc(); +// AD::EndPreacc(); } su2double DomainVolume; @@ -7700,7 +7700,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto nNodes = bound[iMarker][iElem]->GetnNodes(); - AD::StartPreacc(); +// AD::StartPreacc(); /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; @@ -7752,7 +7752,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto iVertex = nodes->GetVertex(iPoint, iMarker); AD::SetPreaccOut(vertex[iMarker][iVertex]->GetNormal(), nDim); } - AD::EndPreacc(); +// AD::EndPreacc(); } } END_SU2_OMP_FOR diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp index 3a069f8c942..dcd923901dc 100644 --- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp +++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp @@ -76,35 +76,35 @@ FORCEINLINE void solveLeastSquares(size_t iPoint, /*--- Entries of upper triangular matrix R. 
---*/ + if (periodic) { + AD::StartPreacc(); + AD::SetPreaccIn(Rmatrix(iPoint,0,0)); + AD::SetPreaccIn(Rmatrix(iPoint,0,1)); + AD::SetPreaccIn(Rmatrix(iPoint,1,1)); + } + su2double r11 = Rmatrix(iPoint,0,0); su2double r12 = Rmatrix(iPoint,0,1); su2double r22 = Rmatrix(iPoint,1,1); su2double r13 = 0.0, r23 = 0.0, r33 = 1.0; - if (periodic) { - AD::StartPreacc(); - AD::SetPreaccIn(r11); - AD::SetPreaccIn(r12); - AD::SetPreaccIn(r22); - } - r11 = sqrt(max(r11, eps)); r12 /= r11; r22 = sqrt(max(r22 - r12*r12, eps)); if (nDim == 3) { + if (periodic) { + AD::SetPreaccIn(Rmatrix(iPoint,0,2)); + AD::SetPreaccIn(Rmatrix(iPoint,1,2)); + AD::SetPreaccIn(Rmatrix(iPoint,2,1)); + AD::SetPreaccIn(Rmatrix(iPoint,2,2)); + } + r13 = Rmatrix(iPoint,0,2); r33 = Rmatrix(iPoint,2,2); const auto r23_a = Rmatrix(iPoint,1,2); const auto r23_b = Rmatrix(iPoint,2,1); - if (periodic) { - AD::SetPreaccIn(r13); - AD::SetPreaccIn(r23_a); - AD::SetPreaccIn(r23_b); - AD::SetPreaccIn(r33); - } - r13 /= r11; r23 = r23_a/r22 - r23_b*r12/(r11*r22); r33 = sqrt(max(r33 - r23*r23 - r13*r13, eps)); diff --git a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp index 7e0bd6f8870..9b62a3a89db 100644 --- a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp +++ b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp @@ -77,7 +77,7 @@ class CCenteredBase : public Base { public: /*! - * \brief Implementation of the base Roe flux. + * \brief Implementation of the base centered flux. 
*/ void ComputeFlux(Int iEdge, const CConfig& config, diff --git a/SU2_CFD/include/numerics_simd/util.hpp b/SU2_CFD/include/numerics_simd/util.hpp index 7127912329b..21c99c7e529 100644 --- a/SU2_CFD/include/numerics_simd/util.hpp +++ b/SU2_CFD/include/numerics_simd/util.hpp @@ -115,14 +115,13 @@ FORCEINLINE Double squaredNorm(const VectorDbl& vector) { template FORCEINLINE Double norm(const VectorDbl& vector) { return sqrt(squaredNorm(vector)); } +#ifndef CODI_REVERSE_TYPE /*! * \brief Gather a single variable from index iPoint of a 1D container. */ template FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { - auto x = *vars.innerIter(iPoint); - AD::SetPreaccIn(x, Double::Size); - return x; + return *vars.innerIter(iPoint); } /*! @@ -130,9 +129,7 @@ FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { */ template FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { - auto x = vars.template get >(iPoint); - AD::SetPreaccIn(x, nVar, Double::Size); - return x; + return vars.template get >(iPoint); } /*! @@ -140,10 +137,55 @@ FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { */ template FORCEINLINE MatrixDbl gatherVariables(Int iPoint, const Container& vars) { - auto x = vars.template get >(iPoint); - AD::SetPreaccIn(x, nRows, nCols, Double::Size); + return vars.template get >(iPoint); +} +#else + +namespace { + template = 0> + FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint); } + + /*--- When getting 1 variable from a matrix container, we assume it is the first. 
---*/ + template = 0> + FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint,0); } +} + +template +FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { + Double x; + for (size_t k=0; k +FORCEINLINE VectorDbl gatherVariables(Int iPoint, const Container& vars) { + VectorDbl x; + for (size_t i=0; i +FORCEINLINE MatrixDbl gatherVariables(Int iPoint, const Container& vars) { + MatrixDbl x; + for (size_t i=0; i Date: Mon, 29 Mar 2021 19:33:23 +0100 Subject: [PATCH 51/57] missing destruction in CSysVector --- Common/src/linear_algebra/CSysVector.cpp | 2 ++ SU2_CFD/src/SU2_CFD.cpp | 4 ++-- SU2_DOT/src/SU2_DOT.cpp | 19 ++++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp index 8eedbf0e254..9cb66905fde 100644 --- a/Common/src/linear_algebra/CSysVector.cpp +++ b/Common/src/linear_algebra/CSysVector.cpp @@ -63,6 +63,8 @@ void CSysVector::Initialize(unsigned long numBlk, unsigned long numB template CSysVector::~CSysVector() { + if (!std::is_trivial::value) + for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType(); MemoryAllocation::aligned_free(vec_val); } diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp index 90a86ad018b..d9dfbbdc12f 100644 --- a/SU2_CFD/src/SU2_CFD.cpp +++ b/SU2_CFD/src/SU2_CFD.cpp @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm(); /*--- AD initialization ---*/ -#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) +#ifdef HAVE_OPDI AD::getGlobalTape().initialize(); #endif @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) { #endif /*--- Finalize AD, if necessary. 
---*/ -#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE) +#ifdef HAVE_OPDI AD::getGlobalTape().finalize(); #endif diff --git a/SU2_DOT/src/SU2_DOT.cpp b/SU2_DOT/src/SU2_DOT.cpp index 7b9927c1637..e2b2de498be 100644 --- a/SU2_DOT/src/SU2_DOT.cpp +++ b/SU2_DOT/src/SU2_DOT.cpp @@ -36,6 +36,10 @@ int main(int argc, char *argv[]) { char config_file_name[MAX_STRING_SIZE]; + /*--- OpenMP initialization ---*/ + + omp_initialize(); + /*--- MPI initialization, and buffer setting ---*/ #if defined(HAVE_OMP) && defined(HAVE_MPI) @@ -49,6 +53,11 @@ int main(int argc, char *argv[]) { const int rank = SU2_MPI::GetRank(); const int size = SU2_MPI::GetSize(); + /*--- AD initialization ---*/ +#ifdef HAVE_OPDI + AD::getGlobalTape().initialize(); +#endif + /*--- Pointer to different structures that will be used throughout the entire code ---*/ CConfig **config_container = nullptr; @@ -406,9 +415,17 @@ int main(int argc, char *argv[]) { if (rank == MASTER_NODE) cout << "\n------------------------- Exit Success (SU2_DOT) ------------------------\n" << endl; - /*--- Finalize MPI parallelization ---*/ + /*--- Finalize AD, if necessary. ---*/ +#ifdef HAVE_OPDI + AD::getGlobalTape().finalize(); +#endif + + /*--- Finalize MPI parallelization. ---*/ SU2_MPI::Finalize(); + /*--- Finalize OpenMP. ---*/ + omp_finalize(); + return EXIT_SUCCESS; } From 6483a3ffdbf39c86607a1e4f08a7fa0b4ec10bff Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 29 Mar 2021 19:34:02 +0100 Subject: [PATCH 52/57] no type punning in COutput... 
--- SU2_CFD/include/output/COutput.hpp | 1 - .../output/filewriter/CParallelDataSorter.hpp | 28 ++++--- .../src/output/filewriter/CFVMDataSorter.cpp | 20 ++--- .../output/filewriter/CParallelDataSorter.cpp | 76 ++++++------------- .../filewriter/CSurfaceFEMDataSorter.cpp | 11 +-- .../filewriter/CSurfaceFVMDataSorter.cpp | 42 +++------- 6 files changed, 63 insertions(+), 115 deletions(-) diff --git a/SU2_CFD/include/output/COutput.hpp b/SU2_CFD/include/output/COutput.hpp index 95a07335e7b..829c0698502 100644 --- a/SU2_CFD/include/output/COutput.hpp +++ b/SU2_CFD/include/output/COutput.hpp @@ -581,7 +581,6 @@ class COutput { volumeOutput_List.push_back(name); } - /*! * \brief Set the value of a volume output field * \param[in] name - Name of the field. diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp index 5122eed672e..ae6273ea322 100644 --- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp @@ -66,7 +66,20 @@ class CParallelDataSorter{ * \brief Map that stores the index for each GEO_TYPE type where to find information * in the element arrays. 
*/ - static const map TypeMap; + struct { + static unsigned short at(unsigned short type) { + switch(type) { + case LINE: return 0; + case TRIANGLE: return 1; + case QUADRILATERAL: return 2; + case TETRAHEDRON: return 3; + case HEXAHEDRON: return 4; + case PRISM: return 5; + case PYRAMID: return 6; + default: assert(false); return 0; + }; + } + } TypeMap; unsigned long nPointsGlobal; //!< Global number of points without halos unsigned long nElemGlobal; //!< Global number of elems without halos @@ -88,11 +101,8 @@ class CParallelDataSorter{ int *nElemConn_Send; //!< Number of element connectivity this processor has to send to other processors int *nElemConn_Cum; //!< Cumulative number of element connectivity entries unsigned long *Index; //!< Index each point has in the send buffer - su2double *connSend; //!< Send buffer holding the data that will be send to other processors - passivedouble *passiveDoubleBuffer; //!< Buffer holding the sorted, partitioned data as passivedouble types - su2double *doubleBuffer; //!< Buffer holding the sorted, partitioned data as su2double types - /// Pointer used to allocate the memory used for ::passiveDoubleBuffer and ::doubleBuffer. - char *dataBuffer; + passivedouble *connSend; //!< Send buffer holding the data that will be send to other processors + passivedouble *dataBuffer; //!< Buffer holding the sorted, partitioned data as passivedouble types unsigned long *idSend; //!< Send buffer holding global indices that will be send to other processors int nSends, //!< Number of sends nRecvs; //!< Number of receives @@ -261,13 +271,13 @@ class CParallelDataSorter{ * \input iPoint - the point ID. * \return the value of the data field at a point. */ - passivedouble GetData(unsigned short iField, unsigned long iPoint) const {return passiveDoubleBuffer[iPoint*GlobalField_Counter + iField];} + passivedouble GetData(unsigned short iField, unsigned long iPoint) const {return dataBuffer[iPoint*GlobalField_Counter + iField];} /*! 
* \brief Get the pointer to the sorted linear partitioned data. * \return Pointer to the sorted data. */ - const passivedouble *GetData() const {return passiveDoubleBuffer;} + const passivedouble *GetData() const {return dataBuffer;} /*! * \brief Get the global index of a point. @@ -305,7 +315,7 @@ class CParallelDataSorter{ * \param[in] data - Value of the field */ void SetUnsorted_Data(unsigned long iPoint, unsigned short iField, su2double data){ - connSend[Index[iPoint] + iField] = data; + connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data); } su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const { diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp index 768438a9b39..fbc0dfd452e 100644 --- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp @@ -71,9 +71,8 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto CFVMDataSorter::~CFVMDataSorter(){ delete [] Local_Halo; - - delete [] Index; - delete [] idSend; + delete [] Index; + delete [] idSend; delete linearPartitioner; } @@ -107,9 +106,6 @@ void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){ } } - - - void CFVMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) { /*--- Sort connectivity for each type of element (excluding halos). Note @@ -262,14 +258,11 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - unsigned long *connSend = nullptr; - connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]](); + auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]](); /*--- Allocate arrays for storing halo flags. 
---*/ - unsigned short *haloSend = new unsigned short[nElem_Send[size]](); - for (int ii = 0; ii < nElem_Send[size]; ii++) - haloSend[ii] = false; + auto haloSend = new unsigned short[nElem_Send[size]](); /*--- Create an index variable to keep track of our index position as we load up the send buffer. ---*/ @@ -356,10 +349,9 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, we do not include our own rank in the communications. We will directly copy our own data later. ---*/ - unsigned long *connRecv = nullptr; - connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]](); + auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]](); - unsigned short *haloRecv = new unsigned short[nElem_Cum[size]](); + auto haloRecv = new unsigned short[nElem_Cum[size]](); #ifdef HAVE_MPI diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp index d65e55317ce..2c32df0e2cb 100644 --- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp @@ -29,17 +29,6 @@ #include #include - -const map CParallelDataSorter::TypeMap = { - {LINE, 0}, - {TRIANGLE, 1}, - {QUADRILATERAL, 2}, - {TETRAHEDRON, 3}, - {HEXAHEDRON, 4}, - {PRISM, 5}, - {PYRAMID, 6} -}; - CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector &valFieldNames) : fieldNames(std::move(valFieldNames)){ @@ -61,8 +50,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector & Index = nullptr; connSend = nullptr; dataBuffer = nullptr; - passiveDoubleBuffer = nullptr; - doubleBuffer = nullptr; idSend = nullptr; nSends = 0; nRecvs = 0; @@ -110,11 +97,13 @@ CParallelDataSorter::~CParallelDataSorter(){ void CParallelDataSorter::SortOutputData() { + using MPI_WRAP = SelectMPIWrapper::W; + int VARS_PER_POINT = GlobalField_Counter; #ifdef HAVE_MPI - SU2_MPI::Request *send_req, *recv_req; - SU2_MPI::Status status; + MPI_WRAP::Request *send_req, 
*recv_req; + MPI_WRAP::Status status; int ind; #endif @@ -130,8 +119,8 @@ void CParallelDataSorter::SortOutputData() { /*--- We need double the number of messages to send both the conn. and the global IDs. ---*/ - send_req = new SU2_MPI::Request[2*nSends]; - recv_req = new SU2_MPI::Request[2*nRecvs]; + send_req = new MPI_WRAP::Request[2*nSends]; + recv_req = new MPI_WRAP::Request[2*nRecvs]; unsigned long iMessage = 0; for (int ii=0; ii::value){ - for (int jj = 0; jj < VARS_PER_POINT*nPoint_Recv[size]; jj++){ - const passivedouble tmpVal = SU2_TYPE::GetValue(doubleBuffer[jj]); - passiveDoubleBuffer[jj] = tmpVal; - /*--- For some AD datatypes a call of the destructor is - * necessary to properly delete the AD type ---*/ - doubleBuffer[jj].~su2double(); - } - } - - /*--- Step 2: Reorder the data in the buffer --- */ + /*--- Reorder the data in the buffer --- */ passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]]; for (int jj = 0; jj < VARS_PER_POINT; jj++){ for (int ii = 0; ii < nPoint_Recv[size]; ii++){ - tmpBuffer[idRecv[ii]] = passiveDoubleBuffer[ii*VARS_PER_POINT+jj]; + tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj]; } for (int ii = 0; ii < nPoint_Recv[size]; ii++){ - passiveDoubleBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii]; + dataBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii]; } } @@ -318,18 +292,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - connSend = nullptr; - connSend = new su2double[VARS_PER_POINT*nPoint_Send[size]](); + connSend = new passivedouble[VARS_PER_POINT*nPoint_Send[size]] (); /*--- Allocate the data buffer to hold the sorted data. 
We have to make it large enough * to hold passivedoubles and su2doubles ---*/ - unsigned short maxSize = max(sizeof(passivedouble), sizeof(su2double)); - dataBuffer = new char[VARS_PER_POINT*nPoint_Recv[size]*maxSize] {}; - - /*--- doubleBuffer and passiveDouble buffer use the same memory allocated above using the dataBuffer. ---*/ - doubleBuffer = reinterpret_cast(dataBuffer); - passiveDoubleBuffer = reinterpret_cast(dataBuffer); + dataBuffer = new passivedouble[VARS_PER_POINT*nPoint_Recv[size]] (); /*--- Allocate arrays for sending the global ID. ---*/ diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp index 99c314f2e5c..d21edce01a0 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp @@ -69,7 +69,7 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){ delete linearPartitioner; - delete [] passiveDoubleBuffer; + delete [] dataBuffer; } @@ -220,11 +220,8 @@ void CSurfaceFEMDataSorter::SortOutputData() { /* Allocate the memory for Parallel_Surf_Data. */ nPoints = globalSurfaceDOFIDs.size(); - - delete [] passiveDoubleBuffer; - - - passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT]; + delete [] dataBuffer; + dataBuffer = new passivedouble[nPoints*VARS_PER_POINT]; /* Determine the local index of the global surface DOFs and copy the data into Parallel_Surf_Data. */ @@ -232,7 +229,7 @@ void CSurfaceFEMDataSorter::SortOutputData() { const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner->GetCumulativeSizeBeforeRank(rank); for(int jj=0; jjGetData(jj,ii); + dataBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii); } /*--- Reduce the total number of surf points we have. 
This will be diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp index a4dc31c32a5..ee7535dafb9 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp @@ -50,7 +50,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){ delete linearPartitioner; - delete [] passiveDoubleBuffer; + delete [] dataBuffer; } @@ -438,17 +438,14 @@ void CSurfaceFVMDataSorter::SortOutputData() { we can allocate the new data structure to hold these points alone. Here, we also copy the data for those points from our volume data structure. ---*/ - - delete [] passiveDoubleBuffer; - - - passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT]; + delete [] dataBuffer; + dataBuffer = new passivedouble[nPoints*VARS_PER_POINT]; for (int jj = 0; jj < VARS_PER_POINT; jj++) { count = 0; for (int ii = 0; ii < (int)volumeSorter->GetnPoints(); ii++) { if (surfPoint[ii] !=-1) { - passiveDoubleBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii); + dataBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii); count++; } } @@ -545,14 +542,12 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Allocate memory to hold the globals that we are sending. ---*/ - unsigned long *globalSend = nullptr; - globalSend = new unsigned long[nElem_Send[size]](); + auto globalSend = new unsigned long[nElem_Send[size]](); /*--- Allocate memory to hold the renumbering that we are sending. ---*/ - unsigned long *renumbSend = nullptr; - renumbSend = new unsigned long[nElem_Send[size]](); + auto renumbSend = new unsigned long[nElem_Send[size]](); /*--- Create an index variable to keep track of our index position as we load up the send buffer. ---*/ @@ -595,11 +590,8 @@ void CSurfaceFVMDataSorter::SortOutputData() { we do not include our own rank in the communications. 
We will directly copy our own data later. ---*/ - unsigned long *globalRecv = nullptr; - globalRecv = new unsigned long[nElem_Recv[size]](); - - unsigned long *renumbRecv = nullptr; - renumbRecv = new unsigned long[nElem_Recv[size]](); + auto globalRecv = new unsigned long[nElem_Recv[size]](); + auto renumbRecv = new unsigned long[nElem_Recv[size]](); #ifdef HAVE_MPI /*--- We need double the number of messages to send both the conn. @@ -1247,16 +1239,11 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Allocate memory to hold the connectivity that we are sending. ---*/ - unsigned long *connSend = nullptr; - connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]; - for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Send[size]; ii++) - connSend[ii] = 0; + auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]] (); /*--- Allocate arrays for storing halo flags. ---*/ - unsigned short *haloSend = new unsigned short[nElem_Send[size]]; - for (int ii = 0; ii < nElem_Send[size]; ii++) - haloSend[ii] = false; + auto haloSend = new unsigned short[nElem_Send[size]] (); /*--- Create an index variable to keep track of our index position as we load up the send buffer. ---*/ @@ -1346,14 +1333,9 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * we do not include our own rank in the communications. We will directly copy our own data later. ---*/ - unsigned long *connRecv = nullptr; - connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]]; - for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Recv[size]; ii++) - connRecv[ii] = 0; + auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]] (); - unsigned short *haloRecv = new unsigned short[nElem_Recv[size]]; - for (int ii = 0; ii < nElem_Recv[size]; ii++) - haloRecv[ii] = false; + auto haloRecv = new unsigned short[nElem_Recv[size]] (); #ifdef HAVE_MPI /*--- We need double the number of messages to send both the conn. 
From 083f0b7929b785eb976bd098f7619a1d9bdafd71 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 29 Mar 2021 19:46:45 +0100 Subject: [PATCH 53/57] missing include --- SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp index ae6273ea322..6b4d3adf0f2 100644 --- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp @@ -31,6 +31,7 @@ #include "../../../../Common/include/option_structure.hpp" #include "../../../../Common/include/toolboxes/CLinearPartitioner.hpp" #include +#include class CGeometry; class CConfig; From c3a62d3a2e95b46c9ff531422b2a0f46f5650404 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 29 Mar 2021 20:02:04 +0100 Subject: [PATCH 54/57] fix unused warning --- SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp index 2c32df0e2cb..5f3ff6fb5de 100644 --- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp @@ -97,11 +97,11 @@ CParallelDataSorter::~CParallelDataSorter(){ void CParallelDataSorter::SortOutputData() { - using MPI_WRAP = SelectMPIWrapper::W; - - int VARS_PER_POINT = GlobalField_Counter; + const int VARS_PER_POINT = GlobalField_Counter; #ifdef HAVE_MPI + using MPI_WRAP = SelectMPIWrapper::W; + MPI_WRAP::Request *send_req, *recv_req; MPI_WRAP::Status status; int ind; @@ -112,7 +112,6 @@ void CParallelDataSorter::SortOutputData() { we do not include our own rank in the communications. We will directly copy our own data later. 
---*/ - unsigned long *idRecv = new unsigned long[nPoint_Recv[size]](); #ifdef HAVE_MPI From 92406edd210cd4d656d7e8bd57fdfe8a4f0bfe0d Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Mon, 29 Mar 2021 20:51:29 +0100 Subject: [PATCH 55/57] double free --- SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp | 1 - SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp | 9 +-------- SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp | 7 +------ 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp index 5f3ff6fb5de..60aa6431250 100644 --- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp @@ -91,7 +91,6 @@ CParallelDataSorter::~CParallelDataSorter(){ delete [] Conn_Pyra_Par; delete [] connSend; - delete [] dataBuffer; } diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp index d21edce01a0..0ed8b0a5603 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp @@ -66,14 +66,7 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr } -CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){ - - delete linearPartitioner; - delete [] dataBuffer; - -} - - +CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter() { delete linearPartitioner; } void CSurfaceFEMDataSorter::SortOutputData() { diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp index ee7535dafb9..64c2c1bbde5 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp @@ -47,12 +47,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr } -CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){ - - delete 
linearPartitioner; - delete [] dataBuffer; - -} +CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter() { delete linearPartitioner; } void CSurfaceFVMDataSorter::SortOutputData() { From 73a575bd9cc30eaf05ac577065bcb119a5d5d1d2 Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Tue, 30 Mar 2021 13:44:08 +0100 Subject: [PATCH 56/57] why is everything a pointer ffs... --- .../include/toolboxes/CLinearPartitioner.hpp | 51 +++++----- Common/src/toolboxes/CLinearPartitioner.cpp | 28 +++--- .../output/filewriter/CFEMDataSorter.hpp | 7 +- .../output/filewriter/CFVMDataSorter.hpp | 10 +- .../output/filewriter/CParallelDataSorter.hpp | 18 ++-- .../filewriter/CSurfaceFEMDataSorter.hpp | 9 +- .../filewriter/CSurfaceFVMDataSorter.hpp | 9 +- .../src/output/filewriter/CFEMDataSorter.cpp | 13 +-- .../src/output/filewriter/CFVMDataSorter.cpp | 17 +--- .../output/filewriter/CParallelDataSorter.cpp | 97 ++++++++----------- .../filewriter/CSurfaceFEMDataSorter.cpp | 12 +-- .../filewriter/CSurfaceFVMDataSorter.cpp | 38 ++++---- 12 files changed, 126 insertions(+), 183 deletions(-) diff --git a/Common/include/toolboxes/CLinearPartitioner.hpp b/Common/include/toolboxes/CLinearPartitioner.hpp index 4a86acebf68..5e2a4d24dea 100644 --- a/Common/include/toolboxes/CLinearPartitioner.hpp +++ b/Common/include/toolboxes/CLinearPartitioner.hpp @@ -52,63 +52,68 @@ class CLinearPartitioner { vector cumulativeSizeBeforeRank; /*!< \brief Vector containing the cumulative size of all linear partitions before the current rank. */ public: + CLinearPartitioner() = default; /*! - * \brief Constructor of the CLinearPartitioner class. - * \param[in] val_global_count - global count to be linearly partitioned. - * \param[in] val_offset - offset from 0 for the first index on rank 0 (typically 0). - * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false). + * \brief Constructor of the CLinearPartitioner class, see Initialize. 
*/ - CLinearPartitioner(unsigned long val_global_count, - unsigned long val_offset, - bool isDisjoint = false); + CLinearPartitioner(unsigned long global_count, + unsigned long offset, + bool isDisjoint = false) { + Initialize(global_count, offset, isDisjoint); + } /*! - * \brief Destructor of the CLinearPartitioner class. + * \brief Initialize the CLinearPartitioner class. + * \param[in] global_count - global count to be linearly partitioned. + * \param[in] offset - offset from 0 for the first index on rank 0 (typically 0). + * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false). */ - ~CLinearPartitioner(void); + void Initialize(unsigned long global_count, + unsigned long offset, + bool isDisjoint = false); /*! * \brief Get the rank that owns the index based on the linear partitioning. - * \param[in] val_index - Current index. + * \param[in] index - Current index. * \returns Owning rank for the current index based on linear partitioning. */ - unsigned long GetRankContainingIndex(unsigned long val_index); + unsigned long GetRankContainingIndex(unsigned long index) const; /*! * \brief Get the first index of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns First index of the current rank's linear partition. */ - inline unsigned long GetFirstIndexOnRank(int val_rank) { - return firstIndex[val_rank]; + inline unsigned long GetFirstIndexOnRank(int rank) const { + return firstIndex[rank]; } /*! * \brief Get the last index of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Last index of the current rank's linear partition. */ - inline unsigned long GetLastIndexOnRank(int val_rank) { - return lastIndex[val_rank]; + inline unsigned long GetLastIndexOnRank(int rank) const { + return lastIndex[rank]; } /*! 
* \brief Get the total size of the current rank's linear partition. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Size of the current rank's linear partition. */ - inline unsigned long GetSizeOnRank(int val_rank) { - return sizeOnRank[val_rank]; + inline unsigned long GetSizeOnRank(int rank) const { + return sizeOnRank[rank]; } /*! * \brief Get the cumulative size of all linear partitions before the current rank. - * \param[in] val_rank - MPI rank identifier. + * \param[in] rank - MPI rank identifier. * \returns Cumulative size of all linear partitions before the current rank. */ - inline unsigned long GetCumulativeSizeBeforeRank(int val_rank) { - return cumulativeSizeBeforeRank[val_rank]; + inline unsigned long GetCumulativeSizeBeforeRank(int rank) const { + return cumulativeSizeBeforeRank[rank]; } }; diff --git a/Common/src/toolboxes/CLinearPartitioner.cpp b/Common/src/toolboxes/CLinearPartitioner.cpp index 6a45f4fb20f..16ac5373762 100644 --- a/Common/src/toolboxes/CLinearPartitioner.cpp +++ b/Common/src/toolboxes/CLinearPartitioner.cpp @@ -28,9 +28,9 @@ #include "../../include/toolboxes/CLinearPartitioner.hpp" -CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, - unsigned long val_offset, - bool isDisjoint) { +void CLinearPartitioner::Initialize(unsigned long global_count, + unsigned long offset, + bool isDisjoint) { /*--- Store MPI size ---*/ @@ -48,10 +48,10 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, balancing for any remainder points. 
---*/ unsigned long quotient = 0; - if (val_global_count >= (unsigned long)size) - quotient = val_global_count/size; + if (global_count >= (unsigned long)size) + quotient = global_count/size; - int remainder = int(val_global_count%size); + int remainder = int(global_count%size); for (int ii = 0; ii < size; ii++) { sizeOnRank[ii] = quotient + int(ii < remainder); } @@ -63,7 +63,7 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, unsigned long adjust = 0; if (isDisjoint) adjust = 1; - firstIndex[0] = val_offset; + firstIndex[0] = offset; lastIndex[0] = firstIndex[0] + sizeOnRank[0] - adjust; cumulativeSizeBeforeRank[0] = 0; for (int iProc = 1; iProc < size; iProc++) { @@ -72,17 +72,15 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count, cumulativeSizeBeforeRank[iProc] = (cumulativeSizeBeforeRank[iProc-1] + sizeOnRank[iProc-1]); } - cumulativeSizeBeforeRank[size] = val_global_count; + cumulativeSizeBeforeRank[size] = global_count; } -CLinearPartitioner::~CLinearPartitioner(void) { } - -unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index) { +unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long index) const { /*--- Initial guess ---*/ - unsigned long iProcessor = val_index/sizeOnRank[0]; + unsigned long iProcessor = index/sizeOnRank[0]; /*--- Guard against going over size. ---*/ @@ -91,11 +89,11 @@ unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index /*--- Move up or down until we find the processor. 
---*/ - if (val_index >= cumulativeSizeBeforeRank[iProcessor]) - while(val_index >= cumulativeSizeBeforeRank[iProcessor+1]) + if (index >= cumulativeSizeBeforeRank[iProcessor]) + while(index >= cumulativeSizeBeforeRank[iProcessor+1]) iProcessor++; else - while(val_index < cumulativeSizeBeforeRank[iProcessor]) + while(index < cumulativeSizeBeforeRank[iProcessor]) iProcessor--; return iProcessor; diff --git a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp index 372069c8735..91df74fcd3c 100644 --- a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp @@ -41,11 +41,6 @@ class CFEMDataSorter final: public CParallelDataSorter{ */ CFEMDataSorter(CConfig *config, CGeometry *geometry, const vector &valFieldNames); - /*! - * \brief Destructor - */ - ~CFEMDataSorter() override; - /*! * \brief Sort the connectivities (volume and surface) into data structures used for output file writing. * \param[in] config - Definition of the particular problem. @@ -60,7 +55,7 @@ class CFEMDataSorter final: public CParallelDataSorter{ * \return Global index of a specific point. */ unsigned long GetGlobalIndex(unsigned long iPoint) const override{ - return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint; + return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint; } private: diff --git a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp index f2f70e23a7e..cd561c6a7bf 100644 --- a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp @@ -34,10 +34,9 @@ class CFVMDataSorter final: public CParallelDataSorter{ private: - int* Local_Halo; //!< Array containing the flag whether a point is a halo node + vector Local_Halo; //!< Array containing the flag whether a point is a halo node public: - /*! 
* \brief Constructor * \param[in] config - Pointer to the current config structure @@ -46,11 +45,6 @@ class CFVMDataSorter final: public CParallelDataSorter{ */ CFVMDataSorter(CConfig *config, CGeometry *geometry, const vector &valFieldNames); - /*! - * \brief Destructor - */ - ~CFVMDataSorter() override; - /*! * \brief Sort the connectivities (volume and surface) into data structures used for output file writing. * \param[in] config - Definition of the particular problem. @@ -65,7 +59,7 @@ class CFVMDataSorter final: public CParallelDataSorter{ * \return Global index of a specific point. */ unsigned long GetGlobalIndex(unsigned long iPoint) const override { - return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint; + return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint; } /*! diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp index 6b4d3adf0f2..1a22dbda832 100644 --- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp @@ -42,12 +42,12 @@ class CParallelDataSorter{ /*! * \brief The MPI rank */ - int rank; + const int rank; /*! * \brief The MPI size, aka the number of processors. */ - int size; + const int size; unsigned long nGlobalPointBeforeSort; //!< Global number of points without halos before sorting unsigned long nLocalPointsBeforeSort; //!< Local number of points without halos before sorting on this proc @@ -89,7 +89,7 @@ class CParallelDataSorter{ unsigned long nElem; //!< Local number of elements unsigned long nConn; //!< Local size of the connectivity array - CLinearPartitioner* linearPartitioner; //!< Linear partitioner based on the global number of points. + CLinearPartitioner linearPartitioner; //!< Linear partitioner based on the global number of points. 
unsigned short GlobalField_Counter; //!< Number of output fields @@ -254,7 +254,7 @@ class CParallelDataSorter{ * \return The beginning node ID. */ virtual unsigned long GetNodeBegin(unsigned short rank) const { - return linearPartitioner->GetFirstIndexOnRank(rank); + return linearPartitioner.GetFirstIndexOnRank(rank); } /*! @@ -263,7 +263,7 @@ class CParallelDataSorter{ * \return The ending node ID. */ unsigned long GetNodeEnd(unsigned short rank) const { - return linearPartitioner->GetLastIndexOnRank(rank); + return linearPartitioner.GetLastIndexOnRank(rank); } /*! @@ -292,14 +292,14 @@ class CParallelDataSorter{ * \input rank - the processor rank. * \return The cumulated number of points up to certain processor rank. */ - virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner->GetCumulativeSizeBeforeRank(rank);} + virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner.GetCumulativeSizeBeforeRank(rank);} /*! * \brief Get the linear number of points * \input rank - the processor rank. * \return The linear number of points up to certain processor rank. */ - unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner->GetSizeOnRank(rank);} + unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner.GetSizeOnRank(rank);} /*! * \brief Check whether the current connectivity is sorted (i.e. if SortConnectivity has been called) @@ -319,7 +319,7 @@ class CParallelDataSorter{ connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data); } - su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const { + passivedouble GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const { return connSend[Index[iPoint] + iField]; } @@ -329,7 +329,7 @@ class CParallelDataSorter{ * \return The rank/processor number. 
*/ virtual unsigned short FindProcessor(unsigned long iPoint) const { - return linearPartitioner->GetRankContainingIndex(iPoint); + return linearPartitioner.GetRankContainingIndex(iPoint); } /*! diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp index 7775761f81c..5d2e7481364 100644 --- a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp @@ -31,7 +31,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{ - CFEMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance + const CFEMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance vector globalSurfaceDOFIDs; //!< Structure to map the local sorted point ID to the global point ID vector nSurfaceDOFsRanks; //!< Number of points on each rank @@ -43,12 +43,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{ * \param[in] geometry - Pointer to the current geometry * \param[in] valVolumeSorter - The datasorter containing the volume data */ - CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter); - - /*! - * \brief Destructor - */ - ~CSurfaceFEMDataSorter() override; + CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter); /*! * \brief Sort the output data for each grid node into a linear partitioning across all processors. 
diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp index dd6132c4248..d65a2a03260 100644 --- a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp +++ b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp @@ -31,7 +31,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{ - CFVMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance + const CFVMDataSorter* volumeSorter; //!< Pointer to the volume sorter instance map Renumber2Global; //! Structure to map the local sorted point ID to the global point ID public: @@ -41,12 +41,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{ * \param[in] geometry - Pointer to the current geometry * \param[in] valVolumeSorter - The datasorter containing the volume data */ - CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, CFVMDataSorter* valVolumeSorter); - - /*! - * \brief Destructor - */ - ~CSurfaceFVMDataSorter() override; + CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, const CFVMDataSorter* valVolumeSorter); /*! * \brief Sort the output data for each grid node into a linear partitioning across all processors. 
diff --git a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp index ed75ae1e0d4..f9052653abb 100644 --- a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp @@ -66,7 +66,7 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto /*--- Create a linear partition --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); /*--- Prepare the send buffers ---*/ @@ -74,17 +74,6 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto } -CFEMDataSorter::~CFEMDataSorter(){ - - delete [] Index; - delete [] idSend; - delete linearPartitioner; - -} - - - - void CFEMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) { /*--- Sort connectivity for each type of element (excluding halos). Note diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp index fbc0dfd452e..c193c98c6a6 100644 --- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp @@ -39,7 +39,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto nGlobalPointBeforeSort = geometry->GetGlobal_nPointDomain(); nLocalPointsBeforeSort = geometry->GetnPointDomain(); - Local_Halo = new int[geometry->GetnPoint()](); + Local_Halo.resize(geometry->GetnPoint()); for (unsigned long iPoint = 0; iPoint < geometry->GetnPoint(); iPoint++){ @@ -60,7 +60,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); /*--- Prepare the send buffers ---*/ @@ -68,15 +68,6 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, 
const vecto } -CFVMDataSorter::~CFVMDataSorter(){ - - delete [] Local_Halo; - delete [] Index; - delete [] idSend; - delete linearPartitioner; - -} - void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){ unsigned long iPoint, iVertex; @@ -214,7 +205,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, own elements into the connectivity data structure. ---*/ if (val_sort) { - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); } else { iProcessor = rank; } @@ -301,7 +292,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config, own elements into the connectivity data structure. ---*/ if (val_sort) { - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); } else { iProcessor = rank; } diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp index 60aa6431250..e600566155f 100644 --- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp @@ -30,12 +30,11 @@ #include CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector &valFieldNames) : - fieldNames(std::move(valFieldNames)){ + rank(SU2_MPI::GetRank()), + size(SU2_MPI::GetSize()), + fieldNames(std::move(valFieldNames)) { - rank = SU2_MPI::GetRank(); - size = SU2_MPI::GetSize(); - - GlobalField_Counter = this->fieldNames.size(); + GlobalField_Counter = fieldNames.size(); Conn_Line_Par = nullptr; Conn_Hexa_Par = nullptr; @@ -45,8 +44,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector & Conn_Tria_Par = nullptr; Conn_Pyra_Par = nullptr; - nPoint_Send = nullptr; - nPoint_Recv = nullptr; Index = nullptr; connSend = nullptr; dataBuffer = nullptr; @@ -64,8 +61,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector & 
nElemConn_Send = new int[size+1](); nElemConn_Cum = new int[size+1](); - linearPartitioner = nullptr; - nElemPerType.fill(0); nElemPerTypeGlobal.fill(0); @@ -92,33 +87,31 @@ CParallelDataSorter::~CParallelDataSorter(){ delete [] connSend; delete [] dataBuffer; + delete [] Index; + delete [] idSend; + } void CParallelDataSorter::SortOutputData() { const int VARS_PER_POINT = GlobalField_Counter; -#ifdef HAVE_MPI - using MPI_WRAP = SelectMPIWrapper::W; - - MPI_WRAP::Request *send_req, *recv_req; - MPI_WRAP::Status status; - int ind; -#endif - /*--- Allocate the memory that we need for receiving the conn values and then cue up the non-blocking receives. Note that we do not include our own rank in the communications. We will directly copy our own data later. ---*/ - unsigned long *idRecv = new unsigned long[nPoint_Recv[size]](); + vector idRecv(nPoint_Recv[size], 0); #ifdef HAVE_MPI - /*--- We need double the number of messages to send both the conn. - and the global IDs. ---*/ + /*--- NOTE: This function calls MPI routines directly, instead of via SU2_MPI::, + * because it communicates passivedoubles and not AD types. This avoids some + * creative C++ to communicate AD types and then convert to passive. ---*/ - send_req = new MPI_WRAP::Request[2*nSends]; - recv_req = new MPI_WRAP::Request[2*nRecvs]; + /*--- We need double the number of messages to send both the conn. and the global IDs. 
---*/ + + auto send_req = new MPI_Request[2*nSends]; + auto recv_req = new MPI_Request[2*nRecvs]; unsigned long iMessage = 0; for (int ii=0; ii tmpBuffer(nPoint_Recv[size]); - passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]]; for (int jj = 0; jj < VARS_PER_POINT; jj++){ for (int ii = 0; ii < nPoint_Recv[size]; ii++){ tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj]; @@ -224,8 +221,6 @@ void CParallelDataSorter::SortOutputData() { } } - delete [] tmpBuffer; - /*--- Store the total number of local points my rank has for the current section after completing the communications. ---*/ @@ -233,12 +228,8 @@ void CParallelDataSorter::SortOutputData() { /*--- Reduce the total number of points we will write in the output files. ---*/ - SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, - MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); - - /*--- Free temporary memory from communications ---*/ + SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm()); - delete [] idRecv; } void CParallelDataSorter::PrepareSendBuffers(std::vector& globalID){ @@ -257,7 +248,7 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++ ) { - iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]); + iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]); /*--- If we have not visited this node yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -304,11 +295,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI /*--- Create an index variable to keep track of our index positions as we load up the send buffer. 
---*/ - unsigned long *index = new unsigned long[size](); - for (int ii=0; ii < size; ii++) index[ii] = VARS_PER_POINT*nPoint_Send[ii]; + vector index(size), idIndex(size); - unsigned long *idIndex = new unsigned long[size](); - for (int ii=0; ii < size; ii++) idIndex[ii] = nPoint_Send[ii]; + for (int ii=0; ii < size; ii++) { + index[ii] = VARS_PER_POINT*nPoint_Send[ii]; + idIndex[ii] = nPoint_Send[ii]; + } Index = new unsigned long[nLocalPointsBeforeSort](); @@ -317,13 +309,13 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++) { - iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]); + iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]); /*--- Load the global ID (minus offset) for sorting the points once they all reach the correct processor. ---*/ unsigned long nn = idIndex[iProcessor]; - idSend[nn] = globalID[iPoint] - linearPartitioner->GetFirstIndexOnRank(iProcessor); + idSend[nn] = globalID[iPoint] - linearPartitioner.GetFirstIndexOnRank(iProcessor); /*--- Store the index this point has in the send buffer ---*/ @@ -334,13 +326,8 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector& globalI index[iProcessor] += VARS_PER_POINT; idIndex[iProcessor]++; - } - /*--- Free memory after loading up the send buffer. 
---*/ - - delete [] index; - delete [] idIndex; } unsigned long CParallelDataSorter::GetElem_Connectivity(GEO_TYPE type, unsigned long iElem, unsigned long iNode) const { @@ -429,6 +416,4 @@ void CParallelDataSorter::SetTotalElements(){ nElemConn_Cum[ii+1] += nElemConn_Cum[ii]; } - } - diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp index 0ed8b0a5603..9a7bc400418 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp @@ -29,12 +29,12 @@ #include "../../../../Common/include/fem/fem_geometry_structure.hpp" #include -CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter) : +CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter) : CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){ nDim = geometry->GetnDim(); - this->volumeSorter = valVolumeSorter; + volumeSorter = valVolumeSorter; connectivitySorted = false; @@ -62,12 +62,10 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); } -CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter() { delete linearPartitioner; } - void CSurfaceFEMDataSorter::SortOutputData() { if (!connectivitySorted){ @@ -129,7 +127,7 @@ void CSurfaceFEMDataSorter::SortOutputData() { for(unsigned long i=0; iGetRankContainingIndex(globalSurfaceDOFIDs[i]); + unsigned long iProcessor = linearPartitioner.GetRankContainingIndex(globalSurfaceDOFIDs[i]); /* Store the global ID in the send buffer for iProcessor. 
*/ sendBuf[iProcessor].push_back(globalSurfaceDOFIDs[i]); @@ -219,7 +217,7 @@ void CSurfaceFEMDataSorter::SortOutputData() { /* Determine the local index of the global surface DOFs and copy the data into Parallel_Surf_Data. */ for(unsigned long i=0; iGetCumulativeSizeBeforeRank(rank); + const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner.GetCumulativeSizeBeforeRank(rank); for(int jj=0; jjGetData(jj,ii); diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp index 64c2c1bbde5..f9b36ab1648 100644 --- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp +++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp @@ -29,12 +29,12 @@ #include "../../../../Common/include/geometry/CGeometry.hpp" #include -CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, CFVMDataSorter* valVolumeSorter) : +CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, const CFVMDataSorter* valVolumeSorter) : CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){ nDim = geometry->GetnDim(); - this->volumeSorter = valVolumeSorter; + volumeSorter = valVolumeSorter; connectivitySorted = false; @@ -43,12 +43,10 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr /*--- Create the linear partitioner --- */ - linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0); + linearPartitioner.Initialize(nGlobalPointBeforeSort, 0); } -CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter() { delete linearPartitioner; } - void CSurfaceFVMDataSorter::SortOutputData() { unsigned long iProcessor; @@ -96,7 +94,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our 
number of elements that must be sent to a particular proc. ---*/ @@ -124,7 +122,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -152,7 +150,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -211,7 +209,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -245,7 +243,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -279,7 +277,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load global ID into the buffer for sending ---*/ @@ -499,7 +497,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = 
linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -559,7 +557,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); if (nElem_Flag[iProcessor] != ii) { @@ -718,7 +716,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -739,7 +737,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -760,7 +758,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Store the global ID if it is outside our own linear partition. ---*/ @@ -795,7 +793,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. 
---*/ @@ -847,7 +845,7 @@ void CSurfaceFVMDataSorter::SortOutputData() { /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -1192,7 +1190,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- If we have not visited this element yet, increment our number of elements that must be sent to a particular proc. ---*/ @@ -1282,7 +1280,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry * /*--- Search for the processor that owns this point ---*/ - iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index); + iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index); /*--- Load connectivity into the buffer for sending ---*/ From 3870382fc015eb2c274549c7252cd20ac6a9920b Mon Sep 17 00:00:00 2001 From: Pedro Gomes Date: Wed, 31 Mar 2021 00:04:40 +0100 Subject: [PATCH 57/57] enough testing for now, revert RealReverseIndex to RealReverse --- Common/include/code_config.hpp | 3 +-- Common/src/geometry/CPhysicalGeometry.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp index 904805dc870..377432ee945 100644 --- a/Common/include/code_config.hpp +++ b/Common/include/code_config.hpp @@ -92,8 +92,7 @@ using su2double = codi::RealReversePrimal; #elif CODI_PRIMAL_INDEX_TAPE using su2double = codi::RealReversePrimalIndex; #else -//using su2double = codi::RealReverse; -using su2double = codi::RealReverseIndex; +using su2double = 
codi::RealReverse; #endif #endif #elif defined(CODI_FORWARD_TYPE) // forward mode AD diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp index 72e26928e03..688972ce1f1 100644 --- a/Common/src/geometry/CPhysicalGeometry.cpp +++ b/Common/src/geometry/CPhysicalGeometry.cpp @@ -7543,7 +7543,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) /*--- To make preaccumulation more effective, use as few inputs as possible, recomputing intermediate quantities as needed. ---*/ -// AD::StartPreacc(); + AD::StartPreacc(); /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; @@ -7654,7 +7654,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action) } } #endif -// AD::EndPreacc(); + AD::EndPreacc(); } su2double DomainVolume; @@ -7700,7 +7700,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto nNodes = bound[iMarker][iElem]->GetnNodes(); -// AD::StartPreacc(); + AD::StartPreacc(); /*--- Get pointers to the coordinates of all the element nodes ---*/ array Coord; @@ -7752,7 +7752,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh const auto iVertex = nodes->GetVertex(iPoint, iMarker); AD::SetPreaccOut(vertex[iMarker][iVertex]->GetNormal(), nDim); } -// AD::EndPreacc(); + AD::EndPreacc(); } } END_SU2_OMP_FOR