From 5fd72ca40624c430f2e35a7b6914346e599583a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Sun, 28 Feb 2021 20:02:24 +0100
Subject: [PATCH 01/57] Add OpDiLib submodule.

---
 .gitmodules    | 3 +++
 externals/opdi | 1 +
 2 files changed, 4 insertions(+)
 create mode 160000 externals/opdi

diff --git a/.gitmodules b/.gitmodules
index f160f2e549e..ae2967618b2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -15,3 +15,6 @@
 [submodule "subprojects/Mutationpp"]
         path = subprojects/Mutationpp
         url = https://github.com/mutationpp/Mutationpp.git	
+[submodule "externals/opdi"]
+	path = externals/opdi
+	url = https://github.com/SciCompKL/OpDiLib
diff --git a/externals/opdi b/externals/opdi
new file mode 160000
index 00000000000..3c4132bbf12
--- /dev/null
+++ b/externals/opdi
@@ -0,0 +1 @@
+Subproject commit 3c4132bbf1266b2e999d22212c8de88ec085a3e0

From 679e979ab0a0baa7359a92be6aa797992f4e4a86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Sun, 28 Feb 2021 20:07:18 +0100
Subject: [PATCH 02/57] Update meson script.

---
 meson.build           | 5 +++++
 meson_scripts/init.py | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/meson.build b/meson.build
index cb688126fbe..3399630f9c3 100644
--- a/meson.build
+++ b/meson.build
@@ -104,6 +104,11 @@ endif
 if omp
   # add OpenMP dependency
   su2_deps += omp_dep
+
+  # add opdi dependency
+  if get_option('enable-autodiff')
+    codi_dep += declare_dependency(include_directories: 'externals/opdi/include')
+  endif
 endif
 
 if get_option('enable-tecio')
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index fe0cc063aa9..e34d786a04a 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -48,6 +48,8 @@ def init_submodules(method = 'auto'):
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
+  sha_version_opdi = '3c4132bbf1266b2e999d22212c8de88ec085a3e0'
+  github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'
   sha_version_ninja = '52649de2c56b63f42bc59513d51286531c595b44'
@@ -57,12 +59,14 @@ def init_submodules(method = 'auto'):
 
   medi_name = 'MeDiPack'
   codi_name = 'CoDiPack'
+  opdi_name = 'OpDiLib'
   meson_name = 'meson'
   ninja_name= 'ninja'
   mpp_name= 'Mutationpp'
   base_path = cur_dir + os.path.sep + 'externals' + os.path.sep 
   alt_name_medi = base_path + 'medi'
   alt_name_codi = base_path + 'codi'
+  alt_name_opdi = base_path + 'opdi'
   alt_name_meson =  base_path + 'meson'
   alt_name_ninja =  base_path + 'ninja'
   alt_name_mpp =  cur_dir + os.path.sep + 'subprojects' + os.path.sep  + 'Mutationpp'
@@ -83,6 +87,7 @@ def init_submodules(method = 'auto'):
   if is_git:
     submodule_status(alt_name_codi, sha_version_codi)
     submodule_status(alt_name_medi, sha_version_medi)
+    submodule_status(alt_name_opdi, sha_version_opdi)
     submodule_status(alt_name_meson, sha_version_meson)
     submodule_status(alt_name_ninja, sha_version_ninja)
     submodule_status(alt_name_mpp, sha_version_mpp)
@@ -90,6 +95,7 @@ def init_submodules(method = 'auto'):
   else:
     download_module(codi_name, alt_name_codi, github_repo_codi, sha_version_codi)
     download_module(medi_name, alt_name_medi, github_repo_medi, sha_version_medi)
+    download_module(opdi_name, alt_name_opdi, github_repo_opdi, sha_version_opdi)
     download_module(meson_name, alt_name_meson, github_repo_meson, sha_version_meson)
     download_module(ninja_name, alt_name_ninja, github_repo_ninja, sha_version_ninja)
     download_module(mpp_name, alt_name_mpp, github_repo_mpp, sha_version_mpp)

From b4650ba8f1f3a412677e937eba2da4e302abb4e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Sun, 28 Feb 2021 20:55:08 +0100
Subject: [PATCH 03/57] Update to thread-safe version of CoDiPack.

---
 externals/codi        | 2 +-
 meson_scripts/init.py | 2 +-
 preconfigure.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/externals/codi b/externals/codi
index 1b8d3f5f03d..2a0dbdbed2f 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8
+Subproject commit 2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index e34d786a04a..717c644cb20 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -44,7 +44,7 @@ def init_submodules(method = 'auto'):
 
   # This information of the modules is used if projects was not cloned using git
   # The sha tag must be maintained manually to point to the correct commit
-  sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8'
+  sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87'
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
diff --git a/preconfigure.py b/preconfigure.py
index 639740a54d8..94314b5da33 100755
--- a/preconfigure.py
+++ b/preconfigure.py
@@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False):
     
     # This information of the modules is used if projects was not cloned using git
     # The sha tag must be maintained manually to point to the correct commit
-    sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8'
+    sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87'
     github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
     sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
     github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'

From caa15426eb84ffc9f6e604407a6becdd281c5e2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 00:18:06 +0100
Subject: [PATCH 04/57] Add parallel AD type.

---
 Common/include/basic_types/datatype_structure.hpp | 4 ++++
 Common/include/parallelization/omp_structure.hpp  | 5 ++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp
index 58bc9920c3b..63965aa92ad 100644
--- a/Common/include/basic_types/datatype_structure.hpp
+++ b/Common/include/basic_types/datatype_structure.hpp
@@ -80,6 +80,9 @@ using su2enable_if = typename std::enable_if<condition,bool>::type;
 #define CODI_PRIMAL_INDEX_TAPE 0
 #endif
 
+#if defined(_OPENMP)
+using su2double = codi::RealReverseIndexParallel;
+#else
 #if CODI_INDEX_TAPE
 using su2double = codi::RealReverseIndex;
 #elif CODI_PRIMAL_TAPE
@@ -89,6 +92,7 @@ using su2double = codi::RealReversePrimalIndex;
 #else
 using su2double = codi::RealReverse;
 #endif
+#endif
 
 #elif defined(CODI_FORWARD_TYPE) // forward mode AD
 #include "codi.hpp"
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index d12f450219b..a211664c549 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -46,9 +46,8 @@
 #define PRAGMIZE(X) _Pragma(#X)
 #endif
 
-/*--- Detect compilation with OpenMP support, protect agaisnt
- *    using OpenMP with Reverse AD (not supported yet). ---*/
-#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE)
+/*--- Detect compilation with OpenMP support. ---*/
+#if defined(_OPENMP)
 #define HAVE_OMP
 #include <omp.h>
 

From d153a000c928372578a8bc0000f2222d51efd951 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 00:58:28 +0100
Subject: [PATCH 05/57] Add OpDiLib bindings.

---
 .../include/parallelization/omp_structure.cpp | 50 +++++++++++++++++++
 .../include/parallelization/omp_structure.hpp | 12 +++++
 Common/src/meson.build                        |  3 +-
 SU2_CFD/src/SU2_CFD.cpp                       |  7 +++
 4 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 Common/include/parallelization/omp_structure.cpp

diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp
new file mode 100644
index 00000000000..933f154eea6
--- /dev/null
+++ b/Common/include/parallelization/omp_structure.cpp
@@ -0,0 +1,50 @@
+/*!
+ * \file omp_structure.cpp
+ * \brief Source file counterpart for omp_structure.hpp.
+ * \note Contains OpDiLib logic and includes the OpDiLib source file.
+ * \author J. Blühdorn
+ * \version 7.1.0 "Blackbird"
+ *
+ * SU2 Project Website: https://su2code.github.io
+ *
+ * The SU2 Project is maintained by the SU2 Foundation
+ * (http://su2foundation.org)
+ *
+ * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+ *
+ * SU2 is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * SU2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "omp_structure.hpp"
+
+void omp_initialize() {
+#ifdef HAVE_OPDI
+  opdi::logic = new opdi::OmpLogic;
+  opdi::logic->init();
+  opdi::tool = new CoDiOpDiTool<su2double>;
+#endif
+}
+
+void omp_finalize() {
+#ifdef HAVE_OPDI
+  opdi::logic->finalize();
+  opdi::backend->finalize();
+  delete opdi::logic;
+  delete opdi::tool;
+#endif
+}
+
+#ifdef HAVE_OPDI
+#include "opdi.cpp"
+#endif
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index a211664c549..c74f2e434e0 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -51,6 +51,13 @@
 #define HAVE_OMP
 #include <omp.h>
 
+#if defined(CODI_REVERSE_TYPE)
+#define HAVE_OPDI
+#include "opdi/backend/ompt/omptBackend.hpp"
+#include "codi/externals/codiOpdiTool.hpp"
+#include "opdi.hpp"
+#endif
+
 /*--- The generic start of OpenMP constructs. ---*/
 #define SU2_OMP(ARGS) PRAGMIZE(omp ARGS)
 
@@ -105,6 +112,11 @@ inline void omp_destroy_lock(omp_lock_t*){}
 
 #endif // end OpenMP detection
 
+/*--- Initialization and finalization ---*/
+
+void omp_initialize();
+void omp_finalize();
+
 /*--- Detect SIMD support (version 4+, after Jul 2013). ---*/
 #ifdef _OPENMP
 #if _OPENMP >= 201307
diff --git a/Common/src/meson.build b/Common/src/meson.build
index 5dcbb57c66f..b3e0726e70c 100644
--- a/Common/src/meson.build
+++ b/Common/src/meson.build
@@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp',
            'CConfig.cpp',
            'basic_types/ad_structure.cpp',
            'wall_model.cpp',
-           '../include/parallelization/mpi_structure.cpp'])
+           '../include/parallelization/mpi_structure.cpp',
+           '../include/parallelization/omp_structure.cpp'])
 
 subdir('linear_algebra')
 subdir('toolboxes')
diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp
index a73cb5126dc..0353d547eba 100644
--- a/SU2_CFD/src/SU2_CFD.cpp
+++ b/SU2_CFD/src/SU2_CFD.cpp
@@ -56,6 +56,10 @@ int main(int argc, char *argv[]) {
 
   CLI11_PARSE(app, argc, argv)
 
+  /*--- OpenMP initialization ---*/
+
+  omp_initialize();
+
   omp_set_num_threads(num_threads);
 
   /*--- MPI initialization, and buffer setting ---*/
@@ -173,6 +177,9 @@ int main(int argc, char *argv[]) {
   /*--- Finalize MPI parallelization. ---*/
   SU2_MPI::Finalize();
 
+  /*--- Finalize OpenMP. ---*/
+  omp_finalize();
+
   return EXIT_SUCCESS;
 
 }

From d9ce155649a05d2cfe6ff3ade526002884a9f140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 10:59:03 +0100
Subject: [PATCH 06/57] Update AD interface.

---
 Common/include/basic_types/ad_structure.hpp   | 149 ++++++++++++------
 .../basic_types/datatype_structure.hpp        |  87 +---------
 Common/include/code_config.hpp                | 120 ++++++++++++++
 .../include/parallelization/omp_structure.cpp |   2 +-
 .../include/parallelization/omp_structure.hpp |  11 +-
 Common/src/basic_types/ad_structure.cpp       |   5 +-
 Common/src/linear_algebra/CSysSolve.cpp       |   2 +-
 SU2_CFD/src/SU2_CFD.cpp                       |  10 ++
 .../src/drivers/CDiscAdjMultizoneDriver.cpp   |   2 +-
 .../src/drivers/CDiscAdjSinglezoneDriver.cpp  |   2 +-
 10 files changed, 248 insertions(+), 142 deletions(-)
 create mode 100644 Common/include/code_config.hpp

diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 6353ec3046c..d93df0414fe 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -1,7 +1,7 @@
 /*!
  * \file ad_structure.hpp
  * \brief Main routines for the algorithmic differentiation (AD) structure.
- * \author T. Albring
+ * \author T. Albring, J. Blühdorn
  * \version 7.1.0 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -28,6 +28,7 @@
 #pragma once
 
 #include "datatype_structure.hpp"
+#include "../parallelization/omp_structure.hpp"
 
 /*!
  * \namespace AD
@@ -278,19 +279,23 @@ namespace AD{
 
   extern int adjointVectorPosition;
 
-  /*--- Reference to the tape ---*/
-
-  extern su2double::TapeType& globalTape;
-
   extern bool Status;
 
   extern bool PreaccActive;
 
   extern bool PreaccEnabled;
 
-  extern su2double::TapeType::Position StartPosition, EndPosition;
+#ifdef HAVE_OPDI
+  using CoDiTapePosition = su2double::TapeType::Position;
+  using OpDiState = void*;
+  using TapePosition = std::pair<CoDiTapePosition, OpDiState>;
+#else
+  using TapePosition = su2double::TapeType::Position;
+#endif
 
-  extern std::vector<su2double::TapeType::Position> TapePositions;
+  extern TapePosition StartPosition, EndPosition;
+
+  extern std::vector<TapePosition> TapePositions;
 
   extern std::vector<su2double::GradientData> localInputValues;
 
@@ -298,42 +303,68 @@ namespace AD{
 
   extern codi::PreaccumulationHelper<su2double> PreaccHelper;
 
+  /*--- Reference to the tape. ---*/
+
+  FORCEINLINE su2double::TapeType& getGlobalTape() {
+    return su2double::getGlobalTape();
+  }
+
   FORCEINLINE void RegisterInput(su2double &data, bool push_index = true) {
-    AD::globalTape.registerInput(data);
+    AD::getGlobalTape().registerInput(data);
     if (push_index) {
       inputValues.push_back(data.getGradientData());
     }
   }
 
-  FORCEINLINE void RegisterOutput(su2double& data) {AD::globalTape.registerOutput(data);}
+  FORCEINLINE void RegisterOutput(su2double& data) {AD::getGlobalTape().registerOutput(data);}
 
   FORCEINLINE void ResetInput(su2double &data) {data.getGradientData() = su2double::GradientData();}
 
-  FORCEINLINE void StartRecording() {AD::globalTape.setActive();}
+  FORCEINLINE void StartRecording() {AD::getGlobalTape().setActive();}
 
-  FORCEINLINE void StopRecording() {AD::globalTape.setPassive();}
+  FORCEINLINE void StopRecording() {AD::getGlobalTape().setPassive();}
 
-  FORCEINLINE bool TapeActive() { return AD::globalTape.isActive(); }
+  FORCEINLINE bool TapeActive() { return AD::getGlobalTape().isActive(); }
 
-  FORCEINLINE void PrintStatistics() {AD::globalTape.printStatistics();}
+  FORCEINLINE void PrintStatistics() {AD::getGlobalTape().printStatistics();}
 
-  FORCEINLINE void ClearAdjoints() {AD::globalTape.clearAdjoints(); }
+  FORCEINLINE void ClearAdjoints() {AD::getGlobalTape().clearAdjoints(); }
 
-  FORCEINLINE void ComputeAdjoint() {AD::globalTape.evaluate(); adjointVectorPosition = 0;}
+  FORCEINLINE void ComputeAdjoint() {
+  #if defined(HAVE_OPDI)
+    opdi::logic->prepareEvaluate();
+  #endif
+    AD::getGlobalTape().evaluate();
+    adjointVectorPosition = 0;
+  }
 
   FORCEINLINE void ComputeAdjoint(unsigned short enter, unsigned short leave) {
-    AD::globalTape.evaluate(TapePositions[enter], TapePositions[leave]);
+  #if defined(HAVE_OPDI)
+    opdi::logic->recoverState(TapePositions[enter].second);
+    opdi::logic->prepareEvaluate();
+    AD::getGlobalTape().evaluate(TapePositions[enter].first, TapePositions[leave].first);
+  #else
+    AD::getGlobalTape().evaluate(TapePositions[enter], TapePositions[leave]);
+  #endif
     if (leave == 0)
       adjointVectorPosition = 0;
   }
 
   FORCEINLINE void Reset() {
-    globalTape.reset();
+    AD::getGlobalTape().reset();
+  #if defined(HAVE_OPDI)
+    opdi::logic->reset();
+  #endif
     if (inputValues.size() != 0) {
       adjointVectorPosition = 0;
       inputValues.clear();
     }
     if (TapePositions.size() != 0) {
+    #if defined(HAVE_OPDI)
+      for (TapePosition& pos : TapePositions) {
+        opdi::logic->freeState(pos.second);
+      }
+    #endif
       TapePositions.clear();
     }
   }
@@ -343,11 +374,11 @@ namespace AD{
   }
 
   FORCEINLINE void SetDerivative(int index, const double val) {
-    AD::globalTape.setGradient(index, val);
+    AD::getGlobalTape().setGradient(index, val);
   }
 
   FORCEINLINE double GetDerivative(int index) {
-    return AD::globalTape.getGradient(index);
+    return AD::getGlobalTape().getGradient(index);
   }
 
   /*--- Base case for parameter pack expansion. ---*/
@@ -397,7 +428,7 @@ namespace AD{
   }
 
   FORCEINLINE void StartPreacc() {
-    if (globalTape.isActive() && PreaccEnabled) {
+    if (AD::getGlobalTape().isActive() && PreaccEnabled) {
       PreaccHelper.start();
       PreaccActive = true;
     }
@@ -438,7 +469,11 @@ namespace AD{
   }
 
   FORCEINLINE void Push_TapePosition() {
-    TapePositions.push_back(AD::globalTape.getPosition());
+  #if defined(HAVE_OPDI)
+    TapePositions.push_back({AD::getGlobalTape().getPosition(), opdi::logic->exportState()});
+  #else
+    TapePositions.push_back(AD::getGlobalTape().getPosition());
+  #endif
   }
 
   FORCEINLINE void EndPreacc(){
@@ -448,56 +483,77 @@ namespace AD{
   }
 
   FORCEINLINE void StartExtFunc(bool storePrimalInput, bool storePrimalOutput){
-    FuncHelper = new ExtFuncHelper(true);
-    if (!storePrimalInput){
-      FuncHelper->disableInputPrimalStore();
-    }
-    if (!storePrimalOutput){
-      FuncHelper->disableOutputPrimalStore();
+    SU2_OMP_MASTER
+    {
+      FuncHelper = new ExtFuncHelper(true);
+      if (!storePrimalInput){
+        FuncHelper->disableInputPrimalStore();
+      }
+      if (!storePrimalOutput){
+        FuncHelper->disableOutputPrimalStore();
+      }
     }
   }
 
   FORCEINLINE void SetExtFuncIn(const su2double &data) {
-    FuncHelper->addInput(data);
+    SU2_OMP_MASTER
+    {
+      FuncHelper->addInput(data);
+    }
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncIn(const T& data, const int size) {
-    for (int i = 0; i < size; i++) {
-      FuncHelper->addInput(data[i]);
+    SU2_OMP_MASTER
+    {
+      for (int i = 0; i < size; i++) {
+        FuncHelper->addInput(data[i]);
+      }
     }
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncIn(const T& data, const int size_x, const int size_y) {
-    for (int i = 0; i < size_x; i++) {
-      for (int j = 0; j < size_y; j++) {
-        FuncHelper->addInput(data[i][j]);
+    SU2_OMP_MASTER
+    {
+      for (int i = 0; i < size_x; i++) {
+        for (int j = 0; j < size_y; j++) {
+          FuncHelper->addInput(data[i][j]);
+        }
       }
     }
   }
 
   FORCEINLINE void SetExtFuncOut(su2double& data) {
-    if (globalTape.isActive()) {
-      FuncHelper->addOutput(data);
+    SU2_OMP_MASTER
+    {
+      if (AD::getGlobalTape().isActive()) {
+        FuncHelper->addOutput(data);
+      }
     }
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncOut(T&& data, const int size) {
-    for (int i = 0; i < size; i++) {
-      if (globalTape.isActive()) {
-        FuncHelper->addOutput(data[i]);
+    SU2_OMP_MASTER
+    {
+      for (int i = 0; i < size; i++) {
+        if (AD::getGlobalTape().isActive()) {
+          FuncHelper->addOutput(data[i]);
+        }
       }
     }
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) {
-    for (int i = 0; i < size_x; i++) {
-      for (int j = 0; j < size_y; j++) {
-        if (globalTape.isActive()) {
-          FuncHelper->addOutput(data[i][j]);
+    SU2_OMP_MASTER
+    {
+      for (int i = 0; i < size_x; i++) {
+        for (int j = 0; j < size_y; j++) {
+          if (AD::getGlobalTape().isActive()) {
+            FuncHelper->addOutput(data[i][j]);
+          }
         }
       }
     }
@@ -508,10 +564,15 @@ namespace AD{
     checkpoint->clear();
   }
 
-  FORCEINLINE void EndExtFunc() { delete FuncHelper; }
+  FORCEINLINE void EndExtFunc() {
+    SU2_OMP_MASTER
+    {
+      delete FuncHelper;
+    }
+  }
 
   FORCEINLINE bool BeginPassive() {
-    if(AD::globalTape.isActive()) {
+    if(AD::getGlobalTape().isActive()) {
       StopRecording();
       return true;
     }
diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp
index 63965aa92ad..943e57a8fbf 100644
--- a/Common/include/basic_types/datatype_structure.hpp
+++ b/Common/include/basic_types/datatype_structure.hpp
@@ -30,91 +30,10 @@
 #include <iostream>
 #include <complex>
 #include <cstdio>
-#include <type_traits>
-
-#if defined(_MSC_VER)
-#define FORCEINLINE __forceinline
-#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
-#define FORCEINLINE inline __attribute__((always_inline))
-#else
-#define FORCEINLINE inline
-#endif
-
-#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
-#define NEVERINLINE inline __attribute__((noinline))
-#else
-#define NEVERINLINE inline
-#endif
-
-#if defined(__INTEL_COMPILER)
-/*--- Disable warnings related to inline attributes. ---*/
-#pragma warning disable 2196
-#pragma warning disable 3415
-/*--- Disable warnings related to overloaded virtual. ---*/
-#pragma warning disable 654
-#pragma warning disable 1125
-#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE)
-#pragma warning disable 1875
-#endif
-#endif
-
-/*--- Convenience SFINAE typedef to conditionally
- * enable/disable function template overloads. ---*/
-template<bool condition>
-using su2enable_if = typename std::enable_if<condition,bool>::type;
-
-/*--- Depending on the datatype defined during the configuration,
- * include the correct definition, and create the main typedef. ---*/
-
-#if defined(CODI_REVERSE_TYPE) // reverse mode AD
-#include "codi.hpp"
-#include "codi/tools/dataStore.hpp"
-
-#ifndef CODI_INDEX_TAPE
-#define CODI_INDEX_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_TAPE
-#define CODI_PRIMAL_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_INDEX_TAPE
-#define CODI_PRIMAL_INDEX_TAPE 0
-#endif
-
-#if defined(_OPENMP)
-using su2double = codi::RealReverseIndexParallel;
-#else
-#if CODI_INDEX_TAPE
-using su2double = codi::RealReverseIndex;
-#elif CODI_PRIMAL_TAPE
-using su2double = codi::RealReversePrimal;
-#elif CODI_PRIMAL_INDEX_TAPE
-using su2double = codi::RealReversePrimalIndex;
-#else
-using su2double = codi::RealReverse;
-#endif
-#endif
-
-#elif defined(CODI_FORWARD_TYPE) // forward mode AD
-#include "codi.hpp"
-using su2double = codi::RealForward;
-
-#else // primal / direct / no AD
-using su2double = double;
-#endif
 
+#include "../code_config.hpp"
 #include "ad_structure.hpp"
 
-/*--- This type can be used for (rare) compatiblity cases or for
- * computations that are intended to be (always) passive. ---*/
-using passivedouble = double;
-
-/*--- Define a type for potentially lower precision operations. ---*/
-#ifdef USE_MIXED_PRECISION
-using su2mixedfloat = float;
-#else
-using su2mixedfloat = passivedouble;
-#endif
-
 /*!
  * \namespace SU2_TYPE
  * \brief Namespace for defining the datatype wrapper routines, this acts as a base
@@ -178,11 +97,11 @@ namespace SU2_TYPE {
 
 #ifdef CODI_REVERSE_TYPE
   FORCEINLINE passivedouble GetSecondary(const su2double& data) {
-    return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]);
+    return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]);
   }
 
   FORCEINLINE passivedouble GetDerivative(const su2double& data) {
-    return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]);
+    return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]);
   }
 #else // forward
   FORCEINLINE passivedouble GetSecondary(const su2double& data) {return data.getGradient();}
diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
new file mode 100644
index 00000000000..0c017612a6b
--- /dev/null
+++ b/Common/include/code_config.hpp
@@ -0,0 +1,120 @@
+/*!
+ * \file code_config.hpp
+ * \brief Header file for collecting common macros, definitions and type configurations.
+ * \author T. Albring, P. Gomes, J. Blühdorn
+ * \version 7.1.0 "Blackbird"
+ *
+ * SU2 Project Website: https://su2code.github.io
+ *
+ * The SU2 Project is maintained by the SU2 Foundation
+ * (http://su2foundation.org)
+ *
+ * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+ *
+ * SU2 is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * SU2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+#if defined(_MSC_VER)
+#define FORCEINLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define FORCEINLINE inline __attribute__((always_inline))
+#else
+#define FORCEINLINE inline
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define NEVERINLINE inline __attribute__((noinline))
+#else
+#define NEVERINLINE inline
+#endif
+
+#if defined(__INTEL_COMPILER)
+/*--- Disable warnings related to inline attributes. ---*/
+#pragma warning disable 2196
+#pragma warning disable 3415
+/*--- Disable warnings related to overloaded virtual. ---*/
+#pragma warning disable 654
+#pragma warning disable 1125
+#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE)
+#pragma warning disable 1875
+#endif
+#endif
+
+/*--- Convenience SFINAE typedef to conditionally
+ * enable/disable function template overloads. ---*/
+template<bool condition>
+using su2enable_if = typename std::enable_if<condition,bool>::type;
+
+/*--- Detect compilation with OpenMP. ---*/
+#if defined(_OPENMP)
+#define HAVE_OMP
+#endif
+
+/*--- Depending on the datatype defined during the configuration,
+ * include the correct definition, and create the main typedef. ---*/
+
+#if defined(CODI_REVERSE_TYPE) // reverse mode AD
+#include "codi.hpp"
+#include "codi/tools/dataStore.hpp"
+
+#ifndef CODI_INDEX_TAPE
+#define CODI_INDEX_TAPE 0
+#endif
+#ifndef CODI_PRIMAL_TAPE
+#define CODI_PRIMAL_TAPE 0
+#endif
+#ifndef CODI_PRIMAL_INDEX_TAPE
+#define CODI_PRIMAL_INDEX_TAPE 0
+#endif
+
+#if defined(HAVE_OMP)
+using su2double = codi::RealReverseIndexParallel;
+#else
+#if CODI_INDEX_TAPE
+using su2double = codi::RealReverseIndex;
+#elif CODI_PRIMAL_TAPE
+using su2double = codi::RealReversePrimal;
+#elif CODI_PRIMAL_INDEX_TAPE
+using su2double = codi::RealReversePrimalIndex;
+#else
+using su2double = codi::RealReverse;
+#endif
+#endif
+#elif defined(CODI_FORWARD_TYPE) // forward mode AD
+#include "codi.hpp"
+using su2double = codi::RealForward;
+
+#else // primal / direct / no AD
+using su2double = double;
+#endif
+
+/*--- This type can be used for (rare) compatibility cases or for
+ * computations that are intended to be (always) passive. ---*/
+using passivedouble = double;
+
+/*--- Define a type for potentially lower precision operations. ---*/
+#ifdef USE_MIXED_PRECISION
+using su2mixedfloat = float;
+#else
+using su2mixedfloat = passivedouble;
+#endif
+
+/*--- Detect if OpDiLib has to be used. ---*/
+#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+#define HAVE_OPDI
+#endif
diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp
index 933f154eea6..13183c9178a 100644
--- a/Common/include/parallelization/omp_structure.cpp
+++ b/Common/include/parallelization/omp_structure.cpp
@@ -1,7 +1,7 @@
 /*!
  * \file omp_structure.cpp
  * \brief Source file counterpart for omp_structure.hpp.
- * \note Contains OpDiLib logic and includes the OpDiLib source file.
+ * \note Contains OpDiLib initialization, finalization and includes the OpDiLib source file.
  * \author J. Blühdorn
  * \version 7.1.0 "Blackbird"
  *
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index c74f2e434e0..7a258881f86 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -12,7 +12,7 @@
  *       e.g. SU2_OMP_PARALLEL. Exotic pragmas of limited portability should be
  *       defined here with suitable fallback versions to limit the spread of
  *       compiler tricks in other areas of the code.
- * \author P. Gomes
+ * \author P. Gomes, J. Blühdorn
  * \version 7.1.0 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -38,7 +38,7 @@
 
 #pragma once
 
-#include "../basic_types/datatype_structure.hpp"
+#include "../code_config.hpp"
 
 #if defined(_MSC_VER)
 #define PRAGMIZE(X) __pragma(X)
@@ -46,13 +46,10 @@
 #define PRAGMIZE(X) _Pragma(#X)
 #endif
 
-/*--- Detect compilation with OpenMP support. ---*/
-#if defined(_OPENMP)
-#define HAVE_OMP
+#if defined(HAVE_OMP)
 #include <omp.h>
 
-#if defined(CODI_REVERSE_TYPE)
-#define HAVE_OPDI
+#if defined(HAVE_OPDI)
 #include "opdi/backend/ompt/omptBackend.hpp"
 #include "codi/externals/codiOpdiTool.hpp"
 #include "opdi.hpp"
diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp
index 6d3a99c4375..f9fc9f460e2 100644
--- a/Common/src/basic_types/ad_structure.cpp
+++ b/Common/src/basic_types/ad_structure.cpp
@@ -37,9 +37,8 @@ namespace AD {
   std::vector<su2double::GradientData> localInputValues;
   std::vector<su2double*> localOutputValues;
 
-  su2double::TapeType& globalTape = su2double::getGlobalTape();
-  su2double::TapeType::Position StartPosition, EndPosition;
-  std::vector<su2double::TapeType::Position> TapePositions;
+  TapePosition StartPosition, EndPosition;
+  std::vector<TapePosition> TapePositions;
 
   bool PreaccActive = false;
   bool PreaccEnabled = true;
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 2f4907f00cd..888cab7e41c 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -825,7 +825,7 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
   if (config->GetDiscrete_Adjoint()) {
 #ifdef CODI_REVERSE_TYPE
 
-    TapeActive = AD::globalTape.isActive();
+    TapeActive = AD::getGlobalTape().isActive();
 
     AD::StartExtFunc(false, false);
 
diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp
index 0353d547eba..7a3675026d9 100644
--- a/SU2_CFD/src/SU2_CFD.cpp
+++ b/SU2_CFD/src/SU2_CFD.cpp
@@ -73,6 +73,11 @@ int main(int argc, char *argv[]) {
 #endif
   SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm();
 
+  /*--- AD initialization ---*/
+#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+  AD::getGlobalTape().initialize();
+#endif
+
   /*--- Uncomment the following line if runtime NaN catching is desired. ---*/
   // feenableexcept(FE_INVALID | FE_OVERFLOW);
 
@@ -174,6 +179,11 @@ int main(int argc, char *argv[]) {
   libxsmm_finalize();
 #endif
 
+  /*--- Finalize AD, if necessary. ---*/
+#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+  AD::getGlobalTape().finalize();
+#endif
+
   /*--- Finalize MPI parallelization. ---*/
   SU2_MPI::Finalize();
 
diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
index 296065b8668..e09af868ef9 100644
--- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
+++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
@@ -624,7 +624,7 @@ void CDiscAdjMultizoneDriver::SetRecording(unsigned short kind_recording, Kind_T
     if (rank == MASTER_NODE) AD::PrintStatistics();
 #ifdef CODI_REVERSE_TYPE
     if (size > SINGLE_NODE) {
-      su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0;
+      su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0;
       SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
       if (rank == MASTER_NODE) {
         cout << "MPI\n";
diff --git a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
index 48a9463e00d..429bafcd796 100644
--- a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
+++ b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
@@ -295,7 +295,7 @@ void CDiscAdjSinglezoneDriver::SetRecording(unsigned short kind_recording){
     if (rank == MASTER_NODE) AD::PrintStatistics();
 #ifdef CODI_REVERSE_TYPE
     if (size > SINGLE_NODE) {
-      su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0;
+      su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0;
       SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
       if (rank == MASTER_NODE) {
         cout << "MPI\n";

From c9ac197daeb1e606392cf1378d01da8f3ba7818b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 12:46:51 +0100
Subject: [PATCH 07/57] Linear algebra updates.

---
 Common/src/linear_algebra/CSysMatrix.cpp  | 11 ++++---
 Common/src/linear_algebra/CSysSolve.cpp   | 30 +++++++++++++++----
 Common/src/linear_algebra/CSysSolve_b.cpp | 35 ++++++++++++++---------
 3 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index e71afd5144b..003413cc93c 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -477,7 +477,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 template<class ScalarType>
 void CSysMatrix<ScalarType>::SetValZero() {
   const auto size = nnz*nVar*nEqn;
-  const auto chunk = roundUpDiv(size,omp_get_max_threads());
+  const auto chunk = roundUpDiv(size,omp_get_num_threads());
   const auto begin = chunk * omp_get_thread_num();
   const auto mySize = min(chunk, size-begin) * sizeof(ScalarType);
   memset(&matrix[begin], 0, mySize);
@@ -633,8 +633,6 @@ void CSysMatrix<ScalarType>::MatrixVectorProductTransposed(const CSysVector<Scal
                                                            CGeometry *geometry, const CConfig *config) const {
 
   /// TODO: The transpose product requires a different thread-parallel strategy.
-  SU2_OMP_MASTER
-  {
 
   /*--- Some checks for consistency between CSysMatrix and the CSysVector<ScalarType>s ---*/
 #ifndef NDEBUG
@@ -647,8 +645,13 @@ void CSysMatrix<ScalarType>::MatrixVectorProductTransposed(const CSysVector<Scal
     SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION);
   }
 #endif
+  SU2_OMP_BARRIER
 
-  prod = ScalarType(0.0); // set all entries of prod to zero
+  prod = ScalarType(0.0); // set all entries of prod to zero, note that this is designed to run in parallel
+  SU2_OMP_BARRIER // wait until it is done and make the memory view consistent
+
+  SU2_OMP_MASTER
+  {
   for (auto row_i = 0ul; row_i < nPointDomain; row_i++) {
     auto vec_begin = row_i*nVar; // offset to beginning of block col_ind[index]
     for (auto index = row_ptr[row_i]; index < row_ptr[row_i+1]; index++) {
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 888cab7e41c..28888335c80 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -831,6 +831,8 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
 
     AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
 
+    SU2_OMP_BARRIER
+
     AD::StopRecording();
 #endif
   }
@@ -924,16 +926,26 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
 
     AD::StartRecording();
 
+    SU2_OMP_BARRIER
+
     AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize());
 
+    SU2_OMP_BARRIER
+
 #ifdef CODI_REVERSE_TYPE
-    AD::FuncHelper->addUserData(&LinSysRes);
-    AD::FuncHelper->addUserData(&LinSysSol);
-    AD::FuncHelper->addUserData(&Jacobian);
-    AD::FuncHelper->addUserData(geometry);
-    AD::FuncHelper->addUserData(config);
-    AD::FuncHelper->addUserData(this);
+    SU2_OMP_MASTER
+    {
+      AD::FuncHelper->addUserData(&LinSysRes);
+      AD::FuncHelper->addUserData(&LinSysSol);
+      AD::FuncHelper->addUserData(&Jacobian);
+      AD::FuncHelper->addUserData(geometry);
+      AD::FuncHelper->addUserData(config);
+      AD::FuncHelper->addUserData(this);
+    }
+    SU2_OMP_BARRIER
+
     AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
+    SU2_OMP_BARRIER
 #endif
 
     /*--- Build preconditioner for the transposed Jacobian ---*/
@@ -953,7 +965,11 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
         break;
     }
 
+    SU2_OMP_BARRIER
+
     AD::EndExtFunc();
+
+    SU2_OMP_BARRIER
   }
 
   return IterLinSol;
@@ -1055,7 +1071,9 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType> & Jacobian,
 
   delete precond;
 
+  SU2_OMP_MASTER
   Iterations = IterLinSol;
+
   return IterLinSol;
 
 }
diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp
index 062708f9dc7..3953bd6cfb9 100644
--- a/Common/src/linear_algebra/CSysSolve_b.cpp
+++ b/Common/src/linear_algebra/CSysSolve_b.cpp
@@ -1,7 +1,7 @@
 /*!
  * \file CSysSolve_b.cpp
  * \brief Routines for the linear solver used in the reverse sweep of AD.
- * \author T. Albring
+ * \author T. Albring, J. Blühdorn
  * \version 7.1.0 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -37,36 +37,45 @@ void CSysSolve_b<ScalarType>::Solve_b(const codi::RealReverse::Real* x, codi::Re
                                       codi::DataStore* d) {
 
   CSysVector<su2double>* LinSysRes_b = nullptr;
-  d->getData(LinSysRes_b);
+  d->getDataByIndex(LinSysRes_b, 0);
 
   CSysVector<su2double>* LinSysSol_b = nullptr;
-  d->getData(LinSysSol_b);
+  d->getDataByIndex(LinSysSol_b, 1);
 
   CSysMatrix<ScalarType>* Jacobian = nullptr;
-  d->getData(Jacobian);
+  d->getDataByIndex(Jacobian, 2);
 
   CGeometry* geometry = nullptr;
-  d->getData(geometry);
+  d->getDataByIndex(geometry, 3);
 
   const CConfig* config = nullptr;
-  d->getData(config);
+  d->getDataByIndex(config, 4);
 
   CSysSolve<ScalarType>* solver = nullptr;
-  d->getData(solver);
+  d->getDataByIndex(solver, 5);
 
   /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/
 
-  for (unsigned long i = 0; i < n; i++) {
-    (*LinSysRes_b)[i] = y_b[i];
-    (*LinSysSol_b)[i] = 0.0;
+  SU2_OMP_BARRIER
+  SU2_OMP_MASTER
+  {
+    for (unsigned long i = 0; i < n; i++) {
+      (*LinSysRes_b)[i] = y_b[i];
+      (*LinSysSol_b)[i] = 0.0;
+    }
   }
+  SU2_OMP_BARRIER
 
   solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config);
 
-  for (unsigned long i = 0; i < n; i ++) {
-    x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i));
+  SU2_OMP_BARRIER
+  SU2_OMP_MASTER
+  {
+    for (unsigned long i = 0; i < n; i ++) {
+      x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i));
+    }
   }
-
+  SU2_OMP_BARRIER
 }
 
 template class CSysSolve_b<su2mixedfloat>;

From 5074ee34d2f66a0e826bdf02da552f7380f7d830 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 13:52:25 +0100
Subject: [PATCH 08/57] Zero-initialize memory.

---
 Common/include/toolboxes/allocation_toolbox.hpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp
index 9754217d3a7..279e3d62a9b 100644
--- a/Common/include/toolboxes/allocation_toolbox.hpp
+++ b/Common/include/toolboxes/allocation_toolbox.hpp
@@ -36,6 +36,8 @@
 #include <stdlib.h>
 #endif
 
+#include <cstring>
+
 #include <cassert>
 
 namespace MemoryAllocation
@@ -78,6 +80,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept
 #else
   ptr = ::aligned_alloc(alignment, size);
 #endif
+  memset(ptr, 0, size);
   return static_cast<T*>(ptr);
 }
 

From 33437ced739a9ed625e8232561a8cca8b5cf01b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 13:53:15 +0100
Subject: [PATCH 09/57] Fix CDiscAdjFEAIteration dependencies.

---
 .../src/iteration/CDiscAdjFEAIteration.cpp    | 37 +++++++++++--------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index b0887c79a41..42fe51675c1 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -301,6 +301,11 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
   bool de_effects = config[iZone]->GetDE_Effects() && nonlinear;
   bool element_based = dir_solver->IsElementBased() && nonlinear;
 
+  SU2_OMP_PARALLEL
+  {
+
+  int thread = omp_get_thread_num();
+
   for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) {
     su2double E = adj_solver->GetVal_Young(iProp);
     su2double nu = adj_solver->GetVal_Poisson(iProp);
@@ -309,33 +314,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
 
     /*--- Add dependencies for E and Nu ---*/
 
-    structural_numerics[FEA_TERM]->SetMaterial_Properties(iProp, E, nu);
+    structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
 
     /*--- Add dependencies for Rho and Rho_DL ---*/
 
-    structural_numerics[FEA_TERM]->SetMaterial_Density(iProp, rho, rhoDL);
+    structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
 
     /*--- Add dependencies for element-based simulations. ---*/
 
     if (element_based) {
       /*--- Neo Hookean Compressible ---*/
-      structural_numerics[MAT_NHCOMP]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_NHCOMP]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Ideal DE ---*/
-      structural_numerics[MAT_IDEALDE]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_IDEALDE]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Knowles ---*/
-      structural_numerics[MAT_KNOWLES]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_KNOWLES]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
     }
   }
 
   if (de_effects) {
     for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) {
-      structural_numerics[FEA_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
-      structural_numerics[DE_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
     }
   }
 
@@ -351,19 +356,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
       for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) {
         su2double dvfea = adj_solver->GetVal_DVFEA(iDV);
 
-        structural_numerics[FEA_TERM]->Set_DV_Val(iDV, dvfea);
+        structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
 
-        if (de_effects) structural_numerics[DE_TERM]->Set_DV_Val(iDV, dvfea);
+        if (de_effects) structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
 
         if (element_based) {
-          structural_numerics[MAT_NHCOMP]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_IDEALDE]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_KNOWLES]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
         }
       }
       break;
   }
 
+  } // end SU2_OMP_PARALLEL
+
   /*--- MPI dependencies. ---*/
 
   dir_solver->InitiateComms(structural_geometry, config[iZone], SOLUTION_FEA);

From 5735c0e898054934633a826e9b457e2dac94eb95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 1 Mar 2021 14:26:39 +0100
Subject: [PATCH 10/57] Disable preaccumulation for OpenMP.

---
 Common/src/CConfig.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
index eddd0d68918..d4cf8637e8c 100644
--- a/Common/src/CConfig.cpp
+++ b/Common/src/CConfig.cpp
@@ -4390,7 +4390,11 @@ void CConfig::SetPostprocessing(unsigned short val_software, unsigned short val_
 #if defined CODI_REVERSE_TYPE
   AD_Mode = YES;
 
+#if defined HAVE_OMP
+  AD::PreaccEnabled = false;
+#else
   AD::PreaccEnabled = AD_Preaccumulation;
+#endif
 
 #else
   if (AD_Mode == YES) {

From 4a820f715c746108db6ab89bf6ee4f5bd0ee9465 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Tue, 2 Mar 2021 19:04:39 +0100
Subject: [PATCH 11/57] Fix python wrapper builds.

---
 SU2_PY/pySU2/pySU2.i   | 1 +
 SU2_PY/pySU2/pySU2ad.i | 1 +
 2 files changed, 2 insertions(+)

diff --git a/SU2_PY/pySU2/pySU2.i b/SU2_PY/pySU2/pySU2.i
index ae4307d4c22..7e16b15f7ca 100644
--- a/SU2_PY/pySU2/pySU2.i
+++ b/SU2_PY/pySU2/pySU2.i
@@ -46,6 +46,7 @@ threads="1"
 %}
 
 // ----------- USED MODULES ------------
+%import "../../Common/include/code_config.hpp"
 %import "../../Common/include/basic_types/datatype_structure.hpp"
 %import "../../Common/include/parallelization/mpi_structure.hpp"
 %include "std_string.i"
diff --git a/SU2_PY/pySU2/pySU2ad.i b/SU2_PY/pySU2/pySU2ad.i
index 9af6ac16fff..d0e6605f672 100644
--- a/SU2_PY/pySU2/pySU2ad.i
+++ b/SU2_PY/pySU2/pySU2ad.i
@@ -46,6 +46,7 @@ threads="1"
 %}
 
 // ----------- USED MODULES ------------
+%import "../../Common/include/code_config.hpp"
 %import "../../Common/include/basic_types/datatype_structure.hpp"
 %import "../../Common/include/parallelization/mpi_structure.hpp"
 %include "std_string.i"

From a26e2be40c2904e6fdae73ea5df7d9db3941b8cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Tue, 2 Mar 2021 19:10:02 +0100
Subject: [PATCH 12/57] Fix missing definition of size_t.

---
 Common/include/parallelization/omp_structure.hpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index 7a258881f86..9fd13aa9afc 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -38,6 +38,8 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include "../code_config.hpp"
 
 #if defined(_MSC_VER)

From 94ac52ef4e6691a1a6e467d11e3dff7beed3e7ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Wed, 3 Mar 2021 11:56:06 +0100
Subject: [PATCH 13/57] Check OMPT support.

---
 Common/include/code_config.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index 0c017612a6b..a708e2d3cb6 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -61,7 +61,7 @@ template<bool condition>
 using su2enable_if = typename std::enable_if<condition,bool>::type;
 
 /*--- Detect compilation with OpenMP. ---*/
-#if defined(_OPENMP)
+#if defined(_OPENMP) && (!defined(CODI_REVERSE_TYPE) || _OPENMP >= 201811)
 #define HAVE_OMP
 #endif
 

From 7bbb9cd8673c892ed92da63bf221d15f9a774466 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Mon, 8 Mar 2021 12:35:56 +0100
Subject: [PATCH 14/57] CoDiPack update.

---
 externals/codi        | 2 +-
 meson_scripts/init.py | 2 +-
 preconfigure.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/externals/codi b/externals/codi
index 2a0dbdbed2f..89958053647 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87
+Subproject commit 899580536474003370a912234bec4e2b48de2bcc
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index abf9518175a..a2bf57623b3 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -44,7 +44,7 @@ def init_submodules(method = 'auto'):
 
   # This information of the modules is used if projects was not cloned using git
   # The sha tag must be maintained manually to point to the correct commit
-  sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87'
+  sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc'
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
diff --git a/preconfigure.py b/preconfigure.py
index 0fc86eaa75a..ee0e86e03c5 100755
--- a/preconfigure.py
+++ b/preconfigure.py
@@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False):
     
     # This information of the modules is used if projects was not cloned using git
     # The sha tag must be maintained manually to point to the correct commit
-    sha_version_codi = '2a0dbdbed2fcbeb1b5f68a0fc2228d2a27d28a87'
+    sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc'
     github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
     sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
     github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'

From cfb7285034065f7da4868c4101b512868c74155b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 11 Mar 2021 22:21:03 +0100
Subject: [PATCH 15/57] OpDiLib update.

---
 externals/opdi        | 2 +-
 meson_scripts/init.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/externals/opdi b/externals/opdi
index 3c4132bbf12..f14b42f1255 160000
--- a/externals/opdi
+++ b/externals/opdi
@@ -1 +1 @@
-Subproject commit 3c4132bbf1266b2e999d22212c8de88ec085a3e0
+Subproject commit f14b42f1255674bb10db91e3f45ceb39c1bccd17
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index a2bf57623b3..a7e018a0a6b 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -48,7 +48,7 @@ def init_submodules(method = 'auto'):
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
-  sha_version_opdi = '3c4132bbf1266b2e999d22212c8de88ec085a3e0'
+  sha_version_opdi = 'f14b42f1255674bb10db91e3f45ceb39c1bccd17'
   github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'

From 8fc09412fb52deb665f06682ef6e3ad29ff2c790 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 11 Mar 2021 22:25:39 +0100
Subject: [PATCH 16/57] CoDiPack update.

---
 externals/codi        | 2 +-
 meson_scripts/init.py | 2 +-
 preconfigure.py       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/externals/codi b/externals/codi
index 89958053647..6a67202a388 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 899580536474003370a912234bec4e2b48de2bcc
+Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index a7e018a0a6b..a42640f9fde 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -44,7 +44,7 @@ def init_submodules(method = 'auto'):
 
   # This information of the modules is used if projects was not cloned using git
   # The sha tag must be maintained manually to point to the correct commit
-  sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc'
+  sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
diff --git a/preconfigure.py b/preconfigure.py
index ee0e86e03c5..ca8187afc2c 100755
--- a/preconfigure.py
+++ b/preconfigure.py
@@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False):
     
     # This information of the modules is used if projects was not cloned using git
     # The sha tag must be maintained manually to point to the correct commit
-    sha_version_codi = '899580536474003370a912234bec4e2b48de2bcc'
+    sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
     github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
     sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
     github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'

From e04f931abc10bab7db19e21a9f2bd26dca972ac1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 11 Mar 2021 23:35:29 +0100
Subject: [PATCH 17/57] Enable OpDiLib macro backend.

---
 Common/include/code_config.hpp                   |  6 +++++-
 Common/include/parallelization/omp_structure.cpp | 13 ++++++++++++-
 Common/include/parallelization/omp_structure.hpp |  4 ++++
 Common/src/basic_types/ad_structure.cpp          |  2 ++
 Common/src/meson.build                           |  3 +--
 5 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index 11fb69bd296..a9aabf17bca 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -61,7 +61,7 @@ template<bool condition>
 using su2enable_if = typename std::enable_if<condition,bool>::type;
 
 /*--- Detect compilation with OpenMP. ---*/
-#if defined(_OPENMP) && (!defined(CODI_REVERSE_TYPE) || _OPENMP >= 201811)
+#if defined(_OPENMP)
 #define HAVE_OMP
 #endif
 
@@ -118,3 +118,7 @@ using su2mixedfloat = passivedouble;
 #if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
 #define HAVE_OPDI
 #endif
+
+#if _OPENMP >= 201811
+#define HAVE_OMPT
+#endif
diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp
index 069be89f250..0daca1ca021 100644
--- a/Common/include/parallelization/omp_structure.cpp
+++ b/Common/include/parallelization/omp_structure.cpp
@@ -30,18 +30,29 @@
 
 void omp_initialize() {
 #ifdef HAVE_OPDI
+#if !defined(HAVE_OMPT)
+  opdi::backend = new opdi::MacroBackend;
+  opdi::backend->init();
+#endif
   opdi::logic = new opdi::OmpLogic;
   opdi::logic->init();
+  su2double::getGlobalTape().initialize();
   opdi::tool = new CoDiOpDiTool<su2double>;
+  opdi::tool->init();
 #endif
 }
 
 void omp_finalize() {
 #ifdef HAVE_OPDI
+  opdi::tool->finalize();
+  su2double::getGlobalTape().finalize();
   opdi::logic->finalize();
   opdi::backend->finalize();
-  delete opdi::logic;
   delete opdi::tool;
+  delete opdi::logic;
+#if !defined(HAVE_OMPT)
+  delete opdi::backend;
+#endif
 #endif
 }
 
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index 1f01245017b..bbf976f76f6 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -52,7 +52,11 @@
 #include <omp.h>
 
 #if defined(HAVE_OPDI)
+#if defined(HAVE_OMPT)
 #include "opdi/backend/ompt/omptBackend.hpp"
+#else
+#include "opdi/backend/macro/macroBackend.hpp"
+#endif
 #include "codi/externals/codiOpdiTool.hpp"
 #include "opdi.hpp"
 #endif
diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp
index becea08156a..20bfe3e6d74 100644
--- a/Common/src/basic_types/ad_structure.cpp
+++ b/Common/src/basic_types/ad_structure.cpp
@@ -49,3 +49,5 @@ namespace AD {
 
 #endif
 }
+
+#include "../../include/parallelization/omp_structure.cpp"
diff --git a/Common/src/meson.build b/Common/src/meson.build
index b3e0726e70c..5dcbb57c66f 100644
--- a/Common/src/meson.build
+++ b/Common/src/meson.build
@@ -3,8 +3,7 @@ common_src =files(['graph_coloring_structure.cpp',
            'CConfig.cpp',
            'basic_types/ad_structure.cpp',
            'wall_model.cpp',
-           '../include/parallelization/mpi_structure.cpp',
-           '../include/parallelization/omp_structure.cpp'])
+           '../include/parallelization/mpi_structure.cpp'])
 
 subdir('linear_algebra')
 subdir('toolboxes')

From 1351c797cb535ab37aa38e1caf08b40c88dda381 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 11 Mar 2021 23:38:37 +0100
Subject: [PATCH 18/57] Update SU2_OMP macros and introduce END macros.

---
 .../include/parallelization/omp_structure.hpp | 36 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index bbf976f76f6..7b688d3f388 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -139,8 +139,11 @@ void omp_finalize();
 
 /*--- Convenience macros (do not use excessive nesting). ---*/
 
-#define SU2_OMP_MASTER SU2_OMP(master)
 #define SU2_OMP_ATOMIC SU2_OMP(atomic)
+
+#ifndef HAVE_OPDI
+
+#define SU2_OMP_MASTER SU2_OMP(master)
 #define SU2_OMP_BARRIER SU2_OMP(barrier)
 #define SU2_OMP_CRITICAL SU2_OMP(critical)
 
@@ -148,9 +151,40 @@ void omp_finalize();
 #define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS)
 #define SU2_OMP_PARALLEL_ON(NTHREADS) SU2_OMP(parallel num_threads(NTHREADS))
 
+#define SU2_OMP_FOR_(ARGS) SU2_OMP(for ARGS)
 #define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK))
 #define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK))
 
+#define SU2_NOWAIT nowait
+
+#define END_SU2_OMP_MASTER
+#define END_SU2_OMP_CRITICAL
+#define END_SU2_OMP_PARALLEL
+#define END_SU2_OMP_FOR
+
+#else
+
+#define SU2_OMP_MASTER OPDI_MASTER()
+#define SU2_OMP_BARRIER OPDI_BARRIER()
+#define SU2_OMP_CRITICAL OPDI_CRITICAL()
+
+#define SU2_OMP_PARALLEL OPDI_PARALLEL()
+#define SU2_OMP_PARALLEL_(ARGS) OPDI_PARALLEL(ARGS)
+#define SU2_OMP_PARALLEL_ON(NTHREADS) OPDI_PARALLEL(num_threads(NTHREADS))
+
+#define SU2_OMP_FOR_(ARGS) OPDI_FOR(ARGS)
+#define SU2_OMP_FOR_DYN(CHUNK) OPDI_FOR(schedule(dynamic,CHUNK))
+#define SU2_OMP_FOR_STAT(CHUNK) OPDI_FOR(schedule(static,CHUNK))
+
+#define SU2_NOWAIT OPDI_NOWAIT
+
+#define END_SU2_OMP_MASTER OPDI_END_MASTER
+#define END_SU2_OMP_CRITICAL OPDI_END_CRITICAL
+#define END_SU2_OMP_PARALLEL OPDI_END_PARALLEL
+#define END_SU2_OMP_FOR OPDI_END_FOR
+
+#endif
+
 /*--- Convenience functions (e.g. to compute chunk sizes). ---*/
 
 /*!

From 6bf97a252d01bcad24b01e42d308478a886448df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 11 Mar 2021 23:42:24 +0100
Subject: [PATCH 19/57] Update specialized macros.

---
 Common/include/linear_algebra/CSysVector.hpp       | 7 +++++--
 SU2_CFD/include/integration/CNewtonIntegration.hpp | 7 +++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp
index 6a54b84efa1..dae407c8973 100644
--- a/Common/include/linear_algebra/CSysVector.hpp
+++ b/Common/include/linear_algebra/CSysVector.hpp
@@ -45,12 +45,14 @@
  */
 #ifdef HAVE_OMP
 #ifdef HAVE_OMP_SIMD
-#define CSYSVEC_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait)
+#define CSYSVEC_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT)
 #else
-#define CSYSVEC_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+#define CSYSVEC_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
 #endif
+#define END_CSYSVEC_PARFOR END_SU2_OMP_FOR
 #else
 #define CSYSVEC_PARFOR SU2_OMP_SIMD
+#define END_CSYSVEC_PARFOR
 #endif
 
 /*!
@@ -443,3 +445,4 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
 };
 
 #undef CSYSVEC_PARFOR
+#undef END_CSYSVEC_PARFOR
diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp
index 86626b7629e..4450a71898c 100644
--- a/SU2_CFD/include/integration/CNewtonIntegration.hpp
+++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp
@@ -33,12 +33,14 @@
 
 #ifdef HAVE_OMP
 #ifdef HAVE_OMP_SIMD
-#define CNEWTON_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait)
+#define CNEWTON_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT)
 #else
-#define CNEWTON_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+#define CNEWTON_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
 #endif
+#define END_CNEWTON_PARFOR END_SU2_OMP_FOR
 #else
 #define CNEWTON_PARFOR SU2_OMP_SIMD
+#define END_CNEWTON_PARFOR
 #endif
 
 /*!
@@ -212,3 +214,4 @@ class CNewtonIntegration final : public CIntegration {
 };
 
 #undef CNEWTON_PARFOR
+#undef END_CNEWTON_PARFOR

From aeaf25141fed3662c4782552954075f5628ccf0e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Fri, 12 Mar 2021 00:09:29 +0100
Subject: [PATCH 20/57] Update macros throughout the code.

---
 Common/src/linear_algebra/CSysMatrix.cpp       |  4 ++--
 SU2_CFD/include/limiters/CLimiterDetails.hpp   |  2 +-
 SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp | 10 +++++-----
 SU2_CFD/include/solvers/CFVMFlowSolverBase.inl |  2 +-
 SU2_CFD/src/solvers/CFEASolver.cpp             |  2 +-
 SU2_CFD/src/solvers/CTurbSolver.cpp            |  4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index 5f523daf0cb..909b9542de8 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -675,7 +675,7 @@ template<class ScalarType>
 void CSysMatrix<ScalarType>::BuildJacobiPreconditioner(bool transpose) {
 
   /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. ---*/
-  SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait)
+  SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
     InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]), transpose);
 
@@ -1105,7 +1105,7 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
 
   /*--- Jacobi preconditioning where there is no linelet ---*/
 
-  SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait)
+  SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT)
   for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++)
     if (!LineletBool[iPoint])
       MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]);
diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp
index 2b82e80351c..d605c668b83 100644
--- a/SU2_CFD/include/limiters/CLimiterDetails.hpp
+++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp
@@ -185,7 +185,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
     localMin = largeNum;
     localMax =-largeNum;
 
-    SU2_OMP(for schedule(static, 512) nowait)
+    SU2_OMP_FOR_(schedule(static, 512) SU2_NOWAIT)
     for(size_t iPoint = 0; iPoint < geometry.GetnPointDomain(); ++iPoint)
     {
       for(size_t iVar = varBegin; iVar < varEnd; ++iVar)
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index acb1135c426..251f494f8ed 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -427,7 +427,7 @@ class CFVMFlowSolverBase : public CSolver {
       /*--- Thread-local variables for min/max reduction. ---*/
       su2double minDt = 1e30, maxDt = 0.0;
 
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
 
         su2double Vol = geometry->nodes->GetVolume(iPoint);
@@ -509,7 +509,7 @@ class CFVMFlowSolverBase : public CSolver {
       /*--- Thread-local variable for reduction. ---*/
       su2double glbDtND = 1e30;
 
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
         glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint));
       }
@@ -744,7 +744,7 @@ class CFVMFlowSolverBase : public CSolver {
     /*--- Update the solution and residuals ---*/
 
     if (!adjoint) {
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
         su2double Vol = geometry->nodes->GetVolume(iPoint) + geometry->nodes->GetPeriodicVolume(iPoint);
@@ -869,7 +869,7 @@ class CFVMFlowSolverBase : public CSolver {
     /*--- Add pseudotime term to Jacobian. ---*/
 
     if (implicit) {
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
         /*--- Modify matrix diagonal to improve diagonal dominance. ---*/
@@ -893,7 +893,7 @@ class CFVMFlowSolverBase : public CSolver {
 
     /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/
 
-    SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+    SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
     for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
       /*--- Multigrid contribution to residual. ---*/
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index 5cc95853861..adaba33241d 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -581,7 +581,7 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
 
   /*--- Solve or smooth the linear system. ---*/
 
-  SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait)
+  SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT)
   for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) {
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);
diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp
index 23a47c217f2..d36e156a250 100644
--- a/SU2_CFD/src/solvers/CFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CFEASolver.cpp
@@ -1317,7 +1317,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
     /*--- Compute the von Misses stress at each point, and the maximum for the domain. ---*/
     su2double maxVonMises = 0.0;
 
-    SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+    SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
     for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
 
       const auto vms = CFEAElasticity::VonMisesStress(nDim, nodes->GetStress_FEM(iPoint));
diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp
index 66d9a7c8308..4428f5a5f72 100644
--- a/SU2_CFD/src/solvers/CTurbSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSolver.cpp
@@ -528,7 +528,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
 
   /*--- Build implicit system ---*/
 
-  SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+  SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
     /// TODO: This could be the SetTime_Step of this solver.
@@ -637,7 +637,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_
 
   /*--- Solve or smooth the linear system. ---*/
 
-  SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait)
+  SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT)
   for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) {
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);

From 5cea3861e1e1e97d54796ce34ac16de4811da89d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Fri, 12 Mar 2021 00:22:02 +0100
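Subject: [PATCH 21/57] Introduce END macros throughout the code.

Terminate the regions opened by SU2_OMP_MASTER, SU2_OMP_CRITICAL,
SU2_OMP_PARALLEL and the SU2_OMP_FOR variants with the matching
END_SU2_OMP_* macro (END_CSYSVEC_PARFOR for the CSysVector loops).
Where a closing brace used to share its line with SU2_OMP_BARRIER or a
trailing "end ..." comment, the brace now stands alone so that the END
macro can follow it directly.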
---
 Common/include/basic_types/ad_structure.hpp   |  8 ++
 Common/include/linear_algebra/CSysSolve.hpp   |  4 +
 Common/include/linear_algebra/CSysVector.hpp  |  8 ++
 .../include/parallelization/omp_structure.hpp |  3 +
 Common/include/toolboxes/graph_toolbox.hpp    |  2 +
 Common/src/geometry/CGeometry.cpp             | 42 +++++++-
 Common/src/geometry/CMultiGridGeometry.cpp    | 10 +-
 Common/src/geometry/CPhysicalGeometry.cpp     | 30 +++++-
 .../CIsoparametric.cpp                        |  5 +-
 .../src/interface_interpolation/CMirror.cpp   |  3 +-
 .../CNearestNeighbor.cpp                      |  5 +-
 .../CRadialBasisFunction.cpp                  |  7 +-
 Common/src/linear_algebra/CSysMatrix.cpp      | 35 ++++++-
 Common/src/linear_algebra/CSysSolve.cpp       | 13 +++
 Common/src/linear_algebra/CSysSolve_b.cpp     |  2 +
 .../gradients/computeGradientsGreenGauss.hpp  |  2 +
 .../computeGradientsLeastSquares.hpp          |  2 +
 .../integration/CNewtonIntegration.hpp        |  3 +
 SU2_CFD/include/limiters/CLimiterDetails.hpp  |  4 +
 SU2_CFD/include/limiters/computeLimiters.hpp  |  1 +
 .../include/limiters/computeLimiters_impl.hpp |  2 +
 .../include/solvers/CFVMFlowSolverBase.hpp    | 31 ++++++
 .../include/solvers/CFVMFlowSolverBase.inl    | 23 ++++-
 SU2_CFD/include/solvers/CTurbSASolver.hpp     |  1 +
 SU2_CFD/include/solvers/CTurbSSTSolver.hpp    |  1 +
 SU2_CFD/src/drivers/CDriver.cpp               | 35 +++++++
 SU2_CFD/src/integration/CIntegration.cpp      |  8 +-
 .../src/integration/CMultiGridIntegration.cpp | 22 ++++-
 .../src/integration/CNewtonIntegration.cpp    | 18 +++-
 .../integration/CSingleGridIntegration.cpp    |  8 +-
 .../src/iteration/CDiscAdjFEAIteration.cpp    |  3 +-
 SU2_CFD/src/iteration/CFluidIteration.cpp     |  1 +
 .../numerics/elasticity/CFEAElasticity.cpp    |  1 +
 SU2_CFD/src/python_wrapper_structure.cpp      |  2 +
 SU2_CFD/src/solvers/CEulerSolver.cpp          | 44 ++++++++-
 SU2_CFD/src/solvers/CFEASolver.cpp            | 99 ++++++++++++++++---
 SU2_CFD/src/solvers/CIncEulerSolver.cpp       | 31 ++++++
 SU2_CFD/src/solvers/CIncNSSolver.cpp          |  5 +
 SU2_CFD/src/solvers/CMeshSolver.cpp           | 27 ++++-
 SU2_CFD/src/solvers/CNEMOEulerSolver.cpp      |  3 +
 SU2_CFD/src/solvers/CNSSolver.cpp             |  7 ++
 SU2_CFD/src/solvers/CSolver.cpp               | 20 +++-
 SU2_CFD/src/solvers/CTurbSASolver.cpp         | 15 +++
 SU2_CFD/src/solvers/CTurbSSTSolver.cpp        |  8 ++
 SU2_CFD/src/solvers/CTurbSolver.cpp           | 21 +++-
 45 files changed, 574 insertions(+), 51 deletions(-)

diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index a40bf482c2f..1699534828b 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -493,6 +493,7 @@ namespace AD{
         FuncHelper->disableOutputPrimalStore();
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void SetExtFuncIn(const su2double &data) {
@@ -500,6 +501,7 @@ namespace AD{
     {
       FuncHelper->addInput(data);
     }
+    END_SU2_OMP_MASTER
   }
 
   template<class T>
@@ -510,6 +512,7 @@ namespace AD{
         FuncHelper->addInput(data[i]);
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   template<class T>
@@ -522,6 +525,7 @@ namespace AD{
         }
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void SetExtFuncOut(su2double& data) {
@@ -531,6 +535,7 @@ namespace AD{
         FuncHelper->addOutput(data);
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   template<class T>
@@ -543,6 +548,7 @@ namespace AD{
         }
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   template<class T>
@@ -557,6 +563,7 @@ namespace AD{
         }
       }
     }
+    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void delete_handler(void *handler) {
@@ -569,6 +576,7 @@ namespace AD{
     {
       delete FuncHelper;
     }
+    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE bool BeginPassive() {
diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
index ecfb4a3a789..548e1986bac 100644
--- a/Common/include/linear_algebra/CSysSolve.hpp
+++ b/Common/include/linear_algebra/CSysSolve.hpp
@@ -221,6 +221,7 @@ class CSysSolve {
       LinSysRes_ptr = &LinSysRes;
       LinSysSol_ptr = &LinSysSol;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -242,6 +243,7 @@ class CSysSolve {
       LinSysRes_ptr = &LinSysRes_tmp;
       LinSysSol_ptr = &LinSysSol_tmp;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -258,6 +260,7 @@ class CSysSolve {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -277,6 +280,7 @@ class CSysSolve {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp
index dae407c8973..9101eeae083 100644
--- a/Common/include/linear_algebra/CSysVector.hpp
+++ b/Common/include/linear_algebra/CSysVector.hpp
@@ -188,10 +188,12 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
 
     SU2_OMP_MASTER
     Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     CSYSVEC_PARFOR
     for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]);
+    END_CSYSVEC_PARFOR
   }
 
   /*!
@@ -252,6 +254,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
   CSysVector& operator=(const CSysVector& other) {
     CSYSVEC_PARFOR
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] = other.vec_val[i];
+    END_CSYSVEC_PARFOR
     return *this;
   }
 
@@ -263,12 +266,14 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
   CSysVector& operator OP(ScalarType val) {                               \
     CSYSVEC_PARFOR                                                        \
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP val;                  \
+    END_CSYSVEC_PARFOR                                                    \
     return *this;                                                         \
   }                                                                       \
   template <class T>                                                      \
   CSysVector& operator OP(const VecExpr::CVecExpr<T, ScalarType>& expr) { \
     CSYSVEC_PARFOR                                                        \
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP expr.derived()[i];    \
+    END_CSYSVEC_PARFOR                                                    \
     return *this;                                                         \
   }
   MAKE_COMPOUND(=)
@@ -295,6 +300,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
     SU2_OMP_BARRIER
     SU2_OMP_MASTER
     dotRes = 0.0;
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Local dot product for each thread. ---*/
@@ -304,6 +310,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
     for (auto i = 0ul; i < nElmDomain; ++i) {
       sum += vec_val[i] * expr.derived()[i];
     }
+    END_CSYSVEC_PARFOR
 
     /*--- Update shared variable with "our" partial sum. ---*/
     atomicAdd(sum, dotRes);
@@ -318,6 +325,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
         const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
         SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
       }
+      END_SU2_OMP_MASTER
     }
 #endif
     /*--- Make view of result consistent across threads. ---*/
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index 7b688d3f388..987e87dbd86 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -232,6 +232,7 @@ void parallelCopy(size_t size, const T* src, U* dst)
 {
   SU2_OMP_FOR_STAT(2048)
   for(size_t i=0; i<size; ++i) dst[i] = src[i];
+  END_SU2_OMP_FOR
 }
 
 /*!
@@ -245,6 +246,7 @@ void parallelSet(size_t size, T val, U* dst)
 {
   SU2_OMP_FOR_STAT(2048)
   for(size_t i=0; i<size; ++i) dst[i] = val;
+  END_SU2_OMP_FOR
 }
 
 /*!
@@ -258,6 +260,7 @@ inline void atomicAdd(T rhs, T& lhs)
 {
   SU2_OMP_CRITICAL
   lhs += rhs;
+  END_SU2_OMP_CRITICAL
 }
 template<class T, su2enable_if<std::is_arithmetic<T>::value> = 0>
 inline void atomicAdd(T rhs, T& lhs)
diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp
index a33e7d43117..c013d6d5b06 100644
--- a/Common/include/toolboxes/graph_toolbox.hpp
+++ b/Common/include/toolboxes/graph_toolbox.hpp
@@ -166,6 +166,7 @@ class CCompressedSparsePattern {
     SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads())))
     for(Index_t k = 0; k < getOuterSize(); ++k)
       m_diagPtr(k) = findInnerIdx(k,k);
+    END_SU2_OMP_PARALLEL
   }
 
   /*!
@@ -184,6 +185,7 @@ class CCompressedSparsePattern {
         assert(m_innerIdxTransp(k) != m_innerIdx.size() && "The pattern is not symmetric.");
       }
     }
+    END_SU2_OMP_PARALLEL
   }
 
   /*!
diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp
index 69f337368de..7c62cdd38c4 100644
--- a/Common/src/geometry/CGeometry.cpp
+++ b/Common/src/geometry/CGeometry.cpp
@@ -400,7 +400,9 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
   delete [] bufS_P2PRecv;
   bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] ();
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
 }
 
@@ -504,6 +506,7 @@ void CGeometry::PostP2PRecvs(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 }
 
@@ -601,6 +604,7 @@ void CGeometry::PostP2PSends(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 }
 
@@ -736,6 +740,7 @@ void CGeometry::InitiateComms(CGeometry *geometry,
           break;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -782,6 +787,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
 
     SU2_OMP_MASTER
     SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -839,6 +845,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
           break;
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -848,6 +855,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
 #ifdef HAVE_MPI
   SU2_OMP_MASTER
   SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);
+  END_SU2_OMP_MASTER
 #endif
   SU2_OMP_BARRIER
 
@@ -1226,7 +1234,9 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
   delete [] bufS_PeriodicRecv;
   bufS_PeriodicRecv = new unsigned short[nRecv] ();
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
@@ -1283,6 +1293,7 @@ void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 #endif
 
@@ -1337,7 +1348,8 @@ void CGeometry::PostPeriodicSends(CGeometry *geometry,
                      CURRENT_FUNCTION);
       break;
   }
-  } // end master
+  }
+  END_SU2_OMP_MASTER
 #else
 
   /*--- Copy my own rank's data into the recv buffer directly in serial. ---*/
@@ -3159,6 +3171,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem+iDim] = 0.0;
     vol_elem[iElem] = 0.0;
   }
+  END_SU2_OMP_FOR
 
   /*--- Populate ---*/
   SU2_OMP_FOR_STAT(256)
@@ -3168,6 +3181,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem_global+iDim] = elem[iElem]->GetCG(iDim);
     vol_elem[iElem_global] = elem[iElem]->GetVolume();
   }
+  END_SU2_OMP_FOR
 
 #ifdef HAVE_MPI
   /*--- Account for the duplication introduced by the halo elements and the
@@ -3175,10 +3189,12 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
   SU2_OMP_FOR_STAT(256)
   for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
     halo_detect[iElem] = 0;
+  END_SU2_OMP_FOR
 
   SU2_OMP_FOR_STAT(256)
   for(auto iElem=0ul; iElem<nElem; ++iElem)
     halo_detect[elem[iElem]->GetGlobalIndex()] = 1;
+  END_SU2_OMP_FOR
 
   /*--- Share with all processors ---*/
   SU2_OMP_MASTER
@@ -3195,6 +3211,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
     MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm());
     halo_detect.swap(char_buffer);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_FOR_STAT(256)
@@ -3204,6 +3221,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem+iDim] /= numRepeat;
     vol_elem[iElem] /= numRepeat;
   }
+  END_SU2_OMP_FOR
 #endif
 
   /*--- SECOND: Each processor performs the average for its elements. For each
@@ -3223,11 +3241,13 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
       work_values[iElem] = 0.0;
+    END_SU2_OMP_FOR
 
     /*--- Populate ---*/
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<nElem; ++iElem)
       work_values[elem[iElem]->GetGlobalIndex()] = values[iElem];
+    END_SU2_OMP_FOR
 
 #ifdef HAVE_MPI
     /*--- Share with all processors ---*/
@@ -3237,6 +3257,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
       swap(buffer, work_values); delete [] buffer;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Account for duplication ---*/
@@ -3245,6 +3266,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       su2double numRepeat = halo_detect[iElem];
       work_values[iElem] /= numRepeat;
     }
+    END_SU2_OMP_FOR
 #endif
 
     /*--- Filter ---*/
@@ -3308,9 +3330,11 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
           SU2_MPI::Error("Unknown type of filter kernel",CURRENT_FUNCTION);
       }
     }
+    END_SU2_OMP_FOR
   }
 
-  } // end OpenMP parallel section
+  }
+  END_SU2_OMP_PARALLEL
 
   limited_searches /= kernels.size();
 
@@ -3342,13 +3366,16 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
       nFaces_elem[iElem] = 0;
+    END_SU2_OMP_FOR
 
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<nElem; ++iElem) {
       auto iElem_global = elem[iElem]->GetGlobalIndex();
       nFaces_elem[iElem_global] = elem[iElem]->GetnFaces();
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 #ifdef HAVE_MPI
   /*--- Share with all processors ---*/
   {
@@ -3378,6 +3405,7 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
     /*--- Initialize ---*/
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<matrix_size; ++iElem) neighbour_idx[iElem] = -1;
+    END_SU2_OMP_FOR
 
     /*--- Populate ---*/
     SU2_OMP_FOR_STAT(128)
@@ -3395,7 +3423,9 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
         }
       }
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 #ifdef HAVE_MPI
   /*--- Share with all processors ---*/
   {
@@ -3523,6 +3553,7 @@ void CGeometry::SetElemVolume()
     if(nDim==2) elem[iElem]->SetVolume(element->ComputeArea());
     else        elem[iElem]->SetVolume(element->ComputeVolume());
   }
+  END_SU2_OMP_FOR
 
   delete elements[0];
   delete elements[1];
@@ -3531,7 +3562,8 @@ void CGeometry::SetElemVolume()
     delete elements[3];
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CGeometry::SetGeometryPlanes(CConfig *config) {
diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp
index f6ba9222211..341d8cac8b2 100644
--- a/Common/src/geometry/CMultiGridGeometry.cpp
+++ b/Common/src/geometry/CMultiGridGeometry.cpp
@@ -1142,7 +1142,9 @@ void CMultiGridGeometry::SetControlVolume(CConfig *config, CGeometry *fine_grid,
     }
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_grid, unsigned short action) {
@@ -1184,7 +1186,9 @@ void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_
       if (Area == 0.0) for (iDim = 0; iDim < nDim; iDim++) NormalFace[iDim] = EPS*EPS;
     }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CMultiGridGeometry::SetCoord(CGeometry *geometry) {
@@ -1202,6 +1206,7 @@ void CMultiGridGeometry::SetCoord(CGeometry *geometry) {
     }
     nodes->SetCoord(Point_Coarse, Coordinates);
   }
+  END_SU2_OMP_FOR
 }
 
 void CMultiGridGeometry::SetMultiGridWallHeatFlux(CGeometry *geometry, unsigned short val_marker){
@@ -1320,6 +1325,7 @@ void CMultiGridGeometry::SetRestricted_GridVelocity(CGeometry *fine_mesh, CConfi
     for (unsigned short iDim = 0; iDim < nDim; iDim++)
       nodes->SetGridVel(Point_Coarse, iDim, Grid_Vel[iDim]);
   }
+  END_SU2_OMP_FOR
 }
 
 
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 6ede288f986..3b37d54f1cc 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -4375,7 +4375,10 @@ void CPhysicalGeometry::Check_IntElem_Orientation(const CConfig *config) {
       }
     }
 
-  }} // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_FOR
+  }
+  END_SU2_OMP_PARALLEL
 
   auto reduce = [](unsigned long& val) {
     unsigned long tmp = val;
@@ -4522,7 +4525,10 @@ void CPhysicalGeometry::Check_BoundElem_Orientation(const CConfig *config) {
         }
       }
     }
-  }} // end SU2_OMP_PARALLEL
+    END_SU2_OMP_FOR
+  }
+  }
+  END_SU2_OMP_PARALLEL
 
   auto reduce = [](unsigned long& val) {
     unsigned long tmp = val;
@@ -4698,6 +4704,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
     }
     nodes->SetElems(elems);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Loop over all the points ---*/
@@ -4734,11 +4741,14 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
     /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/
     nodes->SetnNeighbor(iPoint, points[iPoint].size());
   }
+  END_SU2_OMP_FOR
 
   SU2_OMP_MASTER
   nodes->SetPoints(points);
+  END_SU2_OMP_MASTER
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CPhysicalGeometry::SetRCM_Ordering(CConfig *config) {
@@ -6681,6 +6691,7 @@ void CPhysicalGeometry::SetMaxLength(CConfig* config) {
     max_delta = GeometryToolbox::Distance(nDim, Coord_i, Coord_j);
     nodes->SetMaxLength(iPoint, max_delta);
   }
+  END_SU2_OMP_FOR
 
   InitiateComms(this, config, MAX_LENGTH);
   CompleteComms(this, config, MAX_LENGTH);
@@ -7561,10 +7572,12 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     SU2_OMP_FOR_STAT(1024)
     for (auto iEdge = 0ul; iEdge < nEdge; iEdge++)
       edges->SetNormal(iEdge, ZeroArea);
+    END_SU2_OMP_FOR
 
     SU2_OMP_FOR_STAT(1024)
     for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
       nodes->SetVolume(iPoint, 0.0);
+    END_SU2_OMP_FOR
   }
 
   SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. ---*/
@@ -7699,7 +7712,9 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     if (nDim == 3) cout <<"Volume of the computational grid: "<< DomainVolume <<"."<< endl;
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- Check if there is a normal with null area ---*/
   SU2_OMP_FOR_STAT(1024)
@@ -7708,6 +7723,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     su2double DefaultArea[MAXNDIM] = {EPS*EPS};
     if (Area2 == 0.0) edges->SetNormal(iEdge, DefaultArea);
   }
+  END_SU2_OMP_FOR
 }
 
 void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned short action) {
@@ -7719,6 +7735,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
     for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++)
       for (unsigned long iVertex = 0; iVertex < nVertex[iMarker]; iVertex++)
         vertex[iMarker][iVertex]->SetZeroValues();
+    END_SU2_OMP_FOR
   }
 
   /*--- Loop over all the boundary elements ---*/
@@ -7784,6 +7801,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
       AD::EndPreacc();
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Check if there is a normal with null area ---*/
 
@@ -7795,6 +7813,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
       if (Area2 == 0.0) vertex[iMarker][iVertex]->SetNormal(DefaultArea);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CPhysicalGeometry::VisualizeControlVolume(const CConfig *config) const {
@@ -11076,9 +11095,10 @@ void CPhysicalGeometry::SetWallDistance(const CConfig *config, CADTElemClass *Wa
         nodes->SetRoughnessHeight(iPoint, localRoughness);
       }
     }
+    END_SU2_OMP_FOR
 
   }
-  // end SU2_OMP_PARALLEL
+  END_SU2_OMP_PARALLEL
 }
 
 void CPhysicalGeometry::SetGlobalMarkerRoughness(const CConfig* config) {
diff --git a/Common/src/interface_interpolation/CIsoparametric.cpp b/Common/src/interface_interpolation/CIsoparametric.cpp
index c0589a2a88e..c60144035c2 100644
--- a/Common/src/interface_interpolation/CIsoparametric.cpp
+++ b/Common/src/interface_interpolation/CIsoparametric.cpp
@@ -253,13 +253,16 @@ void CIsoparametric::SetTransferCoeff(const CConfig* const* config) {
       }
 
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       MaxDistance = max(MaxDistance, maxDist);
       ErrorCounter += errorCount;
       nGlobalVertexTarget += totalCount;
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
   } // end nMarkerInt loop
 
diff --git a/Common/src/interface_interpolation/CMirror.cpp b/Common/src/interface_interpolation/CMirror.cpp
index 0ef8a55d751..e008720639f 100644
--- a/Common/src/interface_interpolation/CMirror.cpp
+++ b/Common/src/interface_interpolation/CMirror.cpp
@@ -231,7 +231,8 @@ void CMirror::SetTransferCoeff(const CConfig* const* config) {
         }
       }
 
-    } // end target loop
+    }
+    END_SU2_OMP_PARALLEL
 
     /*--- Free the heap allocations. ---*/
     for (auto ptr : GlobalIndex) if (ptr != sendGlobalIndex.data()) delete [] ptr;
diff --git a/Common/src/interface_interpolation/CNearestNeighbor.cpp b/Common/src/interface_interpolation/CNearestNeighbor.cpp
index e29d893c1fe..91e25e75b89 100644
--- a/Common/src/interface_interpolation/CNearestNeighbor.cpp
+++ b/Common/src/interface_interpolation/CNearestNeighbor.cpp
@@ -158,13 +158,16 @@ void CNearestNeighbor::SetTransferCoeff(const CConfig* const* config) {
         target_vertex.coefficient[iDonor] = donorInfo[iDonor].dist/denom;
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       totalTargetPoints += numTarget;
       AvgDistance += avgDist;
       MaxDistance = max(MaxDistance, maxDist);
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
     delete[] Buffer_Send_Coord;
     delete[] Buffer_Send_GlobalPoint;
diff --git a/Common/src/interface_interpolation/CRadialBasisFunction.cpp b/Common/src/interface_interpolation/CRadialBasisFunction.cpp
index 2b88464dea7..2c318414eb6 100644
--- a/Common/src/interface_interpolation/CRadialBasisFunction.cpp
+++ b/Common/src/interface_interpolation/CRadialBasisFunction.cpp
@@ -218,6 +218,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
                              keepPolynomialRowVec[iMarkerInt], CinvTrucVec[iMarkerInt]);
     }
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Final loop over interface markers to compute the interpolation coefficients. ---*/
 
@@ -381,7 +382,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
         }
       }
     } // end target vertex loop
-
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       totalDonorPoints += totalDonors;
@@ -390,7 +391,9 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
       AvgCorrection += sumCorr;
       MaxCorrection = max(MaxCorrection, maxCorr);
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
     /*--- Free global data that will no longer be used. ---*/
     donorCoord.resize(0,0);
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index 909b9542de8..1181b5b7212 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -97,11 +97,13 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   if(matrix != nullptr) {
     SU2_OMP_MASTER
     SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   if(nvar > MAXNVAR) {
     SU2_OMP_MASTER
     SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Application of this matrix, FVM or FEM. ---*/
@@ -296,6 +298,7 @@ void CSysMatrixComms::Initiate(const CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             bufDSend[buf_offset+iVar] = x(iPoint,iVar);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -333,6 +336,7 @@ void CSysMatrixComms::Initiate(const CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             bufDSend[buf_offset+iVar] = x(iPoint,iVar);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -375,6 +379,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 
     SU2_OMP_MASTER
     SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -414,6 +419,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             x(iPoint,iVar) = CSysMatrix<T>::template ActiveAssign<T>(bufDRecv[buf_offset+iVar]);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -453,6 +459,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             x(iPoint,iVar) += CSysMatrix<T>::template ActiveAssign<T>(bufDRecv[buf_offset+iVar]);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -469,6 +476,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 #ifdef HAVE_MPI
   SU2_OMP_MASTER
   SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);
+  END_SU2_OMP_MASTER
 #endif
   SU2_OMP_BARRIER
 
@@ -490,6 +498,7 @@ void CSysMatrix<ScalarType>::SetValDiagonalZero() {
   for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint)
     for (auto index = 0ul; index < nVar*nEqn; ++index)
       matrix[dia_ptr[iPoint]*nVar*nEqn + index] = 0.0;
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -603,10 +612,12 @@ void CSysMatrix<ScalarType>::MatrixVectorProduct(const CSysVector<ScalarType> &
   if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) {
     SU2_OMP_MASTER
     SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
   if (nPoint != prod.GetNBlk()) {
     SU2_OMP_MASTER
     SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 #endif
 
@@ -620,6 +631,7 @@ void CSysMatrix<ScalarType>::MatrixVectorProduct(const CSysVector<ScalarType> &
   for (auto row_i = 0ul; row_i < nPointDomain; row_i++) {
     RowProduct(vec, row_i, &prod[row_i*nVar]);
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization. ---*/
 
@@ -639,10 +651,12 @@ void CSysMatrix<ScalarType>::MatrixVectorProductTransposed(const CSysVector<Scal
   if ((nVar != vec.GetNVar()) || (nEqn != prod.GetNVar())) {
     SU2_OMP_MASTER
     SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
   if (nPoint != vec.GetNBlk()) {
     SU2_OMP_MASTER
     SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 #endif
   SU2_OMP_BARRIER
@@ -660,8 +674,8 @@ void CSysMatrix<ScalarType>::MatrixVectorProductTransposed(const CSysVector<Scal
       MatrixVectorProductTransp(&matrix[mat_begin], &vec[vec_begin], &prod[prod_begin]);
     }
   }
-
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- MPI Parallelization ---*/
@@ -678,6 +692,7 @@ void CSysMatrix<ScalarType>::BuildJacobiPreconditioner(bool transpose) {
   SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
     InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]), transpose);
+  END_SU2_OMP_FOR
 
 }
 
@@ -690,6 +705,7 @@ void CSysMatrix<ScalarType>::ComputeJacobiPreconditioner(const CSysVector<Scalar
   SU2_OMP_FOR_DYN(omp_heavy_size)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
     MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]);
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
   CSysMatrixComms::Initiate(prod, geometry, config, SOLUTION_MATRIX);
@@ -707,6 +723,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner(bool transposed) {
     SU2_OMP_FOR_STAT(omp_light_size)
     for (auto iVar = 0ul; iVar < nnz*nVar*nVar; ++iVar)
       ILU_matrix[iVar] = matrix[iVar];
+    END_SU2_OMP_FOR
   }
   else {
     /*--- ILUn clear the ILU matrix first, for ILU0^T
@@ -715,6 +732,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner(bool transposed) {
       SU2_OMP_FOR_STAT(omp_light_size)
       for (auto iVar = 0ul; iVar < nnz_ilu*nVar*nVar; iVar++)
         ILU_matrix[iVar] = 0.0;
+      END_SU2_OMP_FOR
     }
 
     /*--- Transposed or ILUn, traverse matrix to access its blocks
@@ -730,6 +748,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner(bool transposed) {
         }
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Transform system in Upper Matrix ---*/
@@ -804,6 +823,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner(bool transposed) {
     InverseDiagonalBlock_ILUMatrix(end-1, &invM[(end-1)*nVar*nVar]);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -857,6 +877,7 @@ void CSysMatrix<ScalarType>::ComputeILUPreconditioner(const CSysVector<ScalarTyp
       MatrixVectorProduct(&invM[iPoint*nVar*nVar], aux_vec, &prod[iPoint*nVar]);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -895,6 +916,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
       Gauss_Elimination(iPoint, &prod[idx]);              // Solve D.x* = y
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -924,6 +946,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
       Gauss_Elimination(iPoint, &prod[idx]);            // Solve D.x* = y
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -1109,6 +1132,7 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
   for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++)
     if (!LineletBool[iPoint])
       MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]);
+  END_SU2_OMP_FOR
 
   /*--- Solve each linelet using the Thomas algorithm ---*/
 
@@ -1196,6 +1220,7 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -1214,6 +1239,7 @@ void CSysMatrix<ScalarType>::ComputeResidual(const CSysVector<ScalarType> & sol,
     RowProduct(sol, iPoint, aux_vec);
     VectorSubtraction(aux_vec, &f[iPoint*nVar], &res[iPoint*nVar]);
   }
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -1315,6 +1341,7 @@ void CSysMatrix<ScalarType>::SetDiagonalAsColumnSum() {
       if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -1328,11 +1355,13 @@ void CSysMatrix<ScalarType>::MatrixMatrixAddition(ScalarType alpha, const CSysMa
   if (!ok) {
     SU2_OMP_MASTER
     SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   SU2_OMP_FOR_STAT(omp_light_size)
   for (auto i = 0ul; i < nnz*nVar*nEqn; ++i)
     matrix[i] += alpha*B.matrix[i];
+  END_SU2_OMP_FOR
 
 }
 
@@ -1346,10 +1375,12 @@ void CSysMatrix<ScalarType>::BuildPastixPreconditioner(CGeometry *geometry, cons
     pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix);
     pastix_wrapper.Factorize(geometry, config, kind_fact, transposed);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 #else
   SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
+  END_SU2_OMP_MASTER
 #endif
 }
 
@@ -1360,6 +1391,7 @@ void CSysMatrix<ScalarType>::ComputePastixPreconditioner(const CSysVector<Scalar
   SU2_OMP_BARRIER
   SU2_OMP_MASTER
   pastix_wrapper.Solve(vec,prod);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CSysMatrixComms::Initiate(prod, geometry, config, SOLUTION_MATRIX);
@@ -1367,6 +1399,7 @@ void CSysMatrix<ScalarType>::ComputePastixPreconditioner(const CSysVector<Scalar
 #else
   SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
+  END_SU2_OMP_MASTER
 #endif
 }
 
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 0f68ce46084..c9a3396480a 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -132,6 +132,7 @@ void CSysSolve<ScalarType>::ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg,
     /*--- nrm is the result of a dot product, communications are implicitly handled. ---*/
     SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Begin main Gram-Schmidt loop ---*/
@@ -211,6 +212,7 @@ unsigned long CSysSolve<ScalarType>::CG_LinSolver(const CSysVector<ScalarType> &
   if (m < 1) {
     SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet, only one thread can
@@ -230,6 +232,7 @@ unsigned long CSysSolve<ScalarType>::CG_LinSolver(const CSysVector<ScalarType> &
 
       cg_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -351,11 +354,13 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
   if (m < 1) {
     SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   if (m > 5000) {
     SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet ---*/
@@ -368,6 +373,7 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
       for (auto& w : W) w.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
       for (auto& z : Z) z.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -513,6 +519,7 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
   if (m < 1) {
     SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet ---*/
@@ -533,6 +540,7 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
 
       bcg_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -677,6 +685,7 @@ unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarT
   if (m < 1) {
     SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
+    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate vectors for residual (r), solution increment (z), and matrix-vector
@@ -695,6 +704,7 @@ unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarT
 
       smooth_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -911,6 +921,7 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     Residual = residual;
     Iterations = IterLinSol;
   }
+  END_SU2_OMP_MASTER
 
   HandleTemporariesOut(LinSysSol);
 
@@ -942,6 +953,7 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
       AD::FuncHelper->addUserData(config);
       AD::FuncHelper->addUserData(this);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
@@ -1073,6 +1085,7 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType> & Jacobian,
 
   SU2_OMP_MASTER
   Iterations = IterLinSol;
+  END_SU2_OMP_MASTER
 
   return IterLinSol;
 
diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp
index 39eb711d3ad..33f4ae14c78 100644
--- a/Common/src/linear_algebra/CSysSolve_b.cpp
+++ b/Common/src/linear_algebra/CSysSolve_b.cpp
@@ -64,6 +64,7 @@ void CSysSolve_b<ScalarType>::Solve_b(const codi::RealReverse::Real* x, codi::Re
       (*LinSysSol_b)[i] = 0.0;
     }
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config);
@@ -75,6 +76,7 @@ void CSysSolve_b<ScalarType>::Solve_b(const codi::RealReverse::Real* x, codi::Re
       x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i));
     }
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
index 0233daa886c..1522b49f208 100644
--- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
@@ -127,6 +127,7 @@ void computeGradientsGreenGauss(CSolver* solver,
 
     AD::EndPreacc();
   }
+  END_SU2_OMP_FOR
 
   /*--- Add boundary fluxes. ---*/
 
@@ -160,6 +161,7 @@ void computeGradientsGreenGauss(CSolver* solver,
             gradient(iPoint, iVar, iDim) -= flux * area[iDim];
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
index 6cf2e739a7b..bd32f9b9542 100644
--- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
@@ -284,6 +284,7 @@ void computeGradientsLeastSquares(CSolver* solver,
       solveLeastSquares<nDim, false>(iPoint, varBegin, varEnd, Rmatrix, gradient);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the gradient values across any periodic boundaries. ---*/
 
@@ -300,6 +301,7 @@ void computeGradientsLeastSquares(CSolver* solver,
     SU2_OMP_FOR_DYN(chunkSize)
     for (size_t iPoint = 0; iPoint < nPointDomain; ++iPoint)
       solveLeastSquares<nDim, true>(iPoint, varBegin, varEnd, Rmatrix, gradient);
+    END_SU2_OMP_FOR
   }
 
   /*--- If no solver was provided we do not communicate ---*/
diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp
index 4450a71898c..3f2582aae84 100644
--- a/SU2_CFD/include/integration/CNewtonIntegration.hpp
+++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp
@@ -116,6 +116,7 @@ class CNewtonIntegration final : public CIntegration {
   inline void SetSolutionResult(CSysVector<T>& x) const {
     CNEWTON_PARFOR
     for (auto i = 0ul; i < x.GetLocSize(); ++i) x[i] = LinSysSol[i];
+    END_CNEWTON_PARFOR // end of the loop copying LinSysSol into x; NOTE(review): pairing with CNEWTON_PARFOR inferred from the macro names - confirm
   }
 
   /*--- Preconditioner objects for each active solver. ---*/
@@ -129,11 +130,13 @@ class CNewtonIntegration final : public CIntegration {
                                            unsigned long iters, Scalar& eps) const {
     CNEWTON_PARFOR
     for (auto i = 0ul; i < u.GetLocSize(); ++i) precondIn[i] = u[i];
+    END_CNEWTON_PARFOR
 
     iters = Preconditioner_impl(precondIn, precondOut, iters, eps);
 
     CNEWTON_PARFOR
     for (auto i = 0ul; i < u.GetLocSize(); ++i) v[i] = precondOut[i];
+    END_CNEWTON_PARFOR
     SU2_OMP_BARRIER
 
     return iters;
diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp
index d605c668b83..3cfa4f3febf 100644
--- a/SU2_CFD/include/limiters/CLimiterDetails.hpp
+++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp
@@ -177,6 +177,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       sharedMin.resize(varEnd) = largeNum;
       sharedMax.resize(varEnd) =-largeNum;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Per thread reduction. ---*/
@@ -194,6 +195,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
         localMax(iVar) = max(localMax(iVar), field(iPoint, iVar));
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Per rank reduction. ---*/
 
@@ -203,6 +205,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       sharedMin(iVar) = min(sharedMin(iVar), localMin(iVar));
       sharedMax(iVar) = max(sharedMax(iVar), localMax(iVar));
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Global reduction. ---*/
@@ -215,6 +218,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       localMax = sharedMax;
       SU2_MPI::Allreduce(localMax.data(), sharedMax.data(), varEnd, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Compute eps^2 (each thread has its own copy of it). ---*/
diff --git a/SU2_CFD/include/limiters/computeLimiters.hpp b/SU2_CFD/include/limiters/computeLimiters.hpp
index a54832af6b9..f7d55cef0e7 100644
--- a/SU2_CFD/include/limiters/computeLimiters.hpp
+++ b/SU2_CFD/include/limiters/computeLimiters.hpp
@@ -68,6 +68,7 @@ if (geometry.GetnDim() == 2) {\
       for(size_t iPoint = 0; iPoint < geometry.GetnPoint(); ++iPoint)
         for(size_t iVar = varBegin; iVar < varEnd; ++iVar)
          limiter(iPoint, iVar) = 1.0;
+      END_SU2_OMP_FOR
       break;
     }
     case BARTH_JESPERSEN:
diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
index ae43d10e07a..2a68406a1f8 100644
--- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp
+++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
@@ -115,6 +115,7 @@ void computeLimiters_impl(CSolver* solver,
     for (size_t iPoint = 0; iPoint < nPoint; ++iPoint)
       for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
         fieldMax(iPoint,iVar) = fieldMin(iPoint,iVar) = field(iPoint,iVar);
+    END_SU2_OMP_FOR
 
     for (size_t iPeriodic = 1; iPeriodic <= config.GetnMarker_Periodic()/2; ++iPeriodic)
     {
@@ -215,6 +216,7 @@ void computeLimiters_impl(CSolver* solver,
 
     AD::EndPreacc();
   }
+  END_SU2_OMP_FOR
 
   /*--- Account for periodic effects, take the minimum limiter on each periodic pair. ---*/
   if (periodic)
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index 251f494f8ed..dde3885682b 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -324,6 +324,7 @@ class CFVMFlowSolverBase : public CSolver {
       Max_Delta_Time = 0.0;
       Global_Delta_UnstTimeND = 1e30;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Loop domain points. ---*/
@@ -377,6 +378,7 @@ class CFVMFlowSolverBase : public CSolver {
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop boundary edges ---*/
 
@@ -419,6 +421,7 @@ class CFVMFlowSolverBase : public CSolver {
           Lambda = lambdaVisc(*nodes,iPoint) * Area2;
           nodes->AddMax_Lambda_Visc(iPoint, Lambda);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -449,6 +452,7 @@ class CFVMFlowSolverBase : public CSolver {
           nodes->SetDelta_Time(iPoint,0.0);
         }
       }
+      END_SU2_OMP_FOR
       /*--- Min/max over threads. ---*/
       SU2_OMP_CRITICAL
       {
@@ -456,6 +460,7 @@ class CFVMFlowSolverBase : public CSolver {
         Max_Delta_Time = max(Max_Delta_Time, maxDt);
         Global_Delta_Time = Min_Delta_Time;
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
     }
 
@@ -470,6 +475,7 @@ class CFVMFlowSolverBase : public CSolver {
       SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       Max_Delta_Time = rbuf_time;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- For exact time solution use the minimum delta time of the whole mesh. ---*/
@@ -490,6 +496,7 @@ class CFVMFlowSolverBase : public CSolver {
 
         config->SetDelta_UnstTimeND(Global_Delta_Time);
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
 
       /*--- Sets the regular CFL equal to the unsteady CFL. ---*/
@@ -499,6 +506,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->SetLocalCFL(iPoint, config->GetUnst_CFL());
         nodes->SetDelta_Time(iPoint, Global_Delta_Time);
       }
+      END_SU2_OMP_FOR
 
     }
 
@@ -513,8 +521,10 @@ class CFVMFlowSolverBase : public CSolver {
       for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
         glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint));
       }
+      END_SU2_OMP_FOR
       SU2_OMP_CRITICAL
       Global_Delta_UnstTimeND = min(Global_Delta_UnstTimeND, glbDtND);
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
 
       SU2_OMP_MASTER
@@ -524,6 +534,7 @@ class CFVMFlowSolverBase : public CSolver {
 
         config->SetDelta_UnstTimeND(Global_Delta_UnstTimeND);
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
@@ -535,6 +546,7 @@ class CFVMFlowSolverBase : public CSolver {
         su2double dt = min((2.0/3.0)*config->GetDelta_UnstTimeND(), nodes->GetDelta_Time(iPoint));
         nodes->SetDelta_Time(iPoint, dt);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -585,6 +597,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed);
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop boundary edges ---*/
 
@@ -619,6 +632,7 @@ class CFVMFlowSolverBase : public CSolver {
 
           nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -681,6 +695,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]);
       }
     }
+    END_SU2_OMP_FOR
 
     if (isPeriodic) {
       /*--- Correct the sensor values across any periodic boundaries. ---*/
@@ -695,6 +710,7 @@ class CFVMFlowSolverBase : public CSolver {
       SU2_OMP_FOR_STAT(omp_chunk_size)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
         nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]);
+      END_SU2_OMP_FOR
     }
 
     /*--- MPI parallelization ---*/
@@ -735,6 +751,7 @@ class CFVMFlowSolverBase : public CSolver {
       SetRes_RMS(iVar, 0.0);
       SetRes_Max(iVar, 0.0, 0);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0};
@@ -797,12 +814,14 @@ class CFVMFlowSolverBase : public CSolver {
           }
         }
       }
+      END_SU2_OMP_FOR
       /*--- Reduce residual information over all threads in this rank. ---*/
       SU2_OMP_CRITICAL
       for (unsigned short iVar = 0; iVar < nVar; iVar++) {
         AddRes_RMS(iVar, resRMS[iVar]);
         AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
     }
 
@@ -821,6 +840,7 @@ class CFVMFlowSolverBase : public CSolver {
 
         ComputeVerificationError(geometry, config);
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
@@ -860,6 +880,7 @@ class CFVMFlowSolverBase : public CSolver {
       SetRes_RMS(iVar, 0.0);
       SetRes_Max(iVar, 0.0, 0);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0};
@@ -889,6 +910,7 @@ class CFVMFlowSolverBase : public CSolver {
           Jacobian.SetVal2Diag(iPoint, 1.0);
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/
@@ -921,16 +943,19 @@ class CFVMFlowSolverBase : public CSolver {
         }
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     for (unsigned short iVar = 0; iVar < nVar; iVar++) {
       AddRes_RMS(iVar, resRMS[iVar]);
       AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual ---*/
     SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -952,6 +977,7 @@ class CFVMFlowSolverBase : public CSolver {
           nodes->AddSolution(iPoint, iVar, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint*nVar+iVar]);
         }
       }
+      END_SU2_OMP_FOR
     }
 
     for (unsigned short iPeriodic = 1; iPeriodic <= config->GetnMarker_Periodic()/2; iPeriodic++) {
@@ -965,6 +991,7 @@ class CFVMFlowSolverBase : public CSolver {
     /*--- For verification cases, compute the global error metrics. ---*/
     SU2_OMP_MASTER
     ComputeVerificationError(geometry, config);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -982,6 +1009,7 @@ class CFVMFlowSolverBase : public CSolver {
       StrainMag_Max = 0.0;
       Omega_Max = 0.0;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     su2double strainMax = 0.0, omegaMax = 0.0;
@@ -1045,12 +1073,14 @@ class CFVMFlowSolverBase : public CSolver {
 
       AD::EndPreacc();
     }
+    END_SU2_OMP_FOR
 
     if ((iMesh == MESH_0) && (config.GetComm_Level() == COMM_FULL)) {
       SU2_OMP_CRITICAL {
         StrainMag_Max = max(StrainMag_Max, strainMax);
         Omega_Max = max(Omega_Max, omegaMax);
       }
+      END_SU2_OMP_CRITICAL
 
       SU2_OMP_BARRIER
       SU2_OMP_MASTER {
@@ -1060,6 +1090,7 @@ class CFVMFlowSolverBase : public CSolver {
         SU2_MPI::Allreduce(&MyStrainMag_Max, &StrainMag_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
         SU2_MPI::Allreduce(&MyOmega_Max, &Omega_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index adaba33241d..d62706df237 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -572,6 +572,7 @@ void CFVMFlowSolverBase<V, R>::ComputeUnderRelaxationFactor(const CConfig* confi
 
     nodes->SetUnderRelaxation(iPoint, localUnderRelaxation);
   }
+  END_SU2_OMP_FOR
 }
 
 template <class V, ENUM_REGIME R>
@@ -586,6 +587,7 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
@@ -593,6 +595,7 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CompleteImplicitIteration(geometry, nullptr, config);
@@ -848,7 +851,8 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
     SU2_MPI::Error(string("The solution file ") + restart_filename + string(" doesn't match with the mesh file!\n") +
                    string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
   }
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Update the geometry for flows on deforming meshes ---*/
@@ -918,6 +922,7 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
       }
       solver[iMesh][FLOW_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse);
     }
+    END_SU2_OMP_FOR
 
     solver[iMesh][FLOW_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION);
     solver[iMesh][FLOW_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION);
@@ -941,7 +946,8 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
   delete [] Restart_Vars; Restart_Vars = nullptr;
   delete [] Restart_Data; Restart_Data = nullptr;
 
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -987,6 +993,7 @@ void CFVMFlowSolverBase<V, R>::SetInitialCondition(CGeometry **geometry, CSolver
            but this is not necessary. */
         VerificationSolution->GetInitialCondition(coor, solDOF);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -996,7 +1003,8 @@ void CFVMFlowSolverBase<V, R>::SetInitialCondition(CGeometry **geometry, CSolver
     PushSolutionBackInTime(TimeIter, restart, rans, solver_container, geometry, config);
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1281,6 +1289,7 @@ void CFVMFlowSolverBase<V, R>::BC_Sym_Plane(CGeometry* geometry, CSolver** solve
       }  // if viscous
     }    // if GetDomain
   }      // for iVertex
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   for (iVar = 0; iVar < nPrimVarGrad; iVar++) delete[] Grad_Reflected[iVar];
@@ -1462,6 +1471,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::BC_Fluid_Interface(CGeometry* geometry,
           }
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -1524,6 +1534,7 @@ void CFVMFlowSolverBase<V, R>::BC_Custom(CGeometry* geometry, CSolver** solver_c
         }
       }
     }
+    END_SU2_OMP_FOR
 
   } else {
     /* The user must specify the custom BC's here. */
@@ -1558,6 +1569,7 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry,
         edgeNumerics->ComputeFlux(iEdge, *config, *geometry, *nodes, UpdateType::COLORING, mask, LinSysRes, Jacobian);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   if (ReducerStrategy) {
@@ -1583,6 +1595,7 @@ void CFVMFlowSolverBase<V, R>::SumEdgeFluxes(const CGeometry* geometry) {
         LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge));
     }
   }
+  END_SU2_OMP_FOR
 }
 
 template <class V, ENUM_REGIME FlowRegime>
@@ -1646,6 +1659,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -1684,6 +1698,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -1716,6 +1731,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
           for (iVar = 0; iVar < nVar; iVar++)
             LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -1758,6 +1774,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
diff --git a/SU2_CFD/include/solvers/CTurbSASolver.hpp b/SU2_CFD/include/solvers/CTurbSASolver.hpp
index 320cc555715..2cca80813c0 100644
--- a/SU2_CFD/include/solvers/CTurbSASolver.hpp
+++ b/SU2_CFD/include/solvers/CTurbSASolver.hpp
@@ -373,6 +373,7 @@ class CTurbSASolver final : public CTurbSolver {
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       nodes->SetSolution(iPoint, 0, nu_tilde_Inf);
+    END_SU2_OMP_FOR
   }
 
   /*!
diff --git a/SU2_CFD/include/solvers/CTurbSSTSolver.hpp b/SU2_CFD/include/solvers/CTurbSSTSolver.hpp
index 7d38a8050aa..8fe2dd73401 100644
--- a/SU2_CFD/include/solvers/CTurbSSTSolver.hpp
+++ b/SU2_CFD/include/solvers/CTurbSSTSolver.hpp
@@ -245,6 +245,7 @@ class CTurbSSTSolver final : public CTurbSolver {
       nodes->SetSolution(iPoint, 0, kine_Inf);
       nodes->SetSolution(iPoint, 1, omega_Inf);
     }
+    END_SU2_OMP_FOR
   }
 
   /*!
diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp
index 32b0e871c8e..7b7e35042c7 100644
--- a/SU2_CFD/src/drivers/CDriver.cpp
+++ b/SU2_CFD/src/drivers/CDriver.cpp
@@ -815,6 +815,7 @@ void CDriver::Geometrical_Preprocessing_FVM(CConfig *config, CGeometry **&geomet
     geometry[MESH_0]->SetControlVolume(config, ALLOCATE);
     geometry[MESH_0]->SetBoundControlVolume(config, ALLOCATE);
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Visualize a dual control volume if requested ---*/
 
@@ -1280,6 +1281,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry,
     if (euler || ns) {
       SU2_OMP_PARALLEL_(if(solver[MESH_0][FLOW_SOL]->GetHasHybridParallel()))
       solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
+      END_SU2_OMP_PARALLEL
     }
     if (NEMO_euler || NEMO_ns) {
       solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
@@ -1287,6 +1289,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry,
     if (turbulent) {
       SU2_OMP_PARALLEL_(if(solver[MESH_0][TURB_SOL]->GetHasHybridParallel()))
       solver[MESH_0][TURB_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
+      END_SU2_OMP_PARALLEL
     }
     if (config->AddRadiation()) {
       solver[MESH_0][RAD_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
@@ -1598,6 +1601,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Convective scheme not implemented (template_solver).", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -1624,6 +1628,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE :
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -1643,6 +1648,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid centered scheme or not implemented.\n Currently, only JST and LAX-FRIEDRICH are available for incompressible flows.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
           for (iMGlevel = 1; iMGlevel <= config->GetnMGLevels(); iMGlevel++)
@@ -1761,6 +1767,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -1777,6 +1784,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.\n Currently, only FDS is available for incompressible flows.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
         }
@@ -1785,6 +1793,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -1885,6 +1894,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE :
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -1895,6 +1905,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Invalid centered scheme or not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           }
 
@@ -1948,6 +1959,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -1957,6 +1969,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the NEMO Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2028,6 +2041,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Riemann solver not implemented.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2043,6 +2057,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_UPWIND:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND :
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) {
@@ -2055,6 +2070,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the turbulence equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2106,6 +2122,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_UPWIND:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND:
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) {
@@ -2115,6 +2132,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the transition equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2159,6 +2177,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
         default:
           SU2_OMP_MASTER
           SU2_MPI::Error("Invalid convective scheme for the heat transfer equations.", CURRENT_FUNCTION);
+          END_SU2_OMP_MASTER
           break;
       }
     }
@@ -2184,6 +2203,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
     if (incompressible)
       SU2_OMP_MASTER
       SU2_MPI::Error("Convective schemes not implemented for incompressible continuous adjoint.", CURRENT_FUNCTION);
+      END_SU2_OMP_MASTER
 
     /*--- Definition of the convective scheme for each equation and mesh level ---*/
 
@@ -2191,6 +2211,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJFLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -2205,6 +2226,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Centered scheme not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -2233,6 +2255,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Upwind scheme not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
         }
@@ -2241,6 +2264,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the continuous adjoint Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2304,12 +2328,14 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
     if (!spalart_allmaras)
       SU2_OMP_MASTER
       SU2_MPI::Error("Only the SA turbulence model can be used with the continuous adjoint solver.", CURRENT_FUNCTION);
+      END_SU2_OMP_MASTER
 
     /*--- Definition of the convective scheme for each equation and mesh level ---*/
     switch (config->GetKind_ConvNumScheme_AdjTurb()) {
       case NO_CONVECTIVE:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJTURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND :
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++)
@@ -2318,6 +2344,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Convective scheme not implemented (adjoint turbulence).", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2350,10 +2377,12 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
           case NEO_HOOKEAN:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
         }
         break;
@@ -2362,6 +2391,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
           case LINEAR_ELASTIC:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           case NEO_HOOKEAN:
             if (config->GetMaterialCompressibility() == COMPRESSIBLE_MAT) {
@@ -2369,6 +2399,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           case KNOWLES:
@@ -2377,6 +2408,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           case IDEAL_DE:
@@ -2385,17 +2417,20 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
         }
         break;
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Solver not implemented.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp
index 729eb41958b..5c6d7647ede 100644
--- a/SU2_CFD/src/integration/CIntegration.cpp
+++ b/SU2_CFD/src/integration/CIntegration.cpp
@@ -223,7 +223,8 @@ void CIntegration::SetDualTime_Geometry(CGeometry *geometry, CSolver *mesh_solve
 
   if ((iMesh==MESH_0) && config->GetDeform_Mesh()) mesh_solver->SetDualTime_Mesh();
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver, const CConfig *config, unsigned short iMesh) {
@@ -236,6 +237,7 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver
 
   SU2_OMP_MASTER
   solver->ResetCFLAdapt();
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPoint(), omp_get_num_threads()))
@@ -247,6 +249,8 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver
     /*--- Initialize the local CFL number ---*/
     solver->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(iMesh));
   }
+  END_SU2_OMP_FOR
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
index 3441fe82737..d5062c8c0cc 100644
--- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
@@ -94,6 +94,7 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry,
 
     SU2_OMP_MASTER
     config[iZone]->SubtractFinestMesh();
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -120,7 +121,8 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry,
                             numerics_container[iZone][iInst], config[iZone],
                             FinestMesh, RunTime_EqSystem, &monitor);
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -334,6 +336,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
     for (iVar = 0; iVar < nVar; iVar++)
       sol_coarse->GetNodes()->SetSolution_Old(Point_Coarse,Solution);
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -354,6 +357,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
         sol_coarse->GetNodes()->SetVelocity_Old(Point_Coarse, zero);
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -369,6 +373,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
       sol_fine->LinSysRes.SetBlock(Point_Fine, sol_coarse->GetNodes()->GetSolution_Old(Point_Coarse));
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -389,6 +394,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
     Residual_Old = solver->LinSysRes.GetBlock(iPoint);
     solver->GetNodes()->SetResidual_Old(iPoint,Residual_Old);
   }
+  END_SU2_OMP_FOR
 
   /*--- Jacobi iterations. ---*/
 
@@ -408,6 +414,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over all mesh points (update residuals with the neighbor averages). ---*/
 
@@ -422,6 +429,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
       for (iVar = 0; iVar < nVar; iVar++)
         solver->LinSysRes(iPoint,iVar) = (Residual_Old[iVar] + val_smooth_coeff*Residual_Sum[iVar])*factor;
     }
+    END_SU2_OMP_FOR
 
     /*--- Restore original residuals (without average) at boundary points. ---*/
 
@@ -435,6 +443,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
           Residual_Old = solver->GetNodes()->GetResidual_Old(iPoint);
           solver->LinSysRes.SetBlock(iPoint, Residual_Old);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -462,6 +471,7 @@ void CMultiGridIntegration::SetProlongated_Correction(CSolver *sol_fine, CGeomet
       Solution_Fine[iVar] += factor*Residual_Fine[iVar];
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI the new interpolated solution ---*/
 
@@ -482,6 +492,7 @@ void CMultiGridIntegration::SetProlongated_Solution(unsigned short RunTime_EqSys
       sol_fine->GetNodes()->SetSolution(Point_Fine, sol_coarse->GetNodes()->GetSolution(Point_Coarse));
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine,
@@ -511,6 +522,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
     }
     sol_coarse->GetNodes()->AddRes_TruncError(Point_Coarse, Residual);
   }
+  END_SU2_OMP_FOR
 
   delete [] Residual;
 
@@ -521,6 +533,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
         Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode();
         sol_coarse->GetNodes()->SetVel_ResTruncError_Zero(Point_Coarse);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -528,6 +541,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
   for (Point_Coarse = 0; Point_Coarse < geo_coarse->GetnPointDomain(); Point_Coarse++) {
     sol_coarse->GetNodes()->SubtractRes_TruncError(Point_Coarse, sol_coarse->LinSysRes.GetBlock(Point_Coarse));
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -536,6 +550,7 @@ void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solve
   SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++)
     solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint));
+  END_SU2_OMP_FOR
 
 }
 
@@ -575,6 +590,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst
     sol_coarse->GetNodes()->SetSolution(Point_Coarse, Solution);
 
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -609,6 +625,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst
         }
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -652,6 +669,7 @@ void CMultiGridIntegration::SetRestricted_Gradient(unsigned short RunTime_EqSyst
     }
     sol_coarse->GetNodes()->SetGradient(Point_Coarse,Gradient);
   }
+  END_SU2_OMP_FOR
 
   for (iVar = 0; iVar < nVar; iVar++)
     delete [] Gradient[iVar];
@@ -693,6 +711,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol
                                                    numerics_container[FinestMesh][ADJFLOW_SOL][CONV_BOUND_TERM], config);
       break;
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -719,6 +738,7 @@ void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****s
       solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CT(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CT());
       solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CQ(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CQ());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Restrict solution and gradients to the coarse levels ---*/
diff --git a/SU2_CFD/src/integration/CNewtonIntegration.cpp b/SU2_CFD/src/integration/CNewtonIntegration.cpp
index a9f3a804c61..c8d9c56d4e3 100644
--- a/SU2_CFD/src/integration/CNewtonIntegration.cpp
+++ b/SU2_CFD/src/integration/CNewtonIntegration.cpp
@@ -131,6 +131,7 @@ void CNewtonIntegration::PerturbSolution(const CSysVector<Scalar>& dir, Scalar m
     for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar)
       solvers[FLOW_SOL]->GetNodes()->AddSolution(iPoint,iVar, mag*dir(iPoint,iVar));
   }
+  END_SU2_OMP_FOR
 }
 
 void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
@@ -140,6 +141,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
   if (type == EXPLICIT) {
     SU2_OMP_MASTER
     config->SetKind_TimeIntScheme(EULER_EXPLICIT);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -151,6 +153,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
   if (type == EXPLICIT) {
     SU2_OMP_MASTER
     config->SetKind_TimeIntScheme(TimeIntScheme);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -163,11 +166,13 @@ void CNewtonIntegration::ComputeFinDiffStep() {
 
   SU2_OMP_MASTER
   rmsSol = 0.0;
+  END_SU2_OMP_MASTER
 
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint)
     for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar)
       rmsSol_loc += pow(solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint,iVar), 2);
+  END_SU2_OMP_FOR
 
   atomicAdd(rmsSol_loc, rmsSol);
 
@@ -177,6 +182,7 @@ void CNewtonIntegration::ComputeFinDiffStep() {
     SU2_MPI::Allreduce(&t, &rmsSol, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
     finDiffStep = finDiffStepND * max(1.0, sqrt(SU2_TYPE::GetValue(rmsSol) / geometry->GetGlobal_nPointDomain()));
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -212,6 +218,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto i = 0ul; i < LinSysRes.GetNElmDomain(); ++i)
     LinSysRes[i] = SU2_TYPE::GetValue(solvers[FLOW_SOL]->LinSysRes[i]);
+  END_SU2_OMP_FOR
 
   su2double residual = 0.0;
   for (auto iVar = 0ul; iVar < LinSysRes.GetNVar(); ++iVar)
@@ -226,6 +233,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
       firstResidual = max(firstResidual, residual);
       if (startupIters) startupIters -= 1;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     endStartup = (startupIters == 0) && (residual - firstResidual < startupResidual);
   }
@@ -237,6 +245,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
   if (!startupPeriod && tolRelaxFactor > 1 && fullTolResidual < 0.0) {
     SU2_OMP_MASTER
     firstResidual = max(firstResidual, residual);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     su2double x = (residual - firstResidual) / fullTolResidual;
     toleranceFactor = 1.0 + (tolRelaxFactor-1)*max(0.0, 1.0-SU2_TYPE::GetValue(x));
@@ -267,6 +276,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
     solvers[FLOW_SOL]->SetIterLinSolver(iter);
     solvers[FLOW_SOL]->SetResLinSolver(eps);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /// TODO: Clever back-tracking and CFL adaptation based on residual reduction.
@@ -286,6 +296,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
     solvers[FLOW_SOL]->Momentum_Forces(geometry, config);
     solvers[FLOW_SOL]->Friction_Forces(geometry, config);
   }
+  END_SU2_OMP_MASTER
 
   /*--- At the end of the startup period the CFL is reset to the initial value. ---*/
 
@@ -294,12 +305,15 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
       startupPeriod = false;
       firstResidual = residual;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < geometry->GetnPoint(); ++iPoint)
       solvers[FLOW_SOL]->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(MESH_0));
+    END_SU2_OMP_FOR
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CNewtonIntegration::MatrixFreeProduct(const CSysVector<Scalar>& u, CSysVector<Scalar>& v) {
@@ -328,6 +342,7 @@ void CNewtonIntegration::MatrixFreeProduct(const CSysVector<Scalar>& u, CSysVect
       v(iPoint,iVar) += SU2_TYPE::GetValue(delta) * u(iPoint,iVar);
     }
   }
+  END_SU2_OMP_FOR
 
   CSysMatrixComms::Initiate(v, geometry, config);
   CSysMatrixComms::Complete(v, geometry, config);
@@ -350,6 +365,7 @@ void CNewtonIntegration::Preconditioner(const CSysVector<Scalar>& u, CSysVector<
       for (auto iVar = 0ul; iVar < u.GetNVar(); ++iVar)
         v(iPoint,iVar) = SU2_TYPE::GetValue(delta) * u(iPoint,iVar);
     }
+    END_SU2_OMP_FOR
 
     CSysMatrixComms::Initiate(v, geometry, config);
     CSysMatrixComms::Complete(v, geometry, config);
diff --git a/SU2_CFD/src/integration/CSingleGridIntegration.cpp b/SU2_CFD/src/integration/CSingleGridIntegration.cpp
index 88da5a71ef9..d2f0d4de49e 100644
--- a/SU2_CFD/src/integration/CSingleGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CSingleGridIntegration.cpp
@@ -79,6 +79,7 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve
   if (RunTime_EqSystem == RUNTIME_HEAT_SYS) {
     SU2_OMP_MASTER
     solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -105,8 +106,8 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve
 
   }
 
-  } // end SU2_OMP_PARALLEL
-
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse,
@@ -141,6 +142,7 @@ void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSys
     sol_coarse->GetNodes()->SetSolution(Point_Coarse,Solution);
 
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -177,6 +179,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys
     sol_coarse->GetNodes()->SetmuT(Point_Coarse,EddyVisc);
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Update solution at the no slip wall boundary, only the first
    variable (nu_tilde -in SA and SA_NEG- and k -in SST-), to guarantee that the eddy viscoisty
@@ -192,6 +195,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys
         Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode();
         sol_coarse->GetNodes()->SetmuT(Point_Coarse,0.0);
       }
+      END_SU2_OMP_FOR
     }
   }
 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index 2842768efef..d4fe65a2822 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -369,7 +369,8 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
       break;
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   /*--- MPI dependencies. ---*/
 
diff --git a/SU2_CFD/src/iteration/CFluidIteration.cpp b/SU2_CFD/src/iteration/CFluidIteration.cpp
index aeaf77f9621..7f67cea5cc5 100644
--- a/SU2_CFD/src/iteration/CFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CFluidIteration.cpp
@@ -141,6 +141,7 @@ void CFluidIteration::Iterate(COutput* output, CIntegration**** integration, CGe
     SU2_OMP_PARALLEL
     solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->AdaptCFLNumber(geometry[val_iZone][val_iInst],
                                                                    solver[val_iZone][val_iInst], config[val_iZone]);
+    END_SU2_OMP_PARALLEL
   }
 
   /*--- Call Dynamic mesh update if AEROELASTIC motion was specified ---*/
diff --git a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
index 4c47c6f3847..643bd7e5b94 100644
--- a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
+++ b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
@@ -333,6 +333,7 @@ void CFEAElasticity::ReadDV(const CConfig *config) {
   bool master_node = false;
   SU2_OMP_MASTER
   master_node = (rank == MASTER_NODE);
+  END_SU2_OMP_MASTER
 
   unsigned long index;
 
diff --git a/SU2_CFD/src/python_wrapper_structure.cpp b/SU2_CFD/src/python_wrapper_structure.cpp
index e324046844e..166a0c66c1a 100644
--- a/SU2_CFD/src/python_wrapper_structure.cpp
+++ b/SU2_CFD/src/python_wrapper_structure.cpp
@@ -602,6 +602,7 @@ void CSinglezoneDriver::SetInitialMesh() {
         /*--- Set the grid velocity for this coarse node. ---*/
         geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetGridVel(iPoint, Grid_Vel);
       }
+      END_SU2_OMP_FOR
       /*--- Push back the volume. ---*/
       geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_n();
       geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_nM1();
@@ -610,6 +611,7 @@ void CSinglezoneDriver::SetInitialMesh() {
     solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n();
     solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n1();
   }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDriver::BoundaryConditionsUpdate(){
diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp
index 7cd1a186cb1..038b4ebd70d 100644
--- a/SU2_CFD/src/solvers/CEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CEulerSolver.cpp
@@ -617,6 +617,7 @@ void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_containe
                    "support vectorization.", CURRENT_FUNCTION);
 
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
@@ -1689,7 +1690,8 @@ void CEulerSolver::SetNondimensionalization(CConfig *config, unsigned short iMes
       GetFluidModel()->SetThermalConductivityModel(config);
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   Energy_FreeStreamND = GetFluidModel()->GetStaticEnergy() + 0.5*ModVel_FreeStreamND*ModVel_FreeStreamND;
 
@@ -2025,12 +2027,14 @@ void CEulerSolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_c
         }
 
       }
+      END_SU2_OMP_FOR
 
       FlowNodes->Set_OldSolution();
 
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2059,6 +2063,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
   if (fixed_cl && !disc_adjoint && !cont_adjoint) {
     SU2_OMP_MASTER
     SetFarfield_AoA(geometry, solver_container, config, iMesh, Output);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2066,6 +2071,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
 
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_ATOMIC
@@ -2079,6 +2085,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
       SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
       config->SetNonphysical_Points(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2087,6 +2094,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
   if (engine) {
     SU2_OMP_MASTER
     GetPower_Properties(geometry, config, iMesh, Output);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2099,6 +2107,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
       GetPower_Properties(geometry, config, iMesh, Output);
       SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2107,6 +2116,7 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
   if (nearfield) {
     SU2_OMP_MASTER
     Set_MPI_Nearfield(geometry, config);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2192,6 +2202,7 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c
 
     if (!physical) nonPhysicalPoints++;
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -2276,6 +2287,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
   unsigned long counter_local = 0;
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   /*--- Pick one numerics object per thread. ---*/
   CNumerics* numerics = numerics_container[CONV_TERM + omp_get_thread_num()*MAX_TERMS];
@@ -2465,6 +2477,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -2487,6 +2500,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2591,6 +2605,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (rotating_frame) {
@@ -2621,6 +2636,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (axisymmetric) {
@@ -2697,6 +2713,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit)
         Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
+    END_SU2_OMP_FOR
   }
 
   if (gravity) {
@@ -2718,6 +2735,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -2735,6 +2753,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
         LinSysRes(iPoint,iVar) += Volume * nodes->GetHarmonicBalance_Source(iPoint,iVar);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   if (windgust) {
@@ -2765,6 +2784,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Check if a verification solution is to be computed. ---*/
@@ -2795,6 +2815,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
           LinSysRes(iPoint,iVar) -= sourceMan[iVar]*Volume;
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -2865,6 +2886,7 @@ void CEulerSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *co
       nodes->AddUnd_Lapl(iPoint, nVar-1, Pressure_j-Pressure_i);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the Laplacian across any periodic boundaries. ---*/
 
@@ -2937,6 +2959,7 @@ void CEulerSolver::SetUpwind_Ducros_Sensor(CGeometry *geometry, CConfig *config)
 
     nodes->SetSensor(iPoint, Ducros_i);
   }
+  END_SU2_OMP_FOR
 
   InitiateComms(geometry, config, SENSOR);
   CompleteComms(geometry, config, SENSOR);
@@ -5117,6 +5140,7 @@ void CEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -5608,6 +5632,7 @@ void CEulerSolver::BC_Riemann(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -6124,7 +6149,8 @@ void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_contain
         }
       }
     }
-}
+    END_SU2_OMP_FOR
+  }
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -7024,6 +7050,7 @@ void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, CNu
       }
 
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Free locally allocated memory ---*/
@@ -7349,6 +7376,7 @@ void CEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -7525,6 +7553,7 @@ void CEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -7672,6 +7701,7 @@ void CEulerSolver::BC_Supersonic_Inlet(CGeometry *geometry, CSolver **solver_con
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -7797,6 +7827,7 @@ void CEulerSolver::BC_Supersonic_Outlet(CGeometry *geometry, CSolver **solver_co
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8021,6 +8052,7 @@ void CEulerSolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_contai
 
     }
   }
+  END_SU2_OMP_FOR
 
   delete [] Normal;
 
@@ -8275,6 +8307,7 @@ void CEulerSolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_conta
 
     }
   }
+  END_SU2_OMP_FOR
 
   delete [] Normal;
 
@@ -8334,6 +8367,7 @@ void CEulerSolver::BC_Interface_Boundary(CGeometry *geometry, CSolver **solver_c
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8397,6 +8431,7 @@ void CEulerSolver::BC_NearField_Boundary(CGeometry *geometry, CSolver **solver_c
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8840,6 +8875,7 @@ void CEulerSolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, C
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -9078,6 +9114,7 @@ void CEulerSolver::BC_ActDisk_VariableLoad(CGeometry *geometry, CSolver **solver
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CEulerSolver::PrintVerificationError(const CConfig *config) const {
@@ -9131,6 +9168,7 @@ void CEulerSolver::SetFreeStream_Solution(const CConfig *config) {
     }
     nodes->SetSolution(iPoint,nVar-1, Density_Inf*Energy_Inf);
   }
+  END_SU2_OMP_FOR
 }
 
 void CEulerSolver::SetFreeStream_TurboSolution(CConfig *config) {
diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp
index d36e156a250..70bf7f110e4 100644
--- a/SU2_CFD/src/solvers/CFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CFEASolver.cpp
@@ -116,6 +116,7 @@ CFEASolver::CFEASolver(CGeometry *geometry, CConfig *config) : CSolver() {
       }
     }
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Set element properties ---*/
   Set_ElementProperties(geometry, config);
@@ -680,6 +681,7 @@ void CFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container,
   {
     LinSysSol.SetValZero();
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Clear external forces. ---*/
   nodes->Clear_SurfaceLoad_Res();
@@ -698,13 +700,16 @@ void CFEASolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_con
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint)
       nodes->SetSolution(iPoint, zeros);
+    END_SU2_OMP_FOR
   }
   else {
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint)
       nodes->SetSolution(iPoint, nodes->GetPrestretch(iPoint));
+    END_SU2_OMP_FOR
   }
-  } // end parallel
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, const CConfig *config) {
@@ -789,10 +794,12 @@ void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics,
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -929,10 +936,12 @@ void CFEASolver::Compute_StiffMatrix_NodalStressRes(CGeometry *geometry, CNumeri
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1012,10 +1021,12 @@ void CFEASolver::Compute_MassMatrix(const CGeometry *geometry, CNumerics **numer
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   AD::EndPassive(wasActive);
 
@@ -1091,6 +1102,7 @@ void CFEASolver::Compute_MassRes(const CGeometry *geometry, CNumerics **numerics
       }
 
     } // end iElem loop
+    END_SU2_OMP_FOR
 
   } // end color loop
 
@@ -1180,10 +1192,12 @@ void CFEASolver::Compute_NodalStressRes(CGeometry *geometry, CNumerics **numeric
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1221,6 +1235,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
         nodes->SetStress_FEM(iPoint,iStress, 0.0);
       }
     }
+    END_SU2_OMP_FOR
     AD::EndPassive(wasActive);
 
     for(auto color : ElemColoring) {
@@ -1308,6 +1323,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
         AD::EndPassive(wasActive);
 
       } // end iElem loop
+      END_SU2_OMP_FOR
       atomicAdd(stressPen, StressPenalty);
 
     } // end color loop
@@ -1326,12 +1342,15 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
 
       maxVonMises = max(maxVonMises, vms);
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     MaxVonMises_Stress = max(MaxVonMises_Stress, maxVonMises);
+    END_SU2_OMP_CRITICAL
 
     AD::EndPassive(wasActive);
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   /*--- Set the value of the MaxVonMises_Stress as the CFEA coeffient ---*/
   SU2_MPI::Allreduce(&MaxVonMises_Stress, &Total_CFEA, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
@@ -1473,6 +1492,7 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       nodes->Clear_BodyForces_Res(iPoint);
+    END_SU2_OMP_FOR
 
     for(auto color : ElemColoring) {
 
@@ -1530,11 +1550,13 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con
           if (LockStrategy) omp_unset_lock(&UpdateLocks[indexNode[iNode]]);
         }
 
-      } // end iElem loop
+      }
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1743,6 +1765,7 @@ CSysVector<T> computeLinearResidual(const CSysMatrix<T>& A,
                                     const CSysVector<U>& b) {
   CSysVector<T> r(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
   SU2_OMP_PARALLEL { A.ComputeResidual(x, b, r); }
+  END_SU2_OMP_PARALLEL
   return r;
 }
 
@@ -1762,6 +1785,7 @@ CSysVector<T> computeLinearResidual(const CSysMatrix<T>& A,
     btmp.PassiveCopy(b);
     A.ComputeResidual(xtmp, btmp, r);
   }
+  END_SU2_OMP_PARALLEL
   return r;
 }
 
@@ -1823,7 +1847,9 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics
       Conv_Check[1] = rtol;
       Conv_Check[2] = etol;
     }
-    } // end parallel
+    END_SU2_OMP_MASTER
+    }
+    END_SU2_OMP_PARALLEL
   }
   else {
 
@@ -1859,18 +1885,22 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics
         }
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     for (auto iVar = 0ul; iVar < nVar; iVar++) {
       AddRes_RMS(iVar, resRMS[iVar]);
       AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual. ---*/
     SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
+    END_SU2_OMP_MASTER
 
-    } // end SU2_OMP_PARALLEL
+    }
+    END_SU2_OMP_PARALLEL
 
   }
 
@@ -2224,6 +2254,7 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Dynamic contribution. ---*/
 
@@ -2255,13 +2286,15 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics
             a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t)
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Add M*TimeRes_Aux to the residual. ---*/
       Compute_MassRes(geometry, numerics, config);
       LinSysRes += TimeRes;
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2282,6 +2315,7 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig
       for (iVar = 0; iVar < nVar; iVar++)
         nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar));
     }
+    END_SU2_OMP_FOR
 
     if (dynamic) {
       SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -2308,8 +2342,10 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig
           nodes->SetSolution_Vel(iPoint, iVar, sol);
         }
       }
+      END_SU2_OMP_FOR
     }
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CConfig *config) {
@@ -2327,6 +2363,7 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo
       nodes->SetSolution(iPoint, nodes->GetSolution_Pred(iPoint));
       nodes->SetSolution_Pred_Old(iPoint, nodes->GetSolution(iPoint));
     }
+    END_SU2_OMP_FOR
 
     if (dynamic) {
       SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -2353,9 +2390,11 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo
           nodes->SetSolution_Vel(iPoint, iVar, sol);
         }
       }
+      END_SU2_OMP_FOR
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2406,6 +2445,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
         }
 
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Loads for dynamic problems. ---*/
@@ -2430,6 +2470,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
             a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t)
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Add M*TimeRes_Aux to the residual. ---*/
       Compute_MassRes(geometry, numerics, config);
@@ -2462,9 +2503,11 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
                                                     alpha_f  * nodes->Get_FlowTraction_n(iPoint,iVar) );
         }
       }
+      END_SU2_OMP_FOR
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2476,6 +2519,7 @@ void CFEASolver::GeneralizedAlpha_UpdateDisp(const CGeometry *geometry, const CC
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
     for (unsigned short iVar = 0; iVar < nVar; iVar++)
       nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar));
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2530,6 +2574,7 @@ void CFEASolver::GeneralizedAlpha_UpdateSolution(const CGeometry *geometry, cons
     }
 
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2556,6 +2601,7 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) {
   /*--- This is required for the discrete adjoint. ---*/
   SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
   for (auto i = nPointDomain*nVar; i < nPoint*nVar; ++i) LinSysRes[i] = 0.0;
+  END_SU2_OMP_FOR
 
   /*--- Solve or smooth the linear system. ---*/
 
@@ -2566,8 +2612,10 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) {
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   //SU2_OMP_BARRIER
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2616,6 +2664,7 @@ void CFEASolver::PredictStruct_Displacement(CGeometry *geometry, CConfig *config
     }
 
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2734,6 +2783,7 @@ void CFEASolver::SetAitken_Relaxation(CGeometry *geometry, CConfig *config) {
 
     nodes->SetSolution_Pred(iPoint, newDispPred);
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2825,6 +2875,7 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){
     for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
       obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint));
     }
+    END_SU2_OMP_FOR
   }
   else {
     for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
@@ -2840,12 +2891,14 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){
           if (geometry->nodes->GetDomain(iPoint))
             obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint));
         }
+        END_SU2_OMP_FOR
       }
     }
   }
   atomicAdd(obj_fun_local, objective_function);
   atomicAdd(nSurf_local, nSurfPoints);
   }
+  END_SU2_OMP_PARALLEL
   SU2_MPI::Allreduce(&objective_function, &Total_OFRefGeom, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
   unsigned long nPointsOF = geometry->GetGlobal_nPointDomain();
@@ -2940,10 +2993,13 @@ void CFEASolver::Compute_OFVolFrac(CGeometry *geometry, const CConfig *config)
       discrete_loc += volume*4.0*rho*(1.0-rho);
     }
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(tot_vol_loc, total_volume);
   atomicAdd(integral_loc, integral);
   atomicAdd(discrete_loc, discreteness);
   }
+  END_SU2_OMP_PARALLEL
 
   su2double tmp;
   SU2_MPI::Allreduce(&total_volume,&tmp,1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
@@ -3003,8 +3059,11 @@ void CFEASolver::Compute_OFCompliance(CGeometry *geometry, const CConfig *config
     for (iVar = 0; iVar < nVar; iVar++)
       comp_local += nodalForce[iVar]*nodes->GetSolution(iPoint,iVar);
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(comp_local, compliance);
   }
+  END_SU2_OMP_PARALLEL
 
   SU2_MPI::Allreduce(&compliance, &Total_OFCompliance, 1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
 
@@ -3073,9 +3132,12 @@ void CFEASolver::Stiffness_Penalty(CGeometry *geometry, CNumerics **numerics, CC
 
     }
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(totalVol_loc, totalVolume);
   atomicAdd(weighted_loc, weightedValue);
   }
+  END_SU2_OMP_PARALLEL
 
   // Reduce value across processors for parallelization
 
@@ -3267,6 +3329,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
     else if (rho < 0.0) physical_rho[iElem] = 0.0;
     else                physical_rho[iElem] = rho;
   }
+  END_SU2_OMP_PARALLEL
 
   geometry->FilterValuesAtElementCG(filter_radius, kernels, search_lim, physical_rho);
 
@@ -3279,15 +3342,18 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
         SU2_OMP_FOR_STAT(omp_chunk_size)
         for (auto iElem=0ul; iElem<nElement; ++iElem)
           physical_rho[iElem] = 1.0-exp(-param*physical_rho[iElem])+physical_rho[iElem]*exp(-param);
+        END_SU2_OMP_FOR
         break;
       case HEAVISIDE_DOWN:
         SU2_OMP_FOR_STAT(omp_chunk_size)
         for (auto iElem=0ul; iElem<nElement; ++iElem)
           physical_rho[iElem] = exp(-param*(1.0-physical_rho[iElem]))-(1.0-physical_rho[iElem])*exp(-param);
+        END_SU2_OMP_FOR
         break;
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Unknown type of projection function",CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
     }
 
     /*--- If input was out of bounds use the bound instead of the filtered
@@ -3299,6 +3365,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
       else if (rho < 0.0) element_properties[iElem]->SetPhysicalDensity(0.0);
       else element_properties[iElem]->SetPhysicalDensity(physical_rho[iElem]);
     }
+    END_SU2_OMP_FOR
 
     /*--- Compute nodal averages for output. ---*/
     SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -3311,7 +3378,9 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
       }
       nodes->SetAuxVar(iPoint, 0, sum/vol);
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 
   delete [] physical_rho;
 
diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
index e02b8fd2504..c5cd8a789af 100644
--- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
@@ -827,6 +827,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
 
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_ATOMIC
@@ -840,6 +841,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
       SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
       config->SetNonphysical_Points(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -862,6 +864,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
   if (outlet) {
     SU2_OMP_MASTER
     GetOutlet_Properties(geometry, config, iMesh, Output);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -924,6 +927,7 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container
 
     if (!physical) nonPhysicalPoints++;
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -1053,6 +1057,7 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -1076,6 +1081,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
 
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   const bool implicit   = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT);
   const bool muscl      = (config->GetMUSCL_Flow() && (iMesh == MESH_0));
@@ -1219,6 +1225,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -1241,6 +1248,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -1298,6 +1306,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (boussinesq) {
@@ -1330,6 +1339,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (rotating_frame) {
@@ -1364,6 +1374,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (axisymmetric) {
@@ -1388,6 +1399,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         nodes->SetAuxVar(iPoint, 0, AuxVar);
 
       }
+      END_SU2_OMP_FOR
 
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
 
@@ -1451,6 +1463,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (radiation) {
@@ -1493,6 +1506,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       }
 
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -1506,6 +1520,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. ---*/
         nodes->SetAuxVar(iPoint, 0, nodes->GetEddyViscosity(iPoint));
       }
+      END_SU2_OMP_FOR
 
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
       if (config->GetKind_Gradient_Method() == GREEN_GAUSS) {
@@ -1545,6 +1560,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     } // for iPoint
+    END_SU2_OMP_FOR
 
     if(!streamwise_periodic_temperature && energy) {
 
@@ -1584,6 +1600,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
             LinSysRes.AddBlock(iPoint, residual);
 
           }// for iVertex
+          END_SU2_OMP_FOR
         }// if periodic inlet boundary
       }// for iMarker
 
@@ -1619,6 +1636,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         }
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -1754,9 +1772,11 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
       maxVel2 = max(maxVel2, nodes->GetVelocity2(iPoint));
+    END_SU2_OMP_FOR
 
     SU2_OMP_CRITICAL
     MaxVel2 = max(MaxVel2, maxVel2);
+    END_SU2_OMP_CRITICAL
 
     SU2_OMP_BARRIER
 
@@ -1766,6 +1786,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
 
       config->SetMax_Vel2(max(1e-10, MaxVel2));
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -1776,6 +1797,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
     nodes->SetBetaInc2(iPoint, BetaInc2);
+  END_SU2_OMP_FOR
 
 }
 
@@ -2008,6 +2030,7 @@ void CIncEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_contain
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -2249,6 +2272,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container,
     if (implicit)
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
   }
+  END_SU2_OMP_FOR
 }
 
 void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
@@ -2446,6 +2470,7 @@ void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -2536,6 +2561,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
         Jacobian.AddVal2Diag(iPoint, nDim+1, delta);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   else {
@@ -2579,6 +2605,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -2615,6 +2642,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
           for (iVar = 0; iVar < nVar-!energy; iVar++)
             LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -2675,6 +2703,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
         Jacobian.AddVal2Diag(iPoint, nDim+1, delta);
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -2954,4 +2983,6 @@ void CIncEulerSolver::SetFreeStream_Solution(const CConfig *config){
     }
     nodes->SetSolution(iPoint,nDim+1, Temperature_Inf);
   }
+  END_SU2_OMP_FOR
+
 }
diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp
index 2eb8acbd679..f105e7f64f8 100644
--- a/SU2_CFD/src/solvers/CIncNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp
@@ -290,10 +290,12 @@ void CIncNSSolver::Compute_Streamwise_Periodic_Recovered_Values(CConfig *config,
       nodes->SetStreamwise_Periodic_RecoveredTemperature(iPoint, Temperature_Recovered);
     }
   } // for iPoint
+  END_SU2_OMP_FOR
 
   /*--- Compute the integrated Heatflux Q into the domain, and massflow over periodic markers ---*/
   SU2_OMP_MASTER
   GetStreamwise_Periodic_Properties(geometry, config, iMesh);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
@@ -338,6 +340,7 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c
     nodes->SetDES_LengthScale(iPoint,DES_LengthScale);
 
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 
@@ -476,6 +479,7 @@ void CIncNSSolver::BC_Wall_Generic(const CGeometry *geometry, const CConfig *con
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CIncNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver**, CNumerics*,
@@ -585,4 +589,5 @@ void CIncNSSolver::BC_ConjugateHeat_Interface(CGeometry *geometry, CSolver **sol
     nodes->SetSolution_Old(iPoint, nDim+1, Twall);
     nodes->SetEnergy_ResTruncError_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 }
diff --git a/SU2_CFD/src/solvers/CMeshSolver.cpp b/SU2_CFD/src/solvers/CMeshSolver.cpp
index f008ad0e812..314ba5e1b36 100644
--- a/SU2_CFD/src/solvers/CMeshSolver.cpp
+++ b/SU2_CFD/src/solvers/CMeshSolver.cpp
@@ -157,6 +157,7 @@ CMeshSolver::CMeshSolver(CGeometry *geometry, CConfig *config) : CFEASolver(true
   SU2_OMP_PARALLEL {
     SetMinMaxVolume(geometry, config, false);
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Compute the wall distance using the reference coordinates ---*/
   SetWallDistance(geometry, config);
@@ -187,6 +188,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     MaxVolume = -1E22; MinVolume = 1E22;
     ElemCounter = 0;
   }
+  END_SU2_OMP_MASTER
 
   /*--- Local min/max, final reduction outside loop. ---*/
   su2double maxVol = -1E22, minVol = 1E22;
@@ -238,12 +240,14 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     /*--- Count distorted elements. ---*/
     if (ElemVolume <= 0.0) elCount++;
   }
+  END_SU2_OMP_FOR
   SU2_OMP_CRITICAL
   {
     MaxVolume = max(MaxVolume, maxVol);
     MinVolume = min(MinVolume, minVol);
     ElemCounter += elCount;
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
   SU2_OMP_MASTER
@@ -253,6 +257,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     SU2_MPI::Allreduce(&maxVol, &MaxVolume, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
     SU2_MPI::Allreduce(&minVol, &MinVolume, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Volume from 0 to 1 ---*/
@@ -268,6 +273,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
       element[iElem].SetRef_Volume(ElemVolume);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Store the maximum and minimum volume. ---*/
   SU2_OMP_MASTER {
@@ -283,7 +289,9 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
   if ((ElemCounter != 0) && (rank == MASTER_NODE))
     cout <<"There are " << ElemCounter << " elements with negative volume.\n" << endl;
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   AD::EndPassive(wasActive);
 }
@@ -346,6 +354,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
       nodes->SetWallDistance(iPoint, MaxDistance);
     }
+    END_SU2_OMP_FOR
   }
   else {
     su2double MaxDistance_Local = -1E22, MinDistance_Local = 1E22;
@@ -368,11 +377,13 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
       if (dist > EPS)  MinDistance_Local = min(MinDistance_Local, dist);
 
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       MaxDistance = max(MaxDistance, MaxDistance_Local);
       MinDistance = min(MinDistance, MinDistance_Local);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     SU2_OMP_MASTER
@@ -382,6 +393,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
       SU2_MPI::Allreduce(&MaxDistance_Local, &MaxDistance, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       SU2_MPI::Allreduce(&MinDistance_Local, &MinDistance, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -391,6 +403,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
     su2double nodeDist = nodes->GetWallDistance(iPoint)/MaxDistance;
     nodes->SetWallDistance(iPoint,nodeDist);
   }
+  END_SU2_OMP_FOR
 
   /*--- Compute the element distances ---*/
   SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -411,8 +424,10 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
 
     element[iElem].SetWallDistance(ElemDist);
   }
+  END_SU2_OMP_FOR
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, CConfig *config){
@@ -466,6 +481,8 @@ void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics,
     break;
   }
   }
+  END_SU2_OMP_PARALLEL
+
   stiffness_set = true;
 
 }
@@ -496,6 +513,7 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig
   SU2_OMP_PARALLEL {
     LinSysRes.SetValZero();
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Impose boundary conditions (all of them are ESSENTIAL BC's - displacements). ---*/
   SetBoundaryDisplacements(geometry[MESH_0], numerics[FEA_TERM], config);
@@ -521,7 +539,8 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig
   /*--- Check for failed deformation (negative volumes). ---*/
   SetMinMaxVolume(geometry[MESH_0], config, true);
 
-  } // end parallel
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -543,6 +562,7 @@ void CMeshSolver::UpdateGridCoord(CGeometry *geometry, CConfig *config){
       geometry->nodes->SetCoord(iPoint, iDim, val_coord);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Communicate the updated displacements and mesh coordinates. ---*/
   geometry->InitiateComms(geometry, config, COORDINATES);
@@ -600,6 +620,7 @@ void CMeshSolver::ComputeGridVelocity(CGeometry *geometry, CConfig *config){
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- The velocity was computed for nPointDomain, now we communicate it. ---*/
   geometry->InitiateComms(geometry, config, GRID_VELOCITY);
diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
index aae061ef04d..824e2ccdae9 100644
--- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
@@ -525,6 +525,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con
   unsigned long counter_local = 0;
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   /*--- Pick one numerics object per thread. ---*/
   CNumerics* numerics = numerics_container[CONV_TERM];
@@ -697,6 +698,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 }
@@ -985,6 +987,7 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con
         }else
           eAxi_local++;
       }
+      END_SU2_OMP_FOR
     }
 
   /*--- Checking for NaN ---*/
diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp
index 87df3fb2c72..09e78f646ea 100644
--- a/SU2_CFD/src/solvers/CNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CNSSolver.cpp
@@ -90,6 +90,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
     SU2_OMP_BARRIER
     SU2_OMP_MASTER
     nPrimVarGrad = 1+nDim;
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -116,6 +117,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
   if (Output) {
     SU2_OMP_MASTER
     nPrimVarGrad = nPrimVarGrad_bak;
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -171,6 +173,7 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons
     nonPhysicalPoints += !physical;
 
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -316,6 +319,7 @@ void CNSSolver::SetRoe_Dissipation(CGeometry *geometry, CConfig *config){
       nodes->SetRoe_Dissipation_NTS(iPoint, delta, config->GetConst_DES());
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -520,6 +524,7 @@ void CNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_container
       }
     }
   }
+  END_SU2_OMP_FOR
 
   if (Jacobian_i)
     for (auto iVar = 0u; iVar < nVar; iVar++)
@@ -717,6 +722,7 @@ void CNSSolver::BC_Isothermal_Wall_Generic(CGeometry *geometry, CSolver **solver
       }
     }
   }
+  END_SU2_OMP_FOR
 
   if (Jacobian_i)
     for (auto iVar = 0u; iVar < nVar; iVar++)
@@ -914,6 +920,7 @@ void CNSSolver::SetTauWall_WF(CGeometry *geometry, CSolver **solver_container, c
       nodes->SetTauWall(iPoint, Tau_Wall);
 
     }
+    END_SU2_OMP_FOR
 
   }
 
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 312e5b39d24..dfaf6d2d4f6 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -1155,6 +1155,7 @@ void CSolver::InitiatePeriodicComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -1232,6 +1233,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
       SU2_MPI::Waitany(geometry->nPeriodicRecv,
                        geometry->req_PeriodicRecv,
                        &ind, &status);
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
       source = status.MPI_SOURCE;
 #else
@@ -1543,6 +1545,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
           }
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -1554,6 +1557,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
     SU2_MPI::Waitall(geometry->nPeriodicSend,
                      geometry->req_PeriodicSend,
                      MPI_STATUS_IGNORE);
+    END_SU2_OMP_MASTER
 #endif
     SU2_OMP_BARRIER
   }
@@ -1772,6 +1776,7 @@ void CSolver::InitiateComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -1818,6 +1823,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
 
       SU2_OMP_MASTER
       SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
 
       /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -1932,6 +1938,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -1941,6 +1948,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
 #ifdef HAVE_MPI
     SU2_OMP_MASTER
     SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);
+    END_SU2_OMP_MASTER
 #endif
     SU2_OMP_BARRIER
   }
@@ -2067,6 +2075,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       }
     }
     } /* End SU2_OMP_MASTER, now all threads update the CFL number. */
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /* Loop over all points on this grid and apply CFL adaption. */
@@ -2079,6 +2088,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       Max_CFL_Local = 0.0;
       Avg_CFL_Local = 0.0;
     }
+    END_SU2_OMP_MASTER
 
     SU2_OMP_FOR_STAT(roundUpDiv(geometry[iMesh]->GetnPointDomain(),omp_get_max_threads()))
     for (unsigned long iPoint = 0; iPoint < geometry[iMesh]->GetnPointDomain(); iPoint++) {
@@ -2147,6 +2157,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       }
 
     }
+    END_SU2_OMP_FOR
 
     /* Reduce the min/max/avg local CFL numbers. */
 
@@ -2157,6 +2168,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
         Max_CFL_Local = max(Max_CFL_Local,myCFLMax);
         Avg_CFL_Local += myCFLSum;
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
 
       SU2_OMP_MASTER
@@ -2167,6 +2179,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
         SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
         Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain());
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
@@ -2401,6 +2414,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
         LinSysRes(iPoint,iVar) += Flux * Solution_i[iVar];
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Loop boundary edges ---*/
 
@@ -2426,6 +2440,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
         for (auto iVar = 0u; iVar < nVar; iVar++)
           LinSysRes(iPoint,iVar) -= Flux * base_nodes->GetSolution(iPoint,iVar);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -2508,6 +2523,7 @@ void CSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *config)
       }
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the Laplacian across any periodic boundaries. ---*/
 
@@ -3049,7 +3065,9 @@ void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) {
 
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- It's necessary to communicate this information ---*/
 
diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp
index a33a4795822..fb774ed82b1 100644
--- a/SU2_CFD/src/solvers/CTurbSASolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp
@@ -246,6 +246,7 @@ void CTurbSASolver::Preprocessing(CGeometry *geometry, CSolver **solver_containe
         auto Laminar_Viscosity  = solver_container[FLOW_SOL]->GetNodes()->GetLaminarViscosity(iPoint);
         nodes->SetVortex_Tilting(iPoint, PrimGrad_Flow, Vorticity, Laminar_Viscosity);
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Compute the DES length scale ---*/
@@ -291,6 +292,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain
     nodes->SetmuT(iPoint,muT);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -389,6 +391,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
     if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
   if (harmonic_balance) {
 
@@ -404,6 +407,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
         LinSysRes(iPoint,iVar) += Source*Volume;
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -420,6 +424,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta
   if (config->GetWall_Functions()) {
     SU2_OMP_MASTER
     SetNuTilde_WF(geometry, solver_container, conv_numerics, visc_numerics, config, val_marker);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     return;
   }
@@ -485,6 +490,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSASolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics,
@@ -545,6 +551,7 @@ void CTurbSASolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -632,6 +639,7 @@ void CTurbSASolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, CN
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -717,6 +725,7 @@ void CTurbSASolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, C
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -805,6 +814,7 @@ void CTurbSASolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_conta
     }
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -893,6 +903,7 @@ void CTurbSASolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_cont
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -1042,6 +1053,7 @@ void CTurbSASolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container,
 //        Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -1135,6 +1147,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -1239,6 +1252,7 @@ void CTurbSASolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -1899,6 +1913,7 @@ void CTurbSASolver::SetDES_LengthScale(CSolver **solver, CGeometry *geometry, CC
     nodes->SetDES_LengthScale(iPoint, lengthScale);
 
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSASolver::SetInletAtVertex(const su2double *val_inlet,
diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
index c03e2295c19..cc9ed8c8013 100644
--- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
@@ -282,6 +282,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai
     nodes->SetmuT(iPoint,muT);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -356,6 +357,7 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta
     if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -450,6 +452,7 @@ void CTurbSSTSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_cont
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSSTSolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics,
@@ -512,6 +515,7 @@ void CTurbSSTSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_containe
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -605,6 +609,7 @@ void CTurbSSTSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, C
     }
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -695,6 +700,7 @@ void CTurbSSTSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -786,6 +792,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -896,6 +903,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp
index 4428f5a5f72..1df93e60466 100644
--- a/SU2_CFD/src/solvers/CTurbSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSolver.cpp
@@ -228,6 +228,7 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -304,6 +305,7 @@ void CTurbSolver::SumEdgeFluxes(CGeometry* geometry) {
         LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge));
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -502,6 +504,7 @@ void CTurbSolver::BC_Fluid_Interface(CGeometry *geometry, CSolver **solver_conta
       Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   delete [] PrimVar_j;
@@ -520,6 +523,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
     SetRes_RMS(iVar, 0.0);
     SetRes_Max(iVar, 0.0, 0);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0};
@@ -562,16 +566,19 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
       }
     }
   }
+  END_SU2_OMP_FOR
   SU2_OMP_CRITICAL
   for (unsigned short iVar = 0; iVar < nVar; iVar++) {
     AddRes_RMS(iVar, resRMS[iVar]);
     AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
   /*--- Compute the root mean square residual ---*/
   SU2_OMP_MASTER
   SetResidual_RMS(geometry, config);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
@@ -597,6 +604,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve
         for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
           nodes->AddSolution(iPoint, 0, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint]);
         }
+        END_SU2_OMP_FOR
         break;
 
       case SST: case SST_SUST:
@@ -616,6 +624,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve
                       density, density_old, lowerlimit[iVar], upperlimit[iVar]);
           }
         }
+        END_SU2_OMP_FOR
         break;
 
     }
@@ -642,6 +651,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
@@ -649,6 +659,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CompleteImplicitIteration(geometry, solver_container, config);
@@ -702,6 +713,7 @@ void CTurbSolver::ComputeUnderRelaxationFactor(const CConfig *config) {
     nodes->SetUnderRelaxation(iPoint, localUnderRelaxation);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -803,6 +815,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
       }
 
     }
+    END_SU2_OMP_FOR
 
   } else {
 
@@ -849,6 +862,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -896,6 +910,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
           }
 
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -968,6 +983,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
 
   } // end dynamic grid
 
@@ -1050,6 +1066,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
   }
 
   } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe.
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- MPI solution and compute the eddy viscosity ---*/
@@ -1077,6 +1094,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
       }
       solver[iMesh][TURB_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse);
     }
+    END_SU2_OMP_FOR
 
     solver[iMesh][TURB_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION);
     solver[iMesh][TURB_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION);
@@ -1093,7 +1111,8 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
   delete [] Restart_Vars; Restart_Vars = nullptr;
   delete [] Restart_Data; Restart_Data = nullptr;
 
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }

From 223c10d34febb8ef27d55d75421b2b5397557f33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Wed, 17 Mar 2021 15:47:57 +0100
Subject: [PATCH 22/57] Recover CoDiPack version.

---
 externals/codi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/externals/codi b/externals/codi
index 1b8d3f5f03d..6a67202a388 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8
+Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692

From 6775b29ced94423de19fb31541b2e9c3e0525b23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Wed, 17 Mar 2021 15:50:23 +0100
Subject: [PATCH 23/57] OpDiLib update.

---
 externals/opdi        | 2 +-
 meson_scripts/init.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/externals/opdi b/externals/opdi
index f14b42f1255..a1210cc3d2f 160000
--- a/externals/opdi
+++ b/externals/opdi
@@ -1 +1 @@
-Subproject commit f14b42f1255674bb10db91e3f45ceb39c1bccd17
+Subproject commit a1210cc3d2f58fa4652c70000920ff2e76896cf6
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index a42640f9fde..bbcd1b2ab4d 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -48,7 +48,7 @@ def init_submodules(method = 'auto'):
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
-  sha_version_opdi = 'f14b42f1255674bb10db91e3f45ceb39c1bccd17'
+  sha_version_opdi = 'a1210cc3d2f58fa4652c70000920ff2e76896cf6'
   github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'

From 6aaebca2b7c3273ef365b57d8352a7339f9f6bfe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Wed, 17 Mar 2021 15:51:00 +0100
Subject: [PATCH 24/57] Add syntax file.

---
 su2omp.syntax.json | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 su2omp.syntax.json

diff --git a/su2omp.syntax.json b/su2omp.syntax.json
new file mode 100644
index 00000000000..5a524950142
--- /dev/null
+++ b/su2omp.syntax.json
@@ -0,0 +1,42 @@
+{
+  "this file's header":
+  [
+    "\\file su2omp.syntax.json",
+    "\\brief Definitions for the OpDiLib syntax checker",
+    "\\author J. Blühdorn",
+    "\\version 7.1.1 \"Blackbird\"",
+
+    "SU2 Project Website: https://su2code.github.io",
+
+    "The SU2 Project is maintained by the SU2 Foundation ",
+    "(http://su2foundation.org)",
+
+    "Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)",
+
+    "SU2 is free software; you can redistribute it and/or",
+    "modify it under the terms of the GNU Lesser General Public",
+    "License as published by the Free Software Foundation; either",
+    "version 2.1 of the License, or (at your option) any later version.",
+
+    "SU2 is distributed in the hope that it will be useful,",
+    "but WITHOUT ANY WARRANTY; without even the implied warranty of",
+    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU",
+    "Lesser General Public License for more details.",
+
+    "You should have received a copy of the GNU Lesser General Public",
+    "License along with SU2. If not, see <http://www.gnu.org/licenses/>."
+  ],
+  "pairs":
+  {
+    "SU2_OMP_MASTER": "END_SU2_OMP_MASTER",
+    "SU2_OMP_CRITICAL": "END_SU2_OMP_CRITICAL",
+    "SU2_OMP_PARALLEL": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_PARALLEL_": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_PARALLEL_ON": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_FOR_": "END_SU2_OMP_FOR",
+    "SU2_OMP_FOR_DYN": "END_SU2_OMP_FOR",
+    "SU2_OMP_FOR_STAT": "END_SU2_OMP_FOR",
+    "CSYSVEC_PARFOR": "END_CSYSVEC_PARFOR",
+    "CNEWTON_PARFOR": "END_CNEWTON_PARFOR"
+  }
+}

From ce44cac16305c9d6c96c2c2424e151fc3d04972b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Wed, 17 Mar 2021 16:35:24 +0100
Subject: [PATCH 25/57] Fix missing END macros.

---
 Common/src/linear_algebra/CSysSolve.cpp | 2 ++
 SU2_CFD/src/solvers/CSolver.cpp         | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 25e0294cd10..650e3f728c8 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -852,6 +852,7 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     xIsZero = false;
     tol_type = LinearToleranceType::ABSOLUTE;
   }
+  END_SU2_OMP_MASTER
 
   /*--- Create matrix-vector product, preconditioner, and solve the linear system ---*/
 
@@ -1058,6 +1059,7 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType> & Jacobian,
     xIsZero = false;
     tol_type = LinearToleranceType::ABSOLUTE;
   }
+  END_SU2_OMP_MASTER
 
   HandleTemporariesIn(LinSysRes, LinSysSol);
 
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 9daad35cf84..ba62e146500 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -4054,6 +4054,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
     Residual_BGS[iVar] = 0.0;
     Residual_Max_BGS[iVar] = 0.0;
   }
+  END_SU2_OMP_MASTER
 
   vector<su2double> resMax(nVar,0.0), resRMS(nVar,0.0);
   vector<const su2double*> coordMax(nVar,nullptr);
@@ -4077,6 +4078,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
       }
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Reduce residual information over all threads in this rank. ---*/
   SU2_OMP_CRITICAL
@@ -4084,11 +4086,14 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
     Residual_BGS[iVar] += resRMS[iVar];
     AddRes_Max_BGS(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
   SU2_OMP_MASTER
   SetResidual_BGS(geometry, config);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }

From f093b3501cbe2f0a66a3b1c1bd8e47806abf4f6c Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Wed, 17 Mar 2021 17:16:34 +0000
Subject: [PATCH 26/57] move MASTER out of ExtFunc functions, parallel copy in
 CSysSolve_b

---
 Common/include/basic_types/ad_structure.hpp | 78 ++++++---------------
 Common/src/linear_algebra/CSysSolve.cpp     | 58 +++++++--------
 Common/src/linear_algebra/CSysSolve_b.cpp   | 25 +++----
 3 files changed, 57 insertions(+), 104 deletions(-)

diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 1699534828b..185ee136350 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -483,87 +483,59 @@ namespace AD{
   }
 
   FORCEINLINE void StartExtFunc(bool storePrimalInput, bool storePrimalOutput){
-    SU2_OMP_MASTER
-    {
-      FuncHelper = new ExtFuncHelper(true);
-      if (!storePrimalInput){
-        FuncHelper->disableInputPrimalStore();
-      }
-      if (!storePrimalOutput){
-        FuncHelper->disableOutputPrimalStore();
-      }
+    FuncHelper = new ExtFuncHelper(true);
+    if (!storePrimalInput){
+      FuncHelper->disableInputPrimalStore();
+    }
+    if (!storePrimalOutput){
+      FuncHelper->disableOutputPrimalStore();
     }
-    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void SetExtFuncIn(const su2double &data) {
-    SU2_OMP_MASTER
-    {
-      FuncHelper->addInput(data);
-    }
-    END_SU2_OMP_MASTER
+    FuncHelper->addInput(data);
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncIn(const T& data, const int size) {
-    SU2_OMP_MASTER
-    {
-      for (int i = 0; i < size; i++) {
-        FuncHelper->addInput(data[i]);
-      }
+    for (int i = 0; i < size; i++) {
+      FuncHelper->addInput(data[i]);
     }
-    END_SU2_OMP_MASTER
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncIn(const T& data, const int size_x, const int size_y) {
-    SU2_OMP_MASTER
-    {
-      for (int i = 0; i < size_x; i++) {
-        for (int j = 0; j < size_y; j++) {
-          FuncHelper->addInput(data[i][j]);
-        }
+    for (int i = 0; i < size_x; i++) {
+      for (int j = 0; j < size_y; j++) {
+        FuncHelper->addInput(data[i][j]);
       }
     }
-    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void SetExtFuncOut(su2double& data) {
-    SU2_OMP_MASTER
-    {
-      if (AD::getGlobalTape().isActive()) {
-        FuncHelper->addOutput(data);
-      }
+    if (AD::getGlobalTape().isActive()) {
+      FuncHelper->addOutput(data);
     }
-    END_SU2_OMP_MASTER
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncOut(T&& data, const int size) {
-    SU2_OMP_MASTER
-    {
-      for (int i = 0; i < size; i++) {
-        if (AD::getGlobalTape().isActive()) {
-          FuncHelper->addOutput(data[i]);
-        }
+    for (int i = 0; i < size; i++) {
+      if (AD::getGlobalTape().isActive()) {
+        FuncHelper->addOutput(data[i]);
       }
     }
-    END_SU2_OMP_MASTER
   }
 
   template<class T>
   FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) {
-    SU2_OMP_MASTER
-    {
-      for (int i = 0; i < size_x; i++) {
-        for (int j = 0; j < size_y; j++) {
-          if (AD::getGlobalTape().isActive()) {
-            FuncHelper->addOutput(data[i][j]);
-          }
+    for (int i = 0; i < size_x; i++) {
+      for (int j = 0; j < size_y; j++) {
+        if (AD::getGlobalTape().isActive()) {
+          FuncHelper->addOutput(data[i][j]);
         }
       }
     }
-    END_SU2_OMP_MASTER
   }
 
   FORCEINLINE void delete_handler(void *handler) {
@@ -571,13 +543,7 @@ namespace AD{
     checkpoint->clear();
   }
 
-  FORCEINLINE void EndExtFunc() {
-    SU2_OMP_MASTER
-    {
-      delete FuncHelper;
-    }
-    END_SU2_OMP_MASTER
-  }
+  FORCEINLINE void EndExtFunc() { delete FuncHelper; }
 
   FORCEINLINE bool BeginPassive() {
     if(AD::getGlobalTape().isActive()) {
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 333104600ae..9321b4eecb3 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -859,10 +859,11 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
 
     TapeActive = AD::getGlobalTape().isActive();
 
-    AD::StartExtFunc(false, false);
-
-    AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
-
+    SU2_OMP_MASTER {
+      AD::StartExtFunc(false, false);
+      AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
+    }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     AD::StopRecording();
@@ -933,33 +934,6 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     if (!mesh_deform) KindPrecond = config->GetKind_DiscAdj_Linear_Prec();
     else              KindPrecond = config->GetKind_Deform_Linear_Solver_Prec();
 
-    /*--- Start recording if it was stopped for the linear solver ---*/
-
-    AD::StartRecording();
-
-    SU2_OMP_BARRIER
-
-    AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize());
-
-    SU2_OMP_BARRIER
-
-#ifdef CODI_REVERSE_TYPE
-    SU2_OMP_MASTER
-    {
-      AD::FuncHelper->addUserData(&LinSysRes);
-      AD::FuncHelper->addUserData(&LinSysSol);
-      AD::FuncHelper->addUserData(&Jacobian);
-      AD::FuncHelper->addUserData(geometry);
-      AD::FuncHelper->addUserData(config);
-      AD::FuncHelper->addUserData(this);
-    }
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-
-    AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
-    SU2_OMP_BARRIER
-#endif
-
     /*--- Build preconditioner for the transposed Jacobian ---*/
 
     if (RequiresTranspose) Jacobian.TransposeInPlace();
@@ -983,11 +957,31 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
         break;
     }
 
+    /*--- Start recording if it was stopped for the linear solver ---*/
+#ifdef CODI_REVERSE_TYPE
+    AD::StartRecording();
     SU2_OMP_BARRIER
 
-    AD::EndExtFunc();
+    SU2_OMP_MASTER {
+      AD::SetExtFuncOut(&LinSysSol[0], LinSysSol.GetLocSize());
+      AD::FuncHelper->addUserData(&LinSysRes);
+      AD::FuncHelper->addUserData(&LinSysSol);
+      AD::FuncHelper->addUserData(&Jacobian);
+      AD::FuncHelper->addUserData(geometry);
+      AD::FuncHelper->addUserData(config);
+      AD::FuncHelper->addUserData(this);
+    }
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
+
+    AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
+    SU2_OMP_BARRIER
 
+    SU2_OMP_MASTER
+    AD::EndExtFunc();
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
+#endif
   }
 
   return IterLinSol;
diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp
index 709bb251d70..d80eb4b306a 100644
--- a/Common/src/linear_algebra/CSysSolve_b.cpp
+++ b/Common/src/linear_algebra/CSysSolve_b.cpp
@@ -57,27 +57,20 @@ void CSysSolve_b<ScalarType>::Solve_b(const codi::RealReverse::Real* x, codi::Re
   /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/
 
   SU2_OMP_BARRIER
-  SU2_OMP_MASTER
-  {
-    for (unsigned long i = 0; i < n; i++) {
-      (*LinSysRes_b)[i] = y_b[i];
-      (*LinSysSol_b)[i] = 0.0;
-    }
+  SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads()))
+  for (unsigned long i = 0; i < n; i++) {
+    (*LinSysRes_b)[i] = y_b[i];
+    (*LinSysSol_b)[i] = 0.0;
   }
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
+  END_SU2_OMP_FOR
 
   solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config, false);
 
-  SU2_OMP_BARRIER
-  SU2_OMP_MASTER
-  {
-    for (unsigned long i = 0; i < n; i ++) {
-      x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i));
-    }
+  SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads()))
+  for (unsigned long i = 0; i < n; i ++) {
+    x_b[i] = SU2_TYPE::GetValue((*LinSysSol_b)[i]);
   }
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
+  END_SU2_OMP_FOR
 }
 
 template class CSysSolve_b<su2mixedfloat>;

From e174bacf79664e043eb17852cc991667a90d9f88 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Wed, 17 Mar 2021 17:36:57 +0000
Subject: [PATCH 27/57] move master into some solver methods

---
 .../include/solvers/CFVMFlowSolverBase.hpp    | 20 ++----
 .../include/solvers/CFVMFlowSolverBase.inl    |  7 +++
 SU2_CFD/src/solvers/CFEASolver.cpp            |  2 -
 SU2_CFD/src/solvers/CSolver.cpp               | 61 +++++++++++--------
 SU2_CFD/src/solvers/CTurbSolver.cpp           |  3 -
 5 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index 017a46340ee..2525256259b 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -825,17 +825,11 @@ class CFVMFlowSolverBase : public CSolver {
     CompleteComms(geometry, config, SOLUTION);
 
     if (!adjoint) {
-      SU2_OMP_MASTER {
-        /*--- Compute the root mean square residual ---*/
-
-        SetResidual_RMS(geometry, config);
-
-        /*--- For verification cases, compute the global error metrics. ---*/
+      /*--- Compute the root mean square residual ---*/
+      SetResidual_RMS(geometry, config);
 
-        ComputeVerificationError(geometry, config);
-      }
-      END_SU2_OMP_MASTER
-      SU2_OMP_BARRIER
+      /*--- For verification cases, compute the global error metrics. ---*/
+      ComputeVerificationError(geometry, config);
     }
 
   }
@@ -941,10 +935,7 @@ class CFVMFlowSolverBase : public CSolver {
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual ---*/
-    SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
   }
 
   /*!
@@ -977,10 +968,7 @@ class CFVMFlowSolverBase : public CSolver {
     CompleteComms(geometry, config, SOLUTION);
 
     /*--- For verification cases, compute the global error metrics. ---*/
-    SU2_OMP_MASTER
     ComputeVerificationError(geometry, config);
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
   }
 
   /*!
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index f66d4b7da15..db290675173 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -465,6 +465,7 @@ void CFVMFlowSolverBase<V, R>::Viscous_Residual_impl(unsigned long iEdge, CGeome
 
 template <class V, ENUM_REGIME R>
 void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CConfig* config) {
+
   /*--- The errors only need to be computed on the finest grid. ---*/
   if (MGLevel != MESH_0) return;
 
@@ -479,6 +480,8 @@ void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CCo
        (config->GetInnerIter() == 1));
   if (!write_heads) return;
 
+  SU2_OMP_MASTER {
+
   /*--- Check if there actually is an exact solution for this
         verification case, if computed at all. ---*/
   if (VerificationSolution && VerificationSolution->ExactSolutionKnown()) {
@@ -518,6 +521,10 @@ void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CCo
 
     PrintVerificationError(config);
   }
+
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 template <class V, ENUM_REGIME R>
diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp
index 6d1675e1f2d..2145310cb77 100644
--- a/SU2_CFD/src/solvers/CFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CFEASolver.cpp
@@ -1881,9 +1881,7 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual. ---*/
-    SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
-    END_SU2_OMP_MASTER
 
     }
     END_SU2_OMP_PARALLEL
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index ba62e146500..6b8361ea23d 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -2153,6 +2153,8 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config)
 
   if (geometry->GetMGLevel() != MESH_0) return;
 
+  SU2_OMP_MASTER {
+
   /*--- Set the L2 Norm residual in all the processors. ---*/
 
   vector<su2double> rbuf_res(nVar);
@@ -2185,30 +2187,36 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config)
 
   /*--- Set the Maximum residual in all the processors. ---*/
 
-  if (config->GetComm_Level() != COMM_FULL) return;
+  if (config->GetComm_Level() == COMM_FULL) {
 
-  const unsigned long nProcessor = size;
+    const unsigned long nProcessor = size;
 
-  su2activematrix rbuf_residual(nProcessor,nVar);
-  su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
-  su2activematrix rbuf_coord(nProcessor*nVar, nDim);
+    su2activematrix rbuf_residual(nProcessor,nVar);
+    su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
+    su2activematrix rbuf_coord(nProcessor*nVar, nDim);
 
-  SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
 
-  for (unsigned short iVar = 0; iVar < nVar; iVar++) {
-    for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
-      AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+    for (unsigned short iVar = 0; iVar < nVar; iVar++) {
+      for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
+        AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+      }
     }
   }
 
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) {
 
   if (geometry->GetMGLevel() != MESH_0) return;
 
+  SU2_OMP_MASTER {
+
   /*--- Set the L2 Norm residual in all the processors. ---*/
 
   vector<su2double> rbuf_res(nVar);
@@ -2220,26 +2228,30 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config)
     Residual_BGS[iVar] = max(EPS*EPS, sqrt(rbuf_res[iVar]/Global_nPointDomain));
   }
 
-  if (config->GetComm_Level() != COMM_FULL) return;
+  if (config->GetComm_Level() == COMM_FULL) {
 
-  /*--- Set the Maximum residual in all the processors. ---*/
+    /*--- Set the Maximum residual in all the processors. ---*/
 
-  const unsigned long nProcessor = size;
+    const unsigned long nProcessor = size;
 
-  su2activematrix rbuf_residual(nProcessor,nVar);
-  su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
-  su2activematrix rbuf_coord(nProcessor*nVar, nDim);
+    su2activematrix rbuf_residual(nProcessor,nVar);
+    su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
+    su2activematrix rbuf_coord(nProcessor*nVar, nDim);
 
-  SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
 
-  for (unsigned short iVar = 0; iVar < nVar; iVar++) {
-    for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
-      AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+    for (unsigned short iVar = 0; iVar < nVar; iVar++) {
+      for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
+        AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+      }
     }
   }
 
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
@@ -4089,10 +4101,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
   END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
-  SU2_OMP_MASTER
   SetResidual_BGS(geometry, config);
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
 
   }
   END_SU2_OMP_PARALLEL
diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp
index 5fa99ec3dd5..acf72dae705 100644
--- a/SU2_CFD/src/solvers/CTurbSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSolver.cpp
@@ -570,10 +570,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
   SU2_OMP_BARRIER
 
   /*--- Compute the root mean square residual ---*/
-  SU2_OMP_MASTER
   SetResidual_RMS(geometry, config);
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
 }
 
 void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) {

From 218262253bf762f80ab0725d819022357ec5ccb5 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Wed, 17 Mar 2021 19:03:22 +0000
Subject: [PATCH 28/57] try to have less "end master"

---
 .../include/solvers/CFVMFlowSolverBase.hpp    | 37 +++++++-----
 SU2_CFD/src/solvers/CEulerSolver.cpp          | 60 +++++++------------
 SU2_CFD/src/solvers/CIncEulerSolver.cpp       |  5 +-
 SU2_CFD/src/solvers/CNSSolver.cpp             | 15 +----
 4 files changed, 46 insertions(+), 71 deletions(-)

diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index 2525256259b..0cddca7b081 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -34,6 +34,15 @@ class CNumericsSIMD;
 
 template <class VariableType, ENUM_REGIME FlowRegime>
 class CFVMFlowSolverBase : public CSolver {
+ private:
+  static void recursiveAssign() {}
+
+  template<class U, class V, class... Ts>
+  static void recursiveAssign(U& d, const V& s, Ts&&... otherPairs) {
+    d = s;
+    recursiveAssign(otherPairs...);
+  }
+
  protected:
   static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */
   static constexpr size_t MAXNVAR = VariableType::MAXNVAR; /*!< \brief Max number of variables, for static arrays. */
@@ -43,6 +52,18 @@ class CFVMFlowSolverBase : public CSolver {
 
   unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
 
+  /*!
+   * \brief Utility to set the values of member variables safely, so that the new values are seen by all threads.
+   * \param[in] lhsRhsPairs - Pairs of destination and source, e.g. a,0,b,-1.
+   */
+  template<class... Ts>
+  static void ompMasterAssignBarrier(Ts&&... lhsRhsPairs) {
+    SU2_OMP_MASTER
+    recursiveAssign(lhsRhsPairs...);
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
+  }
+
   su2double Mach_Inf = 0.0;          /*!< \brief Mach number at the infinity. */
   su2double Density_Inf = 0.0;       /*!< \brief Density at the infinity. */
   su2double Energy_Inf = 0.0;        /*!< \brief Energy at the infinity. */
@@ -318,14 +339,7 @@ class CFVMFlowSolverBase : public CSolver {
      *    Critical sections are used for this instead of reduction
      *    clauses for compatibility with OpenMP 2.0 (Windows...). ---*/
 
-    SU2_OMP_MASTER
-    {
-      Min_Delta_Time = 1e30;
-      Max_Delta_Time = 0.0;
-      Global_Delta_UnstTimeND = 1e30;
-    }
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
+    ompMasterAssignBarrier(Min_Delta_Time,1e30, Max_Delta_Time,0.0, Global_Delta_UnstTimeND,1e30);
 
     /*--- Loop domain points. ---*/
 
@@ -981,12 +995,7 @@ class CFVMFlowSolverBase : public CSolver {
     const auto& Gradient_Primitive = nodes->GetGradient_Primitive();
     auto& StrainMag = nodes->GetStrainMag();
 
-    SU2_OMP_MASTER {
-      StrainMag_Max = 0.0;
-      Omega_Max = 0.0;
-    }
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
+    ompMasterAssignBarrier(StrainMag_Max,0.0, Omega_Max,0.0);
 
     su2double strainMax = 0.0, omegaMax = 0.0;
 
diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp
index 0ad59dfa87d..b14f0e2e0e9 100644
--- a/SU2_CFD/src/solvers/CEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CEulerSolver.cpp
@@ -1757,67 +1757,47 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
                                config->GetKind_Upwind_Flow() == SLAU ||
                                config->GetKind_Upwind_Flow() == SLAU2);
 
-  /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/
-
-  if (fixed_cl && !disc_adjoint && !cont_adjoint) {
-    SU2_OMP_MASTER
-    SetFarfield_AoA(geometry, solver_container, config, iMesh, Output);
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
-
   /*--- Set the primitive variables ---*/
 
-  SU2_OMP_MASTER
-  ErrorCounter = 0;
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
+  ompMasterAssignBarrier(ErrorCounter, 0);
 
   SU2_OMP_ATOMIC
   ErrorCounter += SetPrimitive_Variables(solver_container, config);
+  SU2_OMP_BARRIER
 
-  if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) {
-    SU2_OMP_BARRIER
-    SU2_OMP_MASTER
-    {
+  SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/
+
+    if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) {
       unsigned long tmp = ErrorCounter;
       SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
       config->SetNonphysical_Points(ErrorCounter);
     }
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
 
-  /*--- Compute the engine properties ---*/
+    /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/
 
-  if (engine) {
-    SU2_OMP_MASTER
-    GetPower_Properties(geometry, config, iMesh, Output);
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
+    if (fixed_cl && !disc_adjoint && !cont_adjoint) {
+      SetFarfield_AoA(geometry, solver_container, config, iMesh, Output);
+    }
+
+    /*--- Compute the engine properties ---*/
 
-  /*--- Compute the actuator disk properties and distortion levels ---*/
+    if (engine) GetPower_Properties(geometry, config, iMesh, Output);
 
-  if (actuator_disk) {
-    SU2_OMP_MASTER
-    {
+    /*--- Compute the actuator disk properties and distortion levels ---*/
+
+    if (actuator_disk) {
       Set_MPI_ActDisk(solver_container, geometry, config);
       GetPower_Properties(geometry, config, iMesh, Output);
       SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output);
     }
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
 
-  /*--- Compute NearField MPI ---*/
+    /*--- Compute NearField MPI ---*/
+
+    if (nearfield) Set_MPI_Nearfield(geometry, config);
 
-  if (nearfield) {
-    SU2_OMP_MASTER
-    Set_MPI_Nearfield(geometry, config);
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
   }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- Artificial dissipation ---*/
 
diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
index 9d57320430d..8f12221be2e 100644
--- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
@@ -825,10 +825,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
 
   /*--- Set the primitive variables ---*/
 
-  SU2_OMP_MASTER
-  ErrorCounter = 0;
-  END_SU2_OMP_MASTER
-  SU2_OMP_BARRIER
+  ompMasterAssignBarrier(ErrorCounter, 0);
 
   SU2_OMP_ATOMIC
   ErrorCounter += SetPrimitive_Variables(solver_container, config);
diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp
index 09e78f646ea..4ae2992dd4a 100644
--- a/SU2_CFD/src/solvers/CNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CNSSolver.cpp
@@ -86,13 +86,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
    turbulence solver, and post) only temperature and velocity are needed ---*/
 
   const auto nPrimVarGrad_bak = nPrimVarGrad;
-  if (Output) {
-    SU2_OMP_BARRIER
-    SU2_OMP_MASTER
-    nPrimVarGrad = 1+nDim;
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
+  if (Output) ompMasterAssignBarrier(nPrimVarGrad, 1+nDim);
 
   if (config->GetReconstructionGradientRequired() && muscl && !center) {
     switch (config->GetKind_Gradient_Method_Recon()) {
@@ -114,12 +108,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
     SetPrimitive_Gradient_LS(geometry, config);
   }
 
-  if (Output) {
-    SU2_OMP_MASTER
-    nPrimVarGrad = nPrimVarGrad_bak;
-    END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
-  }
+  if (Output) ompMasterAssignBarrier(nPrimVarGrad, nPrimVarGrad_bak);
 
   /*--- Compute the limiters ---*/
 

From 94dafb4f1137c554e221db3e812ee909bcadc1e3 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 01:00:05 +0000
Subject: [PATCH 29/57] omp directives in DiscAdjSolver

---
 SU2_CFD/include/solvers/CDiscAdjSolver.hpp |  40 +-
 SU2_CFD/include/solvers/CSolver.hpp        |  23 --
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp     | 447 +++++++--------------
 SU2_CFD/src/variables/CVariable.cpp        |   6 +
 4 files changed, 160 insertions(+), 356 deletions(-)

diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
index c5b5dcd4138..7379bf3120f 100644
--- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
@@ -38,9 +38,17 @@
  */
 class CDiscAdjSolver final : public CSolver {
 private:
+  static constexpr size_t MAXNDIM = 3;  /*!< \brief Max number of space dimensions, used in some static arrays. */
+  static constexpr size_t MAXNVAR = 32; /*!< \brief Max number of variables, for static arrays. */
+
+  static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */
+
+  unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
+
   unsigned short KindDirect_Solver;
   CSolver *direct_solver;
-  su2double **CSensitivity;      /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
+  vector<vector<su2double> > CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
+  vector<su2double> Sens_Geo;    /*!< \brief Total shape sensitivity for each monitored boundary. */
   su2double Total_Sens_Mach;     /*!< \brief Total mach sensitivity coefficient for all the boundaries. */
   su2double Total_Sens_AoA;      /*!< \brief Total angle of attack sensitivity coefficient for all the boundaries. */
   su2double Total_Sens_Geo;      /*!< \brief Total shape sensitivity coefficient for all the boundaries. */
@@ -52,8 +60,6 @@ class CDiscAdjSolver final : public CSolver {
   su2double Mach, Alpha, Beta, Pressure, Temperature, BPressure, ModVel;
   su2double TemperatureRad, Total_Sens_Temp_Rad;
 
-  su2double *Solution_Geometry; /*!< \brief Auxiliary vector for the geometry solution (dimension nDim instead of nVar). */
-
   CDiscAdjVariable* nodes = nullptr;  /*!< \brief The highest level in the variable hierarchy this solver can safely use. */
 
   /*!
@@ -66,7 +72,7 @@ class CDiscAdjSolver final : public CSolver {
   /*!
    * \brief Constructor of the class.
    */
-  CDiscAdjSolver(void);
+  CDiscAdjSolver() = default;
 
   /*!
    * \overload
@@ -88,7 +94,7 @@ class CDiscAdjSolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjSolver(void) override;
+  ~CDiscAdjSolver() override;
 
   /*!
    * \brief Performs the preprocessing of the adjoint AD-based solver.
@@ -115,14 +121,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void SetAdjoint_Output(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Sets the adjoint values of the output of the mesh deformation iteration
-   *        before evaluation of the tape.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] config - The particular config.
-   */
-  void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) override;
-
   /*!
    * \brief Sets the adjoint values of the input variables of the flow (+turb.) iteration
    *        after tape has been evaluated.
@@ -131,14 +129,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] solver_container - The solver container holding all solutions.
-   * \param[in] config - The particular config.
-   */
-  void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) override;
-
   /*!
    * \brief Set the surface sensitivity.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -225,14 +215,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void SetRecording(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Prepare the solver for a new recording.
-   * \param[in] kind_recording - Kind of AD recording.
-   */
-  void SetMesh_Recording(CGeometry **geometry,
-                         CVolumetricMovement *grid_movement,
-                         CConfig *config) override;
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp
index 7071aae9ae8..57b01c685b8 100644
--- a/SU2_CFD/include/solvers/CSolver.hpp
+++ b/SU2_CFD/include/solvers/CSolver.hpp
@@ -3655,13 +3655,6 @@ class CSolver {
    */
   inline virtual void SetAdjoint_Output(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] config - The particular config.
-   */
-  inline virtual void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) {}
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - The geometrical definition of the problem.
@@ -3670,14 +3663,6 @@ class CSolver {
    */
   inline virtual void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] solver_container - The solver container holding all solutions.
-   * \param[in] config - The particular config.
-   */
-  inline virtual void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {}
-
   /*!
    * \brief  A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -3860,14 +3845,6 @@ class CSolver {
    */
   inline virtual void SetRecording(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] kind_recording - Kind of AD recording.
-   */
-  inline virtual void SetMesh_Recording(CGeometry **geometry,
-                                        CVolumetricMovement *grid_movement,
-                                        CConfig *config) {}
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index cb65dae84be..a2e305aa621 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -27,23 +27,12 @@
 
 #include "../../include/solvers/CDiscAdjSolver.hpp"
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
+#include "../../../Common/include/parallelization/omp_structure.hpp"
 
-CDiscAdjSolver::CDiscAdjSolver(void) : CSolver () {
-
-}
-
-CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config)  : CSolver() {
-
-}
+CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config)  : CSolver() {}
 
 CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
-  unsigned short iVar, iMarker, iDim;
-  unsigned long iVertex;
-  string text_line, mesh_filename;
-  ifstream restart_file;
-  string filename, AdjExt;
-
   adjoint = true;
 
   nVar = direct_solver->GetnVar();
@@ -51,21 +40,17 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
 
   /*--- Initialize arrays to NULL ---*/
 
-  CSensitivity = nullptr;
-
   /*-- Store some information about direct solver ---*/
   this->KindDirect_Solver = Kind_Solver;
   this->direct_solver = direct_solver;
 
-
   nMarker      = config->GetnMarker_All();
   nPoint       = geometry->GetnPoint();
   nPointDomain = geometry->GetnPointDomain();
 
-  /*--- Define some auxiliary vectors related to the residual ---*/
+  omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE);
 
-  Residual      = new su2double[nVar];         for (iVar = 0; iVar < nVar; iVar++) Residual[iVar]      = 1.0;
-  Solution_Geometry = new su2double[nDim];     for (iDim = 0; iDim < nDim; iDim++) Solution_Geometry[iDim] = 1.0;
+  /*--- Define some auxiliary vectors related to the residual ---*/
 
   Residual_RMS.resize(nVar,1.0);
   Residual_Max.resize(nVar,1.0);
@@ -82,24 +67,16 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
-
-  Solution = new su2double[nVar];
-
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
   /*--- Sensitivity definition and coefficient in all the markers ---*/
 
-  CSensitivity = new su2double* [nMarker];
-
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
-    unsigned long nVertex = geometry->nVertex[iMarker];
-    CSensitivity[iMarker] = new su2double [nVertex];
-
-    for (iVertex = 0; iVertex < nVertex; iVertex++)
-      CSensitivity[iMarker][iVertex] = 0.0;
+  CSensitivity.resize(nMarker);
+  for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) {
+    const auto nVertex = geometry->nVertex[iMarker];
+    CSensitivity[iMarker].resize(nVertex, 0.0);
   }
 
+  Sens_Geo.resize(config->GetnMarker_Monitoring(), 0.0);
+
   /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/
 
   nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config);
@@ -124,47 +101,41 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
   }
 }
 
-CDiscAdjSolver::~CDiscAdjSolver(void) {
-
-  unsigned short iMarker;
-
-  if (CSensitivity != nullptr) {
-    for (iMarker = 0; iMarker < nMarker; iMarker++) {
-      delete [] CSensitivity[iMarker];
-    }
-    delete [] CSensitivity;
-  }
-
-  delete nodes;
-}
+CDiscAdjSolver::~CDiscAdjSolver(void) { delete nodes; }
 
 void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
-  bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND;
-  bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
+  const bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND;
+  const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
 
   unsigned long iPoint;
   unsigned short iVar;
 
   /*--- Reset the solution to the initial (converged) solution ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (iPoint = 0; iPoint < nPoint; iPoint++) {
     direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint));
   }
+  END_SU2_OMP_FOR
 
   if (time_n_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPoint; iPoint++) {
       for (iVar = 0; iVar < nVar; iVar++) {
         AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint)[iVar]);
       }
     }
+    END_SU2_OMP_FOR
   }
   if (time_n1_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPoint; iPoint++) {
       for (iVar = 0; iVar < nVar; iVar++) {
         AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint)[iVar]);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Set the Jacobian to zero since this is not done inside the fluid iteration
@@ -178,64 +149,12 @@ void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
 }
 
-void CDiscAdjSolver::SetMesh_Recording(CGeometry** geometry, CVolumetricMovement *grid_movement, CConfig *config) {
-
-
-//  bool time_n_needed  = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) ||
-//      (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)),
-//  time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND;
-
-//  unsigned long ExtIter = config->GetExtIter();
-
-  unsigned long iPoint;
-  unsigned short iDim;
-
-  /*--- Reset the solution to the initial (converged) position ---*/
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    for (iDim = 0; iDim < nDim; iDim++){
-      geometry[MESH_0]->nodes->SetCoord(iPoint, iDim,nodes->GetGeometry_Direct(iPoint,iDim));
-    }
-  }
-
-  /*--- After moving all nodes, update the dual mesh. Recompute the edges and
-   dual mesh control volumes in the domain and on the boundaries. ---*/
-
-  grid_movement->UpdateDualGrid(geometry[MESH_0], config);
-
-  /*--- After updating the dual mesh, compute the grid velocities (only dynamic problems). ---*/
-//  if (time_n_needed){
-//    geometry[MESH_0]->SetGridVelocity(config, ExtIter);
-//  }
-
-  /*--- Update the multigrid structure after moving the finest grid,
-   including computing the grid velocities on the coarser levels. ---*/
-
-  grid_movement->UpdateMultiGrid(geometry, config);
-
-//  if (time_n_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//        AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint,iVar));
-//      }
-//    }
-//  }
-//  if (time_n1_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//        AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint,iVar));
-//      }
-//    }
-//  }
-
-}
-
 void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) {
 
-  bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND);
-  bool time_n_needed  = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
-  bool input          = true;
-  bool push_index     = !config->GetMultizone_Problem();
+  const bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool time_n_needed  = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
+  const bool input          = true;
+  const bool push_index     = !config->GetMultizone_Problem();
 
   /*--- Register solution at all necessary time instances and other variables on the tape ---*/
 
@@ -250,6 +169,8 @@ void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) {
 
 void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset) {
 
+  SU2_OMP_MASTER {
+
   /*--- Register farfield values as input ---*/
 
   if((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS && !config->GetBoolTurbomachinery())) {
@@ -363,12 +284,16 @@ void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, boo
   /*--- Here it is possible to register other variables as input that influence the flow solution
    * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
    * extracted in the ExtractAdjointVariables routine. ---*/
+
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjSolver::RegisterOutput(CGeometry *geometry, CConfig *config) {
 
-  bool input        = false;
-  bool push_index   = !config->GetMultizone_Problem();
+  const bool input        = false;
+  const bool push_index   = !config->GetMultizone_Problem();
 
   /*--- Register variables as output of the solver iteration ---*/
 
@@ -383,14 +308,21 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
   const su2double relax = (config->GetInnerIter()==0)? 1.0 : config->GetRelaxation_Factor_Adjoint();
 
+  su2double Solution[MAXNVAR] = {0.0};
+
   /*--- Set Residuals to zero ---*/
 
   SetResToZero();
 
+  su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0};
+  const su2double* coordMax[MAXNVAR] = {nullptr};
+  unsigned long idxMax[MAXNVAR] = {0};
+
   /*--- Set the old solution and compute residuals. ---*/
 
   if(!multizone) nodes->Set_OldSolution();
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
     const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0;
@@ -413,15 +345,37 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
       residual *= isdomain;
       Residual_RMS[iVar] += pow(residual,2);
       AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint));
+
+      /*--- Update residual information for current thread. ---*/
+      resRMS[iVar] += residual*residual;
+      if (fabs(residual) > resMax[iVar]) {
+        resMax[iVar] = fabs(residual);
+        idxMax[iVar] = iPoint;
+        coordMax[iVar] = geometry->nodes->GetCoord(iPoint);
+      }
     }
   }
+  END_SU2_OMP_FOR
+
+  /*--- Reduce residual information over all threads in this rank. ---*/
+  SU2_OMP_CRITICAL
+  for (auto iVar = 0u; iVar < nVar; iVar++) {
+    Residual_RMS[iVar] += resRMS[iVar];
+    AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
+  }
+  END_SU2_OMP_CRITICAL
+  SU2_OMP_BARRIER
 
   SetResidual_RMS(geometry, config);
 
-  SetIterLinSolver(direct_solver->System.GetIterations());
-  SetResLinSolver(direct_solver->System.GetResidual());
+  SU2_OMP_MASTER {
+    SetIterLinSolver(direct_solver->System.GetIterations());
+    SetResLinSolver(direct_solver->System.GetResidual());
+  }
+  END_SU2_OMP_MASTER
 
   if (time_n_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
       /*--- Extract the adjoint solution at time n ---*/
@@ -432,9 +386,11 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
       nodes->Set_Solution_time_n(iPoint,Solution);
     }
+    END_SU2_OMP_FOR
   }
 
   if (time_n1_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
       /*--- Extract the adjoint solution at time n-1 ---*/
@@ -445,12 +401,15 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
       nodes->Set_Solution_time_n1(iPoint,Solution);
     }
+    END_SU2_OMP_FOR
   }
 
 }
 
 void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) {
 
+  SU2_OMP_MASTER {
+
   /*--- Extract the adjoint values of the farfield values ---*/
 
   if ((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS) && !config->GetBoolTurbomachinery()) {
@@ -508,98 +467,25 @@ void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *conf
 
   /*--- Extract here the adjoint values of everything else that is registered as input in RegisterInput. ---*/
 
-}
-
-
-void CDiscAdjSolver::ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {
-
-//  bool time_n_needed  = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) ||
-//      (config->GetUnsteady_Simulation() == DT_STEPPING_2ND));
-
-//  bool time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND;
-
-//  unsigned short iVar;
-  unsigned long iPoint;
-
-  /*--- Set Residuals to zero ---*/
-
-//  for (iVar = 0; iVar < nVar; iVar++){
-//      SetRes_RMS(iVar,0.0);
-//      SetRes_Max(iVar,0.0,0);
-//  }
-
-  /*--- Set the old solution ---*/
-
-  nodes->Set_OldSolution_Geometry();
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-
-    /*--- Extract the adjoint solution ---*/
-
-    if (config->GetMultizone_Problem())
-      geometry->nodes->GetAdjointCoord_LocalIndex(iPoint, Solution_Geometry);
-    else
-      geometry->nodes->GetAdjointCoord(iPoint, Solution_Geometry);
-
-    /*--- Store the adjoint solution ---*/
-
-    nodes->SetSolution_Geometry(iPoint,Solution_Geometry);
-
-  }
-
-//  if (time_n_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//
-//      /*--- Extract the adjoint solution at time n ---*/
-//
-//      direct_solver->GetNodes()->GetAdjointSolution_time_n(iPoint,Solution);
-//
-//      /*--- Store the adjoint solution at time n ---*/
-//
-//      nodes->Set_Solution_time_n(iPoint,Solution);
-//    }
-//  }
-//  if (time_n1_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//
-//      /*--- Extract the adjoint solution at time n-1 ---*/
-//
-//      direct_solver->GetNodes()->GetAdjointSolution_time_n1(iPoint,Solution);
-//
-//      /*--- Store the adjoint solution at time n-1 ---*/
-//
-//      nodes->Set_Solution_time_n1(iPoint,Solution);
-//    }
-//  }
-
-  /*--- Set the residuals ---*/
-
-//  for (iPoint = 0; iPoint < nPointDomain; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//          residual = node[iPoint]->GetSolution_Geometry(iVar) - node[iPoint]->Get_OldSolution_Geometry(iVar);
-//
-//          Residual_RMS[iVar] += residual*residual;
-//          AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint));
-//      }
-//  }
-//
-//  SetResidual_RMS(geometry, config);
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) {
 
-  bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST ||
-                    config->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST ||
+                          config->GetTime_Marching() == DT_STEPPING_2ND);
 
-  unsigned short iVar;
-  unsigned long iPoint;
+  su2double Solution[MAXNVAR] = {0.0};
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    for (iVar = 0; iVar < nVar; iVar++) {
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
+    for (auto iVar = 0u; iVar < nVar; iVar++) {
       Solution[iVar] = nodes->GetSolution(iPoint,iVar);
     }
     if (dual_time) {
-      for (iVar = 0; iVar < nVar; iVar++) {
+      for (auto iVar = 0u; iVar < nVar; iVar++) {
         Solution[iVar] += nodes->GetDual_Time_Derivative(iPoint,iVar);
       }
     }
@@ -610,45 +496,22 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) {
       direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution);
     }
   }
-}
-
-void CDiscAdjSolver::SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config){
-
-//  bool dual_time = (config->GetUnsteady_Simulation() == DT_STEPPING_1ST ||
-//      config->GetUnsteady_Simulation() == DT_STEPPING_2ND);
-
-  unsigned short iDim;
-  unsigned long iPoint;
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    for (iDim = 0; iDim < nDim; iDim++){
-      Solution_Geometry[iDim] = 0.0;
-    }
-//    if (dual_time){
-//      for (iDim = 0; iDim < nVar; iDim++){
-//        Solution_Geometry[iDim] += nodes->GetDual_Time_Derivative_Geometry(iPoint,iDim);
-//      }
-//    }
-    for (iDim = 0; iDim < nDim; iDim++){
-      nodes->SetSensitivity(iPoint,iDim, Solution_Geometry[iDim]);
-    }
-    geometry->nodes->SetAdjointCoord(iPoint, Solution_Geometry);
-  }
-
+  END_SU2_OMP_FOR
 }
 
 void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) {
 
-  unsigned long iPoint;
-  unsigned short iDim;
-  su2double *Coord, Sensitivity, eps;
+  const bool time_stepping = (config->GetTime_Marching() != STEADY);
+  const su2double eps = config->GetVenkat_LimiterCoeff()*config->GetAdjSharp_LimiterCoeff();
 
-  bool time_stepping = (config->GetTime_Marching() != STEADY);
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    Coord = geometry->nodes->GetCoord(iPoint);
+    auto Coord = geometry->nodes->GetCoord(iPoint);
+
+    for (auto iDim = 0u; iDim < nDim; iDim++) {
 
-    for (iDim = 0; iDim < nDim; iDim++) {
+      su2double Sensitivity = 0.0;
 
       if(config->GetMultizone_Problem()) {
         Sensitivity = geometry->nodes->GetAdjointSolution(iPoint, iDim);
@@ -663,119 +526,100 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve
 
       /*--- If sharp edge, set the sensitivity to 0 on that region ---*/
 
-      if (config->GetSens_Remove_Sharp()) {
-        eps = config->GetVenkat_LimiterCoeff()*config->GetRefElemLength();
-        if ( geometry->nodes->GetSharpEdge_Distance(iPoint) < config->GetAdjSharp_LimiterCoeff()*eps )
-          Sensitivity = 0.0;
+      if (config->GetSens_Remove_Sharp() && geometry->nodes->GetSharpEdge_Distance(iPoint) < eps) {
+        Sensitivity = 0.0;
       }
+
       if (!time_stepping) {
         nodes->SetSensitivity(iPoint,iDim, Sensitivity);
       } else {
-        nodes->SetSensitivity(iPoint, iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity);
+        nodes->SetSensitivity(iPoint,iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity);
       }
     }
   }
+  END_SU2_OMP_FOR
+
   SetSurface_Sensitivity(geometry, config);
+
 }
 
 void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) {
-  unsigned short iMarker, iDim, iMarker_Monitoring;
-  unsigned long iVertex, iPoint;
-  su2double *Normal, Prod, Sens = 0.0, SensDim, Area, Sens_Vertex, *Sens_Geo;
-  Total_Sens_Geo = 0.0;
-  string Monitoring_Tag, Marker_Tag;
 
-  Sens_Geo = new su2double[config->GetnMarker_Monitoring()];
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    Sens_Geo[iMarker_Monitoring] = 0.0;
-  }
+  SU2_OMP_MASTER
+  for (auto& x : Sens_Geo) x = 0.0;
+  END_SU2_OMP_MASTER
 
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
+  /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/
 
-    /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/
+  for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) {
 
-    if(config->GetSolid_Wall(iMarker)) {
+    if (!config->GetSolid_Wall(iMarker)) continue;
 
-      Sens = 0.0;
+    su2double Sens = 0.0;
 
-      for (iVertex = 0; iVertex < geometry->GetnVertex(iMarker); iVertex++) {
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
+    for (auto iVertex = 0ul; iVertex < geometry->GetnVertex(iMarker); iVertex++) {
 
-        iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
-        Normal = geometry->vertex[iMarker][iVertex]->GetNormal();
-        Prod = 0.0;
-        for (iDim = 0; iDim < nDim; iDim++) {
-          /*--- retrieve the gradient calculated with AD -- */
-          SensDim = nodes->GetSensitivity(iPoint,iDim);
+      /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/
 
-          /*--- calculate scalar product for projection onto the normal vector ---*/
-          Prod += Normal[iDim]*SensDim;
-
-        }
-
-        Area = GeometryToolbox::Norm(nDim, Normal);
+      const auto iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
+      const auto Normal = geometry->vertex[iMarker][iVertex]->GetNormal();
 
+      su2double Sens_Vertex = 0.0;
+      for (auto iDim = 0u; iDim < nDim; iDim++) {
+        Sens_Vertex += Normal[iDim] * nodes->GetSensitivity(iPoint,iDim);
+      }
+      Sens_Vertex /= GeometryToolbox::Norm(nDim, Normal);
 
-        /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/
+      CSensitivity[iMarker][iVertex] = -Sens_Vertex;
+      Sens += pow(Sens_Vertex,2);
+    }
+    END_SU2_OMP_FOR
 
-        Sens_Vertex = Prod/Area;
-        CSensitivity[iMarker][iVertex] = -Sens_Vertex;
-        Sens += Sens_Vertex*Sens_Vertex;
-      }
+    if (config->GetMarker_All_Monitoring(iMarker) == NO) continue;
 
-      if (config->GetMarker_All_Monitoring(iMarker) == YES){
+    /*--- Compute sensitivity for each surface point ---*/
 
-        /*--- Compute sensitivity for each surface point ---*/
+    const auto Marker_Tag = config->GetMarker_All_TagBound(iMarker);
 
-        for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-          Monitoring_Tag = config->GetMarker_Monitoring_TagBound(iMarker_Monitoring);
-          Marker_Tag = config->GetMarker_All_TagBound(iMarker);
-          if (Marker_Tag == Monitoring_Tag) {
-            Sens_Geo[iMarker_Monitoring] = Sens;
-          }
-        }
+    for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) {
+      if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) {
+        atomicAdd(Sens_Geo[iMarker_Mon], Sens);
+        break;
       }
     }
   }
 
-#ifdef HAVE_MPI
-  su2double *MySens_Geo;
-  MySens_Geo = new su2double[config->GetnMarker_Monitoring()];
+  SU2_OMP_MASTER {
+    auto local = Sens_Geo;
+    SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    MySens_Geo[iMarker_Monitoring] = Sens_Geo[iMarker_Monitoring];
-    Sens_Geo[iMarker_Monitoring]   = 0.0;
+    Total_Sens_Geo = 0.0;
+    for (auto x : Sens_Geo) Total_Sens_Geo += x;
   }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
-  SU2_MPI::Allreduce(MySens_Geo, Sens_Geo, config->GetnMarker_Monitoring(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-  delete [] MySens_Geo;
-#endif
+}
 
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    Sens_Geo[iMarker_Monitoring] = sqrt(Sens_Geo[iMarker_Monitoring]);
-    Total_Sens_Geo   += Sens_Geo[iMarker_Monitoring];
-  }
+void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh,
+                                   unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) {
 
-  delete [] Sens_Geo;
+  const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST) || (config->GetTime_Marching() == DT_STEPPING_2ND);
 
-}
+  if (!dual_time) return;
 
-void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) {
-  bool dual_time_1st = (config_container->GetTime_Marching() == DT_STEPPING_1ST);
-  bool dual_time_2nd = (config_container->GetTime_Marching() == DT_STEPPING_2ND);
-  bool dual_time = (dual_time_1st || dual_time_2nd);
-  su2double *solution_n, *solution_n1;
-  unsigned long iPoint;
-  unsigned short iVar;
-  if (dual_time) {
-    for (iPoint = 0; iPoint<geometry->GetnPoint(); iPoint++) {
-      solution_n = nodes->GetSolution_time_n(iPoint);
-      solution_n1 = nodes->GetSolution_time_n1(iPoint);
-      for (iVar=0; iVar < nVar; iVar++) {
-        nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar));
-        nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]);
-      }
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint<geometry->GetnPoint(); iPoint++) {
+    const auto solution_n = nodes->GetSolution_time_n(iPoint);
+    const auto solution_n1 = nodes->GetSolution_time_n1(iPoint);
+
+    for (auto iVar = 0u; iVar < nVar; iVar++) {
+      nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar));
+      nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
@@ -812,7 +656,6 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
   /*--- Read all lines in the restart file ---*/
 
   long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0;
-  unsigned short rbuf_NotMatching = 0, sbuf_NotMatching = 0;
 
   /*--- Skip coordinates ---*/
   unsigned short skipVars = geometry[MESH_0]->GetnDim();
@@ -862,11 +705,7 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
 
   /*--- Detect a wrong solution file ---*/
 
-  if (iPoint_Global_Local < nPointDomain) { sbuf_NotMatching = 1; }
-
-  SU2_MPI::Allreduce(&sbuf_NotMatching, &rbuf_NotMatching, 1, MPI_UNSIGNED_SHORT, MPI_SUM, SU2_MPI::GetComm());
-
-  if (rbuf_NotMatching != 0) {
+  if (iPoint_Global_Local != nPointDomain) {
     SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
                    string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
   }
diff --git a/SU2_CFD/src/variables/CVariable.cpp b/SU2_CFD/src/variables/CVariable.cpp
index 5d16271961d..7da4faee991 100644
--- a/SU2_CFD/src/variables/CVariable.cpp
+++ b/SU2_CFD/src/variables/CVariable.cpp
@@ -113,6 +113,7 @@ void CVariable::Restore_BGSSolution_k() {
 void CVariable::SetExternalZero() { parallelSet(External.size(), 0.0, External.data()); }
 
 void CVariable::RegisterSolution(bool input, bool push_index) {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint) {
     for(unsigned long iVar=0; iVar<nVar; ++iVar) {
       if(input) {
@@ -131,16 +132,21 @@ void CVariable::RegisterSolution(bool input, bool push_index) {
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CVariable::RegisterSolution_time_n() {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint)
     for(unsigned long iVar=0; iVar<nVar; ++iVar)
       AD::RegisterInput(Solution_time_n(iPoint,iVar));
+  END_SU2_OMP_FOR
 }
 
 void CVariable::RegisterSolution_time_n1() {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint)
     for(unsigned long iVar=0; iVar<nVar; ++iVar)
       AD::RegisterInput(Solution_time_n1(iPoint,iVar));
+  END_SU2_OMP_FOR
 }

From 8eb30941d8535dfefd3e67c2bf55534c83e9dc39 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 10:56:10 +0000
Subject: [PATCH 30/57] fixes

---
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp | 78 +++++++++++---------------
 1 file changed, 32 insertions(+), 46 deletions(-)

diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index a2e305aa621..2300e521a71 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -79,6 +79,8 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
 
   /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/
 
+  su2double Solution[MAXNVAR] = {1e-16};
+
   nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config);
   SetBaseClassPointerToNodes();
 
@@ -624,20 +626,12 @@ void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_contain
 
 void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
 
-  unsigned short iVar, iMesh;
-  unsigned long iPoint, index, iChildren, Point_Fine, counter;
-  su2double Area_Children, Area_Parent, *Solution_Fine;
-  string restart_filename, filename;
-
-  bool compressible = (config->GetKind_Regime() == COMPRESSIBLE);
-  bool incompressible = (config->GetKind_Regime() == INCOMPRESSIBLE);
-  bool rans = ((config->GetKind_Solver() == DISC_ADJ_RANS) || (config->GetKind_Solver() == DISC_ADJ_INC_RANS)) ;
+  const bool rans = (config->GetKind_Turb_Model() != NONE);
 
   /*--- Restart the solution from file information ---*/
 
-  filename = config->GetSolution_AdjFileName();
-  restart_filename = config->GetObjFunc_Extension(filename);
-
+  auto filename = config->GetSolution_AdjFileName();
+  auto restart_filename = config->GetObjFunc_Extension(filename);
   restart_filename = config->GetFilename(restart_filename, "", val_iter);
 
 
@@ -653,52 +647,43 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
     Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename);
   }
 
-  /*--- Read all lines in the restart file ---*/
-
-  long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0;
-
   /*--- Skip coordinates ---*/
   unsigned short skipVars = geometry[MESH_0]->GetnDim();
 
   /*--- Skip flow adjoint variables ---*/
   if (KindDirect_Solver== RUNTIME_TURB_SYS) {
-    if (compressible) {
-      skipVars += nDim + 2;
-    }
-    if (incompressible) {
-      skipVars += nDim + 2;
-    }
+    skipVars += nDim + 2;
   }
 
   /*--- Skip flow adjoint and turbulent variables ---*/
   if (KindDirect_Solver == RUNTIME_RADIATION_SYS) {
-    if (compressible) skipVars += nDim + 2;
-    if (incompressible) skipVars += nDim + 2;
+    skipVars += nDim + 2;
     if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar();
   }
 
   /*--- Load data from the restart into correct containers. ---*/
 
-  counter = 0;
-  for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
+  unsigned long iPoint_Global_Local = 0;
+
+  for (auto iPoint_Global = 0ul; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
 
     /*--- Retrieve local index. If this node from the restart file lives
      on the current processor, we will load and instantiate the vars. ---*/
 
-    iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
+    const auto iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
 
     if (iPoint_Local > -1) {
 
       /*--- We need to store this point's data, so jump to the correct
        offset in the buffer of data from the restart file and load it. ---*/
 
-      index = counter*Restart_Vars[1] + skipVars;
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar];
-      nodes->SetSolution(iPoint_Local,Solution);
-      iPoint_Global_Local++;
+      const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars;
 
-      /*--- Increment the overall counter for how many points have been loaded. ---*/
-      counter++;
+      for (auto iVar = 0u; iVar < nVar; iVar++) {
+        nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]);
+      }
+
+      iPoint_Global_Local++;
     }
 
   }
@@ -710,20 +695,21 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
                    string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
   }
 
-  /*--- Communicate the loaded solution on the fine grid before we transfer
-   it down to the coarse levels. ---*/
-
-  for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
-    for (iPoint = 0; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) {
-      Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint);
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 0.0;
-      for (iChildren = 0; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) {
-        Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren);
-        Area_Children = geometry[iMesh-1]->nodes->GetVolume(Point_Fine);
-        Solution_Fine = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(Point_Fine);
-        for (iVar = 0; iVar < nVar; iVar++) {
-          Solution[iVar] += Solution_Fine[iVar]*Area_Children/Area_Parent;
-        }
+  /*--- Interpolate solution on coarse grids ---*/
+
+  for (auto iMesh = 1u; iMesh <= config->GetnMGLevels(); iMesh++) {
+
+    const auto& fineSol = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution();
+
+    for (auto iPoint = 0ul; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) {
+      su2double Solution[MAXNVAR] = {0.0};
+      const su2double Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint);
+
+      for (auto iChildren = 0u; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) {
+        const auto Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren);
+        const su2double weight = geometry[iMesh-1]->nodes->GetVolume(Point_Fine) / Area_Parent;
+
+        for (auto iVar = 0u; iVar < nVar; iVar++) Solution[iVar] += weight * fineSol(Point_Fine, iVar);
       }
       solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution);
     }

From a7fbcd648a4bf8f15e86881f81b4af731afd5bb4 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 11:02:42 +0000
Subject: [PATCH 31/57] more year updates

---
 Common/include/code_config.hpp                 |  2 +-
 .../include/parallelization/omp_structure.cpp  |  2 +-
 Common/lib/Makefile.am                         |  2 +-
 .../CMMSIncEulerSolution.py                    | 18 ++++--------------
 .../CreateMMSSourceTerms/CMMSIncNSSolution.py  | 18 ++++--------------
 SU2_CFD/obj/Makefile.am                        |  2 +-
 6 files changed, 12 insertions(+), 32 deletions(-)

diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index a9aabf17bca..3cbad21f08f 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -9,7 +9,7 @@
  * The SU2 Project is maintained by the SU2 Foundation
  * (http://su2foundation.org)
  *
- * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+ * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
  *
  * SU2 is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp
index 0daca1ca021..6432b6bb482 100644
--- a/Common/include/parallelization/omp_structure.cpp
+++ b/Common/include/parallelization/omp_structure.cpp
@@ -10,7 +10,7 @@
  * The SU2 Project is maintained by the SU2 Foundation
  * (http://su2foundation.org)
  *
- * Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+ * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
  *
  * SU2 is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am
index 2e698b72336..813299d0f77 100644
--- a/Common/lib/Makefile.am
+++ b/Common/lib/Makefile.am
@@ -10,7 +10,7 @@
 # The SU2 Project is maintained by the SU2 Foundation 
 # (http://su2foundation.org)
 #
-# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
index cbf999f412e..1d4d187fb83 100755
--- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
+++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
@@ -6,22 +6,12 @@
 #  \author T. Economon
 #  \version 7.1.1 "Blackbird"
 #
-# The current SU2 release has been coordinated by the
-# SU2 International Developers Society <www.su2devsociety.org>
-# with selected contributions from the open-source community.
+# SU2 Project Website: https://su2code.github.io
 #
-# The main research teams contributing to the current release are:
-#  - Prof. Juan J. Alonso's group at Stanford University.
-#  - Prof. Piero Colonna's group at Delft University of Technology.
-#  - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology.
-#  - Prof. Alberto Guardone's group at Polytechnic University of Milan.
-#  - Prof. Rafael Palacios' group at Imperial College London.
-#  - Prof. Vincent Terrapon's group at the University of Liege.
-#  - Prof. Edwin van der Weide's group at the University of Twente.
-#  - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics.
+# The SU2 Project is maintained by the SU2 Foundation
+# (http://su2foundation.org)
 #
-# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon,
-#                      Tim Albring, and the SU2 contributors.
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
index 614c458c103..c38335336aa 100755
--- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
+++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
@@ -6,22 +6,12 @@
 #  \author T. Economon
 #  \version 7.1.1 "Blackbird"
 #
-# The current SU2 release has been coordinated by the
-# SU2 International Developers Society <www.su2devsociety.org>
-# with selected contributions from the open-source community.
+# SU2 Project Website: https://su2code.github.io
 #
-# The main research teams contributing to the current release are:
-#  - Prof. Juan J. Alonso's group at Stanford University.
-#  - Prof. Piero Colonna's group at Delft University of Technology.
-#  - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology.
-#  - Prof. Alberto Guardone's group at Polytechnic University of Milan.
-#  - Prof. Rafael Palacios' group at Imperial College London.
-#  - Prof. Vincent Terrapon's group at the University of Liege.
-#  - Prof. Edwin van der Weide's group at the University of Twente.
-#  - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics.
+# The SU2 Project is maintained by the SU2 Foundation
+# (http://su2foundation.org)
 #
-# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon,
-#                      Tim Albring, and the SU2 contributors.
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/SU2_CFD/obj/Makefile.am b/SU2_CFD/obj/Makefile.am
index 30e7636a0e5..054df6fa267 100644
--- a/SU2_CFD/obj/Makefile.am
+++ b/SU2_CFD/obj/Makefile.am
@@ -10,7 +10,7 @@
 # The SU2 Project is maintained by the SU2 Foundation 
 # (http://su2foundation.org)
 #
-# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public

From 2776775bb64c68ffdc3ad935beac5de338a19627 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 11:35:25 +0000
Subject: [PATCH 32/57] dead code

---
 .../iteration/CDiscAdjFEAIteration.hpp        | 20 -----
 SU2_CFD/include/solvers/CDiscAdjSolver.hpp    |  7 --
 .../src/iteration/CDiscAdjFEAIteration.cpp    | 81 -------------------
 .../src/iteration/CDiscAdjFluidIteration.cpp  |  4 -
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        |  2 -
 5 files changed, 114 deletions(-)

diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
index 82b2485ffef..d17797c4f41 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
@@ -180,26 +180,6 @@ class CDiscAdjFEAIteration : public CIteration {
   void InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone,
                          unsigned short iInst) override;
 
-  /*!
-   * \brief Record a single iteration of the direct FEM system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] kind_recording - The kind of recording (geometry or flow).
-   */
-  void SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                    CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                    CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                    unsigned short val_iInst, unsigned short kind_recording);
-
   /*!
    * \brief Record a single iteration of the direct FEM system.
    * \param[in] solver - Container vector with all the solutions.
diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
index 7c68619a8d8..d7f94a6d316 100644
--- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
@@ -74,13 +74,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   CDiscAdjSolver() = default;
 
-  /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   */
-  CDiscAdjSolver(CGeometry *geometry, CConfig *config);
-
   /*!
    * \overload
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index 1d2af031c25..fa753a598a7 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -176,87 +176,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration
   }
 }
 
-void CDiscAdjFEAIteration::SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst,
-                                        unsigned short kind_recording) {
-  unsigned long InnerIter = config[ZONE_0]->GetInnerIter();
-  unsigned long TimeIter = config[val_iZone]->GetTimeIter(), DirectTimeIter;
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
-
-  DirectTimeIter = 0;
-  if (dynamic) {
-    DirectTimeIter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1;
-  }
-
-  /*--- Reset the tape ---*/
-
-  AD::Reset();
-
-  /*--- We only need to reset the indices if the current recording is different from the recording we want to have ---*/
-
-  if (CurrentRecording != kind_recording && (CurrentRecording != NONE)) {
-    solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0],
-                                                                   config[val_iZone]);
-
-    /*--- Clear indices of coupling variables ---*/
-
-    SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, SOLUTION_AND_MESH);
-
-    /*--- Run one iteration while tape is passive - this clears all indices ---*/
-
-    fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement,
-                           FFDBox, val_iZone, val_iInst);
-  }
-
-  /*--- Prepare for recording ---*/
-
-  solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0],
-                                                                 config[val_iZone]);
-
-  /*--- Start the recording of all operations ---*/
-
-  AD::StartRecording();
-
-  /*--- Register FEA variables ---*/
-
-  RegisterInput(solver, geometry, config, val_iZone, val_iInst, kind_recording);
-
-  /*--- Compute coupling or update the geometry ---*/
-
-  SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, kind_recording);
-
-  /*--- Set the correct direct iteration number ---*/
-
-  if (dynamic) {
-    config[val_iZone]->SetTimeIter(DirectTimeIter);
-  }
-
-  /*--- Run the direct iteration ---*/
-
-  fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement,
-                         FFDBox, val_iZone, val_iInst);
-
-  config[val_iZone]->SetTimeIter(TimeIter);
-
-  /*--- Register structural variables and objective function as output ---*/
-
-  RegisterOutput(solver, geometry, config, val_iZone, val_iInst);
-
-  /*--- Stop the recording ---*/
-
-  AD::StopRecording();
-
-  /*--- Set the recording status ---*/
-
-  CurrentRecording = kind_recording;
-
-  /* --- Reset the number of the internal iterations---*/
-
-  config[ZONE_0]->SetInnerIter(InnerIter);
-}
-
 void CDiscAdjFEAIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                         unsigned short val_iZone, unsigned short val_iInst,
                                         unsigned short kind_recording) {
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 59b212c8b9e..5eb54162866 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -443,10 +443,6 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo
 
   /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/
 
-  if (solver[iZone][iInst][MESH_0][ADJFEA_SOL]) {
-    solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[iZone][iInst][MESH_0], config[iZone]);
-  }
-
   for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
     solver[iZone][iInst][iMesh][ADJFLOW_SOL]->SetRecording(geometry[iZone][iInst][iMesh], config[iZone]);
   }
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index 09fd7c2cabf..f96765a9216 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -29,8 +29,6 @@
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
 #include "../../../Common/include/parallelization/omp_structure.hpp"
 
-CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config)  : CSolver() {}
-
 CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
   adjoint = true;

From 74f20c479afc301a6ca090b8f188e2e986f1c18e Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 12:02:41 +0000
Subject: [PATCH 33/57] more cleanup

---
 .../iteration/CDiscAdjFEAIteration.hpp        |  26 ++--
 .../iteration/CDiscAdjFluidIteration.hpp      |  45 ++----
 .../iteration/CDiscAdjHeatIteration.hpp       |  51 ++-----
 SU2_CFD/include/iteration/CIteration.hpp      |   6 -
 SU2_CFD/include/solvers/CDiscAdjSolver.hpp    |   8 ++
 .../src/iteration/CDiscAdjFluidIteration.cpp  | 135 +++++++++---------
 .../src/iteration/CDiscAdjHeatIteration.cpp   |  10 --
 7 files changed, 118 insertions(+), 163 deletions(-)

diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
index d17797c4f41..c613d449bfc 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
@@ -37,11 +37,23 @@ class CFEAIteration;
  * \brief Class for driving an iteration of the discrete adjoint FEM system.
  * \author R. Sanchez
  */
-class CDiscAdjFEAIteration : public CIteration {
+class CDiscAdjFEAIteration final : public CIteration {
  private:
   CFEAIteration* fem_iteration;    /*!< \brief Pointer to the primal iteration class. */
   unsigned short CurrentRecording; /*!< \brief Stores the current status of the recording. */
 
+  /*!
+   * \brief Load the solution for dynamic problems.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                            unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -189,7 +201,6 @@ class CDiscAdjFEAIteration : public CIteration {
    * \param[in] val_iInst - Index of the instance.
    * \param[in] kind_recording - The kind of recording (geometry or flow).
    */
-
   void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                     unsigned short val_iInst, unsigned short kind_recording) override;
 
@@ -206,15 +217,4 @@ class CDiscAdjFEAIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load solution for dynamic problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                            unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
index 93d1a9d2052..8647f709285 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
@@ -37,10 +37,22 @@ class CFluidIteration;
  * \brief Class for driving an iteration of the discrete adjoint fluid system.
  * \author T. Economon
  */
-class CDiscAdjFluidIteration : public CIteration {
+class CDiscAdjFluidIteration final : public CIteration {
  private:
   const bool turbulent;                      /*!< \brief Stores the turbulent flag. */
 
+  /*!
+   * \brief Load the unsteady solution for unsteady problems.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                             unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -126,25 +138,6 @@ class CDiscAdjFluidIteration : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Postprocess the discrete adjoint fluid iteration.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   */
-  void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                   CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                   CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                   unsigned short val_iInst) override;
-
   /*!
    * \brief Registers all input variables of the fluid iteration.
    * \param[in] solver - Container vector with all the solutions.
@@ -188,7 +181,6 @@ class CDiscAdjFluidIteration : public CIteration {
    * \param[in] val_iInst - Index of the instance.
    * \param[in] kind_recording - The kind of recording (geometry or flow).
    */
-
   void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                     unsigned short val_iInst, unsigned short kind_recording) override;
 
@@ -205,15 +197,4 @@ class CDiscAdjFluidIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load unsteady solution for unsteady problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                             unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
index 8c69d1162f8..ce981317897 100644
--- a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
@@ -35,7 +35,20 @@
  * \brief Class for driving an iteration of the discrete adjoint heat equation.
  * \author O. Burghardt
  */
-class CDiscAdjHeatIteration : public CIteration {
+class CDiscAdjHeatIteration final : public CIteration {
+
+  /*!
+   * \brief Load the unsteady solution for unsteady problems.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance layer.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                             unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -108,31 +121,6 @@ class CDiscAdjHeatIteration : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Outputs desired files and quantities for the discrete adjoint fluid system.
-   */
-  void Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned long InnerIter,
-              bool StopCalc, unsigned short val_iZone, unsigned short val_iInst);
-
-  /*!
-   * \brief Perform a single iteration of the adjoint fluid system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance layer.
-   */
-  void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                   CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                   CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                   unsigned short val_iInst) override;
-
   /*!
    * \brief Registers all input variables of the fluid iteration.
    * \param[in] solver - Container vector with all the solutions.
@@ -180,15 +168,4 @@ class CDiscAdjHeatIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load unsteady solution for unsteady problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance layer.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                             unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CIteration.hpp b/SU2_CFD/include/iteration/CIteration.hpp
index 961fdb9ed6a..05947c02402 100644
--- a/SU2_CFD/include/iteration/CIteration.hpp
+++ b/SU2_CFD/include/iteration/CIteration.hpp
@@ -280,12 +280,6 @@ class CIteration {
   virtual void RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output,
                               unsigned short iZone, unsigned short iInst) {}
 
-  virtual void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                     unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {}
-
-  virtual void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                    unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {}
-
   virtual void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                             unsigned short val_iInst, unsigned short kind_recording) {}
 };
diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
index d7f94a6d316..cac68c1ab93 100644
--- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
@@ -257,4 +257,12 @@ class CDiscAdjSolver final : public CSolver {
                    int val_iter,
                    bool val_update_geo) override;
 
+  /*!
+   * \brief Forwards the query to the direct solver; returns false if there is none.
+   */
+  inline bool GetHasHybridParallel() const override {
+    if (direct_solver) return direct_solver->GetHasHybridParallel();
+    return false;
+  }
+
 };
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 5eb54162866..5e09591f572 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -44,14 +44,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
   bool heat = config[val_iZone]->GetWeakly_Coupled_Heat();
   bool grid_IsMoving = config[val_iZone]->GetGrid_Movement();
 
+  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
+
   //  /*--- Read the target pressure for inverse design. ---------------------------------------------*/
   //  if (config[val_iZone]->GetInvDesign_Cp() == YES)
-  //    output->SetCp_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL],
+  //    output->SetCp_InverseDesign(solvers0[FLOW_SOL],
   //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
 
   //  /*--- Read the target heat flux ----------------------------------------------------------------*/
   //  if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES)
-  //    output->SetHeatFlux_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL],
+  //    output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL],
   //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
 
   /*--- For the unsteady adjoint, load direct solutions from restart files. ---*/
@@ -73,15 +75,17 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
         /*--- Push solution back to correct array ---*/
 
         for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n();
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1();
+          auto solvers = solver[val_iZone][val_iInst][iMesh];
+
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1();
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n();
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n();
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (grid_IsMoving) {
             geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
@@ -96,12 +100,14 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
         /*--- Push solution back to correct array ---*/
 
         for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n();
+          auto solvers = solver[val_iZone][val_iInst][iMesh];
+
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (grid_IsMoving) {
             geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
@@ -114,7 +120,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter);
 
       if (config[val_iZone]->GetDeform_Mesh()) {
-        solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart(
+        solvers0[MESH_SOL]->LoadRestart(
             geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
       }
 
@@ -127,7 +133,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       ---*/
 
       if (config[val_iZone]->GetDeform_Mesh()) {
-        solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart(
+        solvers0[MESH_SOL]->LoadRestart(
             geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
       }
 
@@ -141,12 +147,14 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       /*--- Temporarily store the loaded solution in the Solution_Old array ---*/
 
       for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-        solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Set_OldSolution();
+        auto solvers = solver[val_iZone][val_iInst][iMesh];
+
+        solvers[FLOW_SOL]->Set_OldSolution();
         if (turbulent) {
-          solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Set_OldSolution();
+          solvers[TURB_SOL]->Set_OldSolution();
         }
         if (heat) {
-          solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Set_OldSolution();
+          solvers[HEAT_SOL]->Set_OldSolution();
         }
         if (grid_IsMoving) {
           geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old();
@@ -156,42 +164,46 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       /*--- Set Solution at timestep n to solution at n-1 ---*/
 
       for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
+        auto solvers = solver[val_iZone][val_iInst][iMesh];
+
         for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->SetSolution(
-              iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+          solvers[FLOW_SOL]->GetNodes()->SetSolution(
+              iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
 
           if (grid_IsMoving) {
             geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord(
                 iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint));
           }
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->SetSolution(
-                iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+            solvers[TURB_SOL]->GetNodes()->SetSolution(
+                iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint));
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->SetSolution(
-                iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+            solvers[HEAT_SOL]->GetNodes()->SetSolution(
+                iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint));
           }
         }
       }
       if (dual_time_1st) {
         /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/
         for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[val_iZone][val_iInst][iMesh];
+
           for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
               geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
                   iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
           }
         }
@@ -199,41 +211,45 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       if (dual_time_2nd) {
         /*--- Set Solution at timestep n-1 to solution at n-2 ---*/
         for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[val_iZone][val_iInst][iMesh];
+
           for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
 
             if (grid_IsMoving) {
               geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
                   iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
             }
           }
         }
         /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/
         for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[val_iZone][val_iInst][iMesh];
+
           for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
               geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1(
                   iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
           }
         }
@@ -252,48 +268,42 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
   if (TimeIter == 0 || dual_time) {
     for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
+      auto solvers = solver[val_iZone][val_iInst][iMesh];
       for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution(iPoint));
+        solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
     if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
       for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->GetNodes()->GetSolution(iPoint));
+        solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
     if (heat) {
       for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->GetNodes()->GetSolution(iPoint));
+        solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
     if (config[val_iZone]->AddRadiation()) {
       for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][RAD_SOL]->GetNodes()->GetSolution(iPoint));
+        solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
   }
 
-  solver[val_iZone][val_iInst][MESH_0][ADJFLOW_SOL]->Preprocessing(
-      geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-      RUNTIME_ADJFLOW_SYS, false);
+  solvers0[ADJFLOW_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+                                       MESH_0, 0, RUNTIME_ADJFLOW_SYS, false);
+
   if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
-    solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJTURB_SYS, false);
+    solvers0[ADJTURB_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+                                         MESH_0, 0, RUNTIME_ADJTURB_SYS, false);
   }
   if (heat) {
-    solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJHEAT_SYS, false);
+    solvers0[ADJHEAT_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+                                         MESH_0, 0, RUNTIME_ADJHEAT_SYS, false);
   }
   if (config[val_iZone]->AddRadiation()) {
-    solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJRAD_SYS, false);
+    solvers0[ADJRAD_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+                                        MESH_0, 0, RUNTIME_ADJRAD_SYS, false);
   }
 }
 
@@ -557,8 +567,3 @@ bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integrati
 
   return output->GetConvergence();
 }
-void CDiscAdjFluidIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                         CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                         CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                         CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                                         unsigned short val_iInst) {}
diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
index 4258a631a52..8c5ff48afa8 100644
--- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
@@ -258,13 +258,3 @@ bool CDiscAdjHeatIteration::Monitor(COutput* output, CIntegration**** integratio
 
   return output->GetConvergence();
 }
-
-void CDiscAdjHeatIteration::Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                   unsigned long InnerIter, bool StopCalc, unsigned short val_iZone,
-                                   unsigned short val_iInst) {}
-
-void CDiscAdjHeatIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-}

From 63003eea499ce3f480b699dad4d7d823f5df3ff7 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 12:24:54 +0000
Subject: [PATCH 34/57] val_ never made anything better, parallel dependencies,
 fix adjoint residual calc

---
 .../src/iteration/CDiscAdjFluidIteration.cpp  | 225 +++++++++---------
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        |  20 +-
 2 files changed, 124 insertions(+), 121 deletions(-)

diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 5e09591f572..192bc5c7a4d 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -31,35 +31,35 @@
 void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                         CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                         CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
+                                        CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   StartTime = SU2_MPI::Wtime();
 
   unsigned long iPoint;
-  unsigned short TimeIter = config[val_iZone]->GetTimeIter();
-  bool dual_time_1st = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST);
-  bool dual_time_2nd = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND);
+  unsigned short TimeIter = config[iZone]->GetTimeIter();
+  bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST);
+  bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND);
   bool dual_time = (dual_time_1st || dual_time_2nd);
   unsigned short iMesh;
   int Direct_Iter;
-  bool heat = config[val_iZone]->GetWeakly_Coupled_Heat();
-  bool grid_IsMoving = config[val_iZone]->GetGrid_Movement();
+  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+  bool grid_IsMoving = config[iZone]->GetGrid_Movement();
 
-  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
+  auto solvers0 = solver[iZone][iInst][MESH_0];
 
   //  /*--- Read the target pressure for inverse design. ---------------------------------------------*/
-  //  if (config[val_iZone]->GetInvDesign_Cp() == YES)
+  //  if (config[iZone]->GetInvDesign_Cp() == YES)
   //    output->SetCp_InverseDesign(solvers0[FLOW_SOL],
-  //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
+  //    geometry[iZone][iInst][MESH_0], config[iZone], ExtIter);
 
   //  /*--- Read the target heat flux ----------------------------------------------------------------*/
   //  if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES)
   //    output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL],
-  //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
+  //    geometry[iZone][iInst][MESH_0], config[iZone], ExtIter);
 
   /*--- For the unsteady adjoint, load direct solutions from restart files. ---*/
 
-  if (config[val_iZone]->GetTime_Marching()) {
-    Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2;
+  if (config[iZone]->GetTime_Marching()) {
+    Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2;
 
     /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/
 
@@ -70,12 +70,12 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
     if (TimeIter == 0) {
       if (dual_time_2nd) {
         /*--- Load solution at timestep n-2 ---*/
-        LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 2);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2);
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          auto solvers = solver[val_iZone][val_iInst][iMesh];
+        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
 
           solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
           solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1();
@@ -88,19 +88,19 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
             solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1();
+            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n();
+            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1();
           }
         }
       }
       if (dual_time) {
         /*--- Load solution at timestep n-1 ---*/
-        LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 1);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1);
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          auto solvers = solver[val_iZone][val_iInst][iMesh];
+        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
 
           solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
           if (turbulent) {
@@ -110,18 +110,18 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
             solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
+            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n();
           }
         }
       }
 
       /*--- Load solution timestep n ---*/
 
-      LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter);
+      LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter);
 
-      if (config[val_iZone]->GetDeform_Mesh()) {
+      if (config[iZone]->GetDeform_Mesh()) {
         solvers0[MESH_SOL]->LoadRestart(
-            geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
+            geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
     } else if ((TimeIter > 0) && dual_time) {
@@ -132,22 +132,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       Afterwards the GridVelocity is computed based on the Coordinates.
       ---*/
 
-      if (config[val_iZone]->GetDeform_Mesh()) {
+      if (config[iZone]->GetDeform_Mesh()) {
         solvers0[MESH_SOL]->LoadRestart(
-            geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
+            geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
       /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/
       if (dual_time_1st) {
-        LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 1);
+        LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter - 1);
       } else {
-        LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 2);
+        LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter - 2);
       }
 
       /*--- Temporarily store the loaded solution in the Solution_Old array ---*/
 
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-        auto solvers = solver[val_iZone][val_iInst][iMesh];
+      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        auto solvers = solver[iZone][iInst][iMesh];
 
         solvers[FLOW_SOL]->Set_OldSolution();
         if (turbulent) {
@@ -157,22 +157,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
           solvers[HEAT_SOL]->Set_OldSolution();
         }
         if (grid_IsMoving) {
-          geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old();
+          geometry[iZone][iInst][iMesh]->nodes->SetCoord_Old();
         }
       }
 
       /*--- Set Solution at timestep n to solution at n-1 ---*/
 
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-        auto solvers = solver[val_iZone][val_iInst][iMesh];
+      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        auto solvers = solver[iZone][iInst][iMesh];
 
-        for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
+        for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
           solvers[FLOW_SOL]->GetNodes()->SetSolution(
               iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
 
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord(
-                iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint));
+            geometry[iZone][iInst][iMesh]->nodes->SetCoord(
+                iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n(iPoint));
           }
           if (turbulent) {
             solvers[TURB_SOL]->GetNodes()->SetSolution(
@@ -186,16 +186,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       }
       if (dual_time_1st) {
         /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          auto solvers = solver[val_iZone][val_iInst][iMesh];
+        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(
+                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
@@ -210,16 +210,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       }
       if (dual_time_2nd) {
         /*--- Set Solution at timestep n-1 to solution at n-2 ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          auto solvers = solver[val_iZone][val_iInst][iMesh];
+        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint));
+              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(
+                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n1(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
@@ -232,16 +232,16 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
           }
         }
         /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          auto solvers = solver[val_iZone][val_iInst][iMesh];
+        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1(
+                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(
@@ -259,91 +259,89 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
     /*--- Compute & set Grid Velocity via finite differences of the Coordinates. ---*/
     if (grid_IsMoving)
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++)
-        geometry[val_iZone][val_iInst][iMesh]->SetGridVelocity(config[val_iZone], TimeIter);
+      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++)
+        geometry[iZone][iInst][iMesh]->SetGridVelocity(config[iZone], TimeIter);
 
   }  // if unsteady
 
   /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/
 
   if (TimeIter == 0 || dual_time) {
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      auto solvers = solver[val_iZone][val_iInst][iMesh];
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
+    for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      auto solvers = solver[iZone][iInst][iMesh];
+      for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
         solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
-    if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
+    if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
+      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
         solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
     if (heat) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
+      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
         solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
-    if (config[val_iZone]->AddRadiation()) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
+    if (config[iZone]->AddRadiation()) {
+      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
         solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint));
       }
     }
   }
 
-  solvers0[ADJFLOW_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+  solvers0[ADJFLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
                                        MESH_0, 0, RUNTIME_ADJFLOW_SYS, false);
 
-  if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
-    solvers0[ADJTURB_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+  if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
+    solvers0[ADJTURB_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
                                          MESH_0, 0, RUNTIME_ADJTURB_SYS, false);
   }
   if (heat) {
-    solvers0[ADJHEAT_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+    solvers0[ADJHEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
                                          MESH_0, 0, RUNTIME_ADJHEAT_SYS, false);
   }
-  if (config[val_iZone]->AddRadiation()) {
-    solvers0[ADJRAD_SOL]->Preprocessing(geometry[val_iZone][val_iInst][MESH_0], solvers0, config[val_iZone],
+  if (config[iZone]->AddRadiation()) {
+    solvers0[ADJRAD_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
                                         MESH_0, 0, RUNTIME_ADJRAD_SYS, false);
   }
 }
 
 void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                                   unsigned short val_iZone, unsigned short val_iInst,
-                                                   int val_DirectIter) {
+                                                   unsigned short iZone, unsigned short iInst, int DirectIter) {
   unsigned short iMesh;
-  bool heat = config[val_iZone]->GetWeakly_Coupled_Heat();
+  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  auto solvers = solver[iZone][iInst];
+
+  if (DirectIter >= 0) {
+    if (rank == MASTER_NODE && iZone == ZONE_0)
+      cout << " Loading flow solution from direct iteration " << DirectIter << "." << endl;
+
+    solvers[MESH_0][FLOW_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, true);
 
-  if (val_DirectIter >= 0) {
-    if (rank == MASTER_NODE && val_iZone == ZONE_0)
-      cout << " Loading flow solution from direct iteration " << val_DirectIter << "." << endl;
-    solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->LoadRestart(
-        geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, true);
     if (turbulent) {
-      solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->LoadRestart(
-          geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false);
+      solvers[MESH_0][TURB_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false);
     }
     if (heat) {
-      solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->LoadRestart(
-          geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false);
+      solvers[MESH_0][HEAT_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false);
     }
   } else {
     /*--- If there is no solution file we set the freestream condition ---*/
-    if (rank == MASTER_NODE && val_iZone == ZONE_0)
-      cout << " Setting freestream conditions at direct iteration " << val_DirectIter << "." << endl;
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->SetFreeStream_Solution(config[val_iZone]);
-      solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Preprocessing(
-          geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh,
-          val_DirectIter, RUNTIME_FLOW_SYS, false);
+    if (rank == MASTER_NODE && iZone == ZONE_0)
+      cout << " Setting freestream conditions at direct iteration " << DirectIter << "." << endl;
+
+    for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      solvers[iMesh][FLOW_SOL]->SetFreeStream_Solution(config[iZone]);
+      solvers[iMesh][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh,
+                                              DirectIter, RUNTIME_FLOW_SYS, false);
       if (turbulent) {
-        solver[val_iZone][val_iInst][iMesh][TURB_SOL]->SetFreeStream_Solution(config[val_iZone]);
-        solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Postprocessing(
-            geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh);
+        solvers[iMesh][TURB_SOL]->SetFreeStream_Solution(config[iZone]);
+        solvers[iMesh][TURB_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh);
       }
       if (heat) {
-        solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->SetFreeStream_Solution(config[val_iZone]);
-        solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Postprocessing(
-            geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh);
+        solvers[iMesh][HEAT_SOL]->SetFreeStream_Solution(config[iZone]);
+        solvers[iMesh][HEAT_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh);
       }
     }
   }
@@ -406,8 +404,9 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry***
 
 void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                            unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
+  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
 
   if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) {
     /*--- Register flow and turbulent variables as input ---*/
@@ -449,7 +448,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
 
 void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                           unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
+  SU2_OMP_PARALLEL {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/
 
@@ -465,13 +466,18 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo
   if (config[iZone]->AddRadiation()) {
     solver[iZone][INST_0][MESH_0][ADJRAD_SOL]->SetRecording(geometry[iZone][INST_0][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics,
                                              CConfig** config, unsigned short iZone, unsigned short iInst,
                                              unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][FLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) {
     /*--- Update geometry to get the influence on other geometry variables (normals, volume etc) ---*/
@@ -494,7 +500,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
     solver[iZone][iInst][MESH_0][TURB_SOL]->CompleteComms(geometry[iZone][iInst][MESH_0], config[iZone], SOLUTION);
   }
 
-  if (heat) {
+  }
+  END_SU2_OMP_PARALLEL
+
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][HEAT_SOL]->Set_Heatflux_Areas(geometry[iZone][iInst][MESH_0], config[iZone]);
     solver[iZone][iInst][MESH_0][HEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0],
                                                           config[iZone], MESH_0, NO_RK_ITER, RUNTIME_HEAT_SYS, true);
@@ -538,15 +547,13 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g
 void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                     CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                     CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                    CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  unsigned short iMesh;
-
+                                    CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   /*--- Dual time stepping strategy ---*/
 
-  if ((config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST) ||
-      (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND)) {
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      integration[val_iZone][val_iInst][ADJFLOW_SOL]->SetConvergence(false);
+  if ((config[iZone]->GetTime_Marching() == DT_STEPPING_1ST) ||
+      (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND)) {
+    for (unsigned short iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      integration[iZone][iInst][ADJFLOW_SOL]->SetConvergence(false);
     }
   }
 }
@@ -554,16 +561,16 @@ void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integratio
 bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                      CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                      CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                     CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
+                                     CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   StopTime = SU2_MPI::Wtime();
 
   UsedTime = StopTime - StartTime;
 
   /*--- Write the convergence history for the fluid (only screen output) ---*/
 
-  output->SetHistory_Output(geometry[val_iZone][INST_0][MESH_0], solver[val_iZone][INST_0][MESH_0], config[val_iZone],
-                            config[val_iZone]->GetTimeIter(), config[val_iZone]->GetOuterIter(),
-                            config[val_iZone]->GetInnerIter());
+  output->SetHistory_Output(geometry[iZone][INST_0][MESH_0], solver[iZone][INST_0][MESH_0], config[iZone],
+                            config[iZone]->GetTimeIter(), config[iZone]->GetOuterIter(),
+                            config[iZone]->GetInnerIter());
 
   return output->GetConvergence();
 }
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index f96765a9216..b2a8fea1ea9 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -325,8 +325,6 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
-    const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0;
-
     /*--- Extract the adjoint solution ---*/
 
     if(config->GetMultizone_Problem()) {
@@ -342,16 +340,14 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
       su2double residual = Solution[iVar]-nodes->GetSolution_Old(iPoint,iVar);
       nodes->AddSolution(iPoint, iVar, relax*residual);
 
-      residual *= isdomain;
-      Residual_RMS[iVar] += pow(residual,2);
-      AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint));
-
-      /*--- Update residual information for current thread. ---*/
-      resRMS[iVar] += residual*residual;
-      if (fabs(residual) > resMax[iVar]) {
-        resMax[iVar] = fabs(residual);
-        idxMax[iVar] = iPoint;
-        coordMax[iVar] = geometry->nodes->GetCoord(iPoint);
+      if (iPoint < nPointDomain) {
+        /*--- Update residual information for current thread. ---*/
+        resRMS[iVar] += residual*residual;
+        if (fabs(residual) > resMax[iVar]) {
+          resMax[iVar] = fabs(residual);
+          idxMax[iVar] = iPoint;
+          coordMax[iVar] = geometry->nodes->GetCoord(iPoint);
+        }
       }
     }
   }

From b329cb62fb9cb1540aa0307db16e0c76ee0e8fa5 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 15:39:26 +0000
Subject: [PATCH 35/57] more parallel, fix SensGeo output

---
 .../src/iteration/CDiscAdjFluidIteration.cpp  | 45 ++++++++++++++-----
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        |  8 +++-
 SU2_CFD/src/solvers/CSolver.cpp               |  4 ++
 3 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 192bc5c7a4d..5cb17308905 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -351,8 +351,11 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati
                                      CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                      CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement,
                                      CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
+  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
 
   /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/
 
@@ -372,12 +375,17 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati
 
     solver[iZone][iInst][MESH_0][ADJRAD_SOL]->ExtractAdjoint_Variables(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                                unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   /*--- Initialize the adjoints the conservative variables ---*/
 
@@ -389,7 +397,7 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry***
     solver[iZone][iInst][MESH_0][ADJTURB_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
 
-  if (heat) {
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
 
@@ -400,13 +408,17 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry***
   if (config[iZone]->GetFluidProblem()) {
     solver[iZone][iInst][MESH_0][FLOW_SOL]->SetVertexTractionsAdjoint(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                            unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
 
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
   const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
 
   if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) {
     /*--- Register flow and turbulent variables as input ---*/
@@ -420,7 +432,7 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     if (turbulent && !frozen_visc) {
       solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]);
     }
-    if (heat) {
+    if (config[iZone]->GetWeakly_Coupled_Heat()) {
       solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]);
     }
     if (config[iZone]->AddRadiation()) {
@@ -430,6 +442,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     }
   }
 
+  }
+  END_SU2_OMP_PARALLEL
+
   if (kind_recording == MESH_COORDS) {
     /*--- Register node coordinates as input ---*/
 
@@ -448,7 +463,8 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
 
 void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                           unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
-  SU2_OMP_PARALLEL {
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
 
   const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
@@ -475,7 +491,7 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
                                              CConfig** config, unsigned short iZone, unsigned short iInst,
                                              unsigned short kind_recording) {
 
-  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][FLOW_SOL]->GetHasHybridParallel())) {
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
 
   const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
@@ -522,8 +538,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
 
 void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                             COutput* output, unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   /*--- Register conservative variables as output of the iteration ---*/
 
@@ -533,7 +551,7 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g
   if (turbulent && !frozen_visc) {
     solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
-  if (heat) {
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
   if (config[iZone]->AddRadiation()) {
@@ -542,6 +560,9 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g
   if (config[iZone]->GetFluidProblem()) {
     solver[iZone][iInst][MESH_0][FLOW_SOL]->RegisterVertexTractions(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index b2a8fea1ea9..44343c87e9c 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -580,18 +580,22 @@ void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config
 
     for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) {
       if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) {
-        atomicAdd(Sens_Geo[iMarker_Mon], Sens);
+        atomicAdd(Sens, Sens_Geo[iMarker_Mon]);
         break;
       }
     }
   }
 
+  SU2_OMP_BARRIER
   SU2_OMP_MASTER {
     auto local = Sens_Geo;
     SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
     Total_Sens_Geo = 0.0;
-    for (auto x : Sens_Geo) Total_Sens_Geo += x;
+    for (auto& x : Sens_Geo) {
+      x = sqrt(x);
+      Total_Sens_Geo += x;
+    }
   }
   END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 1b717c09c9c..5e9cd83515e 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -3971,6 +3971,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config
     if (!config->GetSolid_Wall(iMarker)) continue;
 
     /*--- Loop over the vertices ---*/
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
     for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
 
       /*--- Recover the point index ---*/
@@ -3984,6 +3985,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config
         AD::RegisterOutput(VertexTraction[iMarker][iVertex][iDim]);
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -4000,6 +4002,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf
     if (!config->GetSolid_Wall(iMarker)) continue;
 
     /*--- Loop over the vertices ---*/
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
     for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
 
       /*--- Recover the point index ---*/
@@ -4014,6 +4017,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf
                                 SU2_TYPE::GetValue(VertexTractionAdjoint[iMarker][iVertex][iDim]));
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }

From 02c9c8e20ca203cbed10aab203e5770f3ca0e715 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 19 Mar 2021 16:31:55 +0000
Subject: [PATCH 36/57] mesh solver, plus some cleanup

---
 Common/include/geometry/CGeometry.hpp         |  8 +--
 Common/src/geometry/CGeometry.cpp             | 36 +++----------
 .../include/solvers/CDiscAdjMeshSolver.hpp    | 37 ++++----------
 .../src/iteration/CDiscAdjFluidIteration.cpp  |  6 +--
 .../src/iteration/CDiscAdjHeatIteration.cpp   |  3 +-
 SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp    | 51 ++++++++++---------
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        |  6 ++-
 SU2_CFD/src/variables/CMeshVariable.cpp       |  4 ++
 8 files changed, 58 insertions(+), 93 deletions(-)

diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp
index 4941e62d219..fccbc5e1d11 100644
--- a/Common/include/geometry/CGeometry.hpp
+++ b/Common/include/geometry/CGeometry.hpp
@@ -1252,13 +1252,7 @@ class CGeometry {
    * \brief Register the coordinates of the mesh nodes.
    * \param[in] config
    */
-  void RegisterCoordinates(CConfig *config) const;
-
-  /*!
-   * \brief Register the coordinates of the mesh nodes as output.
-   * \param[in] config
-   */
-  void RegisterOutput_Coordinates(CConfig *config) const;
+  void RegisterCoordinates(const CConfig *config) const;
 
   /*!
    * \brief Update the multi-grid structure and the wall-distance.
diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp
index 535a8e42c5c..85ca9c72f78 100644
--- a/Common/src/geometry/CGeometry.cpp
+++ b/Common/src/geometry/CGeometry.cpp
@@ -2492,44 +2492,24 @@ void CGeometry::ComputeAirfoil_Section(su2double *Plane_P0, su2double *Plane_Nor
 
 }
 
-void CGeometry::RegisterCoordinates(CConfig *config) const {
-  unsigned short iDim;
-  unsigned long iPoint;
-  bool input = true;
-  bool push_index = config->GetMultizone_Problem()? false : true;
+void CGeometry::RegisterCoordinates(const CConfig *config) const {
+  const bool input = true;
+  const bool push_index = !config->GetMultizone_Problem(); /*--- If false, SetIndex is called per point below. ---*/
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    for (iDim = 0; iDim < nDim; iDim++) {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
+    for (auto iDim = 0u; iDim < nDim; iDim++) {
       AD::RegisterInput(nodes->GetCoord(iPoint)[iDim], push_index);
     }
     if(!push_index) {
       nodes->SetIndex(iPoint, input);
     }
   }
-}
-
-void CGeometry::RegisterOutput_Coordinates(CConfig *config) const{
-  unsigned short iDim;
-  unsigned long iPoint;
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    if(config->GetMultizone_Problem()) {
-      for (iDim = 0; iDim < nDim; iDim++) {
-        AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]);
-      }
-    }
-    else {
-      for (iDim = 0; iDim < nDim; iDim++) {
-        AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]);
-      }
-    }
-  }
+  END_SU2_OMP_FOR
 }
 
 void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) {
 
-  unsigned short iMesh;
-
   geometry_container[MESH_0]->InitiateComms(geometry_container[MESH_0], config, COORDINATES);
   geometry_container[MESH_0]->CompleteComms(geometry_container[MESH_0], config, COORDINATES);
   if (config->GetDynamic_Grid()){
@@ -2541,7 +2521,7 @@ void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config)
   geometry_container[MESH_0]->SetBoundControlVolume(config, UPDATE);
   geometry_container[MESH_0]->SetMaxLength(config);
 
-  for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
+  for (unsigned short iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
     /*--- Update the control volume structures ---*/
 
     geometry_container[iMesh]->SetControlVolume(config,geometry_container[iMesh-1], UPDATE);
diff --git a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
index 4caa7e597e2..d7ba9d80b75 100644
--- a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
@@ -39,6 +39,13 @@
  */
 class CDiscAdjMeshSolver final : public CSolver {
 private:
+  static constexpr size_t MAXNDIM = 3;  /*!< \brief Max number of space dimensions, used in some static arrays. */
+  static constexpr size_t MAXNVAR = 3;  /*!< \brief Max number of variables, for static arrays. */
+
+  static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */
+
+  unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
+
   CSolver *direct_solver = nullptr;
 
   CDiscAdjMeshBoundVariable* nodes = nullptr;   /*!< \brief Variables of the discrete adjoint mesh solver. */
@@ -53,15 +60,7 @@ class CDiscAdjMeshSolver final : public CSolver {
   /*!
    * \brief Constructor of the class.
    */
-  CDiscAdjMeshSolver(void);
-
-  /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
-   */
-  CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config);
+  CDiscAdjMeshSolver() = default;
 
   /*!
    * \overload
@@ -75,7 +74,7 @@ class CDiscAdjMeshSolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjMeshSolver(void) override;
+  ~CDiscAdjMeshSolver() override;
 
   /*!
    * \brief Performs the preprocessing of the AD-based mesh adjoint solver.
@@ -124,24 +123,6 @@ class CDiscAdjMeshSolver final : public CSolver {
    */
   void ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Update the dual-time derivatives.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver_container - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
-   * \param[in] iRKStep - Current step of the Runge-Kutta iteration.
-   * \param[in] RunTime_EqSystem - System of equations which is going to be solved.
-   * \param[in] Output - boolean to determine whether to print output.
-   */
-  void Preprocessing(CGeometry *geometry,
-                    CSolver **solver_container,
-                    CConfig *config,
-                    unsigned short iMesh,
-                    unsigned short iRKStep,
-                    unsigned short RunTime_EqSystem,
-                    bool Output) override;
-
   /*!
    * \brief Load a solution from a restart file.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 5cb17308905..4bf2a9d9235 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -442,9 +442,6 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     }
   }
 
-  }
-  END_SU2_OMP_PARALLEL
-
   if (kind_recording == MESH_COORDS) {
     /*--- Register node coordinates as input ---*/
 
@@ -459,6 +456,9 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     /*--- Boundary displacements ---*/
     solver[iZone][iInst][MESH_0][ADJMESH_SOL]->RegisterVariables(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
index 8c5ff48afa8..5d278eb97f9 100644
--- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
@@ -227,9 +227,8 @@ void CDiscAdjHeatIteration::SetDependencies(CSolver***** solver, CGeometry**** g
 
 void CDiscAdjHeatIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                            COutput* output, unsigned short iZone, unsigned short iInst) {
-  solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
 
-  geometry[iZone][iInst][MESH_0]->RegisterOutput_Coordinates(config[iZone]);
+  solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
 }
 
 void CDiscAdjHeatIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
index 249c59b4722..9cb4950870b 100644
--- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
@@ -28,15 +28,8 @@
 #include "../../include/solvers/CDiscAdjMeshSolver.hpp"
 #include "../../include/variables/CDiscAdjMeshBoundVariable.hpp"
 
-
-CDiscAdjMeshSolver::CDiscAdjMeshSolver() : CSolver () {}
-
-CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config) : CSolver() {}
-
 CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver) : CSolver() {
 
-  unsigned short iVar;
-
   nVar = geometry->GetnDim();
   nDim = geometry->GetnDim();
 
@@ -46,6 +39,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
   nPoint       = geometry->GetnPoint();
   nPointDomain = geometry->GetnPointDomain();
 
+  omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE);
+
   /*--- Define some auxiliary vectors related to the residual ---*/
 
   Residual_RMS.resize(nVar,1.0);
@@ -63,12 +58,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
-
-  Solution = new su2double[nVar];
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
   /*--- Initialize the node structure ---*/
+  su2double Solution[MAXNVAR] = {1e-16, 1e-16, 1e-16}; /*--- Seed all components, not just the first. ---*/
   nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config);
   SetBaseClassPointerToNodes();
 
@@ -90,22 +81,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
 
 }
 
-CDiscAdjMeshSolver::~CDiscAdjMeshSolver(void){
-  delete nodes;
-}
-
-
-void CDiscAdjMeshSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container,
-                                       unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){
-}
+CDiscAdjMeshSolver::~CDiscAdjMeshSolver() { delete nodes; }
 
 void CDiscAdjMeshSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
   /*--- Reset the solution to the initial (converged) solution ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
     direct_solver->GetNodes()->SetBound_Disp(iPoint,nodes->GetBoundDisp_Direct(iPoint));
   }
+  END_SU2_OMP_FOR
 
   /*--- Set indices to zero ---*/
 
@@ -123,20 +109,25 @@ void CDiscAdjMeshSolver::RegisterSolution(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjMeshSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset){
 
-  /*--- Register boundary displacements as input ---*/
-  bool input = true;
-  direct_solver->GetNodes()->Register_BoundDisp(input);
-
+  SU2_OMP_MASTER {
+    /*--- Register boundary displacements as input ---*/
+    bool input = true;
+    direct_solver->GetNodes()->Register_BoundDisp(input);
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){
 
   /*--- Extract the sensitivities of the mesh coordinates ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
 
     /*--- Extract the adjoint solution from the original mesh coordinates ---*/
 
+    su2double Solution[MAXNVAR] = {0.0};
     direct_solver->GetNodes()->GetAdjoint_MeshCoord(iPoint,Solution);
 
     /*--- Store the adjoint solution (the container is reused) ---*/
@@ -144,6 +135,7 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *c
     nodes->SetSolution(iPoint,Solution);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -151,10 +143,12 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *
 
   /*--- Extract the sensitivities of the boundary displacements ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
 
     /*--- Extract the adjoint solution of the boundary displacements ---*/
 
+    su2double Solution[MAXNVAR] = {0.0};
     direct_solver->GetNodes()->GetAdjoint_BoundDisp(iPoint,Solution);
 
     /*--- Store the sensitivities of the boundary displacements ---*/
@@ -162,11 +156,14 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *
     nodes->SetBoundDisp_Sens(iPoint,Solution);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
 void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver *solver) {
 
+  SU2_OMP_PARALLEL {
+
   const bool time_stepping = (config->GetTime_Marching() != STEADY);
   const auto eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength();
 
@@ -177,6 +174,8 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS
   ExtractAdjoint_Variables(geometry, config);
 
   /*--- Store the sensitivities in the flow adjoint container ---*/
+
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
 
     /*--- If sharp edge, set the sensitivity to 0 on that region ---*/
@@ -198,8 +197,12 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS
       }
     }
   }
+  END_SU2_OMP_FOR
+
   solver->SetSurface_Sensitivity(geometry, config);
 
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjMeshSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index 44343c87e9c..30249c14a5d 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -497,8 +497,10 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) {
 
 void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) {
 
+  SU2_OMP_PARALLEL {
+
   const bool time_stepping = (config->GetTime_Marching() != STEADY);
-  const su2double eps = config->GetVenkat_LimiterCoeff()*config->GetAdjSharp_LimiterCoeff();
+  const su2double eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength();
 
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
@@ -537,6 +539,8 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve
 
   SetSurface_Sensitivity(geometry, config);
 
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) {
diff --git a/SU2_CFD/src/variables/CMeshVariable.cpp b/SU2_CFD/src/variables/CMeshVariable.cpp
index 0f35b0fc442..d4786c3ed75 100644
--- a/SU2_CFD/src/variables/CMeshVariable.cpp
+++ b/SU2_CFD/src/variables/CMeshVariable.cpp
@@ -50,13 +50,17 @@ CMeshVariable::CMeshVariable(unsigned long npoint, unsigned long ndim, CConfig *
 
 void CMeshVariable::Register_MeshCoord(bool input) {
   if (input) {
+    SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       for (unsigned long iDim = 0; iDim < nDim; iDim++)
         AD::RegisterInput(Mesh_Coord(iPoint,iDim));
+    END_SU2_OMP_FOR
   }
   else {
+    SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       for (unsigned long iDim = 0; iDim < nDim; iDim++)
         AD::RegisterOutput(Mesh_Coord(iPoint,iDim));
+    END_SU2_OMP_FOR
   }
 }

From ac5c581a28ffb30fe1d6e0bf64499b1f628abd78 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Sat, 20 Mar 2021 23:08:12 +0000
Subject: [PATCH 37/57] no include of cpp

---
 Common/lib/Makefile.am                        |  1 +
 Common/src/basic_types/ad_structure.cpp       |  2 --
 Common/src/meson.build                        |  3 ++-
 SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp | 19 ++-----------------
 SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp     | 10 ----------
 5 files changed, 5 insertions(+), 30 deletions(-)

diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am
index 813299d0f77..1e7a8761c6f 100644
--- a/Common/lib/Makefile.am
+++ b/Common/lib/Makefile.am
@@ -63,6 +63,7 @@ lib_sources = \
   ../src/grid_movement/CVolumetricMovement.cpp \
   ../src/grid_movement/CSurfaceMovement.cpp \
   ../include/parallelization/mpi_structure.cpp \
+  ../include/parallelization/omp_structure.cpp \
   ../src/basic_types/ad_structure.cpp \
   ../src/fem/fem_gauss_jacobi_quadrature.cpp \
   ../src/geometry/CGeometry.cpp \
diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp
index efa0cacf401..18342e13a90 100644
--- a/Common/src/basic_types/ad_structure.cpp
+++ b/Common/src/basic_types/ad_structure.cpp
@@ -49,5 +49,3 @@ namespace AD {
 
 #endif
 }
-
-#include "../../include/parallelization/omp_structure.cpp"
diff --git a/Common/src/meson.build b/Common/src/meson.build
index 5dcbb57c66f..b3e0726e70c 100644
--- a/Common/src/meson.build
+++ b/Common/src/meson.build
@@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp',
            'CConfig.cpp',
            'basic_types/ad_structure.cpp',
            'wall_model.cpp',
-           '../include/parallelization/mpi_structure.cpp'])
+           '../include/parallelization/mpi_structure.cpp',
+           '../include/parallelization/omp_structure.cpp'])
 
 subdir('linear_algebra')
 subdir('toolboxes')
diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
index dec9c21e348..c85ea4e8653 100644
--- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
@@ -102,15 +102,7 @@ class CDiscAdjFEASolver final : public CSolver {
   /*!
    * \brief Constructor of the class.
    */
-  CDiscAdjFEASolver(void);
-
-  /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
-   */
-  CDiscAdjFEASolver(CGeometry *geometry, CConfig *config);
+  CDiscAdjFEASolver() = default;
 
   /*!
    * \overload
@@ -124,7 +116,7 @@ class CDiscAdjFEASolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjFEASolver(void) override;
+  ~CDiscAdjFEASolver() override;
 
   /*!
    * \brief Performs the preprocessing of the adjoint AD-based solver.
@@ -159,13 +151,6 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Set the surface sensitivity.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   */
-  void SetSurface_Sensitivity(CGeometry *geometry, CConfig* config) override;
-
   /*!
    * \brief Extract and set the geometrical sensitivity.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
index 287e1c50eaa..bc72135c545 100644
--- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
@@ -25,14 +25,9 @@
  * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
  */
 
-
 #include "../../include/solvers/CDiscAdjFEASolver.hpp"
 #include "../../include/variables/CDiscAdjFEAVariable.hpp"
 
-CDiscAdjFEASolver::CDiscAdjFEASolver(void) : CSolver() { }
-
-CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config)  : CSolver() { }
-
 CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver,
                                      unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
@@ -776,11 +771,6 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo
       nodes->SetSensitivity(iPoint, iDim, Sensitivity);
     }
   }
-  SetSurface_Sensitivity(geometry, config);
-}
-
-void CDiscAdjFEASolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config){
-
 }
 
 void CDiscAdjFEASolver::ReadDV(CConfig *config) {

From 3527c2809a341ee27c7597e0915add33fb889365 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 22 Mar 2021 12:04:29 +0000
Subject: [PATCH 38/57] fix bug from nested parallel region

---
 SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 4bf2a9d9235..626d625d7aa 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -491,8 +491,6 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
                                              CConfig** config, unsigned short iZone, unsigned short iInst,
                                              unsigned short kind_recording) {
 
-  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
-
   const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) {
@@ -503,6 +501,8 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
     CGeometry::ComputeWallDistance(config, geometry);
   }
 
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
   /*--- Compute coupling between flow and turbulent equations ---*/
   solver[iZone][iInst][MESH_0][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0],
                                                         config[iZone], MESH_0, NO_RK_ITER, RUNTIME_FLOW_SYS, true);

From 9efa995aa1e22cc93ea5833395ccd9b2a82134e7 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Tue, 23 Mar 2021 00:14:49 +0000
Subject: [PATCH 39/57] less boilerplate, more boilerplate, fix merge, try to
 fix failed regressions

---
 Common/include/linear_algebra/CSysSolve.hpp   |   4 -
 Common/src/linear_algebra/CSysMatrix.cpp      |  91 +-------------
 Common/src/linear_algebra/CSysSolve.cpp       |  13 --
 .../src/iteration/CDiscAdjFluidIteration.cpp  | 114 +++++++++---------
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        |  10 +-
 5 files changed, 66 insertions(+), 166 deletions(-)

diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
index 826e779beea..c69643cefe1 100644
--- a/Common/include/linear_algebra/CSysSolve.hpp
+++ b/Common/include/linear_algebra/CSysSolve.hpp
@@ -323,10 +323,6 @@ class CSysSolve {
                                  const PrecondType & precond, ScalarType tol, unsigned long m,
                                  ScalarType & residual, bool monitoring, const CConfig *config) const;
 
-  unsigned long RFGMRES_LinSolver(const VectorType & b, VectorType & x, const ProductType & mat_vec,
-                                  const PrecondType & precond, ScalarType tol, unsigned long m,
-                                  ScalarType & residual, bool monitoring, const CConfig *config);
-
   /*!
    * \brief Flexible Generalized Minimal Residual method with restarts (frequency comes from config).
    */
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index 398aea672ad..a9ee199f3e6 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -94,15 +94,11 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   if(npoint == 0) return;
 
   if(matrix != nullptr) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   if(nvar > MAXNVAR) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Application of this matrix, FVM or FEM. ---*/
@@ -607,14 +603,10 @@ void CSysMatrix<ScalarType>::MatrixVectorProduct(const CSysVector<ScalarType> &
   /*--- Some checks for consistency between CSysMatrix and the CSysVector<ScalarType>s ---*/
 #ifndef NDEBUG
   if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
   if (nPoint != prod.GetNBlk()) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 #endif
 
@@ -1312,7 +1304,7 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
 
   if (edge_ptr) {
     /*--- The FV way. ---*/
-    SU2_OMP_FOR_DYN(omp_light_size/2)
+    SU2_OMP_FOR_DYN(omp_heavy_size*2)
     for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) {
       auto bij = &matrix[edge_ptr(iEdge,0)*nVar*nVar];
       auto bji = &matrix[edge_ptr(iEdge,1)*nVar*nVar];
@@ -1362,80 +1354,9 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
   END_SU2_OMP_FOR
 
 #ifdef HAVE_PASTIX
+  SU2_OMP_MASTER
   pastix_wrapper.SetTransposedSolve();
-#endif
-}
-
-template<class ScalarType>
-void CSysMatrix<ScalarType>::TransposeInPlace() {
-
-  assert(nVar==nEqn && "Cannot transpose with nVar != nEqn.");
-
-  auto swapAndTransp = [](unsigned long n, ScalarType* a, ScalarType* b) {
-    assert(a!=b);
-    /*--- a=b', b=a' ---*/
-    for (auto i=0ul; i<n; ++i) {
-      for (auto j=0ul; j<i; ++j) {
-        const auto lo = i*n+j;
-        const auto up = j*n+i;
-        std::swap(a[lo], b[up]);
-        std::swap(a[up], b[lo]);
-      }
-      std::swap(a[i*n+i], b[i*n+i]);
-    }
-  };
-
-  /*--- Swap ij with ji and transpose them. ---*/
-
-  if (edge_ptr) {
-    /*--- The FV way. ---*/
-    SU2_OMP_FOR_DYN(omp_light_size/2)
-    for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) {
-      auto bij = &matrix[edge_ptr(iEdge,0)*nVar*nVar];
-      auto bji = &matrix[edge_ptr(iEdge,1)*nVar*nVar];
-
-      swapAndTransp(nVar, bij, bji);
-    }
-  }
-  else if (col_ptr) {
-    /*--- If the column pointer was built. ---*/
-    SU2_OMP_FOR_DYN(omp_heavy_size)
-    for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-      for (auto k = row_ptr[iPoint]; k < dia_ptr[iPoint]; ++k) {
-        auto bij = &matrix[k*nVar*nVar];
-        auto bji = &matrix[col_ptr[k]*nVar*nVar];
-
-        swapAndTransp(nVar, bij, bji);
-      }
-    }
-  }
-  else {
-    /*--- Slow fallback, needs to search for ji. ---*/
-    SU2_OMP_FOR_DYN(omp_heavy_size)
-    for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-      for (auto k = dia_ptr[iPoint]+1ul; k < row_ptr[iPoint+1]; ++k) {
-        const auto jPoint = col_ind[k];
-        auto bij = &matrix[k*nVar*nVar];
-        auto bji = GetBlock(jPoint,iPoint);
-        assert(bji && "Pattern is not symmetric.");
-
-        swapAndTransp(nVar, bij, bji);
-      }
-    }
-  }
-
-  /*--- Transpose the diagonal blocks. ---*/
-
-  SU2_OMP_FOR_STAT(omp_heavy_size)
-  for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
-    auto bii = &matrix[dia_ptr[iPoint]*nVar*nVar];
-    for (auto i=0ul; i<nVar; ++i)
-      for (auto j=0ul; j<i; ++j)
-        std::swap(bii[i*nVar+j], bii[j*nVar+i]);
-  }
-
-#ifdef HAVE_PASTIX
-  pastix_wrapper.SetTransposedSolve();
+  END_SU2_OMP_MASTER
 #endif
 }
 
@@ -1448,9 +1369,7 @@ void CSysMatrix<ScalarType>::MatrixMatrixAddition(ScalarType alpha, const CSysMa
             (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz);
 
   if (!ok) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   SU2_OMP_FOR_STAT(omp_light_size)
@@ -1473,9 +1392,7 @@ void CSysMatrix<ScalarType>::BuildPastixPreconditioner(CGeometry *geometry, cons
   END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 #else
-  SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
-  END_SU2_OMP_MASTER
 #endif
 }
 
@@ -1492,9 +1409,7 @@ void CSysMatrix<ScalarType>::ComputePastixPreconditioner(const CSysVector<Scalar
   CSysMatrixComms::Initiate(prod, geometry, config);
   CSysMatrixComms::Complete(prod, geometry, config);
 #else
-  SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
-  END_SU2_OMP_MASTER
 #endif
 }
 
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 0591c7d6093..66306655407 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -130,9 +130,7 @@ void CSysSolve<ScalarType>::ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg,
 
   if ((nrm <= 0.0) || (nrm != nrm)) {
     /*--- nrm is the result of a dot product, communications are implicitly handled. ---*/
-    SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Begin main Gram-Schmidt loop ---*/
@@ -210,9 +208,7 @@ unsigned long CSysSolve<ScalarType>::CG_LinSolver(const CSysVector<ScalarType> &
   /*--- Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet, only one thread can
@@ -353,15 +349,11 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
   /*---  Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   if (m > 5000) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet ---*/
@@ -552,9 +544,7 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
   /*--- Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate if not allocated yet ---*/
@@ -719,9 +709,7 @@ unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarT
   const ScalarType omega = SU2_TYPE::GetValue(config->GetLinear_Solver_Smoother_Relaxation());
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
-    END_SU2_OMP_MASTER
   }
 
   /*--- Allocate vectors for residual (r), solution increment (z), and matrix-vector
@@ -994,7 +982,6 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     SU2_OMP_MASTER
     AD::EndExtFunc();
     END_SU2_OMP_MASTER
-    SU2_OMP_BARRIER
 #endif
   }
 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 626d625d7aa..dbf2a950dfb 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -34,39 +34,33 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
                                         CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   StartTime = SU2_MPI::Wtime();
 
-  unsigned long iPoint;
-  unsigned short TimeIter = config[iZone]->GetTimeIter();
-  bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST);
-  bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND);
-  bool dual_time = (dual_time_1st || dual_time_2nd);
-  unsigned short iMesh;
-  int Direct_Iter;
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
-  bool grid_IsMoving = config[iZone]->GetGrid_Movement();
+  const auto TimeIter = config[iZone]->GetTimeIter();
+  const bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST);
+  const bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool dual_time = (dual_time_1st || dual_time_2nd);
+  const bool grid_IsMoving = config[iZone]->GetGrid_Movement();
+  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
 
   auto solvers0 = solver[iZone][iInst][MESH_0];
+  auto geometries = geometry[iZone][iInst];
 
   //  /*--- Read the target pressure for inverse design. ---------------------------------------------*/
   //  if (config[iZone]->GetInvDesign_Cp() == YES)
   //    output->SetCp_InverseDesign(solvers0[FLOW_SOL],
-  //    geometry[iZone][iInst][MESH_0], config[iZone], ExtIter);
+  //    geometries[MESH_0], config[iZone], ExtIter);
 
   //  /*--- Read the target heat flux ----------------------------------------------------------------*/
   //  if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES)
   //    output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL],
-  //    geometry[iZone][iInst][MESH_0], config[iZone], ExtIter);
+  //    geometries[MESH_0], config[iZone], ExtIter);
 
   /*--- For the unsteady adjoint, load direct solutions from restart files. ---*/
 
   if (config[iZone]->GetTime_Marching()) {
-    Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2;
+    const int Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2 + dual_time;
 
     /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/
 
-    if (dual_time) {
-      Direct_Iter += 1;
-    }
-
     if (TimeIter == 0) {
       if (dual_time_2nd) {
         /*--- Load solution at timestep n-2 ---*/
@@ -74,7 +68,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
           auto solvers = solver[iZone][iInst][iMesh];
 
           solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
@@ -88,8 +82,8 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
             solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (grid_IsMoving) {
-            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n();
-            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1();
+            geometries[iMesh]->nodes->SetCoord_n();
+            geometries[iMesh]->nodes->SetCoord_n1();
           }
         }
       }
@@ -99,7 +93,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
           auto solvers = solver[iZone][iInst][iMesh];
 
           solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
@@ -110,7 +104,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
             solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (grid_IsMoving) {
-            geometry[iZone][iInst][iMesh]->nodes->SetCoord_n();
+            geometries[iMesh]->nodes->SetCoord_n();
           }
         }
       }
@@ -120,8 +114,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       LoadUnsteady_Solution(geometry, solver, config, iInst, iZone, Direct_Iter);
 
       if (config[iZone]->GetDeform_Mesh()) {
-        solvers0[MESH_SOL]->LoadRestart(
-            geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true);
+        solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
     } else if ((TimeIter > 0) && dual_time) {
@@ -133,8 +126,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       ---*/
 
       if (config[iZone]->GetDeform_Mesh()) {
-        solvers0[MESH_SOL]->LoadRestart(
-            geometry[iZone][iInst], solver[iZone][iInst], config[iZone], Direct_Iter, true);
+        solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
       /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/
@@ -146,7 +138,7 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
       /*--- Temporarily store the loaded solution in the Solution_Old array ---*/
 
-      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
         auto solvers = solver[iZone][iInst][iMesh];
 
         solvers[FLOW_SOL]->Set_OldSolution();
@@ -157,22 +149,22 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
           solvers[HEAT_SOL]->Set_OldSolution();
         }
         if (grid_IsMoving) {
-          geometry[iZone][iInst][iMesh]->nodes->SetCoord_Old();
+          geometries[iMesh]->nodes->SetCoord_Old();
         }
       }
 
       /*--- Set Solution at timestep n to solution at n-1 ---*/
 
-      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
         auto solvers = solver[iZone][iInst][iMesh];
 
-        for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
+        for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
           solvers[FLOW_SOL]->GetNodes()->SetSolution(
               iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
 
           if (grid_IsMoving) {
-            geometry[iZone][iInst][iMesh]->nodes->SetCoord(
-                iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n(iPoint));
+            geometries[iMesh]->nodes->SetCoord(
+                iPoint, geometries[iMesh]->nodes->GetCoord_n(iPoint));
           }
           if (turbulent) {
             solvers[TURB_SOL]->GetNodes()->SetSolution(
@@ -186,16 +178,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       }
       if (dual_time_1st) {
         /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
           auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
@@ -210,16 +201,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       }
       if (dual_time_2nd) {
         /*--- Set Solution at timestep n-1 to solution at n-2 ---*/
-        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
           auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
 
             if (grid_IsMoving) {
-              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_n1(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_n1(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
@@ -232,16 +222,15 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
           }
         }
         /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
           auto solvers = solver[iZone][iInst][iMesh];
 
-          for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
             solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
                 iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[iZone][iInst][iMesh]->nodes->SetCoord_n1(
-                  iPoint, geometry[iZone][iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n1(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
               solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(
@@ -259,52 +248,61 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
     /*--- Compute & set Grid Velocity via finite differences of the Coordinates. ---*/
     if (grid_IsMoving)
-      for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++)
-        geometry[iZone][iInst][iMesh]->SetGridVelocity(config[iZone], TimeIter);
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++)
+        geometries[iMesh]->SetGridVelocity(config[iZone], TimeIter);
 
   }  // if unsteady
 
+  SU2_OMP_PARALLEL_(if(solvers0[ADJFLOW_SOL]->GetHasHybridParallel())) {
+
   /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/
 
   if (TimeIter == 0 || dual_time) {
-    for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+    for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
       auto solvers = solver[iZone][iInst][iMesh];
-      for (iPoint = 0; iPoint < geometry[iZone][iInst][iMesh]->GetnPoint(); iPoint++) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++)
         solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+      END_SU2_OMP_FOR
     }
     if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
-      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
         solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+      END_SU2_OMP_FOR
     }
     if (heat) {
-      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
         solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+      END_SU2_OMP_FOR
     }
     if (config[iZone]->AddRadiation()) {
-      for (iPoint = 0; iPoint < geometry[iZone][iInst][MESH_0]->GetnPoint(); iPoint++) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
         solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+      END_SU2_OMP_FOR
     }
   }
 
-  solvers0[ADJFLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
+  solvers0[ADJFLOW_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
                                        MESH_0, 0, RUNTIME_ADJFLOW_SYS, false);
 
   if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
-    solvers0[ADJTURB_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
+    solvers0[ADJTURB_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
                                          MESH_0, 0, RUNTIME_ADJTURB_SYS, false);
   }
   if (heat) {
-    solvers0[ADJHEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
+    solvers0[ADJHEAT_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
                                          MESH_0, 0, RUNTIME_ADJHEAT_SYS, false);
   }
   if (config[iZone]->AddRadiation()) {
-    solvers0[ADJRAD_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solvers0, config[iZone],
+    solvers0[ADJRAD_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
                                         MESH_0, 0, RUNTIME_ADJRAD_SYS, false);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index 30249c14a5d..b3df2767365 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -29,7 +29,8 @@
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
 #include "../../../Common/include/parallelization/omp_structure.hpp"
 
-CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
+CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver,
+                               unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
   adjoint = true;
 
@@ -77,9 +78,12 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
 
   /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/
 
-  su2double Solution[MAXNVAR] = {1e-16};
+  if (nVar > MAXNVAR) {
+    SU2_MPI::Error("Oops! The CDiscAdjSolver static array sizes are not large enough.",CURRENT_FUNCTION);
+  }
 
-  nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config);
+  vector<su2double> Solution(nVar,1e-16);
+  nodes = new CDiscAdjVariable(Solution.data(), nPoint, nDim, nVar, config);
   SetBaseClassPointerToNodes();
 
   switch(KindDirect_Solver){

From 83b032bda257f4008a4985430d17a44ce2e24f57 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Tue, 23 Mar 2021 16:26:10 +0000
Subject: [PATCH 40/57] prepare CDiscAdjFEASolver

---
 .../iteration/CDiscAdjFEAIteration.hpp        |  19 -
 SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp | 169 +++++---
 SU2_CFD/include/solvers/CSolver.hpp           |   5 -
 .../src/iteration/CDiscAdjFEAIteration.cpp    |  61 +--
 SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp     | 410 ++++--------------
 SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp    |   5 +-
 6 files changed, 225 insertions(+), 444 deletions(-)

diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
index c613d449bfc..083222664b8 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
@@ -104,25 +104,6 @@ class CDiscAdjFEAIteration final : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Updates the containers for the discrete adjoint mean flow system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   */
-  void Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-              CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-              CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-              unsigned short val_iInst) override;
-
   /*!
    * \brief Monitors the convergence and other metrics for the discrete adjoint mean flow system.
    * \param[in] output - Pointer to the COutput class.
diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
index c85ea4e8653..48c29f76dc7 100644
--- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
@@ -38,57 +38,84 @@
  */
 class CDiscAdjFEASolver final : public CSolver {
 private:
+  static constexpr size_t MAXNVAR = 3;  /*!< \brief Max number of variables, for static arrays. */
+
   unsigned short KindDirect_Solver = 0;
   CSolver *direct_solver = nullptr;
-  su2double *Sens_E = nullptr,          /*!< \brief Young modulus sensitivity coefficient for each boundary. */
-  *Sens_Nu = nullptr,                   /*!< \brief Poisson's ratio sensitivity coefficient for each boundary. */
-  *Sens_nL = nullptr,                   /*!< \brief Normal pressure sensitivity coefficient for each boundary. */
-  **CSensitivity = nullptr;             /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
-
-  su2double *Solution_Vel = nullptr,    /*!< \brief Velocity componenent of the solution. */
-  *Solution_Accel = nullptr;            /*!< \brief Acceleration componenent of the solution. */
-
-  su2double *normalLoads = nullptr;     /*!< \brief Values of the normal loads for each marker iMarker_nL. */
-
-  unsigned short nMPROP = 0;            /*!< \brief Number of material properties */
-
-  su2double *E_i = nullptr,               /*!< \brief Values of the Young's Modulus. */
-            *Nu_i = nullptr,              /*!< \brief Values of the Poisson's ratio. */
-            *Rho_i = nullptr,             /*!< \brief Values of the density (for inertial effects). */
-            *Rho_DL_i = nullptr;          /*!< \brief Values of the density (for volume loading). */
-  int       *AD_Idx_E_i = nullptr,        /*!< \brief Derivative index of the Young's Modulus. */
-            *AD_Idx_Nu_i = nullptr,       /*!< \brief Derivative index of the Poisson's ratio. */
-            *AD_Idx_Rho_i = nullptr,      /*!< \brief Derivative index of the density (for inertial effects). */
-            *AD_Idx_Rho_DL_i = nullptr;   /*!< \brief Derivative index of the density (for volume loading). */
-
-  su2double *Local_Sens_E = nullptr,        /*!< \brief Local sensitivity of the Young's modulus. */
-            *Global_Sens_E = nullptr,       /*!< \brief Global sensitivity of the Young's modulus. */
-            *Total_Sens_E = nullptr;        /*!< \brief Total sensitivity of the Young's modulus (time domain). */
-  su2double *Local_Sens_Nu = nullptr,       /*!< \brief Local sensitivity of the Poisson ratio. */
-            *Global_Sens_Nu = nullptr,      /*!< \brief Global sensitivity of the Poisson ratio. */
-            *Total_Sens_Nu = nullptr;       /*!< \brief Total sensitivity of the Poisson ratio (time domain). */
-  su2double *Local_Sens_Rho = nullptr,      /*!< \brief Local sensitivity of the density. */
-            *Global_Sens_Rho = nullptr,     /*!< \brief Global sensitivity of the density. */
-            *Total_Sens_Rho = nullptr;      /*!< \brief Total sensitivity of the density (time domain). */
-  su2double *Local_Sens_Rho_DL = nullptr,   /*!< \brief Local sensitivity of the volume load. */
-            *Global_Sens_Rho_DL = nullptr,  /*!< \brief Global sensitivity of the volume load. */
-            *Total_Sens_Rho_DL = nullptr;   /*!< \brief Total sensitivity of the volume load (time domain). */
-
-  bool de_effects = false;                  /*!< \brief Determines if DE effects are considered. */
-  unsigned short nEField = 0;               /*!< \brief Number of electric field areas in the problem. */
-  su2double *EField = nullptr;              /*!< \brief Array that stores the electric field as design variables. */
-  int       *AD_Idx_EField = nullptr;       /*!< \brief Derivative index of the electric field as design variables. */
-  su2double *Local_Sens_EField = nullptr,   /*!< \brief Local sensitivity of the Electric Field. */
-            *Global_Sens_EField = nullptr,  /*!< \brief Global sensitivity of the Electric Field. */
-            *Total_Sens_EField = nullptr;   /*!< \brief Total sensitivity of the Electric Field (time domain). */
-
-  bool fea_dv = false;                  /*!< \brief Determines if the design variable we study is a FEA parameter. */
-  unsigned short nDV = 0;               /*!< \brief Number of design variables in the problem. */
-  su2double *DV_Val = nullptr;          /*!< \brief Values of the design variables. */
-  int       *AD_Idx_DV_Val = nullptr;   /*!< \brief Derivative index of the design variables. */
-  su2double *Local_Sens_DV = nullptr,   /*!< \brief Local sensitivity of the design variables. */
-            *Global_Sens_DV = nullptr,  /*!< \brief Global sensitivity of the design variables. */
-            *Total_Sens_DV = nullptr;   /*!< \brief Total sensitivity of the design variables (time domain). */
+
+  /*!
+   * \brief Manages values and sensitivities of design variables (owns its arrays, must not be copied).
+   */
+  struct SensData {
+    unsigned short size = 0;          /*!< \brief Number of entries in each array. */
+    su2double* val = nullptr;         /*!< \brief Value of the variable. */
+    int* AD_Idx = nullptr;            /*!< \brief Derivative index in the AD tape. */
+    bool localIdx = false;            /*!< \brief Set by SetIndex(), makes GetDerivative() use AD_Idx. */
+    su2double* LocalSens = nullptr;   /*!< \brief Local sensitivity (domain). */
+    su2double* GlobalSens = nullptr;  /*!< \brief Global sensitivity (mpi). */
+    su2double* TotalSens = nullptr;   /*!< \brief Total sensitivity (time domain). */
+
+    su2double& operator[] (unsigned short i) { return val[i]; }  // unchecked access to the values
+    const su2double& operator[] (unsigned short i) const { return val[i]; }
+
+    void resize(unsigned short n) {  // discards the current contents, all new entries are zero-initialized
+      clear();
+      size = n;
+      val = new su2double[n]();
+      AD_Idx = new int[n]();
+      LocalSens = new su2double[n]();
+      GlobalSens = new su2double[n]();
+      TotalSens = new su2double[n]();
+    }
+
+    void clear() {  // pointers are nulled after deletion so that a later clear() (e.g. from the dtor) is a no-op
+      size = 0;
+      localIdx = false;
+      delete [] val; val = nullptr;
+      delete [] AD_Idx; AD_Idx = nullptr;
+      delete [] LocalSens; LocalSens = nullptr;
+      delete [] GlobalSens; GlobalSens = nullptr;
+      delete [] TotalSens; TotalSens = nullptr;
+    }
+
+    void Register(bool push_index) {  // passes every value to AD::RegisterInput
+      for (auto i = 0u; i < size; ++i) AD::RegisterInput(val[i], push_index);
+    }
+
+    void SetIndex() {  // calls AD::SetIndex for every (AD_Idx, val) pair and switches to index-based extraction
+      for (auto i = 0u; i < size; ++i) AD::SetIndex(AD_Idx[i], val[i]);
+      localIdx = true;
+    }
+
+    void GetDerivative() {  // fills LocalSens (by index or from the value), then sums over ranks into GlobalSens
+      if (localIdx)
+        for (auto i = 0u; i < size; ++i) LocalSens[i] = AD::GetDerivative(AD_Idx[i]);
+      else
+        for (auto i = 0u; i < size; ++i) LocalSens[i] = SU2_TYPE::GetDerivative(val[i]);
+
+      SU2_MPI::Allreduce(LocalSens, GlobalSens, size, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+    }
+
+    void UpdateTotal() {  // accumulates the current global sensitivities into the totals
+      for (auto i = 0u; i < size; ++i) TotalSens[i] += GlobalSens[i];
+    }
+
+    ~SensData() { clear(); }
+  };
+
+  unsigned short nMPROP = 0;  /*!< \brief Number of material properties */
+  SensData E;                 /*!< \brief Values of the Young's Modulus. */
+  SensData Nu;                /*!< \brief Values of the Poisson's ratio. */
+  SensData Rho;               /*!< \brief Values of the density (for inertial effects). */
+  SensData Rho_DL;            /*!< \brief Values of the density (for volume loading). */
+
+  bool de_effects = false;    /*!< \brief Determines if DE effects are considered. */
+  unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */
+  SensData EField;            /*!< \brief Array that stores the electric field as design variables. */
+
+  bool fea_dv = false;        /*!< \brief Determines if the design variable we study is a FEA parameter. */
+  unsigned short nDV = 0;     /*!< \brief Number of design variables in the problem. */
+  SensData DV;                /*!< \brief Values of the design variables. */
 
   CDiscAdjFEABoundVariable* nodes = nullptr;  /*!< \brief The highest level in the variable hierarchy this solver can safely use. */
 
@@ -97,6 +124,11 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   inline CVariable* GetBaseClassPointerToNodes() override { return nodes; }
 
+  /*!
+   * \brief Read the design variables for the adjoint solver
+   */
+  void ReadDV(const CConfig *config);
+
 public:
 
   /*!
@@ -163,97 +195,97 @@ class CDiscAdjFEASolver final : public CSolver {
    * \return Value of the total Young's modulus sensitivity
    *         (inviscid + viscous contribution).
    */
-  inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return Total_Sens_E[iVal]; }
+  inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return E.TotalSens[iVal]; }
 
   /*!
    * \brief Set the total Poisson's ratio sensitivity.
    * \return Value of the Poisson's ratio sensitivity
    */
-  inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Total_Sens_Nu[iVal]; }
+  inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Nu.TotalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural density
    * \return Value of the structural density sensitivity
    */
-  inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Total_Sens_Rho[iVal]; }
+  inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Rho.TotalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural weight
    * \return Value of the structural weight sensitivity
    */
-  inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Total_Sens_Rho_DL[iVal]; }
+  inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.TotalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Electric Field in the region iEField (time averaged)
    */
-  inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return Total_Sens_EField[iEField]; }
+  inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return EField.TotalSens[iEField]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the total sensitivity coefficient for the FEA DV in the region iDVFEA (time averaged)
    */
-  inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return Total_Sens_DV[iDVFEA]; }
+  inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.TotalSens[iDVFEA]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Young Modulus E
    */
-  inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return Global_Sens_E[iVal]; }
+  inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return E.GlobalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the Mach sensitivity for the Poisson's ratio Nu
    */
-  inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Global_Sens_Nu[iVal]; }
+  inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Nu.GlobalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Electric Field in the region iEField
    */
-  inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return Global_Sens_EField[iEField]; }
+  inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return EField.GlobalSens[iEField]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the FEA DV in the region iDVFEA
    */
-  inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return Global_Sens_DV[iDVFEA]; }
+  inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.GlobalSens[iDVFEA]; }
 
   /*!
    * \brief Get the total sensitivity for the structural density
    * \return Value of the structural density sensitivity
    */
-  inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Global_Sens_Rho[iVal]; }
+  inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Rho.GlobalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural weight
    * \return Value of the structural weight sensitivity
    */
-  inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Global_Sens_Rho_DL[iVal]; }
+  inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.GlobalSens[iVal]; }
 
   /*!
    * \brief Get the value of the Young modulus from the adjoint solver
    * \return Value of the Young modulus from the adjoint solver
    */
-  inline su2double GetVal_Young(unsigned short iVal) const override { return E_i[iVal]; }
+  inline su2double GetVal_Young(unsigned short iVal) const override { return E[iVal]; }
 
   /*!
    * \brief Get the value of the Poisson's ratio from the adjoint solver
    * \return Value of the Poisson's ratio from the adjoint solver
    */
-  inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu_i[iVal]; }
+  inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu[iVal]; }
 
   /*!
    * \brief Get the value of the density from the adjoint solver, for inertial effects
    * \return Value of the density from the adjoint solver
    */
-  inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho_i[iVal]; }
+  inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho[iVal]; }
 
   /*!
    * \brief Get the value of the density from the adjoint solver, for dead loads
    * \return Value of the density for dead loads, from the adjoint solver
    */
-  inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL_i[iVal]; }
+  inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL[iVal]; }
 
   /*!
    * \brief Get the number of variables for the Electric Field from the adjoint solver
@@ -261,11 +293,6 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   inline unsigned short GetnEField(void) const override { return nEField; }
 
-  /*!
-   * \brief Read the design variables for the adjoint solver
-   */
-  void ReadDV(CConfig *config) override;
-
   /*!
    * \brief Get the number of design variables from the adjoint solver,
    * \return Number of design variables from the adjoint solver
@@ -282,7 +309,7 @@ class CDiscAdjFEASolver final : public CSolver {
    * \brief Get the value of the design variables from the adjoint solver
    * \return Pointer to the values of the design variables
    */
-  inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV_Val[iVal]; }
+  inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV[iVal]; }
 
   /*!
    * \brief Prepare the solver for a new recording.
diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp
index 641c57e716f..e41dcf871ef 100644
--- a/SU2_CFD/include/solvers/CSolver.hpp
+++ b/SU2_CFD/include/solvers/CSolver.hpp
@@ -3383,11 +3383,6 @@ class CSolver {
    */
   inline virtual unsigned short GetnDVFEA(void) const { return 0; }
 
-  /*!
-   * \brief A virtual member.
-   */
-  inline virtual void ReadDV(CConfig *config) { }
-
   /*!
    * \brief A virtual member.
    * \return Pointer to the values of the Electric Field
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index fa753a598a7..23cb1ee1b75 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -162,7 +162,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration
                                    CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                    CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement,
                                    CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
 
   /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/
 
@@ -171,7 +170,7 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration
 
   solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->ExtractAdjoint_Variables(geometry[val_iZone][val_iInst][MESH_0],
                                                                              config[val_iZone]);
-  if (dynamic) {
+  if (config[val_iZone]->GetTime_Domain()) {
     integration[val_iZone][val_iInst][ADJFEA_SOL]->SetConvergence(false);
   }
 }
@@ -215,15 +214,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
   auto structural_numerics = numerics[iZone][iInst][MESH_0][FEA_SOL];
 
   /*--- Some numerics are only instanciated under these conditions ---*/
-  bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem();
-  bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS;
-  bool de_effects = config[iZone]->GetDE_Effects() && nonlinear;
-  bool element_based = dir_solver->IsElementBased() && nonlinear;
+  const bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem();
+  const bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS;
+  const bool de_effects = config[iZone]->GetDE_Effects() && nonlinear;
+  const bool element_based = dir_solver->IsElementBased() && nonlinear;
 
   SU2_OMP_PARALLEL
   {
 
-  int thread = omp_get_thread_num();
+  const int thread = omp_get_thread_num();
+  const int offset = thread*MAX_TERMS;
+  const int fea_term = FEA_TERM+offset;
+  const int mat_nhcomp = MAT_NHCOMP+offset;
+  const int mat_idealde = MAT_IDEALDE+offset;
+  const int mat_knowles = MAT_KNOWLES+offset;
+  const int de_term = DE_TERM+offset;
 
   for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) {
     su2double E = adj_solver->GetVal_Young(iProp);
@@ -233,33 +238,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
 
     /*--- Add dependencies for E and Nu ---*/
 
-    structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
+    structural_numerics[fea_term]->SetMaterial_Properties(iProp, E, nu);
 
     /*--- Add dependencies for Rho and Rho_DL ---*/
 
-    structural_numerics[FEA_TERM + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
+    structural_numerics[fea_term]->SetMaterial_Density(iProp, rho, rhoDL);
 
     /*--- Add dependencies for element-based simulations. ---*/
 
     if (element_based) {
       /*--- Neo Hookean Compressible ---*/
-      structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_nhcomp]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_nhcomp]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Ideal DE ---*/
-      structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_idealde]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_idealde]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Knowles ---*/
-      structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_knowles]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_knowles]->SetMaterial_Density(iProp, rho, rhoDL);
     }
   }
 
   if (de_effects) {
     for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) {
-      structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
-      structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[fea_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[de_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
     }
   }
 
@@ -275,22 +280,19 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
       for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) {
         su2double dvfea = adj_solver->GetVal_DVFEA(iDV);
 
-        structural_numerics[FEA_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
+        structural_numerics[fea_term]->Set_DV_Val(iDV, dvfea);
 
-        if (de_effects) structural_numerics[DE_TERM + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
+        if (de_effects) structural_numerics[de_term]->Set_DV_Val(iDV, dvfea);
 
         if (element_based) {
-          structural_numerics[MAT_NHCOMP + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_IDEALDE + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_KNOWLES + thread*MAX_TERMS]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_nhcomp]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_idealde]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_knowles]->Set_DV_Val(iDV, dvfea);
         }
       }
       break;
   }
 
-  }
-  END_SU2_OMP_PARALLEL
-
   /*--- MPI dependencies. ---*/
 
   dir_solver->InitiateComms(structural_geometry, config[iZone], SOLUTION_FEA);
@@ -301,6 +303,9 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
     structural_geometry->CompleteComms(structural_geometry, config[iZone], COORDINATES);
   }
 
+  }
+  END_SU2_OMP_PARALLEL
+
   /*--- FSI specific dependencies. ---*/
   if (fsi) {
     /*--- Set relation between solution and predicted displacements, which are the transferred ones. ---*/
@@ -317,6 +322,7 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
     ///       making it a virtual method of CSolver does not feel "right" as its purpose could be confused.
     static_cast<CFEASolver*>(dir_solver)->FilterElementDensities(structural_geometry, config[iZone]);
   }
+
 }
 
 void CDiscAdjFEAIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
@@ -333,11 +339,6 @@ void CDiscAdjFEAIteration::InitializeAdjoint(CSolver***** solver, CGeometry****
   solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
 }
 
-void CDiscAdjFEAIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                  CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                  CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                  CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {}
-
 bool CDiscAdjFEAIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                    CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                    CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
index bc72135c545..ea7b08a1d47 100644
--- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
@@ -33,13 +33,9 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
 
   adjoint = true;
 
-  unsigned short iVar, iMarker;
-
   unsigned long iPoint;
-  string text_line, mesh_filename;
-  string filename, AdjExt;
 
-  bool dynamic = (config->GetTime_Domain());
+  const bool dynamic = (config->GetTime_Domain());
 
   nVar = direct_solver->GetnVar();
   nDim = geometry->GetnDim();
@@ -54,8 +50,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
 
   /*--- Define some auxiliary vectors related to the residual ---*/
 
-  Residual      = new su2double[nVar];         for (iVar = 0; iVar < nVar; iVar++) Residual[iVar]      = 1.0;
-
   Residual_RMS.resize(nVar,1.0);
   Residual_Max.resize(nVar,1.0);
   Point_Max.resize(nVar,0);
@@ -71,32 +65,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
-
-  Solution = new su2double[nVar];
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
-  if (dynamic) {
-    Solution_Vel    = new su2double[nVar];
-    Solution_Accel  = new su2double[nVar];
-
-    for (iVar = 0; iVar < nVar; iVar++) Solution_Vel[iVar]      = 1e-16;
-    for (iVar = 0; iVar < nVar; iVar++) Solution_Accel[iVar]    = 1e-16;
-  }
-
-  /*--- Sensitivity definition and coefficient in all the markers ---*/
-
-  CSensitivity = new su2double* [nMarker];
-
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
-    CSensitivity[iMarker] = new su2double [geometry->nVertex[iMarker]]();
-  }
-
-  Sens_E  = new su2double[nMarker]();
-  Sens_Nu = new su2double[nMarker]();
-  Sens_nL = new su2double[nMarker]();
+  /*--- Initialize the adjoint solution. ---*/
 
-  nodes = new CDiscAdjFEABoundVariable(Solution, Solution_Accel, Solution_Vel, nPoint, nDim, nVar, dynamic, config);
+  vector<su2double> init(nVar,1e-16);
+  nodes = new CDiscAdjFEABoundVariable(init.data(), init.data(), init.data(), nPoint, nDim, nVar, dynamic, config);
   SetBaseClassPointerToNodes();
 
   /*--- Set which points are vertices and allocate boundary data. ---*/
@@ -111,7 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     }
   nodes->AllocateBoundaryVariables(config);
 
-
   /*--- Store the direct solution ---*/
 
   for (iPoint = 0; iPoint < nPoint; iPoint++){
@@ -141,29 +112,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     SU2_MPI::Error("WARNING: For a material to be fully defined, E, Nu and Rho need to have the same dimensions.", CURRENT_FUNCTION);
   }
 
-  E_i           = new su2double[nMPROP]();
-  Local_Sens_E  = new su2double[nMPROP]();
-  Global_Sens_E = new su2double[nMPROP]();
-  Total_Sens_E  = new su2double[nMPROP]();
-  AD_Idx_E_i    = new int[nMPROP]();
-
-  Nu_i           = new su2double[nMPROP]();
-  Local_Sens_Nu  = new su2double[nMPROP]();
-  Global_Sens_Nu = new su2double[nMPROP]();
-  Total_Sens_Nu  = new su2double[nMPROP]();
-  AD_Idx_Nu_i    = new int[nMPROP]();
-
-  Rho_i           = new su2double[nMPROP](); // For inertial effects
-  Local_Sens_Rho  = new su2double[nMPROP]();
-  Global_Sens_Rho = new su2double[nMPROP]();
-  Total_Sens_Rho  = new su2double[nMPROP]();
-  AD_Idx_Rho_i    = new int[nMPROP]();
-
-  Rho_DL_i           = new su2double[nMPROP](); // For dead loads
-  Local_Sens_Rho_DL  = new su2double[nMPROP]();
-  Global_Sens_Rho_DL = new su2double[nMPROP]();
-  Total_Sens_Rho_DL  = new su2double[nMPROP]();
-  AD_Idx_Rho_DL_i    = new int[nMPROP]();
+  E.resize(nMPROP);
+  Nu.resize(nMPROP);
+  Rho.resize(nMPROP);
+  Rho_DL.resize(nMPROP);
 
   /*--- Initialize vector structures for multiple electric regions ---*/
 
@@ -171,12 +123,7 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
 
   if (de_effects) {
     nEField = config->GetnElectric_Field();
-
-    EField             = new su2double[nEField]();
-    Local_Sens_EField  = new su2double[nEField]();
-    Global_Sens_EField = new su2double[nEField]();
-    Total_Sens_EField  = new su2double[nEField]();
-    AD_Idx_EField      = new int[nEField]();
+    EField.resize(nEField);
   }
 
   /*--- Initialize vector structures for structural-based design variables ---*/
@@ -194,80 +141,14 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
       break;
   }
 
-  if (fea_dv) {
-    ReadDV(config);
-    Local_Sens_DV  = new su2double[nDV]();
-    Global_Sens_DV = new su2double[nDV]();
-    Total_Sens_DV  = new su2double[nDV]();
-    AD_Idx_DV_Val  = new int[nDV]();
-  }
+  if (fea_dv) ReadDV(config);
 
 }
 
-CDiscAdjFEASolver::~CDiscAdjFEASolver(void){
-
-  unsigned short iMarker;
-
-  if (CSensitivity != nullptr) {
-    for (iMarker = 0; iMarker < nMarker; iMarker++) {
-      delete [] CSensitivity[iMarker];
-    }
-    delete [] CSensitivity;
-  }
-
-  delete [] E_i;
-  delete [] Nu_i;
-  delete [] Rho_i;
-  delete [] Rho_DL_i;
-
-  delete [] AD_Idx_E_i;
-  delete [] AD_Idx_Nu_i;
-  delete [] AD_Idx_Rho_i;
-  delete [] AD_Idx_Rho_DL_i;
-
-  delete [] Local_Sens_E;
-  delete [] Local_Sens_Nu;
-  delete [] Local_Sens_Rho;
-  delete [] Local_Sens_Rho_DL;
-
-  delete [] Global_Sens_E;
-  delete [] Global_Sens_Nu;
-  delete [] Global_Sens_Rho;
-  delete [] Global_Sens_Rho_DL;
-
-  delete [] Total_Sens_E;
-  delete [] Total_Sens_Nu;
-  delete [] Total_Sens_Rho;
-  delete [] Total_Sens_Rho_DL;
-
-  delete [] normalLoads;
-  delete [] Sens_E;
-  delete [] Sens_Nu;
-  delete [] Sens_nL;
-
-  delete [] EField;
-  delete [] Local_Sens_EField;
-  delete [] Global_Sens_EField;
-  delete [] Total_Sens_EField;
-  delete [] AD_Idx_EField;
-
-  delete [] DV_Val;
-  delete [] Local_Sens_DV;
-  delete [] Global_Sens_DV;
-  delete [] Total_Sens_DV;
-  delete [] AD_Idx_DV_Val;
-
-  delete [] Solution_Vel;
-  delete [] Solution_Accel;
-
-  delete nodes;
-}
+CDiscAdjFEASolver::~CDiscAdjFEASolver() { delete nodes; }
 
 void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
 
-
-  bool dynamic (config->GetTime_Domain());
-
   unsigned long iPoint;
   unsigned short iVar;
 
@@ -277,7 +158,7 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
     direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint));
   }
 
-  if (dynamic){
+  if (config->GetTime_Domain()){
     /*--- Reset the solution to the initial (converged) solution ---*/
 
     for (iPoint = 0; iPoint < nPoint; iPoint++){
@@ -321,9 +202,9 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
 
 void CDiscAdjFEASolver::RegisterSolution(CGeometry *geometry, CConfig *config){
 
-  bool input = true;
-  bool dynamic = config->GetTime_Domain();
-  bool push_index = !config->GetMultizone_Problem();
+  const bool input = true;
+  const bool dynamic = config->GetTime_Domain();
+  const bool push_index = !config->GetMultizone_Problem();
 
   /*--- Register solution at all necessary time instances and other variables on the tape ---*/
 
@@ -354,13 +235,13 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
   if (KindDirect_Solver == RUNTIME_FEA_SYS) {
 
-    bool pseudo_static = config->GetPseudoStatic();
+    const bool pseudo_static = config->GetPseudoStatic();
 
     for (iVar = 0; iVar < nMPROP; iVar++) {
-      E_i[iVar]      = config->GetElasticyMod(iVar);
-      Nu_i[iVar]     = config->GetPoissonRatio(iVar);
-      Rho_i[iVar]    = pseudo_static? 0.0 : config->GetMaterialDensity(iVar);
-      Rho_DL_i[iVar] = config->GetMaterialDensity(iVar);
+      E[iVar]      = config->GetElasticyMod(iVar);
+      Nu[iVar]     = config->GetPoissonRatio(iVar);
+      Rho[iVar]    = pseudo_static? 0.0 : config->GetMaterialDensity(iVar);
+      Rho_DL[iVar] = config->GetMaterialDensity(iVar);
     }
 
     /*--- Read the values of the electric field ---*/
@@ -371,48 +252,28 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
     /*--- Reset index, otherwise messes up other derivatives ---*/
     if (fea_dv) {
-      for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV_Val[iVar]);
+      for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV[iVar]);
     }
 
     if (!reset) {
-      bool local_index = config->GetMultizone_Problem();
-      bool push_index = !local_index;
-
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        AD::RegisterInput(E_i[iVar], push_index);
-        AD::RegisterInput(Nu_i[iVar], push_index);
-        AD::RegisterInput(Rho_i[iVar], push_index);
-        AD::RegisterInput(Rho_DL_i[iVar], push_index);
-      }
+      const bool local_index = config->GetMultizone_Problem();
+      const bool push_index = !local_index;
 
-      if(de_effects){
-        for (iVar = 0; iVar < nEField; iVar++)
-          AD::RegisterInput(EField[iVar], push_index);
-      }
-
-      if(fea_dv){
-        for (iVar = 0; iVar < nDV; iVar++)
-          AD::RegisterInput(DV_Val[iVar], push_index);
-      }
+      E.Register(push_index);
+      Nu.Register(push_index);
+      Rho.Register(push_index);
+      Rho_DL.Register(push_index);
+      if (de_effects) EField.Register(push_index);
+      if (fea_dv) DV.Register(push_index);
 
       /*--- Explicitly store the tape indices for when we extract the derivatives ---*/
       if (local_index) {
-        for (iVar = 0; iVar < nMPROP; iVar++) {
-          AD::SetIndex(AD_Idx_E_i[iVar], E_i[iVar]);
-          AD::SetIndex(AD_Idx_Nu_i[iVar], Nu_i[iVar]);
-          AD::SetIndex(AD_Idx_Rho_i[iVar], Rho_i[iVar]);
-          AD::SetIndex(AD_Idx_Rho_DL_i[iVar], Rho_DL_i[iVar]);
-        }
-
-        if (de_effects) {
-          for (iVar = 0; iVar < nEField; iVar++)
-            AD::SetIndex(AD_Idx_EField[iVar], EField[iVar]);
-        }
-
-        if (fea_dv) {
-          for (iVar = 0; iVar < nDV; iVar++)
-            AD::SetIndex(AD_Idx_DV_Val[iVar], DV_Val[iVar]);
-        }
+        E.SetIndex();
+        Nu.SetIndex();
+        Rho.SetIndex();
+        Rho_DL.SetIndex();
+        if (de_effects) EField.SetIndex();
+        if (fea_dv) DV.SetIndex();
       }
 
       /*--- Register the flow tractions ---*/
@@ -422,16 +283,16 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
   }
 
-    /*--- Here it is possible to register other variables as input that influence the flow solution
-     * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
-     * extracted in the ExtractAdjointVariables routine. ---*/
+  /*--- Here it is possible to register other variables as input that influence the flow solution
+   * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
+   * extracted in the ExtractAdjointVariables routine. ---*/
 }
 
 void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){
 
-  bool input = false;
-  bool dynamic = config->GetTime_Domain();
-  bool push_index = !config->GetMultizone_Problem();
+  const bool input = false;
+  const bool dynamic = config->GetTime_Domain();
+  const bool push_index = !config->GetMultizone_Problem();
 
   /*--- Register variables as output of the solver iteration ---*/
 
@@ -447,13 +308,15 @@ void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){
 
-  bool dynamic = config->GetTime_Domain();
-  bool multizone = config->GetMultizone_Problem();
+  const bool dynamic = config->GetTime_Domain();
+  const bool multizone = config->GetMultizone_Problem();
 
   unsigned short iVar;
   unsigned long iPoint;
   su2double residual;
 
+  su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0};
+
   /*--- Set Residuals to zero ---*/
 
   SetResToZero();
@@ -595,78 +458,40 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co
 
 void CDiscAdjFEASolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config){
 
-  unsigned short iVar;
-  bool local_index = config->GetMultizone_Problem();
+  if (KindDirect_Solver != RUNTIME_FEA_SYS) return;
 
-  /*--- Extract the adjoint values of the farfield values ---*/
+  /*--- Sensitivities of material properties and design variables. ---*/
 
-  if (KindDirect_Solver == RUNTIME_FEA_SYS){
+  E.GetDerivative();
+  Nu.GetDerivative();
+  Rho.GetDerivative();
+  Rho_DL.GetDerivative();
+  if (de_effects) EField.GetDerivative();
+  if (fea_dv) DV.GetDerivative();
 
-    if (local_index) {
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        Local_Sens_E[iVar] = AD::GetDerivative(AD_Idx_E_i[iVar]);
-        Local_Sens_Nu[iVar] = AD::GetDerivative(AD_Idx_Nu_i[iVar]);
-        Local_Sens_Rho[iVar] = AD::GetDerivative(AD_Idx_Rho_i[iVar]);
-        Local_Sens_Rho_DL[iVar] = AD::GetDerivative(AD_Idx_Rho_DL_i[iVar]);
-      }
-    }
-    else {
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        Local_Sens_E[iVar] = SU2_TYPE::GetDerivative(E_i[iVar]);
-        Local_Sens_Nu[iVar] = SU2_TYPE::GetDerivative(Nu_i[iVar]);
-        Local_Sens_Rho[iVar] = SU2_TYPE::GetDerivative(Rho_i[iVar]);
-        Local_Sens_Rho_DL[iVar] = SU2_TYPE::GetDerivative(Rho_DL_i[iVar]);
-      }
-    }
-
-    SU2_MPI::Allreduce(Local_Sens_E, Global_Sens_E,  nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Nu, Global_Sens_Nu, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Rho, Global_Sens_Rho, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Rho_DL, Global_Sens_Rho_DL, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-
-    /*--- Extract the adjoint values of the electric field in the case that it is a parameter of the problem. ---*/
-
-    if (de_effects) {
-      for (iVar = 0; iVar < nEField; iVar++) {
-        if (local_index) Local_Sens_EField[iVar] = AD::GetDerivative(AD_Idx_EField[iVar]);
-        else             Local_Sens_EField[iVar] = SU2_TYPE::GetDerivative(EField[iVar]);
-      }
-      SU2_MPI::Allreduce(Local_Sens_EField, Global_Sens_EField, nEField, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    }
+  /*--- Extract the flow traction sensitivities. ---*/
 
-    if (fea_dv) {
-      for (iVar = 0; iVar < nDV; iVar++) {
-        if (local_index) Local_Sens_DV[iVar] = AD::GetDerivative(AD_Idx_DV_Val[iVar]);
-        else             Local_Sens_DV[iVar] = SU2_TYPE::GetDerivative(DV_Val[iVar]);
+  if (config->GetnMarker_Fluid_Load() > 0) {
+    for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){
+      for (unsigned short iDim = 0; iDim < nDim; iDim++){
+        su2double val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim);
+        nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens);
       }
-      SU2_MPI::Allreduce(Local_Sens_DV, Global_Sens_DV, nDV, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
     }
-
-    /*--- Extract the flow traction sensitivities ---*/
-
-    if (config->GetnMarker_Fluid_Load() > 0){
-      su2double val_sens;
-      for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){
-        for (unsigned short iDim = 0; iDim < nDim; iDim++){
-          val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim);
-          nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens);
-        }
-      }
-    }
-
   }
 
 }
 
 void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
 
-  bool dynamic = (config->GetTime_Domain());
-  bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0);
+  const bool dynamic = (config->GetTime_Domain());
+  const bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0);
+
+  su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0};
 
   unsigned short iVar;
-  unsigned long iPoint;
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
     for (iVar = 0; iVar < nVar; iVar++){
       Solution[iVar] = nodes->GetSolution(iPoint,iVar);
     }
@@ -676,20 +501,14 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
       }
     }
     if (dynamic){
-      for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar);
-      }
-      for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar);
-      }
       for (iVar = 0; iVar < nVar; iVar++){
         Solution[iVar] += nodes->GetDynamic_Derivative_n(iPoint,iVar);
       }
       for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Accel[iVar] += nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar);
+        Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar) + nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar);
       }
       for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Vel[iVar] += nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar);
+        Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar) + nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar);
       }
     }
     direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution);
@@ -704,12 +523,10 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){
 
-  bool dynamic = (config_container->GetTime_Domain());
-  unsigned long iPoint;
   unsigned short iVar;
 
-  if (dynamic){
-    for (iPoint = 0; iPoint<geometry->GetnPoint(); iPoint++){
+  if (config_container->GetTime_Domain()){
+    for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
       for (iVar=0; iVar < nVar; iVar++){
         nodes->SetDynamic_Derivative_n(iPoint, iVar, nodes->GetSolution_time_n(iPoint, iVar));
       }
@@ -726,26 +543,14 @@ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_cont
 
 void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*){
 
-  unsigned short iVar;
-
-  for (iVar = 0; iVar < nMPROP; iVar++){
-    Total_Sens_E[iVar]        += Global_Sens_E[iVar];
-    Total_Sens_Nu[iVar]       += Global_Sens_Nu[iVar];
-    Total_Sens_Rho[iVar]      += Global_Sens_Rho[iVar];
-    Total_Sens_Rho_DL[iVar]   += Global_Sens_Rho_DL[iVar];
-  }
-
-  if (de_effects){
-    for (iVar = 0; iVar < nEField; iVar++)
-      Total_Sens_EField[iVar]+= Global_Sens_EField[iVar];
-  }
+  E.UpdateTotal();
+  Nu.UpdateTotal();
+  Rho.UpdateTotal();
+  Rho_DL.UpdateTotal();
+  if (de_effects) EField.UpdateTotal();
+  if (fea_dv) DV.UpdateTotal();
 
-  if (fea_dv){
-    for (iVar = 0; iVar < nDV; iVar++)
-      Total_Sens_DV[iVar] += Global_Sens_DV[iVar];
-  }
-
-  /*--- Extract the topology optimization density sensitivities ---*/
+  /*--- Extract the topology optimization density sensitivities. ---*/
 
   direct_solver->ExtractAdjoint_Variables(geometry, config);
 
@@ -773,38 +578,32 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo
   }
 }
 
-void CDiscAdjFEASolver::ReadDV(CConfig *config) {
-
-  unsigned long index;
+void CDiscAdjFEASolver::ReadDV(const CConfig *config) {
 
   string filename;
   ifstream properties_file;
 
   /*--- Choose the filename of the design variable ---*/
 
-  string input_name;
-
   switch (config->GetDV_FEA()) {
     case YOUNG_MODULUS:
-      input_name = "dv_young.opt";
+      filename = "dv_young.opt";
       break;
     case POISSON_RATIO:
-      input_name = "dv_poisson.opt";
+      filename = "dv_poisson.opt";
       break;
     case DENSITY_VAL:
     case DEAD_WEIGHT:
-      input_name = "dv_density.opt";
+      filename = "dv_density.opt";
       break;
     case ELECTRIC_FIELD:
-      input_name = "dv_efield.opt";
+      filename = "dv_efield.opt";
       break;
     default:
-      input_name = "dv.opt";
+      filename = "dv.opt";
       break;
   }
 
-  filename = input_name;
-
   if (rank == MASTER_NODE) cout << "Filename: " << filename << "." << endl;
 
   properties_file.open(filename.data(), ios::in);
@@ -816,55 +615,32 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) {
     if (rank == MASTER_NODE)
       cout << "There is no design variable file." << endl;
 
-    nDV   = 1;
-    DV_Val = new su2double[nDV];
-    for (unsigned short iDV = 0; iDV < nDV; iDV++)
-      DV_Val[iDV] = 1.0;
-
+    nDV = 1;
+    DV.resize(nDV);
+    DV[0] = 1.0;
   }
   else{
 
     string text_line;
-
-     /*--- First pass: determine number of design variables ---*/
-
-    unsigned short iDV = 0;
+    vector<su2double> values;
 
     /*--- Skip the first line: it is the header ---*/
-
     getline (properties_file, text_line);
 
-    while (getline (properties_file, text_line)) iDV++;
-
-    /*--- Close the restart file ---*/
-
-    properties_file.close();
-
-    nDV = iDV;
-    DV_Val = new su2double[nDV];
-
-    /*--- Reopen the file (TODO: improve this) ---*/
-
-    properties_file.open(filename.data(), ios::in);
-
-    /*--- Skip the first line: it is the header ---*/
-
-    getline (properties_file, text_line);
-
-    iDV = 0;
     while (getline (properties_file, text_line)) {
-
       istringstream point_line(text_line);
 
-      point_line >> index >> DV_Val[iDV];
-
-      iDV++;
+      unsigned long index;
+      su2double value;
+      point_line >> index >> value;
 
+      values.push_back(value);
     }
 
-    /*--- Close the restart file ---*/
-
-    properties_file.close();
+    nDV = values.size();
+    DV.resize(nDV);
+    unsigned short iDV = 0;
+    for (auto x : values) DV[iDV++] = x;
 
   }
 
diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
index 9cb4950870b..610d2026043 100644
--- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
@@ -59,15 +59,16 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
   }
 
   /*--- Initialize the node structure ---*/
-  su2double Solution[MAXNVAR] = {1e-16};
   nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config);
   SetBaseClassPointerToNodes();
 
   /*--- Set which points are vertices and allocate boundary data. ---*/
 
+  vector<su2double> Solution(nVar,1e-16);
+
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) {
 
-    nodes->SetSolution(iPoint,Solution);
+    nodes->SetSolution(iPoint,Solution.data());
 
     for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
       long iVertex = geometry->nodes->GetVertex(iPoint, iMarker);

From 746587100127dfb4f4f9fd64f134b1ca431db570 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Tue, 23 Mar 2021 17:07:05 +0000
Subject: [PATCH 41/57] simplify

---
 SU2_CFD/include/solvers/CSolver.hpp           | 12 +++
 .../src/iteration/CDiscAdjFEAIteration.cpp    | 71 ++++++++--------
 SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp     | 82 +------------------
 SU2_CFD/src/solvers/CDiscAdjSolver.cpp        | 55 +------------
 SU2_CFD/src/solvers/CSolver.cpp               | 54 ++++++++++++
 5 files changed, 105 insertions(+), 169 deletions(-)

diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp
index e41dcf871ef..a2e29b4c055 100644
--- a/SU2_CFD/include/solvers/CSolver.hpp
+++ b/SU2_CFD/include/solvers/CSolver.hpp
@@ -3488,6 +3488,18 @@ class CSolver {
   inline virtual void SetAitken_Relaxation(CGeometry *geometry,
                                            CConfig *config) { }
 
+  /*!
+   * \brief Loads the solution from the restart file.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] filename - Name of the restart file.
+   * \param[in] skipVars - Number of variables preceding the solution.
+   */
+  void BasicLoadRestart(CGeometry *geometry,
+                        const CConfig *config,
+                        const string& filename,
+                        unsigned long skipVars);
+
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index 23cb1ee1b75..a19210d468f 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -63,20 +63,22 @@ CDiscAdjFEAIteration::CDiscAdjFEAIteration(const CConfig *config) : CIteration(c
 }
 
 CDiscAdjFEAIteration::~CDiscAdjFEAIteration(void) {}
+
 void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                       CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                       CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
                                       CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
   unsigned long iPoint;
-  unsigned short TimeIter = config[val_iZone]->GetTimeIter();
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
-
-  int Direct_Iter;
+  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
+  auto geometry0 = geometry[val_iZone][val_iInst][MESH_0];
+  auto dirNodes = solvers0[FEA_SOL]->GetNodes();
+  auto adjNodes = solvers0[ADJFEA_SOL]->GetNodes();
 
   /*--- For the dynamic adjoint, load direct solutions from restart files. ---*/
 
-  if (dynamic) {
-    Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1;
+  if (config[val_iZone]->GetTime_Domain()) {
+    const int TimeIter = config[val_iZone]->GetTimeIter();
+    const int Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - TimeIter - 1;
 
     /*--- We want to load the already converged solution at timesteps n and n-1 ---*/
 
@@ -86,15 +88,15 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->Set_Solution_time_n();
+    dirNodes->Set_Solution_time_n();
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Accel_time_n();
+    dirNodes->SetSolution_Accel_time_n();
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Vel_time_n();
+    dirNodes->SetSolution_Vel_time_n();
 
     /*--- Load solution timestep n ---*/
 
@@ -102,33 +104,28 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat
 
     /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint));
     }
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Accel_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Accel(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Accel_Direct(iPoint, dirNodes->GetSolution_Accel(iPoint));
     }
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Vel_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Vel(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Vel_Direct(iPoint, dirNodes->GetSolution_Vel(iPoint));
     }
 
   } else {
     /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint));
     }
   }
 
-  solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->Preprocessing(
-      geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-      RUNTIME_ADJFEA_SYS, false);
+  solvers0[ADJFEA_SOL]->Preprocessing(geometry0, solvers0, config[val_iZone], MESH_0, 0, RUNTIME_ADJFEA_SYS, false);
+
 }
 
 void CDiscAdjFEAIteration::LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
@@ -355,13 +352,14 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
+  const bool dynamic = (config[val_iZone]->GetTime_Domain());
+  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
 
   // TEMPORARY output only for standalone structural problems
   if ((!config[val_iZone]->GetFSI_Simulation()) && (rank == MASTER_NODE)) {
     unsigned short iVar;
 
-    bool de_effects = config[val_iZone]->GetDE_Effects();
+    const bool de_effects = config[val_iZone]->GetDE_Effects();
 
     /*--- Header of the temporary output file ---*/
     ofstream myfile_res;
@@ -371,24 +369,23 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
 
     myfile_res << config[val_iZone]->GetTimeIter() << "\t";
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]);
-    myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetTotal_ComboObj() << "\t";
+    solvers0[FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]);
+    myfile_res << scientific << solvers0[FEA_SOL]->GetTotal_ComboObj() << "\t";
 
     for (iVar = 0; iVar < config[val_iZone]->GetnElasticityMod(); iVar++)
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t";
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t";
     for (iVar = 0; iVar < config[val_iZone]->GetnPoissonRatio(); iVar++)
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t";
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t";
     if (dynamic) {
       for (iVar = 0; iVar < config[val_iZone]->GetnMaterialDensity(); iVar++)
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t";
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t";
     }
     if (de_effects) {
       for (iVar = 0; iVar < config[val_iZone]->GetnElectric_Field(); iVar++)
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_EField(iVar)
-                   << "\t";
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_EField(iVar) << "\t";
     }
-    for (iVar = 0; iVar < solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); iVar++) {
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t";
+    for (iVar = 0; iVar < solvers0[ADJFEA_SOL]->GetnDVFEA(); iVar++) {
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t";
     }
 
     myfile_res << endl;
@@ -427,7 +424,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
 
     if (outputDVFEA) {
       unsigned short iDV;
-      unsigned short nDV = solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA();
+      unsigned short nDV = solvers0[ADJFEA_SOL]->GetnDVFEA();
 
       myfile_res << "INDEX"
                  << "\t"
@@ -438,7 +435,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
       for (iDV = 0; iDV < nDV; iDV++) {
         myfile_res << iDV;
         myfile_res << "\t";
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV);
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV);
         myfile_res << endl;
       }
 
diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
index ea7b08a1d47..9b377b54751 100644
--- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
@@ -83,22 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     }
   nodes->AllocateBoundaryVariables(config);
 
-  /*--- Store the direct solution ---*/
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    nodes->SetSolution_Direct(iPoint, direct_solver->GetNodes()->GetSolution(iPoint));
-  }
-
-  if (dynamic){
-    for (iPoint = 0; iPoint < nPoint; iPoint++){
-      nodes->SetSolution_Accel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Accel(iPoint));
-    }
-
-    for (iPoint = 0; iPoint < nPoint; iPoint++){
-      nodes->SetSolution_Vel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Vel(iPoint));
-    }
-  }
-
   /*--- Initialize vector structures for multiple material definition ---*/
 
   nMPROP = config->GetnElasticityMod();
@@ -648,72 +632,12 @@ void CDiscAdjFEASolver::ReadDV(const CConfig *config) {
 
 void CDiscAdjFEASolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
 
-  unsigned short iVar;
-  unsigned long index, counter;
-  string restart_filename, filename;
-
   /*--- Restart the solution from file information ---*/
 
-  filename = config->GetSolution_AdjFileName();
-  restart_filename = config->GetObjFunc_Extension(filename);
+  auto filename = config->GetSolution_AdjFileName();
+  auto restart_filename = config->GetObjFunc_Extension(filename);
   restart_filename = config->GetFilename(restart_filename, "", val_iter);
 
-  /*--- Read and store the restart metadata. ---*/
-
-//  Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename);
-
-  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
-
-  if (config->GetRead_Binary_Restart()) {
-    Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename);
-  } else {
-    Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename);
-  }
-
-  /*--- Read all lines in the restart file ---*/
-
-  long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0;
-
-  /*--- Skip coordinates ---*/
-
-  unsigned short skipVars = geometry[MESH_0]->GetnDim();
-
-  /*--- Load data from the restart into correct containers. ---*/
-
-  counter = 0;
-  for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
-
-    /*--- Retrieve local index. If this node from the restart file lives
-     on the current processor, we will load and instantiate the vars. ---*/
-
-    iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
-
-    if (iPoint_Local > -1) {
-
-      /*--- We need to store this point's data, so jump to the correct
-       offset in the buffer of data from the restart file and load it. ---*/
-
-      index = counter*Restart_Vars[1] + skipVars;
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar];
-      nodes->SetSolution(iPoint_Local,Solution);
-      iPoint_Global_Local++;
-
-      /*--- Increment the overall counter for how many points have been loaded. ---*/
-      counter++;
-    }
-
-  }
-
-  /*--- Detect a wrong solution file ---*/
-
-  if (iPoint_Global_Local < nPointDomain) {
-    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
-                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
-  }
-
-  /*--- Delete the class memory that is used to load the restart. ---*/
-
-  delete [] Restart_Vars; Restart_Vars = nullptr;
-  delete [] Restart_Data; Restart_Data = nullptr;
+  BasicLoadRestart(geometry[MESH_0], config, restart_filename, geometry[MESH_0]->GetnDim());
 
 }
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index b3df2767365..39e0cf2001d 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -632,26 +632,13 @@ void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_contain
 
 void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
 
-  const bool rans = (config->GetKind_Turb_Model() != NONE);
-
   /*--- Restart the solution from file information ---*/
 
   auto filename = config->GetSolution_AdjFileName();
   auto restart_filename = config->GetObjFunc_Extension(filename);
   restart_filename = config->GetFilename(restart_filename, "", val_iter);
 
-
-  /*--- Read and store the restart metadata. ---*/
-
-//  Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename);
-
-  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
-
-  if (config->GetRead_Binary_Restart()) {
-    Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename);
-  } else {
-    Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename);
-  }
+  const bool rans = (config->GetKind_Turb_Model() != NONE);
 
   /*--- Skip coordinates ---*/
   unsigned short skipVars = geometry[MESH_0]->GetnDim();
@@ -667,39 +654,7 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
     if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar();
   }
 
-  /*--- Load data from the restart into correct containers. ---*/
-
-  unsigned long iPoint_Global_Local = 0;
-
-  for (auto iPoint_Global = 0ul; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
-
-    /*--- Retrieve local index. If this node from the restart file lives
-     on the current processor, we will load and instantiate the vars. ---*/
-
-    const auto iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
-
-    if (iPoint_Local > -1) {
-
-      /*--- We need to store this point's data, so jump to the correct
-       offset in the buffer of data from the restart file and load it. ---*/
-
-      const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars;
-
-      for (auto iVar = 0u; iVar < nVar; iVar++) {
-        nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]);
-      }
-
-      iPoint_Global_Local++;
-    }
-
-  }
-
-  /*--- Detect a wrong solution file ---*/
-
-  if (iPoint_Global_Local != nPointDomain) {
-    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
-                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
-  }
+  BasicLoadRestart(geometry[MESH_0], config, restart_filename, skipVars);
 
   /*--- Interpolate solution on coarse grids ---*/
 
@@ -720,10 +675,4 @@ void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfi
       solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution);
     }
   }
-
-  /*--- Delete the class memory that is used to load the restart. ---*/
-
-  delete [] Restart_Vars;  Restart_Vars = nullptr;
-  delete [] Restart_Data;  Restart_Data = nullptr;
-
 }
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index 5e9cd83515e..b28fa751c08 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -4110,3 +4110,57 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
   }
   END_SU2_OMP_PARALLEL
 }
+
+void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const string& filename, unsigned long skipVars) {
+
+  /*--- Copy nVar values per point from the restart file "filename" into base_nodes, starting
+   "skipVars" fields into each point's record. Restart metadata is not read here (no call to
+   Read_SU2_Restart_Metadata is made). ---*/
+
+  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
+
+  if (config->GetRead_Binary_Restart()) {
+    Read_SU2_Restart_Binary(geometry, config, filename);
+  } else {
+    Read_SU2_Restart_ASCII(geometry, config, filename);
+  }
+
+  /*--- Load data from the restart into correct containers. ---*/
+
+  unsigned long iPoint_Global_Local = 0;
+
+  for (auto iPoint_Global = 0ul; iPoint_Global < geometry->GetGlobal_nPointDomain(); iPoint_Global++ ) {
+
+    /*--- Retrieve local index. If this node from the restart file lives
+     on the current processor, we will load and instantiate the vars. ---*/
+
+    const auto iPoint_Local = geometry->GetGlobal_to_Local_Point(iPoint_Global);
+
+    if (iPoint_Local > -1) {
+
+      /*--- We need to store this point's data, so jump to the correct
+       offset in the buffer of data from the restart file and load it. ---*/
+
+      const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars;
+
+      for (auto iVar = 0u; iVar < nVar; iVar++) {
+        base_nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]);
+      }
+
+      iPoint_Global_Local++;
+    }
+
+  }
+
+  /*--- Free the restart buffers before the consistency check below, which may call SU2_MPI::Error. ---*/
+
+  delete [] Restart_Vars;  Restart_Vars = nullptr;
+  delete [] Restart_Data;  Restart_Data = nullptr;
+
+  /*--- Detect a wrong solution file: the number of points loaded must equal nPointDomain. ---*/
+
+  if (iPoint_Global_Local != nPointDomain) {
+    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
+                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
+  }
+}

From ecb64d02dd147292929614a8ce2e11d772caca2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Tue, 23 Mar 2021 23:11:04 +0100
Subject: [PATCH 42/57] Allow OpDiLib backend choice.

---
 Common/include/code_config.hpp | 2 +-
 meson.build                    | 6 ++++++
 meson_options.txt              | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index 3cbad21f08f..377432ee945 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -119,6 +119,6 @@ using su2mixedfloat = passivedouble;
 #define HAVE_OPDI
 #endif
 
-#if _OPENMP >= 201811
+#if (_OPENMP >= 201811 && !defined(FORCE_OPDI_MACRO_BACKEND)) || defined(FORCE_OPDI_OMPT_BACKEND)
 #define HAVE_OMPT
 #endif
diff --git a/meson.build b/meson.build
index 20f71ee2853..1fb2325c979 100644
--- a/meson.build
+++ b/meson.build
@@ -108,6 +108,12 @@ if omp
   # add opdi dependency
   if get_option('enable-autodiff')
     codi_dep += declare_dependency(include_directories: 'externals/opdi/include')
+
+    if get_option('opdi-backend') == 'macro'
+      su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND'
+    elif get_option('opdi-backend') == 'ompt'
+      su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND'
+    endif
   endif
 endif
 
diff --git a/meson_options.txt b/meson_options.txt
index fd354b12276..b5d9ccdddc8 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -18,3 +18,4 @@ option('enable-tests',  type : 'boolean', value : false, description: 'compile U
 option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra')
 option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation')
 option('enable-mpp',  type : 'boolean', value : false, description: 'enable Mutation++ support')
+option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice')

From 8b4a89c0bbc9f68044fb93b426740c806af7a281 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 25 Mar 2021 14:22:47 +0100
Subject: [PATCH 43/57] OpDiLib update.

---
 externals/opdi        | 2 +-
 meson_scripts/init.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/externals/opdi b/externals/opdi
index a1210cc3d2f..55b33398d18 160000
--- a/externals/opdi
+++ b/externals/opdi
@@ -1 +1 @@
-Subproject commit a1210cc3d2f58fa4652c70000920ff2e76896cf6
+Subproject commit 55b33398d18cbf977545a2dba2008201616664e0
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index 7b15f8440b5..ec7fa1fc33c 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -48,7 +48,7 @@ def init_submodules(method = 'auto'):
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
-  sha_version_opdi = 'a1210cc3d2f58fa4652c70000920ff2e76896cf6'
+  sha_version_opdi = '55b33398d18cbf977545a2dba2008201616664e0'
   github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'

From 165a52b7d40dd8304f4c4a07cb1a0b166c6dda5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 25 Mar 2021 15:47:39 +0100
Subject: [PATCH 44/57] Add AD build tests.

---
 .github/workflows/regression.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index cea9c098ee4..8238f1af131 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix: 
-        config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP]
+        config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP, ReverseOMP, ForwardOMP]
         include:
           - config_set: BaseMPI
             flags: '-Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror'
@@ -32,6 +32,10 @@ jobs:
             flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror'
           - config_set: BaseOMP
             flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+          - config_set: ReverseOMP
+            flags: '-Denable-autodiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+          - config_set: ForwardOMP
+            flags: '-Denable-directdiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
     runs-on: ubuntu-latest
     steps:
       - name: Cache Object Files

From 60792dc926951332ce00d079357475c655ccc6dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 25 Mar 2021 18:43:52 +0100
Subject: [PATCH 45/57] Disable normal builds in AD build tests.

---
 .github/workflows/regression.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index 8238f1af131..3d6c7308431 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -33,9 +33,9 @@ jobs:
           - config_set: BaseOMP
             flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
           - config_set: ReverseOMP
-            flags: '-Denable-autodiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+            flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
           - config_set: ForwardOMP
-            flags: '-Denable-directdiff=true -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+            flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
     runs-on: ubuntu-latest
     steps:
       - name: Cache Object Files

From 3b0854b017d7940a84dc6b1f4ff91f7f852d660c Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Thu, 25 Mar 2021 18:13:06 +0000
Subject: [PATCH 46/57] add syntax check to meson for OpenMP+AD builds

---
 meson.build | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/meson.build b/meson.build
index 1fb2325c979..500ebd87fad 100644
--- a/meson.build
+++ b/meson.build
@@ -186,6 +186,14 @@ if get_option('enable-mpp')
   su2_cpp_args += '-DHAVE_MPP'
 endif
 
+if omp and get_option('enable-autodiff')
+  py = find_program('python3','python')
+  p = run_command(py, 'externals/opdi/syntax/check.py', 'su2omp.syntax.json', 'Common', 'SU2_CFD', '-p', '*.hpp', '*.cpp', '*.inl', '-r', '-q')
+  if p.returncode() != 0
+    error(p.stdout())
+  endif
+endif
+
 # compile common library
 subdir('Common/src')
 # compile SU2_CFD executable

From 4738e29cee34ab30410bf5f12b835405e48401c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Thu, 25 Mar 2021 19:52:49 +0100
Subject: [PATCH 47/57] OpDiLib update.

---
 externals/opdi        | 2 +-
 meson_scripts/init.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/externals/opdi b/externals/opdi
index 55b33398d18..e56f79cada2 160000
--- a/externals/opdi
+++ b/externals/opdi
@@ -1 +1 @@
-Subproject commit 55b33398d18cbf977545a2dba2008201616664e0
+Subproject commit e56f79cada202d21e7425f5d5cfd5b1153f2465e
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index ec7fa1fc33c..4d9a4e35ac3 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -48,7 +48,7 @@ def init_submodules(method = 'auto'):
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
-  sha_version_opdi = '55b33398d18cbf977545a2dba2008201616664e0'
+  sha_version_opdi = 'e56f79cada202d21e7425f5d5cfd5b1153f2465e'
   github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'

From 7e0bc678ee917f0b004116cacf90bab8c0cf3a67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20Bl=C3=BChdorn?=
 <johannes.bluehdorn@scicomp.uni-kl.de>
Date: Fri, 26 Mar 2021 16:37:31 +0100
Subject: [PATCH 48/57] Fix include.

---
 Common/include/basic_types/ad_structure.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 3246845cd94..76705c0b242 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -27,7 +27,7 @@
 
 #pragma once
 
-#include "datatype_structure.hpp"
+#include "../code_config.hpp"
 #include "../parallelization/omp_structure.hpp"
 
 /*!

From 3e82662e56111cddf72391c1f993d6f8d5cdc233 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Fri, 26 Mar 2021 22:29:23 +0000
Subject: [PATCH 49/57] explicit construction and destruction of non trivial
 types in C2DContainer

---
 Common/include/containers/C2DContainer.hpp    | 26 ++++++++++++++-----
 .../include/toolboxes/allocation_toolbox.hpp  |  5 ++--
 Common/src/linear_algebra/CSysMatrix.cpp      | 17 +++++-------
 Common/src/linear_algebra/CSysVector.cpp      |  2 +-
 4 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/Common/include/containers/C2DContainer.hpp b/Common/include/containers/C2DContainer.hpp
index 963cddc99fe..c2d08269294 100644
--- a/Common/include/containers/C2DContainer.hpp
+++ b/Common/include/containers/C2DContainer.hpp
@@ -77,12 +77,17 @@ class AccessorImpl
    * Static size specializations use this do-nothing allocation macro.
    */
 #define DUMMY_ALLOCATOR \
-  void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}
+  void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}\
+  void m_destroy() noexcept {}
+
   /*!
    * Dynamic size specializations use this one, EXTRA is used to set some
    * runtime internal value that depend on the number of rows/columns.
    * What values need setting depends on the specialization as not all have
    * members for e.g. number of rows and cols (static size optimization).
+   * Because aligned allocation is used, "placement new" is used after to
+   * default construct the elements of non-trivial type. Such types also
+   * need to be destructed explicitly before freeing the memory.
    */
 #define REAL_ALLOCATOR(EXTRA)                                           \
   static_assert(MemoryAllocation::is_power_of_two(AlignSize),           \
@@ -91,6 +96,14 @@ class AccessorImpl
   void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {     \
     EXTRA;                                                              \
     m_data = MemoryAllocation::aligned_alloc<Scalar_t>(AlignSize,sz);   \
+    if (!std::is_trivial<Scalar_t>::value)                              \
+      for (size_t i = 0; i < size(); ++i) new (m_data+i) Scalar_t();    \
+  }                                                                     \
+                                                                        \
+  void m_destroy() noexcept {                                           \
+    if (!std::is_trivial<Scalar_t>::value)                              \
+      for (size_t i = 0; i < size(); ++i) m_data[i].~Scalar_t();        \
+    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
   }
 
   DUMMY_ALLOCATOR
@@ -114,15 +127,13 @@ class AccessorImpl
                                                                         \
   AccessorImpl& operator= (AccessorImpl&& other) noexcept               \
   {                                                                     \
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
+    m_destroy();                                                        \
     MOVE; m_data=other.m_data; other.m_data=nullptr;                    \
     return *this;                                                       \
   }                                                                     \
                                                                         \
-  ~AccessorImpl() noexcept                                              \
-  {                                                                     \
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
-  }
+  ~AccessorImpl() noexcept {m_destroy();}
+
   /*!
    * Shorthand for when specialization has only one more member than m_data.
    */
@@ -380,6 +391,7 @@ class C2DContainer :
   using Base = container_helpers::AccessorImpl<Index_t,Scalar_t,Store,AlignSize,StaticRows,StaticCols>;
   using Base::m_data;
   using Base::m_allocate;
+  using Base::m_destroy;
 public:
   using Base::size;
   using Base::rows;
@@ -473,7 +485,7 @@ class C2DContainer :
     if(rows==this->rows() && cols==this->cols())
       return reqSize;
 
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);
+    m_destroy();
 
     /*--- request actual allocation to base class as it needs specialization ---*/
     size_t bytes = reqSize*sizeof(Scalar_t);
diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp
index 1ba281413a3..2d5d3bb4409 100644
--- a/Common/include/toolboxes/allocation_toolbox.hpp
+++ b/Common/include/toolboxes/allocation_toolbox.hpp
@@ -57,9 +57,10 @@ inline constexpr size_t round_up(size_t multiple, size_t x)
  * \brief Aligned memory allocation compatible across platforms.
  * \param[in] alignment, in bytes, of the memory being allocated.
  * \param[in] size, also in bytes.
+ * \tparam ZeroInit, initialize memory to 0.
  * \return Pointer to memory, always use su2::aligned_free to deallocate.
  */
-template<class T>
+template<class T, bool ZeroInit = false>
 inline T* aligned_alloc(size_t alignment, size_t size) noexcept
 {
   assert(is_power_of_two(alignment));
@@ -80,7 +81,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept
 #else
   ptr = ::aligned_alloc(alignment, size);
 #endif
-  memset(ptr, 0, size);
+  if (ZeroInit) memset(ptr, 0, size);
   return static_cast<T*>(ptr);
 }
 
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index a9ee199f3e6..c8bd4164d32 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -158,22 +158,17 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   }
 
   /*--- Allocate data. ---*/
-#define ALLOC_AND_INIT(ptr,num) {\
-  ptr = MemoryAllocation::aligned_alloc<ScalarType>(64,num*sizeof(ScalarType));\
-  for(size_t k=0; k<num; ++k) ptr[k]=0.0; }
+  auto allocAndInit = [](ScalarType*& ptr, unsigned long num) {
+    ptr = MemoryAllocation::aligned_alloc<ScalarType,true>(64, num*sizeof(ScalarType));
+  };
 
-  ALLOC_AND_INIT(matrix, nnz*nVar*nEqn)
+  allocAndInit(matrix, nnz*nVar*nEqn);
 
   /*--- Preconditioners. ---*/
 
-  if (ilu_needed) {
-    ALLOC_AND_INIT(ILU_matrix, nnz_ilu*nVar*nEqn)
-  }
+  if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu*nVar*nEqn);
 
-  if (diag_needed) {
-    ALLOC_AND_INIT(invM, nPointDomain*nVar*nEqn);
-  }
-#undef ALLOC_AND_INIT
+  if (diag_needed) allocAndInit(invM, nPointDomain*nVar*nEqn);
 
   /*--- Thread parallel initialization. ---*/
 
diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp
index 4477d8b3fe1..8eedbf0e254 100644
--- a/Common/src/linear_algebra/CSysVector.cpp
+++ b/Common/src/linear_algebra/CSysVector.cpp
@@ -50,7 +50,7 @@ void CSysVector<ScalarType>::Initialize(unsigned long numBlk, unsigned long numB
 
   omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE);
 
-  if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc<ScalarType>(64, nElm * sizeof(ScalarType));
+  if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc<ScalarType,true>(64, nElm*sizeof(ScalarType));
 
   if (val != nullptr) {
     if (!valIsArray) {

From c82f3c72d8ba52553e90bab182a37cc8ebc8794b Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Sun, 28 Mar 2021 14:57:40 +0100
Subject: [PATCH 50/57] test preaccumulation with RealReverseIndex

---
 Common/include/basic_types/ad_structure.hpp   | 17 ++----
 Common/include/code_config.hpp                |  3 +-
 Common/include/geometry/dual_grid/CPoint.hpp  | 12 ++--
 Common/src/geometry/CPhysicalGeometry.cpp     |  8 +--
 .../computeGradientsLeastSquares.hpp          | 28 ++++-----
 .../flow/convection/centered.hpp              |  2 +-
 SU2_CFD/include/numerics_simd/util.hpp        | 58 ++++++++++++++++---
 SU2_CFD/src/fluid/CPengRobinson.cpp           |  2 +-
 8 files changed, 85 insertions(+), 45 deletions(-)

diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 76705c0b242..620da3246f5 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -392,6 +392,11 @@ namespace AD{
     SetPreaccIn(moreData...);
   }
 
+  template<class T, class... Ts, su2enable_if<std::is_same<T,su2double>::value> = 0>
+  FORCEINLINE void SetPreaccIn(T&& data, Ts&&... moreData) {
+    static_assert(!std::is_same<T,su2double>::value, "rvalues cannot be registered");
+  }
+
   template<class T>
   FORCEINLINE void SetPreaccIn(const T& data, const int size) {
     if (PreaccActive) {
@@ -415,18 +420,6 @@ namespace AD{
     }
   }
 
-  template<class T>
-  FORCEINLINE void SetPreaccIn(const T& data, const int size_x, const int size_y, const int size_z) {
-    if (!PreaccActive) return;
-    for (int i = 0; i < size_x; i++) {
-      for (int j = 0; j < size_y; j++) {
-        for (int k = 0; k < size_z; k++) {
-          if (data[i][j][k].isActive()) PreaccHelper.addInput(data[i][j][k]);
-        }
-      }
-    }
-  }
-
   FORCEINLINE void StartPreacc() {
     if (AD::getGlobalTape().isActive() && PreaccEnabled) {
       PreaccHelper.start();
diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index 377432ee945..904805dc870 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -92,7 +92,8 @@ using su2double = codi::RealReversePrimal;
 #elif CODI_PRIMAL_INDEX_TAPE
 using su2double = codi::RealReversePrimalIndex;
 #else
-using su2double = codi::RealReverse;
+//using su2double = codi::RealReverse;
+using su2double = codi::RealReverseIndex;
 #endif
 #endif
 #elif defined(CODI_FORWARD_TYPE) // forward mode AD
diff --git a/Common/include/geometry/dual_grid/CPoint.hpp b/Common/include/geometry/dual_grid/CPoint.hpp
index 86ac53d4936..9db963524ad 100644
--- a/Common/include/geometry/dual_grid/CPoint.hpp
+++ b/Common/include/geometry/dual_grid/CPoint.hpp
@@ -423,7 +423,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Value of the distance to the nearest wall.
    */
-  inline su2double GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); }
+  inline su2double& GetWall_Distance(unsigned long iPoint) { return Wall_Distance(iPoint); }
+  inline const su2double& GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); }
 
   /*!
    * \brief Set the value of the distance to the nearest wall.
@@ -451,7 +452,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Value of the distance to the nearest wall.
    */
-  inline su2double GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); }
+  inline su2double& GetSharpEdge_Distance(unsigned long iPoint) { return SharpEdge_Distance(iPoint); }
+  inline const su2double& GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); }
 
   /*!
    * \brief Set the value of the curvature at a surface node.
@@ -486,7 +488,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Area or volume of the control volume.
    */
-  inline su2double GetVolume(unsigned long iPoint) const { return Volume(iPoint); }
+  inline su2double& GetVolume(unsigned long iPoint) { return Volume(iPoint); }
+  inline const su2double& GetVolume(unsigned long iPoint) const { return Volume(iPoint); }
 
   /*!
    * \brief Set the volume of the control volume.
@@ -507,7 +510,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Periodic component of area or volume for a control volume on a periodic marker.
    */
-  inline su2double GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); }
+  inline su2double& GetPeriodicVolume(unsigned long iPoint) { return Periodic_Volume(iPoint); }
+  inline const su2double& GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); }
 
   /*!
    * \brief Set the missing component of area or volume for a control volume on a periodic marker.
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 688972ce1f1..72e26928e03 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -7543,7 +7543,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
 
     /*--- To make preaccumulation more effective, use as few inputs
      as possible, recomputing intermediate quantities as needed. ---*/
-    AD::StartPreacc();
+//    AD::StartPreacc();
 
     /*--- Get pointers to the coordinates of all the element nodes ---*/
     array<const su2double*, N_POINTS_MAXIMUM> Coord;
@@ -7654,7 +7654,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
       }
     }
 #endif
-    AD::EndPreacc();
+//    AD::EndPreacc();
   }
 
   su2double DomainVolume;
@@ -7700,7 +7700,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
 
       const auto nNodes = bound[iMarker][iElem]->GetnNodes();
 
-      AD::StartPreacc();
+//      AD::StartPreacc();
 
       /*--- Get pointers to the coordinates of all the element nodes ---*/
       array<const su2double*, N_POINTS_MAXIMUM> Coord;
@@ -7752,7 +7752,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
         const auto iVertex = nodes->GetVertex(iPoint, iMarker);
         AD::SetPreaccOut(vertex[iMarker][iVertex]->GetNormal(), nDim);
       }
-      AD::EndPreacc();
+//      AD::EndPreacc();
     }
   }
   END_SU2_OMP_FOR
diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
index 3a069f8c942..dcd923901dc 100644
--- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
@@ -76,35 +76,35 @@ FORCEINLINE void solveLeastSquares(size_t iPoint,
 
   /*--- Entries of upper triangular matrix R. ---*/
 
+  if (periodic) {
+    AD::StartPreacc();
+    AD::SetPreaccIn(Rmatrix(iPoint,0,0));
+    AD::SetPreaccIn(Rmatrix(iPoint,0,1));
+    AD::SetPreaccIn(Rmatrix(iPoint,1,1));
+  }
+
   su2double r11 = Rmatrix(iPoint,0,0);
   su2double r12 = Rmatrix(iPoint,0,1);
   su2double r22 = Rmatrix(iPoint,1,1);
   su2double r13 = 0.0, r23 = 0.0, r33 = 1.0;
 
-  if (periodic) {
-    AD::StartPreacc();
-    AD::SetPreaccIn(r11);
-    AD::SetPreaccIn(r12);
-    AD::SetPreaccIn(r22);
-  }
-
   r11 = sqrt(max(r11, eps));
   r12 /= r11;
   r22 = sqrt(max(r22 - r12*r12, eps));
 
   if (nDim == 3) {
+    if (periodic) {
+      AD::SetPreaccIn(Rmatrix(iPoint,0,2));
+      AD::SetPreaccIn(Rmatrix(iPoint,1,2));
+      AD::SetPreaccIn(Rmatrix(iPoint,2,1));
+      AD::SetPreaccIn(Rmatrix(iPoint,2,2));
+    }
+
     r13 = Rmatrix(iPoint,0,2);
     r33 = Rmatrix(iPoint,2,2);
     const auto r23_a = Rmatrix(iPoint,1,2);
     const auto r23_b = Rmatrix(iPoint,2,1);
 
-    if (periodic) {
-      AD::SetPreaccIn(r13);
-      AD::SetPreaccIn(r23_a);
-      AD::SetPreaccIn(r23_b);
-      AD::SetPreaccIn(r33);
-    }
-
     r13 /= r11;
     r23 = r23_a/r22 - r23_b*r12/(r11*r22);
     r33 = sqrt(max(r33 - r23*r23 - r13*r13, eps));
diff --git a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
index 7e0bd6f8870..9b62a3a89db 100644
--- a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
+++ b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
@@ -77,7 +77,7 @@ class CCenteredBase : public Base {
 
 public:
   /*!
-   * \brief Implementation of the base Roe flux.
+   * \brief Implementation of the base centered flux.
    */
   void ComputeFlux(Int iEdge,
                    const CConfig& config,
diff --git a/SU2_CFD/include/numerics_simd/util.hpp b/SU2_CFD/include/numerics_simd/util.hpp
index 7127912329b..21c99c7e529 100644
--- a/SU2_CFD/include/numerics_simd/util.hpp
+++ b/SU2_CFD/include/numerics_simd/util.hpp
@@ -115,14 +115,13 @@ FORCEINLINE Double squaredNorm(const VectorDbl<nDim>& vector) {
 template<size_t nDim>
 FORCEINLINE Double norm(const VectorDbl<nDim>& vector) { return sqrt(squaredNorm(vector)); }
 
+#ifndef CODI_REVERSE_TYPE
 /*!
  * \brief Gather a single variable from index iPoint of a 1D container.
  */
 template<class Container>
 FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) {
-  auto x = *vars.innerIter(iPoint);
-  AD::SetPreaccIn(x, Double::Size);
-  return x;
+  return *vars.innerIter(iPoint);
 }
 
 /*!
@@ -130,9 +129,7 @@ FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) {
  */
 template<size_t nVar, class Container>
 FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) {
-  auto x = vars.template get<VectorDbl<nVar> >(iPoint);
-  AD::SetPreaccIn(x, nVar, Double::Size);
-  return x;
+  return vars.template get<VectorDbl<nVar> >(iPoint);
 }
 
 /*!
@@ -140,10 +137,55 @@ FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) {
  */
 template<size_t nRows, size_t nCols, class Container>
 FORCEINLINE MatrixDbl<nRows,nCols> gatherVariables(Int iPoint, const Container& vars) {
-  auto x = vars.template get<MatrixDbl<nRows,nCols> >(iPoint);
-  AD::SetPreaccIn(x, nRows, nCols, Double::Size);
+  return vars.template get<MatrixDbl<nRows,nCols> >(iPoint);
+}
+#else
+
+namespace {
+  template<class Container, su2enable_if<Container::IsVector> = 0>
+  FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint); }
+
+  /*--- When getting 1 variable from a matrix container, we assume it is the first. ---*/
+  template<class Container, su2enable_if<!Container::IsVector> = 0>
+  FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint,0); }
+}
+
+template<class Container>
+FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) {
+  Double x;
+  for (size_t k=0; k<Double::Size; ++k) {
+    AD::SetPreaccIn(get(vars, iPoint[k]));
+    x[k] = get(vars, iPoint[k]);
+  }
+  return x;
+}
+
+template<size_t nVar, class Container>
+FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) {
+  VectorDbl<nVar> x;
+  for (size_t i=0; i<nVar; ++i) {
+    for (size_t k=0; k<Double::Size; ++k) {
+      AD::SetPreaccIn(vars(iPoint[k],i));
+      x[i][k] = vars(iPoint[k],i);
+    }
+  }
+  return x;
+}
+
+template<size_t nRows, size_t nCols, class Container>
+FORCEINLINE MatrixDbl<nRows,nCols> gatherVariables(Int iPoint, const Container& vars) {
+  MatrixDbl<nRows,nCols> x;
+  for (size_t i=0; i<nRows; ++i) {
+    for (size_t j=0; j<nCols; ++j) {
+      for (size_t k=0; k<Double::Size; ++k) {
+        AD::SetPreaccIn(vars(iPoint[k],i,j));
+        x(i,j)[k] = vars(iPoint[k],i,j);
+      }
+    }
+  }
   return x;
 }
+#endif
 
 /*!
  * \brief Stop the AD preaccumulation.
diff --git a/SU2_CFD/src/fluid/CPengRobinson.cpp b/SU2_CFD/src/fluid/CPengRobinson.cpp
index 7dc56bd4d25..09d0e67a557 100644
--- a/SU2_CFD/src/fluid/CPengRobinson.cpp
+++ b/SU2_CFD/src/fluid/CPengRobinson.cpp
@@ -130,7 +130,7 @@ void CPengRobinson::SetTDState_rhoe(su2double rho, su2double e) {
 
   dTde_rho = 1 / Cv;
 
-  Zed = Pressure / (Gas_Constant * Temperature * Density);
+  Zed = Pressure / (Gas_Constant * Temperature * rho);
 
   AD::SetPreaccOut(Temperature);
   AD::SetPreaccOut(SoundSpeed2);

From e5e3ebcaeb128ffa4acacd84346eefb991705fad Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 29 Mar 2021 19:33:23 +0100
Subject: [PATCH 51/57] missing destruction in CSysVector

---
 Common/src/linear_algebra/CSysVector.cpp |  2 ++
 SU2_CFD/src/SU2_CFD.cpp                  |  4 ++--
 SU2_DOT/src/SU2_DOT.cpp                  | 19 ++++++++++++++++++-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp
index 8eedbf0e254..9cb66905fde 100644
--- a/Common/src/linear_algebra/CSysVector.cpp
+++ b/Common/src/linear_algebra/CSysVector.cpp
@@ -63,6 +63,8 @@ void CSysVector<ScalarType>::Initialize(unsigned long numBlk, unsigned long numB
 
 template <class ScalarType>
 CSysVector<ScalarType>::~CSysVector() {
+  if (!std::is_trivial<ScalarType>::value)
+    for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType();
   MemoryAllocation::aligned_free(vec_val);
 }
 
diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp
index 90a86ad018b..d9dfbbdc12f 100644
--- a/SU2_CFD/src/SU2_CFD.cpp
+++ b/SU2_CFD/src/SU2_CFD.cpp
@@ -74,7 +74,7 @@ int main(int argc, char *argv[]) {
   SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm();
 
   /*--- AD initialization ---*/
-#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+#ifdef HAVE_OPDI
   AD::getGlobalTape().initialize();
 #endif
 
@@ -170,7 +170,7 @@ int main(int argc, char *argv[]) {
 #endif
 
   /*--- Finalize AD, if necessary. ---*/
-#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+#ifdef HAVE_OPDI
   AD::getGlobalTape().finalize();
 #endif
 
diff --git a/SU2_DOT/src/SU2_DOT.cpp b/SU2_DOT/src/SU2_DOT.cpp
index 7b9927c1637..e2b2de498be 100644
--- a/SU2_DOT/src/SU2_DOT.cpp
+++ b/SU2_DOT/src/SU2_DOT.cpp
@@ -36,6 +36,10 @@ int main(int argc, char *argv[]) {
 
   char config_file_name[MAX_STRING_SIZE];
 
+  /*--- OpenMP initialization ---*/
+
+  omp_initialize();
+
   /*--- MPI initialization, and buffer setting ---*/
 
 #if defined(HAVE_OMP) && defined(HAVE_MPI)
@@ -49,6 +53,11 @@ int main(int argc, char *argv[]) {
   const int rank = SU2_MPI::GetRank();
   const int size = SU2_MPI::GetSize();
 
+  /*--- AD initialization ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().initialize();
+#endif
+
   /*--- Pointer to different structures that will be used throughout the entire code ---*/
 
   CConfig **config_container            = nullptr;
@@ -406,9 +415,17 @@ int main(int argc, char *argv[]) {
   if (rank == MASTER_NODE)
     cout << "\n------------------------- Exit Success (SU2_DOT) ------------------------\n" << endl;
 
-  /*--- Finalize MPI parallelization ---*/
+  /*--- Finalize AD, if necessary. ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().finalize();
+#endif
+
+  /*--- Finalize MPI parallelization. ---*/
   SU2_MPI::Finalize();
 
+  /*--- Finalize OpenMP. ---*/
+  omp_finalize();
+
   return EXIT_SUCCESS;
 
 }

From 6483a3ffdbf39c86607a1e4f08a7fa0b4ec10bff Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 29 Mar 2021 19:34:02 +0100
Subject: [PATCH 52/57] no type punning in COutput...

---
 SU2_CFD/include/output/COutput.hpp            |  1 -
 .../output/filewriter/CParallelDataSorter.hpp | 28 ++++---
 .../src/output/filewriter/CFVMDataSorter.cpp  | 20 ++---
 .../output/filewriter/CParallelDataSorter.cpp | 76 ++++++-------------
 .../filewriter/CSurfaceFEMDataSorter.cpp      | 11 +--
 .../filewriter/CSurfaceFVMDataSorter.cpp      | 42 +++-------
 6 files changed, 63 insertions(+), 115 deletions(-)

diff --git a/SU2_CFD/include/output/COutput.hpp b/SU2_CFD/include/output/COutput.hpp
index 95a07335e7b..829c0698502 100644
--- a/SU2_CFD/include/output/COutput.hpp
+++ b/SU2_CFD/include/output/COutput.hpp
@@ -581,7 +581,6 @@ class COutput {
     volumeOutput_List.push_back(name);
   }
 
-
   /*!
    * \brief Set the value of a volume output field
    * \param[in] name - Name of the field.
diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
index 5122eed672e..ae6273ea322 100644
--- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
@@ -66,7 +66,20 @@ class CParallelDataSorter{
    * \brief Map that stores the index for each GEO_TYPE type where to find information
    * in the element arrays.
    */
-  static const map<unsigned short, unsigned short> TypeMap;
+  struct {
+    static unsigned short at(unsigned short type) {
+      switch(type) {
+        case LINE: return 0;
+        case TRIANGLE: return 1;
+        case QUADRILATERAL: return 2;
+        case TETRAHEDRON: return 3;
+        case HEXAHEDRON: return 4;
+        case PRISM: return 5;
+        case PYRAMID: return 6;
+        default: assert(false); return 0;
+      };
+    }
+  } TypeMap;
 
   unsigned long nPointsGlobal;   //!< Global number of points without halos
   unsigned long nElemGlobal;    //!< Global number of elems without halos
@@ -88,11 +101,8 @@ class CParallelDataSorter{
   int *nElemConn_Send;                 //!< Number of element connectivity this processor has to send to other processors
   int *nElemConn_Cum;                  //!< Cumulative number of element connectivity entries
   unsigned long *Index;                //!< Index each point has in the send buffer
-  su2double *connSend;                 //!< Send buffer holding the data that will be send to other processors
-  passivedouble *passiveDoubleBuffer;  //!< Buffer holding the sorted, partitioned data as passivedouble types
-  su2double     *doubleBuffer;         //!< Buffer holding the sorted, partitioned data as su2double types
-  /// Pointer used to allocate the memory used for ::passiveDoubleBuffer and ::doubleBuffer.
-  char *dataBuffer;
+  passivedouble *connSend;             //!< Send buffer holding the data that will be send to other processors
+  passivedouble *dataBuffer;           //!< Buffer holding the sorted, partitioned data as passivedouble types
   unsigned long *idSend;               //!< Send buffer holding global indices that will be send to other processors
   int nSends,                          //!< Number of sends
   nRecvs;                              //!< Number of receives
@@ -261,13 +271,13 @@ class CParallelDataSorter{
    * \input iPoint - the point ID.
    * \return the value of the data field at a point.
    */
-  passivedouble GetData(unsigned short iField, unsigned long iPoint) const  {return passiveDoubleBuffer[iPoint*GlobalField_Counter + iField];}
+  passivedouble GetData(unsigned short iField, unsigned long iPoint) const  {return dataBuffer[iPoint*GlobalField_Counter + iField];}
 
   /*!
    * \brief Get the pointer to the sorted linear partitioned data.
    * \return Pointer to the sorted data.
    */
-  const passivedouble *GetData() const {return passiveDoubleBuffer;}
+  const passivedouble *GetData() const {return dataBuffer;}
 
   /*!
    * \brief Get the global index of a point.
@@ -305,7 +315,7 @@ class CParallelDataSorter{
    * \param[in] data - Value of the field
    */
   void SetUnsorted_Data(unsigned long iPoint, unsigned short iField, su2double data){
-    connSend[Index[iPoint] + iField] = data;
+    connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data);
   }
 
   su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const {
diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
index 768438a9b39..fbc0dfd452e 100644
--- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
@@ -71,9 +71,8 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 CFVMDataSorter::~CFVMDataSorter(){
 
   delete [] Local_Halo;
-
-        delete [] Index;
-       delete [] idSend;
+  delete [] Index;
+  delete [] idSend;
   delete linearPartitioner;
 
 }
@@ -107,9 +106,6 @@ void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){
   }
 }
 
-
-
-
 void CFVMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) {
 
   /*--- Sort connectivity for each type of element (excluding halos). Note
@@ -262,14 +258,11 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  unsigned long *connSend = nullptr;
-  connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]();
+  auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]();
 
   /*--- Allocate arrays for storing halo flags. ---*/
 
-  unsigned short *haloSend = new unsigned short[nElem_Send[size]]();
-  for (int ii = 0; ii < nElem_Send[size]; ii++)
-    haloSend[ii] = false;
+  auto haloSend = new unsigned short[nElem_Send[size]]();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -356,10 +349,9 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *connRecv = nullptr;
-  connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]]();
+  auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]]();
 
-  unsigned short *haloRecv = new unsigned short[nElem_Cum[size]]();
+  auto haloRecv = new unsigned short[nElem_Cum[size]]();
 
 #ifdef HAVE_MPI
 
diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
index d65e55317ce..2c32df0e2cb 100644
--- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
@@ -29,17 +29,6 @@
 #include <cassert>
 #include <numeric>
 
-
-const map<unsigned short, unsigned short> CParallelDataSorter::TypeMap = {
-  {LINE, 0},
-  {TRIANGLE, 1},
-  {QUADRILATERAL, 2},
-  {TETRAHEDRON, 3},
-  {HEXAHEDRON, 4},
-  {PRISM, 5},
-  {PYRAMID, 6}
-};
-
 CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &valFieldNames) :
   fieldNames(std::move(valFieldNames)){
 
@@ -61,8 +50,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &
   Index        = nullptr;
   connSend     = nullptr;
   dataBuffer   = nullptr;
-  passiveDoubleBuffer = nullptr;
-  doubleBuffer = nullptr;
   idSend       = nullptr;
   nSends = 0;
   nRecvs = 0;
@@ -110,11 +97,13 @@ CParallelDataSorter::~CParallelDataSorter(){
 
 void CParallelDataSorter::SortOutputData() {
 
+  using MPI_WRAP = SelectMPIWrapper<passivedouble>::W;
+
   int VARS_PER_POINT = GlobalField_Counter;
 
 #ifdef HAVE_MPI
-  SU2_MPI::Request *send_req, *recv_req;
-  SU2_MPI::Status status;
+  MPI_WRAP::Request *send_req, *recv_req;
+  MPI_WRAP::Status status;
   int ind;
 #endif
 
@@ -130,8 +119,8 @@ void CParallelDataSorter::SortOutputData() {
   /*--- We need double the number of messages to send both the conn.
    and the global IDs. ---*/
 
-  send_req = new SU2_MPI::Request[2*nSends];
-  recv_req = new SU2_MPI::Request[2*nRecvs];
+  send_req = new MPI_WRAP::Request[2*nSends];
+  recv_req = new MPI_WRAP::Request[2*nRecvs];
 
   unsigned long iMessage = 0;
   for (int ii=0; ii<size; ii++) {
@@ -141,8 +130,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = VARS_PER_POINT*kk;
       int source = ii;
       int tag    = ii + 1;
-      SU2_MPI::Irecv(&(doubleBuffer[ll]), count, MPI_DOUBLE, source, tag,
-                     SU2_MPI::GetComm(), &(recv_req[iMessage]));
+      MPI_WRAP::Irecv(&(dataBuffer[ll]), count, MPI_DOUBLE, source, tag,
+                      SU2_MPI::GetComm(), &(recv_req[iMessage]));
       iMessage++;
     }
   }
@@ -157,8 +146,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = VARS_PER_POINT*kk;
       int dest = ii;
       int tag    = rank + 1;
-      SU2_MPI::Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
-                     SU2_MPI::GetComm(), &(send_req[iMessage]));
+      MPI_WRAP::Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
+                      SU2_MPI::GetComm(), &(send_req[iMessage]));
       iMessage++;
     }
   }
@@ -173,8 +162,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int source = ii;
       int tag    = ii + 1;
-      SU2_MPI::Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
-                     SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
+      MPI_WRAP::Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
+                      SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
       iMessage++;
     }
   }
@@ -189,8 +178,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int dest   = ii;
       int tag    = rank + 1;
-      SU2_MPI::Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
-                     SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
+      MPI_WRAP::Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
+                      SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
       iMessage++;
     }
   }
@@ -202,7 +191,7 @@ void CParallelDataSorter::SortOutputData() {
   int ll = VARS_PER_POINT*nPoint_Send[rank];
   int kk = VARS_PER_POINT*nPoint_Send[rank+1];
 
-  for (int nn=ll; nn<kk; nn++, mm++) doubleBuffer[mm] = connSend[nn];
+  for (int nn=ll; nn<kk; nn++, mm++) dataBuffer[mm] = connSend[nn];
 
   mm = nPoint_Recv[rank];
   ll = nPoint_Send[rank];
@@ -215,40 +204,25 @@ void CParallelDataSorter::SortOutputData() {
 #ifdef HAVE_MPI
   int number = 2*nSends;
   for (int ii = 0; ii < number; ii++)
-    SU2_MPI::Waitany(number, send_req, &ind, &status);
+    MPI_WRAP::Waitany(number, send_req, &ind, &status);
 
   number = 2*nRecvs;
   for (int ii = 0; ii < number; ii++)
-    SU2_MPI::Waitany(number, recv_req, &ind, &status);
+    MPI_WRAP::Waitany(number, recv_req, &ind, &status);
 
   delete [] send_req;
   delete [] recv_req;
 #endif
 
-  /*--- Note, passiveDoubleBuffer and doubleBuffer point to the same address.
-   * This is the reason why we have to do the following copy/reordering in two steps. ---*/
-
-  /*--- Step 1: Extract the underlying double value --- */
-
-  if (!std::is_same<su2double, passivedouble>::value){
-    for (int jj = 0; jj < VARS_PER_POINT*nPoint_Recv[size]; jj++){
-      const passivedouble tmpVal = SU2_TYPE::GetValue(doubleBuffer[jj]);
-      passiveDoubleBuffer[jj] = tmpVal;
-      /*--- For some AD datatypes a call of the destructor is
-       *  necessary to properly delete the AD type ---*/
-      doubleBuffer[jj].~su2double();
-    }
-  }
-
-  /*--- Step 2: Reorder the data in the buffer --- */
+  /*--- Reorder the data in the buffer --- */
 
   passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]];
   for (int jj = 0; jj < VARS_PER_POINT; jj++){
     for (int ii = 0; ii < nPoint_Recv[size]; ii++){
-      tmpBuffer[idRecv[ii]] = passiveDoubleBuffer[ii*VARS_PER_POINT+jj];
+      tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj];
     }
     for (int ii = 0; ii < nPoint_Recv[size]; ii++){
-      passiveDoubleBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii];
+      dataBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii];
     }
   }
 
@@ -318,18 +292,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  connSend = nullptr;
-  connSend = new su2double[VARS_PER_POINT*nPoint_Send[size]]();
+  connSend = new passivedouble[VARS_PER_POINT*nPoint_Send[size]] ();
 
   /*--- Allocate the data buffer to hold the sorted data. We have to make it large enough
    * to hold passivedoubles and su2doubles ---*/
-  unsigned short maxSize = max(sizeof(passivedouble), sizeof(su2double));
-  dataBuffer = new char[VARS_PER_POINT*nPoint_Recv[size]*maxSize] {};
-
-  /*--- doubleBuffer and passiveDouble buffer use the same memory allocated above using the dataBuffer. ---*/
 
-  doubleBuffer = reinterpret_cast<su2double*>(dataBuffer);
-  passiveDoubleBuffer = reinterpret_cast<passivedouble*>(dataBuffer);
+  dataBuffer = new passivedouble[VARS_PER_POINT*nPoint_Recv[size]] ();
 
   /*--- Allocate arrays for sending the global ID. ---*/
 
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
index 99c314f2e5c..d21edce01a0 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
@@ -69,7 +69,7 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr
 CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){
 
   delete linearPartitioner;
-  delete [] passiveDoubleBuffer;
+  delete [] dataBuffer;
 
 }
 
@@ -220,11 +220,8 @@ void CSurfaceFEMDataSorter::SortOutputData() {
   /* Allocate the memory for Parallel_Surf_Data. */
   nPoints = globalSurfaceDOFIDs.size();
 
-
-    delete [] passiveDoubleBuffer;
-
-
-  passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT];
+  delete [] dataBuffer;
+  dataBuffer = new passivedouble[nPoints*VARS_PER_POINT];
 
   /* Determine the local index of the global surface DOFs and
      copy the data into Parallel_Surf_Data. */
@@ -232,7 +229,7 @@ void CSurfaceFEMDataSorter::SortOutputData() {
     const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner->GetCumulativeSizeBeforeRank(rank);
 
     for(int jj=0; jj<VARS_PER_POINT; jj++)
-      passiveDoubleBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii);
+      dataBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii);
   }
 
   /*--- Reduce the total number of surf points we have. This will be
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
index a4dc31c32a5..ee7535dafb9 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
@@ -50,7 +50,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr
 CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){
 
   delete linearPartitioner;
-  delete [] passiveDoubleBuffer;
+  delete [] dataBuffer;
 
 }
 
@@ -438,17 +438,14 @@ void CSurfaceFVMDataSorter::SortOutputData() {
    we can allocate the new data structure to hold these points alone. Here,
    we also copy the data for those points from our volume data structure. ---*/
 
-
-    delete [] passiveDoubleBuffer;
-
-
-  passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT];
+  delete [] dataBuffer;
+  dataBuffer = new passivedouble[nPoints*VARS_PER_POINT];
 
   for (int jj = 0; jj < VARS_PER_POINT; jj++) {
     count = 0;
     for (int ii = 0; ii < (int)volumeSorter->GetnPoints(); ii++) {
       if (surfPoint[ii] !=-1) {
-        passiveDoubleBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii);
+        dataBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii);
         count++;
       }
     }
@@ -545,14 +542,12 @@ void CSurfaceFVMDataSorter::SortOutputData() {
   /*--- Allocate memory to hold the globals that we are
    sending. ---*/
 
-  unsigned long *globalSend = nullptr;
-  globalSend = new unsigned long[nElem_Send[size]]();
+  auto globalSend = new unsigned long[nElem_Send[size]]();
 
   /*--- Allocate memory to hold the renumbering that we are
    sending. ---*/
 
-  unsigned long *renumbSend = nullptr;
-  renumbSend = new unsigned long[nElem_Send[size]]();
+  auto renumbSend = new unsigned long[nElem_Send[size]]();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -595,11 +590,8 @@ void CSurfaceFVMDataSorter::SortOutputData() {
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *globalRecv = nullptr;
-  globalRecv = new unsigned long[nElem_Recv[size]]();
-
-  unsigned long *renumbRecv = nullptr;
-  renumbRecv = new unsigned long[nElem_Recv[size]]();
+  auto globalRecv = new unsigned long[nElem_Recv[size]]();
+  auto renumbRecv = new unsigned long[nElem_Recv[size]]();
 
 #ifdef HAVE_MPI
   /*--- We need double the number of messages to send both the conn.
@@ -1247,16 +1239,11 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  unsigned long *connSend = nullptr;
-  connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]];
-  for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Send[size]; ii++)
-    connSend[ii] = 0;
+  auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]] ();
 
   /*--- Allocate arrays for storing halo flags. ---*/
 
-  unsigned short *haloSend = new unsigned short[nElem_Send[size]];
-  for (int ii = 0; ii < nElem_Send[size]; ii++)
-    haloSend[ii] = false;
+  auto haloSend = new unsigned short[nElem_Send[size]] ();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -1346,14 +1333,9 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *connRecv = nullptr;
-  connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]];
-  for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Recv[size]; ii++)
-    connRecv[ii] = 0;
+  auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]] ();
 
-  unsigned short *haloRecv = new unsigned short[nElem_Recv[size]];
-  for (int ii = 0; ii < nElem_Recv[size]; ii++)
-    haloRecv[ii] = false;
+  auto haloRecv = new unsigned short[nElem_Recv[size]] ();
 
 #ifdef HAVE_MPI
   /*--- We need double the number of messages to send both the conn.

From 083f0b7929b785eb976bd098f7619a1d9bdafd71 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 29 Mar 2021 19:46:45 +0100
Subject: [PATCH 53/57] missing include

---
 SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
index ae6273ea322..6b4d3adf0f2 100644
--- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
@@ -31,6 +31,7 @@
 #include "../../../../Common/include/option_structure.hpp"
 #include "../../../../Common/include/toolboxes/CLinearPartitioner.hpp"
 #include <array>
+#include <cassert>
 
 class CGeometry;
 class CConfig;

From c3a62d3a2e95b46c9ff531422b2a0f46f5650404 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 29 Mar 2021 20:02:04 +0100
Subject: [PATCH 54/57] fix unused warning

---
 SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
index 2c32df0e2cb..5f3ff6fb5de 100644
--- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
@@ -97,11 +97,11 @@ CParallelDataSorter::~CParallelDataSorter(){
 
 void CParallelDataSorter::SortOutputData() {
 
-  using MPI_WRAP = SelectMPIWrapper<passivedouble>::W;
-
-  int VARS_PER_POINT = GlobalField_Counter;
+  const int VARS_PER_POINT = GlobalField_Counter;
 
 #ifdef HAVE_MPI
+  using MPI_WRAP = SelectMPIWrapper<passivedouble>::W;
+
   MPI_WRAP::Request *send_req, *recv_req;
   MPI_WRAP::Status status;
   int ind;
@@ -112,7 +112,6 @@ void CParallelDataSorter::SortOutputData() {
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-
   unsigned long *idRecv = new unsigned long[nPoint_Recv[size]]();
 
 #ifdef HAVE_MPI

From 92406edd210cd4d656d7e8bd57fdfe8a4f0bfe0d Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Mon, 29 Mar 2021 20:51:29 +0100
Subject: [PATCH 55/57] double free

---
 SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp   | 1 -
 SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp | 9 +--------
 SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp | 7 +------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
index 5f3ff6fb5de..60aa6431250 100644
--- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
@@ -91,7 +91,6 @@ CParallelDataSorter::~CParallelDataSorter(){
   delete [] Conn_Pyra_Par;
 
   delete [] connSend;
-
   delete [] dataBuffer;
 }
 
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
index d21edce01a0..0ed8b0a5603 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
@@ -66,14 +66,7 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr
 
 }
 
-CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){
-
-  delete linearPartitioner;
-  delete [] dataBuffer;
-
-}
-
-
+CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter() { delete linearPartitioner; }
 
 void CSurfaceFEMDataSorter::SortOutputData() {
 
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
index ee7535dafb9..64c2c1bbde5 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
@@ -47,12 +47,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr
 
 }
 
-CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){
-
-  delete linearPartitioner;
-  delete [] dataBuffer;
-
-}
+CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter() { delete linearPartitioner; }
 
 void CSurfaceFVMDataSorter::SortOutputData() {
 

From 73a575bd9cc30eaf05ac577065bcb119a5d5d1d2 Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Tue, 30 Mar 2021 13:44:08 +0100
Subject: [PATCH 56/57] why is everything a pointer ffs...

---
 .../include/toolboxes/CLinearPartitioner.hpp  | 51 +++++-----
 Common/src/toolboxes/CLinearPartitioner.cpp   | 28 +++---
 .../output/filewriter/CFEMDataSorter.hpp      |  7 +-
 .../output/filewriter/CFVMDataSorter.hpp      | 10 +-
 .../output/filewriter/CParallelDataSorter.hpp | 18 ++--
 .../filewriter/CSurfaceFEMDataSorter.hpp      |  9 +-
 .../filewriter/CSurfaceFVMDataSorter.hpp      |  9 +-
 .../src/output/filewriter/CFEMDataSorter.cpp  | 13 +--
 .../src/output/filewriter/CFVMDataSorter.cpp  | 17 +---
 .../output/filewriter/CParallelDataSorter.cpp | 97 ++++++++-----------
 .../filewriter/CSurfaceFEMDataSorter.cpp      | 12 +--
 .../filewriter/CSurfaceFVMDataSorter.cpp      | 38 ++++----
 12 files changed, 126 insertions(+), 183 deletions(-)

diff --git a/Common/include/toolboxes/CLinearPartitioner.hpp b/Common/include/toolboxes/CLinearPartitioner.hpp
index 4a86acebf68..5e2a4d24dea 100644
--- a/Common/include/toolboxes/CLinearPartitioner.hpp
+++ b/Common/include/toolboxes/CLinearPartitioner.hpp
@@ -52,63 +52,68 @@ class CLinearPartitioner {
   vector<unsigned long> cumulativeSizeBeforeRank; /*!< \brief Vector containing the cumulative size of all linear partitions before the current rank. */
 
 public:
+  CLinearPartitioner() = default;
 
   /*!
-   * \brief Constructor of the CLinearPartitioner class.
-   * \param[in] val_global_count - global count to be linearly partitioned.
-   * \param[in] val_offset - offset from 0 for the first index on rank 0 (typically 0).
-   * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false).
+   * \brief Constructor of the CLinearPartitioner class, see Initialize.
    */
-  CLinearPartitioner(unsigned long val_global_count,
-                     unsigned long val_offset,
-                     bool          isDisjoint = false);
+  CLinearPartitioner(unsigned long global_count,
+                     unsigned long offset,
+                     bool isDisjoint = false) {
+    Initialize(global_count, offset, isDisjoint);
+  }
 
   /*!
-   * \brief Destructor of the CLinearPartitioner class.
+   * \brief Initialize the CLinearPartitioner class.
+   * \param[in] global_count - global count to be linearly partitioned.
+   * \param[in] offset - offset from 0 for the first index on rank 0 (typically 0).
+   * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false).
    */
-  ~CLinearPartitioner(void);
+  void Initialize(unsigned long global_count,
+                  unsigned long offset,
+                  bool isDisjoint = false);
 
   /*!
    * \brief Get the rank that owns the index based on the linear partitioning.
-   * \param[in] val_index - Current index.
+   * \param[in] index - Current index.
    * \returns Owning rank for the current index based on linear partitioning.
    */
-  unsigned long GetRankContainingIndex(unsigned long val_index);
+  unsigned long GetRankContainingIndex(unsigned long index) const;
 
   /*!
    * \brief Get the first index of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns First index of the current rank's linear partition.
    */
-  inline unsigned long GetFirstIndexOnRank(int val_rank) {
-    return firstIndex[val_rank];
+  inline unsigned long GetFirstIndexOnRank(int rank) const {
+    return firstIndex[rank];
   }
 
   /*!
    * \brief Get the last index of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Last index of the current rank's linear partition.
    */
-  inline unsigned long GetLastIndexOnRank(int val_rank) {
-    return lastIndex[val_rank];
+  inline unsigned long GetLastIndexOnRank(int rank) const {
+    return lastIndex[rank];
   }
 
   /*!
    * \brief Get the total size of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Size of the current rank's linear partition.
    */
-  inline unsigned long GetSizeOnRank(int val_rank) {
-    return sizeOnRank[val_rank];
+  inline unsigned long GetSizeOnRank(int rank) const {
+    return sizeOnRank[rank];
   }
 
   /*!
    * \brief Get the cumulative size of all linear partitions before the current rank.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Cumulative size of all linear partitions before the current rank.
    */
-  inline unsigned long GetCumulativeSizeBeforeRank(int val_rank) {
-    return cumulativeSizeBeforeRank[val_rank];
+  inline unsigned long GetCumulativeSizeBeforeRank(int rank) const {
+    return cumulativeSizeBeforeRank[rank];
   }
 
 };
diff --git a/Common/src/toolboxes/CLinearPartitioner.cpp b/Common/src/toolboxes/CLinearPartitioner.cpp
index 6a45f4fb20f..16ac5373762 100644
--- a/Common/src/toolboxes/CLinearPartitioner.cpp
+++ b/Common/src/toolboxes/CLinearPartitioner.cpp
@@ -28,9 +28,9 @@
 
 #include "../../include/toolboxes/CLinearPartitioner.hpp"
 
-CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
-                                       unsigned long val_offset,
-                                       bool          isDisjoint) {
+void CLinearPartitioner::Initialize(unsigned long global_count,
+                                    unsigned long offset,
+                                    bool isDisjoint) {
 
   /*--- Store MPI size ---*/
 
@@ -48,10 +48,10 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
    balancing for any remainder points. ---*/
 
   unsigned long quotient = 0;
-  if (val_global_count >= (unsigned long)size)
-    quotient = val_global_count/size;
+  if (global_count >= (unsigned long)size)
+    quotient = global_count/size;
 
-  int remainder = int(val_global_count%size);
+  int remainder = int(global_count%size);
   for (int ii = 0; ii < size; ii++) {
     sizeOnRank[ii] = quotient + int(ii < remainder);
   }
@@ -63,7 +63,7 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
   unsigned long adjust = 0;
   if (isDisjoint) adjust = 1;
 
-  firstIndex[0] = val_offset;
+  firstIndex[0] = offset;
   lastIndex[0]  = firstIndex[0] + sizeOnRank[0] - adjust;
   cumulativeSizeBeforeRank[0] = 0;
   for (int iProc = 1; iProc < size; iProc++) {
@@ -72,17 +72,15 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
     cumulativeSizeBeforeRank[iProc] = (cumulativeSizeBeforeRank[iProc-1] +
                                        sizeOnRank[iProc-1]);
   }
-  cumulativeSizeBeforeRank[size] = val_global_count;
+  cumulativeSizeBeforeRank[size] = global_count;
 
 }
 
-CLinearPartitioner::~CLinearPartitioner(void) { }
-
-unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index) {
+unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long index) const {
 
   /*--- Initial guess ---*/
 
-  unsigned long iProcessor = val_index/sizeOnRank[0];
+  unsigned long iProcessor = index/sizeOnRank[0];
 
   /*--- Guard against going over size. ---*/
 
@@ -91,11 +89,11 @@ unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index
 
   /*--- Move up or down until we find the processor. ---*/
 
-  if (val_index >= cumulativeSizeBeforeRank[iProcessor])
-    while(val_index >= cumulativeSizeBeforeRank[iProcessor+1])
+  if (index >= cumulativeSizeBeforeRank[iProcessor])
+    while(index >= cumulativeSizeBeforeRank[iProcessor+1])
       iProcessor++;
   else
-    while(val_index < cumulativeSizeBeforeRank[iProcessor])
+    while(index < cumulativeSizeBeforeRank[iProcessor])
       iProcessor--;
 
   return iProcessor;
diff --git a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
index 372069c8735..91df74fcd3c 100644
--- a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
@@ -41,11 +41,6 @@ class CFEMDataSorter final: public CParallelDataSorter{
    */
   CFEMDataSorter(CConfig *config, CGeometry *geometry, const vector<string> &valFieldNames);
 
-  /*!
-   * \brief Destructor
-   */
-  ~CFEMDataSorter() override;
-
   /*!
    * \brief Sort the connectivities (volume and surface) into data structures used for output file writing.
    * \param[in] config - Definition of the particular problem.
@@ -60,7 +55,7 @@ class CFEMDataSorter final: public CParallelDataSorter{
    * \return Global index of a specific point.
    */
   unsigned long GetGlobalIndex(unsigned long iPoint) const override{
-    return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint;
+    return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint;
   }
 
 private:
diff --git a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
index f2f70e23a7e..cd561c6a7bf 100644
--- a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
@@ -34,10 +34,9 @@ class CFVMDataSorter final: public CParallelDataSorter{
 
 private:
 
-  int* Local_Halo; //!< Array containing the flag whether a point is a halo node
+  vector<int> Local_Halo; //!< Per-point flag indicating whether the point is a halo node.
 
 public:
-
   /*!
    * \brief Constructor
    * \param[in] config - Pointer to the current config structure
@@ -46,11 +45,6 @@ class CFVMDataSorter final: public CParallelDataSorter{
    */
   CFVMDataSorter(CConfig *config, CGeometry *geometry, const vector<string> &valFieldNames);
 
-  /*!
-   * \brief Destructor
-   */
-  ~CFVMDataSorter() override;
-
   /*!
    * \brief Sort the connectivities (volume and surface) into data structures used for output file writing.
    * \param[in] config - Definition of the particular problem.
@@ -65,7 +59,7 @@ class CFVMDataSorter final: public CParallelDataSorter{
    * \return Global index of a specific point.
    */
   unsigned long GetGlobalIndex(unsigned long iPoint) const override {
-    return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint;
+    return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint;
   }
 
   /*!
diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
index 6b4d3adf0f2..1a22dbda832 100644
--- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
@@ -42,12 +42,12 @@ class CParallelDataSorter{
   /*!
    * \brief The MPI rank
    */
-  int rank;
+  const int rank;
 
   /*!
    * \brief The MPI size, aka the number of processors.
    */
-  int size;
+  const int size;
 
   unsigned long nGlobalPointBeforeSort; //!< Global number of points without halos before sorting
   unsigned long nLocalPointsBeforeSort;   //!< Local number of points without halos before sorting on this proc
@@ -89,7 +89,7 @@ class CParallelDataSorter{
   unsigned long nElem;     //!< Local number of elements
   unsigned long nConn;     //!< Local size of the connectivity array
 
-  CLinearPartitioner* linearPartitioner;  //!< Linear partitioner based on the global number of points.
+  CLinearPartitioner linearPartitioner;  //!< Linear partitioner based on the global number of points.
 
   unsigned short GlobalField_Counter;  //!< Number of output fields
 
@@ -254,7 +254,7 @@ class CParallelDataSorter{
    * \return The beginning node ID.
    */
   virtual unsigned long GetNodeBegin(unsigned short rank) const {
-    return linearPartitioner->GetFirstIndexOnRank(rank);
+    return linearPartitioner.GetFirstIndexOnRank(rank);
   }
 
   /*!
@@ -263,7 +263,7 @@ class CParallelDataSorter{
    * \return The ending node ID.
    */
   unsigned long GetNodeEnd(unsigned short rank) const {
-    return linearPartitioner->GetLastIndexOnRank(rank);
+    return linearPartitioner.GetLastIndexOnRank(rank);
   }
 
   /*!
@@ -292,14 +292,14 @@ class CParallelDataSorter{
    * \input rank - the processor rank.
    * \return The cumulated number of points up to certain processor rank.
    */
-  virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner->GetCumulativeSizeBeforeRank(rank);}
+  virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner.GetCumulativeSizeBeforeRank(rank);}
 
   /*!
    * \brief Get the linear number of points
    * \input rank - the processor rank.
    * \return The linear number of points up to certain processor rank.
    */
-  unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner->GetSizeOnRank(rank);}
+  unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner.GetSizeOnRank(rank);}
 
   /*!
    * \brief Check whether the current connectivity is sorted (i.e. if SortConnectivity has been called)
@@ -319,7 +319,7 @@ class CParallelDataSorter{
     connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data);
   }
 
-  su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const {
+  passivedouble GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const {
     return connSend[Index[iPoint] + iField];
   }
 
@@ -329,7 +329,7 @@ class CParallelDataSorter{
    * \return The rank/processor number.
    */
   virtual unsigned short FindProcessor(unsigned long iPoint) const {
-    return linearPartitioner->GetRankContainingIndex(iPoint);
+    return linearPartitioner.GetRankContainingIndex(iPoint);
   }
 
   /*!
diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
index 7775761f81c..5d2e7481364 100644
--- a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
@@ -31,7 +31,7 @@
 
 class CSurfaceFEMDataSorter final: public CParallelDataSorter{
 
-  CFEMDataSorter* volumeSorter;                  //!< Pointer to the volume sorter instance
+  const CFEMDataSorter* volumeSorter;            //!< Pointer to the volume sorter instance
   vector<unsigned long> globalSurfaceDOFIDs;     //!< Structure to map the local sorted point ID to the global point ID
   vector<unsigned long> nSurfaceDOFsRanks;       //!< Number of points on each rank
 
@@ -43,12 +43,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{
    * \param[in] geometry - Pointer to the current geometry
    * \param[in] valVolumeSorter - The datasorter containing the volume data
    */
-  CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter);
-
-  /*!
-   * \brief Destructor
-   */
-  ~CSurfaceFEMDataSorter() override;
+  CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter);
 
   /*!
    * \brief Sort the output data for each grid node into a linear partitioning across all processors.
diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
index dd6132c4248..d65a2a03260 100644
--- a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
@@ -31,7 +31,7 @@
 
 class CSurfaceFVMDataSorter final: public CParallelDataSorter{
 
-  CFVMDataSorter* volumeSorter;                    //!< Pointer to the volume sorter instance
+  const CFVMDataSorter* volumeSorter;               //!< Pointer to the volume sorter instance
   map<unsigned long,unsigned long> Renumber2Global; //! Structure to map the local sorted point ID to the global point ID
 public:
 
@@ -41,12 +41,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{
    * \param[in] geometry - Pointer to the current geometry
    * \param[in] valVolumeSorter - The datasorter containing the volume data
    */
-  CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, CFVMDataSorter* valVolumeSorter);
-
-  /*!
-   * \brief Destructor
-   */
-  ~CSurfaceFVMDataSorter() override;
+  CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, const CFVMDataSorter* valVolumeSorter);
 
   /*!
    * \brief Sort the output data for each grid node into a linear partitioning across all processors.
diff --git a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
index ed75ae1e0d4..f9052653abb 100644
--- a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
@@ -66,7 +66,7 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
   /*--- Create a linear partition --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
   /*--- Prepare the send buffers ---*/
 
@@ -74,17 +74,6 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
 }
 
-CFEMDataSorter::~CFEMDataSorter(){
-
-        delete [] Index;
-       delete [] idSend;
-  delete linearPartitioner;
-
-}
-
-
-
-
 void CFEMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) {
 
   /*--- Sort connectivity for each type of element (excluding halos). Note
diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
index fbc0dfd452e..c193c98c6a6 100644
--- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
@@ -39,7 +39,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
   nGlobalPointBeforeSort = geometry->GetGlobal_nPointDomain();
   nLocalPointsBeforeSort  = geometry->GetnPointDomain();
 
-  Local_Halo = new int[geometry->GetnPoint()]();
+  Local_Halo.resize(geometry->GetnPoint());
 
   for (unsigned long iPoint = 0; iPoint < geometry->GetnPoint(); iPoint++){
 
@@ -60,7 +60,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
   /*--- Prepare the send buffers ---*/
 
@@ -68,15 +68,6 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
 }
 
-CFVMDataSorter::~CFVMDataSorter(){
-
-  delete [] Local_Halo;
-  delete [] Index;
-  delete [] idSend;
-  delete linearPartitioner;
-
-}
-
 void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){
 
   unsigned long iPoint, iVertex;
@@ -214,7 +205,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
          own elements into the connectivity data structure. ---*/
 
         if (val_sort) {
-          iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+          iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
         } else {
           iProcessor = rank;
         }
@@ -301,7 +292,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
          own elements into the connectivity data structure. ---*/
 
         if (val_sort) {
-          iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+          iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
         } else {
           iProcessor = rank;
         }
diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
index 60aa6431250..e600566155f 100644
--- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
@@ -30,12 +30,11 @@
 #include <numeric>
 
 CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &valFieldNames) :
-  fieldNames(std::move(valFieldNames)){
+  rank(SU2_MPI::GetRank()),
+  size(SU2_MPI::GetSize()),
+  fieldNames(std::move(valFieldNames)) {
 
-  rank = SU2_MPI::GetRank();
-  size = SU2_MPI::GetSize();
-
-  GlobalField_Counter = this->fieldNames.size();
+  GlobalField_Counter = fieldNames.size();
 
   Conn_Line_Par = nullptr;
   Conn_Hexa_Par = nullptr;
@@ -45,8 +44,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &
   Conn_Tria_Par = nullptr;
   Conn_Pyra_Par = nullptr;
 
-  nPoint_Send  = nullptr;
-  nPoint_Recv  = nullptr;
   Index        = nullptr;
   connSend     = nullptr;
   dataBuffer   = nullptr;
@@ -64,8 +61,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &
   nElemConn_Send = new int[size+1]();
   nElemConn_Cum = new int[size+1]();
 
-  linearPartitioner = nullptr;
-
   nElemPerType.fill(0);
   nElemPerTypeGlobal.fill(0);
 
@@ -92,33 +87,31 @@ CParallelDataSorter::~CParallelDataSorter(){
 
   delete [] connSend;
   delete [] dataBuffer;
+  delete [] Index;
+  delete [] idSend;
+
 }
 
 void CParallelDataSorter::SortOutputData() {
 
   const int VARS_PER_POINT = GlobalField_Counter;
 
-#ifdef HAVE_MPI
-  using MPI_WRAP = SelectMPIWrapper<passivedouble>::W;
-
-  MPI_WRAP::Request *send_req, *recv_req;
-  MPI_WRAP::Status status;
-  int ind;
-#endif
-
   /*--- Allocate the memory that we need for receiving the conn
    values and then cue up the non-blocking receives. Note that
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *idRecv = new unsigned long[nPoint_Recv[size]]();
+  vector<unsigned long> idRecv(nPoint_Recv[size], 0);
 
 #ifdef HAVE_MPI
-  /*--- We need double the number of messages to send both the conn.
-   and the global IDs. ---*/
+  /*--- NOTE: This function calls MPI routines directly, instead of via SU2_MPI::,
+   * because it communicates passivedoubles and not AD types. This avoids the
+   * workarounds otherwise needed to communicate AD types and then convert to passive. ---*/
 
-  send_req = new MPI_WRAP::Request[2*nSends];
-  recv_req = new MPI_WRAP::Request[2*nRecvs];
+  /*--- We need double the number of messages to send both the conn. and the global IDs. ---*/
+
+  auto send_req = new MPI_Request[2*nSends];
+  auto recv_req = new MPI_Request[2*nRecvs];
 
   unsigned long iMessage = 0;
   for (int ii=0; ii<size; ii++) {
@@ -128,8 +121,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = VARS_PER_POINT*kk;
       int source = ii;
       int tag    = ii + 1;
-      MPI_WRAP::Irecv(&(dataBuffer[ll]), count, MPI_DOUBLE, source, tag,
-                      SU2_MPI::GetComm(), &(recv_req[iMessage]));
+      MPI_Irecv(&(dataBuffer[ll]), count, MPI_DOUBLE, source, tag,
+                SU2_MPI::GetComm(), &(recv_req[iMessage]));
       iMessage++;
     }
   }
@@ -142,10 +135,10 @@ void CParallelDataSorter::SortOutputData() {
       int ll = VARS_PER_POINT*nPoint_Send[ii];
       int kk = nPoint_Send[ii+1] - nPoint_Send[ii];
       int count  = VARS_PER_POINT*kk;
-      int dest = ii;
+      int dest   = ii;
       int tag    = rank + 1;
-      MPI_WRAP::Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
-                      SU2_MPI::GetComm(), &(send_req[iMessage]));
+      MPI_Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
+                SU2_MPI::GetComm(), &(send_req[iMessage]));
       iMessage++;
     }
   }
@@ -160,8 +153,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int source = ii;
       int tag    = ii + 1;
-      MPI_WRAP::Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
-                      SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
+      MPI_Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
+                SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
       iMessage++;
     }
   }
@@ -176,8 +169,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int dest   = ii;
       int tag    = rank + 1;
-      MPI_WRAP::Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
-                      SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
+      MPI_Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
+                SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
       iMessage++;
     }
   }
@@ -200,21 +193,25 @@ void CParallelDataSorter::SortOutputData() {
   /*--- Wait for the non-blocking sends and recvs to complete. ---*/
 
 #ifdef HAVE_MPI
+  MPI_Status status;
+  int ind;
+
   int number = 2*nSends;
   for (int ii = 0; ii < number; ii++)
-    MPI_WRAP::Waitany(number, send_req, &ind, &status);
+    MPI_Waitany(number, send_req, &ind, &status);
 
   number = 2*nRecvs;
   for (int ii = 0; ii < number; ii++)
-    MPI_WRAP::Waitany(number, recv_req, &ind, &status);
+    MPI_Waitany(number, recv_req, &ind, &status);
 
   delete [] send_req;
   delete [] recv_req;
 #endif
 
-  /*--- Reorder the data in the buffer --- */
+  /*--- Reorder the data in the buffer. ---*/
+
+  vector<passivedouble> tmpBuffer(nPoint_Recv[size]);
 
-  passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]];
   for (int jj = 0; jj < VARS_PER_POINT; jj++){
     for (int ii = 0; ii < nPoint_Recv[size]; ii++){
       tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj];
@@ -224,8 +221,6 @@ void CParallelDataSorter::SortOutputData() {
     }
   }
 
-  delete [] tmpBuffer;
-
   /*--- Store the total number of local points my rank has for
    the current section after completing the communications. ---*/
 
@@ -233,12 +228,8 @@ void CParallelDataSorter::SortOutputData() {
 
   /*--- Reduce the total number of points we will write in the output files. ---*/
 
-  SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1,
-                     MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
-
-  /*--- Free temporary memory from communications ---*/
+  SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
 
-  delete [] idRecv;
 }
 
 void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalID){
@@ -257,7 +248,7 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
 
   for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++ ) {
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]);
+    iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]);
 
     /*--- If we have not visited this node yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -304,11 +295,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
   /*--- Create an index variable to keep track of our index
    positions as we load up the send buffer. ---*/
 
-  unsigned long *index = new unsigned long[size]();
-  for (int ii=0; ii < size; ii++) index[ii] = VARS_PER_POINT*nPoint_Send[ii];
+  vector<unsigned long> index(size), idIndex(size);
 
-  unsigned long *idIndex = new unsigned long[size]();
-  for (int ii=0; ii < size; ii++) idIndex[ii] = nPoint_Send[ii];
+  for (int ii=0; ii < size; ii++) {
+    index[ii] = VARS_PER_POINT*nPoint_Send[ii];
+    idIndex[ii] = nPoint_Send[ii];
+  }
 
   Index = new unsigned long[nLocalPointsBeforeSort]();
 
@@ -317,13 +309,13 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
 
   for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++) {
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]);
+    iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]);
 
     /*--- Load the global ID (minus offset) for sorting the
          points once they all reach the correct processor. ---*/
 
     unsigned long nn = idIndex[iProcessor];
-    idSend[nn] = globalID[iPoint] - linearPartitioner->GetFirstIndexOnRank(iProcessor);
+    idSend[nn] = globalID[iPoint] - linearPartitioner.GetFirstIndexOnRank(iProcessor);
 
     /*--- Store the index this point has in the send buffer ---*/
 
@@ -334,13 +326,8 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
     index[iProcessor]  += VARS_PER_POINT;
     idIndex[iProcessor]++;
 
-
   }
 
-  /*--- Free memory after loading up the send buffer. ---*/
-
-  delete [] index;
-  delete [] idIndex;
 }
 
 unsigned long CParallelDataSorter::GetElem_Connectivity(GEO_TYPE type, unsigned long iElem, unsigned long iNode) const {
@@ -429,6 +416,4 @@ void CParallelDataSorter::SetTotalElements(){
     nElemConn_Cum[ii+1] += nElemConn_Cum[ii];
   }
 
-
 }
-
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
index 0ed8b0a5603..9a7bc400418 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
@@ -29,12 +29,12 @@
 #include "../../../../Common/include/fem/fem_geometry_structure.hpp"
 #include <numeric>
 
-CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter) :
+CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter) :
   CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){
 
   nDim = geometry->GetnDim();
 
-  this->volumeSorter = valVolumeSorter;
+  volumeSorter = valVolumeSorter;
 
   connectivitySorted = false;
 
@@ -62,12 +62,10 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
 }
 
-CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter() { delete linearPartitioner; }
-
 void CSurfaceFEMDataSorter::SortOutputData() {
 
   if (!connectivitySorted){
@@ -129,7 +127,7 @@ void CSurfaceFEMDataSorter::SortOutputData() {
   for(unsigned long i=0; i<globalSurfaceDOFIDs.size(); ++i) {
 
     /* Search for the processor that owns this point. */
-    unsigned long iProcessor = linearPartitioner->GetRankContainingIndex(globalSurfaceDOFIDs[i]);
+    unsigned long iProcessor = linearPartitioner.GetRankContainingIndex(globalSurfaceDOFIDs[i]);
 
     /* Store the global ID in the send buffer for iProcessor. */
     sendBuf[iProcessor].push_back(globalSurfaceDOFIDs[i]);
@@ -219,7 +217,7 @@ void CSurfaceFEMDataSorter::SortOutputData() {
   /* Determine the local index of the global surface DOFs and
      copy the data into Parallel_Surf_Data. */
   for(unsigned long i=0; i<nPoints; ++i) {
-    const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner->GetCumulativeSizeBeforeRank(rank);
+    const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner.GetCumulativeSizeBeforeRank(rank);
 
     for(int jj=0; jj<VARS_PER_POINT; jj++)
       dataBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii);
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
index 64c2c1bbde5..f9b36ab1648 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
@@ -29,12 +29,12 @@
 #include "../../../../Common/include/geometry/CGeometry.hpp"
 #include <numeric>
 
-CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, CFVMDataSorter* valVolumeSorter) :
+CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, const CFVMDataSorter* valVolumeSorter) :
   CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){
 
   nDim = geometry->GetnDim();
 
-  this->volumeSorter = valVolumeSorter;
+  volumeSorter = valVolumeSorter;
 
   connectivitySorted = false;
 
@@ -43,12 +43,10 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
 }
 
-CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter() { delete linearPartitioner; }
-
 void CSurfaceFVMDataSorter::SortOutputData() {
 
   unsigned long iProcessor;
@@ -96,7 +94,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -124,7 +122,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -152,7 +150,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -211,7 +209,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -245,7 +243,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -279,7 +277,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -499,7 +497,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -559,7 +557,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     if (nElem_Flag[iProcessor] != ii) {
 
@@ -718,7 +716,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -739,7 +737,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -760,7 +758,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -795,7 +793,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -847,7 +845,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -1192,7 +1190,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
 
             /*--- Search for the processor that owns this point ---*/
 
-            iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+            iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
             /*--- If we have not visited this element yet, increment our
              number of elements that must be sent to a particular proc. ---*/
@@ -1282,7 +1280,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
 
             /*--- Search for the processor that owns this point ---*/
 
-            iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+            iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
             /*--- Load connectivity into the buffer for sending ---*/
 

From 3870382fc015eb2c274549c7252cd20ac6a9920b Mon Sep 17 00:00:00 2001
From: Pedro Gomes <pcarruscag@gmail.com>
Date: Wed, 31 Mar 2021 00:04:40 +0100
Subject: [PATCH 57/57] enough testing for now, revert RealReverseIndex to
 RealReverse

---
 Common/include/code_config.hpp            | 3 +--
 Common/src/geometry/CPhysicalGeometry.cpp | 8 ++++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
index 904805dc870..377432ee945 100644
--- a/Common/include/code_config.hpp
+++ b/Common/include/code_config.hpp
@@ -92,8 +92,7 @@ using su2double = codi::RealReversePrimal;
 #elif CODI_PRIMAL_INDEX_TAPE
 using su2double = codi::RealReversePrimalIndex;
 #else
-//using su2double = codi::RealReverse;
-using su2double = codi::RealReverseIndex;
+using su2double = codi::RealReverse;
 #endif
 #endif
 #elif defined(CODI_FORWARD_TYPE) // forward mode AD
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 72e26928e03..688972ce1f1 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -7543,7 +7543,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
 
     /*--- To make preaccumulation more effective, use as few inputs
      as possible, recomputing intermediate quantities as needed. ---*/
-//    AD::StartPreacc();
+    AD::StartPreacc();
 
     /*--- Get pointers to the coordinates of all the element nodes ---*/
     array<const su2double*, N_POINTS_MAXIMUM> Coord;
@@ -7654,7 +7654,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
       }
     }
 #endif
-//    AD::EndPreacc();
+    AD::EndPreacc();
   }
 
   su2double DomainVolume;
@@ -7700,7 +7700,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
 
       const auto nNodes = bound[iMarker][iElem]->GetnNodes();
 
-//      AD::StartPreacc();
+      AD::StartPreacc();
 
       /*--- Get pointers to the coordinates of all the element nodes ---*/
       array<const su2double*, N_POINTS_MAXIMUM> Coord;
@@ -7752,7 +7752,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
         const auto iVertex = nodes->GetVertex(iPoint, iMarker);
         AD::SetPreaccOut(vertex[iMarker][iVertex]->GetNormal(), nDim);
       }
-//      AD::EndPreacc();
+      AD::EndPreacc();
     }
   }
   END_SU2_OMP_FOR