diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
index cea9c098ee4..3d6c7308431 100644
--- a/.github/workflows/regression.yml
+++ b/.github/workflows/regression.yml
@@ -16,7 +16,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix: 
-        config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP]
+        config_set: [BaseMPI, ReverseMPI, ForwardMPI, BaseNoMPI, ReverseNoMPI, ForwardNoMPI, BaseOMP, ReverseOMP, ForwardOMP]
         include:
           - config_set: BaseMPI
             flags: '-Denable-pywrapper=true -Denable-tests=true --warnlevel=3 --werror'
@@ -32,6 +32,10 @@ jobs:
             flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-mpi=disabled -Denable-tests=true --warnlevel=3 --werror'
           - config_set: BaseOMP
             flags: '-Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+          - config_set: ReverseOMP
+            flags: '-Denable-autodiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
+          - config_set: ForwardOMP
+            flags: '-Denable-directdiff=true -Denable-normal=false -Dwith-omp=true -Denable-mixedprec=true -Denable-tecio=false --warnlevel=3 --werror'
     runs-on: ubuntu-latest
     steps:
       - name: Cache Object Files
diff --git a/.gitmodules b/.gitmodules
index f160f2e549e..ae2967618b2 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -15,3 +15,6 @@
 [submodule "subprojects/Mutationpp"]
         path = subprojects/Mutationpp
         url = https://github.com/mutationpp/Mutationpp.git	
+[submodule "externals/opdi"]
+	path = externals/opdi
+	url = https://github.com/SciCompKL/OpDiLib
diff --git a/Common/include/basic_types/ad_structure.hpp b/Common/include/basic_types/ad_structure.hpp
index 0037465b0e4..620da3246f5 100644
--- a/Common/include/basic_types/ad_structure.hpp
+++ b/Common/include/basic_types/ad_structure.hpp
@@ -1,7 +1,7 @@
 /*!
  * \file ad_structure.hpp
  * \brief Main routines for the algorithmic differentiation (AD) structure.
- * \author T. Albring
+ * \author T. Albring, J. Blühdorn
  * \version 7.1.1 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -27,7 +27,8 @@
 
 #pragma once
 
-#include "datatype_structure.hpp"
+#include "../code_config.hpp"
+#include "../parallelization/omp_structure.hpp"
 
 /*!
  * \namespace AD
@@ -278,19 +279,23 @@ namespace AD{
 
   extern int adjointVectorPosition;
 
-  /*--- Reference to the tape ---*/
-
-  extern su2double::TapeType& globalTape;
-
   extern bool Status;
 
   extern bool PreaccActive;
 
   extern bool PreaccEnabled;
 
-  extern su2double::TapeType::Position StartPosition, EndPosition;
+#ifdef HAVE_OPDI
+  using CoDiTapePosition = su2double::TapeType::Position;
+  using OpDiState = void*;
+  using TapePosition = std::pair<CoDiTapePosition, OpDiState>;
+#else
+  using TapePosition = su2double::TapeType::Position;
+#endif
+
+  extern TapePosition StartPosition, EndPosition;
 
-  extern std::vector<su2double::TapeType::Position> TapePositions;
+  extern std::vector<TapePosition> TapePositions;
 
   extern std::vector<su2double::GradientData> localInputValues;
 
@@ -298,42 +303,68 @@ namespace AD{
 
   extern codi::PreaccumulationHelper<su2double> PreaccHelper;
 
+  /*--- Access to the tape: forwards to su2double::getGlobalTape(). ---*/
+
+  FORCEINLINE su2double::TapeType& getGlobalTape() {
+    return su2double::getGlobalTape();
+  }
+
   FORCEINLINE void RegisterInput(su2double &data, bool push_index = true) {
-    AD::globalTape.registerInput(data);
+    AD::getGlobalTape().registerInput(data);
     if (push_index) {
       inputValues.push_back(data.getGradientData());
     }
   }
 
-  FORCEINLINE void RegisterOutput(su2double& data) {AD::globalTape.registerOutput(data);}
+  FORCEINLINE void RegisterOutput(su2double& data) {AD::getGlobalTape().registerOutput(data);}
 
   FORCEINLINE void ResetInput(su2double &data) {data.getGradientData() = su2double::GradientData();}
 
-  FORCEINLINE void StartRecording() {AD::globalTape.setActive();}
+  FORCEINLINE void StartRecording() {AD::getGlobalTape().setActive();}
 
-  FORCEINLINE void StopRecording() {AD::globalTape.setPassive();}
+  FORCEINLINE void StopRecording() {AD::getGlobalTape().setPassive();}
 
-  FORCEINLINE bool TapeActive() { return AD::globalTape.isActive(); }
+  FORCEINLINE bool TapeActive() { return AD::getGlobalTape().isActive(); }
 
-  FORCEINLINE void PrintStatistics() {AD::globalTape.printStatistics();}
+  FORCEINLINE void PrintStatistics() {AD::getGlobalTape().printStatistics();}
 
-  FORCEINLINE void ClearAdjoints() {AD::globalTape.clearAdjoints(); }
+  FORCEINLINE void ClearAdjoints() {AD::getGlobalTape().clearAdjoints(); }
 
-  FORCEINLINE void ComputeAdjoint() {AD::globalTape.evaluate(); adjointVectorPosition = 0;}
+  FORCEINLINE void ComputeAdjoint() {  /*--- Evaluate the tape without position bounds. ---*/
+  #if defined(HAVE_OPDI)
+    opdi::logic->prepareEvaluate();  /*--- OpDiLib hook, invoked before the tape is evaluated. ---*/
+  #endif
+    AD::getGlobalTape().evaluate();
+    adjointVectorPosition = 0;  /*--- Restart the running index into AD::inputValues. ---*/
+  }
 
   FORCEINLINE void ComputeAdjoint(unsigned short enter, unsigned short leave) {
-    AD::globalTape.evaluate(TapePositions[enter], TapePositions[leave]);
+  #if defined(HAVE_OPDI)
+    opdi::logic->recoverState(TapePositions[enter].second);
+    opdi::logic->prepareEvaluate();
+    AD::getGlobalTape().evaluate(TapePositions[enter].first, TapePositions[leave].first);
+  #else
+    AD::getGlobalTape().evaluate(TapePositions[enter], TapePositions[leave]);
+  #endif
     if (leave == 0)
       adjointVectorPosition = 0;
   }
 
   FORCEINLINE void Reset() {
-    globalTape.reset();
+    AD::getGlobalTape().reset();
+  #if defined(HAVE_OPDI)
+    opdi::logic->reset();
+  #endif
     if (inputValues.size() != 0) {
       adjointVectorPosition = 0;
       inputValues.clear();
     }
     if (TapePositions.size() != 0) {
+    #if defined(HAVE_OPDI)
+      for (TapePosition& pos : TapePositions) {
+        opdi::logic->freeState(pos.second);
+      }
+    #endif
       TapePositions.clear();
     }
   }
@@ -343,11 +374,11 @@ namespace AD{
   }
 
   FORCEINLINE void SetDerivative(int index, const double val) {
-    AD::globalTape.setGradient(index, val);
+    AD::getGlobalTape().setGradient(index, val);
   }
 
   FORCEINLINE double GetDerivative(int index) {
-    return AD::globalTape.getGradient(index);
+    return AD::getGlobalTape().getGradient(index);
   }
 
   /*--- Base case for parameter pack expansion. ---*/
@@ -361,6 +392,11 @@ namespace AD{
     SetPreaccIn(moreData...);
   }
 
+  template<class T, class... Ts, su2enable_if<std::is_same<T,su2double>::value> = 0>
+  FORCEINLINE void SetPreaccIn(T&& data, Ts&&... moreData) {  /*--- Enabled only if T is exactly su2double, i.e. for su2double rvalues. ---*/
+    static_assert(!std::is_same<T,su2double>::value, "rvalues cannot be registered");  /*--- Contradicts the enable_if on purpose: always a compile error. ---*/
+  }
+
   template<class T>
   FORCEINLINE void SetPreaccIn(const T& data, const int size) {
     if (PreaccActive) {
@@ -384,20 +420,8 @@ namespace AD{
     }
   }
 
-  template<class T>
-  FORCEINLINE void SetPreaccIn(const T& data, const int size_x, const int size_y, const int size_z) {
-    if (!PreaccActive) return;
-    for (int i = 0; i < size_x; i++) {
-      for (int j = 0; j < size_y; j++) {
-        for (int k = 0; k < size_z; k++) {
-          if (data[i][j][k].isActive()) PreaccHelper.addInput(data[i][j][k]);
-        }
-      }
-    }
-  }
-
   FORCEINLINE void StartPreacc() {
-    if (globalTape.isActive() && PreaccEnabled) {
+    if (AD::getGlobalTape().isActive() && PreaccEnabled) {
       PreaccHelper.start();
       PreaccActive = true;
     }
@@ -438,7 +462,11 @@ namespace AD{
   }
 
   FORCEINLINE void Push_TapePosition() {
-    TapePositions.push_back(AD::globalTape.getPosition());
+  #if defined(HAVE_OPDI)
+    TapePositions.push_back({AD::getGlobalTape().getPosition(), opdi::logic->exportState()});
+  #else
+    TapePositions.push_back(AD::getGlobalTape().getPosition());
+  #endif
   }
 
   FORCEINLINE void EndPreacc(){
@@ -478,7 +506,7 @@ namespace AD{
   }
 
   FORCEINLINE void SetExtFuncOut(su2double& data) {
-    if (globalTape.isActive()) {
+    if (AD::getGlobalTape().isActive()) {
       FuncHelper->addOutput(data);
     }
   }
@@ -486,7 +514,7 @@ namespace AD{
   template<class T>
   FORCEINLINE void SetExtFuncOut(T&& data, const int size) {
     for (int i = 0; i < size; i++) {
-      if (globalTape.isActive()) {
+      if (AD::getGlobalTape().isActive()) {
         FuncHelper->addOutput(data[i]);
       }
     }
@@ -496,7 +524,7 @@ namespace AD{
   FORCEINLINE void SetExtFuncOut(T&& data, const int size_x, const int size_y) {
     for (int i = 0; i < size_x; i++) {
       for (int j = 0; j < size_y; j++) {
-        if (globalTape.isActive()) {
+        if (AD::getGlobalTape().isActive()) {
           FuncHelper->addOutput(data[i][j]);
         }
       }
@@ -511,7 +539,7 @@ namespace AD{
   FORCEINLINE void EndExtFunc() { delete FuncHelper; }
 
   FORCEINLINE bool BeginPassive() {
-    if(AD::globalTape.isActive()) {
+    if(AD::getGlobalTape().isActive()) {
       StopRecording();
       return true;
     }
diff --git a/Common/include/basic_types/datatype_structure.hpp b/Common/include/basic_types/datatype_structure.hpp
index 2c4c2bfa885..039df331200 100644
--- a/Common/include/basic_types/datatype_structure.hpp
+++ b/Common/include/basic_types/datatype_structure.hpp
@@ -30,87 +30,10 @@
 #include <iostream>
 #include <complex>
 #include <cstdio>
-#include <type_traits>
-
-#if defined(_MSC_VER)
-#define FORCEINLINE __forceinline
-#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
-#define FORCEINLINE inline __attribute__((always_inline))
-#else
-#define FORCEINLINE inline
-#endif
-
-#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
-#define NEVERINLINE inline __attribute__((noinline))
-#else
-#define NEVERINLINE inline
-#endif
-
-#if defined(__INTEL_COMPILER)
-/*--- Disable warnings related to inline attributes. ---*/
-#pragma warning disable 2196
-#pragma warning disable 3415
-/*--- Disable warnings related to overloaded virtual. ---*/
-#pragma warning disable 654
-#pragma warning disable 1125
-#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE)
-#pragma warning disable 1875
-#endif
-#endif
-
-/*--- Convenience SFINAE typedef to conditionally
- * enable/disable function template overloads. ---*/
-template<bool condition>
-using su2enable_if = typename std::enable_if<condition,bool>::type;
-
-/*--- Depending on the datatype defined during the configuration,
- * include the correct definition, and create the main typedef. ---*/
-
-#if defined(CODI_REVERSE_TYPE) // reverse mode AD
-#include "codi.hpp"
-#include "codi/tools/dataStore.hpp"
-
-#ifndef CODI_INDEX_TAPE
-#define CODI_INDEX_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_TAPE
-#define CODI_PRIMAL_TAPE 0
-#endif
-#ifndef CODI_PRIMAL_INDEX_TAPE
-#define CODI_PRIMAL_INDEX_TAPE 0
-#endif
-
-#if CODI_INDEX_TAPE
-using su2double = codi::RealReverseIndex;
-#elif CODI_PRIMAL_TAPE
-using su2double = codi::RealReversePrimal;
-#elif CODI_PRIMAL_INDEX_TAPE
-using su2double = codi::RealReversePrimalIndex;
-#else
-using su2double = codi::RealReverse;
-#endif
-
-#elif defined(CODI_FORWARD_TYPE) // forward mode AD
-#include "codi.hpp"
-using su2double = codi::RealForward;
-
-#else // primal / direct / no AD
-using su2double = double;
-#endif
 
+#include "../code_config.hpp"
 #include "ad_structure.hpp"
 
-/*--- This type can be used for (rare) compatiblity cases or for
- * computations that are intended to be (always) passive. ---*/
-using passivedouble = double;
-
-/*--- Define a type for potentially lower precision operations. ---*/
-#ifdef USE_MIXED_PRECISION
-using su2mixedfloat = float;
-#else
-using su2mixedfloat = passivedouble;
-#endif
-
 /*!
  * \namespace SU2_TYPE
  * \brief Namespace for defining the datatype wrapper routines, this acts as a base
@@ -174,11 +97,11 @@ namespace SU2_TYPE {
 
 #ifdef CODI_REVERSE_TYPE
   FORCEINLINE passivedouble GetSecondary(const su2double& data) {
-    return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]);
+    return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]);
   }
 
   FORCEINLINE passivedouble GetDerivative(const su2double& data) {
-    return AD::globalTape.getGradient(AD::inputValues[AD::adjointVectorPosition++]);
+    return AD::getGlobalTape().getGradient(AD::inputValues[AD::adjointVectorPosition++]);
   }
 #else // forward
   FORCEINLINE passivedouble GetSecondary(const su2double& data) {return data.getGradient();}
diff --git a/Common/include/code_config.hpp b/Common/include/code_config.hpp
new file mode 100644
index 00000000000..377432ee945
--- /dev/null
+++ b/Common/include/code_config.hpp
@@ -0,0 +1,124 @@
+/*!
+ * \file code_config.hpp
+ * \brief Header file for collecting common macros, definitions and type configurations.
+ * \author T. Albring, P. Gomes, J. Blühdorn
+ * \version 7.1.1 "Blackbird"
+ *
+ * SU2 Project Website: https://su2code.github.io
+ *
+ * The SU2 Project is maintained by the SU2 Foundation
+ * (http://su2foundation.org)
+ *
+ * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
+ *
+ * SU2 is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * SU2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+#if defined(_MSC_VER)
+#define FORCEINLINE __forceinline
+#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define FORCEINLINE inline __attribute__((always_inline))
+#else
+#define FORCEINLINE inline
+#endif
+
+#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER)
+#define NEVERINLINE inline __attribute__((noinline))
+#else
+#define NEVERINLINE inline
+#endif
+
+#if defined(__INTEL_COMPILER)
+/*--- Disable warnings related to inline attributes. ---*/
+#pragma warning disable 2196
+#pragma warning disable 3415
+/*--- Disable warnings related to overloaded virtual. ---*/
+#pragma warning disable 654
+#pragma warning disable 1125
+#if defined(CODI_FORWARD_TYPE) || defined(CODI_REVERSE_TYPE)
+#pragma warning disable 1875
+#endif
+#endif
+
+/*--- Convenience SFINAE typedef to conditionally
+ * enable/disable function template overloads. ---*/
+template<bool condition>
+using su2enable_if = typename std::enable_if<condition,bool>::type;
+
+/*--- Detect compilation with OpenMP. ---*/
+#if defined(_OPENMP)
+#define HAVE_OMP
+#endif
+
+/*--- Depending on the datatype defined during the configuration,
+ * include the correct definition, and create the main typedef. ---*/
+
+#if defined(CODI_REVERSE_TYPE) // reverse mode AD
+#include "codi.hpp"
+#include "codi/tools/dataStore.hpp"
+
+#ifndef CODI_INDEX_TAPE
+#define CODI_INDEX_TAPE 0
+#endif
+#ifndef CODI_PRIMAL_TAPE
+#define CODI_PRIMAL_TAPE 0
+#endif
+#ifndef CODI_PRIMAL_INDEX_TAPE
+#define CODI_PRIMAL_INDEX_TAPE 0
+#endif
+
+#if defined(HAVE_OMP)
+using su2double = codi::RealReverseIndexParallel;
+#else
+#if CODI_INDEX_TAPE
+using su2double = codi::RealReverseIndex;
+#elif CODI_PRIMAL_TAPE
+using su2double = codi::RealReversePrimal;
+#elif CODI_PRIMAL_INDEX_TAPE
+using su2double = codi::RealReversePrimalIndex;
+#else
+using su2double = codi::RealReverse;
+#endif
+#endif
+#elif defined(CODI_FORWARD_TYPE) // forward mode AD
+#include "codi.hpp"
+using su2double = codi::RealForward;
+
+#else // primal / direct / no AD
+using su2double = double;
+#endif
+
+/*--- This type can be used for (rare) compatibility cases or for
+ * computations that are intended to be (always) passive. ---*/
+using passivedouble = double;
+
+/*--- Define a type for potentially lower precision operations. ---*/
+#ifdef USE_MIXED_PRECISION
+using su2mixedfloat = float;
+#else
+using su2mixedfloat = passivedouble;
+#endif
+
+/*--- Detect if OpDiLib has to be used. ---*/
+#if defined(HAVE_OMP) && defined(CODI_REVERSE_TYPE)
+#define HAVE_OPDI
+#endif
+/*--- Prefer the OMPT backend from OpenMP 5.0 (201811) on, unless a backend is forced. ---*/
+#if (defined(_OPENMP) && _OPENMP >= 201811 && !defined(FORCE_OPDI_MACRO_BACKEND)) || defined(FORCE_OPDI_OMPT_BACKEND)
+#define HAVE_OMPT
+#endif
diff --git a/Common/include/containers/C2DContainer.hpp b/Common/include/containers/C2DContainer.hpp
index 963cddc99fe..c2d08269294 100644
--- a/Common/include/containers/C2DContainer.hpp
+++ b/Common/include/containers/C2DContainer.hpp
@@ -77,12 +77,17 @@ class AccessorImpl
    * Static size specializations use this do-nothing allocation macro.
    */
 #define DUMMY_ALLOCATOR \
-  void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}
+  void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {}\
+  void m_destroy() noexcept {}
+
   /*!
    * Dynamic size specializations use this one, EXTRA is used to set some
    * runtime internal value that depend on the number of rows/columns.
    * What values need setting depends on the specialization as not all have
    * members for e.g. number of rows and cols (static size optimization).
+   * Because the memory comes from an aligned allocation, "placement new" is
+   * used afterwards to default construct the elements of non-trivial types.
+   * Such elements also need to be destructed explicitly before the memory is freed.
    */
 #define REAL_ALLOCATOR(EXTRA)                                           \
   static_assert(MemoryAllocation::is_power_of_two(AlignSize),           \
@@ -91,6 +96,14 @@ class AccessorImpl
   void m_allocate(size_t sz, Index_t rows, Index_t cols) noexcept {     \
     EXTRA;                                                              \
     m_data = MemoryAllocation::aligned_alloc<Scalar_t>(AlignSize,sz);   \
+    if (!std::is_trivial<Scalar_t>::value)                              \
+      for (size_t i = 0; i < size(); ++i) new (m_data+i) Scalar_t();    \
+  }                                                                     \
+                                                                        \
+  void m_destroy() noexcept { /* Destroy the elements, then free. */    \
+    if (m_data && !std::is_trivial<Scalar_t>::value) /* null: skip */   \
+      for (size_t i = 0; i < size(); ++i) m_data[i].~Scalar_t();        \
+    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
   }
 
   DUMMY_ALLOCATOR
@@ -114,15 +127,13 @@ class AccessorImpl
                                                                         \
   AccessorImpl& operator= (AccessorImpl&& other) noexcept               \
   {                                                                     \
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
+    m_destroy();                                                        \
     MOVE; m_data=other.m_data; other.m_data=nullptr;                    \
     return *this;                                                       \
   }                                                                     \
                                                                         \
-  ~AccessorImpl() noexcept                                              \
-  {                                                                     \
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);                   \
-  }
+  ~AccessorImpl() noexcept {m_destroy();}
+
   /*!
    * Shorthand for when specialization has only one more member than m_data.
    */
@@ -380,6 +391,7 @@ class C2DContainer :
   using Base = container_helpers::AccessorImpl<Index_t,Scalar_t,Store,AlignSize,StaticRows,StaticCols>;
   using Base::m_data;
   using Base::m_allocate;
+  using Base::m_destroy;
 public:
   using Base::size;
   using Base::rows;
@@ -473,7 +485,7 @@ class C2DContainer :
     if(rows==this->rows() && cols==this->cols())
       return reqSize;
 
-    MemoryAllocation::aligned_free<Scalar_t>(m_data);
+    m_destroy();
 
     /*--- request actual allocation to base class as it needs specialization ---*/
     size_t bytes = reqSize*sizeof(Scalar_t);
diff --git a/Common/include/geometry/CGeometry.hpp b/Common/include/geometry/CGeometry.hpp
index 583ffab12fd..07dc11447d9 100644
--- a/Common/include/geometry/CGeometry.hpp
+++ b/Common/include/geometry/CGeometry.hpp
@@ -1242,13 +1242,7 @@ class CGeometry {
    * \brief Register the coordinates of the mesh nodes.
    * \param[in] config
    */
-  void RegisterCoordinates(CConfig *config) const;
-
-  /*!
-   * \brief Register the coordinates of the mesh nodes as output.
-   * \param[in] config
-   */
-  void RegisterOutput_Coordinates(CConfig *config) const;
+  void RegisterCoordinates(const CConfig *config) const;
 
   /*!
    * \brief Update the multi-grid structure and the wall-distance.
diff --git a/Common/include/geometry/dual_grid/CPoint.hpp b/Common/include/geometry/dual_grid/CPoint.hpp
index 86ac53d4936..9db963524ad 100644
--- a/Common/include/geometry/dual_grid/CPoint.hpp
+++ b/Common/include/geometry/dual_grid/CPoint.hpp
@@ -423,7 +423,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Value of the distance to the nearest wall.
    */
-  inline su2double GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); }
+  inline su2double& GetWall_Distance(unsigned long iPoint) { return Wall_Distance(iPoint); }
+  inline const su2double& GetWall_Distance(unsigned long iPoint) const { return Wall_Distance(iPoint); }
 
   /*!
    * \brief Set the value of the distance to the nearest wall.
@@ -451,7 +452,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Value of the distance to the nearest wall.
    */
-  inline su2double GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); }
+  inline su2double& GetSharpEdge_Distance(unsigned long iPoint) { return SharpEdge_Distance(iPoint); }
+  inline const su2double& GetSharpEdge_Distance(unsigned long iPoint) const { return SharpEdge_Distance(iPoint); }
 
   /*!
    * \brief Set the value of the curvature at a surface node.
@@ -486,7 +488,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Area or volume of the control volume.
    */
-  inline su2double GetVolume(unsigned long iPoint) const { return Volume(iPoint); }
+  inline su2double& GetVolume(unsigned long iPoint) { return Volume(iPoint); }
+  inline const su2double& GetVolume(unsigned long iPoint) const { return Volume(iPoint); }
 
   /*!
    * \brief Set the volume of the control volume.
@@ -507,7 +510,8 @@ class CPoint {
    * \param[in] iPoint - Index of the point.
    * \return Periodic component of area or volume for a control volume on a periodic marker.
    */
-  inline su2double GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); }
+  inline su2double& GetPeriodicVolume(unsigned long iPoint) { return Periodic_Volume(iPoint); }
+  inline const su2double& GetPeriodicVolume(unsigned long iPoint) const { return Periodic_Volume(iPoint); }
 
   /*!
    * \brief Set the missing component of area or volume for a control volume on a periodic marker.
diff --git a/Common/include/linear_algebra/CSysSolve.hpp b/Common/include/linear_algebra/CSysSolve.hpp
index d5e8dfb1dce..c69643cefe1 100644
--- a/Common/include/linear_algebra/CSysSolve.hpp
+++ b/Common/include/linear_algebra/CSysSolve.hpp
@@ -221,6 +221,7 @@ class CSysSolve {
       LinSysRes_ptr = &LinSysRes;
       LinSysSol_ptr = &LinSysSol;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -242,6 +243,7 @@ class CSysSolve {
       LinSysRes_ptr = &LinSysRes_tmp;
       LinSysSol_ptr = &LinSysSol_tmp;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -258,6 +260,7 @@ class CSysSolve {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -277,6 +280,7 @@ class CSysSolve {
       LinSysRes_ptr = nullptr;
       LinSysSol_ptr = nullptr;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
diff --git a/Common/include/linear_algebra/CSysVector.hpp b/Common/include/linear_algebra/CSysVector.hpp
index f64e2873e0f..59ee1304fe3 100644
--- a/Common/include/linear_algebra/CSysVector.hpp
+++ b/Common/include/linear_algebra/CSysVector.hpp
@@ -45,12 +45,14 @@
  */
 #ifdef HAVE_OMP
 #ifdef HAVE_OMP_SIMD
-#define CSYSVEC_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait)
+#define CSYSVEC_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT)
 #else
-#define CSYSVEC_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+#define CSYSVEC_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
 #endif
+#define END_CSYSVEC_PARFOR END_SU2_OMP_FOR
 #else
 #define CSYSVEC_PARFOR SU2_OMP_SIMD
+#define END_CSYSVEC_PARFOR
 #endif
 
 /*!
@@ -186,10 +188,12 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
 
     SU2_OMP_MASTER
     Initialize(other.GetNBlk(), other.GetNBlkDomain(), other.GetNVar(), nullptr, true, false);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     CSYSVEC_PARFOR
     for (auto i = 0ul; i < nElm; i++) vec_val[i] = SU2_TYPE::GetValue(other[i]);
+    END_CSYSVEC_PARFOR
   }
 
   /*!
@@ -250,6 +254,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
   CSysVector& operator=(const CSysVector& other) {
     CSYSVEC_PARFOR
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] = other.vec_val[i];
+    END_CSYSVEC_PARFOR
     return *this;
   }
 
@@ -261,12 +266,14 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
   CSysVector& operator OP(ScalarType val) {                               \
     CSYSVEC_PARFOR                                                        \
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP val;                  \
+    END_CSYSVEC_PARFOR                                                    \
     return *this;                                                         \
   }                                                                       \
   template <class T>                                                      \
   CSysVector& operator OP(const VecExpr::CVecExpr<T, ScalarType>& expr) { \
     CSYSVEC_PARFOR                                                        \
     for (auto i = 0ul; i < nElm; ++i) vec_val[i] OP expr.derived()[i];    \
+    END_CSYSVEC_PARFOR                                                    \
     return *this;                                                         \
   }
   MAKE_COMPOUND(=)
@@ -293,6 +300,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
     SU2_OMP_BARRIER
     SU2_OMP_MASTER
     dotRes = 0.0;
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Local dot product for each thread. ---*/
@@ -302,6 +310,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
     for (auto i = 0ul; i < nElmDomain; ++i) {
       sum += vec_val[i] * expr.derived()[i];
     }
+    END_CSYSVEC_PARFOR
 
     /*--- Update shared variable with "our" partial sum. ---*/
     atomicAdd(sum, dotRes);
@@ -314,6 +323,7 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
       const auto mpi_type = (sizeof(ScalarType) < sizeof(double)) ? MPI_FLOAT : MPI_DOUBLE;
       SelectMPIWrapper<ScalarType>::W::Allreduce(&sum, &dotRes, 1, mpi_type, MPI_SUM, SU2_MPI::GetComm());
     }
+    END_SU2_OMP_MASTER
 #endif
     /*--- Make view of result consistent across threads. ---*/
     SU2_OMP_BARRIER
@@ -440,3 +450,4 @@ class CSysVector : public VecExpr::CVecExpr<CSysVector<ScalarType>, ScalarType>
 };
 
 #undef CSYSVEC_PARFOR
+#undef END_CSYSVEC_PARFOR
diff --git a/Common/include/parallelization/omp_structure.cpp b/Common/include/parallelization/omp_structure.cpp
new file mode 100644
index 00000000000..6432b6bb482
--- /dev/null
+++ b/Common/include/parallelization/omp_structure.cpp
@@ -0,0 +1,61 @@
+/*!
+ * \file omp_structure.cpp
+ * \brief Source file counterpart for omp_structure.hpp.
+ * \note Contains OpDiLib initialization, finalization and includes the OpDiLib source file.
+ * \author J. Blühdorn
+ * \version 7.1.1 "Blackbird"
+ *
+ * SU2 Project Website: https://su2code.github.io
+ *
+ * The SU2 Project is maintained by the SU2 Foundation
+ * (http://su2foundation.org)
+ *
+ * Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
+ *
+ * SU2 is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * SU2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "omp_structure.hpp"
+
+void omp_initialize() {  /*--- Sets up OpDiLib; does nothing unless HAVE_OPDI is defined. ---*/
+#ifdef HAVE_OPDI
+#if !defined(HAVE_OMPT)  /*--- Only the macro backend is created and initialized here. ---*/
+  opdi::backend = new opdi::MacroBackend;
+  opdi::backend->init();
+#endif
+  opdi::logic = new opdi::OmpLogic;
+  opdi::logic->init();
+  su2double::getGlobalTape().initialize();  /*--- Tape is initialized after the logic and before the tool. ---*/
+  opdi::tool = new CoDiOpDiTool<su2double>;
+  opdi::tool->init();
+#endif
+}
+
+void omp_finalize() {  /*--- Finalizes tool, tape, logic and backend, i.e. in the reverse order of omp_initialize(). ---*/
+#ifdef HAVE_OPDI
+  opdi::tool->finalize();
+  su2double::getGlobalTape().finalize();
+  opdi::logic->finalize();
+  opdi::backend->finalize();  /*--- NOTE(review): not guarded by !HAVE_OMPT, unlike creation and deletion; presumably the OMPT backend is set up outside this file - confirm. ---*/
+  delete opdi::tool;
+  delete opdi::logic;
+#if !defined(HAVE_OMPT)  /*--- Only the macro backend was allocated by omp_initialize(). ---*/
+  delete opdi::backend;
+#endif
+#endif
+}
+
+#ifdef HAVE_OPDI
+#include "opdi.cpp"
+#endif
diff --git a/Common/include/parallelization/omp_structure.hpp b/Common/include/parallelization/omp_structure.hpp
index 42a96d5428f..c96d7383f94 100644
--- a/Common/include/parallelization/omp_structure.hpp
+++ b/Common/include/parallelization/omp_structure.hpp
@@ -12,7 +12,7 @@
  *       e.g. SU2_OMP_PARALLEL. Exotic pragmas of limited portability should be
  *       defined here with suitable fallback versions to limit the spread of
  *       compiler tricks in other areas of the code.
- * \author P. Gomes
+ * \author P. Gomes, J. Blühdorn
  * \version 7.1.1 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -38,7 +38,9 @@
 
 #pragma once
 
-#include "../basic_types/datatype_structure.hpp"
+#include <cstddef>
+
+#include "../code_config.hpp"
 
 #if defined(_MSC_VER)
 #define PRAGMIZE(X) __pragma(X)
@@ -46,12 +48,19 @@
 #define PRAGMIZE(X) _Pragma(#X)
 #endif
 
-/*--- Detect compilation with OpenMP support, protect agaisnt
- *    using OpenMP with Reverse AD (not supported yet). ---*/
-#if defined(_OPENMP) && !defined(CODI_REVERSE_TYPE)
-#define HAVE_OMP
+#if defined(HAVE_OMP)
 #include <omp.h>
 
+#if defined(HAVE_OPDI)
+#if defined(HAVE_OMPT)
+#include "opdi/backend/ompt/omptBackend.hpp"
+#else
+#include "opdi/backend/macro/macroBackend.hpp"
+#endif
+#include "codi/externals/codiOpdiTool.hpp"
+#include "opdi.hpp"
+#endif
+
 /*--- The generic start of OpenMP constructs. ---*/
 #define SU2_OMP(ARGS) PRAGMIZE(omp ARGS)
 
@@ -106,6 +115,11 @@ inline void omp_destroy_lock(omp_lock_t*){}
 
 #endif // end OpenMP detection
 
+/*--- Initialization and finalization ---*/
+
+void omp_initialize();
+void omp_finalize();
+
 /*--- Detect SIMD support (version 4+, after Jul 2013). ---*/
 #ifdef _OPENMP
 #if _OPENMP >= 201307
@@ -125,8 +139,11 @@ inline void omp_destroy_lock(omp_lock_t*){}
 
 /*--- Convenience macros (do not use excessive nesting). ---*/
 
-#define SU2_OMP_MASTER SU2_OMP(master)
 #define SU2_OMP_ATOMIC SU2_OMP(atomic)
+
+#ifndef HAVE_OPDI
+
+#define SU2_OMP_MASTER SU2_OMP(master)
 #define SU2_OMP_BARRIER SU2_OMP(barrier)
 #define SU2_OMP_CRITICAL SU2_OMP(critical)
 
@@ -134,9 +151,40 @@ inline void omp_destroy_lock(omp_lock_t*){}
 #define SU2_OMP_PARALLEL_(ARGS) SU2_OMP(parallel ARGS)
 #define SU2_OMP_PARALLEL_ON(NTHREADS) SU2_OMP(parallel num_threads(NTHREADS))
 
+#define SU2_OMP_FOR_(ARGS) SU2_OMP(for ARGS)
 #define SU2_OMP_FOR_DYN(CHUNK) SU2_OMP(for schedule(dynamic,CHUNK))
 #define SU2_OMP_FOR_STAT(CHUNK) SU2_OMP(for schedule(static,CHUNK))
 
+#define SU2_NOWAIT nowait
+
+#define END_SU2_OMP_MASTER
+#define END_SU2_OMP_CRITICAL
+#define END_SU2_OMP_PARALLEL
+#define END_SU2_OMP_FOR
+
+#else // HAVE_OPDI
+/*--- With OpDiLib the constructs map to OPDI_* macros and the END_* macros are not empty. ---*/
+#define SU2_OMP_MASTER OPDI_MASTER()
+#define SU2_OMP_BARRIER OPDI_BARRIER()
+#define SU2_OMP_CRITICAL OPDI_CRITICAL()
+
+#define SU2_OMP_PARALLEL OPDI_PARALLEL()
+#define SU2_OMP_PARALLEL_(ARGS) OPDI_PARALLEL(ARGS)
+#define SU2_OMP_PARALLEL_ON(NTHREADS) OPDI_PARALLEL(num_threads(NTHREADS))
+
+#define SU2_OMP_FOR_(ARGS) OPDI_FOR(ARGS)
+#define SU2_OMP_FOR_DYN(CHUNK) OPDI_FOR(schedule(dynamic,CHUNK))
+#define SU2_OMP_FOR_STAT(CHUNK) OPDI_FOR(schedule(static,CHUNK))
+
+#define SU2_NOWAIT OPDI_NOWAIT
+
+#define END_SU2_OMP_MASTER OPDI_END_MASTER
+#define END_SU2_OMP_CRITICAL OPDI_END_CRITICAL
+#define END_SU2_OMP_PARALLEL OPDI_END_PARALLEL
+#define END_SU2_OMP_FOR OPDI_END_FOR
+
+#endif // end HAVE_OPDI
+
 /*--- Convenience functions (e.g. to compute chunk sizes). ---*/
 
 /*!
@@ -184,6 +232,7 @@ void parallelCopy(size_t size, const T* src, U* dst)
 {
   SU2_OMP_FOR_STAT(2048)
   for(size_t i=0; i<size; ++i) dst[i] = src[i];
+  END_SU2_OMP_FOR
 }
 
 /*!
@@ -197,6 +246,7 @@ void parallelSet(size_t size, T val, U* dst)
 {
   SU2_OMP_FOR_STAT(2048)
   for(size_t i=0; i<size; ++i) dst[i] = val;
+  END_SU2_OMP_FOR
 }
 
 /*!
@@ -210,6 +260,7 @@ inline void atomicAdd(T rhs, T& lhs)
 {
   SU2_OMP_CRITICAL
   lhs += rhs;
+  END_SU2_OMP_CRITICAL
 }
 template<class T, su2enable_if<std::is_arithmetic<T>::value> = 0>
 inline void atomicAdd(T rhs, T& lhs)
diff --git a/Common/include/toolboxes/CLinearPartitioner.hpp b/Common/include/toolboxes/CLinearPartitioner.hpp
index 4a86acebf68..5e2a4d24dea 100644
--- a/Common/include/toolboxes/CLinearPartitioner.hpp
+++ b/Common/include/toolboxes/CLinearPartitioner.hpp
@@ -52,63 +52,68 @@ class CLinearPartitioner {
   vector<unsigned long> cumulativeSizeBeforeRank; /*!< \brief Vector containing the cumulative size of all linear partitions before the current rank. */
 
 public:
+  CLinearPartitioner() = default; /*!< \brief Default constructor, no partition is set up until Initialize is called. */
 
   /*!
-   * \brief Constructor of the CLinearPartitioner class.
-   * \param[in] val_global_count - global count to be linearly partitioned.
-   * \param[in] val_offset - offset from 0 for the first index on rank 0 (typically 0).
-   * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false).
+   * \brief Constructor of the CLinearPartitioner class, see Initialize.
    */
-  CLinearPartitioner(unsigned long val_global_count,
-                     unsigned long val_offset,
-                     bool          isDisjoint = false);
+  CLinearPartitioner(unsigned long global_count,
+                     unsigned long offset,
+                     bool isDisjoint = false) {
+    Initialize(global_count, offset, isDisjoint);
+  }
 
   /*!
-   * \brief Destructor of the CLinearPartitioner class.
+   * \brief Initialize the CLinearPartitioner class.
+   * \param[in] global_count - global count to be linearly partitioned.
+   * \param[in] offset - offset from 0 for the first index on rank 0 (typically 0).
+   * \param[in] isDisjoint - boolean controlling whether the linear partitions should be disjoint (default is false).
    */
-  ~CLinearPartitioner(void);
+  void Initialize(unsigned long global_count,
+                  unsigned long offset,
+                  bool isDisjoint = false);
 
   /*!
    * \brief Get the rank that owns the index based on the linear partitioning.
-   * \param[in] val_index - Current index.
+   * \param[in] index - Current index.
    * \returns Owning rank for the current index based on linear partitioning.
    */
-  unsigned long GetRankContainingIndex(unsigned long val_index);
+  unsigned long GetRankContainingIndex(unsigned long index) const;
 
   /*!
    * \brief Get the first index of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns First index of the current rank's linear partition.
    */
-  inline unsigned long GetFirstIndexOnRank(int val_rank) {
-    return firstIndex[val_rank];
+  inline unsigned long GetFirstIndexOnRank(int rank) const {
+    return firstIndex[rank];
   }
 
   /*!
    * \brief Get the last index of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Last index of the current rank's linear partition.
    */
-  inline unsigned long GetLastIndexOnRank(int val_rank) {
-    return lastIndex[val_rank];
+  inline unsigned long GetLastIndexOnRank(int rank) const {
+    return lastIndex[rank];
   }
 
   /*!
    * \brief Get the total size of the current rank's linear partition.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Size of the current rank's linear partition.
    */
-  inline unsigned long GetSizeOnRank(int val_rank) {
-    return sizeOnRank[val_rank];
+  inline unsigned long GetSizeOnRank(int rank) const {
+    return sizeOnRank[rank];
   }
 
   /*!
    * \brief Get the cumulative size of all linear partitions before the current rank.
-   * \param[in] val_rank - MPI rank identifier.
+   * \param[in] rank - MPI rank identifier.
    * \returns Cumulative size of all linear partitions before the current rank.
    */
-  inline unsigned long GetCumulativeSizeBeforeRank(int val_rank) {
-    return cumulativeSizeBeforeRank[val_rank];
+  inline unsigned long GetCumulativeSizeBeforeRank(int rank) const {
+    return cumulativeSizeBeforeRank[rank];
   }
 
 };
diff --git a/Common/include/toolboxes/allocation_toolbox.hpp b/Common/include/toolboxes/allocation_toolbox.hpp
index f513752ba26..2d5d3bb4409 100644
--- a/Common/include/toolboxes/allocation_toolbox.hpp
+++ b/Common/include/toolboxes/allocation_toolbox.hpp
@@ -36,6 +36,8 @@
 #include <stdlib.h>
 #endif
 
+#include <cstring>
+
 #include <cassert>
 
 namespace MemoryAllocation
@@ -55,9 +57,10 @@ inline constexpr size_t round_up(size_t multiple, size_t x)
  * \brief Aligned memory allocation compatible across platforms.
  * \param[in] alignment, in bytes, of the memory being allocated.
  * \param[in] size, also in bytes.
+ * \tparam ZeroInit, initialize memory to 0.
  * \return Pointer to memory, always use su2::aligned_free to deallocate.
  */
-template<class T>
+template<class T, bool ZeroInit = false>
 inline T* aligned_alloc(size_t alignment, size_t size) noexcept
 {
   assert(is_power_of_two(alignment));
@@ -78,6 +81,7 @@ inline T* aligned_alloc(size_t alignment, size_t size) noexcept
 #else
   ptr = ::aligned_alloc(alignment, size);
 #endif
+  if (ZeroInit && ptr) memset(ptr, 0, size); // skip failed allocations, memset on a null pointer is undefined
   return static_cast<T*>(ptr);
 }
 
diff --git a/Common/include/toolboxes/graph_toolbox.hpp b/Common/include/toolboxes/graph_toolbox.hpp
index d170cce5ae3..9dba7b4d955 100644
--- a/Common/include/toolboxes/graph_toolbox.hpp
+++ b/Common/include/toolboxes/graph_toolbox.hpp
@@ -166,6 +166,7 @@ class CCompressedSparsePattern {
     SU2_OMP_PARALLEL_(for schedule(static,roundUpDiv(getOuterSize(),omp_get_max_threads())))
     for(Index_t k = 0; k < getOuterSize(); ++k)
       m_diagPtr(k) = findInnerIdx(k,k);
+    END_SU2_OMP_PARALLEL
   }
 
   /*!
@@ -184,6 +185,7 @@ class CCompressedSparsePattern {
         assert(m_innerIdxTransp(k) != m_innerIdx.size() && "The pattern is not symmetric.");
       }
     }
+    END_SU2_OMP_PARALLEL
   }
 
   /*!
diff --git a/Common/lib/Makefile.am b/Common/lib/Makefile.am
index 2e698b72336..1e7a8761c6f 100644
--- a/Common/lib/Makefile.am
+++ b/Common/lib/Makefile.am
@@ -10,7 +10,7 @@
 # The SU2 Project is maintained by the SU2 Foundation 
 # (http://su2foundation.org)
 #
-# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -63,6 +63,7 @@ lib_sources = \
   ../src/grid_movement/CVolumetricMovement.cpp \
   ../src/grid_movement/CSurfaceMovement.cpp \
   ../include/parallelization/mpi_structure.cpp \
+  ../include/parallelization/omp_structure.cpp \
   ../src/basic_types/ad_structure.cpp \
   ../src/fem/fem_gauss_jacobi_quadrature.cpp \
   ../src/geometry/CGeometry.cpp \
diff --git a/Common/src/CConfig.cpp b/Common/src/CConfig.cpp
index f9df51d521a..14061dfbb86 100644
--- a/Common/src/CConfig.cpp
+++ b/Common/src/CConfig.cpp
@@ -4425,7 +4425,11 @@ void CConfig::SetPostprocessing(unsigned short val_software, unsigned short val_
 #if defined CODI_REVERSE_TYPE
   AD_Mode = YES;
 
+#if defined HAVE_OMP
+  AD::PreaccEnabled = false; // NOTE(review): the AD_Preaccumulation option is ignored in OpenMP builds - presumably preaccumulation is not thread-safe yet, confirm
+#else
   AD::PreaccEnabled = AD_Preaccumulation;
+#endif
 
 #else
   if (AD_Mode == YES) {
diff --git a/Common/src/basic_types/ad_structure.cpp b/Common/src/basic_types/ad_structure.cpp
index 6739bbc1e73..18342e13a90 100644
--- a/Common/src/basic_types/ad_structure.cpp
+++ b/Common/src/basic_types/ad_structure.cpp
@@ -37,9 +37,8 @@ namespace AD {
   std::vector<su2double::GradientData> localInputValues;
   std::vector<su2double*> localOutputValues;
 
-  su2double::TapeType& globalTape = su2double::getGlobalTape();
-  su2double::TapeType::Position StartPosition, EndPosition;
-  std::vector<su2double::TapeType::Position> TapePositions;
+  TapePosition StartPosition, EndPosition;
+  std::vector<TapePosition> TapePositions;
 
   bool PreaccActive = false;
   bool PreaccEnabled = true;
diff --git a/Common/src/geometry/CGeometry.cpp b/Common/src/geometry/CGeometry.cpp
index d8fb07b0eed..caa11e9239a 100644
--- a/Common/src/geometry/CGeometry.cpp
+++ b/Common/src/geometry/CGeometry.cpp
@@ -400,7 +400,9 @@ void CGeometry::AllocateP2PComms(unsigned short countPerPoint) {
   delete [] bufS_P2PRecv;
   bufS_P2PRecv = new unsigned short[maxCountPerPoint*nPoint_P2PRecv[nP2PRecv]] ();
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
 }
 
@@ -504,6 +506,7 @@ void CGeometry::PostP2PRecvs(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 }
 
@@ -601,6 +604,7 @@ void CGeometry::PostP2PSends(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 }
 
@@ -736,6 +740,7 @@ void CGeometry::InitiateComms(CGeometry *geometry,
           break;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -782,6 +787,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
 
     SU2_OMP_MASTER
     SU2_MPI::Waitany(nP2PRecv, req_P2PRecv, &ind, &status);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -839,6 +845,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
           break;
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -848,6 +855,7 @@ void CGeometry::CompleteComms(CGeometry *geometry,
 #ifdef HAVE_MPI
   SU2_OMP_MASTER
   SU2_MPI::Waitall(nP2PSend, req_P2PSend, MPI_STATUS_IGNORE);
+  END_SU2_OMP_MASTER
 #endif
   SU2_OMP_BARRIER
 
@@ -1226,7 +1234,9 @@ void CGeometry::AllocatePeriodicComms(unsigned short countPerPeriodicPoint) {
   delete [] bufS_PeriodicRecv;
   bufS_PeriodicRecv = new unsigned short[nRecv] ();
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
@@ -1283,6 +1293,7 @@ void CGeometry::PostPeriodicRecvs(CGeometry *geometry,
     }
 
   }
+  END_SU2_OMP_MASTER
 
 #endif
 
@@ -1337,7 +1348,8 @@ void CGeometry::PostPeriodicSends(CGeometry *geometry,
                      CURRENT_FUNCTION);
       break;
   }
-  } // end master
+  }
+  END_SU2_OMP_MASTER
 #else
 
   /*--- Copy my own rank's data into the recv buffer directly in serial. ---*/
@@ -2480,44 +2492,24 @@ void CGeometry::ComputeAirfoil_Section(su2double *Plane_P0, su2double *Plane_Nor
 
 }
 
-void CGeometry::RegisterCoordinates(CConfig *config) const {
-  unsigned short iDim;
-  unsigned long iPoint;
-  bool input = true;
-  bool push_index = config->GetMultizone_Problem()? false : true;
+void CGeometry::RegisterCoordinates(const CConfig *config) const {
+  const bool input = true;
+  const bool push_index = config->GetMultizone_Problem()? false : true;
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    for (iDim = 0; iDim < nDim; iDim++) {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
+    for (auto iDim = 0u; iDim < nDim; iDim++) {
       AD::RegisterInput(nodes->GetCoord(iPoint)[iDim], push_index);
     }
     if(!push_index) {
       nodes->SetIndex(iPoint, input);
     }
   }
-}
-
-void CGeometry::RegisterOutput_Coordinates(CConfig *config) const{
-  unsigned short iDim;
-  unsigned long iPoint;
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    if(config->GetMultizone_Problem()) {
-      for (iDim = 0; iDim < nDim; iDim++) {
-        AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]);
-      }
-    }
-    else {
-      for (iDim = 0; iDim < nDim; iDim++) {
-        AD::RegisterOutput(nodes->GetCoord(iPoint)[iDim]);
-      }
-    }
-  }
+  END_SU2_OMP_FOR
 }
 
 void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config) {
 
-  unsigned short iMesh;
-
   geometry_container[MESH_0]->InitiateComms(geometry_container[MESH_0], config, COORDINATES);
   geometry_container[MESH_0]->CompleteComms(geometry_container[MESH_0], config, COORDINATES);
   if (config->GetDynamic_Grid()){
@@ -2529,7 +2521,7 @@ void CGeometry::UpdateGeometry(CGeometry **geometry_container, CConfig *config)
   geometry_container[MESH_0]->SetBoundControlVolume(config, UPDATE);
   geometry_container[MESH_0]->SetMaxLength(config);
 
-  for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
+  for (unsigned short iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
     /*--- Update the control volume structures ---*/
 
     geometry_container[iMesh]->SetControlVolume(config,geometry_container[iMesh-1], UPDATE);
@@ -3159,6 +3151,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem+iDim] = 0.0;
     vol_elem[iElem] = 0.0;
   }
+  END_SU2_OMP_FOR
 
   /*--- Populate ---*/
   SU2_OMP_FOR_STAT(256)
@@ -3168,6 +3161,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem_global+iDim] = elem[iElem]->GetCG(iDim);
     vol_elem[iElem_global] = elem[iElem]->GetVolume();
   }
+  END_SU2_OMP_FOR
 
 #ifdef HAVE_MPI
   /*--- Account for the duplication introduced by the halo elements and the
@@ -3175,10 +3169,12 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
   SU2_OMP_FOR_STAT(256)
   for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
     halo_detect[iElem] = 0;
+  END_SU2_OMP_FOR
 
   SU2_OMP_FOR_STAT(256)
   for(auto iElem=0ul; iElem<nElem; ++iElem)
     halo_detect[elem[iElem]->GetGlobalIndex()] = 1;
+  END_SU2_OMP_FOR
 
   /*--- Share with all processors ---*/
   SU2_OMP_MASTER
@@ -3195,6 +3191,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
     MPI_Allreduce(halo_detect.data(),char_buffer.data(),Global_nElemDomain,MPI_CHAR,MPI_SUM,SU2_MPI::GetComm());
     halo_detect.swap(char_buffer);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_FOR_STAT(256)
@@ -3204,6 +3201,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       cg_elem[nDim*iElem+iDim] /= numRepeat;
     vol_elem[iElem] /= numRepeat;
   }
+  END_SU2_OMP_FOR
 #endif
 
   /*--- SECOND: Each processor performs the average for its elements. For each
@@ -3223,11 +3221,13 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
       work_values[iElem] = 0.0;
+    END_SU2_OMP_FOR
 
     /*--- Populate ---*/
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<nElem; ++iElem)
       work_values[elem[iElem]->GetGlobalIndex()] = values[iElem];
+    END_SU2_OMP_FOR
 
 #ifdef HAVE_MPI
     /*--- Share with all processors ---*/
@@ -3237,6 +3237,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       SU2_MPI::Allreduce(work_values,buffer,Global_nElemDomain,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
       swap(buffer, work_values); delete [] buffer;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Account for duplication ---*/
@@ -3245,6 +3246,7 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
       su2double numRepeat = halo_detect[iElem];
       work_values[iElem] /= numRepeat;
     }
+    END_SU2_OMP_FOR
 #endif
 
     /*--- Filter ---*/
@@ -3308,9 +3310,11 @@ void CGeometry::FilterValuesAtElementCG(const vector<su2double> &filter_radius,
           SU2_MPI::Error("Unknown type of filter kernel",CURRENT_FUNCTION);
       }
     }
+    END_SU2_OMP_FOR
   }
 
-  } // end OpenMP parallel section
+  }
+  END_SU2_OMP_PARALLEL
 
   limited_searches /= kernels.size();
 
@@ -3342,13 +3346,16 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<Global_nElemDomain; ++iElem)
       nFaces_elem[iElem] = 0;
+    END_SU2_OMP_FOR
 
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<nElem; ++iElem) {
       auto iElem_global = elem[iElem]->GetGlobalIndex();
       nFaces_elem[iElem_global] = elem[iElem]->GetnFaces();
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 #ifdef HAVE_MPI
   /*--- Share with all processors ---*/
   {
@@ -3378,6 +3385,7 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
     /*--- Initialize ---*/
     SU2_OMP_FOR_STAT(256)
     for(auto iElem=0ul; iElem<matrix_size; ++iElem) neighbour_idx[iElem] = -1;
+    END_SU2_OMP_FOR
 
     /*--- Populate ---*/
     SU2_OMP_FOR_STAT(128)
@@ -3395,7 +3403,9 @@ void CGeometry::GetGlobalElementAdjacencyMatrix(vector<unsigned long> &neighbour
         }
       }
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 #ifdef HAVE_MPI
   /*--- Share with all processors ---*/
   {
@@ -3523,6 +3533,7 @@ void CGeometry::SetElemVolume()
     if(nDim==2) elem[iElem]->SetVolume(element->ComputeArea());
     else        elem[iElem]->SetVolume(element->ComputeVolume());
   }
+  END_SU2_OMP_FOR
 
   delete elements[0];
   delete elements[1];
@@ -3531,7 +3542,8 @@ void CGeometry::SetElemVolume()
     delete elements[3];
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CGeometry::SetRotationalVelocity(CConfig *config, bool print) {
diff --git a/Common/src/geometry/CMultiGridGeometry.cpp b/Common/src/geometry/CMultiGridGeometry.cpp
index d15a4d073a8..c027b51fcf8 100644
--- a/Common/src/geometry/CMultiGridGeometry.cpp
+++ b/Common/src/geometry/CMultiGridGeometry.cpp
@@ -1142,7 +1142,9 @@ void CMultiGridGeometry::SetControlVolume(CConfig *config, CGeometry *fine_grid,
     }
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_grid, unsigned short action) {
@@ -1184,7 +1186,9 @@ void CMultiGridGeometry::SetBoundControlVolume(CConfig *config, CGeometry *fine_
       if (Area == 0.0) for (iDim = 0; iDim < nDim; iDim++) NormalFace[iDim] = EPS*EPS;
     }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CMultiGridGeometry::SetCoord(CGeometry *geometry) {
@@ -1202,6 +1206,7 @@ void CMultiGridGeometry::SetCoord(CGeometry *geometry) {
     }
     nodes->SetCoord(Point_Coarse, Coordinates);
   }
+  END_SU2_OMP_FOR
 }
 
 void CMultiGridGeometry::SetMultiGridWallHeatFlux(CGeometry *geometry, unsigned short val_marker){
@@ -1320,6 +1325,7 @@ void CMultiGridGeometry::SetRestricted_GridVelocity(CGeometry *fine_mesh, CConfi
     for (unsigned short iDim = 0; iDim < nDim; iDim++)
       nodes->SetGridVel(Point_Coarse, iDim, Grid_Vel[iDim]);
   }
+  END_SU2_OMP_FOR
 }
 
 
diff --git a/Common/src/geometry/CPhysicalGeometry.cpp b/Common/src/geometry/CPhysicalGeometry.cpp
index 9496a093054..688972ce1f1 100644
--- a/Common/src/geometry/CPhysicalGeometry.cpp
+++ b/Common/src/geometry/CPhysicalGeometry.cpp
@@ -4375,7 +4375,10 @@ void CPhysicalGeometry::Check_IntElem_Orientation(const CConfig *config) {
       }
     }
 
-  }} // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_FOR
+  }
+  END_SU2_OMP_PARALLEL
 
   auto reduce = [](unsigned long& val) {
     unsigned long tmp = val;
@@ -4522,7 +4525,10 @@ void CPhysicalGeometry::Check_BoundElem_Orientation(const CConfig *config) {
         }
       }
     }
-  }} // end SU2_OMP_PARALLEL
+    END_SU2_OMP_FOR
+  }
+  }
+  END_SU2_OMP_PARALLEL
 
   auto reduce = [](unsigned long& val) {
     unsigned long tmp = val;
@@ -4698,6 +4704,7 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
     }
     nodes->SetElems(elems);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Loop over all the points ---*/
@@ -4734,11 +4741,14 @@ void CPhysicalGeometry::SetPoint_Connectivity() {
     /*--- Set the number of neighbors variable, this is important for JST and multigrid in parallel. ---*/
     nodes->SetnNeighbor(iPoint, points[iPoint].size());
   }
+  END_SU2_OMP_FOR
 
   SU2_OMP_MASTER
   nodes->SetPoints(points);
+  END_SU2_OMP_MASTER
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CPhysicalGeometry::SetRCM_Ordering(CConfig *config) {
@@ -6635,6 +6645,7 @@ void CPhysicalGeometry::SetMaxLength(CConfig* config) {
     max_delta = GeometryToolbox::Distance(nDim, Coord_i, Coord_j);
     nodes->SetMaxLength(iPoint, max_delta);
   }
+  END_SU2_OMP_FOR
 
   InitiateComms(this, config, MAX_LENGTH);
   CompleteComms(this, config, MAX_LENGTH);
@@ -7515,10 +7526,12 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     SU2_OMP_FOR_STAT(1024)
     for (auto iEdge = 0ul; iEdge < nEdge; iEdge++)
       edges->SetNormal(iEdge, ZeroArea);
+    END_SU2_OMP_FOR
 
     SU2_OMP_FOR_STAT(1024)
     for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
       nodes->SetVolume(iPoint, 0.0);
+    END_SU2_OMP_FOR
   }
 
   SU2_OMP_MASTER { /*--- The following is difficult to parallelize with threads. ---*/
@@ -7653,7 +7666,9 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     if (nDim == 3) cout <<"Volume of the computational grid: "<< DomainVolume <<"."<< endl;
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- Check if there is a normal with null area ---*/
   SU2_OMP_FOR_STAT(1024)
@@ -7662,6 +7677,7 @@ void CPhysicalGeometry::SetControlVolume(CConfig *config, unsigned short action)
     su2double DefaultArea[MAXNDIM] = {EPS*EPS};
     if (Area2 == 0.0) edges->SetNormal(iEdge, DefaultArea);
   }
+  END_SU2_OMP_FOR
 }
 
 void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned short action) {
@@ -7673,6 +7689,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
     for (unsigned short iMarker = 0; iMarker < nMarker; iMarker++)
       for (unsigned long iVertex = 0; iVertex < nVertex[iMarker]; iVertex++)
         vertex[iMarker][iVertex]->SetZeroValues();
+    END_SU2_OMP_FOR
   }
 
   /*--- Loop over all the boundary elements ---*/
@@ -7738,6 +7755,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
       AD::EndPreacc();
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Check if there is a normal with null area ---*/
 
@@ -7749,6 +7767,7 @@ void CPhysicalGeometry::SetBoundControlVolume(const CConfig *config, unsigned sh
       if (Area2 == 0.0) vertex[iMarker][iVertex]->SetNormal(DefaultArea);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CPhysicalGeometry::VisualizeControlVolume(const CConfig *config) const {
@@ -11030,9 +11049,10 @@ void CPhysicalGeometry::SetWallDistance(const CConfig *config, CADTElemClass *Wa
         nodes->SetRoughnessHeight(iPoint, localRoughness);
       }
     }
+    END_SU2_OMP_FOR
 
   }
-  // end SU2_OMP_PARALLEL
+  END_SU2_OMP_PARALLEL
 }
 
 void CPhysicalGeometry::SetGlobalMarkerRoughness(const CConfig* config) {
diff --git a/Common/src/interface_interpolation/CIsoparametric.cpp b/Common/src/interface_interpolation/CIsoparametric.cpp
index b1ad55af5aa..6cf27e002dd 100644
--- a/Common/src/interface_interpolation/CIsoparametric.cpp
+++ b/Common/src/interface_interpolation/CIsoparametric.cpp
@@ -253,13 +253,16 @@ void CIsoparametric::SetTransferCoeff(const CConfig* const* config) {
       }
 
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       MaxDistance = max(MaxDistance, maxDist);
       ErrorCounter += errorCount;
       nGlobalVertexTarget += totalCount;
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
   } // end nMarkerInt loop
 
diff --git a/Common/src/interface_interpolation/CMirror.cpp b/Common/src/interface_interpolation/CMirror.cpp
index 079bc917bbd..ba805a58c89 100644
--- a/Common/src/interface_interpolation/CMirror.cpp
+++ b/Common/src/interface_interpolation/CMirror.cpp
@@ -231,7 +231,8 @@ void CMirror::SetTransferCoeff(const CConfig* const* config) {
         }
       }
 
-    } // end target loop
+    }
+    END_SU2_OMP_PARALLEL
 
     /*--- Free the heap allocations. ---*/
     for (auto ptr : GlobalIndex) if (ptr != sendGlobalIndex.data()) delete [] ptr;
diff --git a/Common/src/interface_interpolation/CNearestNeighbor.cpp b/Common/src/interface_interpolation/CNearestNeighbor.cpp
index 5d5b00c30c3..0c292068101 100644
--- a/Common/src/interface_interpolation/CNearestNeighbor.cpp
+++ b/Common/src/interface_interpolation/CNearestNeighbor.cpp
@@ -158,13 +158,16 @@ void CNearestNeighbor::SetTransferCoeff(const CConfig* const* config) {
         target_vertex.coefficient[iDonor] = donorInfo[iDonor].dist/denom;
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       totalTargetPoints += numTarget;
       AvgDistance += avgDist;
       MaxDistance = max(MaxDistance, maxDist);
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
     delete[] Buffer_Send_Coord;
     delete[] Buffer_Send_GlobalPoint;
diff --git a/Common/src/interface_interpolation/CRadialBasisFunction.cpp b/Common/src/interface_interpolation/CRadialBasisFunction.cpp
index 2a58e3c107a..8beb3483ecf 100644
--- a/Common/src/interface_interpolation/CRadialBasisFunction.cpp
+++ b/Common/src/interface_interpolation/CRadialBasisFunction.cpp
@@ -218,6 +218,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
                              keepPolynomialRowVec[iMarkerInt], CinvTrucVec[iMarkerInt]);
     }
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Final loop over interface markers to compute the interpolation coefficients. ---*/
 
@@ -381,7 +382,7 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
         }
       }
     } // end target vertex loop
-
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       totalDonorPoints += totalDonors;
@@ -390,7 +391,9 @@ void CRadialBasisFunction::SetTransferCoeff(const CConfig* const* config) {
       AvgCorrection += sumCorr;
       MaxCorrection = max(MaxCorrection, maxCorr);
     }
-    } // end SU2_OMP_PARALLEL
+    END_SU2_OMP_CRITICAL
+    }
+    END_SU2_OMP_PARALLEL
 
     /*--- Free global data that will no longer be used. ---*/
     donorCoord.resize(0,0);
diff --git a/Common/src/linear_algebra/CSysMatrix.cpp b/Common/src/linear_algebra/CSysMatrix.cpp
index fd574c9dd53..c8bd4164d32 100644
--- a/Common/src/linear_algebra/CSysMatrix.cpp
+++ b/Common/src/linear_algebra/CSysMatrix.cpp
@@ -94,12 +94,10 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   if(npoint == 0) return;
 
   if(matrix != nullptr) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("CSysMatrix can only be initialized once.", CURRENT_FUNCTION);
   }
 
   if(nvar > MAXNVAR) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nVar larger than expected, increase MAXNVAR.", CURRENT_FUNCTION);
   }
 
@@ -160,22 +158,17 @@ void CSysMatrix<ScalarType>::Initialize(unsigned long npoint, unsigned long npoi
   }
 
   /*--- Allocate data. ---*/
-#define ALLOC_AND_INIT(ptr,num) {\
-  ptr = MemoryAllocation::aligned_alloc<ScalarType>(64,num*sizeof(ScalarType));\
-  for(size_t k=0; k<num; ++k) ptr[k]=0.0; }
+  auto allocAndInit = [](ScalarType*& ptr, unsigned long num) { // 64-byte aligned; the "true" template argument (ZeroInit) zero-fills the memory
+    ptr = MemoryAllocation::aligned_alloc<ScalarType,true>(64, num*sizeof(ScalarType));
+  };
 
-  ALLOC_AND_INIT(matrix, nnz*nVar*nEqn)
+  allocAndInit(matrix, nnz*nVar*nEqn);
 
   /*--- Preconditioners. ---*/
 
-  if (ilu_needed) {
-    ALLOC_AND_INIT(ILU_matrix, nnz_ilu*nVar*nEqn)
-  }
+  if (ilu_needed) allocAndInit(ILU_matrix, nnz_ilu*nVar*nEqn);
 
-  if (diag_needed) {
-    ALLOC_AND_INIT(invM, nPointDomain*nVar*nEqn);
-  }
-#undef ALLOC_AND_INIT
+  if (diag_needed) allocAndInit(invM, nPointDomain*nVar*nEqn);
 
   /*--- Thread parallel initialization. ---*/
 
@@ -293,6 +286,7 @@ void CSysMatrixComms::Initiate(const CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             bufDSend[buf_offset+iVar] = x(iPoint,iVar);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -330,6 +324,7 @@ void CSysMatrixComms::Initiate(const CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             bufDSend[buf_offset+iVar] = x(iPoint,iVar);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -372,6 +367,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 
     SU2_OMP_MASTER
     SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -411,6 +407,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             x(iPoint,iVar) = CSysMatrix<T>::template ActiveAssign<T>(bufDRecv[buf_offset+iVar]);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -450,6 +447,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
           for (auto iVar = 0ul; iVar < x.GetNVar(); iVar++)
             x(iPoint,iVar) += CSysMatrix<T>::template ActiveAssign<T>(bufDRecv[buf_offset+iVar]);
         }
+        END_SU2_OMP_FOR
         break;
       }
 
@@ -466,6 +464,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 #ifdef HAVE_MPI
   SU2_OMP_MASTER
   SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);
+  END_SU2_OMP_MASTER
 #endif
   SU2_OMP_BARRIER
 
@@ -474,7 +473,7 @@ void CSysMatrixComms::Complete(CSysVector<T>& x, CGeometry *geometry,
 template<class ScalarType>
 void CSysMatrix<ScalarType>::SetValZero() {
   const auto size = nnz*nVar*nEqn;
-  const auto chunk = roundUpDiv(size,omp_get_max_threads());
+  const auto chunk = roundUpDiv(size,omp_get_num_threads());
   const auto begin = chunk * omp_get_thread_num();
   const auto mySize = min(chunk, size-begin) * sizeof(ScalarType);
   memset(&matrix[begin], 0, mySize);
@@ -487,6 +486,7 @@ void CSysMatrix<ScalarType>::SetValDiagonalZero() {
   for (auto iPoint = 0ul; iPoint < nPointDomain; ++iPoint)
     for (auto index = 0ul; index < nVar*nEqn; ++index)
       matrix[dia_ptr[iPoint]*nVar*nEqn + index] = 0.0;
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -598,11 +598,9 @@ void CSysMatrix<ScalarType>::MatrixVectorProduct(const CSysVector<ScalarType> &
   /*--- Some checks for consistency between CSysMatrix and the CSysVector<ScalarType>s ---*/
 #ifndef NDEBUG
   if ((nEqn != vec.GetNVar()) || (nVar != prod.GetNVar())) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nVar values incompatible.", CURRENT_FUNCTION);
   }
   if (nPoint != prod.GetNBlk()) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("nPoint and nBlk values incompatible.", CURRENT_FUNCTION);
   }
 #endif
@@ -617,6 +615,7 @@ void CSysMatrix<ScalarType>::MatrixVectorProduct(const CSysVector<ScalarType> &
   for (auto row_i = 0ul; row_i < nPointDomain; row_i++) {
     RowProduct(vec, row_i, &prod[row_i*nVar]);
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization. ---*/
 
@@ -629,9 +628,10 @@ template<class ScalarType>
 void CSysMatrix<ScalarType>::BuildJacobiPreconditioner() {
 
   /*--- Build Jacobi preconditioner (M = D), compute and store the inverses of the diagonal blocks. ---*/
-  SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait)
+  SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
     InverseDiagonalBlock(iPoint, &(invM[iPoint*nVar*nVar]));
+  END_SU2_OMP_FOR
 
 }
 
@@ -644,6 +644,7 @@ void CSysMatrix<ScalarType>::ComputeJacobiPreconditioner(const CSysVector<Scalar
   SU2_OMP_FOR_DYN(omp_heavy_size)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
     MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]);
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
   CSysMatrixComms::Initiate(prod, geometry, config);
@@ -661,12 +662,14 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
     SU2_OMP_FOR_STAT(omp_light_size)
     for (auto iVar = 0ul; iVar < nnz*nVar*nVar; ++iVar)
       ILU_matrix[iVar] = matrix[iVar];
+    END_SU2_OMP_FOR
   }
   else {
     /*--- ILUn clear the ILU matrix first. ---*/
     SU2_OMP_FOR_STAT(omp_light_size)
     for (auto iVar = 0ul; iVar < nnz_ilu*nVar*nVar; iVar++)
       ILU_matrix[iVar] = 0.0;
+    END_SU2_OMP_FOR
 
     /*--- ILUn, traverse matrix to access its blocks
      *    sequentially and set them in the ILU matrix. ---*/
@@ -677,6 +680,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
         SetBlock_ILUMatrix(iPoint, jPoint, &matrix[index*nVar*nVar]);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Transform system in Upper Matrix ---*/
@@ -751,6 +755,7 @@ void CSysMatrix<ScalarType>::BuildILUPreconditioner() {
     InverseDiagonalBlock_ILUMatrix(end-1, &invM[(end-1)*nVar*nVar]);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -804,6 +809,7 @@ void CSysMatrix<ScalarType>::ComputeILUPreconditioner(const CSysVector<ScalarTyp
       MatrixVectorProduct(&invM[iPoint*nVar*nVar], aux_vec, &prod[iPoint*nVar]);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -842,6 +848,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
       Gauss_Elimination(iPoint, &prod[idx]);              // Solve D.x* = y
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -871,6 +878,7 @@ void CSysMatrix<ScalarType>::ComputeLU_SGSPreconditioner(const CSysVector<Scalar
       Gauss_Elimination(iPoint, &prod[idx]);            // Solve D.x* = y
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -1052,10 +1060,11 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
 
   /*--- Jacobi preconditioning where there is no linelet ---*/
 
-  SU2_OMP(for schedule(dynamic,omp_heavy_size) nowait)
+  SU2_OMP_FOR_(schedule(dynamic,omp_heavy_size) SU2_NOWAIT)
   for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++)
     if (!LineletBool[iPoint])
       MatrixVectorProduct(&(invM[iPoint*nVar*nVar]), &vec[iPoint*nVar], &prod[iPoint*nVar]);
+  END_SU2_OMP_FOR
 
   /*--- Solve each linelet using the Thomas algorithm ---*/
 
@@ -1143,6 +1152,7 @@ void CSysMatrix<ScalarType>::ComputeLineletPreconditioner(const CSysVector<Scala
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI Parallelization ---*/
 
@@ -1161,6 +1171,7 @@ void CSysMatrix<ScalarType>::ComputeResidual(const CSysVector<ScalarType> & sol,
     RowProduct(sol, iPoint, aux_vec);
     VectorSubtraction(aux_vec, &f[iPoint*nVar], &res[iPoint*nVar]);
   }
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -1262,6 +1273,7 @@ void CSysMatrix<ScalarType>::SetDiagonalAsColumnSum() {
       if (block_ji != block_ii) MatrixSubtraction(block_ii, block_ji, block_ii);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 template<class ScalarType>
@@ -1287,13 +1299,14 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
 
   if (edge_ptr) {
     /*--- The FV way. ---*/
-    SU2_OMP_FOR_DYN(omp_light_size/2)
+    SU2_OMP_FOR_DYN(omp_heavy_size*2)
     for (auto iEdge = 0ul; iEdge < edge_ptr.nEdge; ++iEdge) {
       auto bij = &matrix[edge_ptr(iEdge,0)*nVar*nVar];
       auto bji = &matrix[edge_ptr(iEdge,1)*nVar*nVar];
 
       swapAndTransp(nVar, bij, bji);
     }
+    END_SU2_OMP_FOR
   }
   else if (col_ptr) {
     /*--- If the column pointer was built. ---*/
@@ -1306,6 +1319,7 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
         swapAndTransp(nVar, bij, bji);
       }
     }
+    END_SU2_OMP_FOR
   }
   else {
     /*--- Slow fallback, needs to search for ji. ---*/
@@ -1320,6 +1334,7 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
         swapAndTransp(nVar, bij, bji);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Transpose the diagonal blocks. ---*/
@@ -1331,9 +1346,12 @@ void CSysMatrix<ScalarType>::TransposeInPlace() {
       for (auto j=0ul; j<i; ++j)
         std::swap(bii[i*nVar+j], bii[j*nVar+i]);
   }
+  END_SU2_OMP_FOR
 
 #ifdef HAVE_PASTIX
+  SU2_OMP_MASTER
   pastix_wrapper.SetTransposedSolve();
+  END_SU2_OMP_MASTER
 #endif
 }
 
@@ -1346,13 +1364,13 @@ void CSysMatrix<ScalarType>::MatrixMatrixAddition(ScalarType alpha, const CSysMa
             (nVar == B.nVar) && (nEqn == B.nEqn) && (nnz == B.nnz);
 
   if (!ok) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Matrices do not have compatible sparsity.", CURRENT_FUNCTION);
   }
 
   SU2_OMP_FOR_STAT(omp_light_size)
   for (auto i = 0ul; i < nnz*nVar*nEqn; ++i)
     matrix[i] += alpha*B.matrix[i];
+  END_SU2_OMP_FOR
 
 }
 
@@ -1366,9 +1384,9 @@ void CSysMatrix<ScalarType>::BuildPastixPreconditioner(CGeometry *geometry, cons
     pastix_wrapper.SetMatrix(nVar,nPoint,nPointDomain,row_ptr,col_ind,matrix);
     pastix_wrapper.Factorize(geometry, config, kind_fact);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 #else
-  SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
 #endif
 }
@@ -1380,12 +1398,12 @@ void CSysMatrix<ScalarType>::ComputePastixPreconditioner(const CSysVector<Scalar
   SU2_OMP_BARRIER
   SU2_OMP_MASTER
   pastix_wrapper.Solve(vec,prod);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CSysMatrixComms::Initiate(prod, geometry, config);
   CSysMatrixComms::Complete(prod, geometry, config);
 #else
-  SU2_OMP_MASTER
   SU2_MPI::Error("SU2 was not compiled with -DHAVE_PASTIX", CURRENT_FUNCTION);
 #endif
 }
diff --git a/Common/src/linear_algebra/CSysSolve.cpp b/Common/src/linear_algebra/CSysSolve.cpp
index 1adfc9197dc..66306655407 100644
--- a/Common/src/linear_algebra/CSysSolve.cpp
+++ b/Common/src/linear_algebra/CSysSolve.cpp
@@ -130,7 +130,6 @@ void CSysSolve<ScalarType>::ModGramSchmidt(int i, su2matrix<ScalarType>& Hsbg,
 
   if ((nrm <= 0.0) || (nrm != nrm)) {
     /*--- nrm is the result of a dot product, communications are implicitly handled. ---*/
-    SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES orthogonalization failed, linear solver diverged.", CURRENT_FUNCTION);
   }
 
@@ -209,7 +208,6 @@ unsigned long CSysSolve<ScalarType>::CG_LinSolver(const CSysVector<ScalarType> &
   /*--- Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
   }
 
@@ -230,6 +228,7 @@ unsigned long CSysSolve<ScalarType>::CG_LinSolver(const CSysVector<ScalarType> &
 
       cg_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -350,12 +349,10 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
   /*---  Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
   }
 
   if (m > 5000) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("FGMRES subspace is too large.", CURRENT_FUNCTION);
   }
 
@@ -371,6 +368,7 @@ unsigned long CSysSolve<ScalarType>::FGMRES_LinSolver(const CSysVector<ScalarTyp
         for (auto& z : Z) z.Initialize(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
       }
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -520,6 +518,7 @@ unsigned long CSysSolve<ScalarType>::RFGMRES_LinSolver(const CSysVector<ScalarTy
     xIsZero = false;
     tol_type = LinearToleranceType::ABSOLUTE;
   }
+  END_SU2_OMP_MASTER
 
   const ScalarType norm0 = b.norm(); // <- Has a barrier
 
@@ -545,7 +544,6 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
   /*--- Check the subspace size ---*/
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
   }
 
@@ -567,6 +565,7 @@ unsigned long CSysSolve<ScalarType>::BCGSTAB_LinSolver(const CSysVector<ScalarTy
 
       bcg_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -710,7 +709,6 @@ unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarT
   const ScalarType omega = SU2_TYPE::GetValue(config->GetLinear_Solver_Smoother_Relaxation());
 
   if (m < 1) {
-    SU2_OMP_MASTER
     SU2_MPI::Error("Number of linear solver iterations must be greater than 0.", CURRENT_FUNCTION);
   }
 
@@ -730,6 +728,7 @@ unsigned long CSysSolve<ScalarType>::Smoother_LinSolver(const CSysVector<ScalarT
 
       smooth_ready = true;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -860,11 +859,14 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
   if (config->GetDiscrete_Adjoint()) {
 #ifdef CODI_REVERSE_TYPE
 
-    TapeActive = AD::globalTape.isActive();
-
-    AD::StartExtFunc(false, false);
+    TapeActive = AD::getGlobalTape().isActive();
 
-    AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
+    SU2_OMP_MASTER {
+      AD::StartExtFunc(false, false);
+      AD::SetExtFuncIn(&LinSysRes[0], LinSysRes.GetLocSize());
+    }
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
 
     AD::StopRecording();
 #endif
@@ -920,6 +922,7 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     Residual = residual;
     Iterations = IterLinSol;
   }
+  END_SU2_OMP_MASTER
 
   HandleTemporariesOut(LinSysSol);
 
@@ -933,22 +936,6 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
     if (!mesh_deform) KindPrecond = config->GetKind_DiscAdj_Linear_Prec();
     else              KindPrecond = config->GetKind_Deform_Linear_Solver_Prec();
 
-    /*--- Start recording if it was stopped for the linear solver ---*/
-
-    AD::StartRecording();
-
-    AD::SetExtFuncOut(&LinSysSol[0], (int)LinSysSol.GetLocSize());
-
-#ifdef CODI_REVERSE_TYPE
-    AD::FuncHelper->addUserData(&LinSysRes);
-    AD::FuncHelper->addUserData(&LinSysSol);
-    AD::FuncHelper->addUserData(&Jacobian);
-    AD::FuncHelper->addUserData(geometry);
-    AD::FuncHelper->addUserData(config);
-    AD::FuncHelper->addUserData(this);
-    AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
-#endif
-
     /*--- Build preconditioner for the transposed Jacobian ---*/
 
     if (RequiresTranspose) Jacobian.TransposeInPlace();
@@ -972,7 +959,30 @@ unsigned long CSysSolve<ScalarType>::Solve(CSysMatrix<ScalarType> & Jacobian, co
         break;
     }
 
+    /*--- Start recording if it was stopped for the linear solver ---*/
+#ifdef CODI_REVERSE_TYPE
+    AD::StartRecording();
+    SU2_OMP_BARRIER
+
+    SU2_OMP_MASTER {
+      AD::SetExtFuncOut(&LinSysSol[0], LinSysSol.GetLocSize());
+      AD::FuncHelper->addUserData(&LinSysRes);
+      AD::FuncHelper->addUserData(&LinSysSol);
+      AD::FuncHelper->addUserData(&Jacobian);
+      AD::FuncHelper->addUserData(geometry);
+      AD::FuncHelper->addUserData(config);
+      AD::FuncHelper->addUserData(this);
+    }
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER
+
+    AD::FuncHelper->addToTape(CSysSolve_b<ScalarType>::Solve_b);
+    SU2_OMP_BARRIER
+
+    SU2_OMP_MASTER
     AD::EndExtFunc();
+    END_SU2_OMP_MASTER
+#endif
   }
 
   return IterLinSol;
@@ -1059,7 +1069,10 @@ unsigned long CSysSolve<ScalarType>::Solve_b(CSysMatrix<ScalarType> & Jacobian,
 
   delete precond;
 
+  SU2_OMP_MASTER
   Iterations = IterLinSol;
+  END_SU2_OMP_MASTER
+
   return IterLinSol;
 
 }
diff --git a/Common/src/linear_algebra/CSysSolve_b.cpp b/Common/src/linear_algebra/CSysSolve_b.cpp
index 4804fdfb2ed..a2d737a502c 100644
--- a/Common/src/linear_algebra/CSysSolve_b.cpp
+++ b/Common/src/linear_algebra/CSysSolve_b.cpp
@@ -1,7 +1,7 @@
 /*!
  * \file CSysSolve_b.cpp
  * \brief Routines for the linear solver used in the reverse sweep of AD.
- * \author T. Albring
+ * \author T. Albring, J. Blühdorn
  * \version 7.1.1 "Blackbird"
  *
  * SU2 Project Website: https://su2code.github.io
@@ -37,36 +37,40 @@ void CSysSolve_b<ScalarType>::Solve_b(const codi::RealReverse::Real* x, codi::Re
                                       codi::DataStore* d) {
 
   CSysVector<su2double>* LinSysRes_b = nullptr;
-  d->getData(LinSysRes_b);
+  d->getDataByIndex(LinSysRes_b, 0);
 
   CSysVector<su2double>* LinSysSol_b = nullptr;
-  d->getData(LinSysSol_b);
+  d->getDataByIndex(LinSysSol_b, 1);
 
   CSysMatrix<ScalarType>* Jacobian = nullptr;
-  d->getData(Jacobian);
+  d->getDataByIndex(Jacobian, 2);
 
   CGeometry* geometry = nullptr;
-  d->getData(geometry);
+  d->getDataByIndex(geometry, 3);
 
   const CConfig* config = nullptr;
-  d->getData(config);
+  d->getDataByIndex(config, 4);
 
   CSysSolve<ScalarType>* solver = nullptr;
-  d->getData(solver);
+  d->getDataByIndex(solver, 5);
 
   /*--- Initialize the right-hand side with the gradient of the solution of the primal linear system ---*/
 
+  SU2_OMP_BARRIER
+  SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads()))
   for (unsigned long i = 0; i < n; i++) {
     (*LinSysRes_b)[i] = y_b[i];
     (*LinSysSol_b)[i] = 0.0;
   }
+  END_SU2_OMP_FOR
 
   solver->Solve_b(*Jacobian, *LinSysRes_b, *LinSysSol_b, geometry, config, false);
 
+  SU2_OMP_FOR_STAT(roundUpDiv(n,omp_get_num_threads()))
   for (unsigned long i = 0; i < n; i ++) {
-    x_b[i] = SU2_TYPE::GetValue(LinSysSol_b->operator [](i));
+    x_b[i] = SU2_TYPE::GetValue((*LinSysSol_b)[i]);
   }
-
+  END_SU2_OMP_FOR
 }
 
 template class CSysSolve_b<su2mixedfloat>;
diff --git a/Common/src/linear_algebra/CSysVector.cpp b/Common/src/linear_algebra/CSysVector.cpp
index 4477d8b3fe1..9cb66905fde 100644
--- a/Common/src/linear_algebra/CSysVector.cpp
+++ b/Common/src/linear_algebra/CSysVector.cpp
@@ -50,7 +50,7 @@ void CSysVector<ScalarType>::Initialize(unsigned long numBlk, unsigned long numB
 
   omp_chunk_size = computeStaticChunkSize(nElm, omp_get_max_threads(), OMP_MAX_SIZE);
 
-  if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc<ScalarType>(64, nElm * sizeof(ScalarType));
+  if (vec_val == nullptr) vec_val = MemoryAllocation::aligned_alloc<ScalarType,true>(64, nElm*sizeof(ScalarType));
 
   if (val != nullptr) {
     if (!valIsArray) {
@@ -63,6 +63,8 @@ void CSysVector<ScalarType>::Initialize(unsigned long numBlk, unsigned long numB
 
 template <class ScalarType>
 CSysVector<ScalarType>::~CSysVector() {
+  if (!std::is_trivial<ScalarType>::value)  // only non-trivial element types get the per-element teardown
+    for (auto i = 0ul; i < nElm; i++) vec_val[i].~ScalarType();  // explicit destructor call; storage is released below
   MemoryAllocation::aligned_free(vec_val);
 }
 
diff --git a/Common/src/meson.build b/Common/src/meson.build
index 5dcbb57c66f..b3e0726e70c 100644
--- a/Common/src/meson.build
+++ b/Common/src/meson.build
@@ -3,7 +3,8 @@ common_src =files(['graph_coloring_structure.cpp',
            'CConfig.cpp',
            'basic_types/ad_structure.cpp',
            'wall_model.cpp',
-           '../include/parallelization/mpi_structure.cpp'])
+           '../include/parallelization/mpi_structure.cpp',
+           '../include/parallelization/omp_structure.cpp'])
 
 subdir('linear_algebra')
 subdir('toolboxes')
diff --git a/Common/src/toolboxes/CLinearPartitioner.cpp b/Common/src/toolboxes/CLinearPartitioner.cpp
index 6a45f4fb20f..16ac5373762 100644
--- a/Common/src/toolboxes/CLinearPartitioner.cpp
+++ b/Common/src/toolboxes/CLinearPartitioner.cpp
@@ -28,9 +28,9 @@
 
 #include "../../include/toolboxes/CLinearPartitioner.hpp"
 
-CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
-                                       unsigned long val_offset,
-                                       bool          isDisjoint) {
+void CLinearPartitioner::Initialize(unsigned long global_count,
+                                    unsigned long offset,
+                                    bool isDisjoint) {
 
   /*--- Store MPI size ---*/
 
@@ -48,10 +48,10 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
    balancing for any remainder points. ---*/
 
   unsigned long quotient = 0;
-  if (val_global_count >= (unsigned long)size)
-    quotient = val_global_count/size;
+  if (global_count >= (unsigned long)size)
+    quotient = global_count/size;
 
-  int remainder = int(val_global_count%size);
+  int remainder = int(global_count%size);
   for (int ii = 0; ii < size; ii++) {
     sizeOnRank[ii] = quotient + int(ii < remainder);
   }
@@ -63,7 +63,7 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
   unsigned long adjust = 0;
   if (isDisjoint) adjust = 1;
 
-  firstIndex[0] = val_offset;
+  firstIndex[0] = offset;
   lastIndex[0]  = firstIndex[0] + sizeOnRank[0] - adjust;
   cumulativeSizeBeforeRank[0] = 0;
   for (int iProc = 1; iProc < size; iProc++) {
@@ -72,17 +72,15 @@ CLinearPartitioner::CLinearPartitioner(unsigned long val_global_count,
     cumulativeSizeBeforeRank[iProc] = (cumulativeSizeBeforeRank[iProc-1] +
                                        sizeOnRank[iProc-1]);
   }
-  cumulativeSizeBeforeRank[size] = val_global_count;
+  cumulativeSizeBeforeRank[size] = global_count;
 
 }
 
-CLinearPartitioner::~CLinearPartitioner(void) { }
-
-unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index) {
+unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long index) const {
 
   /*--- Initial guess ---*/
 
-  unsigned long iProcessor = val_index/sizeOnRank[0];
+  unsigned long iProcessor = index/sizeOnRank[0];
 
   /*--- Guard against going over size. ---*/
 
@@ -91,11 +89,11 @@ unsigned long CLinearPartitioner::GetRankContainingIndex(unsigned long val_index
 
   /*--- Move up or down until we find the processor. ---*/
 
-  if (val_index >= cumulativeSizeBeforeRank[iProcessor])
-    while(val_index >= cumulativeSizeBeforeRank[iProcessor+1])
+  if (index >= cumulativeSizeBeforeRank[iProcessor])
+    while(index >= cumulativeSizeBeforeRank[iProcessor+1])
       iProcessor++;
   else
-    while(val_index < cumulativeSizeBeforeRank[iProcessor])
+    while(index < cumulativeSizeBeforeRank[iProcessor])
       iProcessor--;
 
   return iProcessor;
diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
index cbf999f412e..1d4d187fb83 100755
--- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
+++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncEulerSolution.py
@@ -6,22 +6,12 @@
 #  \author T. Economon
 #  \version 7.1.1 "Blackbird"
 #
-# The current SU2 release has been coordinated by the
-# SU2 International Developers Society <www.su2devsociety.org>
-# with selected contributions from the open-source community.
+# SU2 Project Website: https://su2code.github.io
 #
-# The main research teams contributing to the current release are:
-#  - Prof. Juan J. Alonso's group at Stanford University.
-#  - Prof. Piero Colonna's group at Delft University of Technology.
-#  - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology.
-#  - Prof. Alberto Guardone's group at Polytechnic University of Milan.
-#  - Prof. Rafael Palacios' group at Imperial College London.
-#  - Prof. Vincent Terrapon's group at the University of Liege.
-#  - Prof. Edwin van der Weide's group at the University of Twente.
-#  - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics.
+# The SU2 Project is maintained by the SU2 Foundation
+# (http://su2foundation.org)
 #
-# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon,
-#                      Tim Albring, and the SU2 contributors.
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
index 614c458c103..c38335336aa 100755
--- a/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
+++ b/Common/src/toolboxes/MMS/CreateMMSSourceTerms/CMMSIncNSSolution.py
@@ -6,22 +6,12 @@
 #  \author T. Economon
 #  \version 7.1.1 "Blackbird"
 #
-# The current SU2 release has been coordinated by the
-# SU2 International Developers Society <www.su2devsociety.org>
-# with selected contributions from the open-source community.
+# SU2 Project Website: https://su2code.github.io
 #
-# The main research teams contributing to the current release are:
-#  - Prof. Juan J. Alonso's group at Stanford University.
-#  - Prof. Piero Colonna's group at Delft University of Technology.
-#  - Prof. Nicolas R. Gauger's group at Kaiserslautern University of Technology.
-#  - Prof. Alberto Guardone's group at Polytechnic University of Milan.
-#  - Prof. Rafael Palacios' group at Imperial College London.
-#  - Prof. Vincent Terrapon's group at the University of Liege.
-#  - Prof. Edwin van der Weide's group at the University of Twente.
-#  - Lab. of New Concepts in Aeronautics at Tech. Institute of Aeronautics.
+# The SU2 Project is maintained by the SU2 Foundation
+# (http://su2foundation.org)
 #
-# Copyright 2012-2020, Francisco D. Palacios, Thomas D. Economon,
-#                      Tim Albring, and the SU2 contributors.
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
index 9b69188b570..38934f8a2d9 100644
--- a/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsGreenGauss.hpp
@@ -127,6 +127,7 @@ void computeGradientsGreenGauss(CSolver* solver,
 
     AD::EndPreacc();
   }
+  END_SU2_OMP_FOR
 
   /*--- Add boundary fluxes. ---*/
 
@@ -160,6 +161,7 @@ void computeGradientsGreenGauss(CSolver* solver,
             gradient(iPoint, iVar, iDim) -= flux * area[iDim];
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
diff --git a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
index 802de977dbf..dcd923901dc 100644
--- a/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
+++ b/SU2_CFD/include/gradients/computeGradientsLeastSquares.hpp
@@ -76,35 +76,35 @@ FORCEINLINE void solveLeastSquares(size_t iPoint,
 
   /*--- Entries of upper triangular matrix R. ---*/
 
+  if (periodic) {
+    AD::StartPreacc();
+    AD::SetPreaccIn(Rmatrix(iPoint,0,0));
+    AD::SetPreaccIn(Rmatrix(iPoint,0,1));
+    AD::SetPreaccIn(Rmatrix(iPoint,1,1));
+  }
+
   su2double r11 = Rmatrix(iPoint,0,0);
   su2double r12 = Rmatrix(iPoint,0,1);
   su2double r22 = Rmatrix(iPoint,1,1);
   su2double r13 = 0.0, r23 = 0.0, r33 = 1.0;
 
-  if (periodic) {
-    AD::StartPreacc();
-    AD::SetPreaccIn(r11);
-    AD::SetPreaccIn(r12);
-    AD::SetPreaccIn(r22);
-  }
-
   r11 = sqrt(max(r11, eps));
   r12 /= r11;
   r22 = sqrt(max(r22 - r12*r12, eps));
 
   if (nDim == 3) {
+    if (periodic) {
+      AD::SetPreaccIn(Rmatrix(iPoint,0,2));
+      AD::SetPreaccIn(Rmatrix(iPoint,1,2));
+      AD::SetPreaccIn(Rmatrix(iPoint,2,1));
+      AD::SetPreaccIn(Rmatrix(iPoint,2,2));
+    }
+
     r13 = Rmatrix(iPoint,0,2);
     r33 = Rmatrix(iPoint,2,2);
     const auto r23_a = Rmatrix(iPoint,1,2);
     const auto r23_b = Rmatrix(iPoint,2,1);
 
-    if (periodic) {
-      AD::SetPreaccIn(r13);
-      AD::SetPreaccIn(r23_a);
-      AD::SetPreaccIn(r23_b);
-      AD::SetPreaccIn(r33);
-    }
-
     r13 /= r11;
     r23 = r23_a/r22 - r23_b*r12/(r11*r22);
     r33 = sqrt(max(r33 - r23*r23 - r13*r13, eps));
@@ -284,6 +284,7 @@ void computeGradientsLeastSquares(CSolver* solver,
       solveLeastSquares<nDim, false>(iPoint, varBegin, varEnd, Rmatrix, gradient);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the gradient values across any periodic boundaries. ---*/
 
@@ -300,6 +301,7 @@ void computeGradientsLeastSquares(CSolver* solver,
     SU2_OMP_FOR_DYN(chunkSize)
     for (size_t iPoint = 0; iPoint < nPointDomain; ++iPoint)
       solveLeastSquares<nDim, true>(iPoint, varBegin, varEnd, Rmatrix, gradient);
+    END_SU2_OMP_FOR
   }
 
   /*--- If no solver was provided we do not communicate ---*/
diff --git a/SU2_CFD/include/integration/CNewtonIntegration.hpp b/SU2_CFD/include/integration/CNewtonIntegration.hpp
index 8d9b4bfc3b6..1149e704fb6 100644
--- a/SU2_CFD/include/integration/CNewtonIntegration.hpp
+++ b/SU2_CFD/include/integration/CNewtonIntegration.hpp
@@ -33,12 +33,14 @@
 
 #ifdef HAVE_OMP
 #ifdef HAVE_OMP_SIMD
-#define CNEWTON_PARFOR SU2_OMP(for simd schedule(static,omp_chunk_size) nowait)
+#define CNEWTON_PARFOR SU2_OMP_FOR_(simd schedule(static,omp_chunk_size) SU2_NOWAIT)
 #else
-#define CNEWTON_PARFOR SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+#define CNEWTON_PARFOR SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
 #endif
+#define END_CNEWTON_PARFOR END_SU2_OMP_FOR  // placed after every loop opened with CNEWTON_PARFOR
 #else
 #define CNEWTON_PARFOR SU2_OMP_SIMD
+#define END_CNEWTON_PARFOR  // expands to nothing when HAVE_OMP is not defined
 #endif
 
 /*!
@@ -114,6 +116,7 @@ class CNewtonIntegration final : public CIntegration {
   inline void SetSolutionResult(CSysVector<T>& x) const {
     CNEWTON_PARFOR
     for (auto i = 0ul; i < x.GetLocSize(); ++i) x[i] = LinSysSol[i];
+    END_CNEWTON_PARFOR
   }
 
   /*--- Preconditioner objects for each active solver. ---*/
@@ -127,11 +130,13 @@ class CNewtonIntegration final : public CIntegration {
                                            unsigned long iters, Scalar& eps) const {
     CNEWTON_PARFOR
     for (auto i = 0ul; i < u.GetLocSize(); ++i) precondIn[i] = u[i];
+    END_CNEWTON_PARFOR
 
     iters = Preconditioner_impl(precondIn, precondOut, iters, eps);
 
     CNEWTON_PARFOR
     for (auto i = 0ul; i < u.GetLocSize(); ++i) v[i] = precondOut[i];
+    END_CNEWTON_PARFOR
     SU2_OMP_BARRIER
 
     return iters;
@@ -212,3 +217,4 @@ class CNewtonIntegration final : public CIntegration {
 };
 
 #undef CNEWTON_PARFOR
+#undef END_CNEWTON_PARFOR
diff --git a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
index 82b2485ffef..083222664b8 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFEAIteration.hpp
@@ -37,11 +37,23 @@ class CFEAIteration;
  * \brief Class for driving an iteration of the discrete adjoint FEM system.
  * \author R. Sanchez
  */
-class CDiscAdjFEAIteration : public CIteration {
+class CDiscAdjFEAIteration final : public CIteration {
  private:
   CFEAIteration* fem_iteration;    /*!< \brief Pointer to the primal iteration class. */
   unsigned short CurrentRecording; /*!< \brief Stores the current status of the recording. */
 
+  /*!
+   * \brief Load the solution for dynamic problems.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                            unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -92,25 +104,6 @@ class CDiscAdjFEAIteration : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Updates the containers for the discrete adjoint mean flow system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   */
-  void Update(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-              CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-              CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-              unsigned short val_iInst) override;
-
   /*!
    * \brief Monitors the convergence and other metrics for the discrete adjoint mean flow system.
    * \param[in] output - Pointer to the COutput class.
@@ -180,26 +173,6 @@ class CDiscAdjFEAIteration : public CIteration {
   void InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short iZone,
                          unsigned short iInst) override;
 
-  /*!
-   * \brief Record a single iteration of the direct FEM system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] kind_recording - The kind of recording (geometry or flow).
-   */
-  void SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                    CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                    CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                    unsigned short val_iInst, unsigned short kind_recording);
-
   /*!
    * \brief Record a single iteration of the direct FEM system.
    * \param[in] solver - Container vector with all the solutions.
@@ -209,7 +182,6 @@ class CDiscAdjFEAIteration : public CIteration {
    * \param[in] val_iInst - Index of the instance.
    * \param[in] kind_recording - The kind of recording (geometry or flow).
    */
-
   void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                     unsigned short val_iInst, unsigned short kind_recording) override;
 
@@ -226,15 +198,4 @@ class CDiscAdjFEAIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load solution for dynamic problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                            unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
index 93d1a9d2052..8647f709285 100644
--- a/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjFluidIteration.hpp
@@ -37,10 +37,22 @@ class CFluidIteration;
  * \brief Class for driving an iteration of the discrete adjoint fluid system.
  * \author T. Economon
  */
-class CDiscAdjFluidIteration : public CIteration {
+class CDiscAdjFluidIteration final : public CIteration {
  private:
   const bool turbulent;                      /*!< \brief Stores the turbulent flag. */
 
+  /*!
+   * \brief Load the solution of a given direct iteration (used for unsteady problems).
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                             unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -126,25 +138,6 @@ class CDiscAdjFluidIteration : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Postprocess the discrete adjoint fluid iteration.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   */
-  void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                   CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                   CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                   unsigned short val_iInst) override;
-
   /*!
    * \brief Registers all input variables of the fluid iteration.
    * \param[in] solver - Container vector with all the solutions.
@@ -188,7 +181,6 @@ class CDiscAdjFluidIteration : public CIteration {
    * \param[in] val_iInst - Index of the instance.
    * \param[in] kind_recording - The kind of recording (geometry or flow).
    */
-
   void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                     unsigned short val_iInst, unsigned short kind_recording) override;
 
@@ -205,15 +197,4 @@ class CDiscAdjFluidIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load unsteady solution for unsteady problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                             unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
index 8c69d1162f8..ce981317897 100644
--- a/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
+++ b/SU2_CFD/include/iteration/CDiscAdjHeatIteration.hpp
@@ -35,7 +35,20 @@
  * \brief Class for driving an iteration of the discrete adjoint heat equation.
  * \author O. Burghardt
  */
-class CDiscAdjHeatIteration : public CIteration {
+class CDiscAdjHeatIteration final : public CIteration {
+
+  /*!
+   * \brief Load the solution of a given direct iteration (used for unsteady problems).
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] solver - Container vector with all the solutions.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] val_iZone - Index of the zone.
+   * \param[in] val_iInst - Index of the instance layer.
+   * \param[in] val_DirectIter - Direct iteration to load.
+   */
+  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
+                             unsigned short val_iInst, int val_DirectIter);
+
  public:
   /*!
    * \brief Constructor of the class.
@@ -108,31 +121,6 @@ class CDiscAdjHeatIteration : public CIteration {
                CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
                unsigned short val_iInst) override;
 
-  /*!
-   * \brief Outputs desired files and quantities for the discrete adjoint fluid system.
-   */
-  void Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned long InnerIter,
-              bool StopCalc, unsigned short val_iZone, unsigned short val_iInst);
-
-  /*!
-   * \brief Perform a single iteration of the adjoint fluid system.
-   * \param[in] output - Pointer to the COutput class.
-   * \param[in] integration - Container vector with all the integration methods.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] numerics - Description of the numerical method (the way in which the equations are solved).
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] surface_movement - Surface movement classes of the problem.
-   * \param[in] grid_movement - Volume grid movement classes of the problem.
-   * \param[in] FFDBox - FFD FFDBoxes of the problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance layer.
-   */
-  void Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry, CSolver***** solver,
-                   CNumerics****** numerics, CConfig** config, CSurfaceMovement** surface_movement,
-                   CVolumetricMovement*** grid_movement, CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                   unsigned short val_iInst) override;
-
   /*!
    * \brief Registers all input variables of the fluid iteration.
    * \param[in] solver - Container vector with all the solutions.
@@ -180,15 +168,4 @@ class CDiscAdjHeatIteration : public CIteration {
   void SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics, CConfig** config,
                        unsigned short iZone, unsigned short iInst, unsigned short kind_recording) override;
 
-  /*!
-   * \brief load unsteady solution for unsteady problems
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] val_iZone - Index of the zone.
-   * \param[in] val_iInst - Index of the instance layer.
-   * \param[in] val_DirectIter - Direct iteration to load.
-   */
-  void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config, unsigned short val_iZone,
-                             unsigned short val_iInst, int val_DirectIter) override;
 };
diff --git a/SU2_CFD/include/iteration/CIteration.hpp b/SU2_CFD/include/iteration/CIteration.hpp
index 961fdb9ed6a..05947c02402 100644
--- a/SU2_CFD/include/iteration/CIteration.hpp
+++ b/SU2_CFD/include/iteration/CIteration.hpp
@@ -280,12 +280,6 @@ class CIteration {
   virtual void RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config, COutput* output,
                               unsigned short iZone, unsigned short iInst) {}
 
-  virtual void LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                     unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {}
-
-  virtual void LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                    unsigned short val_iZone, unsigned short val_iInst, int val_DirectIter) {}
-
   virtual void SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config, unsigned short val_iZone,
                             unsigned short val_iInst, unsigned short kind_recording) {}
 };
diff --git a/SU2_CFD/include/limiters/CLimiterDetails.hpp b/SU2_CFD/include/limiters/CLimiterDetails.hpp
index 24a38c5cc53..85fc7a1322e 100644
--- a/SU2_CFD/include/limiters/CLimiterDetails.hpp
+++ b/SU2_CFD/include/limiters/CLimiterDetails.hpp
@@ -177,6 +177,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       sharedMin.resize(varEnd) = largeNum;
       sharedMax.resize(varEnd) =-largeNum;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Per thread reduction. ---*/
@@ -185,7 +186,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
     localMin = largeNum;
     localMax =-largeNum;
 
-    SU2_OMP(for schedule(static, 512) nowait)
+    SU2_OMP_FOR_(schedule(static, 512) SU2_NOWAIT)
     for(size_t iPoint = 0; iPoint < geometry.GetnPointDomain(); ++iPoint)
     {
       for(size_t iVar = varBegin; iVar < varEnd; ++iVar)
@@ -194,6 +195,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
         localMax(iVar) = max(localMax(iVar), field(iPoint, iVar));
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Per rank reduction. ---*/
 
@@ -203,6 +205,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       sharedMin(iVar) = min(sharedMin(iVar), localMin(iVar));
       sharedMax(iVar) = max(sharedMax(iVar), localMax(iVar));
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Global reduction. ---*/
@@ -215,6 +218,7 @@ struct CLimiterDetails<VENKATAKRISHNAN_WANG>
       localMax = sharedMax;
       SU2_MPI::Allreduce(localMax.data(), sharedMax.data(), varEnd, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Compute eps^2 (each thread has its own copy of it). ---*/
diff --git a/SU2_CFD/include/limiters/computeLimiters.hpp b/SU2_CFD/include/limiters/computeLimiters.hpp
index e5324ecec27..e3016ab0d45 100644
--- a/SU2_CFD/include/limiters/computeLimiters.hpp
+++ b/SU2_CFD/include/limiters/computeLimiters.hpp
@@ -68,6 +68,7 @@ if (geometry.GetnDim() == 2) {\
       for(size_t iPoint = 0; iPoint < geometry.GetnPoint(); ++iPoint)
         for(size_t iVar = varBegin; iVar < varEnd; ++iVar)
          limiter(iPoint, iVar) = 1.0;
+      END_SU2_OMP_FOR
       break;
     }
     case BARTH_JESPERSEN:
diff --git a/SU2_CFD/include/limiters/computeLimiters_impl.hpp b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
index b8725f355b8..2876c889f66 100644
--- a/SU2_CFD/include/limiters/computeLimiters_impl.hpp
+++ b/SU2_CFD/include/limiters/computeLimiters_impl.hpp
@@ -115,6 +115,7 @@ void computeLimiters_impl(CSolver* solver,
     for (size_t iPoint = 0; iPoint < nPoint; ++iPoint)
       for (size_t iVar = varBegin; iVar < varEnd; ++iVar)
         fieldMax(iPoint,iVar) = fieldMin(iPoint,iVar) = field(iPoint,iVar);
+    END_SU2_OMP_FOR
 
     for (size_t iPeriodic = 1; iPeriodic <= config.GetnMarker_Periodic()/2; ++iPeriodic)
     {
@@ -215,6 +216,7 @@ void computeLimiters_impl(CSolver* solver,
 
     AD::EndPreacc();
   }
+  END_SU2_OMP_FOR
 
   /*--- Account for periodic effects, take the minimum limiter on each periodic pair. ---*/
   if (periodic)
diff --git a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
index 7e0bd6f8870..9b62a3a89db 100644
--- a/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
+++ b/SU2_CFD/include/numerics_simd/flow/convection/centered.hpp
@@ -77,7 +77,7 @@ class CCenteredBase : public Base {
 
 public:
   /*!
-   * \brief Implementation of the base Roe flux.
+   * \brief Implementation of the base centered flux.
    */
   void ComputeFlux(Int iEdge,
                    const CConfig& config,
diff --git a/SU2_CFD/include/numerics_simd/util.hpp b/SU2_CFD/include/numerics_simd/util.hpp
index 7127912329b..21c99c7e529 100644
--- a/SU2_CFD/include/numerics_simd/util.hpp
+++ b/SU2_CFD/include/numerics_simd/util.hpp
@@ -115,14 +115,13 @@ FORCEINLINE Double squaredNorm(const VectorDbl<nDim>& vector) {
 template<size_t nDim>
 FORCEINLINE Double norm(const VectorDbl<nDim>& vector) { return sqrt(squaredNorm(vector)); }
 
+#ifndef CODI_REVERSE_TYPE
 /*!
  * \brief Gather a single variable from index iPoint of a 1D container.
  */
 template<class Container>
 FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) {
-  auto x = *vars.innerIter(iPoint);
-  AD::SetPreaccIn(x, Double::Size);
-  return x;
+  return *vars.innerIter(iPoint);
 }
 
 /*!
@@ -130,9 +129,7 @@ FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) {
  */
 template<size_t nVar, class Container>
 FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) {
-  auto x = vars.template get<VectorDbl<nVar> >(iPoint);
-  AD::SetPreaccIn(x, nVar, Double::Size);
-  return x;
+  return vars.template get<VectorDbl<nVar> >(iPoint);
 }
 
 /*!
@@ -140,10 +137,55 @@ FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) {
  */
 template<size_t nRows, size_t nCols, class Container>
 FORCEINLINE MatrixDbl<nRows,nCols> gatherVariables(Int iPoint, const Container& vars) {
-  auto x = vars.template get<MatrixDbl<nRows,nCols> >(iPoint);
-  AD::SetPreaccIn(x, nRows, nCols, Double::Size);
+  return vars.template get<MatrixDbl<nRows,nCols> >(iPoint);
+}
+#else
+
+namespace {
+  /*--- Containers flagged with IsVector are indexed by the point only. ---*/ template<class Container, su2enable_if<Container::IsVector> = 0>
+  FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint); }
+
+  /*--- When a single variable is requested from a non-vector (matrix) container, it is assumed to be the first one (index 0). ---*/
+  template<class Container, su2enable_if<!Container::IsVector> = 0>
+  FORCEINLINE const su2double& get(const Container& vars, unsigned long iPoint) { return vars(iPoint,0); }
+}
+
+template<class Container>
+FORCEINLINE Double gatherVariables(Int iPoint, const Container& vars) { /*--- CODI_REVERSE_TYPE branch: gather one scalar per entry of iPoint. ---*/
+  Double x; /*--- Entry k receives the scalar stored for point iPoint[k]. ---*/
+  for (size_t k=0; k<Double::Size; ++k) {
+    AD::SetPreaccIn(get(vars, iPoint[k])); /*--- Each source scalar is passed to AD::SetPreaccIn individually, before it is copied. ---*/
+    x[k] = get(vars, iPoint[k]);
+  }
+  return x;
+}
+
+template<size_t nVar, class Container>
+FORCEINLINE VectorDbl<nVar> gatherVariables(Int iPoint, const Container& vars) { /*--- CODI_REVERSE_TYPE branch: gather nVar variables per entry of iPoint. ---*/
+  VectorDbl<nVar> x; /*--- x[i][k] receives variable i of point iPoint[k]. ---*/
+  for (size_t i=0; i<nVar; ++i) {
+    for (size_t k=0; k<Double::Size; ++k) {
+      AD::SetPreaccIn(vars(iPoint[k],i)); /*--- Each source scalar is passed to AD::SetPreaccIn individually, before it is copied. ---*/
+      x[i][k] = vars(iPoint[k],i);
+    }
+  }
+  return x;
+}
+
+template<size_t nRows, size_t nCols, class Container>
+FORCEINLINE MatrixDbl<nRows,nCols> gatherVariables(Int iPoint, const Container& vars) { /*--- CODI_REVERSE_TYPE branch: gather an nRows x nCols block per entry of iPoint. ---*/
+  MatrixDbl<nRows,nCols> x; /*--- x(i,j)[k] receives entry (i,j) of point iPoint[k]. ---*/
+  for (size_t i=0; i<nRows; ++i) {
+    for (size_t j=0; j<nCols; ++j) {
+      for (size_t k=0; k<Double::Size; ++k) {
+        AD::SetPreaccIn(vars(iPoint[k],i,j)); /*--- Each source scalar is passed to AD::SetPreaccIn individually, before it is copied. ---*/
+        x(i,j)[k] = vars(iPoint[k],i,j);
+      }
+    }
+  }
   return x;
 }
+#endif
 
 /*!
  * \brief Stop the AD preaccumulation.
diff --git a/SU2_CFD/include/output/COutput.hpp b/SU2_CFD/include/output/COutput.hpp
index 95a07335e7b..829c0698502 100644
--- a/SU2_CFD/include/output/COutput.hpp
+++ b/SU2_CFD/include/output/COutput.hpp
@@ -581,7 +581,6 @@ class COutput {
     volumeOutput_List.push_back(name);
   }
 
-
   /*!
    * \brief Set the value of a volume output field
    * \param[in] name - Name of the field.
diff --git a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
index 372069c8735..91df74fcd3c 100644
--- a/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CFEMDataSorter.hpp
@@ -41,11 +41,6 @@ class CFEMDataSorter final: public CParallelDataSorter{
    */
   CFEMDataSorter(CConfig *config, CGeometry *geometry, const vector<string> &valFieldNames);
 
-  /*!
-   * \brief Destructor
-   */
-  ~CFEMDataSorter() override;
-
   /*!
    * \brief Sort the connectivities (volume and surface) into data structures used for output file writing.
    * \param[in] config - Definition of the particular problem.
@@ -60,7 +55,7 @@ class CFEMDataSorter final: public CParallelDataSorter{
    * \return Global index of a specific point.
    */
   unsigned long GetGlobalIndex(unsigned long iPoint) const override{
-    return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint;
+    return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint;
   }
 
 private:
diff --git a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
index f2f70e23a7e..cd561c6a7bf 100644
--- a/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CFVMDataSorter.hpp
@@ -34,10 +34,9 @@ class CFVMDataSorter final: public CParallelDataSorter{
 
 private:
 
-  int* Local_Halo; //!< Array containing the flag whether a point is a halo node
+  vector<int> Local_Halo; //!< Array containing the flag whether a point is a halo node
 
 public:
-
   /*!
    * \brief Constructor
    * \param[in] config - Pointer to the current config structure
@@ -46,11 +45,6 @@ class CFVMDataSorter final: public CParallelDataSorter{
    */
   CFVMDataSorter(CConfig *config, CGeometry *geometry, const vector<string> &valFieldNames);
 
-  /*!
-   * \brief Destructor
-   */
-  ~CFVMDataSorter() override;
-
   /*!
    * \brief Sort the connectivities (volume and surface) into data structures used for output file writing.
    * \param[in] config - Definition of the particular problem.
@@ -65,7 +59,7 @@ class CFVMDataSorter final: public CParallelDataSorter{
    * \return Global index of a specific point.
    */
   unsigned long GetGlobalIndex(unsigned long iPoint) const override {
-    return linearPartitioner->GetFirstIndexOnRank(rank) + iPoint;
+    return linearPartitioner.GetFirstIndexOnRank(rank) + iPoint;
   }
 
   /*!
diff --git a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
index 5122eed672e..1a22dbda832 100644
--- a/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CParallelDataSorter.hpp
@@ -31,6 +31,7 @@
 #include "../../../../Common/include/option_structure.hpp"
 #include "../../../../Common/include/toolboxes/CLinearPartitioner.hpp"
 #include <array>
+#include <cassert>
 
 class CGeometry;
 class CConfig;
@@ -41,12 +42,12 @@ class CParallelDataSorter{
   /*!
    * \brief The MPI rank
    */
-  int rank;
+  const int rank;
 
   /*!
    * \brief The MPI size, aka the number of processors.
    */
-  int size;
+  const int size;
 
   unsigned long nGlobalPointBeforeSort; //!< Global number of points without halos before sorting
   unsigned long nLocalPointsBeforeSort;   //!< Local number of points without halos before sorting on this proc
@@ -66,7 +67,20 @@ class CParallelDataSorter{
    * \brief Map that stores the index for each GEO_TYPE type where to find information
    * in the element arrays.
    */
-  static const map<unsigned short, unsigned short> TypeMap;
+  struct {
+    static unsigned short at(unsigned short type) { /*--- Returns the element-array index associated with an element type. ---*/
+      switch(type) {
+        case LINE: return 0;
+        case TRIANGLE: return 1;
+        case QUADRILATERAL: return 2;
+        case TETRAHEDRON: return 3;
+        case HEXAHEDRON: return 4;
+        case PRISM: return 5;
+        case PYRAMID: return 6;
+        default: assert(false); return 0; /*--- Unknown type: the assert fires unless NDEBUG is defined, otherwise index 0 is returned. ---*/
+      }
+    }
+  } TypeMap;
 
   unsigned long nPointsGlobal;   //!< Global number of points without halos
   unsigned long nElemGlobal;    //!< Global number of elems without halos
@@ -75,7 +89,7 @@ class CParallelDataSorter{
   unsigned long nElem;     //!< Local number of elements
   unsigned long nConn;     //!< Local size of the connectivity array
 
-  CLinearPartitioner* linearPartitioner;  //!< Linear partitioner based on the global number of points.
+  CLinearPartitioner linearPartitioner;  //!< Linear partitioner based on the global number of points.
 
   unsigned short GlobalField_Counter;  //!< Number of output fields
 
@@ -88,11 +102,8 @@ class CParallelDataSorter{
   int *nElemConn_Send;                 //!< Number of element connectivity this processor has to send to other processors
   int *nElemConn_Cum;                  //!< Cumulative number of element connectivity entries
   unsigned long *Index;                //!< Index each point has in the send buffer
-  su2double *connSend;                 //!< Send buffer holding the data that will be send to other processors
-  passivedouble *passiveDoubleBuffer;  //!< Buffer holding the sorted, partitioned data as passivedouble types
-  su2double     *doubleBuffer;         //!< Buffer holding the sorted, partitioned data as su2double types
-  /// Pointer used to allocate the memory used for ::passiveDoubleBuffer and ::doubleBuffer.
-  char *dataBuffer;
+  passivedouble *connSend;             //!< Send buffer holding the data that will be sent to other processors
+  passivedouble *dataBuffer;           //!< Buffer holding the sorted, partitioned data as passivedouble types
   unsigned long *idSend;               //!< Send buffer holding global indices that will be send to other processors
   int nSends,                          //!< Number of sends
   nRecvs;                              //!< Number of receives
@@ -243,7 +254,7 @@ class CParallelDataSorter{
    * \return The beginning node ID.
    */
   virtual unsigned long GetNodeBegin(unsigned short rank) const {
-    return linearPartitioner->GetFirstIndexOnRank(rank);
+    return linearPartitioner.GetFirstIndexOnRank(rank);
   }
 
   /*!
@@ -252,7 +263,7 @@ class CParallelDataSorter{
    * \return The ending node ID.
    */
   unsigned long GetNodeEnd(unsigned short rank) const {
-    return linearPartitioner->GetLastIndexOnRank(rank);
+    return linearPartitioner.GetLastIndexOnRank(rank);
   }
 
   /*!
@@ -261,13 +272,13 @@ class CParallelDataSorter{
    * \input iPoint - the point ID.
    * \return the value of the data field at a point.
    */
-  passivedouble GetData(unsigned short iField, unsigned long iPoint) const  {return passiveDoubleBuffer[iPoint*GlobalField_Counter + iField];}
+  passivedouble GetData(unsigned short iField, unsigned long iPoint) const  {return dataBuffer[iPoint*GlobalField_Counter + iField];}
 
   /*!
    * \brief Get the pointer to the sorted linear partitioned data.
    * \return Pointer to the sorted data.
    */
-  const passivedouble *GetData() const {return passiveDoubleBuffer;}
+  const passivedouble *GetData() const {return dataBuffer;}
 
   /*!
    * \brief Get the global index of a point.
@@ -281,14 +292,14 @@ class CParallelDataSorter{
    * \input rank - the processor rank.
    * \return The cumulated number of points up to certain processor rank.
    */
-  virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner->GetCumulativeSizeBeforeRank(rank);}
+  virtual unsigned long GetnPointCumulative(unsigned short rank) const {return linearPartitioner.GetCumulativeSizeBeforeRank(rank);}
 
   /*!
    * \brief Get the linear number of points
    * \input rank - the processor rank.
    * \return The linear number of points up to certain processor rank.
    */
-  unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner->GetSizeOnRank(rank);}
+  unsigned long GetnPointLinear(unsigned short rank) const {return linearPartitioner.GetSizeOnRank(rank);}
 
   /*!
    * \brief Check whether the current connectivity is sorted (i.e. if SortConnectivity has been called)
@@ -305,10 +316,10 @@ class CParallelDataSorter{
    * \param[in] data - Value of the field
    */
   void SetUnsorted_Data(unsigned long iPoint, unsigned short iField, su2double data){
-    connSend[Index[iPoint] + iField] = data;
+    connSend[Index[iPoint] + iField] = SU2_TYPE::GetValue(data);
   }
 
-  su2double GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const {
+  passivedouble GetUnsorted_Data(unsigned long iPoint, unsigned short iField) const {
     return connSend[Index[iPoint] + iField];
   }
 
@@ -318,7 +329,7 @@ class CParallelDataSorter{
    * \return The rank/processor number.
    */
   virtual unsigned short FindProcessor(unsigned long iPoint) const {
-    return linearPartitioner->GetRankContainingIndex(iPoint);
+    return linearPartitioner.GetRankContainingIndex(iPoint);
   }
 
   /*!
diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
index 7775761f81c..5d2e7481364 100644
--- a/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CSurfaceFEMDataSorter.hpp
@@ -31,7 +31,7 @@
 
 class CSurfaceFEMDataSorter final: public CParallelDataSorter{
 
-  CFEMDataSorter* volumeSorter;                  //!< Pointer to the volume sorter instance
+  const CFEMDataSorter* volumeSorter;            //!< Pointer to the volume sorter instance
   vector<unsigned long> globalSurfaceDOFIDs;     //!< Structure to map the local sorted point ID to the global point ID
   vector<unsigned long> nSurfaceDOFsRanks;       //!< Number of points on each rank
 
@@ -43,12 +43,7 @@ class CSurfaceFEMDataSorter final: public CParallelDataSorter{
    * \param[in] geometry - Pointer to the current geometry
    * \param[in] valVolumeSorter - The datasorter containing the volume data
    */
-  CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter);
-
-  /*!
-   * \brief Destructor
-   */
-  ~CSurfaceFEMDataSorter() override;
+  CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter);
 
   /*!
    * \brief Sort the output data for each grid node into a linear partitioning across all processors.
diff --git a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
index dd6132c4248..d65a2a03260 100644
--- a/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
+++ b/SU2_CFD/include/output/filewriter/CSurfaceFVMDataSorter.hpp
@@ -31,7 +31,7 @@
 
 class CSurfaceFVMDataSorter final: public CParallelDataSorter{
 
-  CFVMDataSorter* volumeSorter;                    //!< Pointer to the volume sorter instance
+  const CFVMDataSorter* volumeSorter;               //!< Pointer to the volume sorter instance
   map<unsigned long,unsigned long> Renumber2Global; //! Structure to map the local sorted point ID to the global point ID
 public:
 
@@ -41,12 +41,7 @@ class CSurfaceFVMDataSorter final: public CParallelDataSorter{
    * \param[in] geometry - Pointer to the current geometry
    * \param[in] valVolumeSorter - The datasorter containing the volume data
    */
-  CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, CFVMDataSorter* valVolumeSorter);
-
-  /*!
-   * \brief Destructor
-   */
-  ~CSurfaceFVMDataSorter() override;
+  CSurfaceFVMDataSorter(CConfig *config, CGeometry* geometry, const CFVMDataSorter* valVolumeSorter);
 
   /*!
    * \brief Sort the output data for each grid node into a linear partitioning across all processors.
diff --git a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
index dec9c21e348..48c29f76dc7 100644
--- a/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjFEASolver.hpp
@@ -38,57 +38,84 @@
  */
 class CDiscAdjFEASolver final : public CSolver {
 private:
+  static constexpr size_t MAXNVAR = 3;  /*!< \brief Max number of variables, for static arrays. */
+
   unsigned short KindDirect_Solver = 0;
   CSolver *direct_solver = nullptr;
-  su2double *Sens_E = nullptr,          /*!< \brief Young modulus sensitivity coefficient for each boundary. */
-  *Sens_Nu = nullptr,                   /*!< \brief Poisson's ratio sensitivity coefficient for each boundary. */
-  *Sens_nL = nullptr,                   /*!< \brief Normal pressure sensitivity coefficient for each boundary. */
-  **CSensitivity = nullptr;             /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
-
-  su2double *Solution_Vel = nullptr,    /*!< \brief Velocity componenent of the solution. */
-  *Solution_Accel = nullptr;            /*!< \brief Acceleration componenent of the solution. */
-
-  su2double *normalLoads = nullptr;     /*!< \brief Values of the normal loads for each marker iMarker_nL. */
-
-  unsigned short nMPROP = 0;            /*!< \brief Number of material properties */
-
-  su2double *E_i = nullptr,               /*!< \brief Values of the Young's Modulus. */
-            *Nu_i = nullptr,              /*!< \brief Values of the Poisson's ratio. */
-            *Rho_i = nullptr,             /*!< \brief Values of the density (for inertial effects). */
-            *Rho_DL_i = nullptr;          /*!< \brief Values of the density (for volume loading). */
-  int       *AD_Idx_E_i = nullptr,        /*!< \brief Derivative index of the Young's Modulus. */
-            *AD_Idx_Nu_i = nullptr,       /*!< \brief Derivative index of the Poisson's ratio. */
-            *AD_Idx_Rho_i = nullptr,      /*!< \brief Derivative index of the density (for inertial effects). */
-            *AD_Idx_Rho_DL_i = nullptr;   /*!< \brief Derivative index of the density (for volume loading). */
-
-  su2double *Local_Sens_E = nullptr,        /*!< \brief Local sensitivity of the Young's modulus. */
-            *Global_Sens_E = nullptr,       /*!< \brief Global sensitivity of the Young's modulus. */
-            *Total_Sens_E = nullptr;        /*!< \brief Total sensitivity of the Young's modulus (time domain). */
-  su2double *Local_Sens_Nu = nullptr,       /*!< \brief Local sensitivity of the Poisson ratio. */
-            *Global_Sens_Nu = nullptr,      /*!< \brief Global sensitivity of the Poisson ratio. */
-            *Total_Sens_Nu = nullptr;       /*!< \brief Total sensitivity of the Poisson ratio (time domain). */
-  su2double *Local_Sens_Rho = nullptr,      /*!< \brief Local sensitivity of the density. */
-            *Global_Sens_Rho = nullptr,     /*!< \brief Global sensitivity of the density. */
-            *Total_Sens_Rho = nullptr;      /*!< \brief Total sensitivity of the density (time domain). */
-  su2double *Local_Sens_Rho_DL = nullptr,   /*!< \brief Local sensitivity of the volume load. */
-            *Global_Sens_Rho_DL = nullptr,  /*!< \brief Global sensitivity of the volume load. */
-            *Total_Sens_Rho_DL = nullptr;   /*!< \brief Total sensitivity of the volume load (time domain). */
-
-  bool de_effects = false;                  /*!< \brief Determines if DE effects are considered. */
-  unsigned short nEField = 0;               /*!< \brief Number of electric field areas in the problem. */
-  su2double *EField = nullptr;              /*!< \brief Array that stores the electric field as design variables. */
-  int       *AD_Idx_EField = nullptr;       /*!< \brief Derivative index of the electric field as design variables. */
-  su2double *Local_Sens_EField = nullptr,   /*!< \brief Local sensitivity of the Electric Field. */
-            *Global_Sens_EField = nullptr,  /*!< \brief Global sensitivity of the Electric Field. */
-            *Total_Sens_EField = nullptr;   /*!< \brief Total sensitivity of the Electric Field (time domain). */
-
-  bool fea_dv = false;                  /*!< \brief Determines if the design variable we study is a FEA parameter. */
-  unsigned short nDV = 0;               /*!< \brief Number of design variables in the problem. */
-  su2double *DV_Val = nullptr;          /*!< \brief Values of the design variables. */
-  int       *AD_Idx_DV_Val = nullptr;   /*!< \brief Derivative index of the design variables. */
-  su2double *Local_Sens_DV = nullptr,   /*!< \brief Local sensitivity of the design variables. */
-            *Global_Sens_DV = nullptr,  /*!< \brief Global sensitivity of the design variables. */
-            *Total_Sens_DV = nullptr;   /*!< \brief Total sensitivity of the design variables (time domain). */
+
+  /*!
+   * \brief A type to manage sensitivities of design variables.
+   */
+  struct SensData {
+    unsigned short size = 0;          /*!< \brief Number of entries in each of the arrays below. */
+    su2double* val = nullptr;         /*!< \brief Value of the variable. */
+    int* AD_Idx = nullptr;            /*!< \brief Derivative index in the AD tape. */
+    bool localIdx = false;            /*!< \brief Set by SetIndex(), selects the AD_Idx path in GetDerivative(). */
+    su2double* LocalSens = nullptr;   /*!< \brief Local sensitivity (domain). */
+    su2double* GlobalSens = nullptr;  /*!< \brief Global sensitivity (mpi). */
+    su2double* TotalSens = nullptr;   /*!< \brief Total sensitivity (time domain). */
+    /*--- Unchecked access to the stored values. ---*/
+    su2double& operator[] (unsigned short i) { return val[i]; }
+    const su2double& operator[] (unsigned short i) const { return val[i]; }
+    /*--- Discard the current contents and allocate n value-initialized entries per array. ---*/
+    void resize(unsigned short n) {
+      clear();
+      size = n;
+      val = new su2double[n]();
+      AD_Idx = new int[n]();
+      LocalSens = new su2double[n]();
+      GlobalSens = new su2double[n]();
+      TotalSens = new su2double[n]();
+    }
+    /*--- Free all arrays and null the pointers, so that a repeated call (e.g. from the destructor) is safe. ---*/
+    void clear() {
+      size = 0;
+      localIdx = false;
+      delete [] val; val = nullptr;
+      delete [] AD_Idx; AD_Idx = nullptr;
+      delete [] LocalSens; LocalSens = nullptr;
+      delete [] GlobalSens; GlobalSens = nullptr;
+      delete [] TotalSens; TotalSens = nullptr;
+    }
+    /*--- Call AD::RegisterInput on every value, forwarding push_index. ---*/
+    void Register(bool push_index) {
+      for (auto i = 0u; i < size; ++i) AD::RegisterInput(val[i], push_index);
+    }
+    /*--- Pass each (AD_Idx, val) pair to AD::SetIndex and flag that AD_Idx is now in use. ---*/
+    void SetIndex() {
+      for (auto i = 0u; i < size; ++i) AD::SetIndex(AD_Idx[i], val[i]);
+      localIdx = true;
+    }
+    /*--- Fill LocalSens (through AD_Idx if localIdx is set, otherwise directly from val). ---*/
+    void GetDerivative() {
+      if (localIdx)
+        for (auto i = 0u; i < size; ++i) LocalSens[i] = AD::GetDerivative(AD_Idx[i]);
+      else
+        for (auto i = 0u; i < size; ++i) LocalSens[i] = SU2_TYPE::GetDerivative(val[i]);
+      /*--- Sum the local values over the communicator into GlobalSens. ---*/
+      SU2_MPI::Allreduce(LocalSens, GlobalSens, size, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
+    }
+    /*--- Accumulate GlobalSens into TotalSens. ---*/
+    void UpdateTotal() {
+      for (auto i = 0u; i < size; ++i) TotalSens[i] += GlobalSens[i];
+    }
+    /*--- NOTE(review): implicit copies are not disabled and would share (and double-free) the arrays. ---*/
+    ~SensData() { clear(); }
+  };
+
+  unsigned short nMPROP = 0;  /*!< \brief Number of material properties */
+  SensData E;                 /*!< \brief Values of the Young's Modulus. */
+  SensData Nu;                /*!< \brief Values of the Poisson's ratio. */
+  SensData Rho;               /*!< \brief Values of the density (for inertial effects). */
+  SensData Rho_DL;            /*!< \brief Values of the density (for volume loading). */
+
+  bool de_effects = false;    /*!< \brief Determines if DE effects are considered. */
+  unsigned short nEField = 0; /*!< \brief Number of electric field areas in the problem. */
+  SensData EField;            /*!< \brief Array that stores the electric field as design variables. */
+
+  bool fea_dv = false;        /*!< \brief Determines if the design variable we study is a FEA parameter. */
+  unsigned short nDV = 0;     /*!< \brief Number of design variables in the problem. */
+  SensData DV;                /*!< \brief Values of the design variables. */
 
   CDiscAdjFEABoundVariable* nodes = nullptr;  /*!< \brief The highest level in the variable hierarchy this solver can safely use. */
 
@@ -97,20 +124,17 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   inline CVariable* GetBaseClassPointerToNodes() override { return nodes; }
 
-public:
-
   /*!
-   * \brief Constructor of the class.
+   * \brief Read the design variables for the adjoint solver
    */
-  CDiscAdjFEASolver(void);
+  void ReadDV(const CConfig *config);
+
+public:
 
   /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
+   * \brief Constructor of the class.
    */
-  CDiscAdjFEASolver(CGeometry *geometry, CConfig *config);
+  CDiscAdjFEASolver() = default;
 
   /*!
    * \overload
@@ -124,7 +148,7 @@ class CDiscAdjFEASolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjFEASolver(void) override;
+  ~CDiscAdjFEASolver() override;
 
   /*!
    * \brief Performs the preprocessing of the adjoint AD-based solver.
@@ -159,13 +183,6 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Set the surface sensitivity.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   */
-  void SetSurface_Sensitivity(CGeometry *geometry, CConfig* config) override;
-
   /*!
    * \brief Extract and set the geometrical sensitivity.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -178,97 +195,97 @@ class CDiscAdjFEASolver final : public CSolver {
    * \return Value of the total Young's modulus sensitivity
    *         (inviscid + viscous contribution).
    */
-  inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return Total_Sens_E[iVal]; }
+  inline su2double GetTotal_Sens_E(unsigned short iVal) const override { return E.TotalSens[iVal]; }
 
   /*!
    * \brief Set the total Poisson's ratio sensitivity.
    * \return Value of the Poisson's ratio sensitivity
    */
-  inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Total_Sens_Nu[iVal]; }
+  inline su2double GetTotal_Sens_Nu(unsigned short iVal) const override { return Nu.TotalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural density
    * \return Value of the structural density sensitivity
    */
-  inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Total_Sens_Rho[iVal]; }
+  inline su2double GetTotal_Sens_Rho(unsigned short iVal) const override { return Rho.TotalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural weight
    * \return Value of the structural weight sensitivity
    */
-  inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Total_Sens_Rho_DL[iVal]; }
+  inline su2double GetTotal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.TotalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Electric Field in the region iEField (time averaged)
    */
-  inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return Total_Sens_EField[iEField]; }
+  inline su2double GetTotal_Sens_EField(unsigned short iEField) const override { return EField.TotalSens[iEField]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the total sensitivity coefficient for the FEA DV in the region iDVFEA (time averaged)
    */
-  inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return Total_Sens_DV[iDVFEA]; }
+  inline su2double GetTotal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.TotalSens[iDVFEA]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Young Modulus E
    */
-  inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return Global_Sens_E[iVal]; }
+  inline su2double GetGlobal_Sens_E(unsigned short iVal) const override { return E.GlobalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the Mach sensitivity for the Poisson's ratio Nu
    */
-  inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Global_Sens_Nu[iVal]; }
+  inline su2double GetGlobal_Sens_Nu(unsigned short iVal) const override { return Nu.GlobalSens[iVal]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the Electric Field in the region iEField
    */
-  inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return Global_Sens_EField[iEField]; }
+  inline su2double GetGlobal_Sens_EField(unsigned short iEField) const override { return EField.GlobalSens[iEField]; }
 
   /*!
    * \brief A virtual member.
    * \return Value of the sensitivity coefficient for the FEA DV in the region iDVFEA
    */
-  inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return Global_Sens_DV[iDVFEA]; }
+  inline su2double GetGlobal_Sens_DVFEA(unsigned short iDVFEA) const override { return DV.GlobalSens[iDVFEA]; }
 
   /*!
    * \brief Get the total sensitivity for the structural density
    * \return Value of the structural density sensitivity
    */
-  inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Global_Sens_Rho[iVal]; }
+  inline su2double GetGlobal_Sens_Rho(unsigned short iVal) const override { return Rho.GlobalSens[iVal]; }
 
   /*!
    * \brief Get the total sensitivity for the structural weight
    * \return Value of the structural weight sensitivity
    */
-  inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Global_Sens_Rho_DL[iVal]; }
+  inline su2double GetGlobal_Sens_Rho_DL(unsigned short iVal) const override { return Rho_DL.GlobalSens[iVal]; }
 
   /*!
    * \brief Get the value of the Young modulus from the adjoint solver
    * \return Value of the Young modulus from the adjoint solver
    */
-  inline su2double GetVal_Young(unsigned short iVal) const override { return E_i[iVal]; }
+  inline su2double GetVal_Young(unsigned short iVal) const override { return E[iVal]; }
 
   /*!
    * \brief Get the value of the Poisson's ratio from the adjoint solver
    * \return Value of the Poisson's ratio from the adjoint solver
    */
-  inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu_i[iVal]; }
+  inline su2double GetVal_Poisson(unsigned short iVal) const override { return Nu[iVal]; }
 
   /*!
    * \brief Get the value of the density from the adjoint solver, for inertial effects
    * \return Value of the density from the adjoint solver
    */
-  inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho_i[iVal]; }
+  inline su2double GetVal_Rho(unsigned short iVal) const override { return Rho[iVal]; }
 
   /*!
    * \brief Get the value of the density from the adjoint solver, for dead loads
    * \return Value of the density for dead loads, from the adjoint solver
    */
-  inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL_i[iVal]; }
+  inline su2double GetVal_Rho_DL(unsigned short iVal) const override { return Rho_DL[iVal]; }
 
   /*!
    * \brief Get the number of variables for the Electric Field from the adjoint solver
@@ -276,11 +293,6 @@ class CDiscAdjFEASolver final : public CSolver {
    */
   inline unsigned short GetnEField(void) const override { return nEField; }
 
-  /*!
-   * \brief Read the design variables for the adjoint solver
-   */
-  void ReadDV(CConfig *config) override;
-
   /*!
    * \brief Get the number of design variables from the adjoint solver,
    * \return Number of design variables from the adjoint solver
@@ -297,7 +309,7 @@ class CDiscAdjFEASolver final : public CSolver {
    * \brief Get the value of the design variables from the adjoint solver
    * \return Pointer to the values of the design variables
    */
-  inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV_Val[iVal]; }
+  inline su2double GetVal_DVFEA(unsigned short iVal) const override { return DV[iVal]; }
 
   /*!
    * \brief Prepare the solver for a new recording.
diff --git a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
index 4caa7e597e2..d7ba9d80b75 100644
--- a/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjMeshSolver.hpp
@@ -39,6 +39,13 @@
  */
 class CDiscAdjMeshSolver final : public CSolver {
 private:
+  static constexpr size_t MAXNDIM = 3;  /*!< \brief Max number of space dimensions, used in some static arrays. */
+  static constexpr size_t MAXNVAR = 3;  /*!< \brief Max number of variables, for static arrays. */
+
+  static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */
+
+  unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
+
   CSolver *direct_solver = nullptr;
 
   CDiscAdjMeshBoundVariable* nodes = nullptr;   /*!< \brief Variables of the discrete adjoint mesh solver. */
@@ -53,15 +60,7 @@ class CDiscAdjMeshSolver final : public CSolver {
   /*!
    * \brief Constructor of the class.
    */
-  CDiscAdjMeshSolver(void);
-
-  /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
-   */
-  CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config);
+  CDiscAdjMeshSolver() = default;
 
   /*!
    * \overload
@@ -75,7 +74,7 @@ class CDiscAdjMeshSolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjMeshSolver(void) override;
+  ~CDiscAdjMeshSolver() override;
 
   /*!
    * \brief Performs the preprocessing of the AD-based mesh adjoint solver.
@@ -124,24 +123,6 @@ class CDiscAdjMeshSolver final : public CSolver {
    */
   void ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Update the dual-time derivatives.
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] solver_container - Container vector with all the solutions.
-   * \param[in] config - Definition of the particular problem.
-   * \param[in] iMesh - Index of the mesh in multigrid computations.
-   * \param[in] iRKStep - Current step of the Runge-Kutta iteration.
-   * \param[in] RunTime_EqSystem - System of equations which is going to be solved.
-   * \param[in] Output - boolean to determine whether to print output.
-   */
-  void Preprocessing(CGeometry *geometry,
-                    CSolver **solver_container,
-                    CConfig *config,
-                    unsigned short iMesh,
-                    unsigned short iRKStep,
-                    unsigned short RunTime_EqSystem,
-                    bool Output) override;
-
   /*!
    * \brief Load a solution from a restart file.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
index c14ed6ab149..cac68c1ab93 100644
--- a/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
+++ b/SU2_CFD/include/solvers/CDiscAdjSolver.hpp
@@ -38,9 +38,17 @@
  */
 class CDiscAdjSolver final : public CSolver {
 private:
+  static constexpr size_t MAXNDIM = 3;  /*!< \brief Max number of space dimensions, used in some static arrays. */
+  static constexpr size_t MAXNVAR = 32; /*!< \brief Max number of variables, for static arrays. */
+
+  static constexpr size_t OMP_MAX_SIZE = 1024; /*!< \brief Max chunk size for light point loops. */
+
+  unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
+
   unsigned short KindDirect_Solver;
   CSolver *direct_solver;
-  su2double **CSensitivity;      /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
+  vector<vector<su2double> > CSensitivity; /*!< \brief Shape sensitivity coefficient for each boundary and vertex. */
+  vector<su2double> Sens_Geo;    /*!< \brief Total shape sensitivity for each monitored boundary. */
   su2double Total_Sens_Mach;     /*!< \brief Total mach sensitivity coefficient for all the boundaries. */
   su2double Total_Sens_AoA;      /*!< \brief Total angle of attack sensitivity coefficient for all the boundaries. */
   su2double Total_Sens_Geo;      /*!< \brief Total shape sensitivity coefficient for all the boundaries. */
@@ -52,8 +60,6 @@ class CDiscAdjSolver final : public CSolver {
   su2double Mach, Alpha, Beta, Pressure, Temperature, BPressure, ModVel;
   su2double TemperatureRad, Total_Sens_Temp_Rad;
 
-  su2double *Solution_Geometry; /*!< \brief Auxiliary vector for the geometry solution (dimension nDim instead of nVar). */
-
   CDiscAdjVariable* nodes = nullptr;  /*!< \brief The highest level in the variable hierarchy this solver can safely use. */
 
   /*!
@@ -66,14 +72,7 @@ class CDiscAdjSolver final : public CSolver {
   /*!
    * \brief Constructor of the class.
    */
-  CDiscAdjSolver(void);
-
-  /*!
-   * \overload
-   * \param[in] geometry - Geometrical definition of the problem.
-   * \param[in] config - Definition of the particular problem.
-   */
-  CDiscAdjSolver(CGeometry *geometry, CConfig *config);
+  CDiscAdjSolver() = default;
 
   /*!
    * \overload
@@ -88,7 +87,7 @@ class CDiscAdjSolver final : public CSolver {
   /*!
    * \brief Destructor of the class.
    */
-  ~CDiscAdjSolver(void) override;
+  ~CDiscAdjSolver() override;
 
   /*!
    * \brief Performs the preprocessing of the adjoint AD-based solver.
@@ -115,14 +114,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void SetAdjoint_Output(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Sets the adjoint values of the output of the mesh deformation iteration
-   *        before evaluation of the tape.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] config - The particular config.
-   */
-  void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) override;
-
   /*!
    * \brief Sets the adjoint values of the input variables of the flow (+turb.) iteration
    *        after tape has been evaluated.
@@ -131,14 +122,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] solver_container - The solver container holding all solutions.
-   * \param[in] config - The particular config.
-   */
-  void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) override;
-
   /*!
    * \brief Set the surface sensitivity.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -225,14 +208,6 @@ class CDiscAdjSolver final : public CSolver {
    */
   void SetRecording(CGeometry *geometry, CConfig *config) override;
 
-  /*!
-   * \brief Prepare the solver for a new recording.
-   * \param[in] kind_recording - Kind of AD recording.
-   */
-  void SetMesh_Recording(CGeometry **geometry,
-                         CVolumetricMovement *grid_movement,
-                         CConfig *config) override;
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -282,4 +257,12 @@ class CDiscAdjSolver final : public CSolver {
                    int val_iter,
                    bool val_update_geo) override;
 
+  /*!
+   * \brief Hybrid parallel support is delegated to the direct solver.
+   * \return The answer of the direct solver, false if no direct solver is attached.
+   */
+  inline bool GetHasHybridParallel() const override {
+    return direct_solver ? direct_solver->GetHasHybridParallel() : false;
+  }
+
 };
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
index 0517f9f03b2..94afc9e227a 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.hpp
@@ -34,6 +34,15 @@ class CNumericsSIMD;
 
 template <class VariableType, ENUM_REGIME FlowRegime>
 class CFVMFlowSolverBase : public CSolver {
+ private:
+  static void recursiveAssign() {}  /*!< \brief End of the recursion, no pairs left to assign. */
+
+  template<class Dst, class Src, class... Rest>
+  static void recursiveAssign(Dst& dst, const Src& src, Rest&&... remaining) {
+    dst = src;  /*--- Assign the leading (destination, source) pair, then recurse on the others. ---*/
+    recursiveAssign(remaining...);
+  }
+
  protected:
   static constexpr size_t MAXNDIM = 3; /*!< \brief Max number of space dimensions, used in some static arrays. */
   static constexpr size_t MAXNVAR = VariableType::MAXNVAR; /*!< \brief Max number of variables, for static arrays. */
@@ -43,6 +52,18 @@ class CFVMFlowSolverBase : public CSolver {
 
   unsigned long omp_chunk_size; /*!< \brief Chunk size used in light point loops. */
 
+  /*!
+   * \brief Utility to set the values of member variables safely, so that the new values are seen by all threads.
+   * \param[in] lhsRhsPairs - Pairs of destination and source e.g. a,0,b,-1.
+   */
+  template<class... Ts>
+  static void ompMasterAssignBarrier(Ts&&... lhsRhsPairs) {
+    SU2_OMP_MASTER  /*--- The assignments are made inside the master region. ---*/
+    recursiveAssign(lhsRhsPairs...);
+    END_SU2_OMP_MASTER
+    SU2_OMP_BARRIER  /*--- Barrier after the master region, before returning to the caller. ---*/
+  }
+
   su2double Mach_Inf = 0.0;          /*!< \brief Mach number at the infinity. */
   su2double Density_Inf = 0.0;       /*!< \brief Density at the infinity. */
   su2double Energy_Inf = 0.0;        /*!< \brief Energy at the infinity. */
@@ -318,13 +339,7 @@ class CFVMFlowSolverBase : public CSolver {
      *    Critical sections are used for this instead of reduction
      *    clauses for compatibility with OpenMP 2.0 (Windows...). ---*/
 
-    SU2_OMP_MASTER
-    {
-      Min_Delta_Time = 1e30;
-      Max_Delta_Time = 0.0;
-      Global_Delta_UnstTimeND = 1e30;
-    }
-    SU2_OMP_BARRIER
+    ompMasterAssignBarrier(Min_Delta_Time,1e30, Max_Delta_Time,0.0, Global_Delta_UnstTimeND,1e30);
 
     /*--- Loop domain points. ---*/
 
@@ -377,6 +392,7 @@ class CFVMFlowSolverBase : public CSolver {
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop boundary edges ---*/
 
@@ -419,6 +435,7 @@ class CFVMFlowSolverBase : public CSolver {
           Lambda = lambdaVisc(*nodes,iPoint) * Area2;
           nodes->AddMax_Lambda_Visc(iPoint, Lambda);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -427,7 +444,7 @@ class CFVMFlowSolverBase : public CSolver {
       /*--- Thread-local variables for min/max reduction. ---*/
       su2double minDt = 1e30, maxDt = 0.0;
 
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
 
         su2double Vol = geometry->nodes->GetVolume(iPoint);
@@ -449,6 +466,7 @@ class CFVMFlowSolverBase : public CSolver {
           nodes->SetDelta_Time(iPoint,0.0);
         }
       }
+      END_SU2_OMP_FOR
       /*--- Min/max over threads. ---*/
       SU2_OMP_CRITICAL
       {
@@ -456,6 +474,7 @@ class CFVMFlowSolverBase : public CSolver {
         Max_Delta_Time = max(Max_Delta_Time, maxDt);
         Global_Delta_Time = Min_Delta_Time;
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
     }
 
@@ -470,6 +489,7 @@ class CFVMFlowSolverBase : public CSolver {
       SU2_MPI::Allreduce(&Max_Delta_Time, &rbuf_time, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       Max_Delta_Time = rbuf_time;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- For exact time solution use the minimum delta time of the whole mesh. ---*/
@@ -490,6 +510,7 @@ class CFVMFlowSolverBase : public CSolver {
 
         config->SetDelta_UnstTimeND(Global_Delta_Time);
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
 
       /*--- Sets the regular CFL equal to the unsteady CFL. ---*/
@@ -499,6 +520,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->SetLocalCFL(iPoint, config->GetUnst_CFL());
         nodes->SetDelta_Time(iPoint, Global_Delta_Time);
       }
+      END_SU2_OMP_FOR
 
     }
 
@@ -509,12 +531,14 @@ class CFVMFlowSolverBase : public CSolver {
       /*--- Thread-local variable for reduction. ---*/
       su2double glbDtND = 1e30;
 
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
         glbDtND = min(glbDtND, config->GetUnst_CFL()*Global_Delta_Time / nodes->GetLocalCFL(iPoint));
       }
+      END_SU2_OMP_FOR
       SU2_OMP_CRITICAL
       Global_Delta_UnstTimeND = min(Global_Delta_UnstTimeND, glbDtND);
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
 
       SU2_OMP_MASTER
@@ -524,6 +548,7 @@ class CFVMFlowSolverBase : public CSolver {
 
         config->SetDelta_UnstTimeND(Global_Delta_UnstTimeND);
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
@@ -535,6 +560,7 @@ class CFVMFlowSolverBase : public CSolver {
         su2double dt = min((2.0/3.0)*config->GetDelta_UnstTimeND(), nodes->GetDelta_Time(iPoint));
         nodes->SetDelta_Time(iPoint, dt);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -585,6 +611,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed);
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop boundary edges ---*/
 
@@ -619,6 +646,7 @@ class CFVMFlowSolverBase : public CSolver {
 
           nodes->AddLambda(iPoint, fabs(Mean_ProjVel) + Mean_SoundSpeed);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -681,6 +709,7 @@ class CFVMFlowSolverBase : public CSolver {
         nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]);
       }
     }
+    END_SU2_OMP_FOR
 
     if (isPeriodic) {
       /*--- Correct the sensor values across any periodic boundaries. ---*/
@@ -695,6 +724,7 @@ class CFVMFlowSolverBase : public CSolver {
       SU2_OMP_FOR_STAT(omp_chunk_size)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++)
         nodes->SetSensor(iPoint, fabs(iPoint_UndLapl[iPoint]) / jPoint_UndLapl[iPoint]);
+      END_SU2_OMP_FOR
     }
 
     /*--- MPI parallelization ---*/
@@ -739,7 +769,7 @@ class CFVMFlowSolverBase : public CSolver {
     /*--- Update the solution and residuals ---*/
 
     if (!adjoint) {
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
         su2double Vol = geometry->nodes->GetVolume(iPoint) + geometry->nodes->GetPeriodicVolume(iPoint);
@@ -792,12 +822,14 @@ class CFVMFlowSolverBase : public CSolver {
           }
         }
       }
+      END_SU2_OMP_FOR
       /*--- Reduce residual information over all threads in this rank. ---*/
       SU2_OMP_CRITICAL
       for (unsigned short iVar = 0; iVar < nVar; iVar++) {
         Residual_RMS[iVar] += resRMS[iVar];
         AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
     }
 
@@ -807,16 +839,11 @@ class CFVMFlowSolverBase : public CSolver {
     CompleteComms(geometry, config, SOLUTION);
 
     if (!adjoint) {
-      SU2_OMP_MASTER {
-        /*--- Compute the root mean square residual ---*/
-
-        SetResidual_RMS(geometry, config);
+      /*--- Compute the root mean square residual ---*/
+      SetResidual_RMS(geometry, config);
 
-        /*--- For verification cases, compute the global error metrics. ---*/
-
-        ComputeVerificationError(geometry, config);
-      }
-      SU2_OMP_BARRIER
+      /*--- For verification cases, compute the global error metrics. ---*/
+      ComputeVerificationError(geometry, config);
     }
 
   }
@@ -859,7 +886,7 @@ class CFVMFlowSolverBase : public CSolver {
     /*--- Add pseudotime term to Jacobian. ---*/
 
     if (implicit) {
-      SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+      SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
       for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
         /*--- Modify matrix diagonal to improve diagonal dominance. ---*/
@@ -879,11 +906,12 @@ class CFVMFlowSolverBase : public CSolver {
           Jacobian.SetVal2Diag(iPoint, 1.0);
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Right hand side of the system (-Residual) and initial guess (x = 0) ---*/
 
-    SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+    SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
     for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
       /*--- Multigrid contribution to residual. ---*/
@@ -911,17 +939,17 @@ class CFVMFlowSolverBase : public CSolver {
         }
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     for (unsigned short iVar = 0; iVar < nVar; iVar++) {
       Residual_RMS[iVar] += resRMS[iVar];
       AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual ---*/
-    SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
-    SU2_OMP_BARRIER
   }
 
   /*!
@@ -942,6 +970,7 @@ class CFVMFlowSolverBase : public CSolver {
           nodes->AddSolution(iPoint, iVar, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint*nVar+iVar]);
         }
       }
+      END_SU2_OMP_FOR
     }
 
     for (unsigned short iPeriodic = 1; iPeriodic <= config->GetnMarker_Periodic()/2; iPeriodic++) {
@@ -953,9 +982,7 @@ class CFVMFlowSolverBase : public CSolver {
     CompleteComms(geometry, config, SOLUTION);
 
     /*--- For verification cases, compute the global error metrics. ---*/
-    SU2_OMP_MASTER
     ComputeVerificationError(geometry, config);
-    SU2_OMP_BARRIER
   }
 
   /*!
@@ -968,11 +995,7 @@ class CFVMFlowSolverBase : public CSolver {
     const auto& Gradient_Primitive = nodes->GetGradient_Primitive();
     auto& StrainMag = nodes->GetStrainMag();
 
-    SU2_OMP_MASTER {
-      StrainMag_Max = 0.0;
-      Omega_Max = 0.0;
-    }
-    SU2_OMP_BARRIER
+    ompMasterAssignBarrier(StrainMag_Max,0.0, Omega_Max,0.0);
 
     su2double strainMax = 0.0, omegaMax = 0.0;
 
@@ -1035,12 +1058,14 @@ class CFVMFlowSolverBase : public CSolver {
 
       AD::EndPreacc();
     }
+    END_SU2_OMP_FOR
 
     if ((iMesh == MESH_0) && (config.GetComm_Level() == COMM_FULL)) {
       SU2_OMP_CRITICAL {
         StrainMag_Max = max(StrainMag_Max, strainMax);
         Omega_Max = max(Omega_Max, omegaMax);
       }
+      END_SU2_OMP_CRITICAL
 
       SU2_OMP_BARRIER
       SU2_OMP_MASTER {
@@ -1050,6 +1075,7 @@ class CFVMFlowSolverBase : public CSolver {
         SU2_MPI::Allreduce(&MyStrainMag_Max, &StrainMag_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
         SU2_MPI::Allreduce(&MyOmega_Max, &Omega_Max, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
diff --git a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
index 948fb1da421..a668db4f00d 100644
--- a/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
+++ b/SU2_CFD/include/solvers/CFVMFlowSolverBase.inl
@@ -471,6 +471,7 @@ void CFVMFlowSolverBase<V, R>::Viscous_Residual_impl(unsigned long iEdge, CGeome
 
 template <class V, ENUM_REGIME R>
 void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CConfig* config) {
+
   /*--- The errors only need to be computed on the finest grid. ---*/
   if (MGLevel != MESH_0) return;
 
@@ -485,6 +486,8 @@ void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CCo
        (config->GetInnerIter() == 1));
   if (!write_heads) return;
 
+  SU2_OMP_MASTER {
+
   /*--- Check if there actually is an exact solution for this
         verification case, if computed at all. ---*/
   if (VerificationSolution && VerificationSolution->ExactSolutionKnown()) {
@@ -524,6 +527,10 @@ void CFVMFlowSolverBase<V, R>::ComputeVerificationError(CGeometry* geometry, CCo
 
     PrintVerificationError(config);
   }
+
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 template <class V, ENUM_REGIME R>
@@ -560,6 +567,7 @@ void CFVMFlowSolverBase<V, R>::ComputeUnderRelaxationFactor(const CConfig* confi
 
     nodes->SetUnderRelaxation(iPoint, localUnderRelaxation);
   }
+  END_SU2_OMP_FOR
 }
 
 template <class V, ENUM_REGIME R>
@@ -569,11 +577,12 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
 
   /*--- Solve or smooth the linear system. ---*/
 
-  SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait)
+  SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT)
   for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) {
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
@@ -581,6 +590,7 @@ void CFVMFlowSolverBase<V, R>::ImplicitEuler_Iteration(CGeometry *geometry, CSol
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CompleteImplicitIteration(geometry, nullptr, config);
@@ -836,7 +846,8 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
     SU2_MPI::Error(string("The solution file ") + restart_filename + string(" doesn't match with the mesh file!\n") +
                    string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
   }
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Update the geometry for flows on deforming meshes ---*/
@@ -906,6 +917,7 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
       }
       solver[iMesh][FLOW_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse);
     }
+    END_SU2_OMP_FOR
 
     solver[iMesh][FLOW_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION);
     solver[iMesh][FLOW_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION);
@@ -929,7 +941,8 @@ void CFVMFlowSolverBase<V, R>::LoadRestart_impl(CGeometry **geometry, CSolver **
   delete [] Restart_Vars; Restart_Vars = nullptr;
   delete [] Restart_Data; Restart_Data = nullptr;
 
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -975,6 +988,7 @@ void CFVMFlowSolverBase<V, R>::SetInitialCondition(CGeometry **geometry, CSolver
            but this is not necessary. */
         VerificationSolution->GetInitialCondition(coor, solDOF);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -984,7 +998,8 @@ void CFVMFlowSolverBase<V, R>::SetInitialCondition(CGeometry **geometry, CSolver
     PushSolutionBackInTime(TimeIter, restart, rans, solver_container, geometry, config);
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1269,6 +1284,7 @@ void CFVMFlowSolverBase<V, R>::BC_Sym_Plane(CGeometry* geometry, CSolver** solve
       }  // if viscous
     }    // if GetDomain
   }      // for iVertex
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   for (iVar = 0; iVar < nPrimVarGrad; iVar++) delete[] Grad_Reflected[iVar];
@@ -1450,6 +1466,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::BC_Fluid_Interface(CGeometry* geometry,
           }
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -1512,6 +1529,7 @@ void CFVMFlowSolverBase<V, R>::BC_Custom(CGeometry* geometry, CSolver** solver_c
         }
       }
     }
+    END_SU2_OMP_FOR
 
   } else {
     /* The user must specify the custom BC's here. */
@@ -1546,6 +1564,7 @@ void CFVMFlowSolverBase<V, R>::EdgeFluxResidual(const CGeometry *geometry,
         edgeNumerics->ComputeFlux(iEdge, *config, *geometry, *nodes, UpdateType::COLORING, mask, LinSysRes, Jacobian);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   if (ReducerStrategy) {
@@ -1571,6 +1590,7 @@ void CFVMFlowSolverBase<V, R>::SumEdgeFluxes(const CGeometry* geometry) {
         LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge));
     }
   }
+  END_SU2_OMP_FOR
 }
 
 template <class V, ENUM_REGIME FlowRegime>
@@ -1634,6 +1654,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -1672,6 +1693,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -1704,6 +1726,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
           for (iVar = 0; iVar < nVar; iVar++)
             LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -1746,6 +1769,7 @@ void CFVMFlowSolverBase<V, FlowRegime>::SetResidual_DualTime(CGeometry *geometry
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
diff --git a/SU2_CFD/include/solvers/CSolver.hpp b/SU2_CFD/include/solvers/CSolver.hpp
index 9110ec14344..e7656728c19 100644
--- a/SU2_CFD/include/solvers/CSolver.hpp
+++ b/SU2_CFD/include/solvers/CSolver.hpp
@@ -428,6 +428,7 @@ class CSolver {
       for (auto& r : Residual_Max) r = 0;
       for (auto& p : Point_Max) p = 0;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -3354,11 +3355,6 @@ class CSolver {
    */
   inline virtual unsigned short GetnDVFEA(void) const { return 0; }
 
-  /*!
-   * \brief A virtual member.
-   */
-  inline virtual void ReadDV(CConfig *config) { }
-
   /*!
    * \brief A virtual member.
    * \return Pointer to the values of the Electric Field
@@ -3464,6 +3460,18 @@ class CSolver {
   inline virtual void SetAitken_Relaxation(CGeometry *geometry,
                                            CConfig *config) { }
 
+  /*!
+   * \brief Loads the solution from the restart file.
+   * \param[in] geometry - Geometrical definition of the problem.
+   * \param[in] config - Definition of the particular problem.
+   * \param[in] filename - Name of the restart file.
+   * \param[in] skipVars - Number of variables preceding the solution.
+   */
+  void BasicLoadRestart(CGeometry *geometry,
+                        const CConfig *config,
+                        const string& filename,
+                        unsigned long skipVars);
+
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -3625,13 +3633,6 @@ class CSolver {
    */
   inline virtual void SetAdjoint_Output(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] config - The particular config.
-   */
-  inline virtual void SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config) {}
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - The geometrical definition of the problem.
@@ -3640,14 +3641,6 @@ class CSolver {
    */
   inline virtual void ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] geometry - The geometrical definition of the problem.
-   * \param[in] solver_container - The solver container holding all solutions.
-   * \param[in] config - The particular config.
-   */
-  inline virtual void ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {}
-
   /*!
    * \brief  A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
@@ -3830,14 +3823,6 @@ class CSolver {
    */
   inline virtual void SetRecording(CGeometry *geometry, CConfig *config){}
 
-  /*!
-   * \brief A virtual member.
-   * \param[in] kind_recording - Kind of AD recording.
-   */
-  inline virtual void SetMesh_Recording(CGeometry **geometry,
-                                        CVolumetricMovement *grid_movement,
-                                        CConfig *config) {}
-
   /*!
    * \brief A virtual member.
    * \param[in] geometry - Geometrical definition of the problem.
diff --git a/SU2_CFD/include/solvers/CTurbSolver.hpp b/SU2_CFD/include/solvers/CTurbSolver.hpp
index b157e3d3291..9e032a659f8 100644
--- a/SU2_CFD/include/solvers/CTurbSolver.hpp
+++ b/SU2_CFD/include/solvers/CTurbSolver.hpp
@@ -260,6 +260,7 @@ class CTurbSolver : public CSolver {
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){
       nodes->SetSolution(iPoint, Solution_Inf);
     }
+    END_SU2_OMP_FOR
   }
 
   /*!
diff --git a/SU2_CFD/obj/Makefile.am b/SU2_CFD/obj/Makefile.am
index 30e7636a0e5..054df6fa267 100644
--- a/SU2_CFD/obj/Makefile.am
+++ b/SU2_CFD/obj/Makefile.am
@@ -10,7 +10,7 @@
 # The SU2 Project is maintained by the SU2 Foundation 
 # (http://su2foundation.org)
 #
-# Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)
+# Copyright 2012-2021, SU2 Contributors (cf. AUTHORS.md)
 #
 # SU2 is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
diff --git a/SU2_CFD/src/SU2_CFD.cpp b/SU2_CFD/src/SU2_CFD.cpp
index 8b19f5d6134..d9dfbbdc12f 100644
--- a/SU2_CFD/src/SU2_CFD.cpp
+++ b/SU2_CFD/src/SU2_CFD.cpp
@@ -56,6 +56,10 @@ int main(int argc, char *argv[]) {
 
   CLI11_PARSE(app, argc, argv)
 
+  /*--- OpenMP initialization ---*/
+
+  omp_initialize();
+
   omp_set_num_threads(num_threads);
 
   /*--- MPI initialization, and buffer setting ---*/
@@ -69,6 +73,11 @@ int main(int argc, char *argv[]) {
 #endif
   SU2_MPI::Comm MPICommunicator = SU2_MPI::GetComm();
 
+  /*--- AD initialization ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().initialize();
+#endif
+
   /*--- Uncomment the following line if runtime NaN catching is desired. ---*/
   // feenableexcept(FE_INVALID | FE_OVERFLOW);
 
@@ -160,9 +169,17 @@ int main(int argc, char *argv[]) {
   libxsmm_finalize();
 #endif
 
+  /*--- Finalize AD, if necessary. ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().finalize();
+#endif
+
   /*--- Finalize MPI parallelization. ---*/
   SU2_MPI::Finalize();
 
+  /*--- Finalize OpenMP. ---*/
+  omp_finalize();
+
   return EXIT_SUCCESS;
 
 }
diff --git a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
index a3912276f13..605378c96ae 100644
--- a/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
+++ b/SU2_CFD/src/drivers/CDiscAdjMultizoneDriver.cpp
@@ -639,7 +639,7 @@ void CDiscAdjMultizoneDriver::SetRecording(unsigned short kind_recording, Kind_T
     if (rank == MASTER_NODE) AD::PrintStatistics();
 #ifdef CODI_REVERSE_TYPE
     if (size > SINGLE_NODE) {
-      su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0;
+      su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0;
       SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
       if (rank == MASTER_NODE) {
         cout << "MPI\n";
diff --git a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
index 2806edd830a..208ce710e8c 100644
--- a/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
+++ b/SU2_CFD/src/drivers/CDiscAdjSinglezoneDriver.cpp
@@ -295,7 +295,7 @@ void CDiscAdjSinglezoneDriver::SetRecording(unsigned short kind_recording){
     if (rank == MASTER_NODE) AD::PrintStatistics();
 #ifdef CODI_REVERSE_TYPE
     if (size > SINGLE_NODE) {
-      su2double myMem = AD::globalTape.getTapeValues().getUsedMemorySize(), totMem = 0.0;
+      su2double myMem = AD::getGlobalTape().getTapeValues().getUsedMemorySize(), totMem = 0.0;
       SU2_MPI::Allreduce(&myMem, &totMem, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
       if (rank == MASTER_NODE) {
         cout << "MPI\n";
diff --git a/SU2_CFD/src/drivers/CDriver.cpp b/SU2_CFD/src/drivers/CDriver.cpp
index 7fc307204ab..214f83c5b64 100644
--- a/SU2_CFD/src/drivers/CDriver.cpp
+++ b/SU2_CFD/src/drivers/CDriver.cpp
@@ -815,6 +815,7 @@ void CDriver::Geometrical_Preprocessing_FVM(CConfig *config, CGeometry **&geomet
     geometry[MESH_0]->SetControlVolume(config, ALLOCATE);
     geometry[MESH_0]->SetBoundControlVolume(config, ALLOCATE);
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Visualize a dual control volume if requested ---*/
 
@@ -1280,6 +1281,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry,
     if (euler || ns) {
       SU2_OMP_PARALLEL_(if(solver[MESH_0][FLOW_SOL]->GetHasHybridParallel()))
       solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
+      END_SU2_OMP_PARALLEL
     }
     if (NEMO_euler || NEMO_ns) {
       solver[MESH_0][FLOW_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
@@ -1287,6 +1289,7 @@ void CDriver::Solver_Restart(CSolver ***solver, CGeometry **geometry,
     if (turbulent) {
       SU2_OMP_PARALLEL_(if(solver[MESH_0][TURB_SOL]->GetHasHybridParallel()))
       solver[MESH_0][TURB_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
+      END_SU2_OMP_PARALLEL
     }
     if (config->AddRadiation()) {
       solver[MESH_0][RAD_SOL]->LoadRestart(geometry, solver, config, val_iter, update_geo);
@@ -1598,6 +1601,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Convective scheme not implemented (template_solver).", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -1624,6 +1628,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE :
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -1643,6 +1648,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid centered scheme or not implemented.\n Currently, only JST and LAX-FRIEDRICH are available for incompressible flows.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
           for (iMGlevel = 1; iMGlevel <= config->GetnMGLevels(); iMGlevel++)
@@ -1761,6 +1767,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -1777,6 +1784,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.\n Currently, only FDS is available for incompressible flows.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
         }
@@ -1785,6 +1793,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -1885,6 +1894,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE :
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_FLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -1895,6 +1905,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Invalid centered scheme or not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           }
 
@@ -1948,6 +1959,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Invalid upwind scheme or not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -1957,6 +1969,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the NEMO Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2028,6 +2041,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Riemann solver not implemented.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2043,6 +2057,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_UPWIND:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND :
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) {
@@ -2055,6 +2070,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the turbulence equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2106,6 +2122,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_UPWIND:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_TURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND:
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++) {
@@ -2115,6 +2132,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the transition equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2159,6 +2177,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
         default:
           SU2_OMP_MASTER
           SU2_MPI::Error("Invalid convective scheme for the heat transfer equations.", CURRENT_FUNCTION);
+          END_SU2_OMP_MASTER
           break;
       }
     }
@@ -2184,6 +2203,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
     if (incompressible)
       SU2_OMP_MASTER
       SU2_MPI::Error("Convective schemes not implemented for incompressible continuous adjoint.", CURRENT_FUNCTION);
+      END_SU2_OMP_MASTER
 
     /*--- Definition of the convective scheme for each equation and mesh level ---*/
 
@@ -2191,6 +2211,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       case NO_CONVECTIVE:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJFLOW option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
 
       case SPACE_CENTERED :
@@ -2205,6 +2226,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Centered scheme not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
 
@@ -2233,6 +2255,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             default:
               SU2_OMP_MASTER
               SU2_MPI::Error("Upwind scheme not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
               break;
           }
         }
@@ -2241,6 +2264,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Invalid convective scheme for the continuous adjoint Euler / Navier-Stokes equations.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2304,12 +2328,14 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
     if (!spalart_allmaras)
       SU2_OMP_MASTER
       SU2_MPI::Error("Only the SA turbulence model can be used with the continuous adjoint solver.", CURRENT_FUNCTION);
+      END_SU2_OMP_MASTER
 
     /*--- Definition of the convective scheme for each equation and mesh level ---*/
     switch (config->GetKind_ConvNumScheme_AdjTurb()) {
       case NO_CONVECTIVE:
         SU2_OMP_MASTER
         SU2_MPI::Error("Config file is missing the CONV_NUM_METHOD_ADJTURB option.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
       case SPACE_UPWIND :
         for (iMGlevel = 0; iMGlevel <= config->GetnMGLevels(); iMGlevel++)
@@ -2318,6 +2344,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Convective scheme not implemented (adjoint turbulence).", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
@@ -2350,10 +2377,12 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
           case NEO_HOOKEAN:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
         }
         break;
@@ -2362,6 +2391,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
           case LINEAR_ELASTIC:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model does not correspond to geometric conditions.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
           case NEO_HOOKEAN:
             if (config->GetMaterialCompressibility() == COMPRESSIBLE_MAT) {
@@ -2369,6 +2399,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           case KNOWLES:
@@ -2377,6 +2408,7 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           case IDEAL_DE:
@@ -2385,17 +2417,20 @@ void CDriver::Numerics_Preprocessing(CConfig *config, CGeometry **geometry, CSol
             } else {
               SU2_OMP_MASTER
               SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+              END_SU2_OMP_MASTER
             }
             break;
           default:
             SU2_OMP_MASTER
             SU2_MPI::Error("Material model not implemented.", CURRENT_FUNCTION);
+            END_SU2_OMP_MASTER
             break;
         }
         break;
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Solver not implemented.", CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
         break;
     }
 
diff --git a/SU2_CFD/src/fluid/CPengRobinson.cpp b/SU2_CFD/src/fluid/CPengRobinson.cpp
index 7dc56bd4d25..09d0e67a557 100644
--- a/SU2_CFD/src/fluid/CPengRobinson.cpp
+++ b/SU2_CFD/src/fluid/CPengRobinson.cpp
@@ -130,7 +130,7 @@ void CPengRobinson::SetTDState_rhoe(su2double rho, su2double e) {
 
   dTde_rho = 1 / Cv;
 
-  Zed = Pressure / (Gas_Constant * Temperature * Density);
+  Zed = Pressure / (Gas_Constant * Temperature * rho);
 
   AD::SetPreaccOut(Temperature);
   AD::SetPreaccOut(SoundSpeed2);
diff --git a/SU2_CFD/src/integration/CIntegration.cpp b/SU2_CFD/src/integration/CIntegration.cpp
index 47b1743ef4d..146c453988f 100644
--- a/SU2_CFD/src/integration/CIntegration.cpp
+++ b/SU2_CFD/src/integration/CIntegration.cpp
@@ -217,7 +217,8 @@ void CIntegration::SetDualTime_Geometry(CGeometry *geometry, CSolver *mesh_solve
 
   if ((iMesh==MESH_0) && config->GetDeform_Mesh()) mesh_solver->SetDualTime_Mesh();
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver, const CConfig *config, unsigned short iMesh) {
@@ -230,6 +231,7 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver
 
   SU2_OMP_MASTER
   solver->ResetCFLAdapt();
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPoint(), omp_get_num_threads()))
@@ -241,6 +243,8 @@ void CIntegration::SetDualTime_Solver(const CGeometry *geometry, CSolver *solver
     /*--- Initialize the local CFL number ---*/
     solver->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(iMesh));
   }
+  END_SU2_OMP_FOR
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
diff --git a/SU2_CFD/src/integration/CMultiGridIntegration.cpp b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
index 43ca2c9e9c3..f3ae3835121 100644
--- a/SU2_CFD/src/integration/CMultiGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CMultiGridIntegration.cpp
@@ -94,6 +94,7 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry,
 
     SU2_OMP_MASTER
     config[iZone]->SubtractFinestMesh();
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -120,7 +121,8 @@ void CMultiGridIntegration::MultiGrid_Iteration(CGeometry ****geometry,
                             numerics_container[iZone][iInst], config[iZone],
                             FinestMesh, RunTime_EqSystem, &monitor);
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -334,6 +336,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
     for (iVar = 0; iVar < nVar; iVar++)
       sol_coarse->GetNodes()->SetSolution_Old(Point_Coarse,Solution);
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -354,6 +357,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
         sol_coarse->GetNodes()->SetVelocity_Old(Point_Coarse, zero);
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -369,6 +373,7 @@ void CMultiGridIntegration::GetProlongated_Correction(unsigned short RunTime_EqS
       sol_fine->LinSysRes.SetBlock(Point_Fine, sol_coarse->GetNodes()->GetSolution_Old(Point_Coarse));
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -389,6 +394,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
     Residual_Old = solver->LinSysRes.GetBlock(iPoint);
     solver->GetNodes()->SetResidual_Old(iPoint,Residual_Old);
   }
+  END_SU2_OMP_FOR
 
   /*--- Jacobi iterations. ---*/
 
@@ -408,6 +414,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over all mesh points (update residuals with the neighbor averages). ---*/
 
@@ -422,6 +429,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
       for (iVar = 0; iVar < nVar; iVar++)
         solver->LinSysRes(iPoint,iVar) = (Residual_Old[iVar] + val_smooth_coeff*Residual_Sum[iVar])*factor;
     }
+    END_SU2_OMP_FOR
 
     /*--- Restore original residuals (without average) at boundary points. ---*/
 
@@ -435,6 +443,7 @@ void CMultiGridIntegration::SmoothProlongated_Correction(unsigned short RunTime_
           Residual_Old = solver->GetNodes()->GetResidual_Old(iPoint);
           solver->LinSysRes.SetBlock(iPoint, Residual_Old);
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -462,6 +471,7 @@ void CMultiGridIntegration::SetProlongated_Correction(CSolver *sol_fine, CGeomet
       Solution_Fine[iVar] += factor*Residual_Fine[iVar];
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- MPI the new interpolated solution ---*/
 
@@ -482,6 +492,7 @@ void CMultiGridIntegration::SetProlongated_Solution(unsigned short RunTime_EqSys
       sol_fine->GetNodes()->SetSolution(Point_Fine, sol_coarse->GetNodes()->GetSolution(Point_Coarse));
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coarse, CGeometry *geo_fine,
@@ -511,6 +522,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
     }
     sol_coarse->GetNodes()->AddRes_TruncError(Point_Coarse, Residual);
   }
+  END_SU2_OMP_FOR
 
   delete [] Residual;
 
@@ -521,6 +533,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
         Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode();
         sol_coarse->GetNodes()->SetVel_ResTruncError_Zero(Point_Coarse);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -528,6 +541,7 @@ void CMultiGridIntegration::SetForcing_Term(CSolver *sol_fine, CSolver *sol_coar
   for (Point_Coarse = 0; Point_Coarse < geo_coarse->GetnPointDomain(); Point_Coarse++) {
     sol_coarse->GetNodes()->SubtractRes_TruncError(Point_Coarse, sol_coarse->LinSysRes.GetBlock(Point_Coarse));
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -536,6 +550,7 @@ void CMultiGridIntegration::SetResidual_Term(CGeometry *geometry, CSolver *solve
   SU2_OMP_FOR_STAT(roundUpDiv(geometry->GetnPointDomain(), omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < geometry->GetnPointDomain(); iPoint++)
     solver->LinSysRes.AddBlock(iPoint, solver->GetNodes()->GetResTruncError(iPoint));
+  END_SU2_OMP_FOR
 
 }
 
@@ -575,6 +590,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst
     sol_coarse->GetNodes()->SetSolution(Point_Coarse, Solution);
 
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -609,6 +625,7 @@ void CMultiGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSyst
         }
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -652,6 +669,7 @@ void CMultiGridIntegration::SetRestricted_Gradient(unsigned short RunTime_EqSyst
     }
     sol_coarse->GetNodes()->SetGradient(Point_Coarse,Gradient);
   }
+  END_SU2_OMP_FOR
 
   for (iVar = 0; iVar < nVar; iVar++)
     delete [] Gradient[iVar];
@@ -693,6 +711,7 @@ void CMultiGridIntegration::NonDimensional_Parameters(CGeometry **geometry, CSol
                                                    numerics_container[FinestMesh][ADJFLOW_SOL][CONV_BOUND_TERM], config);
       break;
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -719,6 +738,7 @@ void CMultiGridIntegration::Adjoint_Setup(CGeometry ****geometry, CSolver *****s
       solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CT(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CT());
       solver_container[iZone][INST_0][iMGLevel][FLOW_SOL]->SetTotal_CQ(solver_container[iZone][INST_0][MESH_0][FLOW_SOL]->GetTotal_CQ());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /*--- Restrict solution and gradients to the coarse levels ---*/
diff --git a/SU2_CFD/src/integration/CNewtonIntegration.cpp b/SU2_CFD/src/integration/CNewtonIntegration.cpp
index 127640df26d..da3ec5e14e4 100644
--- a/SU2_CFD/src/integration/CNewtonIntegration.cpp
+++ b/SU2_CFD/src/integration/CNewtonIntegration.cpp
@@ -113,6 +113,7 @@ void CNewtonIntegration::PerturbSolution(const CSysVector<Scalar>& dir, Scalar m
     for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar)
       solvers[FLOW_SOL]->GetNodes()->AddSolution(iPoint,iVar, mag*dir(iPoint,iVar));
   }
+  END_SU2_OMP_FOR
 }
 
 void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
@@ -122,6 +123,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
   if (type == EXPLICIT) {
     SU2_OMP_MASTER
     config->SetKind_TimeIntScheme(EULER_EXPLICIT);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -133,6 +135,7 @@ void CNewtonIntegration::ComputeResiduals(ResEvalType type) {
   if (type == EXPLICIT) {
     SU2_OMP_MASTER
     config->SetKind_TimeIntScheme(TimeIntScheme);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -145,11 +148,13 @@ void CNewtonIntegration::ComputeFinDiffStep() {
 
   SU2_OMP_MASTER
   rmsSol = 0.0;
+  END_SU2_OMP_MASTER
 
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < geometry->GetnPointDomain(); ++iPoint)
     for (auto iVar = 0ul; iVar < solvers[FLOW_SOL]->GetnVar(); ++iVar)
       rmsSol_loc += pow(solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint,iVar), 2);
+  END_SU2_OMP_FOR
 
   atomicAdd(rmsSol_loc, rmsSol);
 
@@ -159,6 +164,7 @@ void CNewtonIntegration::ComputeFinDiffStep() {
     SU2_MPI::Allreduce(&t, &rmsSol, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
     finDiffStep = finDiffStepND * max(1.0, sqrt(SU2_TYPE::GetValue(rmsSol) / geometry->GetGlobal_nPointDomain()));
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
@@ -194,6 +200,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto i = 0ul; i < LinSysRes.GetNElmDomain(); ++i)
     LinSysRes[i] = SU2_TYPE::GetValue(solvers[FLOW_SOL]->LinSysRes[i]);
+  END_SU2_OMP_FOR
 
   su2double residual = 0.0;
   for (auto iVar = 0ul; iVar < LinSysRes.GetNVar(); ++iVar)
@@ -208,6 +215,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
       firstResidual = max(firstResidual, residual);
       if (startupIters) startupIters -= 1;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     endStartup = (startupIters == 0) && (residual - firstResidual < startupResidual);
   }
@@ -219,6 +227,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
   if (!startupPeriod && tolRelaxFactor > 1 && fullTolResidual < 0.0) {
     SU2_OMP_MASTER
     firstResidual = max(firstResidual, residual);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     su2double x = (residual - firstResidual) / fullTolResidual;
     toleranceFactor = 1.0 + (tolRelaxFactor-1)*max(0.0, 1.0-SU2_TYPE::GetValue(x));
@@ -249,6 +258,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
     solvers[FLOW_SOL]->SetIterLinSolver(iter);
     solvers[FLOW_SOL]->SetResLinSolver(eps);
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /// TODO: Clever back-tracking and CFL adaptation based on residual reduction.
@@ -268,6 +278,7 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
     solvers[FLOW_SOL]->Momentum_Forces(geometry, config);
     solvers[FLOW_SOL]->Friction_Forces(geometry, config);
   }
+  END_SU2_OMP_MASTER
 
   /*--- At the end of the startup period the CFL is reset to the initial value. ---*/
 
@@ -276,12 +287,15 @@ void CNewtonIntegration::MultiGrid_Iteration(CGeometry ****geometry_, CSolver **
       startupPeriod = false;
       firstResidual = residual;
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < geometry->GetnPoint(); ++iPoint)
       solvers[FLOW_SOL]->GetNodes()->SetLocalCFL(iPoint, config->GetCFL(MESH_0));
+    END_SU2_OMP_FOR
   }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CNewtonIntegration::MatrixFreeProduct(const CSysVector<Scalar>& u, CSysVector<Scalar>& v) {
@@ -310,6 +324,7 @@ void CNewtonIntegration::MatrixFreeProduct(const CSysVector<Scalar>& u, CSysVect
       v(iPoint,iVar) += SU2_TYPE::GetValue(delta) * u(iPoint,iVar);
     }
   }
+  END_SU2_OMP_FOR
 
   CSysMatrixComms::Initiate(v, geometry, config);
   CSysMatrixComms::Complete(v, geometry, config);
@@ -332,6 +347,7 @@ void CNewtonIntegration::Preconditioner(const CSysVector<Scalar>& u, CSysVector<
       for (auto iVar = 0ul; iVar < u.GetNVar(); ++iVar)
         v(iPoint,iVar) = SU2_TYPE::GetValue(delta) * u(iPoint,iVar);
     }
+    END_SU2_OMP_FOR
 
     CSysMatrixComms::Initiate(v, geometry, config);
     CSysMatrixComms::Complete(v, geometry, config);
diff --git a/SU2_CFD/src/integration/CSingleGridIntegration.cpp b/SU2_CFD/src/integration/CSingleGridIntegration.cpp
index 6fff9748820..d20f2013820 100644
--- a/SU2_CFD/src/integration/CSingleGridIntegration.cpp
+++ b/SU2_CFD/src/integration/CSingleGridIntegration.cpp
@@ -79,6 +79,7 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve
   if (RunTime_EqSystem == RUNTIME_HEAT_SYS) {
     SU2_OMP_MASTER
     solvers_fine[HEAT_SOL]->Heat_Fluxes(geometry_fine, solvers_fine, config[iZone]);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -105,8 +106,8 @@ void CSingleGridIntegration::SingleGrid_Iteration(CGeometry ****geometry, CSolve
 
   }
 
-  } // end SU2_OMP_PARALLEL
-
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSystem, CSolver *sol_fine, CSolver *sol_coarse,
@@ -141,6 +142,7 @@ void CSingleGridIntegration::SetRestricted_Solution(unsigned short RunTime_EqSys
     sol_coarse->GetNodes()->SetSolution(Point_Coarse,Solution);
 
   }
+  END_SU2_OMP_FOR
 
   delete [] Solution;
 
@@ -177,6 +179,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys
     sol_coarse->GetNodes()->SetmuT(Point_Coarse,EddyVisc);
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Update solution at the no slip wall boundary, only the first
    variable (nu_tilde -in SA and SA_NEG- and k -in SST-), to guarantee that the eddy viscoisty
@@ -189,6 +192,7 @@ void CSingleGridIntegration::SetRestricted_EddyVisc(unsigned short RunTime_EqSys
         Point_Coarse = geo_coarse->vertex[iMarker][iVertex]->GetNode();
         sol_coarse->GetNodes()->SetmuT(Point_Coarse,0.0);
       }
+      END_SU2_OMP_FOR
     }
   }
 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
index d4237ca4f13..a19210d468f 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFEAIteration.cpp
@@ -63,20 +63,22 @@ CDiscAdjFEAIteration::CDiscAdjFEAIteration(const CConfig *config) : CIteration(c
 }
 
 CDiscAdjFEAIteration::~CDiscAdjFEAIteration(void) {}
+
 void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                       CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                       CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
                                       CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
   unsigned long iPoint;
-  unsigned short TimeIter = config[val_iZone]->GetTimeIter();
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
-
-  int Direct_Iter;
+  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
+  auto geometry0 = geometry[val_iZone][val_iInst][MESH_0];
+  auto dirNodes = solvers0[FEA_SOL]->GetNodes();
+  auto adjNodes = solvers0[ADJFEA_SOL]->GetNodes();
 
   /*--- For the dynamic adjoint, load direct solutions from restart files. ---*/
 
-  if (dynamic) {
-    Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1;
+  if (config[val_iZone]->GetTime_Domain()) {
+    const int TimeIter = config[val_iZone]->GetTimeIter();
+    const int Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - TimeIter - 1;
 
     /*--- We want to load the already converged solution at timesteps n and n-1 ---*/
 
@@ -86,15 +88,15 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->Set_Solution_time_n();
+    dirNodes->Set_Solution_time_n();
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Accel_time_n();
+    dirNodes->SetSolution_Accel_time_n();
 
     /*--- Push solution back to correct array ---*/
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->SetSolution_Vel_time_n();
+    dirNodes->SetSolution_Vel_time_n();
 
     /*--- Load solution timestep n ---*/
 
@@ -102,33 +104,28 @@ void CDiscAdjFEAIteration::Preprocess(COutput* output, CIntegration**** integrat
 
     /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint));
     }
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Accel_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Accel(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Accel_Direct(iPoint, dirNodes->GetSolution_Accel(iPoint));
     }
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Vel_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution_Vel(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Vel_Direct(iPoint, dirNodes->GetSolution_Vel(iPoint));
     }
 
   } else {
     /*--- Store FEA solution also in the adjoint solver in order to be able to reset it later ---*/
 
-    for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-      solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetNodes()->SetSolution_Direct(
-          iPoint, solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetNodes()->GetSolution(iPoint));
+    for (iPoint = 0; iPoint < geometry0->GetnPoint(); iPoint++) {
+      adjNodes->SetSolution_Direct(iPoint, dirNodes->GetSolution(iPoint));
     }
   }
 
-  solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->Preprocessing(
-      geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-      RUNTIME_ADJFEA_SYS, false);
+  solvers0[ADJFEA_SOL]->Preprocessing(geometry0, solvers0, config[val_iZone], MESH_0, 0, RUNTIME_ADJFEA_SYS, false);
+
 }
 
 void CDiscAdjFEAIteration::LoadDynamic_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
@@ -162,7 +159,6 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration
                                    CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                    CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement,
                                    CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
 
   /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/
 
@@ -171,92 +167,11 @@ void CDiscAdjFEAIteration::Iterate(COutput* output, CIntegration**** integration
 
   solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->ExtractAdjoint_Variables(geometry[val_iZone][val_iInst][MESH_0],
                                                                              config[val_iZone]);
-  if (dynamic) {
+  if (config[val_iZone]->GetTime_Domain()) {
     integration[val_iZone][val_iInst][ADJFEA_SOL]->SetConvergence(false);
   }
 }
 
-void CDiscAdjFEAIteration::SetRecording(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst,
-                                        unsigned short kind_recording) {
-  unsigned long InnerIter = config[ZONE_0]->GetInnerIter();
-  unsigned long TimeIter = config[val_iZone]->GetTimeIter(), DirectTimeIter;
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
-
-  DirectTimeIter = 0;
-  if (dynamic) {
-    DirectTimeIter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 1;
-  }
-
-  /*--- Reset the tape ---*/
-
-  AD::Reset();
-
-  /*--- We only need to reset the indices if the current recording is different from the recording we want to have ---*/
-
-  if (CurrentRecording != kind_recording && (CurrentRecording != NONE)) {
-    solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0],
-                                                                   config[val_iZone]);
-
-    /*--- Clear indices of coupling variables ---*/
-
-    SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, SOLUTION_AND_MESH);
-
-    /*--- Run one iteration while tape is passive - this clears all indices ---*/
-
-    fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement,
-                           FFDBox, val_iZone, val_iInst);
-  }
-
-  /*--- Prepare for recording ---*/
-
-  solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[val_iZone][val_iInst][MESH_0],
-                                                                 config[val_iZone]);
-
-  /*--- Start the recording of all operations ---*/
-
-  AD::StartRecording();
-
-  /*--- Register FEA variables ---*/
-
-  RegisterInput(solver, geometry, config, val_iZone, val_iInst, kind_recording);
-
-  /*--- Compute coupling or update the geometry ---*/
-
-  SetDependencies(solver, geometry, numerics, config, val_iZone, val_iInst, kind_recording);
-
-  /*--- Set the correct direct iteration number ---*/
-
-  if (dynamic) {
-    config[val_iZone]->SetTimeIter(DirectTimeIter);
-  }
-
-  /*--- Run the direct iteration ---*/
-
-  fem_iteration->Iterate(output, integration, geometry, solver, numerics, config, surface_movement, grid_movement,
-                         FFDBox, val_iZone, val_iInst);
-
-  config[val_iZone]->SetTimeIter(TimeIter);
-
-  /*--- Register structural variables and objective function as output ---*/
-
-  RegisterOutput(solver, geometry, config, val_iZone, val_iInst);
-
-  /*--- Stop the recording ---*/
-
-  AD::StopRecording();
-
-  /*--- Set the recording status ---*/
-
-  CurrentRecording = kind_recording;
-
-  /* --- Reset the number of the internal iterations---*/
-
-  config[ZONE_0]->SetInnerIter(InnerIter);
-}
-
 void CDiscAdjFEAIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                         unsigned short val_iZone, unsigned short val_iInst,
                                         unsigned short kind_recording) {
@@ -296,10 +211,21 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
   auto structural_numerics = numerics[iZone][iInst][MESH_0][FEA_SOL];
 
   /*--- Some numerics are only instanciated under these conditions ---*/
-  bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem();
-  bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS;
-  bool de_effects = config[iZone]->GetDE_Effects() && nonlinear;
-  bool element_based = dir_solver->IsElementBased() && nonlinear;
+  const bool fsi = config[iZone]->GetFSI_Simulation() || config[iZone]->GetMultizone_Problem();
+  const bool nonlinear = config[iZone]->GetGeometricConditions() == LARGE_DEFORMATIONS;
+  const bool de_effects = config[iZone]->GetDE_Effects() && nonlinear;
+  const bool element_based = dir_solver->IsElementBased() && nonlinear;
+
+  SU2_OMP_PARALLEL
+  {
+
+  const int thread = omp_get_thread_num();
+  const int offset = thread*MAX_TERMS;
+  const int fea_term = FEA_TERM+offset;
+  const int mat_nhcomp = MAT_NHCOMP+offset;
+  const int mat_idealde = MAT_IDEALDE+offset;
+  const int mat_knowles = MAT_KNOWLES+offset;
+  const int de_term = DE_TERM+offset;
 
   for (unsigned short iProp = 0; iProp < config[iZone]->GetnElasticityMod(); iProp++) {
     su2double E = adj_solver->GetVal_Young(iProp);
@@ -309,33 +235,33 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
 
     /*--- Add dependencies for E and Nu ---*/
 
-    structural_numerics[FEA_TERM]->SetMaterial_Properties(iProp, E, nu);
+    structural_numerics[fea_term]->SetMaterial_Properties(iProp, E, nu);
 
     /*--- Add dependencies for Rho and Rho_DL ---*/
 
-    structural_numerics[FEA_TERM]->SetMaterial_Density(iProp, rho, rhoDL);
+    structural_numerics[fea_term]->SetMaterial_Density(iProp, rho, rhoDL);
 
     /*--- Add dependencies for element-based simulations. ---*/
 
     if (element_based) {
       /*--- Neo Hookean Compressible ---*/
-      structural_numerics[MAT_NHCOMP]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_NHCOMP]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_nhcomp]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_nhcomp]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Ideal DE ---*/
-      structural_numerics[MAT_IDEALDE]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_IDEALDE]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_idealde]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_idealde]->SetMaterial_Density(iProp, rho, rhoDL);
 
       /*--- Knowles ---*/
-      structural_numerics[MAT_KNOWLES]->SetMaterial_Properties(iProp, E, nu);
-      structural_numerics[MAT_KNOWLES]->SetMaterial_Density(iProp, rho, rhoDL);
+      structural_numerics[mat_knowles]->SetMaterial_Properties(iProp, E, nu);
+      structural_numerics[mat_knowles]->SetMaterial_Density(iProp, rho, rhoDL);
     }
   }
 
   if (de_effects) {
     for (unsigned short iEField = 0; iEField < adj_solver->GetnEField(); iEField++) {
-      structural_numerics[FEA_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
-      structural_numerics[DE_TERM]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[fea_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
+      structural_numerics[de_term]->Set_ElectricField(iEField, adj_solver->GetVal_EField(iEField));
     }
   }
 
@@ -351,14 +277,14 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
       for (unsigned short iDV = 0; iDV < adj_solver->GetnDVFEA(); iDV++) {
         su2double dvfea = adj_solver->GetVal_DVFEA(iDV);
 
-        structural_numerics[FEA_TERM]->Set_DV_Val(iDV, dvfea);
+        structural_numerics[fea_term]->Set_DV_Val(iDV, dvfea);
 
-        if (de_effects) structural_numerics[DE_TERM]->Set_DV_Val(iDV, dvfea);
+        if (de_effects) structural_numerics[de_term]->Set_DV_Val(iDV, dvfea);
 
         if (element_based) {
-          structural_numerics[MAT_NHCOMP]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_IDEALDE]->Set_DV_Val(iDV, dvfea);
-          structural_numerics[MAT_KNOWLES]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_nhcomp]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_idealde]->Set_DV_Val(iDV, dvfea);
+          structural_numerics[mat_knowles]->Set_DV_Val(iDV, dvfea);
         }
       }
       break;
@@ -374,6 +300,9 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
     structural_geometry->CompleteComms(structural_geometry, config[iZone], COORDINATES);
   }
 
+  }
+  END_SU2_OMP_PARALLEL
+
   /*--- FSI specific dependencies. ---*/
   if (fsi) {
     /*--- Set relation between solution and predicted displacements, which are the transferred ones. ---*/
@@ -390,6 +319,7 @@ void CDiscAdjFEAIteration::SetDependencies(CSolver***** solver, CGeometry**** ge
     ///       making it a virtual method of CSolver does not feel "right" as its purpose could be confused.
     static_cast<CFEASolver*>(dir_solver)->FilterElementDensities(structural_geometry, config[iZone]);
   }
+
 }
 
 void CDiscAdjFEAIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
@@ -406,11 +336,6 @@ void CDiscAdjFEAIteration::InitializeAdjoint(CSolver***** solver, CGeometry****
   solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
 }
 
-void CDiscAdjFEAIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                  CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                  CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                  CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {}
-
 bool CDiscAdjFEAIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                    CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                    CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
@@ -427,13 +352,14 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  bool dynamic = (config[val_iZone]->GetTime_Domain());
+  const bool dynamic = (config[val_iZone]->GetTime_Domain());
+  auto solvers0 = solver[val_iZone][val_iInst][MESH_0];
 
   // TEMPORARY output only for standalone structural problems
   if ((!config[val_iZone]->GetFSI_Simulation()) && (rank == MASTER_NODE)) {
     unsigned short iVar;
 
-    bool de_effects = config[val_iZone]->GetDE_Effects();
+    const bool de_effects = config[val_iZone]->GetDE_Effects();
 
     /*--- Header of the temporary output file ---*/
     ofstream myfile_res;
@@ -443,24 +369,23 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
 
     myfile_res << config[val_iZone]->GetTimeIter() << "\t";
 
-    solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]);
-    myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][FEA_SOL]->GetTotal_ComboObj() << "\t";
+    solvers0[FEA_SOL]->Evaluate_ObjFunc(config[val_iZone]);
+    myfile_res << scientific << solvers0[FEA_SOL]->GetTotal_ComboObj() << "\t";
 
     for (iVar = 0; iVar < config[val_iZone]->GetnElasticityMod(); iVar++)
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t";
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_E(iVar) << "\t";
     for (iVar = 0; iVar < config[val_iZone]->GetnPoissonRatio(); iVar++)
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t";
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Nu(iVar) << "\t";
     if (dynamic) {
       for (iVar = 0; iVar < config[val_iZone]->GetnMaterialDensity(); iVar++)
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t";
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_Rho(iVar) << "\t";
     }
     if (de_effects) {
       for (iVar = 0; iVar < config[val_iZone]->GetnElectric_Field(); iVar++)
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_EField(iVar)
-                   << "\t";
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_EField(iVar) << "\t";
     }
-    for (iVar = 0; iVar < solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA(); iVar++) {
-      myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t";
+    for (iVar = 0; iVar < solvers0[ADJFEA_SOL]->GetnDVFEA(); iVar++) {
+      myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iVar) << "\t";
     }
 
     myfile_res << endl;
@@ -499,7 +424,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
 
     if (outputDVFEA) {
       unsigned short iDV;
-      unsigned short nDV = solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetnDVFEA();
+      unsigned short nDV = solvers0[ADJFEA_SOL]->GetnDVFEA();
 
       myfile_res << "INDEX"
                  << "\t"
@@ -510,7 +435,7 @@ void CDiscAdjFEAIteration::Postprocess(COutput* output, CIntegration**** integra
       for (iDV = 0; iDV < nDV; iDV++) {
         myfile_res << iDV;
         myfile_res << "\t";
-        myfile_res << scientific << solver[val_iZone][val_iInst][MESH_0][ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV);
+        myfile_res << scientific << solvers0[ADJFEA_SOL]->GetTotal_Sens_DVFEA(iDV);
         myfile_res << endl;
       }
 
diff --git a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
index 59b212c8b9e..dbf2a950dfb 100644
--- a/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjFluidIteration.cpp
@@ -31,91 +31,90 @@
 void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                         CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                         CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
+                                        CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   StartTime = SU2_MPI::Wtime();
 
-  unsigned long iPoint;
-  unsigned short TimeIter = config[val_iZone]->GetTimeIter();
-  bool dual_time_1st = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST);
-  bool dual_time_2nd = (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND);
-  bool dual_time = (dual_time_1st || dual_time_2nd);
-  unsigned short iMesh;
-  int Direct_Iter;
-  bool heat = config[val_iZone]->GetWeakly_Coupled_Heat();
-  bool grid_IsMoving = config[val_iZone]->GetGrid_Movement();
+  const auto TimeIter = config[iZone]->GetTimeIter();
+  const bool dual_time_1st = (config[iZone]->GetTime_Marching() == DT_STEPPING_1ST);
+  const bool dual_time_2nd = (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool dual_time = (dual_time_1st || dual_time_2nd);
+  const bool grid_IsMoving = config[iZone]->GetGrid_Movement();
+  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  auto solvers0 = solver[iZone][iInst][MESH_0];
+  auto geometries = geometry[iZone][iInst];
 
   //  /*--- Read the target pressure for inverse design. ---------------------------------------------*/
-  //  if (config[val_iZone]->GetInvDesign_Cp() == YES)
-  //    output->SetCp_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL],
-  //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
+  //  if (config[iZone]->GetInvDesign_Cp() == YES)
+  //    output->SetCp_InverseDesign(solvers0[FLOW_SOL],
+  //    geometries[MESH_0], config[iZone], ExtIter);
 
   //  /*--- Read the target heat flux ----------------------------------------------------------------*/
   //  if (config[ZONE_0]->GetInvDesign_HeatFlux() == YES)
-  //    output->SetHeatFlux_InverseDesign(solver[val_iZone][val_iInst][MESH_0][FLOW_SOL],
-  //    geometry[val_iZone][val_iInst][MESH_0], config[val_iZone], ExtIter);
+  //    output->SetHeatFlux_InverseDesign(solvers0[FLOW_SOL],
+  //    geometries[MESH_0], config[iZone], ExtIter);
 
   /*--- For the unsteady adjoint, load direct solutions from restart files. ---*/
 
-  if (config[val_iZone]->GetTime_Marching()) {
-    Direct_Iter = SU2_TYPE::Int(config[val_iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2;
+  if (config[iZone]->GetTime_Marching()) {
+    const int Direct_Iter = SU2_TYPE::Int(config[iZone]->GetUnst_AdjointIter()) - SU2_TYPE::Int(TimeIter) - 2 + dual_time;
 
     /*--- For dual-time stepping we want to load the already converged solution at timestep n ---*/
 
-    if (dual_time) {
-      Direct_Iter += 1;
-    }
-
     if (TimeIter == 0) {
       if (dual_time_2nd) {
         /*--- Load solution at timestep n-2 ---*/
-        LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 2);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2);
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n();
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1();
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
+
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1();
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n();
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n();
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1();
           }
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1();
+            geometries[iMesh]->nodes->SetCoord_n();
+            geometries[iMesh]->nodes->SetCoord_n1();
           }
         }
       }
       if (dual_time) {
         /*--- Load solution at timestep n-1 ---*/
-        LoadUnsteady_Solution(geometry, solver, config, val_iZone, val_iInst, Direct_Iter - 1);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1);
 
         /*--- Push solution back to correct array ---*/
 
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n();
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
+
+          solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n();
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n();
+            solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n();
           }
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n();
+            geometries[iMesh]->nodes->SetCoord_n();
           }
         }
       }
 
       /*--- Load solution timestep n ---*/
 
-      LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter);
+      LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter);
 
-      if (config[val_iZone]->GetDeform_Mesh()) {
-        solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart(
-            geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
+      if (config[iZone]->GetDeform_Mesh()) {
+        solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
     } else if ((TimeIter > 0) && dual_time) {
@@ -126,114 +125,120 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
       Afterwards the GridVelocity is computed based on the Coordinates.
       ---*/
 
-      if (config[val_iZone]->GetDeform_Mesh()) {
-        solver[val_iZone][val_iInst][MESH_0][MESH_SOL]->LoadRestart(
-            geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], Direct_Iter, true);
+      if (config[iZone]->GetDeform_Mesh()) {
+        solvers0[MESH_SOL]->LoadRestart(geometries, solver[iZone][iInst], config[iZone], Direct_Iter, true);
       }
 
       /*--- Load solution timestep n-1 | n-2 for DualTimestepping 1st | 2nd order ---*/
       if (dual_time_1st) {
-        LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 1);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 1);
       } else {
-        LoadUnsteady_Solution(geometry, solver, config, val_iInst, val_iZone, Direct_Iter - 2);
+        LoadUnsteady_Solution(geometry, solver, config, iZone, iInst, Direct_Iter - 2);
       }
 
       /*--- Temporarily store the loaded solution in the Solution_Old array ---*/
 
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-        solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Set_OldSolution();
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        auto solvers = solver[iZone][iInst][iMesh];
+
+        solvers[FLOW_SOL]->Set_OldSolution();
         if (turbulent) {
-          solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Set_OldSolution();
+          solvers[TURB_SOL]->Set_OldSolution();
         }
         if (heat) {
-          solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Set_OldSolution();
+          solvers[HEAT_SOL]->Set_OldSolution();
         }
         if (grid_IsMoving) {
-          geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_Old();
+          geometries[iMesh]->nodes->SetCoord_Old();
         }
       }
 
       /*--- Set Solution at timestep n to solution at n-1 ---*/
 
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-        for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-          solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->SetSolution(
-              iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+        auto solvers = solver[iZone][iInst][iMesh];
+
+        for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
+          solvers[FLOW_SOL]->GetNodes()->SetSolution(
+              iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n(iPoint));
 
           if (grid_IsMoving) {
-            geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord(
-                iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n(iPoint));
+            geometries[iMesh]->nodes->SetCoord(
+                iPoint, geometries[iMesh]->nodes->GetCoord_n(iPoint));
           }
           if (turbulent) {
-            solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->SetSolution(
-                iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+            solvers[TURB_SOL]->GetNodes()->SetSolution(
+                iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n(iPoint));
           }
           if (heat) {
-            solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->SetSolution(
-                iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint));
+            solvers[HEAT_SOL]->GetNodes()->SetSolution(
+                iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n(iPoint));
           }
         }
       }
       if (dual_time_1st) {
         /*--- Set Solution at timestep n-1 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
+
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
           }
         }
       }
       if (dual_time_2nd) {
         /*--- Set Solution at timestep n-1 to solution at n-2 ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
+
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_n1(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n(iPoint, geometries[iMesh]->nodes->GetCoord_n1(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_time_n1(iPoint));
             }
           }
         }
         /*--- Set Solution at timestep n-2 to the previously loaded solution ---*/
-        for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-          for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-            solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
-                iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
+        for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+          auto solvers = solver[iZone][iInst][iMesh];
+
+          for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++) {
+            solvers[FLOW_SOL]->GetNodes()->Set_Solution_time_n1(
+                iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution_Old(iPoint));
 
             if (grid_IsMoving) {
-              geometry[val_iZone][val_iInst][iMesh]->nodes->SetCoord_n1(
-                  iPoint, geometry[val_iZone][val_iInst][iMesh]->nodes->GetCoord_Old(iPoint));
+              geometries[iMesh]->nodes->SetCoord_n1(iPoint, geometries[iMesh]->nodes->GetCoord_Old(iPoint));
             }
             if (turbulent) {
-              solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->Set_Solution_time_n1(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[TURB_SOL]->GetNodes()->Set_Solution_time_n1(
+                  iPoint, solvers[TURB_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
             if (heat) {
-              solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->Set_Solution_time_n1(
-                  iPoint, solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
+              solvers[HEAT_SOL]->GetNodes()->Set_Solution_time_n1(
+                  iPoint, solvers[HEAT_SOL]->GetNodes()->GetSolution_Old(iPoint));
             }
           }
         }
@@ -243,97 +248,98 @@ void CDiscAdjFluidIteration::Preprocess(COutput* output, CIntegration**** integr
 
     /*--- Compute & set Grid Velocity via finite differences of the Coordinates. ---*/
     if (grid_IsMoving)
-      for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++)
-        geometry[val_iZone][val_iInst][iMesh]->SetGridVelocity(config[val_iZone], TimeIter);
+      for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++)
+        geometries[iMesh]->SetGridVelocity(config[iZone], TimeIter);
 
   }  // if unsteady
 
+  SU2_OMP_PARALLEL_(if(solvers0[ADJFLOW_SOL]->GetHasHybridParallel())) {
+
   /*--- Store flow solution also in the adjoint solver in order to be able to reset it later ---*/
 
   if (TimeIter == 0 || dual_time) {
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][iMesh]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+    for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      auto solvers = solver[iZone][iInst][iMesh];
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[iMesh]->GetnPoint(); iPoint++)
+        solvers[ADJFLOW_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers[FLOW_SOL]->GetNodes()->GetSolution(iPoint));
+      END_SU2_OMP_FOR
     }
-    if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+    if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
+        solvers0[ADJTURB_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[TURB_SOL]->GetNodes()->GetSolution(iPoint));
+      END_SU2_OMP_FOR
     }
     if (heat) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
+        solvers0[ADJHEAT_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[HEAT_SOL]->GetNodes()->GetSolution(iPoint));
+      END_SU2_OMP_FOR
     }
-    if (config[val_iZone]->AddRadiation()) {
-      for (iPoint = 0; iPoint < geometry[val_iZone][val_iInst][MESH_0]->GetnPoint(); iPoint++) {
-        solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->GetNodes()->SetSolution_Direct(
-            iPoint, solver[val_iZone][val_iInst][MESH_0][RAD_SOL]->GetNodes()->GetSolution(iPoint));
-      }
+    if (config[iZone]->AddRadiation()) {
+      SU2_OMP_FOR_STAT(1024)
+      for (auto iPoint = 0ul; iPoint < geometries[MESH_0]->GetnPoint(); iPoint++)
+        solvers0[ADJRAD_SOL]->GetNodes()->SetSolution_Direct(iPoint, solvers0[RAD_SOL]->GetNodes()->GetSolution(iPoint));
+      END_SU2_OMP_FOR
     }
   }
 
-  solver[val_iZone][val_iInst][MESH_0][ADJFLOW_SOL]->Preprocessing(
-      geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-      RUNTIME_ADJFLOW_SYS, false);
-  if (turbulent && !config[val_iZone]->GetFrozen_Visc_Disc()) {
-    solver[val_iZone][val_iInst][MESH_0][ADJTURB_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJTURB_SYS, false);
+  solvers0[ADJFLOW_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
+                                       MESH_0, 0, RUNTIME_ADJFLOW_SYS, false);
+
+  if (turbulent && !config[iZone]->GetFrozen_Visc_Disc()) {
+    solvers0[ADJTURB_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
+                                         MESH_0, 0, RUNTIME_ADJTURB_SYS, false);
   }
   if (heat) {
-    solver[val_iZone][val_iInst][MESH_0][ADJHEAT_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJHEAT_SYS, false);
+    solvers0[ADJHEAT_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
+                                         MESH_0, 0, RUNTIME_ADJHEAT_SYS, false);
   }
-  if (config[val_iZone]->AddRadiation()) {
-    solver[val_iZone][val_iInst][MESH_0][ADJRAD_SOL]->Preprocessing(
-        geometry[val_iZone][val_iInst][MESH_0], solver[val_iZone][val_iInst][MESH_0], config[val_iZone], MESH_0, 0,
-        RUNTIME_ADJRAD_SYS, false);
+  if (config[iZone]->AddRadiation()) {
+    solvers0[ADJRAD_SOL]->Preprocessing(geometries[MESH_0], solvers0, config[iZone],
+                                        MESH_0, 0, RUNTIME_ADJRAD_SYS, false);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::LoadUnsteady_Solution(CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                                   unsigned short val_iZone, unsigned short val_iInst,
-                                                   int val_DirectIter) {
+                                                   unsigned short iZone, unsigned short iInst, int DirectIter) {
   unsigned short iMesh;
-  bool heat = config[val_iZone]->GetWeakly_Coupled_Heat();
+  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  auto solvers = solver[iZone][iInst];
+
+  if (DirectIter >= 0) {
+    if (rank == MASTER_NODE && iZone == ZONE_0)
+      cout << " Loading flow solution from direct iteration " << DirectIter << "." << endl;
+
+    solvers[MESH_0][FLOW_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, true);
 
-  if (val_DirectIter >= 0) {
-    if (rank == MASTER_NODE && val_iZone == ZONE_0)
-      cout << " Loading flow solution from direct iteration " << val_DirectIter << "." << endl;
-    solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->LoadRestart(
-        geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, true);
     if (turbulent) {
-      solver[val_iZone][val_iInst][MESH_0][TURB_SOL]->LoadRestart(
-          geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false);
+      solvers[MESH_0][TURB_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false);
     }
     if (heat) {
-      solver[val_iZone][val_iInst][MESH_0][HEAT_SOL]->LoadRestart(
-          geometry[val_iZone][val_iInst], solver[val_iZone][val_iInst], config[val_iZone], val_DirectIter, false);
+      solvers[MESH_0][HEAT_SOL]->LoadRestart(geometry[iZone][iInst], solvers, config[iZone], DirectIter, false);
     }
   } else {
     /*--- If there is no solution file we set the freestream condition ---*/
-    if (rank == MASTER_NODE && val_iZone == ZONE_0)
-      cout << " Setting freestream conditions at direct iteration " << val_DirectIter << "." << endl;
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->SetFreeStream_Solution(config[val_iZone]);
-      solver[val_iZone][val_iInst][iMesh][FLOW_SOL]->Preprocessing(
-          geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh,
-          val_DirectIter, RUNTIME_FLOW_SYS, false);
+    if (rank == MASTER_NODE && iZone == ZONE_0)
+      cout << " Setting freestream conditions at direct iteration " << DirectIter << "." << endl;
+
+    for (iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      solvers[iMesh][FLOW_SOL]->SetFreeStream_Solution(config[iZone]);
+      solvers[iMesh][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh,
+                                              DirectIter, RUNTIME_FLOW_SYS, false);
       if (turbulent) {
-        solver[val_iZone][val_iInst][iMesh][TURB_SOL]->SetFreeStream_Solution(config[val_iZone]);
-        solver[val_iZone][val_iInst][iMesh][TURB_SOL]->Postprocessing(
-            geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh);
+        solvers[iMesh][TURB_SOL]->SetFreeStream_Solution(config[iZone]);
+        solvers[iMesh][TURB_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh);
       }
       if (heat) {
-        solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->SetFreeStream_Solution(config[val_iZone]);
-        solver[val_iZone][val_iInst][iMesh][HEAT_SOL]->Postprocessing(
-            geometry[val_iZone][val_iInst][iMesh], solver[val_iZone][val_iInst][iMesh], config[val_iZone], iMesh);
+        solvers[iMesh][HEAT_SOL]->SetFreeStream_Solution(config[iZone]);
+        solvers[iMesh][HEAT_SOL]->Postprocessing(geometry[iZone][iInst][iMesh], solvers[iMesh], config[iZone], iMesh);
       }
     }
   }
@@ -343,8 +349,11 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati
                                      CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                      CSurfaceMovement** surface_movement, CVolumetricMovement*** volume_grid_movement,
                                      CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
+  const bool heat = config[iZone]->GetWeakly_Coupled_Heat();
 
   /*--- Extract the adjoints of the conservative input variables and store them for the next iteration ---*/
 
@@ -364,12 +373,17 @@ void CDiscAdjFluidIteration::Iterate(COutput* output, CIntegration**** integrati
 
     solver[iZone][iInst][MESH_0][ADJRAD_SOL]->ExtractAdjoint_Variables(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                                unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   /*--- Initialize the adjoints the conservative variables ---*/
 
@@ -381,7 +395,7 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry***
     solver[iZone][iInst][MESH_0][ADJTURB_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
 
-  if (heat) {
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->SetAdjoint_Output(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
 
@@ -392,12 +406,17 @@ void CDiscAdjFluidIteration::InitializeAdjoint(CSolver***** solver, CGeometry***
   if (config[iZone]->GetFluidProblem()) {
     solver[iZone][iInst][MESH_0][FLOW_SOL]->SetVertexTractionsAdjoint(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                            unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   if (kind_recording == SOLUTION_VARIABLES || kind_recording == SOLUTION_AND_MESH) {
     /*--- Register flow and turbulent variables as input ---*/
@@ -411,7 +430,7 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     if (turbulent && !frozen_visc) {
       solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]);
     }
-    if (heat) {
+    if (config[iZone]->GetWeakly_Coupled_Heat()) {
       solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterSolution(geometry[iZone][iInst][MESH_0], config[iZone]);
     }
     if (config[iZone]->AddRadiation()) {
@@ -435,17 +454,19 @@ void CDiscAdjFluidIteration::RegisterInput(CSolver***** solver, CGeometry**** ge
     /*--- Boundary displacements ---*/
     solver[iZone][iInst][MESH_0][ADJMESH_SOL]->RegisterVariables(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                           unsigned short iZone, unsigned short iInst, unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
-  /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
 
-  if (solver[iZone][iInst][MESH_0][ADJFEA_SOL]) {
-    solver[iZone][iInst][MESH_0][ADJFEA_SOL]->SetRecording(geometry[iZone][iInst][MESH_0], config[iZone]);
-  }
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
+
+  /*--- Prepare for recording by resetting the solution to the initial converged solution ---*/
 
   for (auto iMesh = 0u; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
     solver[iZone][iInst][iMesh][ADJFLOW_SOL]->SetRecording(geometry[iZone][iInst][iMesh], config[iZone]);
@@ -459,13 +480,16 @@ void CDiscAdjFluidIteration::SetRecording(CSolver***** solver, CGeometry**** geo
   if (config[iZone]->AddRadiation()) {
     solver[iZone][INST_0][MESH_0][ADJRAD_SOL]->SetRecording(geometry[iZone][INST_0][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry**** geometry, CNumerics****** numerics,
                                              CConfig** config, unsigned short iZone, unsigned short iInst,
                                              unsigned short kind_recording) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   if ((kind_recording == MESH_COORDS) || (kind_recording == NONE) || (kind_recording == SOLUTION_AND_MESH)) {
     /*--- Update geometry to get the influence on other geometry variables (normals, volume etc) ---*/
@@ -475,6 +499,8 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
     CGeometry::ComputeWallDistance(config, geometry);
   }
 
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
   /*--- Compute coupling between flow and turbulent equations ---*/
   solver[iZone][iInst][MESH_0][FLOW_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0],
                                                         config[iZone], MESH_0, NO_RK_ITER, RUNTIME_FLOW_SYS, true);
@@ -488,7 +514,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
     solver[iZone][iInst][MESH_0][TURB_SOL]->CompleteComms(geometry[iZone][iInst][MESH_0], config[iZone], SOLUTION);
   }
 
-  if (heat) {
+  }
+  END_SU2_OMP_PARALLEL
+
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][HEAT_SOL]->Set_Heatflux_Areas(geometry[iZone][iInst][MESH_0], config[iZone]);
     solver[iZone][iInst][MESH_0][HEAT_SOL]->Preprocessing(geometry[iZone][iInst][MESH_0], solver[iZone][iInst][MESH_0],
                                                           config[iZone], MESH_0, NO_RK_ITER, RUNTIME_HEAT_SYS, true);
@@ -507,8 +536,10 @@ void CDiscAdjFluidIteration::SetDependencies(CSolver***** solver, CGeometry****
 
 void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                             COutput* output, unsigned short iZone, unsigned short iInst) {
-  bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
-  bool heat = config[iZone]->GetWeakly_Coupled_Heat();
+
+  SU2_OMP_PARALLEL_(if(solver[iZone][iInst][MESH_0][ADJFLOW_SOL]->GetHasHybridParallel())) {
+
+  const bool frozen_visc = config[iZone]->GetFrozen_Visc_Disc();
 
   /*--- Register conservative variables as output of the iteration ---*/
 
@@ -518,7 +549,7 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g
   if (turbulent && !frozen_visc) {
     solver[iZone][iInst][MESH_0][ADJTURB_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
-  if (heat) {
+  if (config[iZone]->GetWeakly_Coupled_Heat()) {
     solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
   if (config[iZone]->AddRadiation()) {
@@ -527,20 +558,21 @@ void CDiscAdjFluidIteration::RegisterOutput(CSolver***** solver, CGeometry**** g
   if (config[iZone]->GetFluidProblem()) {
     solver[iZone][iInst][MESH_0][FLOW_SOL]->RegisterVertexTractions(geometry[iZone][iInst][MESH_0], config[iZone]);
   }
+
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                     CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                     CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                    CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-  unsigned short iMesh;
-
+                                    CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   /*--- Dual time stepping strategy ---*/
 
-  if ((config[val_iZone]->GetTime_Marching() == DT_STEPPING_1ST) ||
-      (config[val_iZone]->GetTime_Marching() == DT_STEPPING_2ND)) {
-    for (iMesh = 0; iMesh <= config[val_iZone]->GetnMGLevels(); iMesh++) {
-      integration[val_iZone][val_iInst][ADJFLOW_SOL]->SetConvergence(false);
+  if ((config[iZone]->GetTime_Marching() == DT_STEPPING_1ST) ||
+      (config[iZone]->GetTime_Marching() == DT_STEPPING_2ND)) {
+    for (unsigned short iMesh = 0; iMesh <= config[iZone]->GetnMGLevels(); iMesh++) {
+      integration[iZone][iInst][ADJFLOW_SOL]->SetConvergence(false);
     }
   }
 }
@@ -548,21 +580,16 @@ void CDiscAdjFluidIteration::Update(COutput* output, CIntegration**** integratio
 bool CDiscAdjFluidIteration::Monitor(COutput* output, CIntegration**** integration, CGeometry**** geometry,
                                      CSolver***** solver, CNumerics****** numerics, CConfig** config,
                                      CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                     CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
+                                     CFreeFormDefBox*** FFDBox, unsigned short iZone, unsigned short iInst) {
   StopTime = SU2_MPI::Wtime();
 
   UsedTime = StopTime - StartTime;
 
   /*--- Write the convergence history for the fluid (only screen output) ---*/
 
-  output->SetHistory_Output(geometry[val_iZone][INST_0][MESH_0], solver[val_iZone][INST_0][MESH_0], config[val_iZone],
-                            config[val_iZone]->GetTimeIter(), config[val_iZone]->GetOuterIter(),
-                            config[val_iZone]->GetInnerIter());
+  output->SetHistory_Output(geometry[iZone][INST_0][MESH_0], solver[iZone][INST_0][MESH_0], config[iZone],
+                            config[iZone]->GetTimeIter(), config[iZone]->GetOuterIter(),
+                            config[iZone]->GetInnerIter());
 
   return output->GetConvergence();
 }
-void CDiscAdjFluidIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                         CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                         CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                         CFreeFormDefBox*** FFDBox, unsigned short val_iZone,
-                                         unsigned short val_iInst) {}
diff --git a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
index 4258a631a52..5d278eb97f9 100644
--- a/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
+++ b/SU2_CFD/src/iteration/CDiscAdjHeatIteration.cpp
@@ -227,9 +227,8 @@ void CDiscAdjHeatIteration::SetDependencies(CSolver***** solver, CGeometry**** g
 
 void CDiscAdjHeatIteration::RegisterOutput(CSolver***** solver, CGeometry**** geometry, CConfig** config,
                                            COutput* output, unsigned short iZone, unsigned short iInst) {
-  solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
 
-  geometry[iZone][iInst][MESH_0]->RegisterOutput_Coordinates(config[iZone]);
+  solver[iZone][iInst][MESH_0][ADJHEAT_SOL]->RegisterOutput(geometry[iZone][iInst][MESH_0], config[iZone]);
 }
 
 void CDiscAdjHeatIteration::Update(COutput* output, CIntegration**** integration, CGeometry**** geometry,
@@ -258,13 +257,3 @@ bool CDiscAdjHeatIteration::Monitor(COutput* output, CIntegration**** integratio
 
   return output->GetConvergence();
 }
-
-void CDiscAdjHeatIteration::Output(COutput* output, CGeometry**** geometry, CSolver***** solver, CConfig** config,
-                                   unsigned long InnerIter, bool StopCalc, unsigned short val_iZone,
-                                   unsigned short val_iInst) {}
-
-void CDiscAdjHeatIteration::Postprocess(COutput* output, CIntegration**** integration, CGeometry**** geometry,
-                                        CSolver***** solver, CNumerics****** numerics, CConfig** config,
-                                        CSurfaceMovement** surface_movement, CVolumetricMovement*** grid_movement,
-                                        CFreeFormDefBox*** FFDBox, unsigned short val_iZone, unsigned short val_iInst) {
-}
diff --git a/SU2_CFD/src/iteration/CFluidIteration.cpp b/SU2_CFD/src/iteration/CFluidIteration.cpp
index 859a11cee03..478235d9973 100644
--- a/SU2_CFD/src/iteration/CFluidIteration.cpp
+++ b/SU2_CFD/src/iteration/CFluidIteration.cpp
@@ -141,6 +141,7 @@ void CFluidIteration::Iterate(COutput* output, CIntegration**** integration, CGe
     SU2_OMP_PARALLEL
     solver[val_iZone][val_iInst][MESH_0][FLOW_SOL]->AdaptCFLNumber(geometry[val_iZone][val_iInst],
                                                                    solver[val_iZone][val_iInst], config[val_iZone]);
+    END_SU2_OMP_PARALLEL
   }
 
   /*--- Call Dynamic mesh update if AEROELASTIC motion was specified ---*/
diff --git a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
index 4025f5b9d0d..c1d78ea11a7 100644
--- a/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
+++ b/SU2_CFD/src/numerics/elasticity/CFEAElasticity.cpp
@@ -333,6 +333,7 @@ void CFEAElasticity::ReadDV(const CConfig *config) {
   bool master_node = false;
   SU2_OMP_MASTER
   master_node = (rank == MASTER_NODE);
+  END_SU2_OMP_MASTER
 
   unsigned long index;
 
diff --git a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
index ed75ae1e0d4..f9052653abb 100644
--- a/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CFEMDataSorter.cpp
@@ -66,7 +66,7 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
   /*--- Create a linear partition --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
   /*--- Prepare the send buffers ---*/
 
@@ -74,17 +74,6 @@ CFEMDataSorter::CFEMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
 }
 
-CFEMDataSorter::~CFEMDataSorter(){
-
-        delete [] Index;
-       delete [] idSend;
-  delete linearPartitioner;
-
-}
-
-
-
-
 void CFEMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) {
 
   /*--- Sort connectivity for each type of element (excluding halos). Note
diff --git a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
index 768438a9b39..c193c98c6a6 100644
--- a/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CFVMDataSorter.cpp
@@ -39,7 +39,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
   nGlobalPointBeforeSort = geometry->GetGlobal_nPointDomain();
   nLocalPointsBeforeSort  = geometry->GetnPointDomain();
 
-  Local_Halo = new int[geometry->GetnPoint()]();
+  Local_Halo.resize(geometry->GetnPoint());
 
   for (unsigned long iPoint = 0; iPoint < geometry->GetnPoint(); iPoint++){
 
@@ -60,7 +60,7 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
   /*--- Prepare the send buffers ---*/
 
@@ -68,16 +68,6 @@ CFVMDataSorter::CFVMDataSorter(CConfig *config, CGeometry *geometry, const vecto
 
 }
 
-CFVMDataSorter::~CFVMDataSorter(){
-
-  delete [] Local_Halo;
-
-        delete [] Index;
-       delete [] idSend;
-  delete linearPartitioner;
-
-}
-
 void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){
 
   unsigned long iPoint, iVertex;
@@ -107,9 +97,6 @@ void CFVMDataSorter::SetHaloPoints(CGeometry *geometry, CConfig *config){
   }
 }
 
-
-
-
 void CFVMDataSorter::SortConnectivity(CConfig *config, CGeometry *geometry, bool val_sort) {
 
   /*--- Sort connectivity for each type of element (excluding halos). Note
@@ -218,7 +205,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
          own elements into the connectivity data structure. ---*/
 
         if (val_sort) {
-          iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+          iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
         } else {
           iProcessor = rank;
         }
@@ -262,14 +249,11 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  unsigned long *connSend = nullptr;
-  connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]();
+  auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]]();
 
   /*--- Allocate arrays for storing halo flags. ---*/
 
-  unsigned short *haloSend = new unsigned short[nElem_Send[size]]();
-  for (int ii = 0; ii < nElem_Send[size]; ii++)
-    haloSend[ii] = false;
+  auto haloSend = new unsigned short[nElem_Send[size]]();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -308,7 +292,7 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
          own elements into the connectivity data structure. ---*/
 
         if (val_sort) {
-          iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+          iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
         } else {
           iProcessor = rank;
         }
@@ -356,10 +340,9 @@ void CFVMDataSorter::SortVolumetricConnectivity(CConfig *config,
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *connRecv = nullptr;
-  connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]]();
+  auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Cum[size]]();
 
-  unsigned short *haloRecv = new unsigned short[nElem_Cum[size]]();
+  auto haloRecv = new unsigned short[nElem_Cum[size]]();
 
 #ifdef HAVE_MPI
 
diff --git a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
index d65e55317ce..e600566155f 100644
--- a/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CParallelDataSorter.cpp
@@ -29,24 +29,12 @@
 #include <cassert>
 #include <numeric>
 
-
-const map<unsigned short, unsigned short> CParallelDataSorter::TypeMap = {
-  {LINE, 0},
-  {TRIANGLE, 1},
-  {QUADRILATERAL, 2},
-  {TETRAHEDRON, 3},
-  {HEXAHEDRON, 4},
-  {PRISM, 5},
-  {PYRAMID, 6}
-};
-
 CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &valFieldNames) :
-  fieldNames(std::move(valFieldNames)){
+  rank(SU2_MPI::GetRank()),
+  size(SU2_MPI::GetSize()),
+  fieldNames(valFieldNames) {  /*--- Copied: a const reference cannot be moved from. ---*/
 
-  rank = SU2_MPI::GetRank();
-  size = SU2_MPI::GetSize();
-
-  GlobalField_Counter = this->fieldNames.size();
+  GlobalField_Counter = fieldNames.size();
 
   Conn_Line_Par = nullptr;
   Conn_Hexa_Par = nullptr;
@@ -56,13 +44,9 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &
   Conn_Tria_Par = nullptr;
   Conn_Pyra_Par = nullptr;
 
-  nPoint_Send  = nullptr;
-  nPoint_Recv  = nullptr;
   Index        = nullptr;
   connSend     = nullptr;
   dataBuffer   = nullptr;
-  passiveDoubleBuffer = nullptr;
-  doubleBuffer = nullptr;
   idSend       = nullptr;
   nSends = 0;
   nRecvs = 0;
@@ -77,8 +61,6 @@ CParallelDataSorter::CParallelDataSorter(CConfig *config, const vector<string> &
   nElemConn_Send = new int[size+1]();
   nElemConn_Cum = new int[size+1]();
 
-  linearPartitioner = nullptr;
-
   nElemPerType.fill(0);
   nElemPerTypeGlobal.fill(0);
 
@@ -104,34 +86,32 @@ CParallelDataSorter::~CParallelDataSorter(){
   delete [] Conn_Pyra_Par;
 
   delete [] connSend;
-
   delete [] dataBuffer;
+  delete [] Index;
+  delete [] idSend;
+
 }
 
 void CParallelDataSorter::SortOutputData() {
 
-  int VARS_PER_POINT = GlobalField_Counter;
-
-#ifdef HAVE_MPI
-  SU2_MPI::Request *send_req, *recv_req;
-  SU2_MPI::Status status;
-  int ind;
-#endif
+  const int VARS_PER_POINT = GlobalField_Counter;
 
   /*--- Allocate the memory that we need for receiving the conn
    values and then cue up the non-blocking receives. Note that
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-
-  unsigned long *idRecv = new unsigned long[nPoint_Recv[size]]();
+  vector<unsigned long> idRecv(nPoint_Recv[size], 0);
 
 #ifdef HAVE_MPI
-  /*--- We need double the number of messages to send both the conn.
-   and the global IDs. ---*/
+  /*--- NOTE: This function calls MPI routines directly, instead of via SU2_MPI::,
+   * because it communicates passivedoubles and not AD types. This avoids some
+   * creative C++ to communicate AD types and then convert to passive. ---*/
+
+  /*--- We need double the number of messages to send both the conn. and the global IDs. ---*/
 
-  send_req = new SU2_MPI::Request[2*nSends];
-  recv_req = new SU2_MPI::Request[2*nRecvs];
+  auto send_req = new MPI_Request[2*nSends];
+  auto recv_req = new MPI_Request[2*nRecvs];
 
   unsigned long iMessage = 0;
   for (int ii=0; ii<size; ii++) {
@@ -141,8 +121,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = VARS_PER_POINT*kk;
       int source = ii;
       int tag    = ii + 1;
-      SU2_MPI::Irecv(&(doubleBuffer[ll]), count, MPI_DOUBLE, source, tag,
-                     SU2_MPI::GetComm(), &(recv_req[iMessage]));
+      MPI_Irecv(&(dataBuffer[ll]), count, MPI_DOUBLE, source, tag,
+                SU2_MPI::GetComm(), &(recv_req[iMessage]));
       iMessage++;
     }
   }
@@ -155,10 +135,10 @@ void CParallelDataSorter::SortOutputData() {
       int ll = VARS_PER_POINT*nPoint_Send[ii];
       int kk = nPoint_Send[ii+1] - nPoint_Send[ii];
       int count  = VARS_PER_POINT*kk;
-      int dest = ii;
+      int dest   = ii;
       int tag    = rank + 1;
-      SU2_MPI::Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
-                     SU2_MPI::GetComm(), &(send_req[iMessage]));
+      MPI_Isend(&(connSend[ll]), count, MPI_DOUBLE, dest, tag,
+                SU2_MPI::GetComm(), &(send_req[iMessage]));
       iMessage++;
     }
   }
@@ -173,8 +153,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int source = ii;
       int tag    = ii + 1;
-      SU2_MPI::Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
-                     SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
+      MPI_Irecv(&(idRecv[ll]), count, MPI_UNSIGNED_LONG, source, tag,
+                SU2_MPI::GetComm(), &(recv_req[iMessage+nRecvs]));
       iMessage++;
     }
   }
@@ -189,8 +169,8 @@ void CParallelDataSorter::SortOutputData() {
       int count  = kk;
       int dest   = ii;
       int tag    = rank + 1;
-      SU2_MPI::Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
-                     SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
+      MPI_Isend(&(idSend[ll]), count, MPI_UNSIGNED_LONG, dest, tag,
+                SU2_MPI::GetComm(), &(send_req[iMessage+nSends]));
       iMessage++;
     }
   }
@@ -202,7 +182,7 @@ void CParallelDataSorter::SortOutputData() {
   int ll = VARS_PER_POINT*nPoint_Send[rank];
   int kk = VARS_PER_POINT*nPoint_Send[rank+1];
 
-  for (int nn=ll; nn<kk; nn++, mm++) doubleBuffer[mm] = connSend[nn];
+  for (int nn=ll; nn<kk; nn++, mm++) dataBuffer[mm] = connSend[nn];
 
   mm = nPoint_Recv[rank];
   ll = nPoint_Send[rank];
@@ -213,47 +193,34 @@ void CParallelDataSorter::SortOutputData() {
   /*--- Wait for the non-blocking sends and recvs to complete. ---*/
 
 #ifdef HAVE_MPI
+  MPI_Status status;
+  int ind;
+
   int number = 2*nSends;
   for (int ii = 0; ii < number; ii++)
-    SU2_MPI::Waitany(number, send_req, &ind, &status);
+    MPI_Waitany(number, send_req, &ind, &status);
 
   number = 2*nRecvs;
   for (int ii = 0; ii < number; ii++)
-    SU2_MPI::Waitany(number, recv_req, &ind, &status);
+    MPI_Waitany(number, recv_req, &ind, &status);
 
   delete [] send_req;
   delete [] recv_req;
 #endif
 
-  /*--- Note, passiveDoubleBuffer and doubleBuffer point to the same address.
-   * This is the reason why we have to do the following copy/reordering in two steps. ---*/
+  /*--- Reorder the data in the buffer. ---*/
 
-  /*--- Step 1: Extract the underlying double value --- */
+  vector<passivedouble> tmpBuffer(nPoint_Recv[size]);
 
-  if (!std::is_same<su2double, passivedouble>::value){
-    for (int jj = 0; jj < VARS_PER_POINT*nPoint_Recv[size]; jj++){
-      const passivedouble tmpVal = SU2_TYPE::GetValue(doubleBuffer[jj]);
-      passiveDoubleBuffer[jj] = tmpVal;
-      /*--- For some AD datatypes a call of the destructor is
-       *  necessary to properly delete the AD type ---*/
-      doubleBuffer[jj].~su2double();
-    }
-  }
-
-  /*--- Step 2: Reorder the data in the buffer --- */
-
-  passivedouble *tmpBuffer = new passivedouble[nPoint_Recv[size]];
   for (int jj = 0; jj < VARS_PER_POINT; jj++){
     for (int ii = 0; ii < nPoint_Recv[size]; ii++){
-      tmpBuffer[idRecv[ii]] = passiveDoubleBuffer[ii*VARS_PER_POINT+jj];
+      tmpBuffer[idRecv[ii]] = dataBuffer[ii*VARS_PER_POINT+jj];
     }
     for (int ii = 0; ii < nPoint_Recv[size]; ii++){
-      passiveDoubleBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii];
+      dataBuffer[ii*VARS_PER_POINT+jj] = tmpBuffer[ii];
     }
   }
 
-  delete [] tmpBuffer;
-
   /*--- Store the total number of local points my rank has for
    the current section after completing the communications. ---*/
 
@@ -261,12 +228,8 @@ void CParallelDataSorter::SortOutputData() {
 
   /*--- Reduce the total number of points we will write in the output files. ---*/
 
-  SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1,
-                     MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
+  SU2_MPI::Allreduce(&nPoints, &nPointsGlobal, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
 
-  /*--- Free temporary memory from communications ---*/
-
-  delete [] idRecv;
 }
 
 void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalID){
@@ -285,7 +248,7 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
 
   for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++ ) {
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]);
+    iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]);
 
     /*--- If we have not visited this node yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -318,18 +281,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  connSend = nullptr;
-  connSend = new su2double[VARS_PER_POINT*nPoint_Send[size]]();
+  connSend = new passivedouble[VARS_PER_POINT*nPoint_Send[size]] ();
 
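-  /*--- Allocate the data buffer to hold the sorted data. We have to make it large enough
-   * to hold passivedoubles and su2doubles ---*/
+  /*--- Allocate the data buffer to hold the sorted data. Like connSend, it stores
+   * passivedoubles only (no AD types). ---*/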
-  unsigned short maxSize = max(sizeof(passivedouble), sizeof(su2double));
-  dataBuffer = new char[VARS_PER_POINT*nPoint_Recv[size]*maxSize] {};
-
-  /*--- doubleBuffer and passiveDouble buffer use the same memory allocated above using the dataBuffer. ---*/
 
-  doubleBuffer = reinterpret_cast<su2double*>(dataBuffer);
-  passiveDoubleBuffer = reinterpret_cast<passivedouble*>(dataBuffer);
+  dataBuffer = new passivedouble[VARS_PER_POINT*nPoint_Recv[size]] ();
 
   /*--- Allocate arrays for sending the global ID. ---*/
 
@@ -338,11 +295,12 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
   /*--- Create an index variable to keep track of our index
    positions as we load up the send buffer. ---*/
 
-  unsigned long *index = new unsigned long[size]();
-  for (int ii=0; ii < size; ii++) index[ii] = VARS_PER_POINT*nPoint_Send[ii];
+  vector<unsigned long> index(size), idIndex(size);
 
-  unsigned long *idIndex = new unsigned long[size]();
-  for (int ii=0; ii < size; ii++) idIndex[ii] = nPoint_Send[ii];
+  for (int ii=0; ii < size; ii++) {
+    index[ii] = VARS_PER_POINT*nPoint_Send[ii];
+    idIndex[ii] = nPoint_Send[ii];
+  }
 
   Index = new unsigned long[nLocalPointsBeforeSort]();
 
@@ -351,13 +309,13 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
 
   for (iPoint = 0; iPoint < nLocalPointsBeforeSort; iPoint++) {
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(globalID[iPoint]);
+    iProcessor = linearPartitioner.GetRankContainingIndex(globalID[iPoint]);
 
     /*--- Load the global ID (minus offset) for sorting the
          points once they all reach the correct processor. ---*/
 
     unsigned long nn = idIndex[iProcessor];
-    idSend[nn] = globalID[iPoint] - linearPartitioner->GetFirstIndexOnRank(iProcessor);
+    idSend[nn] = globalID[iPoint] - linearPartitioner.GetFirstIndexOnRank(iProcessor);
 
     /*--- Store the index this point has in the send buffer ---*/
 
@@ -368,13 +326,8 @@ void CParallelDataSorter::PrepareSendBuffers(std::vector<unsigned long>& globalI
     index[iProcessor]  += VARS_PER_POINT;
     idIndex[iProcessor]++;
 
-
   }
 
-  /*--- Free memory after loading up the send buffer. ---*/
-
-  delete [] index;
-  delete [] idIndex;
 }
 
 unsigned long CParallelDataSorter::GetElem_Connectivity(GEO_TYPE type, unsigned long iElem, unsigned long iNode) const {
@@ -463,6 +416,4 @@ void CParallelDataSorter::SetTotalElements(){
     nElemConn_Cum[ii+1] += nElemConn_Cum[ii];
   }
 
-
 }
-
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
index 99c314f2e5c..9a7bc400418 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFEMDataSorter.cpp
@@ -29,12 +29,12 @@
 #include "../../../../Common/include/fem/fem_geometry_structure.hpp"
 #include <numeric>
 
-CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, CFEMDataSorter* valVolumeSorter) :
+CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometry, const CFEMDataSorter* valVolumeSorter) :
   CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){
 
   nDim = geometry->GetnDim();
 
-  this->volumeSorter = valVolumeSorter;
+  volumeSorter = valVolumeSorter;
 
   connectivitySorted = false;
 
@@ -62,19 +62,10 @@ CSurfaceFEMDataSorter::CSurfaceFEMDataSorter(CConfig *config, CGeometry *geometr
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
 }
 
-CSurfaceFEMDataSorter::~CSurfaceFEMDataSorter(){
-
-  delete linearPartitioner;
-  delete [] passiveDoubleBuffer;
-
-}
-
-
-
 void CSurfaceFEMDataSorter::SortOutputData() {
 
   if (!connectivitySorted){
@@ -136,7 +127,7 @@ void CSurfaceFEMDataSorter::SortOutputData() {
   for(unsigned long i=0; i<globalSurfaceDOFIDs.size(); ++i) {
 
     /* Search for the processor that owns this point. */
-    unsigned long iProcessor = linearPartitioner->GetRankContainingIndex(globalSurfaceDOFIDs[i]);
+    unsigned long iProcessor = linearPartitioner.GetRankContainingIndex(globalSurfaceDOFIDs[i]);
 
     /* Store the global ID in the send buffer for iProcessor. */
     sendBuf[iProcessor].push_back(globalSurfaceDOFIDs[i]);
@@ -220,19 +211,16 @@ void CSurfaceFEMDataSorter::SortOutputData() {
-  /* Allocate the memory for Parallel_Surf_Data. */
+  /* Allocate the memory for the surface data buffer (dataBuffer). */
   nPoints = globalSurfaceDOFIDs.size();
 
-
-    delete [] passiveDoubleBuffer;
-
-
-  passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT];
+  delete [] dataBuffer;
+  dataBuffer = new passivedouble[nPoints*VARS_PER_POINT];
 
   /* Determine the local index of the global surface DOFs and
-     copy the data into Parallel_Surf_Data. */
+     copy the data into the surface data buffer (dataBuffer). */
   for(unsigned long i=0; i<nPoints; ++i) {
-    const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner->GetCumulativeSizeBeforeRank(rank);
+    const unsigned long ii = globalSurfaceDOFIDs[i] - linearPartitioner.GetCumulativeSizeBeforeRank(rank);
 
     for(int jj=0; jj<VARS_PER_POINT; jj++)
-      passiveDoubleBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii);
+      dataBuffer[i*VARS_PER_POINT+jj] = volumeSorter->GetData(jj,ii);
   }
 
   /*--- Reduce the total number of surf points we have. This will be
diff --git a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
index a4dc31c32a5..f9b36ab1648 100644
--- a/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
+++ b/SU2_CFD/src/output/filewriter/CSurfaceFVMDataSorter.cpp
@@ -29,12 +29,12 @@
 #include "../../../../Common/include/geometry/CGeometry.hpp"
 #include <numeric>
 
-CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, CFVMDataSorter* valVolumeSorter) :
+CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometry, const CFVMDataSorter* valVolumeSorter) :
   CParallelDataSorter(config, valVolumeSorter->GetFieldNames()){
 
   nDim = geometry->GetnDim();
 
-  this->volumeSorter = valVolumeSorter;
+  volumeSorter = valVolumeSorter;
 
   connectivitySorted = false;
 
@@ -43,14 +43,7 @@ CSurfaceFVMDataSorter::CSurfaceFVMDataSorter(CConfig *config, CGeometry *geometr
 
   /*--- Create the linear partitioner --- */
 
-  linearPartitioner = new CLinearPartitioner(nGlobalPointBeforeSort, 0);
-
-}
-
-CSurfaceFVMDataSorter::~CSurfaceFVMDataSorter(){
-
-  delete linearPartitioner;
-  delete [] passiveDoubleBuffer;
+  linearPartitioner.Initialize(nGlobalPointBeforeSort, 0);
 
 }
 
@@ -101,7 +94,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -129,7 +122,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -157,7 +150,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- If we have not visited this element yet, increment our
        number of elements that must be sent to a particular proc. ---*/
@@ -216,7 +209,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -250,7 +243,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -284,7 +277,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Load global ID into the buffer for sending ---*/
 
@@ -438,17 +431,14 @@ void CSurfaceFVMDataSorter::SortOutputData() {
    we can allocate the new data structure to hold these points alone. Here,
    we also copy the data for those points from our volume data structure. ---*/
 
-
-    delete [] passiveDoubleBuffer;
-
-
-  passiveDoubleBuffer = new passivedouble[nPoints*VARS_PER_POINT];
+  delete [] dataBuffer;
+  dataBuffer = new passivedouble[nPoints*VARS_PER_POINT];
 
   for (int jj = 0; jj < VARS_PER_POINT; jj++) {
     count = 0;
     for (int ii = 0; ii < (int)volumeSorter->GetnPoints(); ii++) {
       if (surfPoint[ii] !=-1) {
-        passiveDoubleBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii);
+        dataBuffer[count*VARS_PER_POINT + jj] = volumeSorter->GetData(jj,ii);
         count++;
       }
     }
@@ -507,7 +497,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -545,14 +535,12 @@ void CSurfaceFVMDataSorter::SortOutputData() {
   /*--- Allocate memory to hold the globals that we are
    sending. ---*/
 
-  unsigned long *globalSend = nullptr;
-  globalSend = new unsigned long[nElem_Send[size]]();
+  auto globalSend = new unsigned long[nElem_Send[size]]();
 
   /*--- Allocate memory to hold the renumbering that we are
    sending. ---*/
 
-  unsigned long *renumbSend = nullptr;
-  renumbSend = new unsigned long[nElem_Send[size]]();
+  auto renumbSend = new unsigned long[nElem_Send[size]]();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -569,7 +557,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     if (nElem_Flag[iProcessor] != ii) {
 
@@ -595,11 +583,8 @@ void CSurfaceFVMDataSorter::SortOutputData() {
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *globalRecv = nullptr;
-  globalRecv = new unsigned long[nElem_Recv[size]]();
-
-  unsigned long *renumbRecv = nullptr;
-  renumbRecv = new unsigned long[nElem_Recv[size]]();
+  auto globalRecv = new unsigned long[nElem_Recv[size]]();
+  auto renumbRecv = new unsigned long[nElem_Recv[size]]();
 
 #ifdef HAVE_MPI
   /*--- We need double the number of messages to send both the conn.
@@ -731,7 +716,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -752,7 +737,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -773,7 +758,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
       /*--- Search for the processor that owns this point ---*/
 
-      iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+      iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
       /*--- Store the global ID if it is outside our own linear partition. ---*/
 
@@ -808,7 +793,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -860,7 +845,7 @@ void CSurfaceFVMDataSorter::SortOutputData() {
 
     /*--- Search for the processor that owns this point ---*/
 
-    iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+    iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
     /*--- If we have not visited this element yet, increment our
      number of elements that must be sent to a particular proc. ---*/
@@ -1205,7 +1190,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
 
             /*--- Search for the processor that owns this point ---*/
 
-            iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+            iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
             /*--- If we have not visited this element yet, increment our
              number of elements that must be sent to a particular proc. ---*/
@@ -1247,16 +1232,11 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
   /*--- Allocate memory to hold the connectivity that we are
    sending. ---*/
 
-  unsigned long *connSend = nullptr;
-  connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]];
-  for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Send[size]; ii++)
-    connSend[ii] = 0;
+  auto connSend = new unsigned long[NODES_PER_ELEMENT*nElem_Send[size]] ();
 
   /*--- Allocate arrays for storing halo flags. ---*/
 
-  unsigned short *haloSend = new unsigned short[nElem_Send[size]];
-  for (int ii = 0; ii < nElem_Send[size]; ii++)
-    haloSend[ii] = false;
+  auto haloSend = new unsigned short[nElem_Send[size]] ();
 
   /*--- Create an index variable to keep track of our index
    position as we load up the send buffer. ---*/
@@ -1300,7 +1280,7 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
 
             /*--- Search for the processor that owns this point ---*/
 
-            iProcessor = linearPartitioner->GetRankContainingIndex(Global_Index);
+            iProcessor = linearPartitioner.GetRankContainingIndex(Global_Index);
 
             /*--- Load connectivity into the buffer for sending ---*/
 
@@ -1346,14 +1326,9 @@ void CSurfaceFVMDataSorter::SortSurfaceConnectivity(CConfig *config, CGeometry *
    we do not include our own rank in the communications. We will
    directly copy our own data later. ---*/
 
-  unsigned long *connRecv = nullptr;
-  connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]];
-  for (int ii = 0; ii < NODES_PER_ELEMENT*nElem_Recv[size]; ii++)
-    connRecv[ii] = 0;
+  auto connRecv = new unsigned long[NODES_PER_ELEMENT*nElem_Recv[size]] ();
 
-  unsigned short *haloRecv = new unsigned short[nElem_Recv[size]];
-  for (int ii = 0; ii < nElem_Recv[size]; ii++)
-    haloRecv[ii] = false;
+  auto haloRecv = new unsigned short[nElem_Recv[size]] ();
 
 #ifdef HAVE_MPI
   /*--- We need double the number of messages to send both the conn.
diff --git a/SU2_CFD/src/python_wrapper_structure.cpp b/SU2_CFD/src/python_wrapper_structure.cpp
index 6125d80966e..62cfc4a23bd 100644
--- a/SU2_CFD/src/python_wrapper_structure.cpp
+++ b/SU2_CFD/src/python_wrapper_structure.cpp
@@ -602,6 +602,7 @@ void CSinglezoneDriver::SetInitialMesh() {
         /*--- Set the grid velocity for this coarse node. ---*/
         geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetGridVel(iPoint, Grid_Vel);
       }
+      END_SU2_OMP_FOR
       /*--- Push back the volume. ---*/
       geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_n();
       geometry_container[ZONE_0][INST_0][iMesh]->nodes->SetVolume_nM1();
@@ -610,6 +611,7 @@ void CSinglezoneDriver::SetInitialMesh() {
     solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n();
     solver_container[ZONE_0][INST_0][MESH_0][MESH_SOL]->GetNodes()->Set_Solution_time_n1();
   }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDriver::BoundaryConditionsUpdate(){
diff --git a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
index 287e1c50eaa..9b377b54751 100644
--- a/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjFEASolver.cpp
@@ -25,26 +25,17 @@
  * License along with SU2. If not, see <http://www.gnu.org/licenses/>.
  */
 
-
 #include "../../include/solvers/CDiscAdjFEASolver.hpp"
 #include "../../include/variables/CDiscAdjFEAVariable.hpp"
 
-CDiscAdjFEASolver::CDiscAdjFEASolver(void) : CSolver() { }
-
-CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config)  : CSolver() { }
-
 CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver,
                                      unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
   adjoint = true;
 
-  unsigned short iVar, iMarker;
-
   unsigned long iPoint;
-  string text_line, mesh_filename;
-  string filename, AdjExt;
 
-  bool dynamic = (config->GetTime_Domain());
+  const bool dynamic = (config->GetTime_Domain());
 
   nVar = direct_solver->GetnVar();
   nDim = geometry->GetnDim();
@@ -59,8 +50,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
 
   /*--- Define some auxiliary vectors related to the residual ---*/
 
-  Residual      = new su2double[nVar];         for (iVar = 0; iVar < nVar; iVar++) Residual[iVar]      = 1.0;
-
   Residual_RMS.resize(nVar,1.0);
   Residual_Max.resize(nVar,1.0);
   Point_Max.resize(nVar,0);
@@ -76,32 +65,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
+  /*--- Initialize the adjoint solution. ---*/
 
-  Solution = new su2double[nVar];
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
-  if (dynamic) {
-    Solution_Vel    = new su2double[nVar];
-    Solution_Accel  = new su2double[nVar];
-
-    for (iVar = 0; iVar < nVar; iVar++) Solution_Vel[iVar]      = 1e-16;
-    for (iVar = 0; iVar < nVar; iVar++) Solution_Accel[iVar]    = 1e-16;
-  }
-
-  /*--- Sensitivity definition and coefficient in all the markers ---*/
-
-  CSensitivity = new su2double* [nMarker];
-
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
-    CSensitivity[iMarker] = new su2double [geometry->nVertex[iMarker]]();
-  }
-
-  Sens_E  = new su2double[nMarker]();
-  Sens_Nu = new su2double[nMarker]();
-  Sens_nL = new su2double[nMarker]();
-
-  nodes = new CDiscAdjFEABoundVariable(Solution, Solution_Accel, Solution_Vel, nPoint, nDim, nVar, dynamic, config);
+  vector<su2double> init(nVar,1e-16);
+  nodes = new CDiscAdjFEABoundVariable(init.data(), init.data(), init.data(), nPoint, nDim, nVar, dynamic, config);
   SetBaseClassPointerToNodes();
 
   /*--- Set which points are vertices and allocate boundary data. ---*/
@@ -116,23 +83,6 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     }
   nodes->AllocateBoundaryVariables(config);
 
-
-  /*--- Store the direct solution ---*/
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    nodes->SetSolution_Direct(iPoint, direct_solver->GetNodes()->GetSolution(iPoint));
-  }
-
-  if (dynamic){
-    for (iPoint = 0; iPoint < nPoint; iPoint++){
-      nodes->SetSolution_Accel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Accel(iPoint));
-    }
-
-    for (iPoint = 0; iPoint < nPoint; iPoint++){
-      nodes->SetSolution_Vel_Direct(iPoint, direct_solver->GetNodes()->GetSolution_Vel(iPoint));
-    }
-  }
-
   /*--- Initialize vector structures for multiple material definition ---*/
 
   nMPROP = config->GetnElasticityMod();
@@ -146,29 +96,10 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
     SU2_MPI::Error("WARNING: For a material to be fully defined, E, Nu and Rho need to have the same dimensions.", CURRENT_FUNCTION);
   }
 
-  E_i           = new su2double[nMPROP]();
-  Local_Sens_E  = new su2double[nMPROP]();
-  Global_Sens_E = new su2double[nMPROP]();
-  Total_Sens_E  = new su2double[nMPROP]();
-  AD_Idx_E_i    = new int[nMPROP]();
-
-  Nu_i           = new su2double[nMPROP]();
-  Local_Sens_Nu  = new su2double[nMPROP]();
-  Global_Sens_Nu = new su2double[nMPROP]();
-  Total_Sens_Nu  = new su2double[nMPROP]();
-  AD_Idx_Nu_i    = new int[nMPROP]();
-
-  Rho_i           = new su2double[nMPROP](); // For inertial effects
-  Local_Sens_Rho  = new su2double[nMPROP]();
-  Global_Sens_Rho = new su2double[nMPROP]();
-  Total_Sens_Rho  = new su2double[nMPROP]();
-  AD_Idx_Rho_i    = new int[nMPROP]();
-
-  Rho_DL_i           = new su2double[nMPROP](); // For dead loads
-  Local_Sens_Rho_DL  = new su2double[nMPROP]();
-  Global_Sens_Rho_DL = new su2double[nMPROP]();
-  Total_Sens_Rho_DL  = new su2double[nMPROP]();
-  AD_Idx_Rho_DL_i    = new int[nMPROP]();
+  E.resize(nMPROP);
+  Nu.resize(nMPROP);
+  Rho.resize(nMPROP);
+  Rho_DL.resize(nMPROP);
 
   /*--- Initialize vector structures for multiple electric regions ---*/
 
@@ -176,12 +107,7 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
 
   if (de_effects) {
     nEField = config->GetnElectric_Field();
-
-    EField             = new su2double[nEField]();
-    Local_Sens_EField  = new su2double[nEField]();
-    Global_Sens_EField = new su2double[nEField]();
-    Total_Sens_EField  = new su2double[nEField]();
-    AD_Idx_EField      = new int[nEField]();
+    EField.resize(nEField);
   }
 
   /*--- Initialize vector structures for structural-based design variables ---*/
@@ -199,80 +125,14 @@ CDiscAdjFEASolver::CDiscAdjFEASolver(CGeometry *geometry, CConfig *config, CSolv
       break;
   }
 
-  if (fea_dv) {
-    ReadDV(config);
-    Local_Sens_DV  = new su2double[nDV]();
-    Global_Sens_DV = new su2double[nDV]();
-    Total_Sens_DV  = new su2double[nDV]();
-    AD_Idx_DV_Val  = new int[nDV]();
-  }
+  if (fea_dv) ReadDV(config);
 
 }
 
-CDiscAdjFEASolver::~CDiscAdjFEASolver(void){
-
-  unsigned short iMarker;
-
-  if (CSensitivity != nullptr) {
-    for (iMarker = 0; iMarker < nMarker; iMarker++) {
-      delete [] CSensitivity[iMarker];
-    }
-    delete [] CSensitivity;
-  }
-
-  delete [] E_i;
-  delete [] Nu_i;
-  delete [] Rho_i;
-  delete [] Rho_DL_i;
-
-  delete [] AD_Idx_E_i;
-  delete [] AD_Idx_Nu_i;
-  delete [] AD_Idx_Rho_i;
-  delete [] AD_Idx_Rho_DL_i;
-
-  delete [] Local_Sens_E;
-  delete [] Local_Sens_Nu;
-  delete [] Local_Sens_Rho;
-  delete [] Local_Sens_Rho_DL;
-
-  delete [] Global_Sens_E;
-  delete [] Global_Sens_Nu;
-  delete [] Global_Sens_Rho;
-  delete [] Global_Sens_Rho_DL;
-
-  delete [] Total_Sens_E;
-  delete [] Total_Sens_Nu;
-  delete [] Total_Sens_Rho;
-  delete [] Total_Sens_Rho_DL;
-
-  delete [] normalLoads;
-  delete [] Sens_E;
-  delete [] Sens_Nu;
-  delete [] Sens_nL;
-
-  delete [] EField;
-  delete [] Local_Sens_EField;
-  delete [] Global_Sens_EField;
-  delete [] Total_Sens_EField;
-  delete [] AD_Idx_EField;
-
-  delete [] DV_Val;
-  delete [] Local_Sens_DV;
-  delete [] Global_Sens_DV;
-  delete [] Total_Sens_DV;
-  delete [] AD_Idx_DV_Val;
-
-  delete [] Solution_Vel;
-  delete [] Solution_Accel;
-
-  delete nodes;
-}
+CDiscAdjFEASolver::~CDiscAdjFEASolver() { delete nodes; }
 
 void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
 
-
-  bool dynamic (config->GetTime_Domain());
-
   unsigned long iPoint;
   unsigned short iVar;
 
@@ -282,7 +142,7 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
     direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint));
   }
 
-  if (dynamic){
+  if (config->GetTime_Domain()){
     /*--- Reset the solution to the initial (converged) solution ---*/
 
     for (iPoint = 0; iPoint < nPoint; iPoint++){
@@ -326,9 +186,9 @@ void CDiscAdjFEASolver::SetRecording(CGeometry* geometry, CConfig *config){
 
 void CDiscAdjFEASolver::RegisterSolution(CGeometry *geometry, CConfig *config){
 
-  bool input = true;
-  bool dynamic = config->GetTime_Domain();
-  bool push_index = !config->GetMultizone_Problem();
+  const bool input = true;
+  const bool dynamic = config->GetTime_Domain();
+  const bool push_index = !config->GetMultizone_Problem();
 
   /*--- Register solution at all necessary time instances and other variables on the tape ---*/
 
@@ -359,13 +219,13 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
   if (KindDirect_Solver == RUNTIME_FEA_SYS) {
 
-    bool pseudo_static = config->GetPseudoStatic();
+    const bool pseudo_static = config->GetPseudoStatic();
 
     for (iVar = 0; iVar < nMPROP; iVar++) {
-      E_i[iVar]      = config->GetElasticyMod(iVar);
-      Nu_i[iVar]     = config->GetPoissonRatio(iVar);
-      Rho_i[iVar]    = pseudo_static? 0.0 : config->GetMaterialDensity(iVar);
-      Rho_DL_i[iVar] = config->GetMaterialDensity(iVar);
+      E[iVar]      = config->GetElasticyMod(iVar);
+      Nu[iVar]     = config->GetPoissonRatio(iVar);
+      Rho[iVar]    = pseudo_static? 0.0 : config->GetMaterialDensity(iVar);
+      Rho_DL[iVar] = config->GetMaterialDensity(iVar);
     }
 
     /*--- Read the values of the electric field ---*/
@@ -376,48 +236,28 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
     /*--- Reset index, otherwise messes up other derivatives ---*/
     if (fea_dv) {
-      for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV_Val[iVar]);
+      for (iVar = 0; iVar < nDV; iVar++) AD::ResetInput(DV[iVar]);
     }
 
     if (!reset) {
-      bool local_index = config->GetMultizone_Problem();
-      bool push_index = !local_index;
-
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        AD::RegisterInput(E_i[iVar], push_index);
-        AD::RegisterInput(Nu_i[iVar], push_index);
-        AD::RegisterInput(Rho_i[iVar], push_index);
-        AD::RegisterInput(Rho_DL_i[iVar], push_index);
-      }
-
-      if(de_effects){
-        for (iVar = 0; iVar < nEField; iVar++)
-          AD::RegisterInput(EField[iVar], push_index);
-      }
+      const bool local_index = config->GetMultizone_Problem();
+      const bool push_index = !local_index;
 
-      if(fea_dv){
-        for (iVar = 0; iVar < nDV; iVar++)
-          AD::RegisterInput(DV_Val[iVar], push_index);
-      }
+      E.Register(push_index);
+      Nu.Register(push_index);
+      Rho.Register(push_index);
+      Rho_DL.Register(push_index);
+      if (de_effects) EField.Register(push_index);
+      if (fea_dv) DV.Register(push_index);
 
       /*--- Explicitly store the tape indices for when we extract the derivatives ---*/
       if (local_index) {
-        for (iVar = 0; iVar < nMPROP; iVar++) {
-          AD::SetIndex(AD_Idx_E_i[iVar], E_i[iVar]);
-          AD::SetIndex(AD_Idx_Nu_i[iVar], Nu_i[iVar]);
-          AD::SetIndex(AD_Idx_Rho_i[iVar], Rho_i[iVar]);
-          AD::SetIndex(AD_Idx_Rho_DL_i[iVar], Rho_DL_i[iVar]);
-        }
-
-        if (de_effects) {
-          for (iVar = 0; iVar < nEField; iVar++)
-            AD::SetIndex(AD_Idx_EField[iVar], EField[iVar]);
-        }
-
-        if (fea_dv) {
-          for (iVar = 0; iVar < nDV; iVar++)
-            AD::SetIndex(AD_Idx_DV_Val[iVar], DV_Val[iVar]);
-        }
+        E.SetIndex();
+        Nu.SetIndex();
+        Rho.SetIndex();
+        Rho_DL.SetIndex();
+        if (de_effects) EField.SetIndex();
+        if (fea_dv) DV.SetIndex();
       }
 
       /*--- Register the flow tractions ---*/
@@ -427,16 +267,16 @@ void CDiscAdjFEASolver::RegisterVariables(CGeometry *geometry, CConfig *config,
 
   }
 
-    /*--- Here it is possible to register other variables as input that influence the flow solution
-     * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
-     * extracted in the ExtractAdjointVariables routine. ---*/
+  /*--- Here it is possible to register other variables as input that influence the flow solution
+   * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
+   * extracted in the ExtractAdjointVariables routine. ---*/
 }
 
 void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){
 
-  bool input = false;
-  bool dynamic = config->GetTime_Domain();
-  bool push_index = !config->GetMultizone_Problem();
+  const bool input = false;
+  const bool dynamic = config->GetTime_Domain();
+  const bool push_index = !config->GetMultizone_Problem();
 
   /*--- Register variables as output of the solver iteration ---*/
 
@@ -452,13 +292,15 @@ void CDiscAdjFEASolver::RegisterOutput(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){
 
-  bool dynamic = config->GetTime_Domain();
-  bool multizone = config->GetMultizone_Problem();
+  const bool dynamic = config->GetTime_Domain();
+  const bool multizone = config->GetMultizone_Problem();
 
   unsigned short iVar;
   unsigned long iPoint;
   su2double residual;
 
+  su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0};
+
   /*--- Set Residuals to zero ---*/
 
   SetResToZero();
@@ -600,78 +442,40 @@ void CDiscAdjFEASolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *co
 
 void CDiscAdjFEASolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config){
 
-  unsigned short iVar;
-  bool local_index = config->GetMultizone_Problem();
-
-  /*--- Extract the adjoint values of the farfield values ---*/
-
-  if (KindDirect_Solver == RUNTIME_FEA_SYS){
-
-    if (local_index) {
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        Local_Sens_E[iVar] = AD::GetDerivative(AD_Idx_E_i[iVar]);
-        Local_Sens_Nu[iVar] = AD::GetDerivative(AD_Idx_Nu_i[iVar]);
-        Local_Sens_Rho[iVar] = AD::GetDerivative(AD_Idx_Rho_i[iVar]);
-        Local_Sens_Rho_DL[iVar] = AD::GetDerivative(AD_Idx_Rho_DL_i[iVar]);
-      }
-    }
-    else {
-      for (iVar = 0; iVar < nMPROP; iVar++) {
-        Local_Sens_E[iVar] = SU2_TYPE::GetDerivative(E_i[iVar]);
-        Local_Sens_Nu[iVar] = SU2_TYPE::GetDerivative(Nu_i[iVar]);
-        Local_Sens_Rho[iVar] = SU2_TYPE::GetDerivative(Rho_i[iVar]);
-        Local_Sens_Rho_DL[iVar] = SU2_TYPE::GetDerivative(Rho_DL_i[iVar]);
-      }
-    }
-
-    SU2_MPI::Allreduce(Local_Sens_E, Global_Sens_E,  nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Nu, Global_Sens_Nu, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Rho, Global_Sens_Rho, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    SU2_MPI::Allreduce(Local_Sens_Rho_DL, Global_Sens_Rho_DL, nMPROP, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-
-    /*--- Extract the adjoint values of the electric field in the case that it is a parameter of the problem. ---*/
+  if (KindDirect_Solver != RUNTIME_FEA_SYS) return;
 
-    if (de_effects) {
-      for (iVar = 0; iVar < nEField; iVar++) {
-        if (local_index) Local_Sens_EField[iVar] = AD::GetDerivative(AD_Idx_EField[iVar]);
-        else             Local_Sens_EField[iVar] = SU2_TYPE::GetDerivative(EField[iVar]);
-      }
-      SU2_MPI::Allreduce(Local_Sens_EField, Global_Sens_EField, nEField, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    }
+  /*--- Sensitivities of material properties and design variables. ---*/
 
-    if (fea_dv) {
-      for (iVar = 0; iVar < nDV; iVar++) {
-        if (local_index) Local_Sens_DV[iVar] = AD::GetDerivative(AD_Idx_DV_Val[iVar]);
-        else             Local_Sens_DV[iVar] = SU2_TYPE::GetDerivative(DV_Val[iVar]);
-      }
-      SU2_MPI::Allreduce(Local_Sens_DV, Global_Sens_DV, nDV, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-    }
+  E.GetDerivative();
+  Nu.GetDerivative();
+  Rho.GetDerivative();
+  Rho_DL.GetDerivative();
+  if (de_effects) EField.GetDerivative();
+  if (fea_dv) DV.GetDerivative();
 
-    /*--- Extract the flow traction sensitivities ---*/
+  /*--- Extract the flow traction sensitivities. ---*/
 
-    if (config->GetnMarker_Fluid_Load() > 0){
-      su2double val_sens;
-      for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){
-        for (unsigned short iDim = 0; iDim < nDim; iDim++){
-          val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim);
-          nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens);
-        }
+  if (config->GetnMarker_Fluid_Load() > 0) {
+    for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++){
+      for (unsigned short iDim = 0; iDim < nDim; iDim++){
+        su2double val_sens = direct_solver->GetNodes()->ExtractFlowTraction_Sensitivity(iPoint,iDim);
+        nodes->SetFlowTractionSensitivity(iPoint, iDim, val_sens);
       }
     }
-
   }
 
 }
 
 void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
 
-  bool dynamic = (config->GetTime_Domain());
-  bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0);
+  const bool dynamic = (config->GetTime_Domain());
+  const bool deform_mesh = (config->GetnMarker_Deform_Mesh() > 0);
+
+  su2double Solution[MAXNVAR] = {0.0}, Solution_Vel[MAXNVAR] = {0.0}, Solution_Accel[MAXNVAR] = {0.0};
 
   unsigned short iVar;
-  unsigned long iPoint;
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
     for (iVar = 0; iVar < nVar; iVar++){
       Solution[iVar] = nodes->GetSolution(iPoint,iVar);
     }
@@ -681,20 +485,14 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
       }
     }
     if (dynamic){
-      for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar);
-      }
-      for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar);
-      }
       for (iVar = 0; iVar < nVar; iVar++){
         Solution[iVar] += nodes->GetDynamic_Derivative_n(iPoint,iVar);
       }
       for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Accel[iVar] += nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar);
+        Solution_Accel[iVar] = nodes->GetSolution_Accel(iPoint,iVar) + nodes->GetDynamic_Derivative_Accel_n(iPoint,iVar);
       }
       for (iVar = 0; iVar < nVar; iVar++){
-        Solution_Vel[iVar] += nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar);
+        Solution_Vel[iVar] = nodes->GetSolution_Vel(iPoint,iVar) + nodes->GetDynamic_Derivative_Vel_n(iPoint,iVar);
       }
     }
     direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution);
@@ -709,12 +507,10 @@ void CDiscAdjFEASolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){
 
-  bool dynamic = (config_container->GetTime_Domain());
-  unsigned long iPoint;
   unsigned short iVar;
 
-  if (dynamic){
-    for (iPoint = 0; iPoint<geometry->GetnPoint(); iPoint++){
+  if (config_container->GetTime_Domain()){
+    for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
       for (iVar=0; iVar < nVar; iVar++){
         nodes->SetDynamic_Derivative_n(iPoint, iVar, nodes->GetSolution_time_n(iPoint, iVar));
       }
@@ -731,26 +527,14 @@ void CDiscAdjFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_cont
 
 void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*){
 
-  unsigned short iVar;
-
-  for (iVar = 0; iVar < nMPROP; iVar++){
-    Total_Sens_E[iVar]        += Global_Sens_E[iVar];
-    Total_Sens_Nu[iVar]       += Global_Sens_Nu[iVar];
-    Total_Sens_Rho[iVar]      += Global_Sens_Rho[iVar];
-    Total_Sens_Rho_DL[iVar]   += Global_Sens_Rho_DL[iVar];
-  }
-
-  if (de_effects){
-    for (iVar = 0; iVar < nEField; iVar++)
-      Total_Sens_EField[iVar]+= Global_Sens_EField[iVar];
-  }
-
-  if (fea_dv){
-    for (iVar = 0; iVar < nDV; iVar++)
-      Total_Sens_DV[iVar] += Global_Sens_DV[iVar];
-  }
+  E.UpdateTotal();
+  Nu.UpdateTotal();
+  Rho.UpdateTotal();
+  Rho_DL.UpdateTotal();
+  if (de_effects) EField.UpdateTotal();
+  if (fea_dv) DV.UpdateTotal();
 
-  /*--- Extract the topology optimization density sensitivities ---*/
+  /*--- Extract the topology optimization density sensitivities. ---*/
 
   direct_solver->ExtractAdjoint_Variables(geometry, config);
 
@@ -776,45 +560,34 @@ void CDiscAdjFEASolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSo
       nodes->SetSensitivity(iPoint, iDim, Sensitivity);
     }
   }
-  SetSurface_Sensitivity(geometry, config);
-}
-
-void CDiscAdjFEASolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config){
-
 }
 
-void CDiscAdjFEASolver::ReadDV(CConfig *config) {
-
-  unsigned long index;
+void CDiscAdjFEASolver::ReadDV(const CConfig *config) {
 
   string filename;
   ifstream properties_file;
 
   /*--- Choose the filename of the design variable ---*/
 
-  string input_name;
-
   switch (config->GetDV_FEA()) {
     case YOUNG_MODULUS:
-      input_name = "dv_young.opt";
+      filename = "dv_young.opt";
       break;
     case POISSON_RATIO:
-      input_name = "dv_poisson.opt";
+      filename = "dv_poisson.opt";
       break;
     case DENSITY_VAL:
     case DEAD_WEIGHT:
-      input_name = "dv_density.opt";
+      filename = "dv_density.opt";
       break;
     case ELECTRIC_FIELD:
-      input_name = "dv_efield.opt";
+      filename = "dv_efield.opt";
       break;
     default:
-      input_name = "dv.opt";
+      filename = "dv.opt";
       break;
   }
 
-  filename = input_name;
-
   if (rank == MASTER_NODE) cout << "Filename: " << filename << "." << endl;
 
   properties_file.open(filename.data(), ios::in);
@@ -826,55 +599,32 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) {
     if (rank == MASTER_NODE)
       cout << "There is no design variable file." << endl;
 
-    nDV   = 1;
-    DV_Val = new su2double[nDV];
-    for (unsigned short iDV = 0; iDV < nDV; iDV++)
-      DV_Val[iDV] = 1.0;
-
+    nDV = 1;
+    DV.resize(nDV);
+    DV[0] = 1.0;
   }
   else{
 
     string text_line;
-
-     /*--- First pass: determine number of design variables ---*/
-
-    unsigned short iDV = 0;
-
-    /*--- Skip the first line: it is the header ---*/
-
-    getline (properties_file, text_line);
-
-    while (getline (properties_file, text_line)) iDV++;
-
-    /*--- Close the restart file ---*/
-
-    properties_file.close();
-
-    nDV = iDV;
-    DV_Val = new su2double[nDV];
-
-    /*--- Reopen the file (TODO: improve this) ---*/
-
-    properties_file.open(filename.data(), ios::in);
+    vector<su2double> values;
 
     /*--- Skip the first line: it is the header ---*/
-
     getline (properties_file, text_line);
 
-    iDV = 0;
     while (getline (properties_file, text_line)) {
-
       istringstream point_line(text_line);
 
-      point_line >> index >> DV_Val[iDV];
-
-      iDV++;
+      unsigned long index = 0;  /*--- Row index from the file, read only to advance the stream. ---*/
+      su2double value = 0.0;  /*--- Keeps the stored value defined if the line cannot be parsed. ---*/
+      point_line >> index >> value;
 
+      values.push_back(value);
     }
 
-    /*--- Close the restart file ---*/
-
-    properties_file.close();
+    nDV = values.size();  /*--- One design variable per data line read from the file. ---*/
+    DV.resize(nDV);
+    unsigned short iDV = 0;
+    for (const auto& x : values) DV[iDV++] = x;  /*--- Iterate by reference to avoid copying each value. ---*/
 
   }
 
@@ -882,72 +632,12 @@ void CDiscAdjFEASolver::ReadDV(CConfig *config) {
 
 void CDiscAdjFEASolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
 
-  unsigned short iVar;
-  unsigned long index, counter;
-  string restart_filename, filename;
-
   /*--- Restart the solution from file information ---*/
 
-  filename = config->GetSolution_AdjFileName();
-  restart_filename = config->GetObjFunc_Extension(filename);
+  auto filename = config->GetSolution_AdjFileName();
+  auto restart_filename = config->GetObjFunc_Extension(filename);
   restart_filename = config->GetFilename(restart_filename, "", val_iter);
 
-  /*--- Read and store the restart metadata. ---*/
-
-//  Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename);
-
-  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
-
-  if (config->GetRead_Binary_Restart()) {
-    Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename);
-  } else {
-    Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename);
-  }
-
-  /*--- Read all lines in the restart file ---*/
-
-  long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0;
-
-  /*--- Skip coordinates ---*/
-
-  unsigned short skipVars = geometry[MESH_0]->GetnDim();
-
-  /*--- Load data from the restart into correct containers. ---*/
-
-  counter = 0;
-  for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
-
-    /*--- Retrieve local index. If this node from the restart file lives
-     on the current processor, we will load and instantiate the vars. ---*/
-
-    iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
-
-    if (iPoint_Local > -1) {
-
-      /*--- We need to store this point's data, so jump to the correct
-       offset in the buffer of data from the restart file and load it. ---*/
-
-      index = counter*Restart_Vars[1] + skipVars;
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar];
-      nodes->SetSolution(iPoint_Local,Solution);
-      iPoint_Global_Local++;
-
-      /*--- Increment the overall counter for how many points have been loaded. ---*/
-      counter++;
-    }
-
-  }
-
-  /*--- Detect a wrong solution file ---*/
-
-  if (iPoint_Global_Local < nPointDomain) {
-    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
-                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
-  }
-
-  /*--- Delete the class memory that is used to load the restart. ---*/
-
-  delete [] Restart_Vars; Restart_Vars = nullptr;
-  delete [] Restart_Data; Restart_Data = nullptr;
+  BasicLoadRestart(geometry[MESH_0], config, restart_filename, geometry[MESH_0]->GetnDim());
 
 }
diff --git a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
index 249c59b4722..610d2026043 100644
--- a/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjMeshSolver.cpp
@@ -28,15 +28,8 @@
 #include "../../include/solvers/CDiscAdjMeshSolver.hpp"
 #include "../../include/variables/CDiscAdjMeshBoundVariable.hpp"
 
-
-CDiscAdjMeshSolver::CDiscAdjMeshSolver() : CSolver () {}
-
-CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config) : CSolver() {}
-
 CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver) : CSolver() {
 
-  unsigned short iVar;
-
   nVar = geometry->GetnDim();
   nDim = geometry->GetnDim();
 
@@ -46,6 +39,8 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
   nPoint       = geometry->GetnPoint();
   nPointDomain = geometry->GetnPointDomain();
 
+  omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE);
+
   /*--- Define some auxiliary vectors related to the residual ---*/
 
   Residual_RMS.resize(nVar,1.0);
@@ -63,20 +58,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
-
-  Solution = new su2double[nVar];
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
   /*--- Initialize the node structure ---*/
   nodes = new CDiscAdjMeshBoundVariable(nPoint,nDim,config);
   SetBaseClassPointerToNodes();
 
   /*--- Set which points are vertices and allocate boundary data. ---*/
 
+  vector<su2double> Solution(nVar,1e-16);
+
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++) {
 
-    nodes->SetSolution(iPoint,Solution);
+    nodes->SetSolution(iPoint,Solution.data());
 
     for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
       long iVertex = geometry->nodes->GetVertex(iPoint, iMarker);
@@ -90,22 +82,17 @@ CDiscAdjMeshSolver::CDiscAdjMeshSolver(CGeometry *geometry, CConfig *config, CSo
 
 }
 
-CDiscAdjMeshSolver::~CDiscAdjMeshSolver(void){
-  delete nodes;
-}
-
-
-void CDiscAdjMeshSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container,
-                                       unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output){
-}
+CDiscAdjMeshSolver::~CDiscAdjMeshSolver() { delete nodes; }
 
 void CDiscAdjMeshSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
   /*--- Reset the solution to the initial (converged) solution ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
     direct_solver->GetNodes()->SetBound_Disp(iPoint,nodes->GetBoundDisp_Direct(iPoint));
   }
+  END_SU2_OMP_FOR
 
   /*--- Set indices to zero ---*/
 
@@ -123,20 +110,25 @@ void CDiscAdjMeshSolver::RegisterSolution(CGeometry *geometry, CConfig *config){
 
 void CDiscAdjMeshSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset){
 
-  /*--- Register boundary displacements as input ---*/
-  bool input = true;
-  direct_solver->GetNodes()->Register_BoundDisp(input);
-
+  SU2_OMP_MASTER {
+    /*--- Register boundary displacements as input ---*/
+    bool input = true;
+    direct_solver->GetNodes()->Register_BoundDisp(input);
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *config){
 
   /*--- Extract the sensitivities of the mesh coordinates ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
 
     /*--- Extract the adjoint solution from the original mesh coordinates ---*/
 
+    su2double Solution[MAXNVAR] = {0.0};
     direct_solver->GetNodes()->GetAdjoint_MeshCoord(iPoint,Solution);
 
     /*--- Store the adjoint solution (the container is reused) ---*/
@@ -144,6 +136,7 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *c
     nodes->SetSolution(iPoint,Solution);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -151,10 +144,12 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *
 
   /*--- Extract the sensitivities of the boundary displacements ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++){
 
     /*--- Extract the adjoint solution of the boundary displacements ---*/
 
+    su2double Solution[MAXNVAR] = {0.0};
     direct_solver->GetNodes()->GetAdjoint_BoundDisp(iPoint,Solution);
 
     /*--- Store the sensitivities of the boundary displacements ---*/
@@ -162,11 +157,14 @@ void CDiscAdjMeshSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *
     nodes->SetBoundDisp_Sens(iPoint,Solution);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
 void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver *solver) {
 
+  SU2_OMP_PARALLEL {
+
   const bool time_stepping = (config->GetTime_Marching() != STEADY);
   const auto eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength();
 
@@ -177,6 +175,8 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS
   ExtractAdjoint_Variables(geometry, config);
 
   /*--- Store the sensitivities in the flow adjoint container ---*/
+
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
 
     /*--- If sharp edge, set the sensitivity to 0 on that region ---*/
@@ -198,8 +198,12 @@ void CDiscAdjMeshSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CS
       }
     }
   }
+  END_SU2_OMP_FOR
+
   solver->SetSurface_Sensitivity(geometry, config);
 
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CDiscAdjMeshSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
diff --git a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
index 77ba74ab257..39e0cf2001d 100644
--- a/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
+++ b/SU2_CFD/src/solvers/CDiscAdjSolver.cpp
@@ -27,22 +27,10 @@
 
 #include "../../include/solvers/CDiscAdjSolver.hpp"
 #include "../../../Common/include/toolboxes/geometry_toolbox.hpp"
+#include "../../../Common/include/parallelization/omp_structure.hpp"
 
-CDiscAdjSolver::CDiscAdjSolver(void) : CSolver () {
-
-}
-
-CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config)  : CSolver() {
-
-}
-
-CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver, unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
-
-  unsigned short iVar, iMarker, iDim;
-  unsigned long iVertex;
-  string text_line, mesh_filename;
-  ifstream restart_file;
-  string filename, AdjExt;
+CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *direct_solver,
+                               unsigned short Kind_Solver, unsigned short iMesh)  : CSolver() {
 
   adjoint = true;
 
@@ -51,21 +39,17 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
 
   /*--- Initialize arrays to NULL ---*/
 
-  CSensitivity = nullptr;
-
   /*-- Store some information about direct solver ---*/
   this->KindDirect_Solver = Kind_Solver;
   this->direct_solver = direct_solver;
 
-
   nMarker      = config->GetnMarker_All();
   nPoint       = geometry->GetnPoint();
   nPointDomain = geometry->GetnPointDomain();
 
-  /*--- Define some auxiliary vectors related to the residual ---*/
+  omp_chunk_size = computeStaticChunkSize(nPoint, omp_get_max_threads(), OMP_MAX_SIZE);
 
-  Residual      = new su2double[nVar];         for (iVar = 0; iVar < nVar; iVar++) Residual[iVar]      = 1.0;
-  Solution_Geometry = new su2double[nDim];     for (iDim = 0; iDim < nDim; iDim++) Solution_Geometry[iDim] = 1.0;
+  /*--- Define some auxiliary vectors related to the residual ---*/
 
   Residual_RMS.resize(nVar,1.0);
   Residual_Max.resize(nVar,1.0);
@@ -82,27 +66,24 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
     Point_Max_Coord_BGS.resize(nVar,nDim) = su2double(0.0);
   }
 
-  /*--- Define some auxiliary vectors related to the solution ---*/
-
-  Solution = new su2double[nVar];
-
-  for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 1e-16;
-
   /*--- Sensitivity definition and coefficient in all the markers ---*/
 
-  CSensitivity = new su2double* [nMarker];
-
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
-    unsigned long nVertex = geometry->nVertex[iMarker];
-    CSensitivity[iMarker] = new su2double [nVertex];
-
-    for (iVertex = 0; iVertex < nVertex; iVertex++)
-      CSensitivity[iMarker][iVertex] = 0.0;
+  CSensitivity.resize(nMarker);
+  for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) {
+    const auto nVertex = geometry->nVertex[iMarker];
+    CSensitivity[iMarker].resize(nVertex, 0.0);
   }
 
+  Sens_Geo.resize(config->GetnMarker_Monitoring(), 0.0);
+
   /*--- Initialize the discrete adjoint solution to zero everywhere. ---*/
 
-  nodes = new CDiscAdjVariable(Solution, nPoint, nDim, nVar, config);
+  if (nVar > MAXNVAR) {
+    SU2_MPI::Error("Oops! The CDiscAdjSolver static array sizes are not large enough.",CURRENT_FUNCTION);
+  }
+
+  vector<su2double> Solution(nVar,1e-16);
+  nodes = new CDiscAdjVariable(Solution.data(), nPoint, nDim, nVar, config);
   SetBaseClassPointerToNodes();
 
   switch(KindDirect_Solver){
@@ -124,47 +105,41 @@ CDiscAdjSolver::CDiscAdjSolver(CGeometry *geometry, CConfig *config, CSolver *di
   }
 }
 
-CDiscAdjSolver::~CDiscAdjSolver(void) {
-
-  unsigned short iMarker;
-
-  if (CSensitivity != nullptr) {
-    for (iMarker = 0; iMarker < nMarker; iMarker++) {
-      delete [] CSensitivity[iMarker];
-    }
-    delete [] CSensitivity;
-  }
-
-  delete nodes;
-}
+CDiscAdjSolver::~CDiscAdjSolver(void) { delete nodes; }
 
 void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
-  bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND;
-  bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
+  const bool time_n1_needed = config->GetTime_Marching() == DT_STEPPING_2ND;
+  const bool time_n_needed = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
 
   unsigned long iPoint;
   unsigned short iVar;
 
   /*--- Reset the solution to the initial (converged) solution ---*/
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (iPoint = 0; iPoint < nPoint; iPoint++) {
     direct_solver->GetNodes()->SetSolution(iPoint, nodes->GetSolution_Direct(iPoint));
   }
+  END_SU2_OMP_FOR
 
   if (time_n_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPoint; iPoint++) {
       for (iVar = 0; iVar < nVar; iVar++) {
         AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint)[iVar]);
       }
     }
+    END_SU2_OMP_FOR
   }
   if (time_n1_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (iPoint = 0; iPoint < nPoint; iPoint++) {
       for (iVar = 0; iVar < nVar; iVar++) {
         AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint)[iVar]);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Set the Jacobian to zero since this is not done inside the fluid iteration
@@ -178,64 +153,12 @@ void CDiscAdjSolver::SetRecording(CGeometry* geometry, CConfig *config){
 
 }
 
-void CDiscAdjSolver::SetMesh_Recording(CGeometry** geometry, CVolumetricMovement *grid_movement, CConfig *config) {
-
-
-//  bool time_n_needed  = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) ||
-//      (config->GetUnsteady_Simulation() == DT_STEPPING_2ND)),
-//  time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND;
-
-//  unsigned long ExtIter = config->GetExtIter();
-
-  unsigned long iPoint;
-  unsigned short iDim;
-
-  /*--- Reset the solution to the initial (converged) position ---*/
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    for (iDim = 0; iDim < nDim; iDim++){
-      geometry[MESH_0]->nodes->SetCoord(iPoint, iDim,nodes->GetGeometry_Direct(iPoint,iDim));
-    }
-  }
-
-  /*--- After moving all nodes, update the dual mesh. Recompute the edges and
-   dual mesh control volumes in the domain and on the boundaries. ---*/
-
-  grid_movement->UpdateDualGrid(geometry[MESH_0], config);
-
-  /*--- After updating the dual mesh, compute the grid velocities (only dynamic problems). ---*/
-//  if (time_n_needed){
-//    geometry[MESH_0]->SetGridVelocity(config, ExtIter);
-//  }
-
-  /*--- Update the multigrid structure after moving the finest grid,
-   including computing the grid velocities on the coarser levels. ---*/
-
-  grid_movement->UpdateMultiGrid(geometry, config);
-
-//  if (time_n_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//        AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n(iPoint,iVar));
-//      }
-//    }
-//  }
-//  if (time_n1_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//        AD::ResetInput(direct_solver->GetNodes()->GetSolution_time_n1(iPoint,iVar));
-//      }
-//    }
-//  }
-
-}
-
 void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) {
 
-  bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND);
-  bool time_n_needed  = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
-  bool input          = true;
-  bool push_index     = !config->GetMultizone_Problem();
+  const bool time_n1_needed = (config->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool time_n_needed  = (config->GetTime_Marching() == DT_STEPPING_1ST) || time_n1_needed;
+  const bool input          = true;
+  const bool push_index     = !config->GetMultizone_Problem();
 
   /*--- Register solution at all necessary time instances and other variables on the tape ---*/
 
@@ -250,6 +173,8 @@ void CDiscAdjSolver::RegisterSolution(CGeometry *geometry, CConfig *config) {
 
 void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, bool reset) {
 
+  SU2_OMP_MASTER {
+
   /*--- Register farfield values as input ---*/
 
   if((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS && !config->GetBoolTurbomachinery())) {
@@ -363,12 +288,16 @@ void CDiscAdjSolver::RegisterVariables(CGeometry *geometry, CConfig *config, boo
   /*--- Here it is possible to register other variables as input that influence the flow solution
    * and thereby also the objective function. The adjoint values (i.e. the derivatives) can be
    * extracted in the ExtractAdjointVariables routine. ---*/
+
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjSolver::RegisterOutput(CGeometry *geometry, CConfig *config) {
 
-  bool input        = false;
-  bool push_index   = !config->GetMultizone_Problem();
+  const bool input        = false;
+  const bool push_index   = !config->GetMultizone_Problem();
 
   /*--- Register variables as output of the solver iteration ---*/
 
@@ -383,18 +312,23 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
   const su2double relax = (config->GetInnerIter()==0)? 1.0 : config->GetRelaxation_Factor_Adjoint();
 
+  su2double Solution[MAXNVAR] = {0.0};
+
   /*--- Set Residuals to zero ---*/
 
   SetResToZero();
 
+  su2double resMax[MAXNVAR] = {0.0}, resRMS[MAXNVAR] = {0.0};
+  const su2double* coordMax[MAXNVAR] = {nullptr};
+  unsigned long idxMax[MAXNVAR] = {0};
+
   /*--- Set the old solution and compute residuals. ---*/
 
   if(!multizone) nodes->Set_OldSolution();
 
+  SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
-    const su2double isdomain = (iPoint < nPointDomain)? 1.0 : 0.0;
-
     /*--- Extract the adjoint solution ---*/
 
     if(config->GetMultizone_Problem()) {
@@ -410,18 +344,38 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
       su2double residual = Solution[iVar]-nodes->GetSolution_Old(iPoint,iVar);
       nodes->AddSolution(iPoint, iVar, relax*residual);
 
-      residual *= isdomain;
-      Residual_RMS[iVar] += pow(residual,2);
-      AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint));
+      if (iPoint < nPointDomain) {
+        /*--- Update residual information for current thread. ---*/
+        resRMS[iVar] += residual*residual;
+        if (fabs(residual) > resMax[iVar]) {
+          resMax[iVar] = fabs(residual);
+          idxMax[iVar] = iPoint;
+          coordMax[iVar] = geometry->nodes->GetCoord(iPoint);
+        }
+      }
     }
   }
+  END_SU2_OMP_FOR
+
+  /*--- Reduce residual information over all threads in this rank. ---*/
+  SU2_OMP_CRITICAL
+  for (auto iVar = 0u; iVar < nVar; iVar++) {
+    Residual_RMS[iVar] += resRMS[iVar];
+    AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
+  }
+  END_SU2_OMP_CRITICAL
+  SU2_OMP_BARRIER
 
   SetResidual_RMS(geometry, config);
 
-  SetIterLinSolver(direct_solver->System.GetIterations());
-  SetResLinSolver(direct_solver->System.GetResidual());
+  SU2_OMP_MASTER {
+    SetIterLinSolver(direct_solver->System.GetIterations());
+    SetResLinSolver(direct_solver->System.GetResidual());
+  }
+  END_SU2_OMP_MASTER
 
   if (time_n_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
       /*--- Extract the adjoint solution at time n ---*/
@@ -432,9 +386,11 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
       nodes->Set_Solution_time_n(iPoint,Solution);
     }
+    END_SU2_OMP_FOR
   }
 
   if (time_n1_needed) {
+    SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0u; iPoint < nPoint; iPoint++) {
 
       /*--- Extract the adjoint solution at time n-1 ---*/
@@ -445,12 +401,15 @@ void CDiscAdjSolver::ExtractAdjoint_Solution(CGeometry *geometry, CConfig *confi
 
       nodes->Set_Solution_time_n1(iPoint,Solution);
     }
+    END_SU2_OMP_FOR
   }
 
 }
 
 void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *config) {
 
+  SU2_OMP_MASTER {
+
   /*--- Extract the adjoint values of the farfield values ---*/
 
   if ((config->GetKind_Regime() == COMPRESSIBLE) && (KindDirect_Solver == RUNTIME_FLOW_SYS) && !config->GetBoolTurbomachinery()) {
@@ -508,98 +467,25 @@ void CDiscAdjSolver::ExtractAdjoint_Variables(CGeometry *geometry, CConfig *conf
 
   /*--- Extract here the adjoint values of everything else that is registered as input in RegisterInput. ---*/
 
-}
-
-
-void CDiscAdjSolver::ExtractAdjoint_Geometry(CGeometry *geometry, CConfig *config) {
-
-//  bool time_n_needed  = ((config->GetUnsteady_Simulation() == DT_STEPPING_1ST) ||
-//      (config->GetUnsteady_Simulation() == DT_STEPPING_2ND));
-
-//  bool time_n1_needed = config->GetUnsteady_Simulation() == DT_STEPPING_2ND;
-
-//  unsigned short iVar;
-  unsigned long iPoint;
-
-  /*--- Set Residuals to zero ---*/
-
-//  for (iVar = 0; iVar < nVar; iVar++){
-//      SetRes_RMS(iVar,0.0);
-//      SetRes_Max(iVar,0.0,0);
-//  }
-
-  /*--- Set the old solution ---*/
-
-  nodes->Set_OldSolution_Geometry();
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-
-    /*--- Extract the adjoint solution ---*/
-
-    if (config->GetMultizone_Problem())
-      geometry->nodes->GetAdjointCoord_LocalIndex(iPoint, Solution_Geometry);
-    else
-      geometry->nodes->GetAdjointCoord(iPoint, Solution_Geometry);
-
-    /*--- Store the adjoint solution ---*/
-
-    nodes->SetSolution_Geometry(iPoint,Solution_Geometry);
-
   }
-
-//  if (time_n_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//
-//      /*--- Extract the adjoint solution at time n ---*/
-//
-//      direct_solver->GetNodes()->GetAdjointSolution_time_n(iPoint,Solution);
-//
-//      /*--- Store the adjoint solution at time n ---*/
-//
-//      nodes->Set_Solution_time_n(iPoint,Solution);
-//    }
-//  }
-//  if (time_n1_needed){
-//    for (iPoint = 0; iPoint < nPoint; iPoint++){
-//
-//      /*--- Extract the adjoint solution at time n-1 ---*/
-//
-//      direct_solver->GetNodes()->GetAdjointSolution_time_n1(iPoint,Solution);
-//
-//      /*--- Store the adjoint solution at time n-1 ---*/
-//
-//      nodes->Set_Solution_time_n1(iPoint,Solution);
-//    }
-//  }
-
-  /*--- Set the residuals ---*/
-
-//  for (iPoint = 0; iPoint < nPointDomain; iPoint++){
-//      for (iVar = 0; iVar < nVar; iVar++){
-//          residual = node[iPoint]->GetSolution_Geometry(iVar) - node[iPoint]->Get_OldSolution_Geometry(iVar);
-//
-//          Residual_RMS[iVar] += residual*residual;
-//          AddRes_Max(iVar,fabs(residual),geometry->nodes->GetGlobalIndex(iPoint),geometry->nodes->GetCoord(iPoint));
-//      }
-//  }
-//
-//  SetResidual_RMS(geometry, config);
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) {
 
-  bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST ||
-                    config->GetTime_Marching() == DT_STEPPING_2ND);
+  const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST ||
+                          config->GetTime_Marching() == DT_STEPPING_2ND);
 
-  unsigned short iVar;
-  unsigned long iPoint;
+  su2double Solution[MAXNVAR] = {0.0};
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    for (iVar = 0; iVar < nVar; iVar++) {
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
+    for (auto iVar = 0u; iVar < nVar; iVar++) {
       Solution[iVar] = nodes->GetSolution(iPoint,iVar);
     }
     if (dual_time) {
-      for (iVar = 0; iVar < nVar; iVar++) {
+      for (auto iVar = 0u; iVar < nVar; iVar++) {
         Solution[iVar] += nodes->GetDual_Time_Derivative(iPoint,iVar);
       }
     }
@@ -610,45 +496,24 @@ void CDiscAdjSolver::SetAdjoint_Output(CGeometry *geometry, CConfig *config) {
       direct_solver->GetNodes()->SetAdjointSolution(iPoint,Solution);
     }
   }
+  END_SU2_OMP_FOR
 }
 
-void CDiscAdjSolver::SetAdjoint_OutputMesh(CGeometry *geometry, CConfig *config){
-
-//  bool dual_time = (config->GetUnsteady_Simulation() == DT_STEPPING_1ST ||
-//      config->GetUnsteady_Simulation() == DT_STEPPING_2ND);
-
-  unsigned short iDim;
-  unsigned long iPoint;
-
-  for (iPoint = 0; iPoint < nPoint; iPoint++){
-    for (iDim = 0; iDim < nDim; iDim++){
-      Solution_Geometry[iDim] = 0.0;
-    }
-//    if (dual_time){
-//      for (iDim = 0; iDim < nVar; iDim++){
-//        Solution_Geometry[iDim] += nodes->GetDual_Time_Derivative_Geometry(iPoint,iDim);
-//      }
-//    }
-    for (iDim = 0; iDim < nDim; iDim++){
-      nodes->SetSensitivity(iPoint,iDim, Solution_Geometry[iDim]);
-    }
-    geometry->nodes->SetAdjointCoord(iPoint, Solution_Geometry);
-  }
+void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) {
 
-}
+  SU2_OMP_PARALLEL {
 
-void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolver*) {
+  const bool time_stepping = (config->GetTime_Marching() != STEADY); // When true, sensitivities are accumulated onto the stored value instead of overwriting it.
+  const su2double eps = config->GetAdjSharp_LimiterCoeff()*config->GetRefElemLength(); // Sharp-edge cut-off distance. NOTE(review): the replaced code also multiplied by GetVenkat_LimiterCoeff() - confirm the change is intended.
 
-  unsigned long iPoint;
-  unsigned short iDim;
-  su2double *Coord, Sensitivity, eps;
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint < nPoint; iPoint++) {
 
-  bool time_stepping = (config->GetTime_Marching() != STEADY);
+    auto Coord = geometry->nodes->GetCoord(iPoint);
 
-  for (iPoint = 0; iPoint < nPoint; iPoint++) {
-    Coord = geometry->nodes->GetCoord(iPoint);
+    for (auto iDim = 0u; iDim < nDim; iDim++) {
 
-    for (iDim = 0; iDim < nDim; iDim++) {
+      su2double Sensitivity = 0.0;
 
       if(config->GetMultizone_Problem()) {
         Sensitivity = geometry->nodes->GetAdjointSolution(iPoint, iDim);
@@ -663,235 +528,151 @@ void CDiscAdjSolver::SetSensitivity(CGeometry *geometry, CConfig *config, CSolve
 
       /*--- If sharp edge, set the sensitivity to 0 on that region ---*/
 
-      if (config->GetSens_Remove_Sharp()) {
-        eps = config->GetVenkat_LimiterCoeff()*config->GetRefElemLength();
-        if ( geometry->nodes->GetSharpEdge_Distance(iPoint) < config->GetAdjSharp_LimiterCoeff()*eps )
-          Sensitivity = 0.0;
+      if (config->GetSens_Remove_Sharp() && geometry->nodes->GetSharpEdge_Distance(iPoint) < eps) {
+        Sensitivity = 0.0;
       }
+
       if (!time_stepping) {
         nodes->SetSensitivity(iPoint,iDim, Sensitivity);
       } else {
-        nodes->SetSensitivity(iPoint, iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity);
+        nodes->SetSensitivity(iPoint,iDim, nodes->GetSensitivity(iPoint,iDim) + Sensitivity);
       }
     }
   }
+  END_SU2_OMP_FOR
+
   SetSurface_Sensitivity(geometry, config);
-}
 
-void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) {
-  unsigned short iMarker, iDim, iMarker_Monitoring;
-  unsigned long iVertex, iPoint;
-  su2double *Normal, Prod, Sens = 0.0, SensDim, Area, Sens_Vertex, *Sens_Geo;
-  Total_Sens_Geo = 0.0;
-  string Monitoring_Tag, Marker_Tag;
-
-  Sens_Geo = new su2double[config->GetnMarker_Monitoring()];
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    Sens_Geo[iMarker_Monitoring] = 0.0;
   }
+  END_SU2_OMP_PARALLEL
+}
 
-  for (iMarker = 0; iMarker < nMarker; iMarker++) {
+void CDiscAdjSolver::SetSurface_Sensitivity(CGeometry *geometry, CConfig *config) {
 
-    /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/
+  SU2_OMP_MASTER
+  for (auto& x : Sens_Geo) x = 0.0; // Reset the per-monitored-surface accumulators before summing.
+  END_SU2_OMP_MASTER
 
-    if(config->GetSolid_Wall(iMarker)) {
+  /*--- Loop over boundary markers to select those for Euler walls and NS walls ---*/
 
-      Sens = 0.0;
+  for (auto iMarker = 0ul; iMarker < nMarker; iMarker++) {
 
-      for (iVertex = 0; iVertex < geometry->GetnVertex(iMarker); iVertex++) {
+    if (!config->GetSolid_Wall(iMarker)) continue;
 
-        iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
-        Normal = geometry->vertex[iMarker][iVertex]->GetNormal();
-        Prod = 0.0;
-        for (iDim = 0; iDim < nDim; iDim++) {
-          /*--- retrieve the gradient calculated with AD -- */
-          SensDim = nodes->GetSensitivity(iPoint,iDim);
+    su2double Sens = 0.0; // Local sum of squared vertex sensitivities on this marker; merged into Sens_Geo via atomicAdd below.
 
-          /*--- calculate scalar product for projection onto the normal vector ---*/
-          Prod += Normal[iDim]*SensDim;
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
+    for (auto iVertex = 0ul; iVertex < geometry->GetnVertex(iMarker); iVertex++) {
 
-        }
+      /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/
 
-        Area = GeometryToolbox::Norm(nDim, Normal);
+      const auto iPoint = geometry->vertex[iMarker][iVertex]->GetNode();
+      const auto Normal = geometry->vertex[iMarker][iVertex]->GetNormal();
 
+      su2double Sens_Vertex = 0.0;
+      for (auto iDim = 0u; iDim < nDim; iDim++) {
+        Sens_Vertex += Normal[iDim] * nodes->GetSensitivity(iPoint,iDim);
+      }
+      Sens_Vertex /= GeometryToolbox::Norm(nDim, Normal); // Divide by the norm of Normal so the projection uses the unit normal.
 
-        /*--- Projection of the gradient calculated with AD onto the normal vector of the surface ---*/
+      CSensitivity[iMarker][iVertex] = -Sens_Vertex;
+      Sens += pow(Sens_Vertex,2);
+    }
+    END_SU2_OMP_FOR
 
-        Sens_Vertex = Prod/Area;
-        CSensitivity[iMarker][iVertex] = -Sens_Vertex;
-        Sens += Sens_Vertex*Sens_Vertex;
-      }
+    if (config->GetMarker_All_Monitoring(iMarker) == NO) continue;
 
-      if (config->GetMarker_All_Monitoring(iMarker) == YES){
+    /*--- Add this marker's sum of squared sensitivities to the monitored surface with the same tag ---*/
 
-        /*--- Compute sensitivity for each surface point ---*/
+    const auto Marker_Tag = config->GetMarker_All_TagBound(iMarker);
 
-        for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-          Monitoring_Tag = config->GetMarker_Monitoring_TagBound(iMarker_Monitoring);
-          Marker_Tag = config->GetMarker_All_TagBound(iMarker);
-          if (Marker_Tag == Monitoring_Tag) {
-            Sens_Geo[iMarker_Monitoring] = Sens;
-          }
-        }
+    for (size_t iMarker_Mon = 0; iMarker_Mon < Sens_Geo.size(); iMarker_Mon++) {
+      if (Marker_Tag == config->GetMarker_Monitoring_TagBound(iMarker_Mon)) {
+        atomicAdd(Sens, Sens_Geo[iMarker_Mon]);
+        break;
       }
     }
   }
 
-#ifdef HAVE_MPI
-  su2double *MySens_Geo;
-  MySens_Geo = new su2double[config->GetnMarker_Monitoring()];
+  SU2_OMP_BARRIER
+  SU2_OMP_MASTER {
+    auto local = Sens_Geo; // Copy used as the send buffer; Sens_Geo receives the MPI_SUM result.
+    SU2_MPI::Allreduce(local.data(), Sens_Geo.data(), Sens_Geo.size(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    MySens_Geo[iMarker_Monitoring] = Sens_Geo[iMarker_Monitoring];
-    Sens_Geo[iMarker_Monitoring]   = 0.0;
+    Total_Sens_Geo = 0.0;
+    for (auto& x : Sens_Geo) {
+      x = sqrt(x); // Square root of the reduced sum of squares for this monitored surface.
+      Total_Sens_Geo += x;
+    }
   }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
-  SU2_MPI::Allreduce(MySens_Geo, Sens_Geo, config->GetnMarker_Monitoring(), MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
-  delete [] MySens_Geo;
-#endif
+}
 
-  for (iMarker_Monitoring = 0; iMarker_Monitoring < config->GetnMarker_Monitoring(); iMarker_Monitoring++) {
-    Sens_Geo[iMarker_Monitoring] = sqrt(Sens_Geo[iMarker_Monitoring]);
-    Total_Sens_Geo   += Sens_Geo[iMarker_Monitoring];
-  }
+void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config, unsigned short iMesh,
+                                   unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) {
 
-  delete [] Sens_Geo;
+  const bool dual_time = (config->GetTime_Marching() == DT_STEPPING_1ST) || (config->GetTime_Marching() == DT_STEPPING_2ND);
 
-}
+  if (!dual_time) return; // The update below only applies to 1st/2nd order dual time stepping.
 
-void CDiscAdjSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, CConfig *config_container, unsigned short iMesh, unsigned short iRKStep, unsigned short RunTime_EqSystem, bool Output) {
-  const bool dual_time_1st = (config_container->GetTime_Marching() == DT_STEPPING_1ST);
-  const bool dual_time_2nd = (config_container->GetTime_Marching() == DT_STEPPING_2ND);
-  const bool dual_time = (dual_time_1st || dual_time_2nd);
-  su2double *solution_n, *solution_n1;
-
-  if (dual_time) {
-    for (auto iPoint = 0ul; iPoint<geometry->GetnPoint(); iPoint++) {
-      solution_n = nodes->GetSolution_time_n(iPoint);
-      solution_n1 = nodes->GetSolution_time_n1(iPoint);
-      for (unsigned short iVar=0; iVar < nVar; iVar++) {
-        nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar));
-        nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]);
-      }
-    } // for iPoint
-  } // if dual_time
+  SU2_OMP_FOR_STAT(omp_chunk_size)
+  for (auto iPoint = 0ul; iPoint<geometry->GetnPoint(); iPoint++) {
+    const auto solution_n = nodes->GetSolution_time_n(iPoint);
+    const auto solution_n1 = nodes->GetSolution_time_n1(iPoint);
+    /*--- Derivative = time-n solution + stored n-level term; the n-level term is then overwritten with the time-n1 solution. ---*/
+    for (auto iVar = 0u; iVar < nVar; iVar++) {
+      nodes->SetDual_Time_Derivative(iPoint, iVar, solution_n[iVar]+nodes->GetDual_Time_Derivative_n(iPoint, iVar));
+      nodes->SetDual_Time_Derivative_n(iPoint,iVar, solution_n1[iVar]);
+    }
+  }
+  END_SU2_OMP_FOR
 }
 
 void CDiscAdjSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *config, int val_iter, bool val_update_geo) {
 
-  unsigned short iVar, iMesh;
-  unsigned long iPoint, index, iChildren, Point_Fine, counter;
-  su2double Area_Children, Area_Parent, *Solution_Fine;
-  string restart_filename, filename;
-
-  bool compressible = (config->GetKind_Regime() == COMPRESSIBLE);
-  bool incompressible = (config->GetKind_Regime() == INCOMPRESSIBLE);
-  bool rans = ((config->GetKind_Solver() == DISC_ADJ_RANS) || (config->GetKind_Solver() == DISC_ADJ_INC_RANS)) ;
-
   /*--- Restart the solution from file information ---*/
 
-  filename = config->GetSolution_AdjFileName();
-  restart_filename = config->GetObjFunc_Extension(filename);
-
+  auto filename = config->GetSolution_AdjFileName();
+  auto restart_filename = config->GetObjFunc_Extension(filename);
   restart_filename = config->GetFilename(restart_filename, "", val_iter);
 
-
-  /*--- Read and store the restart metadata. ---*/
-
-//  Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, restart_filename);
-
-  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
-
-  if (config->GetRead_Binary_Restart()) {
-    Read_SU2_Restart_Binary(geometry[MESH_0], config, restart_filename);
-  } else {
-    Read_SU2_Restart_ASCII(geometry[MESH_0], config, restart_filename);
-  }
-
-  /*--- Read all lines in the restart file ---*/
-
-  long iPoint_Local; unsigned long iPoint_Global = 0; unsigned long iPoint_Global_Local = 0;
-  unsigned short rbuf_NotMatching = 0, sbuf_NotMatching = 0;
+  const bool rans = (config->GetKind_Turb_Model() != NONE); // Turbulence variables are only skipped (below) when a turbulence model is active.
 
   /*--- Skip coordinates ---*/
   unsigned short skipVars = geometry[MESH_0]->GetnDim();
 
   /*--- Skip flow adjoint variables ---*/
   if (KindDirect_Solver== RUNTIME_TURB_SYS) {
-    if (compressible) {
-      skipVars += nDim + 2;
-    }
-    if (incompressible) {
-      skipVars += nDim + 2;
-    }
+    skipVars += nDim + 2;
   }
 
   /*--- Skip flow adjoint and turbulent variables ---*/
   if (KindDirect_Solver == RUNTIME_RADIATION_SYS) {
-    if (compressible) skipVars += nDim + 2;
-    if (incompressible) skipVars += nDim + 2;
+    skipVars += nDim + 2;
     if (rans) skipVars += solver[MESH_0][TURB_SOL]->GetnVar();
   }
 
-  /*--- Load data from the restart into correct containers. ---*/
-
-  counter = 0;
-  for (iPoint_Global = 0; iPoint_Global < geometry[MESH_0]->GetGlobal_nPointDomain(); iPoint_Global++ ) {
-
-    /*--- Retrieve local index. If this node from the restart file lives
-     on the current processor, we will load and instantiate the vars. ---*/
+  BasicLoadRestart(geometry[MESH_0], config, restart_filename, skipVars);
 
-    iPoint_Local = geometry[MESH_0]->GetGlobal_to_Local_Point(iPoint_Global);
+  /*--- Restrict the solution to each coarse grid as the volume-weighted sum over the children control volumes ---*/
 
-    if (iPoint_Local > -1) {
+  for (auto iMesh = 1u; iMesh <= config->GetnMGLevels(); iMesh++) {
 
-      /*--- We need to store this point's data, so jump to the correct
-       offset in the buffer of data from the restart file and load it. ---*/
+    const auto& fineSol = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution();
 
-      index = counter*Restart_Vars[1] + skipVars;
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = Restart_Data[index+iVar];
-      nodes->SetSolution(iPoint_Local,Solution);
-      iPoint_Global_Local++;
-
-      /*--- Increment the overall counter for how many points have been loaded. ---*/
-      counter++;
-    }
-
-  }
+    for (auto iPoint = 0ul; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) {
+      su2double Solution[MAXNVAR] = {0.0}; // Zero-initialised accumulator for this coarse point.
+      const su2double Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint);
 
-  /*--- Detect a wrong solution file ---*/
+      for (auto iChildren = 0u; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) {
+        const auto Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren);
+        const su2double weight = geometry[iMesh-1]->nodes->GetVolume(Point_Fine) / Area_Parent; // Child volume as a fraction of the parent volume.
 
-  if (iPoint_Global_Local < nPointDomain) { sbuf_NotMatching = 1; }
-
-  SU2_MPI::Allreduce(&sbuf_NotMatching, &rbuf_NotMatching, 1, MPI_UNSIGNED_SHORT, MPI_SUM, SU2_MPI::GetComm());
-
-  if (rbuf_NotMatching != 0) {
-    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
-                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
-  }
-
-  /*--- Communicate the loaded solution on the fine grid before we transfer
-   it down to the coarse levels. ---*/
-
-  for (iMesh = 1; iMesh <= config->GetnMGLevels(); iMesh++) {
-    for (iPoint = 0; iPoint < geometry[iMesh]->GetnPoint(); iPoint++) {
-      Area_Parent = geometry[iMesh]->nodes->GetVolume(iPoint);
-      for (iVar = 0; iVar < nVar; iVar++) Solution[iVar] = 0.0;
-      for (iChildren = 0; iChildren < geometry[iMesh]->nodes->GetnChildren_CV(iPoint); iChildren++) {
-        Point_Fine = geometry[iMesh]->nodes->GetChildren_CV(iPoint, iChildren);
-        Area_Children = geometry[iMesh-1]->nodes->GetVolume(Point_Fine);
-        Solution_Fine = solver[iMesh-1][ADJFLOW_SOL]->GetNodes()->GetSolution(Point_Fine);
-        for (iVar = 0; iVar < nVar; iVar++) {
-          Solution[iVar] += Solution_Fine[iVar]*Area_Children/Area_Parent;
-        }
+        for (auto iVar = 0u; iVar < nVar; iVar++) Solution[iVar] += weight * fineSol(Point_Fine, iVar);
       }
       solver[iMesh][ADJFLOW_SOL]->GetNodes()->SetSolution(iPoint, Solution);
     }
   }
-
-  /*--- Delete the class memory that is used to load the restart. ---*/
-
-  delete [] Restart_Vars;  Restart_Vars = nullptr;
-  delete [] Restart_Data;  Restart_Data = nullptr;
-
 }
diff --git a/SU2_CFD/src/solvers/CEulerSolver.cpp b/SU2_CFD/src/solvers/CEulerSolver.cpp
index fd0a6598fef..d220d815a4d 100644
--- a/SU2_CFD/src/solvers/CEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CEulerSolver.cpp
@@ -372,6 +372,7 @@ void CEulerSolver::InstantiateEdgeNumerics(const CSolver* const* solver_containe
                    "support vectorization.", CURRENT_FUNCTION);
 
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
@@ -1388,7 +1389,8 @@ void CEulerSolver::SetNondimensionalization(CConfig *config, unsigned short iMes
       GetFluidModel()->SetThermalConductivityModel(config);
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   Energy_FreeStreamND = GetFluidModel()->GetStaticEnergy() + 0.5*ModVel_FreeStreamND*ModVel_FreeStreamND;
 
@@ -1724,12 +1726,14 @@ void CEulerSolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_c
         }
 
       }
+      END_SU2_OMP_FOR
 
       FlowNodes->Set_OldSolution();
 
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1753,61 +1757,47 @@ void CEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_con
                                config->GetKind_Upwind_Flow() == SLAU ||
                                config->GetKind_Upwind_Flow() == SLAU2);
 
-  /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/
-
-  if (fixed_cl && !disc_adjoint && !cont_adjoint) {
-    SU2_OMP_MASTER
-    SetFarfield_AoA(geometry, solver_container, config, iMesh, Output);
-    SU2_OMP_BARRIER
-  }
-
   /*--- Set the primitive variables ---*/
 
-  SU2_OMP_MASTER
-  ErrorCounter = 0;
-  SU2_OMP_BARRIER
+  ompMasterAssignBarrier(ErrorCounter, 0);
 
   SU2_OMP_ATOMIC
   ErrorCounter += SetPrimitive_Variables(solver_container, config);
+  SU2_OMP_BARRIER
 
-  if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) {
-    SU2_OMP_BARRIER
-    SU2_OMP_MASTER
-    {
+  SU2_OMP_MASTER { /*--- Ops that are not OpenMP parallel go in this block. ---*/
+
+    if ((iMesh == MESH_0) && (config->GetComm_Level() == COMM_FULL)) {
       unsigned long tmp = ErrorCounter;
       SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
       config->SetNonphysical_Points(ErrorCounter);
     }
-    SU2_OMP_BARRIER
-  }
 
-  /*--- Compute the engine properties ---*/
+    /*--- Update the angle of attack at the far-field for fixed CL calculations (only direct problem). ---*/
 
-  if (engine) {
-    SU2_OMP_MASTER
-    GetPower_Properties(geometry, config, iMesh, Output);
-    SU2_OMP_BARRIER
-  }
+    if (fixed_cl && !disc_adjoint && !cont_adjoint) {
+      SetFarfield_AoA(geometry, solver_container, config, iMesh, Output);
+    }
 
-  /*--- Compute the actuator disk properties and distortion levels ---*/
+    /*--- Compute the engine properties ---*/
 
-  if (actuator_disk) {
-    SU2_OMP_MASTER
-    {
+    if (engine) GetPower_Properties(geometry, config, iMesh, Output);
+
+    /*--- Compute the actuator disk properties and distortion levels ---*/
+
+    if (actuator_disk) {
       Set_MPI_ActDisk(solver_container, geometry, config);
       GetPower_Properties(geometry, config, iMesh, Output);
       SetActDisk_BCThrust(geometry, solver_container, config, iMesh, Output);
     }
-    SU2_OMP_BARRIER
-  }
 
-  /*--- Compute NearField MPI ---*/
+    /*--- Compute NearField MPI ---*/
+
+    if (nearfield) Set_MPI_Nearfield(geometry, config);
 
-  if (nearfield) {
-    SU2_OMP_MASTER
-    Set_MPI_Nearfield(geometry, config);
-    SU2_OMP_BARRIER
   }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- Artificial dissipation ---*/
 
@@ -1891,6 +1881,7 @@ unsigned long CEulerSolver::SetPrimitive_Variables(CSolver **solver_container, c
 
     if (!physical) nonPhysicalPoints++;
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -1975,6 +1966,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
   unsigned long counter_local = 0;
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   /*--- Pick one numerics object per thread. ---*/
   CNumerics* numerics = numerics_container[CONV_TERM + omp_get_thread_num()*MAX_TERMS];
@@ -2164,6 +2156,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -2186,6 +2179,7 @@ void CEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_contain
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -2290,6 +2284,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (rotating_frame) {
@@ -2320,6 +2315,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (axisymmetric) {
@@ -2396,6 +2392,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit)
         Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
+    END_SU2_OMP_FOR
   }
 
   if (gravity) {
@@ -2417,6 +2414,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -2434,6 +2432,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
         LinSysRes(iPoint,iVar) += Volume * nodes->GetHarmonicBalance_Source(iPoint,iVar);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   if (windgust) {
@@ -2464,6 +2463,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Check if a verification solution is to be computed. ---*/
@@ -2494,6 +2494,7 @@ void CEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_contain
           LinSysRes(iPoint,iVar) -= sourceMan[iVar]*Volume;
         }
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -2564,6 +2565,7 @@ void CEulerSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *co
       nodes->AddUnd_Lapl(iPoint, nVar-1, Pressure_j-Pressure_i);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the Laplacian across any periodic boundaries. ---*/
 
@@ -2636,6 +2638,7 @@ void CEulerSolver::SetUpwind_Ducros_Sensor(CGeometry *geometry, CConfig *config)
 
     nodes->SetSensor(iPoint, Ducros_i);
   }
+  END_SU2_OMP_FOR
 
   InitiateComms(geometry, config, SENSOR);
   CompleteComms(geometry, config, SENSOR);
@@ -4816,6 +4819,7 @@ void CEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -5307,6 +5311,7 @@ void CEulerSolver::BC_Riemann(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -5823,7 +5828,8 @@ void CEulerSolver::BC_TurboRiemann(CGeometry *geometry, CSolver **solver_contain
         }
       }
     }
-}
+    END_SU2_OMP_FOR
+  }
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -6723,6 +6729,7 @@ void CEulerSolver::BC_Giles(CGeometry *geometry, CSolver **solver_container, CNu
       }
 
     }
+    END_SU2_OMP_FOR
   }
 
   /*--- Free locally allocated memory ---*/
@@ -7048,6 +7055,7 @@ void CEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -7224,6 +7232,7 @@ void CEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
   delete [] Normal;
@@ -7371,6 +7380,7 @@ void CEulerSolver::BC_Supersonic_Inlet(CGeometry *geometry, CSolver **solver_con
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -7496,6 +7506,7 @@ void CEulerSolver::BC_Supersonic_Outlet(CGeometry *geometry, CSolver **solver_co
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -7720,6 +7731,7 @@ void CEulerSolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_contai
 
     }
   }
+  END_SU2_OMP_FOR
 
   delete [] Normal;
 
@@ -7974,6 +7986,7 @@ void CEulerSolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_conta
 
     }
   }
+  END_SU2_OMP_FOR
 
   delete [] Normal;
 
@@ -8033,6 +8046,7 @@ void CEulerSolver::BC_Interface_Boundary(CGeometry *geometry, CSolver **solver_c
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8096,6 +8110,7 @@ void CEulerSolver::BC_NearField_Boundary(CGeometry *geometry, CSolver **solver_c
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8539,6 +8554,7 @@ void CEulerSolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container, C
     }
 
   }
+  END_SU2_OMP_FOR
 
   /*--- Free locally allocated memory ---*/
 
@@ -8777,6 +8793,7 @@ void CEulerSolver::BC_ActDisk_VariableLoad(CGeometry *geometry, CSolver **solver
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CEulerSolver::PrintVerificationError(const CConfig *config) const {
@@ -8830,6 +8847,7 @@ void CEulerSolver::SetFreeStream_Solution(const CConfig *config) {
     }
     nodes->SetSolution(iPoint,nVar-1, Density_Inf*Energy_Inf);
   }
+  END_SU2_OMP_FOR
 }
 
 void CEulerSolver::SetFreeStream_TurboSolution(CConfig *config) {
diff --git a/SU2_CFD/src/solvers/CFEASolver.cpp b/SU2_CFD/src/solvers/CFEASolver.cpp
index c96fef43746..79502c49923 100644
--- a/SU2_CFD/src/solvers/CFEASolver.cpp
+++ b/SU2_CFD/src/solvers/CFEASolver.cpp
@@ -114,6 +114,7 @@ CFEASolver::CFEASolver(CGeometry *geometry, CConfig *config) : CSolver() {
       }
     }
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Set element properties ---*/
   Set_ElementProperties(geometry, config);
@@ -669,6 +670,7 @@ void CFEASolver::Preprocessing(CGeometry *geometry, CSolver **solver_container,
   {
     LinSysSol.SetValZero();
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Clear external forces. ---*/
   nodes->Clear_SurfaceLoad_Res();
@@ -687,13 +689,16 @@ void CFEASolver::SetInitialCondition(CGeometry **geometry, CSolver ***solver_con
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint)
       nodes->SetSolution(iPoint, zeros);
+    END_SU2_OMP_FOR
   }
   else {
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint)
       nodes->SetSolution(iPoint, nodes->GetPrestretch(iPoint));
+    END_SU2_OMP_FOR
   }
-  } // end parallel
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics, const CConfig *config) {
@@ -778,10 +783,12 @@ void CFEASolver::Compute_StiffMatrix(CGeometry *geometry, CNumerics **numerics,
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -918,10 +925,12 @@ void CFEASolver::Compute_StiffMatrix_NodalStressRes(CGeometry *geometry, CNumeri
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1001,10 +1010,12 @@ void CFEASolver::Compute_MassMatrix(const CGeometry *geometry, CNumerics **numer
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   AD::EndPassive(wasActive);
 
@@ -1080,6 +1091,7 @@ void CFEASolver::Compute_MassRes(const CGeometry *geometry, CNumerics **numerics
       }
 
     } // end iElem loop
+    END_SU2_OMP_FOR
 
   } // end color loop
 
@@ -1169,10 +1181,12 @@ void CFEASolver::Compute_NodalStressRes(CGeometry *geometry, CNumerics **numeric
         }
 
       } // end iElem loop
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1210,6 +1224,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
         nodes->SetStress_FEM(iPoint,iStress, 0.0);
       }
     }
+    END_SU2_OMP_FOR
     AD::EndPassive(wasActive);
 
     for(auto color : ElemColoring) {
@@ -1297,6 +1312,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
         AD::EndPassive(wasActive);
 
       } // end iElem loop
+      END_SU2_OMP_FOR
       atomicAdd(stressPen, StressPenalty);
 
     } // end color loop
@@ -1306,7 +1322,7 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
     /*--- Compute the von Misses stress at each point, and the maximum for the domain. ---*/
     su2double maxVonMises = 0.0;
 
-    SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+    SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
     for (auto iPoint = 0ul; iPoint < nPointDomain; iPoint++) {
 
       const auto vms = CFEAElasticity::VonMisesStress(nDim, nodes->GetStress_FEM(iPoint));
@@ -1315,12 +1331,15 @@ void CFEASolver::Compute_NodalStress(CGeometry *geometry, CNumerics **numerics,
 
       maxVonMises = max(maxVonMises, vms);
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     MaxVonMises_Stress = max(MaxVonMises_Stress, maxVonMises);
+    END_SU2_OMP_CRITICAL
 
     AD::EndPassive(wasActive);
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
   /*--- Set the value of the MaxVonMises_Stress as the CFEA coeffient ---*/
   SU2_MPI::Allreduce(&MaxVonMises_Stress, &Total_CFEA, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
@@ -1462,6 +1481,7 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       nodes->Clear_BodyForces_Res(iPoint);
+    END_SU2_OMP_FOR
 
     for(auto color : ElemColoring) {
 
@@ -1519,11 +1539,13 @@ void CFEASolver::Compute_DeadLoad(CGeometry *geometry, CNumerics **numerics, con
           if (LockStrategy) omp_unset_lock(&UpdateLocks[indexNode[iNode]]);
         }
 
-      } // end iElem loop
+      }
+      END_SU2_OMP_FOR
 
     } // end color loop
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -1732,6 +1754,7 @@ CSysVector<T> computeLinearResidual(const CSysMatrix<T>& A,
                                     const CSysVector<U>& b) {
   CSysVector<T> r(x.GetNBlk(), x.GetNBlkDomain(), x.GetNVar(), nullptr);
   SU2_OMP_PARALLEL { A.ComputeResidual(x, b, r); }
+  END_SU2_OMP_PARALLEL
   return r;
 }
 
@@ -1751,6 +1774,7 @@ CSysVector<T> computeLinearResidual(const CSysMatrix<T>& A,
     btmp.PassiveCopy(b);
     A.ComputeResidual(xtmp, btmp, r);
   }
+  END_SU2_OMP_PARALLEL
   return r;
 }
 
@@ -1812,7 +1836,9 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics
       Conv_Check[1] = rtol;
       Conv_Check[2] = etol;
     }
-    } // end parallel
+    END_SU2_OMP_MASTER
+    }
+    END_SU2_OMP_PARALLEL
   }
   else {
 
@@ -1845,18 +1871,20 @@ void CFEASolver::Postprocessing(CGeometry *geometry, CConfig *config, CNumerics
         }
       }
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     for (auto iVar = 0ul; iVar < nVar; iVar++) {
       Residual_RMS[iVar] += resRMS[iVar];
       AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     /*--- Compute the root mean square residual. ---*/
-    SU2_OMP_MASTER
     SetResidual_RMS(geometry, config);
 
-    } // end SU2_OMP_PARALLEL
+    }
+    END_SU2_OMP_PARALLEL
 
   }
 
@@ -2210,6 +2238,7 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics
       }
 
     }
+    END_SU2_OMP_FOR
 
     /*--- Dynamic contribution. ---*/
 
@@ -2241,13 +2270,15 @@ void CFEASolver::ImplicitNewmark_Iteration(const CGeometry *geometry, CNumerics
             a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t)
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Add M*TimeRes_Aux to the residual. ---*/
       Compute_MassRes(geometry, numerics, config);
       LinSysRes += TimeRes;
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2268,6 +2299,7 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig
       for (iVar = 0; iVar < nVar; iVar++)
         nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar));
     }
+    END_SU2_OMP_FOR
 
     if (dynamic) {
       SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -2294,8 +2326,10 @@ void CFEASolver::ImplicitNewmark_Update(const CGeometry *geometry, const CConfig
           nodes->SetSolution_Vel(iPoint, iVar, sol);
         }
       }
+      END_SU2_OMP_FOR
     }
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CConfig *config) {
@@ -2313,6 +2347,7 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo
       nodes->SetSolution(iPoint, nodes->GetSolution_Pred(iPoint));
       nodes->SetSolution_Pred_Old(iPoint, nodes->GetSolution(iPoint));
     }
+    END_SU2_OMP_FOR
 
     if (dynamic) {
       SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -2339,9 +2374,11 @@ void CFEASolver::ImplicitNewmark_Relaxation(const CGeometry *geometry, const CCo
           nodes->SetSolution_Vel(iPoint, iVar, sol);
         }
       }
+      END_SU2_OMP_FOR
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2392,6 +2429,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
         }
 
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Loads for dynamic problems. ---*/
@@ -2416,6 +2454,7 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
             a_dt[3]*nodes->GetSolution_Accel_time_n(iPoint,iVar); // a3*U''(t)
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Add M*TimeRes_Aux to the residual. ---*/
       Compute_MassRes(geometry, numerics, config);
@@ -2448,9 +2487,11 @@ void CFEASolver::GeneralizedAlpha_Iteration(const CGeometry *geometry, CNumerics
                                                     alpha_f  * nodes->Get_FlowTraction_n(iPoint,iVar) );
         }
       }
+      END_SU2_OMP_FOR
     }
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2462,6 +2503,7 @@ void CFEASolver::GeneralizedAlpha_UpdateDisp(const CGeometry *geometry, const CC
   for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
     for (unsigned short iVar = 0; iVar < nVar; iVar++)
       nodes->Add_DeltaSolution(iPoint, iVar, LinSysSol(iPoint,iVar));
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2516,6 +2558,7 @@ void CFEASolver::GeneralizedAlpha_UpdateSolution(const CGeometry *geometry, cons
     }
 
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2542,6 +2585,7 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) {
   /*--- This is required for the discrete adjoint. ---*/
   SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
   for (auto i = nPointDomain*nVar; i < nPoint*nVar; ++i) LinSysRes[i] = 0.0;
+  END_SU2_OMP_FOR
 
   /*--- Solve or smooth the linear system. ---*/
 
@@ -2552,8 +2596,10 @@ void CFEASolver::Solve_System(CGeometry *geometry, CConfig *config) {
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   //SU2_OMP_BARRIER
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2602,6 +2648,7 @@ void CFEASolver::PredictStruct_Displacement(CGeometry *geometry, CConfig *config
     }
 
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2720,6 +2767,7 @@ void CFEASolver::SetAitken_Relaxation(CGeometry *geometry, CConfig *config) {
 
     nodes->SetSolution_Pred(iPoint, newDispPred);
   }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -2811,6 +2859,7 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){
     for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
       obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint));
     }
+    END_SU2_OMP_FOR
   }
   else {
     for (unsigned short iMarker = 0; iMarker < config->GetnMarker_All(); iMarker++) {
@@ -2826,12 +2875,14 @@ void CFEASolver::Compute_OFRefGeom(CGeometry *geometry, const CConfig *config){
           if (geometry->nodes->GetDomain(iPoint))
             obj_fun_local += SquaredDistance(nVar, nodes->GetReference_Geometry(iPoint), nodes->GetSolution(iPoint));
         }
+        END_SU2_OMP_FOR
       }
     }
   }
   atomicAdd(obj_fun_local, objective_function);
   atomicAdd(nSurf_local, nSurfPoints);
   }
+  END_SU2_OMP_PARALLEL
   SU2_MPI::Allreduce(&objective_function, &Total_OFRefGeom, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
 
   unsigned long nPointsOF = geometry->GetGlobal_nPointDomain();
@@ -2926,10 +2977,13 @@ void CFEASolver::Compute_OFVolFrac(CGeometry *geometry, const CConfig *config)
       discrete_loc += volume*4.0*rho*(1.0-rho);
     }
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(tot_vol_loc, total_volume);
   atomicAdd(integral_loc, integral);
   atomicAdd(discrete_loc, discreteness);
   }
+  END_SU2_OMP_PARALLEL
 
   su2double tmp;
   SU2_MPI::Allreduce(&total_volume,&tmp,1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
@@ -2989,8 +3043,11 @@ void CFEASolver::Compute_OFCompliance(CGeometry *geometry, const CConfig *config
     for (iVar = 0; iVar < nVar; iVar++)
       comp_local += nodalForce[iVar]*nodes->GetSolution(iPoint,iVar);
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(comp_local, compliance);
   }
+  END_SU2_OMP_PARALLEL
 
   SU2_MPI::Allreduce(&compliance, &Total_OFCompliance, 1,MPI_DOUBLE,MPI_SUM,SU2_MPI::GetComm());
 
@@ -3059,9 +3116,12 @@ void CFEASolver::Stiffness_Penalty(CGeometry *geometry, CNumerics **numerics, CC
 
     }
   }
+  END_SU2_OMP_FOR
+
   atomicAdd(totalVol_loc, totalVolume);
   atomicAdd(weighted_loc, weightedValue);
   }
+  END_SU2_OMP_PARALLEL
 
   // Reduce value across processors for parallelization
 
@@ -3253,6 +3313,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
     else if (rho < 0.0) physical_rho[iElem] = 0.0;
     else                physical_rho[iElem] = rho;
   }
+  END_SU2_OMP_PARALLEL
 
   geometry->FilterValuesAtElementCG(filter_radius, kernels, search_lim, physical_rho);
 
@@ -3265,15 +3326,18 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
         SU2_OMP_FOR_STAT(omp_chunk_size)
         for (auto iElem=0ul; iElem<nElement; ++iElem)
           physical_rho[iElem] = 1.0-exp(-param*physical_rho[iElem])+physical_rho[iElem]*exp(-param);
+        END_SU2_OMP_FOR
         break;
       case HEAVISIDE_DOWN:
         SU2_OMP_FOR_STAT(omp_chunk_size)
         for (auto iElem=0ul; iElem<nElement; ++iElem)
           physical_rho[iElem] = exp(-param*(1.0-physical_rho[iElem]))-(1.0-physical_rho[iElem])*exp(-param);
+        END_SU2_OMP_FOR
         break;
       default:
         SU2_OMP_MASTER
         SU2_MPI::Error("Unknown type of projection function",CURRENT_FUNCTION);
+        END_SU2_OMP_MASTER
     }
 
     /*--- If input was out of bounds use the bound instead of the filtered
@@ -3285,6 +3349,7 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
       else if (rho < 0.0) element_properties[iElem]->SetPhysicalDensity(0.0);
       else element_properties[iElem]->SetPhysicalDensity(physical_rho[iElem]);
     }
+    END_SU2_OMP_FOR
 
     /*--- Compute nodal averages for output. ---*/
     SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -3297,7 +3362,9 @@ void CFEASolver::FilterElementDensities(CGeometry *geometry, const CConfig *conf
       }
       nodes->SetAuxVar(iPoint, 0, sum/vol);
     }
+    END_SU2_OMP_FOR
   }
+  END_SU2_OMP_PARALLEL
 
   delete [] physical_rho;
 
diff --git a/SU2_CFD/src/solvers/CIncEulerSolver.cpp b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
index 79c8034b5ac..830cdd25b4d 100644
--- a/SU2_CFD/src/solvers/CIncEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncEulerSolver.cpp
@@ -825,9 +825,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
 
   /*--- Set the primitive variables ---*/
 
-  SU2_OMP_MASTER
-  ErrorCounter = 0;
-  SU2_OMP_BARRIER
+  ompMasterAssignBarrier(ErrorCounter, 0);
 
   SU2_OMP_ATOMIC
   ErrorCounter += SetPrimitive_Variables(solver_container, config);
@@ -840,6 +838,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
       SU2_MPI::Allreduce(&tmp, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, SU2_MPI::GetComm());
       config->SetNonphysical_Points(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -862,6 +861,7 @@ void CIncEulerSolver::CommonPreprocessing(CGeometry *geometry, CSolver **solver_
   if (outlet) {
     SU2_OMP_MASTER
     GetOutlet_Properties(geometry, config, iMesh, Output);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -924,6 +924,7 @@ unsigned long CIncEulerSolver::SetPrimitive_Variables(CSolver **solver_container
 
     if (!physical) nonPhysicalPoints++;
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -1053,6 +1054,7 @@ void CIncEulerSolver::Centered_Residual(CGeometry *geometry, CSolver **solver_co
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -1076,6 +1078,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
 
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   const bool implicit   = (config->GetKind_TimeIntScheme() == EULER_IMPLICIT);
   const bool muscl      = (config->GetMUSCL_Flow() && (iMesh == MESH_0));
@@ -1219,6 +1222,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -1241,6 +1245,7 @@ void CIncEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_cont
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -1298,6 +1303,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (boussinesq) {
@@ -1330,6 +1336,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       LinSysRes.AddBlock(iPoint, residual);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (rotating_frame) {
@@ -1364,6 +1371,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (axisymmetric) {
@@ -1388,6 +1396,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         nodes->SetAuxVar(iPoint, 0, AuxVar);
 
       }
+      END_SU2_OMP_FOR
 
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
 
@@ -1451,6 +1460,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   if (radiation) {
@@ -1493,6 +1503,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       }
 
     }
+    END_SU2_OMP_FOR
 
   }
 
@@ -1506,6 +1517,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         /*--- Set the auxiliary variable, Eddy viscosity mu_t, for this node. ---*/
         nodes->SetAuxVar(iPoint, 0, nodes->GetEddyViscosity(iPoint));
       }
+      END_SU2_OMP_FOR
 
       /*--- Compute the auxiliary variable gradient with GG or WLS. ---*/
       if (config->GetKind_Gradient_Method() == GREEN_GAUSS) {
@@ -1545,6 +1557,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
 
     } // for iPoint
+    END_SU2_OMP_FOR
 
     if(!streamwise_periodic_temperature && energy) {
 
@@ -1584,6 +1597,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
             LinSysRes.AddBlock(iPoint, residual);
 
           }// for iVertex
+          END_SU2_OMP_FOR
         }// if periodic inlet boundary
       }// for iMarker
 
@@ -1619,6 +1633,7 @@ void CIncEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_cont
         }
 
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -1754,9 +1769,11 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
     SU2_OMP_FOR_STAT(omp_chunk_size)
     for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
       maxVel2 = max(maxVel2, nodes->GetVelocity2(iPoint));
+    END_SU2_OMP_FOR
 
     SU2_OMP_CRITICAL
     MaxVel2 = max(MaxVel2, maxVel2);
+    END_SU2_OMP_CRITICAL
 
     SU2_OMP_BARRIER
 
@@ -1766,6 +1783,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
 
       config->SetMax_Vel2(max(1e-10, MaxVel2));
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -1776,6 +1794,7 @@ void CIncEulerSolver::SetBeta_Parameter(CGeometry *geometry, CSolver **solver_co
   SU2_OMP_FOR_STAT(omp_chunk_size)
   for (auto iPoint = 0ul; iPoint < nPoint; iPoint++)
     nodes->SetBetaInc2(iPoint, BetaInc2);
+  END_SU2_OMP_FOR
 
 }
 
@@ -2008,6 +2027,7 @@ void CIncEulerSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_contain
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -2249,6 +2269,7 @@ void CIncEulerSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container,
     if (implicit)
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
   }
+  END_SU2_OMP_FOR
 }
 
 void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
@@ -2446,6 +2467,7 @@ void CIncEulerSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
       Jacobian.SubtractBlock2Diag(iPoint, residual_v.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -2536,6 +2558,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
         Jacobian.AddVal2Diag(iPoint, nDim+1, delta);
       }
     }
+    END_SU2_OMP_FOR
   }
 
   else {
@@ -2579,6 +2602,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -2615,6 +2639,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
           for (iVar = 0; iVar < nVar-!energy; iVar++)
             LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -2675,6 +2700,7 @@ void CIncEulerSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver
         Jacobian.AddVal2Diag(iPoint, nDim+1, delta);
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -2954,4 +2980,6 @@ void CIncEulerSolver::SetFreeStream_Solution(const CConfig *config){
     }
     nodes->SetSolution(iPoint,nDim+1, Temperature_Inf);
   }
+  END_SU2_OMP_FOR
+
 }
diff --git a/SU2_CFD/src/solvers/CIncNSSolver.cpp b/SU2_CFD/src/solvers/CIncNSSolver.cpp
index 05b354f769b..92b0fdfc925 100644
--- a/SU2_CFD/src/solvers/CIncNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CIncNSSolver.cpp
@@ -290,10 +290,12 @@ void CIncNSSolver::Compute_Streamwise_Periodic_Recovered_Values(CConfig *config,
       nodes->SetStreamwise_Periodic_RecoveredTemperature(iPoint, Temperature_Recovered);
     }
   } // for iPoint
+  END_SU2_OMP_FOR
 
   /*--- Compute the integrated Heatflux Q into the domain, and massflow over periodic markers ---*/
   SU2_OMP_MASTER
   GetStreamwise_Periodic_Properties(geometry, config, iMesh);
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 }
 
@@ -338,6 +340,7 @@ unsigned long CIncNSSolver::SetPrimitive_Variables(CSolver **solver_container, c
     nodes->SetDES_LengthScale(iPoint,DES_LengthScale);
 
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 
@@ -476,6 +479,7 @@ void CIncNSSolver::BC_Wall_Generic(const CGeometry *geometry, const CConfig *con
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CIncNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver**, CNumerics*,
@@ -585,4 +589,5 @@ void CIncNSSolver::BC_ConjugateHeat_Interface(CGeometry *geometry, CSolver **sol
     nodes->SetSolution_Old(iPoint, nDim+1, Twall);
     nodes->SetEnergy_ResTruncError_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 }
diff --git a/SU2_CFD/src/solvers/CMeshSolver.cpp b/SU2_CFD/src/solvers/CMeshSolver.cpp
index 49807e01908..e784308a743 100644
--- a/SU2_CFD/src/solvers/CMeshSolver.cpp
+++ b/SU2_CFD/src/solvers/CMeshSolver.cpp
@@ -147,6 +147,7 @@ CMeshSolver::CMeshSolver(CGeometry *geometry, CConfig *config) : CFEASolver(true
   SU2_OMP_PARALLEL {
     SetMinMaxVolume(geometry, config, false);
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Compute the wall distance using the reference coordinates ---*/
   SetWallDistance(geometry, config);
@@ -177,6 +178,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     MaxVolume = -1E22; MinVolume = 1E22;
     ElemCounter = 0;
   }
+  END_SU2_OMP_MASTER
 
   /*--- Local min/max, final reduction outside loop. ---*/
   su2double maxVol = -1E22, minVol = 1E22;
@@ -228,12 +230,14 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     /*--- Count distorted elements. ---*/
     if (ElemVolume <= 0.0) elCount++;
   }
+  END_SU2_OMP_FOR
   SU2_OMP_CRITICAL
   {
     MaxVolume = max(MaxVolume, maxVol);
     MinVolume = min(MinVolume, minVol);
     ElemCounter += elCount;
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
   SU2_OMP_MASTER
@@ -243,6 +247,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
     SU2_MPI::Allreduce(&maxVol, &MaxVolume, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
     SU2_MPI::Allreduce(&minVol, &MinVolume, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- Volume from 0 to 1 ---*/
@@ -258,6 +263,7 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
       element[iElem].SetRef_Volume(ElemVolume);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Store the maximum and minimum volume. ---*/
   SU2_OMP_MASTER {
@@ -273,7 +279,9 @@ void CMeshSolver::SetMinMaxVolume(CGeometry *geometry, CConfig *config, bool upd
   if ((ElemCounter != 0) && (rank == MASTER_NODE))
     cout <<"There are " << ElemCounter << " elements with negative volume.\n" << endl;
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   AD::EndPassive(wasActive);
 }
@@ -336,6 +344,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
     for (auto iPoint = 0ul; iPoint < nPoint; ++iPoint) {
       nodes->SetWallDistance(iPoint, MaxDistance);
     }
+    END_SU2_OMP_FOR
   }
   else {
     su2double MaxDistance_Local = -1E22, MinDistance_Local = 1E22;
@@ -358,11 +367,13 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
       if (dist > EPS)  MinDistance_Local = min(MinDistance_Local, dist);
 
     }
+    END_SU2_OMP_FOR
     SU2_OMP_CRITICAL
     {
       MaxDistance = max(MaxDistance, MaxDistance_Local);
       MinDistance = min(MinDistance, MinDistance_Local);
     }
+    END_SU2_OMP_CRITICAL
     SU2_OMP_BARRIER
 
     SU2_OMP_MASTER
@@ -372,6 +383,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
       SU2_MPI::Allreduce(&MaxDistance_Local, &MaxDistance, 1, MPI_DOUBLE, MPI_MAX, SU2_MPI::GetComm());
       SU2_MPI::Allreduce(&MinDistance_Local, &MinDistance, 1, MPI_DOUBLE, MPI_MIN, SU2_MPI::GetComm());
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 
@@ -381,6 +393,7 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
     su2double nodeDist = nodes->GetWallDistance(iPoint)/MaxDistance;
     nodes->SetWallDistance(iPoint,nodeDist);
   }
+  END_SU2_OMP_FOR
 
   /*--- Compute the element distances ---*/
   SU2_OMP_FOR_STAT(omp_chunk_size)
@@ -401,8 +414,10 @@ void CMeshSolver::SetWallDistance(CGeometry *geometry, CConfig *config) {
 
     element[iElem].SetWallDistance(ElemDist);
   }
+  END_SU2_OMP_FOR
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
 }
 
 void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics, CConfig *config){
@@ -456,6 +471,8 @@ void CMeshSolver::SetMesh_Stiffness(CGeometry **geometry, CNumerics **numerics,
     break;
   }
   }
+  END_SU2_OMP_PARALLEL
+
   stiffness_set = true;
 
 }
@@ -486,6 +503,7 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig
   SU2_OMP_PARALLEL {
     LinSysRes.SetValZero();
   }
+  END_SU2_OMP_PARALLEL
 
   /*--- Impose boundary conditions (all of them are ESSENTIAL BC's - displacements). ---*/
   SetBoundaryDisplacements(geometry[MESH_0], numerics[FEA_TERM], config);
@@ -511,7 +529,8 @@ void CMeshSolver::DeformMesh(CGeometry **geometry, CNumerics **numerics, CConfig
   /*--- Check for failed deformation (negative volumes). ---*/
   SetMinMaxVolume(geometry[MESH_0], config, true);
 
-  } // end parallel
+  }
+  END_SU2_OMP_PARALLEL
 
 }
 
@@ -533,6 +552,7 @@ void CMeshSolver::UpdateGridCoord(CGeometry *geometry, CConfig *config){
       geometry->nodes->SetCoord(iPoint, iDim, val_coord);
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Communicate the updated displacements and mesh coordinates. ---*/
   geometry->InitiateComms(geometry, config, COORDINATES);
@@ -590,6 +610,7 @@ void CMeshSolver::ComputeGridVelocity(CGeometry *geometry, CConfig *config){
 
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- The velocity was computed for nPointDomain, now we communicate it. ---*/
   geometry->InitiateComms(geometry, config, GRID_VELOCITY);
diff --git a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
index 778de7cad45..879ee715952 100644
--- a/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
+++ b/SU2_CFD/src/solvers/CNEMOEulerSolver.cpp
@@ -525,6 +525,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con
   unsigned long counter_local = 0;
   SU2_OMP_MASTER
   ErrorCounter = 0;
+  END_SU2_OMP_MASTER
 
   /*--- Pick one numerics object per thread. ---*/
   CNumerics* numerics = numerics_container[CONV_TERM];
@@ -697,6 +698,7 @@ void CNEMOEulerSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_con
       SU2_MPI::Reduce(&counter_local, &ErrorCounter, 1, MPI_UNSIGNED_LONG, MPI_SUM, MASTER_NODE, SU2_MPI::GetComm());
       config->SetNonphysical_Reconstr(ErrorCounter);
     }
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
   }
 }
@@ -985,6 +987,7 @@ void CNEMOEulerSolver::Source_Residual(CGeometry *geometry, CSolver **solver_con
         }else
           eAxi_local++;
       }
+      END_SU2_OMP_FOR
     }
 
   /*--- Checking for NaN ---*/
diff --git a/SU2_CFD/src/solvers/CNSSolver.cpp b/SU2_CFD/src/solvers/CNSSolver.cpp
index 7e143a369f5..2e561b70a37 100644
--- a/SU2_CFD/src/solvers/CNSSolver.cpp
+++ b/SU2_CFD/src/solvers/CNSSolver.cpp
@@ -86,12 +86,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
    turbulence solver, and post) only temperature and velocity are needed ---*/
 
   const auto nPrimVarGrad_bak = nPrimVarGrad;
-  if (Output) {
-    SU2_OMP_BARRIER
-    SU2_OMP_MASTER
-    nPrimVarGrad = 1+nDim;
-    SU2_OMP_BARRIER
-  }
+  if (Output) ompMasterAssignBarrier(nPrimVarGrad, 1+nDim);
 
   if (config->GetReconstructionGradientRequired() && muscl && !center) {
     switch (config->GetKind_Gradient_Method_Recon()) {
@@ -113,11 +108,7 @@ void CNSSolver::Preprocessing(CGeometry *geometry, CSolver **solver_container, C
     SetPrimitive_Gradient_LS(geometry, config);
   }
 
-  if (Output) {
-    SU2_OMP_MASTER
-    nPrimVarGrad = nPrimVarGrad_bak;
-    SU2_OMP_BARRIER
-  }
+  if (Output) ompMasterAssignBarrier(nPrimVarGrad, nPrimVarGrad_bak);
 
   /*--- Compute the limiters ---*/
 
@@ -171,6 +162,7 @@ unsigned long CNSSolver::SetPrimitive_Variables(CSolver **solver_container, cons
     nonPhysicalPoints += !physical;
 
   }
+  END_SU2_OMP_FOR
 
   return nonPhysicalPoints;
 }
@@ -316,6 +308,7 @@ void CNSSolver::SetRoe_Dissipation(CGeometry *geometry, CConfig *config){
       nodes->SetRoe_Dissipation_NTS(iPoint, delta, config->GetConst_DES());
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -520,6 +513,7 @@ void CNSSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_container
       }
     }
   }
+  END_SU2_OMP_FOR
 
   if (Jacobian_i)
     for (auto iVar = 0u; iVar < nVar; iVar++)
@@ -717,6 +711,7 @@ void CNSSolver::BC_Isothermal_Wall_Generic(CGeometry *geometry, CSolver **solver
       }
     }
   }
+  END_SU2_OMP_FOR
 
   if (Jacobian_i)
     for (auto iVar = 0u; iVar < nVar; iVar++)
@@ -914,6 +909,7 @@ void CNSSolver::SetTauWall_WF(CGeometry *geometry, CSolver **solver_container, c
       nodes->SetTauWall(iPoint, Tau_Wall);
 
     }
+    END_SU2_OMP_FOR
 
   }
 
diff --git a/SU2_CFD/src/solvers/CSolver.cpp b/SU2_CFD/src/solvers/CSolver.cpp
index fa9aa896987..433e295132c 100644
--- a/SU2_CFD/src/solvers/CSolver.cpp
+++ b/SU2_CFD/src/solvers/CSolver.cpp
@@ -955,6 +955,7 @@ void CSolver::InitiatePeriodicComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -1037,6 +1038,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
       SU2_MPI::Waitany(geometry->nPeriodicRecv,
                        geometry->req_PeriodicRecv,
                        &ind, &status);
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
       source = status.MPI_SOURCE;
 #else
@@ -1283,6 +1285,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
           }
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -1294,6 +1297,7 @@ void CSolver::CompletePeriodicComms(CGeometry *geometry,
     SU2_MPI::Waitall(geometry->nPeriodicSend,
                      geometry->req_PeriodicSend,
                      MPI_STATUS_IGNORE);
+    END_SU2_OMP_MASTER
 #endif
     SU2_OMP_BARRIER
   }
@@ -1520,6 +1524,7 @@ void CSolver::InitiateComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
 
       /*--- Launch the point-to-point MPI send for this message. ---*/
 
@@ -1572,6 +1577,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
 
       SU2_OMP_MASTER
       SU2_MPI::Waitany(geometry->nP2PRecv, geometry->req_P2PRecv, &ind, &status);
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
 
       /*--- Once we have recv'd a message, get the source rank. ---*/
@@ -1669,6 +1675,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
             break;
         }
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Verify that all non-blocking point-to-point sends have finished.
@@ -1678,6 +1685,7 @@ void CSolver::CompleteComms(CGeometry *geometry,
 #ifdef HAVE_MPI
     SU2_OMP_MASTER
     SU2_MPI::Waitall(geometry->nP2PSend, geometry->req_P2PSend, MPI_STATUS_IGNORE);
+    END_SU2_OMP_MASTER
 #endif
     SU2_OMP_BARRIER
   }
@@ -1804,6 +1812,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       }
     }
     } /* End SU2_OMP_MASTER, now all threads update the CFL number. */
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
 
     /* Loop over all points on this grid and apply CFL adaption. */
@@ -1816,6 +1825,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       Max_CFL_Local = 0.0;
       Avg_CFL_Local = 0.0;
     }
+    END_SU2_OMP_MASTER
 
     SU2_OMP_FOR_STAT(roundUpDiv(geometry[iMesh]->GetnPointDomain(),omp_get_max_threads()))
     for (unsigned long iPoint = 0; iPoint < geometry[iMesh]->GetnPointDomain(); iPoint++) {
@@ -1884,6 +1894,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
       }
 
     }
+    END_SU2_OMP_FOR
 
     /* Reduce the min/max/avg local CFL numbers. */
 
@@ -1894,6 +1905,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
         Max_CFL_Local = max(Max_CFL_Local,myCFLMax);
         Avg_CFL_Local += myCFLSum;
       }
+      END_SU2_OMP_CRITICAL
       SU2_OMP_BARRIER
 
       SU2_OMP_MASTER
@@ -1904,6 +1916,7 @@ void CSolver::AdaptCFLNumber(CGeometry **geometry,
         SU2_MPI::Allreduce(&myCFLSum, &Avg_CFL_Local, 1, MPI_DOUBLE, MPI_SUM, SU2_MPI::GetComm());
         Avg_CFL_Local /= su2double(geometry[iMesh]->GetGlobal_nPointDomain());
       }
+      END_SU2_OMP_MASTER
       SU2_OMP_BARRIER
     }
 
@@ -1915,6 +1928,8 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config)
 
   if (geometry->GetMGLevel() != MESH_0) return;
 
+  SU2_OMP_MASTER { /* Master-only: MPI collectives below; other threads wait at the closing barrier. */
+
   /*--- Set the L2 Norm residual in all the processors. ---*/
 
   vector<su2double> rbuf_res(nVar);
@@ -1947,30 +1962,36 @@ void CSolver::SetResidual_RMS(const CGeometry *geometry, const CConfig *config)
 
   /*--- Set the Maximum residual in all the processors. ---*/
 
-  if (config->GetComm_Level() != COMM_FULL) return;
+  if (config->GetComm_Level() == COMM_FULL) {
 
-  const unsigned long nProcessor = size;
+    const unsigned long nProcessor = size;
 
-  su2activematrix rbuf_residual(nProcessor,nVar);
-  su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
-  su2activematrix rbuf_coord(nProcessor*nVar, nDim);
+    su2activematrix rbuf_residual(nProcessor,nVar);
+    su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
+    su2activematrix rbuf_coord(nProcessor*nVar, nDim);
 
-  SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Residual_Max.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_Coord.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
 
-  for (unsigned short iVar = 0; iVar < nVar; iVar++) {
-    for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
-      AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+    for (unsigned short iVar = 0; iVar < nVar; iVar++) {
+      for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
+        AddRes_Max(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+      }
     }
   }
 
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config) {
 
   if (geometry->GetMGLevel() != MESH_0) return;
 
+  SU2_OMP_MASTER { /* Master-only: MPI collectives below; other threads wait at the closing barrier. */
+
   /*--- Set the L2 Norm residual in all the processors. ---*/
 
   vector<su2double> rbuf_res(nVar);
@@ -1982,26 +2003,30 @@ void CSolver::SetResidual_BGS(const CGeometry *geometry, const CConfig *config)
     Residual_BGS[iVar] = max(EPS*EPS, sqrt(rbuf_res[iVar]/Global_nPointDomain));
   }
 
-  if (config->GetComm_Level() != COMM_FULL) return;
+  if (config->GetComm_Level() == COMM_FULL) {
 
-  /*--- Set the Maximum residual in all the processors. ---*/
+    /*--- Set the Maximum residual in all the processors. ---*/
 
-  const unsigned long nProcessor = size;
+    const unsigned long nProcessor = size;
 
-  su2activematrix rbuf_residual(nProcessor,nVar);
-  su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
-  su2activematrix rbuf_coord(nProcessor*nVar, nDim);
+    su2activematrix rbuf_residual(nProcessor,nVar);
+    su2matrix<unsigned long> rbuf_point(nProcessor,nVar);
+    su2activematrix rbuf_coord(nProcessor*nVar, nDim);
 
-  SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
-  SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Residual_Max_BGS.data(), nVar, MPI_DOUBLE, rbuf_residual.data(), nVar, MPI_DOUBLE, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_BGS.data(), nVar, MPI_UNSIGNED_LONG, rbuf_point.data(), nVar, MPI_UNSIGNED_LONG, SU2_MPI::GetComm());
+    SU2_MPI::Allgather(Point_Max_Coord_BGS.data(), nVar*nDim, MPI_DOUBLE, rbuf_coord.data(), nVar*nDim, MPI_DOUBLE, SU2_MPI::GetComm());
 
-  for (unsigned short iVar = 0; iVar < nVar; iVar++) {
-    for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
-      AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+    for (unsigned short iVar = 0; iVar < nVar; iVar++) {
+      for (auto iProcessor = 0ul; iProcessor < nProcessor; iProcessor++) {
+        AddRes_Max_BGS(iVar, rbuf_residual(iProcessor,iVar), rbuf_point(iProcessor,iVar), rbuf_coord[iProcessor*nVar+iVar]);
+      }
     }
   }
 
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 }
 
 void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
@@ -2033,6 +2058,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
         LinSysRes(iPoint,iVar) += Flux * Solution_i[iVar];
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Loop boundary edges ---*/
 
@@ -2058,6 +2084,7 @@ void CSolver::SetRotatingFrame_GCL(CGeometry *geometry, const CConfig *config) {
         for (auto iVar = 0u; iVar < nVar; iVar++)
           LinSysRes(iPoint,iVar) -= Flux * base_nodes->GetSolution(iPoint,iVar);
       }
+      END_SU2_OMP_FOR
     }
   }
 
@@ -2145,6 +2172,7 @@ void CSolver::SetUndivided_Laplacian(CGeometry *geometry, const CConfig *config)
       }
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Correct the Laplacian across any periodic boundaries. ---*/
 
@@ -2686,7 +2714,9 @@ void CSolver::Restart_OldGeometry(CGeometry *geometry, CConfig *config) {
 
   }
 
-  } SU2_OMP_BARRIER
+  }
+  END_SU2_OMP_MASTER
+  SU2_OMP_BARRIER
 
   /*--- It's necessary to communicate this information ---*/
 
@@ -3721,6 +3751,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config
     if (!config->GetSolid_Wall(iMarker)) continue;
 
     /*--- Loop over the vertices ---*/
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
     for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
 
       /*--- Recover the point index ---*/
@@ -3734,6 +3765,7 @@ void CSolver::RegisterVertexTractions(CGeometry *geometry, const CConfig *config
         AD::RegisterOutput(VertexTraction[iMarker][iVertex][iDim]);
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -3750,6 +3782,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf
     if (!config->GetSolid_Wall(iMarker)) continue;
 
     /*--- Loop over the vertices ---*/
+    SU2_OMP_FOR_STAT(OMP_MIN_SIZE)
     for (iVertex = 0; iVertex < geometry->nVertex[iMarker]; iVertex++) {
 
       /*--- Recover the point index ---*/
@@ -3764,6 +3797,7 @@ void CSolver::SetVertexTractionsAdjoint(CGeometry *geometry, const CConfig *conf
                                 SU2_TYPE::GetValue(VertexTractionAdjoint[iMarker][iVertex][iDim]));
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -3816,6 +3850,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
     Residual_BGS[iVar] = 0.0;
     Residual_Max_BGS[iVar] = 0.0;
   }
+  END_SU2_OMP_MASTER
 
   vector<su2double> resMax(nVar,0.0), resRMS(nVar,0.0);
   vector<const su2double*> coordMax(nVar,nullptr);
@@ -3839,6 +3874,7 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
       }
     }
   }
+  END_SU2_OMP_FOR
 
   /*--- Reduce residual information over all threads in this rank. ---*/
   SU2_OMP_CRITICAL
@@ -3846,11 +3882,65 @@ void CSolver::ComputeResidual_Multizone(const CGeometry *geometry, const CConfig
     Residual_BGS[iVar] += resRMS[iVar];
     AddRes_Max_BGS(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
-  SU2_OMP_MASTER
   SetResidual_BGS(geometry, config);
-  SU2_OMP_BARRIER
 
-  } // end SU2_OMP_PARALLEL
+  }
+  END_SU2_OMP_PARALLEL
+}
+
+void CSolver::BasicLoadRestart(CGeometry *geometry, const CConfig *config, const string& filename, unsigned long skipVars) {
+
+  /*--- Load this solver's nVar solution values from the restart file "filename" into base_nodes. ---*/
+
+//  NOTE(review): the metadata read is left disabled: Read_SU2_Restart_Metadata(geometry[MESH_0], config, true, filename);
+
+  /*--- Read the restart data from either an ASCII or binary SU2 file. ---*/
+
+  if (config->GetRead_Binary_Restart()) {
+    Read_SU2_Restart_Binary(geometry, config, filename);
+  } else {
+    Read_SU2_Restart_ASCII(geometry, config, filename);
+  }
+
+  /*--- Copy nVar values per point found locally, starting skipVars entries into each point's record. ---*/
+
+  unsigned long iPoint_Global_Local = 0;  // counts restart points that live on this processor
+
+  for (auto iPoint_Global = 0ul; iPoint_Global < geometry->GetGlobal_nPointDomain(); iPoint_Global++ ) {
+
+    /*--- Retrieve local index. If this node from the restart file lives
+     on the current processor, we will load and instantiate the vars. ---*/
+
+    const auto iPoint_Local = geometry->GetGlobal_to_Local_Point(iPoint_Global);
+
+    if (iPoint_Local > -1) {
+
+      /*--- We need to store this point's data, so jump to the correct
+       offset in the buffer of data from the restart file and load it. ---*/
+
+      const auto index = iPoint_Global_Local*Restart_Vars[1] + skipVars;
+
+      for (auto iVar = 0u; iVar < nVar; iVar++) {
+        base_nodes->SetSolution(iPoint_Local, iVar, Restart_Data[index+iVar]);
+      }
+
+      iPoint_Global_Local++;
+    }
+
+  }
+
+  /*--- Delete the class memory that is used to load the restart. ---*/
+
+  delete [] Restart_Vars;  Restart_Vars = nullptr;
+  delete [] Restart_Data;  Restart_Data = nullptr;
+
+  /*--- The number of points loaded must equal nPointDomain; otherwise the file does not match the mesh. ---*/
+
+  if (iPoint_Global_Local != nPointDomain) {
+    SU2_MPI::Error(string("The solution file ") + filename + string(" doesn't match with the mesh file!\n") +
+                   string("It could be empty lines at the end of the file."), CURRENT_FUNCTION);
+  }
 }
diff --git a/SU2_CFD/src/solvers/CTurbSASolver.cpp b/SU2_CFD/src/solvers/CTurbSASolver.cpp
index d72f80c8bdd..33690ea185f 100644
--- a/SU2_CFD/src/solvers/CTurbSASolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSASolver.cpp
@@ -236,6 +236,7 @@ void CTurbSASolver::Preprocessing(CGeometry *geometry, CSolver **solver_containe
         auto Laminar_Viscosity  = solver_container[FLOW_SOL]->GetNodes()->GetLaminarViscosity(iPoint);
         nodes->SetVortex_Tilting(iPoint, PrimGrad_Flow, Vorticity, Laminar_Viscosity);
       }
+      END_SU2_OMP_FOR
     }
 
     /*--- Compute the DES length scale ---*/
@@ -281,6 +282,7 @@ void CTurbSASolver::Postprocessing(CGeometry *geometry, CSolver **solver_contain
     nodes->SetmuT(iPoint,muT);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -379,6 +381,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
     if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
   if (harmonic_balance) {
 
@@ -394,6 +397,7 @@ void CTurbSASolver::Source_Residual(CGeometry *geometry, CSolver **solver_contai
         LinSysRes(iPoint,iVar) += Source*Volume;
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -410,6 +414,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta
   if (config->GetWall_Functions()) {
     SU2_OMP_MASTER
     SetNuTilde_WF(geometry, solver_container, conv_numerics, visc_numerics, config, val_marker);
+    END_SU2_OMP_MASTER
     SU2_OMP_BARRIER
     return;
   }
@@ -475,6 +480,7 @@ void CTurbSASolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_conta
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSASolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics,
@@ -535,6 +541,7 @@ void CTurbSASolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_container
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -622,6 +629,7 @@ void CTurbSASolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, CN
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -707,6 +715,7 @@ void CTurbSASolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container, C
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -795,6 +804,7 @@ void CTurbSASolver::BC_Engine_Inflow(CGeometry *geometry, CSolver **solver_conta
     }
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -883,6 +893,7 @@ void CTurbSASolver::BC_Engine_Exhaust(CGeometry *geometry, CSolver **solver_cont
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -1032,6 +1043,7 @@ void CTurbSASolver::BC_ActDisk(CGeometry *geometry, CSolver **solver_container,
 //        Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -1125,6 +1137,7 @@ void CTurbSASolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_c
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -1229,6 +1242,7 @@ void CTurbSASolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contain
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -1889,6 +1903,7 @@ void CTurbSASolver::SetDES_LengthScale(CSolver **solver, CGeometry *geometry, CC
     nodes->SetDES_LengthScale(iPoint, lengthScale);
 
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSASolver::SetInletAtVertex(const su2double *val_inlet,
diff --git a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
index 224df226e3b..0ba644fe1bc 100644
--- a/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSSTSolver.cpp
@@ -273,6 +273,7 @@ void CTurbSSTSolver::Postprocessing(CGeometry *geometry, CSolver **solver_contai
     nodes->SetmuT(iPoint,muT);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -347,6 +348,7 @@ void CTurbSSTSolver::Source_Residual(CGeometry *geometry, CSolver **solver_conta
     if (implicit) Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -441,6 +443,7 @@ void CTurbSSTSolver::BC_HeatFlux_Wall(CGeometry *geometry, CSolver **solver_cont
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CTurbSSTSolver::BC_Isothermal_Wall(CGeometry *geometry, CSolver **solver_container, CNumerics *conv_numerics,
@@ -501,6 +504,7 @@ void CTurbSSTSolver::BC_Far_Field(CGeometry *geometry, CSolver **solver_containe
       if (implicit) Jacobian.AddBlock2Diag(iPoint, residual.jacobian_i);
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -594,6 +598,7 @@ void CTurbSSTSolver::BC_Inlet(CGeometry *geometry, CSolver **solver_container, C
     }
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -684,6 +689,7 @@ void CTurbSSTSolver::BC_Outlet(CGeometry *geometry, CSolver **solver_container,
 
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -775,6 +781,7 @@ void CTurbSSTSolver::BC_Inlet_MixingPlane(CGeometry *geometry, CSolver **solver_
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -885,6 +892,7 @@ void CTurbSSTSolver::BC_Inlet_Turbo(CGeometry *geometry, CSolver **solver_contai
       if (implicit) Jacobian.SubtractBlock2Diag(iPoint, visc_residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
 }
diff --git a/SU2_CFD/src/solvers/CTurbSolver.cpp b/SU2_CFD/src/solvers/CTurbSolver.cpp
index 82df13a563a..2ace5c5c495 100644
--- a/SU2_CFD/src/solvers/CTurbSolver.cpp
+++ b/SU2_CFD/src/solvers/CTurbSolver.cpp
@@ -229,6 +229,7 @@ void CTurbSolver::Upwind_Residual(CGeometry *geometry, CSolver **solver_containe
     Viscous_Residual(iEdge, geometry, solver_container,
                      numerics_container[VISC_TERM + omp_get_thread_num()*MAX_TERMS], config);
   }
+  END_SU2_OMP_FOR
   } // end color loop
 
   if (ReducerStrategy) {
@@ -305,6 +306,7 @@ void CTurbSolver::SumEdgeFluxes(CGeometry* geometry) {
         LinSysRes.SubtractBlock(iPoint, EdgeFluxes.GetBlock(iEdge));
     }
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -503,6 +505,7 @@ void CTurbSolver::BC_Fluid_Interface(CGeometry *geometry, CSolver **solver_conta
       Jacobian.SubtractBlock2Diag(iPoint, residual.jacobian_i);
 
     }
+    END_SU2_OMP_FOR
   }
 
   delete [] PrimVar_j;
@@ -540,6 +543,7 @@ void CTurbSolver::Impose_Fixed_Values(const CGeometry *geometry, const CConfig *
         }
       }
     }
+    END_SU2_OMP_FOR
   }
 
 }
@@ -559,7 +563,7 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
 
   /*--- Build implicit system ---*/
 
-  SU2_OMP(for schedule(static,omp_chunk_size) nowait)
+  SU2_OMP_FOR_(schedule(static,omp_chunk_size) SU2_NOWAIT)
   for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
 
     /// TODO: This could be the SetTime_Step of this solver.
@@ -593,17 +597,17 @@ void CTurbSolver::PrepareImplicitIteration(CGeometry *geometry, CSolver** solver
       }
     }
   }
+  END_SU2_OMP_FOR
   SU2_OMP_CRITICAL
   for (unsigned short iVar = 0; iVar < nVar; iVar++) {
     Residual_RMS[iVar] += resRMS[iVar];
     AddRes_Max(iVar, resMax[iVar], geometry->nodes->GetGlobalIndex(idxMax[iVar]), coordMax[iVar]);
   }
+  END_SU2_OMP_CRITICAL
   SU2_OMP_BARRIER
 
   /*--- Compute the root mean square residual ---*/
-  SU2_OMP_MASTER
   SetResidual_RMS(geometry, config);
-  SU2_OMP_BARRIER
 }
 
 void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solver_container, CConfig *config) {
@@ -628,6 +632,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve
         for (unsigned long iPoint = 0; iPoint < nPointDomain; iPoint++) {
           nodes->AddSolution(iPoint, 0, nodes->GetUnderRelaxation(iPoint)*LinSysSol[iPoint]);
         }
+        END_SU2_OMP_FOR
         break;
 
       case SST: case SST_SUST:
@@ -647,6 +652,7 @@ void CTurbSolver::CompleteImplicitIteration(CGeometry *geometry, CSolver **solve
                       density, density_old, lowerlimit[iVar], upperlimit[iVar]);
           }
         }
+        END_SU2_OMP_FOR
         break;
 
     }
@@ -668,11 +674,12 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_
 
   /*--- Solve or smooth the linear system. ---*/
 
-  SU2_OMP(for schedule(static,OMP_MIN_SIZE) nowait)
+  SU2_OMP_FOR_(schedule(static,OMP_MIN_SIZE) SU2_NOWAIT)
   for (unsigned long iPoint = nPointDomain; iPoint < nPoint; iPoint++) {
     LinSysRes.SetBlock_Zero(iPoint);
     LinSysSol.SetBlock_Zero(iPoint);
   }
+  END_SU2_OMP_FOR
 
   auto iter = System.Solve(Jacobian, LinSysRes, LinSysSol, geometry, config);
 
@@ -680,6 +687,7 @@ void CTurbSolver::ImplicitEuler_Iteration(CGeometry *geometry, CSolver **solver_
     SetIterLinSolver(iter);
     SetResLinSolver(System.GetResidual());
   }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   CompleteImplicitIteration(geometry, solver_container, config);
@@ -733,6 +741,7 @@ void CTurbSolver::ComputeUnderRelaxationFactor(const CConfig *config) {
     nodes->SetUnderRelaxation(iPoint, localUnderRelaxation);
 
   }
+  END_SU2_OMP_FOR
 
 }
 
@@ -834,6 +843,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
       }
 
     }
+    END_SU2_OMP_FOR
 
   } else {
 
@@ -880,6 +890,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
           LinSysRes(iPoint,iVar) += U_time_n[iVar]*Residual_GCL;
       }
     }
+    END_SU2_OMP_FOR
 
     /*--- Loop over the boundary edges ---*/
 
@@ -927,6 +938,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
           }
 
         }
+        END_SU2_OMP_FOR
       }
     }
 
@@ -999,6 +1011,7 @@ void CTurbSolver::SetResidual_DualTime(CGeometry *geometry, CSolver **solver_con
         if (second_order) Jacobian.AddVal2Diag(iPoint, (Volume_nP1*3.0)/(2.0*TimeStep));
       }
     }
+    END_SU2_OMP_FOR
 
   } // end dynamic grid
 
@@ -1081,6 +1094,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
   }
 
   } // end SU2_OMP_MASTER, pre and postprocessing are thread-safe.
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
   /*--- MPI solution and compute the eddy viscosity ---*/
@@ -1108,6 +1122,7 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
       }
       solver[iMesh][TURB_SOL]->GetNodes()->SetSolution(iPoint,Solution_Coarse);
     }
+    END_SU2_OMP_FOR
 
     solver[iMesh][TURB_SOL]->InitiateComms(geometry[iMesh], config, SOLUTION);
     solver[iMesh][TURB_SOL]->CompleteComms(geometry[iMesh], config, SOLUTION);
@@ -1124,7 +1139,8 @@ void CTurbSolver::LoadRestart(CGeometry **geometry, CSolver ***solver, CConfig *
   delete [] Restart_Vars; Restart_Vars = nullptr;
   delete [] Restart_Data; Restart_Data = nullptr;
 
-  } // end SU2_OMP_MASTER
+  }
+  END_SU2_OMP_MASTER
   SU2_OMP_BARRIER
 
 }
diff --git a/SU2_CFD/src/variables/CMeshVariable.cpp b/SU2_CFD/src/variables/CMeshVariable.cpp
index 0f35b0fc442..d4786c3ed75 100644
--- a/SU2_CFD/src/variables/CMeshVariable.cpp
+++ b/SU2_CFD/src/variables/CMeshVariable.cpp
@@ -50,13 +50,17 @@ CMeshVariable::CMeshVariable(unsigned long npoint, unsigned long ndim, CConfig *
 
 void CMeshVariable::Register_MeshCoord(bool input) {
   if (input) {
+    SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       for (unsigned long iDim = 0; iDim < nDim; iDim++)
         AD::RegisterInput(Mesh_Coord(iPoint,iDim));
+    END_SU2_OMP_FOR
   }
   else {
+    SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
     for (unsigned long iPoint = 0; iPoint < nPoint; iPoint++)
       for (unsigned long iDim = 0; iDim < nDim; iDim++)
         AD::RegisterOutput(Mesh_Coord(iPoint,iDim));
+    END_SU2_OMP_FOR
   }
 }
diff --git a/SU2_CFD/src/variables/CVariable.cpp b/SU2_CFD/src/variables/CVariable.cpp
index 4d7b170bc47..ff9e9ef4eb3 100644
--- a/SU2_CFD/src/variables/CVariable.cpp
+++ b/SU2_CFD/src/variables/CVariable.cpp
@@ -113,6 +113,7 @@ void CVariable::Restore_BGSSolution_k() {
 void CVariable::SetExternalZero() { parallelSet(External.size(), 0.0, External.data()); }
 
 void CVariable::RegisterSolution(bool input, bool push_index) {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint) {
     for(unsigned long iVar=0; iVar<nVar; ++iVar) {
       if(input) {
@@ -131,16 +132,21 @@ void CVariable::RegisterSolution(bool input, bool push_index) {
       }
     }
   }
+  END_SU2_OMP_FOR
 }
 
 void CVariable::RegisterSolution_time_n() {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint)
     for(unsigned long iVar=0; iVar<nVar; ++iVar)
       AD::RegisterInput(Solution_time_n(iPoint,iVar));
+  END_SU2_OMP_FOR
 }
 
 void CVariable::RegisterSolution_time_n1() {
+  SU2_OMP_FOR_STAT(roundUpDiv(nPoint,omp_get_num_threads()))
   for (unsigned long iPoint = 0; iPoint < nPoint; ++iPoint)
     for(unsigned long iVar=0; iVar<nVar; ++iVar)
       AD::RegisterInput(Solution_time_n1(iPoint,iVar));
+  END_SU2_OMP_FOR
 }
diff --git a/SU2_DOT/src/SU2_DOT.cpp b/SU2_DOT/src/SU2_DOT.cpp
index 7b9927c1637..e2b2de498be 100644
--- a/SU2_DOT/src/SU2_DOT.cpp
+++ b/SU2_DOT/src/SU2_DOT.cpp
@@ -36,6 +36,10 @@ int main(int argc, char *argv[]) {
 
   char config_file_name[MAX_STRING_SIZE];
 
+  /*--- OpenMP initialization ---*/
+
+  omp_initialize();
+
   /*--- MPI initialization, and buffer setting ---*/
 
 #if defined(HAVE_OMP) && defined(HAVE_MPI)
@@ -49,6 +53,11 @@ int main(int argc, char *argv[]) {
   const int rank = SU2_MPI::GetRank();
   const int size = SU2_MPI::GetSize();
 
+  /*--- AD initialization ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().initialize();
+#endif
+
   /*--- Pointer to different structures that will be used throughout the entire code ---*/
 
   CConfig **config_container            = nullptr;
@@ -406,9 +415,17 @@ int main(int argc, char *argv[]) {
   if (rank == MASTER_NODE)
     cout << "\n------------------------- Exit Success (SU2_DOT) ------------------------\n" << endl;
 
-  /*--- Finalize MPI parallelization ---*/
+  /*--- Finalize AD, if necessary. ---*/
+#ifdef HAVE_OPDI
+  AD::getGlobalTape().finalize();
+#endif
+
+  /*--- Finalize MPI parallelization. ---*/
   SU2_MPI::Finalize();
 
+  /*--- Finalize OpenMP. ---*/
+  omp_finalize();
+
   return EXIT_SUCCESS;
 
 }
diff --git a/SU2_PY/pySU2/pySU2.i b/SU2_PY/pySU2/pySU2.i
index 7e13c0f6006..f73635fbdd2 100644
--- a/SU2_PY/pySU2/pySU2.i
+++ b/SU2_PY/pySU2/pySU2.i
@@ -46,6 +46,7 @@ threads="1"
 %}
 
 // ----------- USED MODULES ------------
+%import "../../Common/include/code_config.hpp"
 %import "../../Common/include/basic_types/datatype_structure.hpp"
 %import "../../Common/include/parallelization/mpi_structure.hpp"
 %include "std_string.i"
diff --git a/SU2_PY/pySU2/pySU2ad.i b/SU2_PY/pySU2/pySU2ad.i
index 940bda8c997..19bb87c2b3d 100644
--- a/SU2_PY/pySU2/pySU2ad.i
+++ b/SU2_PY/pySU2/pySU2ad.i
@@ -46,6 +46,7 @@ threads="1"
 %}
 
 // ----------- USED MODULES ------------
+%import "../../Common/include/code_config.hpp"
 %import "../../Common/include/basic_types/datatype_structure.hpp"
 %import "../../Common/include/parallelization/mpi_structure.hpp"
 %include "std_string.i"
diff --git a/externals/codi b/externals/codi
index 1b8d3f5f03d..6a67202a388 160000
--- a/externals/codi
+++ b/externals/codi
@@ -1 +1 @@
-Subproject commit 1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8
+Subproject commit 6a67202a3887c8da490fdfde82bc46507de68692
diff --git a/externals/opdi b/externals/opdi
new file mode 160000
index 00000000000..e56f79cada2
--- /dev/null
+++ b/externals/opdi
@@ -0,0 +1 @@
+Subproject commit e56f79cada202d21e7425f5d5cfd5b1153f2465e
diff --git a/meson.build b/meson.build
index d15905334d9..500ebd87fad 100644
--- a/meson.build
+++ b/meson.build
@@ -104,6 +104,17 @@ endif
 if omp
   # add OpenMP dependency
   su2_deps += omp_dep
+
+  # add opdi dependency
+  if get_option('enable-autodiff')
+    codi_dep += declare_dependency(include_directories: 'externals/opdi/include')
+
+    if get_option('opdi-backend') == 'macro'
+      su2_cpp_args += '-DFORCE_OPDI_MACRO_BACKEND'
+    elif get_option('opdi-backend') == 'ompt'
+      su2_cpp_args += '-DFORCE_OPDI_OMPT_BACKEND'
+    endif
+  endif
 endif
 
 if get_option('enable-tecio')
@@ -175,6 +186,14 @@ if get_option('enable-mpp')
   su2_cpp_args += '-DHAVE_MPP'
 endif
 
+if omp and get_option('enable-autodiff')  # OpDiLib build: check that the macros listed under "pairs" in su2omp.syntax.json are matched
+  py = find_program('python3','python')
+  p = run_command(py, 'externals/opdi/syntax/check.py', 'su2omp.syntax.json', 'Common', 'SU2_CFD', '-p', '*.hpp', '*.cpp', '*.inl', '-r', '-q', check: false)  # exit code is handled below
+  if p.returncode() != 0
+    error(p.stdout() + p.stderr())  # include stderr so a missing or crashing checker does not fail with an empty message
+  endif
+endif
+
 # compile common library
 subdir('Common/src')
 # compile SU2_CFD executable
diff --git a/meson_options.txt b/meson_options.txt
index fd354b12276..b5d9ccdddc8 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -18,3 +18,4 @@ option('enable-tests',  type : 'boolean', value : false, description: 'compile U
 option('enable-mixedprec', type : 'boolean', value : false, description: 'use single precision floating point arithmetic for sparse algebra')
 option('extra-deps', type : 'string', value : '', description: 'comma-separated list of extra (custom) dependencies to add for compilation')
 option('enable-mpp',  type : 'boolean', value : false, description: 'enable Mutation++ support')
+option('opdi-backend', type : 'combo', choices : ['auto', 'macro', 'ompt'], value : 'auto', description: 'OpDiLib backend choice')
diff --git a/meson_scripts/init.py b/meson_scripts/init.py
index c9338cb1c8d..4d9a4e35ac3 100755
--- a/meson_scripts/init.py
+++ b/meson_scripts/init.py
@@ -44,10 +44,12 @@ def init_submodules(method = 'auto'):
 
   # This information of the modules is used if projects was not cloned using git
   # The sha tag must be maintained manually to point to the correct commit
-  sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8'
+  sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
   github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
   sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
   github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
+  sha_version_opdi = 'e56f79cada202d21e7425f5d5cfd5b1153f2465e'
+  github_repo_opdi = 'https://github.com/SciCompKL/OpDiLib'
   sha_version_meson = '29ef4478df6d3aaca40c7993f125b29409be1de2'
   github_repo_meson = 'https://github.com/mesonbuild/meson'
   sha_version_ninja = '52649de2c56b63f42bc59513d51286531c595b44'
@@ -57,12 +59,14 @@ def init_submodules(method = 'auto'):
 
   medi_name = 'MeDiPack'
   codi_name = 'CoDiPack'
+  opdi_name = 'OpDiLib'
   meson_name = 'meson'
   ninja_name= 'ninja'
   mpp_name= 'Mutationpp'
   base_path = cur_dir + os.path.sep + 'externals' + os.path.sep 
   alt_name_medi = base_path + 'medi'
   alt_name_codi = base_path + 'codi'
+  alt_name_opdi = base_path + 'opdi'
   alt_name_meson =  base_path + 'meson'
   alt_name_ninja =  base_path + 'ninja'
   alt_name_mpp =  cur_dir + os.path.sep + 'subprojects' + os.path.sep  + 'Mutationpp'
@@ -83,6 +87,7 @@ def init_submodules(method = 'auto'):
   if is_git:
     submodule_status(alt_name_codi, sha_version_codi)
     submodule_status(alt_name_medi, sha_version_medi)
+    submodule_status(alt_name_opdi, sha_version_opdi)
     submodule_status(alt_name_meson, sha_version_meson)
     submodule_status(alt_name_ninja, sha_version_ninja)
     submodule_status(alt_name_mpp, sha_version_mpp)
@@ -90,6 +95,7 @@ def init_submodules(method = 'auto'):
   else:
     download_module(codi_name, alt_name_codi, github_repo_codi, sha_version_codi)
     download_module(medi_name, alt_name_medi, github_repo_medi, sha_version_medi)
+    download_module(opdi_name, alt_name_opdi, github_repo_opdi, sha_version_opdi)
     download_module(meson_name, alt_name_meson, github_repo_meson, sha_version_meson)
     download_module(ninja_name, alt_name_ninja, github_repo_ninja, sha_version_ninja)
     download_module(mpp_name, alt_name_mpp, github_repo_mpp, sha_version_mpp)
diff --git a/preconfigure.py b/preconfigure.py
index 6dc52ce1598..16cd5f307c2 100755
--- a/preconfigure.py
+++ b/preconfigure.py
@@ -287,7 +287,7 @@ def init_codi(argument_dict, modes, mpi_support = False, update = False):
     
     # This information of the modules is used if projects was not cloned using git
     # The sha tag must be maintained manually to point to the correct commit
-    sha_version_codi = '1b8d3f5f03de560fb63a2a76ad91ab7bb3fa67d8'
+    sha_version_codi = '6a67202a3887c8da490fdfde82bc46507de68692'
     github_repo_codi = 'https://github.com/scicompkl/CoDiPack'
     sha_version_medi = '6aef76912e7099c4f08c9705848797ca9e8070da'
     github_repo_medi = 'https://github.com/SciCompKL/MeDiPack'
diff --git a/su2omp.syntax.json b/su2omp.syntax.json
new file mode 100644
index 00000000000..5a524950142
--- /dev/null
+++ b/su2omp.syntax.json
@@ -0,0 +1,42 @@
+{
+  "this file's header":
+  [
+    "\\file su2omp.syntax.json",
+    "\\brief Definitions for the OpDiLib syntax checker",
+    "\\author J. Blühdorn",
+    "\\version 7.1.1 \"Blackbird\"",
+
+    "SU2 Project Website: https://su2code.github.io",
+
+    "The SU2 Project is maintained by the SU2 Foundation ",
+    "(http://su2foundation.org)",
+
+    "Copyright 2012-2020, SU2 Contributors (cf. AUTHORS.md)",
+
+    "SU2 is free software; you can redistribute it and/or",
+    "modify it under the terms of the GNU Lesser General Public",
+    "License as published by the Free Software Foundation; either",
+    "version 2.1 of the License, or (at your option) any later version.",
+
+    "SU2 is distributed in the hope that it will be useful,",
+    "but WITHOUT ANY WARRANTY; without even the implied warranty of",
+    "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU",
+    "Lesser General Public License for more details.",
+
+    "You should have received a copy of the GNU Lesser General Public",
+    "License along with SU2. If not, see <http://www.gnu.org/licenses/>."
+  ],
+  "pairs":
+  {
+    "SU2_OMP_MASTER": "END_SU2_OMP_MASTER",
+    "SU2_OMP_CRITICAL": "END_SU2_OMP_CRITICAL",
+    "SU2_OMP_PARALLEL": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_PARALLEL_": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_PARALLEL_ON": "END_SU2_OMP_PARALLEL",
+    "SU2_OMP_FOR_": "END_SU2_OMP_FOR",
+    "SU2_OMP_FOR_DYN": "END_SU2_OMP_FOR",
+    "SU2_OMP_FOR_STAT": "END_SU2_OMP_FOR",
+    "CSYSVEC_PARFOR": "END_CSYSVEC_PARFOR",
+    "CNEWTON_PARFOR": "END_CNEWTON_PARFOR"
+  }
+}