diff --git a/cime/machines-acme/Depends.intel b/cime/machines-acme/Depends.intel
index c213b908ff3..577f7aa320c 100644
--- a/cime/machines-acme/Depends.intel
+++ b/cime/machines-acme/Depends.intel
@@ -1,9 +1,10 @@
 #
 PERFOBJS=\
-prim_advection_mod.o \
-edge_mod.o \
-derivative_mod.o \
-bndry_mod.o \
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
 prim_advance_mod.o \
 uwshcu.o
 
diff --git a/cime/machines-acme/Depends.intel14 b/cime/machines-acme/Depends.intel14
index 0c913270b2b..32e4747d7a3 100644
--- a/cime/machines-acme/Depends.intel14
+++ b/cime/machines-acme/Depends.intel14
@@ -6,23 +6,23 @@
 #ecosys_mod.o: ecosys_mod.F90
 #	$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -CU $<
 
-prim_advection_mod.o: prim_advection_mod.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3  -no-prec-div  $<
 
-edge_mod.o: edge_mod.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
 
-derivative_mod.o: derivative_mod.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
+PERFOBJS=\
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
+prim_advance_mod.o \
+uwshcu.o \
+wetdep.o
 
-bndry_mod.o: bndry_mod.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
-
-prim_advance_mod.o: prim_advance_mod.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
-
-uwshcu.o: uwshcu.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
-
-wetdep.o: wetdep.F90
-		      $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div  $<
+ifeq ($(DEBUG),FALSE)
+  $(PERFOBJS): %.o: %.F90
+	  $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O3  -no-prec-div $<
+  $(REDUCED_OPT_OBJS): %.o: %.F90
+	  $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS)  -O1 $<
+  $(REDUCED_PRECISION_OBJS): %.o: %.F90
+	  $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -fimf-precision=low -fp-model fast $<
+endif
diff --git a/cime/machines-acme/Depends.intelmic b/cime/machines-acme/Depends.intelmic
index 0f84949daa3..6f05aee44a7 100644
--- a/cime/machines-acme/Depends.intelmic
+++ b/cime/machines-acme/Depends.intelmic
@@ -1,6 +1,6 @@
 
-derivative_mod.o: derivative_mod.F90
-	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< 
+#derivative_mod_base.o: derivative_mod_base.F90
+#	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< 
 
 shr_ncread_mod.o: shr_ncread_mod.F90
 	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $<
diff --git a/cime/machines-acme/Depends.intelmic14 b/cime/machines-acme/Depends.intelmic14
index 0f84949daa3..6f05aee44a7 100644
--- a/cime/machines-acme/Depends.intelmic14
+++ b/cime/machines-acme/Depends.intelmic14
@@ -1,6 +1,6 @@
 
-derivative_mod.o: derivative_mod.F90
-	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< 
+#derivative_mod_base.o: derivative_mod_base.F90
+#	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< 
 
 shr_ncread_mod.o: shr_ncread_mod.F90
 	 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $<
diff --git a/cime/machines-acme/Depends.mira b/cime/machines-acme/Depends.mira
index 85453dd931a..7f9c05b6299 100644
--- a/cime/machines-acme/Depends.mira
+++ b/cime/machines-acme/Depends.mira
@@ -13,13 +13,16 @@ shr_reprosum_mod.o: shr_reprosum_mod.F90
 
 # These routines benefit from -qnostrict without violating the bfb test
 PERFOBJS=\
-prim_advection_mod.o \
-edge_mod.o \
-derivative_mod.o \
-bndry_mod.o \
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
 prim_advance_mod.o \
 uwshcu.o \
-wetdep.o 
+wetdep.o
+
+
 ifeq ($(DEBUG),FALSE)
   $(PERFOBJS): %.o: %.F90
 	  $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qnostrict  $<
diff --git a/cime/scripts/Tools/config_definition.xml b/cime/scripts/Tools/config_definition.xml
index 4a5f76e22f2..5dcf37c5532 100644
--- a/cime/scripts/Tools/config_definition.xml
+++ b/cime/scripts/Tools/config_definition.xml
@@ -1363,6 +1363,14 @@
 	 sdesc="CAM dynamical core"
 	 ></entry>
 
+  <entry id="CAM_TARGET" 
+	 type="char"
+	 valid_values="preqx,preqx_acc" 
+	 value="preqx" 
+	 group="build_component_cam"
+	 sdesc="CAM-SE cmake target (only used with CAM_DYCORE == 'se')"
+	 ></entry>
+
   <entry id="CAM_NML_USE_CASE" 
 	 type="char"
 	 valid_values=""
diff --git a/cime/scripts/create_newcase b/cime/scripts/create_newcase
index 396b4ac8761..b7d49cde766 100755
--- a/cime/scripts/create_newcase
+++ b/cime/scripts/create_newcase
@@ -96,6 +96,9 @@ OPTIONS
                                     or read from ~/.cesm_proj or ~/.ccsm_proj
   -compiler <name>         Specify a compiler for the target machine (optional)
                            default: default compiler for the target machine
+  -camse_target <name>     Specify a target for the cam-se dycore (ignored for other atmospheric dycores)
+                           default: preqx
+                           options: preqx, preqx_acc
   -mpilib <name>           Specify a mpi library for the target machine (optional)
                            default: default mpi library for the target machine
                            allowed: openmpi, mpich, ibm, mpi-serial, etc
@@ -166,6 +169,7 @@ GetOptions(
     "confopts=s"                => \$opts{'confopts'},
     "project=s"                 => \$opts{'project'},  
     "compiler=s"                => \$opts{'compiler'},  
+    "camse_target=s"            => \$opts{'camse_target'},  
     "mpilib=s"                  => \$opts{'mpilib'},  
     "res=s"                     => \$opts{'res'},
     "h|help"                    => \$opts{'help'},
@@ -211,6 +215,7 @@ my $pecount;
 my $xmlmode;
 my $project;
 my $compiler;
+my $camse_target;
 my $mpilib;
 if (!$opts{'list'} && !$opts{'testlist'}) {
     # Check for manditory case input
@@ -260,6 +265,13 @@ if (!$opts{'list'} && !$opts{'testlist'}) {
 	$compiler = $opts{'compiler'};
     }
 
+    # Check if the camse_target option is given; otherwise default to "preqx"
+    if ($opts{'camse_target'}) {
+      $camse_target = $opts{'camse_target'};
+    } else {
+      $camse_target = "preqx";
+    }
+
     # Check if machine mpilib option is given
     $mpilib = 'unset';
     if ($opts{'mpilib'}) {
@@ -502,36 +514,51 @@ if ($mach =~ /(.*)_(.*)/){
 }
 $cfg_ref->set('CCSM_MACHDIR', "$machdir");
 
+# Handle the camse_target option: record the requested CAM-SE target in CAM_TARGET.
+# Do this before handling the compiler so that the default compiler can be overridden below.
+if ($camse_target eq "preqx") {
+    $cfg_ref->set('CAM_TARGET', "preqx");
+} elsif ($camse_target eq "preqx_acc") {
+    $cfg_ref->set('CAM_TARGET', "preqx_acc");
+} else {
+    die "ERROR: option -camse_target must be preqx (default) or preqx_acc\n";
+}
+
 # Check that compiler request for target machine matches a supported value
 # Or set default compiler - if not provided compiler request
 
 my $compilers;
 if ($mach =~ /userdefined/){
-    $cfg_ref->set('COMPILER', "USERDEFINED_required_build");
+  $cfg_ref->set('COMPILER', "USERDEFINED_required_build");
 } else { 
-    $compilers = $cfg_ref->get('COMPILERS');
-    my @compilers = split ",", $compilers, -1;
-    if ($compiler) {
-	if (! ($mach =~ "generic")){
-	    my $found = 0;
-	    foreach my $comp (@compilers) {
-		if ($compiler eq $comp) {
-		    $found = 1;
-		}
-	    }
-	    if (!$found) {
-		my $sysmod = "rm -rf $caseroot";
-		system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n";
-		die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n";
-	    }
-	}
-	$cfg_ref->set('COMPILER', "$compiler");
-	if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
-    } else {
-	$compiler = $compilers[0];   
-	$cfg_ref->set('COMPILER', "$compiler");
-	if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+  $compilers = $cfg_ref->get('COMPILERS');
+  my @compilers = split ",", $compilers, -1;
+  if ($compiler) {
+    if (! ($mach =~ "generic")){
+      my $found = 0;
+      foreach my $comp (@compilers) {
+        if ($compiler eq $comp) {
+            $found = 1;
+        }
+      }
+      if (!$found) {
+        my $sysmod = "rm -rf $caseroot";
+        system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n";
+        die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n";
+      }
     }
+    $cfg_ref->set('COMPILER', "$compiler");
+    if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+  } else {
+    $compiler = $compilers[0];
+    if ($camse_target eq "preqx_acc") {
+      if ($mach eq "titan") {
+        $compiler = "pgi_acc";
+      }
+    }
+    $cfg_ref->set('COMPILER', "$compiler");
+    if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+  }
 }
 
 if ($print>=2) { print "Machine specifier: $mach.$eol"; }
diff --git a/components/cam/bld/cam.buildnml b/components/cam/bld/cam.buildnml
index ee1651cf2d9..42b4660cdaa 100755
--- a/components/cam/bld/cam.buildnml
+++ b/components/cam/bld/cam.buildnml
@@ -20,6 +20,7 @@ my $CAM_CONFIG_OPTS	= `./xmlquery  CAM_CONFIG_OPTS		-value`;
 my $CAM_NAMELIST_OPTS	= `./xmlquery  CAM_NAMELIST_OPTS	-value`;
 my $CAM_NML_USE_CASE	= `./xmlquery  CAM_NML_USE_CASE		-value`;
 my $CAM_DYCORE		= `./xmlquery  CAM_DYCORE		-value`;
+my $CAM_TARGET		= `./xmlquery  CAM_TARGET		-value`;
 my $CASEBUILD		= `./xmlquery  CASEBUILD		-value`;
 my $CCSM_CO2_PPMV	= `./xmlquery  CCSM_CO2_PPMV		-value`;
 my $CCSMROOT		= `./xmlquery  CCSMROOT			-value`;
@@ -84,7 +85,7 @@ if ($BUILD_COMPLETE eq 'FALSE') {
 	$ATM_GRID = $1;
 	$nlev="-nlev $3";
     }
-    $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS";
+    $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -dyn_target $CAM_TARGET -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS";
     system($sysmod) == 0 or die "ERROR cam.buildnml: $sysmod failed: $?\n";
 
 } else {
diff --git a/components/cam/bld/configure b/components/cam/bld/configure
index db01087394a..457564857d0 100755
--- a/components/cam/bld/configure
+++ b/components/cam/bld/configure
@@ -324,6 +324,7 @@ GetOptions(
     "rain_evap_to_coarse_aero"  => \$opts{'rain_evap_to_coarse_aero'},
     "defaults=s"                => \$opts{'defaults'},
     "dyn=s"                     => \$opts{'dyn'},
+    "dyn_target=s"              => \$opts{'dyn_target'},
     "edit_chem_mech"            => \$opts{'edit_chem_mech'},
     "waccm_phys"                => \$opts{'waccm_phys'},
     "offline_dyn"               => \$opts{'offline_dyn'},
@@ -2764,8 +2765,13 @@ sub write_filepath
 
     print $fh "$camsrcdir/cam/src/dynamics/$dyn\n";
     if($dyn eq 'se') {
-	print $fh "$camsrcdir/homme/src/share\n";
-	print $fh "$camsrcdir/homme/src/share/openacc\n";
+	    print $fh "$camsrcdir/homme/src/share\n";
+      if (!defined($opts{'dyn_target'}) || $opts{'dyn_target'} eq 'preqx') {  # no -dyn_target given => default 'preqx' sources
+        print $fh "$camsrcdir/homme/src/preqx\n";
+      }
+      elsif ($opts{'dyn_target'} eq 'preqx_acc') {  # OpenACC variant of the preqx sources
+        print $fh "$camsrcdir/homme/src/preqx_acc\n";
+      }
     }
 
     # Parallelization utilies
diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt
index d3458a68476..0a39c868c57 100644
--- a/components/homme/CMakeLists.txt
+++ b/components/homme/CMakeLists.txt
@@ -56,6 +56,7 @@ ENDIF ()
 # Some options for conditional builds
 OPTION(BUILD_HOMME_SWEQX "Shallow water equations FEM" ON)
 OPTION(BUILD_HOMME_PREQX "Primitive equations FEM" ON)
+OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" ON)
 OPTION(BUILD_HOMME_SWDGX "Shallow water equations DG" OFF)
 OPTION(BUILD_HOMME_PRIMDGX "Primitive equations DG" OFF)
 OPTION(BUILD_HOMME_SWIM  "Shallow water equations implicit" OFF)
@@ -175,6 +176,10 @@ IF(${BUILD_HOMME_PREQX})
   ADD_SUBDIRECTORY(src/preqx)
 ENDIF()
 
+IF(${BUILD_HOMME_PREQX_ACC})
+  ADD_SUBDIRECTORY(src/preqx_acc)
+ENDIF()
+
 IF(${BUILD_HOMME_SWDGX})
   ADD_SUBDIRECTORY(src/swdgx)
 ENDIF()
diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake
index b83bd3fecb7..1d42d29c2cf 100644
--- a/components/homme/cmake/SetCompilerFlags.cmake
+++ b/components/homme/cmake/SetCompilerFlags.cmake
@@ -222,68 +222,7 @@ ENDIF ()
 ##############################################################################
 OPTION(ENABLE_OPENACC "Whether to build with OpenACC support" FALSE)
 IF (${ENABLE_OPENACC})
-  IF (${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI)
-    # Need to add -acc to the Fortran FLAGS to see if it will compile 
-    # "call acc_init()"
-    SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -acc")
-    TRY_COMPILE(OPENACC_SUCCESS
-                ${CMAKE_BINARY_DIR}/tests/compilerTests/
-                ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/openAccTest.f90
-                OUTPUT_VARIABLE COMPILE_OUTPUT)
-    IF (${OPENACC_SUCCESS})
-      MESSAGE(STATUS "Using OpenACC through PGI compiler")
-      SET(PREQX_USE_OPENACC TRUE)
-    ELSE()
-      MESSAGE(FATAL_ERROR "Unable to use OpenACC through the PGI compiler")
-    ENDIF()
-  ELSEIF (${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray)
-    SET(PREQX_USE_OPENACC TRUE)
-    MESSAGE(STATUS "Using OpenACC through Cray compiler")
-  ELSE ()
-    MESSAGE(FATAL_ERROR "OpenACC only supported through the PGI and Cray compilers")
-  ENDIF()
-ENDIF ()
-
-##############################################################################
-# CUDA Fortran specific flags - only supporting PGI compiler
-##############################################################################
-OPTION(ENABLE_CUDA_FORTRAN "Whether to build with CUDA Fortran support" FALSE)
-IF (${ENABLE_CUDA_FORTRAN})
-  IF (NOT ${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI)
-    MESSAGE(FATAL_ERROR "CUDA Fortran only supported through the PGI compiler")
-  ELSE ()
-    # Set PGI CUDA Fortran flags
-
-    # Set defaults as lowest version of CUDA and device capability allowed
-    # To do: determine a way to generalize this
-    IF (NOT CUDA_VERSION)
-      SET(CUDA_VERSION "4.1")
-    ENDIF ()
-
-    # Compute capability: cc2x is for devices with compute capability >= 2.0 
-    IF (NOT CUDA_DEVICE_CAPABILITY)
-      SET(CUDA_DEVICE_CAPABILITY "cc2x")
-    ENDIF ()
-
-    SET(CMAKE_Fortran_FLAGS 
-        "${CMAKE_Fortran_FLAGS} -ta=nvidia -Mcuda=${CUDA_VERSION},${CUDA_DEVICE_CAPABILITY},ptxinfo,keepgpu")
-
-    MESSAGE(STATUS "Testing PGI CUDA Fortran Compilation with flags: ${CMAKE_Fortran_FLAGS}")
-
-    TRY_COMPILE(CUDAFOR
-                ${CMAKE_BINARY_DIR}/tests/compilerTests/
-                ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/cudaFortranTest.f90
-                OUTPUT_VARIABLE COMPILE_OUTPUT)
-    IF (${CUDAFOR})
-      SET(PREQX_USE_CUDA_FORTRAN TRUE)
-      MESSAGE(STATUS "Succeeded. Using CUDA Fortran through PGI compiler")
-    ELSE()
-      SET(PREQX_USE_CUDA_FORTRAN FALSE)
-      MESSAGE(FATAL_ERROR "Unable to use CUDA Fortran through the PGI "
-              "compiler. Compilation failed with the following "
-              "output.\n${COMPILE_OUTPUT}")
-    ENDIF()
-  ENDIF()
+  SET(PREQX_USE_OPENACC TRUE)
 ENDIF ()
 
 ##############################################################################
diff --git a/components/homme/cmake/machineFiles/titan.cmake b/components/homme/cmake/machineFiles/titan.cmake
index 2a73ce474f6..aa94a807240 100644
--- a/components/homme/cmake/machineFiles/titan.cmake
+++ b/components/homme/cmake/machineFiles/titan.cmake
@@ -5,7 +5,16 @@ SET (CMAKE_CXX_COMPILER CC CACHE FILEPATH "")
 SET (NETCDF_DIR $ENV{NETCDF_DIR} CACHE FILEPATH "")
 SET (PNETCDF_DIR $ENV{PARALLEL_NETCDF_DIR} CACHE FILEPATH "")
 SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "")
+SET (DEBUG_FLAGS " " CACHE STRING "")
+SET (Netcdf_NC_CONFIG_BIN "/opt/cray/netcdf/4.3.3.1/bin" CACHE FILEPATH "")
+SET (HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")
+SET (USE_MPIEXEC "aprun" CACHE STRING "")
 
 # The following is required for cross compilation
 SET (CMAKE_SYSTEM_NAME Catamount CACHE FILEPATH "")
+
+#Regression test parameters
 SET (USE_QUEUING FALSE CACHE BOOL "")
+SET (USE_NUM_PROCS 64 CACHE STRING "")
+
+
diff --git a/components/homme/compile_scripts/thatchroof/compile.cpu b/components/homme/compile_scripts/thatchroof/compile.cpu
old mode 100644
new mode 100755
diff --git a/components/homme/compile_scripts/thatchroof/compile.openacc b/components/homme/compile_scripts/thatchroof/compile.openacc
old mode 100644
new mode 100755
index c7b896db37e..b0f055ea508
--- a/components/homme/compile_scripts/thatchroof/compile.openacc
+++ b/components/homme/compile_scripts/thatchroof/compile.openacc
@@ -20,7 +20,7 @@ cmake                                                                          \
   -DCMAKE_Fortran_COMPILER=mpif90                                              \
   -DCMAKE_C_COMPILER=mpicc                                                     \
   -DCMAKE_CXX_COMPILER=mpiCC                                                   \
-  -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \
+  -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel"  \
   -DDEBUG_FLAGS=" "                                                            \
   -DNETCDF_DIR=$NETCDF_DIR                                                     \
   -DWITH_PNETCDF=FALSE                                                         \
@@ -30,6 +30,8 @@ cmake                                                                          \
   -DBUILD_HOMME_SWDGX=FALSE                                                    \
   -DBUILD_HOMME_SWEQX=FALSE                                                    \
   -DBUILD_HOMME_PRIMDGX=FALSE                                                  \
+  -DBUILD_HOMME_PREQX=FALSE                                                    \
+  -DBUILD_HOMME_PREQX_ACC=TRUE                                                 \
   -DENABLE_OPENMP=TRUE                                                         \
   -DHOMME_PROJID=STF006                                                        \
   -DENABLE_OPENACC=TRUE                                                        \
@@ -39,11 +41,11 @@ cmake                                                                          \
   -DZLIB_DIR=$ZLIB_PATH                                                        \
   -DSZIP_DIR=$SZIP_PATH                                                        \
   -DCURL_DIR=/opt/curl-7.45_pgi15.9                                            \
-  -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.0,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl'         \
+  -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.5,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl'         \
                                                                                \
                                                                                \
                                                                                \
-  -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts//thatchroof/customFlags.cmake.openacc \
+  -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/thatchroof/customFlags.cmake.openacc \
   $HOMME_ROOT                                               
 fi
 # -DOPT_FLAGS="-fastsse -Mvect"                                                \
@@ -54,7 +56,7 @@ make clean                                                  || exit -1
 fi
 
 if [ $do_make -eq 1 ]; then
-make -j16 preqx                                             || exit -1
+make -j16 preqx_acc                                         || exit -1
 mkdir -p $HOMME_ROOT/build/preqx
-cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc  || exit -1
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc  || exit -1
 fi
diff --git a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
index 12506a81cb6..248d7c27be8 100644
--- a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
+++ b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
@@ -1,13 +1,6 @@
 SET(CUSTOM_FLAG_FILES
   ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 
   ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
-
 )
 
 SET_SOURCE_FILES_PROPERTIES(
@@ -17,14 +10,4 @@ SET_SOURCE_FILES_PROPERTIES(
   "  -Mpreprocess -Mextend -Mflushz -O0 -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin"
 )
 
-SET_SOURCE_FILES_PROPERTIES(
-  ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
-  PROPERTIES GENERATED TRUE COMPILE_FLAGS
-  "  -Mpreprocess -Mextend -Mflushz -fast -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape"
-)
 
diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu
index 1a21f327622..a88be66966f 100755
--- a/components/homme/compile_scripts/titan/compile.cpu
+++ b/components/homme/compile_scripts/titan/compile.cpu
@@ -5,52 +5,42 @@ do_clean=1
 do_make=1
  
 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
-NTRACERS=50
-NLEVELS=64
+BUILD_ROOT=/lustre/atlas/scratch/$USER/stf006/homme_tests   # per-user scratch build dir, mirrors $USER in HOMME_ROOT
+NTRACERS=4
+NLEVELS=26
 
 source ./env_mach_specific.cpu
 
-mkdir -p titan-cpu     || exit -1
-cd titan-cpu
+mkdir -p $BUILD_ROOT/titan-cpu     || exit -1
+cd $BUILD_ROOT/titan-cpu
  
 if [ $do_cmake -eq 1 ]; then
-sed -i "s/  integer, parameter         :: qsize_d=.*/  integer, parameter         :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1
 rm -rf CMakeFiles CMakeCache.txt
 cmake                                                                          \
   -C $HOMME_ROOT/cmake/machineFiles/titan.cmake                                \
-  -DCMAKE_Fortran_COMPILER=ftn                                                 \
-  -DCMAKE_C_COMPILER=cc                                                        \
-  -DCMAKE_CXX_COMPILER=CC                                                      \
   -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse"                                        \
-  -DDEBUG_FLAGS=" "                                                            \
-  -DNETCDF_DIR=$NETCDF_DIR                                                     \
-  -DWITH_PNETCDF=FALSE                                                         \
-  -DHDF5_DIR=$HDF5_DIR                                                         \
+  -DWITH_PNETCDF=TRUE                                                          \
   -DPREQX_NP=4                                                                 \
   -DPREQX_PLEV=$NLEVELS                                                        \
-  -DBUILD_HOMME_SWDGX=FALSE                                                    \
-  -DBUILD_HOMME_SWEQX=FALSE                                                    \
-  -DBUILD_HOMME_PRIMDGX=FALSE                                                  \
+  -DPREQX_QSIZE_D=$NTRACERS                                                    \
+  -DBUILD_HOMME_SWEQX=TRUE                                                     \
+  -DBUILD_HOMME_PREQX=TRUE                                                     \
+  -DBUILD_HOMME_PREQX_ACC=TRUE                                                 \
   -DENABLE_OPENMP=TRUE                                                         \
   -DHOMME_PROJID=STF006                                                        \
-  -DENABLE_OPENACC=FALSE                                                       \
-  -DENABLE_CUDA_FORTRAN=FALSE                                                  \
-  -DHOMME_FIND_BLASLAPACK=TRUE                                                 \
-  -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin"                        \
-                                                                               \
-                                                                               \
-                                                                               \
+  -DOPENACC_Fortran_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Minfo=accel"       \
+  -DOPENACC_Linker_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Mcuda"              \
   -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \
   $HOMME_ROOT                                               
 fi
-# -DOPT_FLAGS="-fastsse -Mvect"                                                \
 
 if [ $do_clean -eq 1 ]; then
 make clean                                                  || exit -1
 fi
 
 if [ $do_make -eq 1 ]; then
-make -j16 preqx                                             || exit -1
+make -j32                                                   || exit -1
 mkdir -p $HOMME_ROOT/build/preqx
 cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.cpu      || exit -1
+#cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc      || exit -1
 fi
diff --git a/components/homme/compile_scripts/titan/compile.openacc b/components/homme/compile_scripts/titan/compile.openacc
index 2e09b6529e5..ac2d06f16e0 100755
--- a/components/homme/compile_scripts/titan/compile.openacc
+++ b/components/homme/compile_scripts/titan/compile.openacc
@@ -1,7 +1,7 @@
 #!/bin/bash
 
-do_cmake=0
-do_clean=0
+do_cmake=1
+do_clean=1
 do_make=1
  
 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
@@ -14,7 +14,6 @@ mkdir -p titan-openacc     || exit -1
 cd titan-openacc
  
 if [ $do_cmake -eq 1 ]; then
-sed -i "s/  integer, parameter         :: qsize_d=.*/  integer, parameter         :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1
 rm -rf CMakeFiles CMakeCache.txt
 cmake                                                                                           \
   -C $HOMME_ROOT/cmake/machineFiles/titan.cmake                                                 \
@@ -27,10 +26,13 @@ cmake
   -DWITH_PNETCDF=FALSE                                                                          \
   -DHDF5_DIR=$HDF5_DIR                                                                          \
   -DPREQX_NP=4                                                                                  \
+  -DPREQX_QSIZE_D=$NTRACERS                                                                     \
   -DPREQX_PLEV=$NLEVELS                                                                         \
   -DBUILD_HOMME_SWDGX=FALSE                                                                     \
   -DBUILD_HOMME_SWEQX=FALSE                                                                     \
   -DBUILD_HOMME_PRIMDGX=FALSE                                                                   \
+  -DBUILD_HOMME_PREQX_ACC=TRUE                                                                  \
+  -DBUILD_HOMME_PREQX=FALSE                                                                     \
   -DENABLE_OPENMP=TRUE                                                                          \
   -DHOMME_PROJID=STF006                                                                         \
   -DENABLE_OPENACC=TRUE                                                                         \
@@ -49,7 +51,7 @@ make clean                                                  || exit -1
 fi
 
 if [ $do_make -eq 1 ]; then
-make -j16 preqx                                             || exit -1
+make -j16 preqx_acc                                         || exit -1
 mkdir -p $HOMME_ROOT/build/preqx
-cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc  || exit -1
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc  || exit -1
 fi
diff --git a/components/homme/compile_scripts/titan/compile.openacc.oncpu b/components/homme/compile_scripts/titan/compile.openacc.oncpu
new file mode 100755
index 00000000000..a6e3f9d381f
--- /dev/null
+++ b/components/homme/compile_scripts/titan/compile.openacc.oncpu
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+do_cmake=1
+do_clean=1
+do_make=1
+ 
+HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
+NTRACERS=50
+NLEVELS=64
+
+source ./env_mach_specific.cpu
+
+mkdir -p titan-openacc     || exit -1
+cd titan-openacc
+ 
+if [ $do_cmake -eq 1 ]; then
+rm -rf CMakeFiles CMakeCache.txt
+cmake                                                    \
+  -C $HOMME_ROOT/cmake/machineFiles/titan.cmake          \
+  -DCMAKE_Fortran_COMPILER=ftn                           \
+  -DCMAKE_C_COMPILER=cc                                  \
+  -DCMAKE_CXX_COMPILER=CC                                \
+  -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse"                  \
+  -DDEBUG_FLAGS=" "                                      \
+  -DNETCDF_DIR=$NETCDF_DIR                               \
+  -DWITH_PNETCDF=FALSE                                   \
+  -DHDF5_DIR=$HDF5_DIR                                   \
+  -DPREQX_NP=4                                           \
+  -DPREQX_QSIZE_D=$NTRACERS                              \
+  -DPREQX_PLEV=$NLEVELS                                  \
+  -DBUILD_HOMME_SWDGX=FALSE                              \
+  -DBUILD_HOMME_SWEQX=FALSE                              \
+  -DBUILD_HOMME_PRIMDGX=FALSE                            \
+  -DBUILD_HOMME_PREQX_ACC=TRUE                           \
+  -DBUILD_HOMME_PREQX=FALSE                              \
+  -DENABLE_OPENMP=TRUE                                   \
+  -DHOMME_PROJID=STF006                                  \
+  -DENABLE_OPENACC=TRUE                                  \
+  -DENABLE_CUDA_FORTRAN=FALSE                            \
+  -DHOMME_FIND_BLASLAPACK=TRUE                           \
+  -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin"  \
+  -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \
+  $HOMME_ROOT                                               
+fi
+
+if [ $do_clean -eq 1 ]; then
+make clean                                                  || exit -1
+fi
+
+if [ $do_make -eq 1 ]; then
+make -j16 preqx_acc                                         || exit -1
+mkdir -p $HOMME_ROOT/build/preqx
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc  || exit -1
+fi
diff --git a/components/homme/compile_scripts/titan/compile.sweqx b/components/homme/compile_scripts/titan/compile.sweqx
new file mode 100755
index 00000000000..6cfca993f55
--- /dev/null
+++ b/components/homme/compile_scripts/titan/compile.sweqx
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Configure and build the HOMME sweqx executable (CPU build) out-of-source in ./titan-cpu.
+do_cmake=1   # 1 = re-run cmake after discarding the previous cache
+do_clean=1   # 1 = run "make clean" before building
+do_make=1    # 1 = build sweqx and copy it into $HOMME_ROOT/build/sweqx
+# Source tree location, and the level count handed to cmake as SWEQX_PLEV.
+HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
+NLEVELS=64
+# Relative path: run this script from the directory that contains env_mach_specific.cpu.
+source ./env_mach_specific.cpu
+# Abort if the build directory cannot be created or entered; later steps delete files in $PWD.
+mkdir -p titan-cpu     || exit -1
+cd titan-cpu           || exit -1
+ 
+if [ $do_cmake -eq 1 ]; then                                                   # configure step
+rm -rf CMakeFiles CMakeCache.txt                                               # discard any previous configuration
+cmake                                                                          \
+  -C $HOMME_ROOT/cmake/machineFiles/titan.cmake                                \
+  -DCMAKE_Fortran_COMPILER=ftn                                                 \
+  -DCMAKE_C_COMPILER=cc                                                        \
+  -DCMAKE_CXX_COMPILER=CC                                                      \
+  -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse"                                        \
+  -DDEBUG_FLAGS=" "                                                            \
+  -DNETCDF_DIR=$NETCDF_DIR                                                     \
+  -DWITH_PNETCDF=FALSE                                                         \
+  -DHDF5_DIR=$HDF5_DIR                                                         \
+  -DSWEQX_NP=4                                                                 \
+  -DSWEQX_PLEV=$NLEVELS                                                        \
+  -DBUILD_HOMME_SWDGX=FALSE                                                    \
+  -DBUILD_HOMME_SWEQX=TRUE                                                     \
+  -DBUILD_HOMME_PRIMDGX=FALSE                                                  \
+  -DBUILD_HOMME_PREQX=FALSE                                                    \
+  -DENABLE_OPENMP=TRUE                                                         \
+  -DHOMME_PROJID=STF006                                                        \
+  -DENABLE_OPENACC=FALSE                                                       \
+  -DENABLE_CUDA_FORTRAN=FALSE                                                  \
+  -DHOMME_FIND_BLASLAPACK=TRUE                                                 \
+  -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin"                        \
+  $HOMME_ROOT                                                                  || exit -1
+fi
+# Alternative optimization flags, kept for reference: -DOPT_FLAGS="-fastsse -Mvect"
+
+if [ $do_clean -eq 1 ]; then   # optional cleanup of earlier build products
+  make clean || exit -1
+fi
+# Build sweqx with 16 parallel jobs and copy the binary to build/sweqx/sweqx.cpu
+if [ $do_make -eq 1 ]; then
+  make -j16 sweqx || exit -1
+  mkdir -p $HOMME_ROOT/build/sweqx
+  cp ./src/sweqx/sweqx $HOMME_ROOT/build/sweqx/sweqx.cpu || exit -1
+fi
diff --git a/components/homme/compile_scripts/titan/customFlags.cmake.openacc b/components/homme/compile_scripts/titan/customFlags.cmake.openacc
index 5bb9c6a1676..9d2729e62e0 100644
--- a/components/homme/compile_scripts/titan/customFlags.cmake.openacc
+++ b/components/homme/compile_scripts/titan/customFlags.cmake.openacc
@@ -1,12 +1,6 @@
 SET(CUSTOM_FLAG_FILES
   ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 
   ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
 
 )
 
@@ -16,15 +10,3 @@ SET_SOURCE_FILES_PROPERTIES(
   PROPERTIES GENERATED TRUE COMPILE_FLAGS
   "  -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O0 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin"
 )
-
-SET_SOURCE_FILES_PROPERTIES(
-  ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
-  ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
-  PROPERTIES GENERATED TRUE COMPILE_FLAGS
-  "  -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O2 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape"
-)
-
diff --git a/components/homme/compile_scripts/titan/env_mach_specific.cpu b/components/homme/compile_scripts/titan/env_mach_specific.cpu
index e3d54196c33..41aee75a4e7 100644
--- a/components/homme/compile_scripts/titan/env_mach_specific.cpu
+++ b/components/homme/compile_scripts/titan/env_mach_specific.cpu
@@ -32,12 +32,13 @@
   module rm cray-parallel-netcdf
     module load PrgEnv-pgi
 #   module switch pgi pgi/14.2.0
-    module switch pgi pgi/15.10.lustre
+    module switch pgi pgi/15.10.0
     module switch cray-mpich cray-mpich/7.2.2
     module switch cray-libsci cray-libsci/13.0.4
     module load esmf/5.2.0rp2
     module switch atp atp/1.7.5
-
+    module add cudatoolkit
+    export CRAY_CUDA_MPS=1
 
     export CRAY_CPU_TARGET=istanbul
     module load cray-netcdf-hdf5parallel/4.3.3.1
diff --git a/components/homme/compile_scripts/titan/env_mach_specific.openacc b/components/homme/compile_scripts/titan/env_mach_specific.openacc
index 966acd15154..f0374679a7f 100644
--- a/components/homme/compile_scripts/titan/env_mach_specific.openacc
+++ b/components/homme/compile_scripts/titan/env_mach_specific.openacc
@@ -32,7 +32,7 @@
   module rm cray-parallel-netcdf
     module load PrgEnv-pgi
 #   module switch pgi pgi/14.2.0
-    module switch pgi pgi/15.9.lustre
+    module switch pgi pgi/15.9.home
     module switch cray-mpich cray-mpich/7.2.2
     module switch cray-libsci cray-libsci/13.0.4
     module load esmf/5.2.0rp2
diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt
index c28852c3005..710b9c0a964 100644
--- a/components/homme/src/preqx/CMakeLists.txt
+++ b/components/homme/src/preqx/CMakeLists.txt
@@ -16,13 +16,20 @@ SET (PREQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS}
                         ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx")
 
 SET(PREQX_SRCS_F90
-  ${SRC_SHARE_DIR}/openacc/bndry_openacc_mod.F90
-  ${SRC_SHARE_DIR}/openacc/derivative_openacc_mod.F90
-  ${SRC_SHARE_DIR}/openacc/edge_openacc_mod.F90
-  ${SRC_SHARE_DIR}/openacc/openacc_utils_mod.F90
-  ${SRC_SHARE_DIR}/openacc/prim_advection_openacc_mod.F90
-  ${SRC_SHARE_DIR}/openacc/viscosity_openacc_mod.F90
-  ${SRC_SHARE_DIR}/arch_switch_mod.F90
+  ${SRC_SHARE_DIR}/bndry_mod_base.F90
+  ${SRC_SHARE_DIR}/derivative_mod_base.F90
+  ${SRC_SHARE_DIR}/edge_mod_base.F90
+  ${SRC_SHARE_DIR}/prim_advection_mod_base.F90 
+  ${SRC_SHARE_DIR}/solver_init_mod_base.F90
+  ${SRC_SHARE_DIR}/vertremap_mod_base.F90
+  ${SRC_SHARE_DIR}/viscosity_mod_base.F90
+  ${SRC_DIR}/preqx/bndry_mod.F90
+  ${SRC_DIR}/preqx/derivative_mod.F90
+  ${SRC_DIR}/preqx/edge_mod.F90
+  ${SRC_DIR}/preqx/prim_advection_mod.F90 
+  ${SRC_DIR}/preqx/solver_init_mod.F90
+  ${SRC_DIR}/preqx/vertremap_mod.F90
+  ${SRC_DIR}/preqx/viscosity_mod.F90
   ${SRC_SHARE_DIR}/kinds.F90
   ${SRC_SHARE_DIR}/dimensions_mod.F90
   ${SRC_SHARE_DIR}/control_mod.F90
@@ -30,7 +37,6 @@ SET(PREQX_SRCS_F90
   ${SRC_SHARE_DIR}/coordinate_systems_mod.F90
   ${SRC_SHARE_DIR}/params_mod.F90
   ${SRC_SHARE_DIR}/gridgraph_mod.F90
-  ${SRC_SHARE_DIR}/edge_mod.F90
   ${SRC_SHARE_DIR}/element_mod.F90
   ${SRC_SHARE_DIR}/parallel_mod.F90
   ${SRC_SHARE_DIR}/hybrid_mod.F90
@@ -43,9 +49,6 @@ SET(PREQX_SRCS_F90
   ${SRC_SHARE_DIR}/schedtype_mod.F90 
   ${SRC_SHARE_DIR}/edgetype_mod.F90
   ${SRC_SHARE_DIR}/thread_mod.F90
-  ${SRC_SHARE_DIR}/bndry_mod.F90
-  ${SRC_SHARE_DIR}/derivative_mod.F90
-  ${SRC_SHARE_DIR}/viscosity_mod.F90
   ${SRC_SHARE_DIR}/unit_tests_mod.F90
   ${SRC_SHARE_DIR}/spacecurve_mod.F90
   ${SRC_SHARE_DIR}/cube_mod.F90
@@ -93,7 +96,6 @@ SET(PREQX_SRCS_F90
   ${SRC_DIR}/prim_restart_mod.F90 
   ${SRC_SHARE_DIR}/prim_si_ref_mod.F90 
   ${SRC_SHARE_DIR}/prim_advance_mod.F90 
-  ${SRC_SHARE_DIR}/prim_advection_mod.F90 
   ${SRC_DIR}/forcing_mod.F90 
   ${SRC_SHARE_DIR}/metis_mod.F90 
   ${SRC_SHARE_DIR}/prim_driver_mod.F90 
@@ -111,7 +113,8 @@ SET(PREQX_SRCS_F90
   ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 
   ${PHYSICS_DIR}/column_model_mod.F90 
   ${PHYSICS_DIR}/column_types_mod.F90 
-)
+  CACHE INTERNAL "")
+
 
 SET(PREQX_SRCS_C
   ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c
@@ -153,15 +156,7 @@ MESSAGE(STATUS "  PLEV = ${PREQX_PLEV}")
 MESSAGE(STATUS "  PIO = ${PREQX_USE_PIO}")
 MESSAGE(STATUS "  ENERGY = ${PREQX_USE_ENERGY}")
 
-IF (${PREQX_USE_CUDA_FORTRAN})
-  SET(USE_CUDA_FORTRAN TRUE)
-  MESSAGE(STATUS "  Using CUDA Fortran")
-ENDIF ()
-
-IF (${PREQX_USE_OPENACC})
-  SET(USE_OPENACC TRUE)
-  MESSAGE(STATUS "  Using OpenACC")
-ENDIF ()
+SET(USE_OPENACC FALSE)
 
 IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP})
   SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading")
@@ -174,10 +169,4 @@ ENDIF ()
 createTestExec(preqx preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} 
                ${PREQX_USE_PIO}  ${PREQX_USE_ENERGY})
 
-IF (${BUILD_PREQX_SPELT})
-  MESSAGE(STATUS "  Also building preqx_spelt")
-  createTestExec(preqx_spelt preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} 
-                 ${PREQX_USE_PIO}  ${PREQX_USE_ENERGY})
-  SET_TARGET_PROPERTIES(preqx_spelt PROPERTIES COMPILE_DEFINITIONS "_SPELT")
-ENDIF ()
 
diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90
new file mode 100644
index 00000000000..5ad1125cbef
--- /dev/null
+++ b/components/homme/src/preqx/bndry_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! bndry_mod (preqx build): re-exports the entities listed below from bndry_mod_base; it defines nothing of its own.
+module bndry_mod
+  use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+  implicit none
+end module bndry_mod
diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90
new file mode 100644
index 00000000000..e9c3f81be3f
--- /dev/null
+++ b/components/homme/src/preqx/derivative_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! derivative_mod (preqx build): re-exports the types and routines listed below from derivative_mod_base; it defines nothing of its own.
+module derivative_mod
+  use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix,   &
+                                 derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+                                 remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere,     &
+                                 curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+                                 laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+  implicit none
+end module derivative_mod
diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90
new file mode 100644
index 00000000000..902503b36cb
--- /dev/null
+++ b/components/homme/src/preqx/edge_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! edge_mod (preqx build): re-exports the edge/ghost buffer entities listed below from edge_mod_base; it defines nothing of its own.
+module edge_mod
+  use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack,       &
+                           edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+                           ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR,     &
+                           ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level,       &
+                           ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+  implicit none
+end module edge_mod
diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90
new file mode 100644
index 00000000000..a3e235c41aa
--- /dev/null
+++ b/components/homme/src/preqx/prim_advection_mod.F90
@@ -0,0 +1,9 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! prim_advection_mod (preqx build): re-exports the entities listed below from prim_advection_mod_base; it defines nothing of its own.
+module prim_advection_mod
+  use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, &
+                                     prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2
+  implicit none
+end module prim_advection_mod
diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90
new file mode 100644
index 00000000000..90efb25eadf
--- /dev/null
+++ b/components/homme/src/preqx/solver_init_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! solver_init_mod (preqx build): re-exports solver_init2 from solver_init_mod_base; it defines nothing of its own.
+module solver_init_mod
+  use solver_init_mod_base, only: solver_init2
+  implicit none
+end module solver_init_mod
diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90
new file mode 100644
index 00000000000..63617bf66e6
--- /dev/null
+++ b/components/homme/src/preqx/vertremap_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! vertremap_mod (preqx build): re-exports the remap routines listed below from vertremap_mod_base; it defines nothing of its own.
+module vertremap_mod
+  use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm
+  implicit none
+end module vertremap_mod
diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90
new file mode 100644
index 00000000000..f78ae69a8f3
--- /dev/null
+++ b/components/homme/src/preqx/viscosity_mod.F90
@@ -0,0 +1,11 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! viscosity_mod (preqx build): re-exports entities from viscosity_mod_base; the second group is imported only when _PRIM is defined.
+module viscosity_mod
+  use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax
+#ifdef _PRIM
+  use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+  implicit none
+end module viscosity_mod
diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt
new file mode 100644
index 00000000000..83e28c4d87b
--- /dev/null
+++ b/components/homme/src/preqx_acc/CMakeLists.txt
@@ -0,0 +1,174 @@
+# Relevant directories within the HOMME repository
+# holding source files needed for building the library
+# and executable preqx_acc.
+
+
+
+set(SRC_DIR          ${HOMME_SOURCE_DIR}/src)
+set(SRC_SHARE_DIR    ${HOMME_SOURCE_DIR}/src/share)
+set(PHYSICS_DIR      ${HOMME_SOURCE_DIR}/physics)
+set(UTILS_SHARE_DIR  ${HOMME_SOURCE_DIR}/utils/csm_share)
+set(UTILS_TIMING_DIR ${CMAKE_BINARY_DIR}/utils/timing)
+
+# Publish the include path list through the cache so the tests can read it
+set(PREQX_ACC_INCLUDE_DIRS
+  ${PIO_INCLUDE_DIRS} ${UTILS_TIMING_DIR} ${SRC_SHARE_DIR}
+  CACHE INTERNAL "include directories for preqx")
+
+set(PREQX_SRCS_F90  # Fortran sources: shared *_base modules, preqx_acc overrides, then common code
+    ${SRC_SHARE_DIR}/bndry_mod_base.F90
+    ${SRC_SHARE_DIR}/derivative_mod_base.F90
+    ${SRC_SHARE_DIR}/edge_mod_base.F90
+    ${SRC_SHARE_DIR}/prim_advection_mod_base.F90
+    ${SRC_SHARE_DIR}/solver_init_mod_base.F90
+    ${SRC_SHARE_DIR}/vertremap_mod_base.F90
+    ${SRC_SHARE_DIR}/viscosity_mod_base.F90
+    ${SRC_DIR}/preqx_acc/bndry_mod.F90
+    ${SRC_DIR}/preqx_acc/derivative_mod.F90
+    ${SRC_DIR}/preqx_acc/edge_mod.F90
+    ${SRC_DIR}/preqx_acc/openacc_utils_mod.F90
+    ${SRC_DIR}/preqx_acc/prim_advection_mod.F90
+    ${SRC_DIR}/preqx_acc/solver_init_mod.F90
+    ${SRC_DIR}/preqx_acc/vertremap_mod.F90
+    ${SRC_DIR}/preqx_acc/viscosity_mod.F90
+    ${SRC_SHARE_DIR}/kinds.F90
+    ${SRC_SHARE_DIR}/dimensions_mod.F90
+    ${SRC_SHARE_DIR}/control_mod.F90
+    ${SRC_SHARE_DIR}/physical_constants.F90
+    ${SRC_SHARE_DIR}/coordinate_systems_mod.F90
+    ${SRC_SHARE_DIR}/params_mod.F90
+    ${SRC_SHARE_DIR}/gridgraph_mod.F90
+    ${SRC_SHARE_DIR}/element_mod.F90
+    ${SRC_SHARE_DIR}/parallel_mod.F90
+    ${SRC_SHARE_DIR}/hybrid_mod.F90
+    ${SRC_SHARE_DIR}/reduction_mod.F90
+    ${SRC_SHARE_DIR}/quadrature_mod.F90
+    ${SRC_SHARE_DIR}/global_norms_mod.F90
+    ${SRC_SHARE_DIR}/ll_mod.F90
+    ${SRC_SHARE_DIR}/metagraph_mod.F90
+    ${SRC_SHARE_DIR}/schedule_mod.F90
+    ${SRC_SHARE_DIR}/schedtype_mod.F90
+    ${SRC_SHARE_DIR}/edgetype_mod.F90
+    ${SRC_SHARE_DIR}/thread_mod.F90
+    ${SRC_SHARE_DIR}/unit_tests_mod.F90
+    ${SRC_SHARE_DIR}/spacecurve_mod.F90
+    ${SRC_SHARE_DIR}/cube_mod.F90
+    ${SRC_SHARE_DIR}/interpolate_mod.F90
+    ${SRC_SHARE_DIR}/fvm_bsp_mod.F90
+    ${SRC_SHARE_DIR}/fvm_transformation_mod.F90
+    ${SRC_SHARE_DIR}/dof_mod.F90
+    ${SRC_DIR}/common_io_mod.F90
+    ${SRC_SHARE_DIR}/spelt_mod.F90
+    ${SRC_SHARE_DIR}/fvm_mod.F90
+    ${SRC_SHARE_DIR}/fvm_control_volume_mod.F90
+    ${SRC_SHARE_DIR}/fvm_line_integrals_mod.F90
+    ${SRC_SHARE_DIR}/fvm_line_integrals_flux_mod.F90
+    ${SRC_SHARE_DIR}/fvm_analytic_mod.F90
+    ${SRC_SHARE_DIR}/fvm_filter_mod.F90
+    ${SRC_SHARE_DIR}/fvm_reconstruction_mod.F90
+    ${SRC_DIR}/pio_io_mod.F90
+    ${SRC_DIR}/repro_sum_mod.F90
+    ${SRC_SHARE_DIR}/time_mod.F90
+    ${SRC_SHARE_DIR}/physics_mod.F90
+    ${SRC_SHARE_DIR}/hybvcoord_mod.F90
+    ${SRC_DIR}/interp_movie_mod.F90
+    ${UTILS_SHARE_DIR}/shr_kind_mod.F90
+    ${UTILS_SHARE_DIR}/shr_mpi_mod.F90
+    ${UTILS_SHARE_DIR}/shr_sys_mod.F90
+    ${UTILS_SHARE_DIR}/shr_file_mod.F90
+    ${UTILS_SHARE_DIR}/shr_vmath_mod.F90
+    ${UTILS_SHARE_DIR}/shr_const_mod.F90
+    ${UTILS_SHARE_DIR}/shr_spfn_mod.F90
+    ${SRC_SHARE_DIR}/domain_mod.F90
+    ${SRC_DIR}/restart_io_mod.F90
+    ${SRC_SHARE_DIR}/cg_mod.F90
+    ${SRC_SHARE_DIR}/linear_algebra_mod.F90
+    ${SRC_SHARE_DIR}/solver_mod.F90
+    ${SRC_SHARE_DIR}/filter_mod.F90
+    ${SRC_DIR}/common_movie_mod.F90
+    ${SRC_SHARE_DIR}/namelist_mod.F90
+    ${SRC_SHARE_DIR}/mass_matrix_mod.F90
+    ${SRC_DIR}/checksum_mod.F90
+    ${SRC_DIR}/baroclinic_inst_mod.F90
+    ${SRC_SHARE_DIR}/diffusion_mod.F90
+    ${SRC_SHARE_DIR}/prim_si_mod.F90
+    ${SRC_DIR}/asp_tests.F90
+    ${SRC_SHARE_DIR}/prim_state_mod.F90
+    ${SRC_DIR}/prim_restart_mod.F90
+    ${SRC_SHARE_DIR}/prim_si_ref_mod.F90
+    ${SRC_SHARE_DIR}/prim_advance_mod.F90
+    ${SRC_DIR}/forcing_mod.F90
+    ${SRC_SHARE_DIR}/metis_mod.F90
+    ${SRC_SHARE_DIR}/prim_driver_mod.F90
+    ${SRC_DIR}/vertical_mod.F90
+    ${SRC_DIR}/prim_movie_mod.F90
+    ${SRC_DIR}/surfaces_mod.F90
+    ${SRC_DIR}/interpolate_driver_mod.F90
+    ${SRC_DIR}/netcdf_io_mod.F90
+    ${SRC_SHARE_DIR}/mesh_mod.F90
+    ${SRC_SHARE_DIR}/Manager.F90
+    ${SRC_DIR}/prim_main.F90
+    ${SRC_DIR}/ref_state_mod.F90
+    ${PHYSICS_DIR}/physics_types_mod.F90
+    ${PHYSICS_DIR}/physics_io_mod.F90
+    ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90
+    ${PHYSICS_DIR}/column_model_mod.F90
+    ${PHYSICS_DIR}/column_types_mod.F90
+    CACHE INTERNAL "")  # NOTE(review): src/preqx/CMakeLists.txt caches the same variable name; confirm they cannot clobber each other
+
+SET(PREQX_SRCS_C  # C helper sources compiled into the executable
+  ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c
+  ${SRC_DIR}/jrio.c
+)
+
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
+
+# Make SRCS global so the tests can access it; built from scratch so re-configuring never re-appends the cached value
+SET(PREQX_ACC_SRCS ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx_acc")
+
+# Fall back to defaults for every option the caller left unset (or set to a false value)
+if(NOT PREQX_NP)
+  set(PREQX_NP 4)
+endif()
+if(NOT PREQX_PLEV)
+  set(PREQX_PLEV 20)
+endif()
+if(NOT PREQX_USE_PIO)
+  set(PREQX_USE_PIO FALSE)
+endif()
+if(NOT PREQX_USE_ENERGY)
+  set(PREQX_USE_ENERGY FALSE)
+endif()
+if(NOT PREQX_NC)
+  set(PREQX_NC 4)
+endif()
+if(PREQX_QSIZE_D)
+  set(QSIZE_D ${PREQX_QSIZE_D})
+endif()
+
+MESSAGE(STATUS "Building preqx_acc with:")  # name this target so the banner differs from the one in src/preqx
+MESSAGE(STATUS "  NP = ${PREQX_NP}")
+MESSAGE(STATUS "  NC = ${PREQX_NC}")
+MESSAGE(STATUS "  PLEV = ${PREQX_PLEV}")
+MESSAGE(STATUS "  PIO = ${PREQX_USE_PIO}")
+MESSAGE(STATUS "  ENERGY = ${PREQX_USE_ENERGY}")
+
+SET(USE_OPENACC TRUE)  # unconditionally on for the preqx_acc executable
+SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}")
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}")
+# Pass variable names (not ${...}) so IF() stays well-formed when either option is unset or empty
+IF (ENABLE_HORIZ_OPENMP AND ENABLE_COLUMN_OPENMP)
+  SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading")
+ENDIF ()
+
+# The following macro handles the compilation with the appropriate variables
+############################################################################
+# createTestExec(exec_name exec_type NP NC PLEV USE_PIO USE_ENERGY)
+############################################################################
+createTestExec(preqx_acc preqx_acc ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} 
+               ${PREQX_USE_PIO}  ${PREQX_USE_ENERGY})
+
+
diff --git a/components/homme/src/share/openacc/bndry_openacc_mod.F90 b/components/homme/src/preqx_acc/bndry_mod.F90
similarity index 96%
rename from components/homme/src/share/openacc/bndry_openacc_mod.F90
rename to components/homme/src/preqx_acc/bndry_mod.F90
index 783f884c00f..75fd0ddd909 100644
--- a/components/homme/src/share/openacc/bndry_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/bndry_mod.F90
@@ -3,14 +3,17 @@
 #include "config.h"
 #endif
 
-module bndry_openacc_mod
-#if USE_OPENACC
-  use kinds     , only: real_kind
+module bndry_mod
+  use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+  use parallel_mod, only : syncmp,parallel_t,abortmp,iam
+  use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t
+  use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads
+  use kinds, only: real_kind
   implicit none
   private
-  integer, parameter :: maxCycles = 20
-  integer, parameter :: maxChunks = 64
-  real(kind=real_kind), parameter :: chunk_denom = 1.e5
+  integer, parameter, private :: maxCycles = 20
+  integer, parameter, private :: maxChunks = 64
+  real(kind=real_kind), parameter, private :: chunk_denom = 1.e5
 
   type send_stager_t
     integer :: nUpdateHost, nSendComp
@@ -24,12 +27,13 @@ module bndry_openacc_mod
     integer :: beg(maxchunks), end(maxchunks), len(maxchunks), asyncid(maxchunks), tag(maxchunks), req(maxchunks)
   end type recv_stager_t
 
-  type(send_stager_t) :: stg_send(maxCycles)
-  type(recv_stager_t) :: stg_recv(maxCycles)
+  type(send_stager_t), private :: stg_send(maxCycles)
+  type(recv_stager_t), private :: stg_recv(maxCycles)
 
+  public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
   public :: bndry_exchangeS_simple_overlap
-  public :: bndry_exchangeV_simple_overlap
   public :: bndry_exchangeV_timing
+  public :: bndry_exchangeV_simple_overlap
   public :: bndry_exchangeV_finer_overlap
 
 contains
@@ -514,8 +518,8 @@ function mpi_isend_openacc_stage(buf, count, dest, tag_root, comm, ierror, nchun
     use parallel_mod     , only: mpireal_t
     use openacc_utils_mod, only: update_host_async, acc_async_test_wrap
     implicit none
-    real(kind=real_kind), intent(in   ) :: buf(count)   !buffer from which to send data
     integer             , intent(in   ) :: count        !number of elements in buffer
+    real(kind=real_kind), intent(in   ) :: buf(count)   !buffer from which to send data
     integer             , intent(in   ) :: dest         !the MPI rank I'm sending data to
     integer             , intent(in   ) :: tag_root     !tag of the original send (I'll alter this for internal mpi_isend calls)
     integer             , intent(in   ) :: comm         !Communicator to use
@@ -576,8 +580,8 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch
     use parallel_mod     , only: mpireal_t
     use openacc_utils_mod, only: update_device_async, copy_ondev_async
     implicit none
-    real(kind=real_kind), intent(in   ) :: buf(count)   !buffer in which to receive data
     integer             , intent(in   ) :: count        !number of elements in buffer
+    real(kind=real_kind), intent(in   ) :: buf(count)   !buffer in which to receive data
     integer             , intent(in   ) :: source       !the MPI rank I'm receiving data from
     integer             , intent(in   ) :: tag_root     !tag of the original send (I'll alter this for internal mpi_isend calls)
     integer             , intent(in   ) :: comm         !Communicator to use
@@ -634,6 +638,5 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch
     if (stg_recv(myid)%nUpdateDev == nchunks) finished = .true.
   end function mpi_irecv_openacc_stage
 
-#endif
-end module bndry_openacc_mod
+end module bndry_mod
 
diff --git a/components/homme/src/preqx_acc/config.h.cmake.in b/components/homme/src/preqx_acc/config.h.cmake.in
new file mode 100644
index 00000000000..8c2c17cb05f
--- /dev/null
+++ b/components/homme/src/preqx_acc/config.h.cmake.in
@@ -0,0 +1,61 @@
+/* PRIM Options */
+
+/* Energy Diagnostics */
+#cmakedefine ENERGY_DIAGNOSTICS
+
+/* Using PRIMITIVE */
+#define _PRIM
+
+/* WK_GRAD */
+#define _WK_GRAD 1
+
+/* _PRESTART */
+#define _PRESTART 1
+
+/* Using MPI */
+#define _MPI 1
+
+/* PIO with interpolation */
+#cmakedefine PIO_INTERP
+
+/* Number of cells NC*NC in each element (Default value NC=4) */
+#define NC @NUM_CELLS@
+
+/* Number of POINTS in the element (Default value is 4) */
+#define NP @NUM_POINTS@
+
+/* Number of vertical levels (PLEV) */
+#define PLEV @NUM_PLEV@
+
+/* Number of tracers (Default value 4 if not specified) */
+#cmakedefine QSIZE_D @QSIZE_D@
+
+/* Basic PIO */
+#cmakedefine PIO
+
+/* Whether to use CUDA Fortran */
+#cmakedefine01 USE_CUDA_FORTRAN
+
+/* Whether to use OPENACC */
+#cmakedefine01 USE_OPENACC
+
+/* Quadruple-precision */
+#cmakedefine01 HOMME_QUAD_PREC
+
+/* Horizontal OpenMP (over elements) */
+#cmakedefine HORIZ_OPENMP
+
+/* Vertical OpenMP (within elements) */
+#cmakedefine COLUMN_OPENMP
+
+/* Nested OpenMP (both horizontal and vertical) */
+#cmakedefine NESTED_OPENMP
+
+/* Zoltan partitioning library */
+#cmakedefine01 HAVE_ZOLTAN
+
+/* Enable persistent MPI comm */
+#cmakedefine MPI_PERSISTENT
+
+/* Using Accelerator */
+#cmakedefine IS_ACCELERATOR
diff --git a/components/homme/src/share/openacc/derivative_openacc_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90
similarity index 74%
rename from components/homme/src/share/openacc/derivative_openacc_mod.F90
rename to components/homme/src/preqx_acc/derivative_mod.F90
index 4676d05a053..97dea44734d 100644
--- a/components/homme/src/share/openacc/derivative_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/derivative_mod.F90
@@ -3,22 +3,36 @@
 #include "config.h"
 #endif
 
-module derivative_openacc_mod
-#if USE_OPENACC
-  use kinds, only: real_kind
-  use dimensions_mod, only: np, nelemd
+module derivative_mod
+  use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix,   &
+                                 derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+                                 remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere,     &
+                                 curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+                                 laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+  use kinds, only : real_kind, longdouble_kind
+  use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev
+  use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi
+  use parallel_mod, only : abortmp
+  ! needed for spherical differential operators:
+  use physical_constants, only : rrearth 
+  use element_mod, only : element_t
+  use control_mod, only : hypervis_scaling, hypervis_power
   implicit none
   private
 
-  public :: laplace_sphere_wk
-  public :: divergence_sphere_wk
-  public :: gradient_sphere
-  public :: divergence_sphere
+  public ::  derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix,   &
+             derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+             remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere,     &
+             curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+             laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+  public :: laplace_sphere_wk_openacc
+  public :: divergence_sphere_wk_openacc
+  public :: gradient_sphere_openacc
+  public :: divergence_sphere_openacc
 
 contains
 
-  subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl)
-    use derivative_mod, only: derivative_t
+  subroutine laplace_sphere_wk_openacc(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl)
     use element_mod, only: element_t
     use control_mod, only: hypervis_scaling, hypervis_power
     implicit none
@@ -35,7 +49,7 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n
     integer :: i,j,k,ie
     ! Local
     real(kind=real_kind) :: oldgrads(2)
-    call gradient_sphere(s,deriv,elem(:),grads,len,nets,nete,ntl,tl)
+    call gradient_sphere_openacc(s,deriv,elem(:),grads,len,nets,nete,ntl,tl)
     !$acc parallel loop gang vector collapse(4) present(grads,elem(:)) private(oldgrads)
     do ie = nets , nete
       do k = 1 , len
@@ -58,12 +72,11 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n
     enddo
     ! note: divergnece_sphere and divergence_sphere_wk are identical *after* bndry_exchange
     ! if input is C_0.  Here input is not C_0, so we should use divergence_sphere_wk().  
-    call divergence_sphere_wk(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl)
-  end subroutine laplace_sphere_wk
+    call divergence_sphere_wk_openacc(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl)
+  end subroutine laplace_sphere_wk_openacc
 
-  subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl)
+  subroutine divergence_sphere_wk_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl)
     use element_mod, only: element_t
-    use derivative_mod, only: derivative_t
     use physical_constants, only: rrearth
     implicit none
 !   input:  v = velocity in lat-lon coordinates
@@ -117,11 +130,10 @@ subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl)
         enddo
       enddo
     enddo
-  end subroutine divergence_sphere_wk
+  end subroutine divergence_sphere_wk_openacc
 
-  subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl)
+  subroutine gradient_sphere_openacc(s,deriv,elem,ds,len,nets,nete,ntl,tl)
     use element_mod, only: element_t
-    use derivative_mod, only: derivative_t
     use physical_constants, only: rrearth
     implicit none
     !   input s:  scalar
@@ -171,13 +183,12 @@ subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl)
         enddo
       enddo
     enddo
-  end subroutine gradient_sphere
+  end subroutine gradient_sphere_openacc
 
-  subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl)
+  subroutine divergence_sphere_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl)
 !   input:  v = velocity in lat-lon coordinates
 !   ouput:  div(v)  spherical divergence of v
     use element_mod   , only: element_t
-    use derivative_mod, only: derivative_t
     use physical_constants, only: rrearth
     implicit none
     real(kind=real_kind), intent(in   ) :: v(np,np,2,len,ntl,nelemd)  ! in lat-lon coordinates
@@ -227,8 +238,7 @@ subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl)
         enddo
       enddo
     enddo
-  end subroutine divergence_sphere
+  end subroutine divergence_sphere_openacc
 
-#endif
-end module derivative_openacc_mod
+end module derivative_mod
 
diff --git a/components/homme/src/share/openacc/edge_openacc_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90
similarity index 86%
rename from components/homme/src/share/openacc/edge_openacc_mod.F90
rename to components/homme/src/preqx_acc/edge_mod.F90
index 29178a9fda1..645fe05a66a 100644
--- a/components/homme/src/share/openacc/edge_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/edge_mod.F90
@@ -3,24 +3,43 @@
 #include "config.h"
 #endif
 
-module edge_openacc_mod
-#if USE_OPENACC
-  use kinds, only: real_kind, int_kind, log_kind
-  use dimensions_mod, only: max_neigh_edges,nelemd,np,max_corner_elem
+module edge_mod
+  use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack,       &
+                           edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+                           ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR,     &
+                           ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level,       &
+                           ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+  use kinds, only : int_kind, log_kind, real_kind
+  use dimensions_mod, only : max_neigh_edges, nelemd, np
+  use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! _EXTERNAL
+  use thread_mod, only: nthreadshoriz, omp_get_num_threads, omp_get_thread_num
+  use coordinate_systems_mod, only : cartesian3D_t
+  use schedtype_mod, only : cycle_t, schedule_t, schedule
+  use parallel_mod, only : abortmp, haltmp, MPIreal_t, iam,parallel_t, &
+      MAX_ACTIVE_MSG, HME_status_size, BNDRY_TAG_BASE
+  use edgetype_mod, only : edgedescriptor_t, edgebuffer_t, &
+      Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid
+  use element_mod, only : element_t
   implicit none
   private
 
-  public :: edgeSpack
-  public :: edgeSunpackMin
-  public :: edgeSunpackMax
-  public :: edgeVpack
-  public :: edgeVunpack
-  public :: edgeVunpackMin
-  public :: edgeVunpackMax
+  public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack,       &
+            edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+            ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR,     &
+            ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level,       &
+            ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+  public :: edgeSpack_openacc
+  public :: edgeSunpackMin_openacc
+  public :: edgeSunpackMax_openacc
+  public :: edgeVpack_openacc
+  public :: edgeVunpack_openacc
+  public :: edgeVunpackMin_openacc
+  public :: edgeVunpackMax_openacc
+
 
 contains
 
-  subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeSpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : max_corner_elem
     use control_mod   , only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod      , only : t_startf, t_stopf
@@ -29,10 +48,10 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)     ,intent(inout) :: edge
     integer                ,intent(in   ) :: vlyr
-    real (kind=real_kind)  ,intent(in   ) :: v(vlyr,tdim,nelemd)
     integer                ,intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real (kind=real_kind)  ,intent(in   ) :: v(vlyr,tdim,nelemd)
     ! Local variables
     integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk
     integer, parameter :: kchunk = 64
@@ -66,9 +85,9 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_s_pack')
-  end subroutine edgeSpack
+  end subroutine edgeSpack_openacc
 
-  subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeSunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : np, max_corner_elem
     use control_mod, only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod, only: t_startf, t_stopf
@@ -76,10 +95,10 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)    , intent(in   ) :: edge
     integer               , intent(in   ) :: vlyr
-    real(kind=real_kind)  , intent(inout) :: v(vlyr,tdim,nelemd)
     integer               , intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real(kind=real_kind)  , intent(inout) :: v(vlyr,tdim,nelemd)
     ! Local
     integer :: i,k,ll,is,ie,in,iw,el,kc,kk
     integer, parameter :: kchunk = 64
@@ -125,9 +144,9 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_s_unpack_min')
-  end subroutine edgeSunpackMin
+  end subroutine edgeSunpackMin_openacc
 
-  subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeSunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : np, max_corner_elem
     use control_mod, only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod, only: t_startf, t_stopf
@@ -135,10 +154,10 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)    , intent(in   ) :: edge
     integer               , intent(in   ) :: vlyr
-    real(kind=real_kind)  , intent(inout) :: v(vlyr,tdim,nelemd)
     integer               , intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real(kind=real_kind)  , intent(inout) :: v(vlyr,tdim,nelemd)
     ! Local
     integer :: i,k,ll,is,ie,in,iw,el,kc,kk
     integer, parameter :: kchunk = 64
@@ -184,9 +203,9 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_s_unpack_max')
-  end subroutine edgeSunpackMax
+  end subroutine edgeSunpackMax_openacc
 
-  subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeVpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : max_corner_elem
     use control_mod   , only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod      , only : t_startf, t_stopf
@@ -195,10 +214,10 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)     ,intent(inout) :: edge
     integer                ,intent(in   ) :: vlyr
-    real (kind=real_kind)  ,intent(in   ) :: v(np,np,vlyr,tdim,nelemd)
     integer                ,intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real (kind=real_kind)  ,intent(in   ) :: v(np,np,vlyr,tdim,nelemd)
     ! Local variables
     integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk
     integer, parameter :: kchunk = 32
@@ -247,9 +266,9 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_pack')
-  end subroutine edgeVpack
+  end subroutine edgeVpack_openacc
 
-  subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeVunpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : np, max_corner_elem
     use control_mod, only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod, only: t_startf, t_stopf
@@ -257,10 +276,10 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)    , intent(in   ) :: edge
     integer               , intent(in   ) :: vlyr
-    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     integer               , intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     ! Local
     integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
     integer, parameter :: kchunk = 32
@@ -324,9 +343,9 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_unpack')
-  end subroutine edgeVunpack
+  end subroutine edgeVunpack_openacc
 
-  subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeVunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : np, max_corner_elem
     use control_mod, only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod, only: t_startf, t_stopf
@@ -334,10 +353,10 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)    , intent(in   ) :: edge
     integer               , intent(in   ) :: vlyr
-    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     integer               , intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     ! Local
     integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
     integer, parameter :: kchunk = 32
@@ -401,9 +420,9 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_unpack_min')
-  end subroutine edgeVunpackMin
+  end subroutine edgeVunpackMin_openacc
 
-  subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+  subroutine edgeVunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use dimensions_mod, only : np, max_corner_elem
     use control_mod, only : north, south, east, west, neast, nwest, seast, swest
     use perf_mod, only: t_startf, t_stopf
@@ -411,10 +430,10 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
     use edgetype_mod  , only : EdgeBuffer_t
     type(EdgeBuffer_t)    , intent(in   ) :: edge
     integer               , intent(in   ) :: vlyr
-    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     integer               , intent(in   ) :: kptr
     type(element_t)        ,intent(in   ) :: elem(:)
     integer                ,intent(in   ) :: nets,nete,tdim,tl
+    real(kind=real_kind)  , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
     ! Local
     integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
     integer, parameter :: kchunk = 32
@@ -478,8 +497,7 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
       enddo
     enddo
     call t_stopf('edge_unpack_max')
-  end subroutine edgeVunpackMax
+  end subroutine edgeVunpackMax_openacc
 
-#endif
-end module edge_openacc_mod
+end module edge_mod
 
diff --git a/components/homme/src/share/openacc/openacc_utils_mod.F90 b/components/homme/src/preqx_acc/openacc_utils_mod.F90
similarity index 74%
rename from components/homme/src/share/openacc/openacc_utils_mod.F90
rename to components/homme/src/preqx_acc/openacc_utils_mod.F90
index 5b716e1849a..8e0acbb5f4d 100644
--- a/components/homme/src/share/openacc/openacc_utils_mod.F90
+++ b/components/homme/src/preqx_acc/openacc_utils_mod.F90
@@ -4,7 +4,6 @@
 #endif
 
 module openacc_utils_mod
-#if USE_OPENACC
   use kinds, only: real_kind
   use dimensions_mod, only: nelemd
   implicit none
@@ -16,41 +15,25 @@ module openacc_utils_mod
   public :: update_device_async
   public :: copy_ondev
   public :: copy_ondev_async
-  public :: arch_init2
   public :: acc_async_test_wrap
 
 contains
 
   function acc_async_test_wrap( asyncid )  result(rslt)
-    use openacc, only: acc_async_test
+#   ifdef _OPENACC
+      use openacc, only: acc_async_test  ! OpenACC runtime module, referenced only when _OPENACC is defined
+#   endif
     implicit none
     integer, intent(in) :: asyncid
     logical             :: rslt
-    rslt = .false.
-    rslt = acc_async_test(asyncid)
+#   ifdef _OPENACC
+      rslt = .false.  ! initial value; overwritten by the runtime query on the next line
+      rslt = acc_async_test(asyncid)  ! result of the OpenACC runtime test for async id asyncid
+#   else
+      rslt = .true.  ! built without OpenACC: always report true so callers do not wait
+#   endif
   end function acc_async_test_wrap
 
-  subroutine arch_init2( elem , deriv )
-    use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
-    use derivative_mod, only: derivative_t
-    implicit none
-    type(element_t)   , intent(in) :: elem(:)
-    type(derivative_t), intent(in) :: deriv
-    integer :: ie
-    !$omp barrier
-    !$omp master
-
-    !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj)
-    !$acc enter data pcopyin(elem(1:nelemd),deriv)
-    do ie = 1 , nelemd
-      !$acc enter data pcopyin(elem(ie)%desc)
-      !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse)
-    enddo
-
-    !$omp end master
-    !$omp barrier
-  end subroutine arch_init2
-
   subroutine copy_qdp_h2d( elem , tl )
     use element_mod, only: element_t, state_qdp
     implicit none
@@ -119,6 +102,5 @@ subroutine copy_ondev_async(dest,src,len,id)
     enddo
   end subroutine copy_ondev_async
 
-#endif
 end module openacc_utils_mod
 
diff --git a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90
similarity index 87%
rename from components/homme/src/share/openacc/prim_advection_openacc_mod.F90
rename to components/homme/src/preqx_acc/prim_advection_mod.F90
index ba1c7fa48ad..b9ed14c295c 100644
--- a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/prim_advection_mod.F90
@@ -6,34 +6,56 @@
 #include "config.h"
 #endif
 
-module prim_advection_openacc_mod
-#if USE_OPENACC
-  use kinds          , only: real_kind, int_kind, log_kind
-  use dimensions_mod , only: np,nlevp,nlev,qsize,qsize_d,max_corner_elem,max_neigh_edges,nelemd
-  use element_mod    , only: timelevels
-  use edgetype_mod   , only: EdgeBuffer_t
-  use derivative_mod , only: derivative_t
+module prim_advection_mod
+  !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2
+  use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap
+  use kinds, only              : real_kind
+  use dimensions_mod, only     : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd
+  use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp
+  use element_mod, only        : element_t
+  use fvm_control_volume_mod, only        : fvm_struct
+  use spelt_mod, only          : spelt_struct
+  use filter_mod, only         : filter_t, filter_P
+  use hybvcoord_mod, only      : hvcoord_t
+  use time_mod, only           : TimeLevel_t, smooth, TimeLevel_Qdp
+  use prim_si_mod, only        : preq_pressure
+  use diffusion_mod, only      : scalar_diffusion, diffusion_init
+  use control_mod, only        : integration, test_case, filter_freq_advection,  hypervis_order, &
+        statefreq, moisture, TRACERADV_TOTAL_DIVERGENCE, TRACERADV_UGRADQ, &
+        nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit
+  use edge_mod, only           : edgevpack, edgerotate, edgevunpack, initedgebuffer, initedgesbuffer, &
+        edgevunpackmin, initghostbuffer3D
+ 
+  use edgetype_mod, only       : EdgeDescriptor_t, EdgeBuffer_t, ghostbuffer3D_t
+  use hybrid_mod, only         : hybrid_t
+  use bndry_mod, only          : bndry_exchangev
+  use perf_mod, only           : t_startf, t_stopf, t_barrierf ! _EXTERNAL
+  use parallel_mod, only   : abortmp
+  use derivative_mod, only: derivative_t
   implicit none
   private
-  type (derivative_t), public, allocatable :: deriv(:) ! derivative struct (nthreads)
-  real(kind=real_kind), allocatable :: qmin(:,:,:), qmax(:,:,:)
-  real(kind=real_kind), allocatable :: dp0(:)
-  real(kind=real_kind), allocatable :: Qtens_biharmonic(:,:,:,:,:)
-  real(kind=real_kind), allocatable :: Qtens(:,:,:,:,:)
-  real(kind=real_kind), allocatable :: grads_tracer(:,:,:,:,:,:)
-  real(kind=real_kind), allocatable :: dp_star(:,:,:,:)
-  type (EdgeBuffer_t) :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax
-  integer,parameter :: DSSeta = 1
-  integer,parameter :: DSSomega = 2
-  integer,parameter :: DSSdiv_vdp_ave = 3
-  integer,parameter :: DSSno_var = -1
-  real(kind=real_kind), allocatable :: data_pack(:,:,:,:), data_pack2(:,:,:,:)
-  logical :: first_time = .true.
-
+  type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads)
+  real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:)
+  real(kind=real_kind), private, allocatable :: dp0(:)
+  real(kind=real_kind), private, allocatable :: Qtens_biharmonic(:,:,:,:,:)
+  real(kind=real_kind), private, allocatable :: Qtens(:,:,:,:,:)
+  real(kind=real_kind), private, allocatable :: grads_tracer(:,:,:,:,:,:)
+  real(kind=real_kind), private, allocatable :: dp_star(:,:,:,:)
+  type (EdgeBuffer_t), private :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax
+  integer,parameter, private :: DSSeta = 1
+  integer,parameter, private :: DSSomega = 2
+  integer,parameter, private :: DSSdiv_vdp_ave = 3
+  integer,parameter, private :: DSSno_var = -1
+  real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:)
+  logical, private :: first_time = .true.
+
+  public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap
   public :: Prim_Advec_Tracers_remap
   public :: prim_advec_init1
   public :: prim_advec_init2
   public :: prim_advec_init_deriv
+  public :: deriv
+  public :: Prim_Advec_Tracers_remap_rk2
 
 contains
 
@@ -45,28 +67,30 @@ subroutine copy_qdp1_h2d( elem , tl , nets , nete )
     integer        , intent(in) :: tl, nets , nete
     integer :: ie, k, j, i
     call t_startf('qdp1_pcie')
-    do ie = nets , nete
-      data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie)
-    enddo
-    !$omp barrier
-    !$omp master
-!   do ie = 1 , nelemd
-!     !$acc update device(state_qdp(:,:,:,1,tl,ie))
-!   enddo
-    !$acc update device(data_pack) async(1)
-    !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
-    do ie = 1 , nelemd
-      do k = 1 , nlev
-        do j = 1 , np
-          do i = 1 , np
-            state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie)
+#   if USE_OPENACC
+      do ie = nets , nete
+        data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie)
+      enddo
+      !$omp barrier
+      !$omp master
+!     do ie = 1 , nelemd
+!       !$acc update device(state_qdp(:,:,:,1,tl,ie))
+!     enddo
+      !$acc update device(data_pack) async(1)
+      !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
+      do ie = 1 , nelemd
+        do k = 1 , nlev
+          do j = 1 , np
+            do i = 1 , np
+              state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie)
+            enddo
           enddo
         enddo
       enddo
-    enddo
-    !$acc wait(1)
-    !$omp end master
-    !$omp barrier
+      !$acc wait(1)
+      !$omp end master
+      !$omp barrier
+#   endif
     call t_stopf('qdp1_pcie')
   end subroutine copy_qdp1_h2d
 
@@ -78,28 +102,30 @@ subroutine copy_qdp1_d2h( elem , tl , nets , nete )
     integer        , intent(in) :: tl, nets , nete
     integer :: ie, k, j, i
     call t_startf('qdp1_pcie')
-    !$omp barrier
-    !$omp master
-    !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
-    do ie = 1 , nelemd
-      do k = 1 , nlev
-        do j = 1 , np
-          do i = 1 , np
-            data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie)
+#   if USE_OPENACC
+      !$omp barrier
+      !$omp master
+      !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
+      do ie = 1 , nelemd
+        do k = 1 , nlev
+          do j = 1 , np
+            do i = 1 , np
+              data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie)
+            enddo
           enddo
         enddo
       enddo
-    enddo
-    !$acc update host(data_pack) async(1)
-    !$acc wait(1)
-!   do ie = 1 , nelemd
-!     !$acc update host(state_qdp(:,:,:,1,tl,ie))
-!   enddo
-    !$omp end master
-    !$omp barrier
-    do ie = nets , nete
-      state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie)
-    enddo
+      !$acc update host(data_pack) async(1)
+      !$acc wait(1)
+!     do ie = 1 , nelemd
+!       !$acc update host(state_qdp(:,:,:,1,tl,ie))
+!     enddo
+      !$omp end master
+      !$omp barrier
+      do ie = nets , nete
+        state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie)
+      enddo
+#   endif
     call t_stopf('qdp1_pcie')
   end subroutine copy_qdp1_d2h
 
@@ -112,7 +138,7 @@ subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , flt , hybrid , dt
     use filter_mod    , only: filter_t
     use time_mod      , only: TimeLevel_t, TimeLevel_Qdp
     use control_mod   , only: limiter_option, nu_p, qsplit
-    use bndry_openacc_mod, only: bndry_exchangeV_timing
+    use bndry_mod, only: bndry_exchangeV_timing
     implicit none
     type (element_t)     , intent(inout) :: elem(:)
     type (derivative_t)  , intent(in   ) :: deriv
@@ -321,13 +347,12 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
     use hybrid_mod           , only: hybrid_t
     use element_mod          , only: element_t, derived_divdp_proj, state_qdp
     use derivative_mod       , only: derivative_t
-    use edge_mod             , only: EdgeBuffer_t
     use perf_mod             , only: t_startf, t_stopf                          ! _EXTERNAL
     use hybvcoord_mod        , only: hvcoord_t
     use control_mod          , only: nu_q, hypervis_order, hypervis_subcycle_q, nu_p
-    use viscosity_openacc_mod, only: biharmonic_wk_scalar
-    use edge_openacc_mod     , only: edgeVpack, edgeVunpack
-    use bndry_openacc_mod    , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+    use viscosity_mod, only: biharmonic_wk_scalar_openacc
+    use edge_mod     , only: edgeVpack_openacc, edgeVunpack_openacc
+    use bndry_mod    , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
     implicit none
     type (EdgeBuffer_t)  , intent(inout)         :: edgeAdv_dontuse
     type (element_t)     , intent(inout), target :: elem(:)
@@ -388,7 +413,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
       !$omp end master
       !$omp barrier
       ! compute biharmonic operator. Qtens = input and output 
-      call biharmonic_wk_scalar( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
+      call biharmonic_wk_scalar_openacc( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
       !$omp barrier
       !$omp master
       !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:),qtens)
@@ -407,7 +432,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
       enddo
       call limiter2d_zero(state_Qdp,2,nt_qdp)
       call t_startf('ah_scalar_PEU')
-      call edgeVpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
+      call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
       !$omp end master
       !$omp barrier
 
@@ -417,7 +442,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
       
       !$omp barrier
       !$omp master
-      call edgeVunpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
+      call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
       call t_stopf('ah_scalar_PEU')
       !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:))
       do ie = 1 , nelemd
@@ -441,7 +466,6 @@ end subroutine advance_hypervis_scalar
 
   subroutine qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , limiter_option , nu_p , nets , nete )
     use element_mod, only: element_t, state_qdp
-    use control_mod, only: limiter_option
     implicit none
     type(element_t)     , intent(inout) :: elem(:)
     integer             , intent(in   ) :: rkstage , n0_qdp , np1_qdp , nets , nete , limiter_option
@@ -488,10 +512,10 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
   use control_mod           , only: limiter_option, nu_p, nu_q
   use perf_mod              , only: t_startf, t_stopf
   use element_mod           , only: derived_divdp_proj, state_qdp, derived_vn0, derived_divdp
-  use derivative_openacc_mod, only: divergence_sphere
-  use viscosity_openacc_mod , only: biharmonic_wk_scalar, neighbor_minmax
-  use edge_openacc_mod      , only: edgeVpack, edgeVunpack
-  use bndry_openacc_mod     , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+  use derivative_mod, only: divergence_sphere_openacc
+  use viscosity_mod , only: biharmonic_wk_scalar_openacc, neighbor_minmax_openacc
+  use edge_mod      , only: edgeVpack_openacc, edgeVunpack_openacc
+  use bndry_mod     , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
   implicit none
   integer              , intent(in   )         :: np1_qdp, n0_qdp
   real (kind=real_kind), intent(in   )         :: dt
@@ -606,7 +630,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
     enddo
     !$omp end master
     !$omp barrier
-    if ( rhs_multiplier == 0 ) call neighbor_minmax(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax)
+    if ( rhs_multiplier == 0 ) call neighbor_minmax_openacc(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax)
     ! compute biharmonic mixing term
     if ( rhs_multiplier == 2 ) then
       rhs_viss = 3
@@ -633,8 +657,8 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
         !$omp end master
         !$omp barrier
       endif
-      call biharmonic_wk_scalar( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
-      call neighbor_minmax( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax )
+      call biharmonic_wk_scalar_openacc( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
+      call neighbor_minmax_openacc( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax )
       !$omp barrier
       !$omp master
       !$acc parallel loop gang vector collapse(4) present(qtens_biharmonic,dp0,elem(:))
@@ -716,7 +740,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
       enddo
     enddo
   enddo
-  call divergence_sphere( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 )
+  call divergence_sphere_openacc( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 )
   !$acc parallel loop gang vector collapse(5) present(qtens,state_qdp,qtens_biharmonic)
   do ie = 1 , nelemd
     ! advance Qdp
@@ -761,7 +785,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
   ! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is
   ! all zero so we only have to DSS 1:nlev
   call t_startf('eus_PEU')
-  call edgeVpack(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
+  call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
   !$omp end master
   !$omp barrier
 
@@ -771,7 +795,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
 
   !$omp barrier
   !$omp master
-  call edgeVunpack( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
+  call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
   call t_stopf('eus_PEU')
   !$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:))
   do ie = 1 , nelemd
@@ -800,9 +824,9 @@ subroutine limiter2d_zero(Qdp,tdim,tl)
     ! ps is only used when advecting Q instead of Qdp
     ! so ps should be at one timelevel behind Q
     implicit none
-    real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd)
     integer              , intent(in   ) :: tdim
     integer              , intent(in   ) :: tl
+    real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd)
     ! local
     real (kind=real_kind) :: mass,mass_new
     real (kind=real_kind) :: qtmp(np,np)
@@ -939,7 +963,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
     use edge_mod              , only: edgeVpack, edgeVunpack
     use bndry_mod             , only: bndry_exchangeV
     use control_mod           , only: limiter_option
-    use derivative_openacc_mod, only: divergence_sphere
+    use derivative_mod, only: divergence_sphere_openacc
     use openacc_utils_mod     , only: copy_ondev
     use perf_mod              , only: t_startf, t_stopf
     implicit none
@@ -954,7 +978,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
     !$omp barrier
     !$omp master
     !$acc update device(derived_vn0)
-    call divergence_sphere(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1)
+    call divergence_sphere_openacc(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1)
     call copy_ondev(derived_divdp_proj,derived_divdp,product(shape(derived_divdp)))
     !$acc update host(derived_divdp,derived_divdp_proj)
     !$omp end master
@@ -988,7 +1012,6 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
     call t_stopf('derived PEU')
   end subroutine precompute_divdp
 
-#endif
-end module prim_advection_openacc_mod
+end module prim_advection_mod
 
 
diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90
new file mode 100644
index 00000000000..0b2027af701
--- /dev/null
+++ b/components/homme/src/preqx_acc/solver_init_mod.F90
@@ -0,0 +1,40 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module solver_init_mod
+  !OVERWRITING: solver_init2
+  use solver_init_mod_base, only: 
+  use dimensions_mod, only: nelemd
+  implicit none
+  private
+  ! solver_init2 is the only public entity of this module.
+  public :: solver_init2
+
+
+contains
+  ! solver_init2: enter state_Qdp, the derived_* arrays, elem(1:nelemd) and deriv into the OpenACC device data
+  ! environment. The work is done by the OpenMP master thread only, bracketed by barriers on entry and exit.
+  subroutine solver_init2( elem , deriv )
+    use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
+    use derivative_mod, only: derivative_t
+    implicit none
+    type(element_t)   , intent(in) :: elem(:)
+    type(derivative_t), intent(in) :: deriv
+    integer :: ie
+    !$omp barrier
+    !$omp master
+    ! pcreate: device allocation only if not already present; pcopyin: allocation plus host-to-device copy.
+    !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj)
+    !$acc enter data pcopyin(elem(1:nelemd),deriv)
+    do ie = 1 , nelemd
+      !$acc enter data pcopyin(elem(ie)%desc)
+      !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse)
+    enddo
+    ! NOTE(review): desc and its members are entered per element, presumably because copying elem alone is shallow.
+    !$omp end master
+    !$omp barrier
+  end subroutine solver_init2
+
+
+end module solver_init_mod
diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90
new file mode 100644
index 00000000000..4c3471721bb
--- /dev/null
+++ b/components/homme/src/preqx_acc/vertremap_mod.F90
@@ -0,0 +1,11 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module vertremap_mod
+  use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm
+  implicit none
+  private
+  ! Re-export the three remap routines obtained from vertremap_mod_base; this module defines no code of its own.
+  public :: remap1, remap1_nofilter, remap_q_ppm
+end module vertremap_mod
diff --git a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90
similarity index 54%
rename from components/homme/src/share/openacc/viscosity_openacc_mod.F90
rename to components/homme/src/preqx_acc/viscosity_mod.F90
index bf3cda132b5..4bc0c10960f 100644
--- a/components/homme/src/share/openacc/viscosity_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/viscosity_mod.F90
@@ -3,28 +3,45 @@
 #include "config.h"
 #endif
 
-module viscosity_openacc_mod
-#if USE_OPENACC
-  use kinds, only: real_kind
-  use dimensions_mod, only: np,nlev,qsize,nelemd
+module viscosity_mod
+  use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0
+#ifdef _PRIM
+  use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+  use thread_mod, only : omp_get_num_threads
+  use kinds, only : real_kind, iulog
+  use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac
+  use hybrid_mod, only : hybrid_t, hybrid_create
+  use parallel_mod, only : parallel_t
+  use element_mod, only : element_t
+  use edgetype_mod, only : EdgeBuffer_t, EdgeDescriptor_t
+  use bndry_mod, only : bndry_exchangev, bndry_exchangeS, bndry_exchangeS_start,bndry_exchangeS_finish
+  use control_mod, only : hypervis_scaling, nu, nu_div
+  use perf_mod, only: t_startf, t_stopf
   implicit none
   private
 
-  public :: neighbor_minmax
-  public :: biharmonic_wk_scalar
+  public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0
+#ifdef _PRIM
+  public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+  public :: biharmonic_wk_scalar_openacc
+  public :: neighbor_minmax_openacc
+
+
 
 contains
 
-  subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
+  subroutine biharmonic_wk_scalar_openacc(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
     use hybrid_mod            , only: hybrid_t
     use element_mod           , only: element_t
     use edgetype_mod          , only: edgeBuffer_t
     use derivative_mod        , only: derivative_t
     use control_mod           , only: hypervis_scaling
     use perf_mod              , only: t_startf, t_stopf
-    use derivative_openacc_mod, only: laplace_sphere_wk
-    use edge_openacc_mod      , only: edgeVpack, edgeVunpack
-    use bndry_openacc_mod     , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+    use derivative_mod, only: laplace_sphere_wk_openacc
+    use edge_mod      , only: edgeVpack_openacc, edgeVunpack_openacc
+    use bndry_mod     , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
     implicit none
     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     ! compute weak biharmonic operator
@@ -47,9 +64,9 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
     if(hypervis_scaling > 0) var_coef1 = .false.
     !$omp barrier
     !$omp master
-    call laplace_sphere_wk(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1)
+    call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1)
     call t_startf('biwksc_PEU')
-    call edgeVpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
+    call edgeVpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
     !$omp end master
     !$omp barrier
 
@@ -59,7 +76,7 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
     
     !$omp barrier
     !$omp master
-    call edgeVunpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
+    call edgeVunpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
     call t_stopf('biwksc_PEU')
     !$acc parallel loop gang vector collapse(5) present(qtens,elem(:))
     do ie = nets , nete
@@ -74,18 +91,18 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
         enddo
       enddo
     enddo
-    call laplace_sphere_wk(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1)
+    call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1)
     !$omp end master
     !$omp barrier
-  end subroutine biharmonic_wk_scalar
+  end subroutine biharmonic_wk_scalar_openacc
 
-  subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
+  subroutine neighbor_minmax_openacc(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
     use hybrid_mod       , only: hybrid_t
     use element_mod      , only: element_t
     use perf_mod         , only: t_startf, t_stopf
     use edgetype_mod     , only: edgeBuffer_t
-    use edge_openacc_mod , only: edgeSpack, edgeSunpackMin, edgeSunpackMax
-    use bndry_openacc_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap
+    use edge_mod , only: edgeSpack_openacc, edgeSunpackMin_openacc, edgeSunpackMax_openacc
+    use bndry_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap
     implicit none
     ! compute Q min&max over the element and all its neighbors
     integer :: nets,nete
@@ -100,8 +117,8 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
     !$omp barrier
     !$omp master
     call t_startf('nmm_PEU')
-    call edgeSpack(edgeMinMax,min_neigh,nlev*qsize,0         ,elem(:),nets,nete,1,1)
-    call edgeSpack(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
+    call edgeSpack_openacc(edgeMinMax,min_neigh,nlev*qsize,0         ,elem(:),nets,nete,1,1)
+    call edgeSpack_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
     !$omp end master
     !$omp barrier
 
@@ -111,13 +128,12 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
        
     !$omp barrier
     !$omp master
-    call edgeSunpackMin(edgeMinMax,min_neigh,nlev*qsize,0         ,elem(:),nets,nete,1,1)
-    call edgeSunpackMax(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
+    call edgeSunpackMin_openacc(edgeMinMax,min_neigh,nlev*qsize,0         ,elem(:),nets,nete,1,1)
+    call edgeSunpackMax_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
     call t_stopf('nmm_PEU')
     !$omp end master
     !$omp barrier
-  end subroutine neighbor_minmax
+  end subroutine neighbor_minmax_openacc
 
-#endif
-end module viscosity_openacc_mod
+end module viscosity_mod
 
diff --git a/components/homme/src/share/arch_switch_mod.F90 b/components/homme/src/share/arch_switch_mod.F90
deleted file mode 100644
index 8720652254f..00000000000
--- a/components/homme/src/share/arch_switch_mod.F90
+++ /dev/null
@@ -1,29 +0,0 @@
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-module arch_switch_mod
-#if USE_OPENACC
-  use prim_advection_openacc_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap
-  use openacc_utils_mod, only: arch_init2
-#else
-  use prim_advection_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap
-#endif
-  implicit none
-
-contains
-
-#if (! USE_OPENACC)
-  subroutine arch_init2( elem , deriv )
-    use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
-    use derivative_mod, only: derivative_t
-    implicit none
-    type(element_t)   , intent(in) :: elem(:)
-    type(derivative_t), intent(in) :: deriv
-    !CPU case, nothing to do
-  end subroutine arch_init2
-#endif
-
-end module arch_switch_mod
-
diff --git a/components/homme/src/share/bndry_mod.F90 b/components/homme/src/share/bndry_mod_base.F90
similarity index 99%
rename from components/homme/src/share/bndry_mod.F90
rename to components/homme/src/share/bndry_mod_base.F90
index 3be3590cf5e..998c80d2952 100644
--- a/components/homme/src/share/bndry_mod.F90
+++ b/components/homme/src/share/bndry_mod_base.F90
@@ -3,7 +3,7 @@
 #endif
 
 
-module bndry_mod
+module bndry_mod_base
   use parallel_mod, only : syncmp,parallel_t,abortmp,iam
   use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t
   use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads
@@ -1140,4 +1140,4 @@ subroutine sort_neighbor_buffer_mapping(par,elem,nets,nete)
 
 
 
-end module bndry_mod
+end module bndry_mod_base
diff --git a/components/homme/src/share/derivative_mod.F90 b/components/homme/src/share/derivative_mod_base.F90
similarity index 99%
rename from components/homme/src/share/derivative_mod.F90
rename to components/homme/src/share/derivative_mod_base.F90
index 7b2f1fce0a1..0c89625a739 100644
--- a/components/homme/src/share/derivative_mod.F90
+++ b/components/homme/src/share/derivative_mod_base.F90
@@ -3,7 +3,7 @@
 #endif
 
 
-module derivative_mod
+module derivative_mod_base
   use kinds, only : real_kind, longdouble_kind
   use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev
   use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi
@@ -2810,4 +2810,4 @@ end subroutine limiter_optim_iter_full
 
 
 
-end module derivative_mod
+end module derivative_mod_base
diff --git a/components/homme/src/share/edge_mod.F90 b/components/homme/src/share/edge_mod_base.F90
similarity index 99%
rename from components/homme/src/share/edge_mod.F90
rename to components/homme/src/share/edge_mod_base.F90
index 2205a27d5eb..6cd64ca3f66 100644
--- a/components/homme/src/share/edge_mod.F90
+++ b/components/homme/src/share/edge_mod_base.F90
@@ -2,7 +2,7 @@
 #include "config.h"
 #endif
 
-module edge_mod
+module edge_mod_base
 
   use kinds, only : int_kind, log_kind, real_kind
   use dimensions_mod, only : max_neigh_edges, nelemd
@@ -4792,7 +4792,7 @@ subroutine ghostVunpack3d(g, v, vlyr, kptr, desc, sw, se, nw, ne, mult)
   end subroutine ghostVunpack3d
 
 
-End module edge_mod
+End module edge_mod_base
 
 #if 0
 #ifndef HAVE_F2003_PTR_BND_REMAP
diff --git a/components/homme/src/share/element_mod.F90 b/components/homme/src/share/element_mod.F90
index 84811e50676..d6ce91490ac 100644
--- a/components/homme/src/share/element_mod.F90
+++ b/components/homme/src/share/element_mod.F90
@@ -36,7 +36,7 @@ module element_mod
     real (kind=real_kind) :: ps_v(np,np,timelevels)                   ! surface pressure                   4
     real (kind=real_kind) :: phis(np,np)                              ! surface geopotential (prescribed)  5
     real (kind=real_kind) :: Q   (np,np,nlev,qsize_d)                 ! Tracer concentration               6
-    real (kind=real_kind), pointer :: Qdp (:,:,:,:,:)  ! Tracer mass                        7  (np,np,nlev,qsize_d,2)   
+    real (kind=real_kind), pointer :: Qdp (:,:,:,:,:)  ! Tracer mass                        7  (np,np,nlev,qsize,2)   
   end type elem_state_t
 
   integer(kind=int_kind),public,parameter::StateComponents=8  ! num prognistics variables (for prim_restart_mod.F90)
@@ -564,12 +564,12 @@ end subroutine allocate_element_desc
 
   !___________________________________________________________________
   subroutine setup_element_pointers(elem)
-    use dimensions_mod, only: nelemd
+    use dimensions_mod, only: nelemd, qsize
     implicit none
     type(element_t), intent(inout) :: elem(:)
 #if USE_OPENACC
     integer :: ie
-    allocate( state_Qdp                (np,np,nlev,qsize_d,2,nelemd)          )
+    allocate( state_Qdp                (np,np,nlev,qsize,2,nelemd)            )
     allocate( derived_vn0              (np,np,2,nlev,nelemd)                  )
     allocate( derived_divdp            (np,np,nlev,nelemd)                    )
     allocate( derived_divdp_proj       (np,np,nlev,nelemd)                    )
diff --git a/components/homme/src/share/prim_advection_mod.F90 b/components/homme/src/share/prim_advection_mod_base.F90
similarity index 73%
rename from components/homme/src/share/prim_advection_mod.F90
rename to components/homme/src/share/prim_advection_mod_base.F90
index 11369232d72..81620cd9784 100644
--- a/components/homme/src/share/prim_advection_mod.F90
+++ b/components/homme/src/share/prim_advection_mod_base.F90
@@ -4,827 +4,39 @@
 #define NEWEULER_B4B 1
 #define OVERLAP 1
 
-      module EXTRAE_MODULE
-
-     interface
-
-         subroutine extrae_user_function (enter)
-         integer*4, intent(in) :: enter
-         end subroutine extrae_user_function
-
-      end interface
-
-      end module EXTRAE_MODULE
-
-#if 0
-SUBROUTINES:
-   prim_advec_tracers_remap_rk2()
-      SEM 2D RK2 + monotone remap + hyper viscosity
-      SEM 2D RK2 can use sign-preserving or monotone reconstruction
-
-Notes on Lagrange+REMAP advection
-dynamics will compute mean fluxes, so that (i.e. for qsplit=3)
-
-    dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3)  + 3dt D(dpdiss_sum/3)
-
-Where the floating lagrangian component:
-    dp_star(t+3) = dp(t)  -3dt div(Udp_sum/3)  + 3dt D(dpdiss_sum/3)
-OR:
-    dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) )
-
-
-For RK2 advection of Q:  (example of 2 stage RK for tracers):   dtq = qsplit*dt
-For consistency, if Q=1
-  dp1  = dp(t)- dtq div[ U1 dp(t)]
-  dp2  = dp1  - dtq div[ U2 dp1  ]  + 2*dtq D( dpdiss_ave )
-  dp*  = (dp(t) + dp2 )/2
-       =  dp(t) - dtq  div[ U1 dp(t) + U2 dp1 ]/2   + dtq D( dpdiss_ave )
-
-so we require:
-  U1 = Udp_ave / dp(t)
-  U2 = Udp_ave / dp1
-
-For tracer advection:
-  Qdp1  = Qdp(t)- dtq div[ U1 Qdp(t)]
-  Qdp2  = Qdp1  - dtq div[ U2 Qdp1  ]  + 2*dtq D( Q dpdiss_ave )
-  Qdp*  = (Qdp(t) + Qdp2 )/2
-       =  Qdp(t) - dtq  div[ U1 Qdp(t) + U2 Qdp1 ]   + dtq D( Q dpdiss_ave )
-
-Qdp1:  limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS)      with dp1 as computed above
-Qdp2:  limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS)      with dp2 as computed above
-
-For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS
-
-
-last step:
-  remap Qdp* to Qdp(t+1)   [ dp_star(t+1) -> dp(t+1) ]
-
-#endif
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!! Begin GPU remap module  !!
-!! by Rick Archibald, 2010  !!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-module vertremap_mod
-
-  !**************************************************************************************
-  !
-  !  Purpose:
-  !        Construct sub-grid-scale polynomials using piecewise spline method with
-  !        monotone filters.
-  !
-  !  References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR)
-  !              PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF)
-  !
-  !**************************************************************************************
-
-  use kinds, only                  : real_kind,int_kind
-  use dimensions_mod, only         : np,nlev,qsize,nlevp,npsq,ntrac,nc
-  use hybvcoord_mod, only          : hvcoord_t
-  use element_mod, only            : element_t
-  use fvm_control_volume_mod, only : fvm_struct
-  use spelt_mod, only              : spelt_struct
-  use perf_mod, only               : t_startf, t_stopf  ! _EXTERNAL
-  use parallel_mod, only           : abortmp, parallel_t
-  use control_mod, only : vert_remap_q_alg
-
-  public remap1                  ! remap any field, splines, monotone
-  public remap1_nofilter         ! remap any field, splines, no filter
-! todo: tweak interface to match remap1 above, rename remap1_ppm:
-  public remap_q_ppm             ! remap state%Q, PPM, monotone
-
-  contains
-
-!=======================================================================================================!
-
-!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid
-!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task.
-subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref )
-  implicit none
-  type(hvcoord_t)      , intent(in   ) :: hvcoord               !Derived type to hold vertical sigma grid parameters
-  real(kind=real_kind) , intent(in   ) :: ps                    !Surface pressure for this column
-  real(kind=real_kind) , intent(in   ) :: dt                    !Time step
-  real(kind=real_kind) , intent(in   ) :: eta_dot_dpdn(nlev+1)  !Looks like a vertical pressure flux
-                                                                !to compute deformed grid spacing
-  real(kind=real_kind) , intent(  out) :: p_lag(nlev+1)         !Pressures at interfaces of the Lagrangian deformed grid
-  real(kind=real_kind) , intent(  out) :: p_ref(nlev+1)         !Pressures at interfaces of the reference grid
-  real(kind=real_kind) , intent(  out) :: dp_lag(nlev)          !Pressure differences on Lagrangian deformed grid
-  real(kind=real_kind) , intent(  out) :: dp_ref(nlev)          !Pressure differences on reference grid
-  integer :: k                                                  !Iterator
-  p_ref(1) = 0  !Both grids have a model top pressure of zero
-  p_lag(1) = 0  !Both grids have a model top pressure of zero
-  do k = 1 , nlev
-    dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
-         ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps  !Reference pressure difference
-    ! Lagrangian pressure difference (flux in - flux out over the time step)
-    dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) )
-    p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell
-    p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell
-  enddo
-end subroutine remap_calc_grids
-
-!=======================================================================================================!
-
-
-
-subroutine remap1(Qdp,nx,qsize,dp1,dp2)
-  ! remap 1 field
-  ! input:  Qdp   field to be remapped (NOTE: MASS, not MIXING RATIO)
-  !         dp1   layer thickness (source)
-  !         dp2   layer thickness (target)
-  !
-  ! output: remaped Qdp, conserving mass, monotone on Q=Qdp/dp
-  !
-  implicit none
-  integer, intent(in) :: nx,qsize
-  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
-  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
-  ! ========================
-  ! Local Variables
-  ! ========================
-
-  real (kind=real_kind), dimension(nlev+1)    :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
-  real (kind=real_kind), dimension(nlev)      :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
-  real (kind=real_kind)  :: f_xm,level1,level2,level3,level4,level5, &
-                            peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
-                            zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
-  integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
-                            lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
-  logical :: abort=.false.
-
-  if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then
-     call remap_Q_ppm(qdp,nx,qsize,dp1,dp2)
-     return
-  endif
-
-  call t_startf('remap_Q_noppm')
-#if (defined COLUMN_OPENMP)
-!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
-!$omp    private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
-!$omp    private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
-!$omp    private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
-#endif
-  do q=1,qsize
-  do i=1,nx
-    do j=1,nx
-
-      z1c(1)=0 ! source grid
-      z2c(1)=0 ! target grid
-      do k=1,nlev
-         z1c(k+1)=z1c(k)+dp1(i,j,k)
-         z2c(k+1)=z2c(k)+dp2(i,j,k)
-      enddo
-
-      zv(1)=0
-      do k=1,nlev
-        Qcol(k)=Qdp(i,j,k,q)!  *(z1c(k+1)-z1c(k)) input is mass
-        zv(k+1) = zv(k)+Qcol(k)
-      enddo
-
-      if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
-        write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
-        write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN    '
-        write(6,*) 'DATA FOR MODEL LEVELS'
-        write(6,*) 'PLEVMODEL=',z2c(nlev+1)
-        write(6,*) 'PLEV     =',z1c(nlev+1)
-        write(6,*) 'DIFF     =',z2c(nlev+1)-z1c(nlev+1)
-        abort=.true.
-      endif
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! quadratic splies with UK met office monotonicity constraints  !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-      zkr  = 99
-      ilev = 2
-      zkr(1) = 1
-      zkr(nlev+1) = nlev
-      kloop: do k = 2,nlev
-        do jk = ilev,nlev+1
-          if (z1c(jk).ge.z2c(k)) then
-            ilev      = jk
-            zkr(k)   = jk-1
-            cycle kloop
-          endif
-        enddo
-      enddo kloop
-
-      zgam  = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
-      zgam(1)      = 0.0
-      zgam(nlev+1) = 1.0
-      zhdp = z1c(2:nlev+1)-z1c(1:nlev)
-
-
-      h = 1/zhdp
-      zarg = Qcol * h
-      rhs = 0
-      lower_diag = 0
-      diag = 0
-      upper_diag = 0
-
-      rhs(1)=3*zarg(1)
-      rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
-      rhs(nlev+1)=3*zarg(nlev)
-
-      lower_diag(1)=1
-      lower_diag(2:nlev) = h(1:nlev-1)
-      lower_diag(nlev+1)=1
-
-      diag(1)=2
-      diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
-      diag(nlev+1)=2
-
-      upper_diag(1)=1
-      upper_diag(2:nlev) = h(2:nlev)
-      upper_diag(nlev+1)=0
-
-      q_diag(1)=-upper_diag(1)/diag(1)
-      rhs(1)= rhs(1)/diag(1)
-
-      do k=2,nlev+1
-        tmp_cal    =  1/(diag(k)+lower_diag(k)*q_diag(k-1))
-        q_diag(k) = -upper_diag(k)*tmp_cal
-        rhs(k) =  (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
-      enddo
-      do k=nlev,1,-1
-        rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
-      enddo
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !!  monotonicity modifications  !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-      filter_code = 0
-      dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1)
-      dy(nlev) = dy(nlev-1)
-
-      dy = merge(zero, dy, abs(dy) < tiny )
-
-      do k=1,nlev
-        im1=MAX(1,k-1)
-        im2=MAX(1,k-2)
-        im3=MAX(1,k-3)
-        ip1=MIN(nlev,k+1)
-        t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0)
-        t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 &
-             .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 )
-        t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k)))
-
-        filter_code(k) = merge(0,1,t1+t2 > 0)
-        rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1))
-        filter_code(im1) = MAX(filter_code(im1),filter_code(k))
-      enddo
-
-      rhs = merge(qmax,rhs,rhs > qmax)
-      rhs = merge(zero,rhs,rhs < zero)
-
-      za0 = rhs(1:nlev)
-      za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
-      za2 =  3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
-
-      dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev)
-      dy = merge(zero, dy, abs(dy) < tiny )
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! Compute the 3 quadratic spline coeffients {za0, za1, za2}				   !!
-      !! knowing the quadratic spline parameters {rho_left,rho_right,zarg}		   !!
-      !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002).   !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-      h = rhs(2:nlev+1)
-
-      do k=1,nlev
-        xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny)
-        xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny)
-        f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2
-
-        t1 = merge(1,0,ABS(za2(k)) > tiny)
-        t2 = merge(1,0,xm <= zero .OR. xm >= 1)
-        t3 = merge(1,0,za2(k) > zero)
-        t4 = merge(1,0,za2(k) < zero)
-        tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2)
-        tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3)
-
-        peaks=0
-        peaks = merge(-1,peaks,tm .EQ. 1)
-        peaks = merge(+1,peaks,tp .EQ. 1)
-        peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1)
-        peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1)
-
-        im1=MAX(1,k-1)
-        im2=MAX(1,k-2)
-        ip1=MIN(nlev,k+1)
-        ip2=MIN(nlev,k+2)
-
-        t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. &
-             (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. &
-             (dy(im1)*float(peaks) <= tiny))
-
-        filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. &
-             (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny))
-
-        if (filter_code(k) > 0) then
-          level1 = rhs(k)
-          level2 = (2*rhs(k)+h(k))/3
-          level3 = 0.5*(rhs(k)+h(k))
-          level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k)
-          level5 = h(k)
-
-          t1 = merge(1,0,h(k) >= rhs(k))
-          t2 = merge(1,0,zarg(k) <= level1 .OR.  zarg(k) >= level5)
-          t3 = merge(1,0,zarg(k) >  level1 .AND. zarg(k) <  level2)
-          t4 = merge(1,0,zarg(k) >  level4 .AND. zarg(k) <  level5)
-
-          lt1 = t1*t2
-          lt2 = t1*(1-t2+t3)
-          lt3 = t1*(1-t2+1-t3+t4)
-
-          za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
-          za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
-          za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
-
-          za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
-          za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
-          za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
-
-          za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
-          za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
-          za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
-
-          t2 = merge(1,0,zarg(k) >= level1 .OR.  zarg(k) <= level5)
-          t3 = merge(1,0,zarg(k) <  level1 .AND. zarg(k) >  level2)
-          t4 = merge(1,0,zarg(k) <  level4 .AND. zarg(k) >  level5)
-
-          lt1 = (1-t1)*t2
-          lt2 = (1-t1)*(1-t2+t3)
-          lt3 = (1-t1)*(1-t2+1-t3+t4)
-
-          za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
-          za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
-          za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
-
-          za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
-          za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
-          za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
-
-          za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
-          za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
-          za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
-        endif
-      enddo
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! start iteration from top to bottom of atmosphere !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-      zv1 = 0
-      do k=1,nlev
-        if (zgam(k+1)>1d0) then
-          WRITE(*,*) 'r not in [0:1]', zgam(k+1)
-          abort=.true.
-        endif
-        zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
-             (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
-        Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
-        zv1 = zv2
-      enddo
-    enddo
-  enddo
-  enddo ! q loop
-  if (abort) call abortmp('Bad levels in remap1.  usually CFL violatioin')
-  call t_stopf('remap_Q_noppm')
-
-end subroutine remap1
-
-subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2)
-  ! remap 1 field
-  ! input:  Qdp   field to be remapped (NOTE: MASS, not MIXING RATIO)
-  !         dp1   layer thickness (source)
-  !         dp2   layer thickness (target)
-  !
-  ! output: remaped Qdp, conserving mass
-  !
-  implicit none
-  integer, intent(in) :: nx,qsize
-  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
-  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
-  ! ========================
-  ! Local Variables
-  ! ========================
-
-  real (kind=real_kind), dimension(nlev+1)    :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
-  real (kind=real_kind), dimension(nlev)      :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
-  real (kind=real_kind)  :: f_xm,level1,level2,level3,level4,level5, &
-                            peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
-                            zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
-  integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
-                            lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
-  logical :: abort=.false.
-!   call t_startf('remap1_nofilter')
-
-#if (defined COLUMN_OPENMP)
-!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
-!$omp    private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
-!$omp    private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
-!$omp    private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
-#endif
-  do q=1,qsize
-  do i=1,nx
-    do j=1,nx
-
-      z1c(1)=0 ! source grid
-      z2c(1)=0 ! target grid
-      do k=1,nlev
-         z1c(k+1)=z1c(k)+dp1(i,j,k)
-         z2c(k+1)=z2c(k)+dp2(i,j,k)
-      enddo
-
-      zv(1)=0
-      do k=1,nlev
-        Qcol(k)=Qdp(i,j,k,q)!  *(z1c(k+1)-z1c(k)) input is mass
-        zv(k+1) = zv(k)+Qcol(k)
-      enddo
-
-      if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
-        write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
-        write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN    '
-        write(6,*) 'DATA FOR MODEL LEVELS'
-        write(6,*) 'PLEVMODEL=',z2c(nlev+1)
-        write(6,*) 'PLEV     =',z1c(nlev+1)
-        write(6,*) 'DIFF     =',z2c(nlev+1)-z1c(nlev+1)
-        abort=.true.
-      endif
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! quadratic splies with UK met office monotonicity constraints  !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-      zkr  = 99
-      ilev = 2
-      zkr(1) = 1
-      zkr(nlev+1) = nlev
-      kloop: do k = 2,nlev
-        do jk = ilev,nlev+1
-          if (z1c(jk).ge.z2c(k)) then
-            ilev      = jk
-            zkr(k)   = jk-1
-            cycle kloop
-          endif
-        enddo
-      enddo kloop
-
-      zgam  = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
-      zgam(1)      = 0.0
-      zgam(nlev+1) = 1.0
-      zhdp = z1c(2:nlev+1)-z1c(1:nlev)
-
-
-      h = 1/zhdp
-      zarg = Qcol * h
-      rhs = 0
-      lower_diag = 0
-      diag = 0
-      upper_diag = 0
-
-      rhs(1)=3*zarg(1)
-      rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
-      rhs(nlev+1)=3*zarg(nlev)
-
-      lower_diag(1)=1
-      lower_diag(2:nlev) = h(1:nlev-1)
-      lower_diag(nlev+1)=1
-
-      diag(1)=2
-      diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
-      diag(nlev+1)=2
-
-      upper_diag(1)=1
-      upper_diag(2:nlev) = h(2:nlev)
-      upper_diag(nlev+1)=0
-
-      q_diag(1)=-upper_diag(1)/diag(1)
-      rhs(1)= rhs(1)/diag(1)
-
-      do k=2,nlev+1
-        tmp_cal    =  1/(diag(k)+lower_diag(k)*q_diag(k-1))
-        q_diag(k) = -upper_diag(k)*tmp_cal
-        rhs(k) =  (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
-      enddo
-      do k=nlev,1,-1
-        rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
-      enddo
-
-      za0 = rhs(1:nlev)
-      za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
-      za2 =  3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
-
-
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-      !! start iteration from top to bottom of atmosphere !!
-      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-      zv1 = 0
-      do k=1,nlev
-        if (zgam(k+1)>1d0) then
-          WRITE(*,*) 'r not in [0:1]', zgam(k+1)
-          abort=.true.
-        endif
-        zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
-             (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
-        Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
-        zv1 = zv2
-      enddo
-    enddo
-  enddo
-  enddo ! q loop
-  if (abort) call abortmp('Bad levels in remap1_nofilter.  usually CFL violatioin')
-!   call t_stopf('remap1_nofilter')
-end subroutine remap1_nofilter
-
-!=======================================================================================================!
-
-
-!This uses the exact same model and reference grids and data as remap_Q, but it interpolates
-!using PPM instead of splines.
-subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2)
-  ! remap 1 field
-  ! input:  Qdp   field to be remapped (NOTE: MASS, not MIXING RATIO)
-  !         dp1   layer thickness (source)
-  !         dp2   layer thickness (target)
-  !
-  ! output: remaped Qdp, conserving mass
-  !
-  use control_mod, only        : vert_remap_q_alg
-  implicit none
-  integer,intent(in) :: nx,qsize
-  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
-  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
-  ! Local Variables
-  integer, parameter :: gs = 2                              !Number of cells to place in the ghost region
-  real(kind=real_kind), dimension(       nlev+2 ) :: pio    !Pressure at interfaces for old grid
-  real(kind=real_kind), dimension(       nlev+1 ) :: pin    !Pressure at interfaces for new grid
-  real(kind=real_kind), dimension(       nlev+1 ) :: masso  !Accumulate mass up to each interface
-  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: ao     !Tracer value on old grid
-  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: dpo    !change in pressure over a cell for old grid
-  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: dpn    !change in pressure over a cell for old grid
-  real(kind=real_kind), dimension(3,     nlev   ) :: coefs  !PPM coefficients within each cell
-  real(kind=real_kind), dimension(       nlev   ) :: z1, z2
-  real(kind=real_kind) :: ppmdx(10,0:nlev+1)  !grid spacings
-  real(kind=real_kind) :: mymass, massn1, massn2
-  integer :: i, j, k, q, kk, kid(nlev)
-
-  call t_startf('remap_Q_ppm')
-  do j = 1 , nx
-    do i = 1 , nx
-
-      pin(1)=0
-      pio(1)=0
-      do k=1,nlev
-         dpn(k)=dp2(i,j,k)
-         dpo(k)=dp1(i,j,k)
-         pin(k+1)=pin(k)+dpn(k)
-         pio(k+1)=pio(k)+dpo(k)
-      enddo
-
-
-
-      pio(nlev+2) = pio(nlev+1) + 1.  !This is here to allow an entire block of k threads to run in the remapping phase.
-                                      !It makes sure there's an old interface value below the domain that is larger.
-      pin(nlev+1) = pio(nlev+1)       !The total mass in a column does not change.
-                                      !Therefore, the pressure of that mass cannot either.
-      !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence.
-      do k = 1 , gs
-        dpo(1   -k) = dpo(       k)
-        dpo(nlev+k) = dpo(nlev+1-k)
-      enddo
-
-      !Compute remapping intervals once for all tracers. Find the old grid cell index in which the
-      !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new
-      !interface location. In practice, the grid never deforms past one cell, so the search can be
-      !simplified by this. Also, the interval of integration is usually of magnitude close to zero
-      !or close to dpo because of minimial deformation.
-      !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so
-      !I set them equal to each other.
-      do k = 1 , nlev
-        kk = k  !Keep from an order n^2 search operation by assuming the old cell index is close.
-        !Find the index of the old grid cell in which this new cell's bottom interface resides.
-        do while ( pio(kk) <= pin(k+1) )
-          kk = kk + 1
-        enddo
-        kk = kk - 1                   !kk is now the cell index we're integrating over.
-        if (kk == nlev+1) kk = nlev   !This is to keep the indices in bounds.
-                                      !Top bounds match anyway, so doesn't matter what coefficients are used
-        kid(k) = kk                   !Save for reuse
-        z1(k) = -0.5D0                !This remapping assumes we're starting from the left interface of an old grid cell
-                                      !In fact, we're usually integrating very little or almost all of the cell in question
-        z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk)  !PPM interpolants are normalized to an independent
-                                                                        !coordinate domain [-0.5,0.5].
-      enddo
-
-      !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the
-      !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop.
-      ppmdx(:,:) = compute_ppm_grids( dpo )
-
-      !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and
-      !mass accumulation
-      do q = 1 , qsize
-        !Accumulate the old mass up to old grid cell interface locations to simplify integration
-        !during remapping. Also, divide out the grid spacing so we're working with actual tracer
-        !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure
-        !tracer consistency for an initially uniform field. I copied it from the old remap routine.
-        masso(1) = 0.
-        do k = 1 , nlev
-          ao(k) = Qdp(i,j,k,q)
-          masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping
-          ao(k) = ao(k) / dpo(k)        !Divide out the old grid spacing because we want the tracer mixing ratio, not mass.
-        enddo
-        !Fill in ghost values. Ignored if vert_remap_q_alg == 2
-        do k = 1 , gs
-          ao(1   -k) = ao(       k)
-          ao(nlev+k) = ao(nlev+1-k)
-        enddo
-        !Compute monotonic and conservative PPM reconstruction over every cell
-        coefs(:,:) = compute_ppm( ao , ppmdx )
-        !Compute tracer values on the new grid by integrating from the old cell bottom to the new
-        !cell interface to form a new grid mass accumulation. Taking the difference between
-        !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is
-        !supposed to hold the full mass this needs no normalization.
-        massn1 = 0.
-        do k = 1 , nlev
-          kk = kid(k)
-          massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk)
-          Qdp(i,j,k,q) = massn2 - massn1
-          massn1 = massn2
-        enddo
-      enddo
-    enddo
-  enddo
-  call t_stopf('remap_Q_ppm')
-end subroutine remap_Q_ppm
-
-
-!=======================================================================================================!
-
-
-!THis compute grid-based coefficients from Collela & Woodward 1984.
-function compute_ppm_grids( dx )   result(rslt)
-  use control_mod, only: vert_remap_q_alg
-  implicit none
-  real(kind=real_kind), intent(in) :: dx(-1:nlev+2)  !grid spacings
-  real(kind=real_kind)             :: rslt(10,0:nlev+1)  !grid spacings
-  integer :: j
-  integer :: indB, indE
-
-  !Calculate grid-based coefficients for stage 1 of compute_ppm
-  if (vert_remap_q_alg == 2) then
-    indB = 2
-    indE = nlev-1
-  else
-    indB = 0
-    indE = nlev+1
-  endif
-  do j = indB , indE
-    rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) )
-    rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) )
-    rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) )
-  enddo
-
-  !Caculate grid-based coefficients for stage 2 of compute_ppm
-  if (vert_remap_q_alg == 2) then
-    indB = 2
-    indE = nlev-2
-  else
-    indB = 0
-    indE = nlev
-  endif
-  do j = indB , indE
-    rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) )
-    rslt( 5,j) = 1. / sum( dx(j-1:j+2) )
-    rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) )
-    rslt( 7,j) = ( dx(j-1) + dx(j  ) ) / ( 2. * dx(j  ) + dx(j+1) )
-    rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j  ) )
-    rslt( 9,j) = dx(j  ) * ( dx(j-1) + dx(j  ) ) / ( 2.*dx(j  ) +    dx(j+1) )
-    rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / (    dx(j  ) + 2.*dx(j+1) )
-  enddo
-end function compute_ppm_grids
-
-!=======================================================================================================!
-
-
-
-!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages
-function compute_ppm( a , dx )    result(coefs)
-  use control_mod, only: vert_remap_q_alg
-  implicit none
-  real(kind=real_kind), intent(in) :: a    (    -1:nlev+2)  !Cell-mean values
-  real(kind=real_kind), intent(in) :: dx   (10,  0:nlev+1)  !grid spacings
-  real(kind=real_kind) ::             coefs(0:2,   nlev  )  !PPM coefficients (for parabola)
-  real(kind=real_kind) :: ai (0:nlev  )                     !fourth-order accurate, then limited interface values
-  real(kind=real_kind) :: dma(0:nlev+1)                     !An expression from Collela's '84 publication
-  real(kind=real_kind) :: da                                !Ditto
-  ! Hold expressions based on the grid (which are cumbersome).
-  real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10
-  real(kind=real_kind) :: al, ar                            !Left and right interface values for cell-local limiting
-  integer :: j
-  integer :: indB, indE
-
-  ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain
-  if (vert_remap_q_alg == 2) then
-    indB = 2
-    indE = nlev-1
-  else
-    indB = 0
-    indE = nlev+1
-  endif
-  do j = indB , indE
-    da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) )
-    dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da)
-    if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0.
-  enddo
-
-  ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1)
-  if (vert_remap_q_alg == 2) then
-    indB = 2
-    indE = nlev-2
-  else
-    indB = 0
-    indE = nlev
-  endif
-  do j = indB , indE
-    ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) &
-         * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) )
-  enddo
-
-  ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain
-  ! (dimension nlev) using ai on either side and ao within the cell.
-  if (vert_remap_q_alg == 2) then
-    indB = 3
-    indE = nlev-2
-  else
-    indB = 1
-    indE = nlev
-  endif
-  do j = indB , indE
-    al = ai(j-1)
-    ar = ai(j  )
-    if ( (ar - a(j)) * (a(j) - al) <= 0. ) then
-      al = a(j)
-      ar = a(j)
-    endif
-    if ( (ar - al) * (a(j) - (al + ar)/2.) >  (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar
-    if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al
-    !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx
-    coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4.
-    coefs(1,j) = ar - al
-    coefs(2,j) = -6. * a(j) + 3. * ( al + ar )
-  enddo
-
-  !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom
-  !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth
-  !moment to the cell mean is sufficient to maintain conservation.
-  if (vert_remap_q_alg == 2) then
-    coefs(0,1:2) = a(1:2)
-    coefs(1:2,1:2) = 0.
-    coefs(0,nlev-1:nlev) = a(nlev-1:nlev)
-    coefs(1:2,nlev-1:nlev) = 0.D0
-  endif
-end function compute_ppm
-
-!=======================================================================================================!
-
-
-!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx,
-!given two bounds. Make sure this gets inlined during compilation.
-function integrate_parabola( a , x1 , x2 )    result(mass)
-  implicit none
-  real(kind=real_kind), intent(in) :: a(0:2)  !Coefficients of the parabola
-  real(kind=real_kind), intent(in) :: x1      !lower domain bound for integration
-  real(kind=real_kind), intent(in) :: x2      !upper domain bound for integration
-  real(kind=real_kind)             :: mass
-  mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1
-end function integrate_parabola
-
-
-!=============================================================================================!
-
-
-
-end module vertremap_mod
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!! End GPU remap module    !!
-!! by Rick Archibald, 2010  !!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-!=======================================================================================================!
-
-
+!SUBROUTINES:
+!   prim_advec_tracers_remap_rk2()
+!      SEM 2D RK2 + monotone remap + hyper viscosity
+!      SEM 2D RK2 can use sign-preserving or monotone reconstruction
+!
+!For RK2 advection of Q:  (example of 2 stage RK for tracers):   dtq = qsplit*dt
+!For consistency, if Q=1
+!  dp1  = dp(t)- dtq div[ U1 dp(t)]
+!  dp2  = dp1  - dtq div[ U2 dp1  ]  + 2*dtq D( dpdiss_ave )
+!  dp*  = (dp(t) + dp2 )/2
+!       =  dp(t) - dtq  div[ U1 dp(t) + U2 dp1 ]/2   + dtq D( dpdiss_ave )
+!
+!so we require:
+!  U1 = Udp_ave / dp(t)
+!  U2 = Udp_ave / dp1
+!
+!For tracer advection:
+!  Qdp1  = Qdp(t)- dtq div[ U1 Qdp(t)]
+!  Qdp2  = Qdp1  - dtq div[ U2 Qdp1  ]  + 2*dtq D( Q dpdiss_ave )
+!  Qdp*  = (Qdp(t) + Qdp2 )/2
+!       =  Qdp(t) - dtq  div[ U1 Qdp(t) + U2 Qdp1 ]/2 + dtq D( Q dpdiss_ave )
+!
+!Qdp1:  limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS)      with dp1 as computed above
+!Qdp2:  limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS)      with dp2 as computed above
+!
+!For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS
+!
+!
+!last step:
+!  remap Qdp* to Qdp(t+1)   [ dp_star(t+1) -> dp(t+1) ]
 
 
-module prim_advection_mod
+module prim_advection_mod_base
 !
 ! two formulations.  both are conservative
 ! u grad Q formulation:
@@ -3068,4 +2280,4 @@ subroutine vertical_remap(hybrid,elem,fvm,hvcoord,dt,np1,np1_qdp,np1_fvm,nets,ne
   call t_stopf('vertical_remap')
   end subroutine vertical_remap
 
-end module prim_advection_mod
+end module prim_advection_mod_base
diff --git a/components/homme/src/share/prim_driver_mod.F90 b/components/homme/src/share/prim_driver_mod.F90
index a4e8b23fa48..9c54f0454dc 100644
--- a/components/homme/src/share/prim_driver_mod.F90
+++ b/components/homme/src/share/prim_driver_mod.F90
@@ -93,7 +93,7 @@ subroutine prim_init1(elem, fvm, par, dom_mt, Tl)
     ! --------------------------------
     use schedule_mod, only : genEdgeSched,  PrintSchedule
     ! --------------------------------
-    use arch_switch_mod, only: prim_advec_init1
+    use prim_advection_mod, only: prim_advec_init1
     ! --------------------------------
     use prim_advance_mod, only: prim_advance_init
     ! --------------------------------
@@ -608,7 +608,8 @@ subroutine prim_init2(elem, fvm, hybrid, nets, nete, tl, hvcoord)
     use derivative_mod, only : derivinit, interpolate_gll2fvm_points, interpolate_gll2spelt_points, v2pinit
     use global_norms_mod, only : test_global_integral, print_cfl
     use hybvcoord_mod, only : hvcoord_t
-    use arch_switch_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv, arch_init2
+    use prim_advection_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv
+    use solver_init_mod, only: solver_init2
 #ifdef CAM
 #else
     use column_model_mod, only : InitColumnModel
@@ -1101,7 +1102,7 @@ end subroutine noxinit
     if (hybrid%masterthread) write(iulog,*) "initial state:"
     call prim_printstate(elem, tl, hybrid,hvcoord,nets,nete, fvm)
 
-    call arch_init2(elem(:), deriv(hybrid%ithr))
+    call solver_init2(elem(:), deriv(hybrid%ithr))
     call Prim_Advec_Init2(elem(:), hvcoord, hybrid)
 
   end subroutine prim_init2
@@ -1161,7 +1162,7 @@ subroutine prim_run(elem, hybrid,nets,nete, dt, tl, hvcoord, advance_name)
     use control_mod, only: statefreq, integration, ftype, qsplit, disable_diagnostics
     use prim_advance_mod, only : prim_advance_exp, prim_advance_si, preq_robert3
     use prim_state_mod, only : prim_printstate, prim_diag_scalars, prim_energy_halftimes
-    use arch_switch_mod, only: deriv
+    use prim_advection_mod, only: deriv
     use parallel_mod, only : abortmp
 #ifndef CAM
     use column_model_mod, only : ApplyColumnModel
@@ -1617,7 +1618,7 @@ subroutine prim_step(elem, fvm, hybrid,nets,nete, dt, tl, hvcoord, compute_diagn
     use fvm_bsp_mod, only : get_boomerang_velocities_gll, get_solidbody_velocities_gll
     use prim_advance_mod, only : prim_advance_exp, overwrite_SEdensity
     use prim_advection_mod, only : prim_advec_tracers_fvm
-    use arch_switch_mod, only : prim_advec_tracers_remap, deriv
+    use prim_advection_mod, only : prim_advec_tracers_remap, deriv
     use derivative_mod, only : subcell_integration
 #if defined(_SPELT)
     use prim_advection_mod, only : prim_advec_tracers_spelt
@@ -2026,7 +2027,7 @@ subroutine smooth_topo_datasets(phis,sghdyn,sgh30dyn,elem,hybrid,nets,nete)
     use derivative_mod, only : derivative_t , laplace_sphere_wk
     use viscosity_mod, only : biharmonic_wk
     use prim_advance_mod, only : smooth_phis
-    use arch_switch_mod, only: deriv
+    use prim_advection_mod, only: deriv
     implicit none
 
     integer , intent(in) :: nets,nete
diff --git a/components/homme/src/share/solver_init_mod_base.F90 b/components/homme/src/share/solver_init_mod_base.F90
new file mode 100644
index 00000000000..774d103a9c3
--- /dev/null
+++ b/components/homme/src/share/solver_init_mod_base.F90
@@ -0,0 +1,25 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module solver_init_mod_base
+  implicit none
+  private
+  !Everything in this module is private except solver_init2, which is exported below.
+  public :: solver_init2
+
+
+contains
+
+  !solver_init2: initialization hook that takes the element array and a derivative_t (both intent(in)) and changes nothing.
+  subroutine solver_init2( elem , deriv )
+    use element_mod, only: element_t
+    use derivative_mod, only: derivative_t
+    implicit none
+    type(element_t)   , intent(in) :: elem(:)
+    type(derivative_t), intent(in) :: deriv
+    !Intentionally empty: this version performs no work with elem or deriv.
+  end subroutine solver_init2
+
+
+end module solver_init_mod_base
diff --git a/components/homme/src/share/vertremap_mod_base.F90 b/components/homme/src/share/vertremap_mod_base.F90
new file mode 100644
index 00000000000..c915acf7350
--- /dev/null
+++ b/components/homme/src/share/vertremap_mod_base.F90
@@ -0,0 +1,767 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+!SUBROUTINES:
+!
+!Notes on Lagrange+REMAP advection
+!dynamics will compute mean fluxes, so that (i.e. for qsplit=3)
+!
+!    dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3)  + 3dt D(dpdiss_sum/3)
+!
+!Where the floating lagrangian component:
+!    dp_star(t+3) = dp(t)  -3dt div(Udp_sum/3)  + 3dt D(dpdiss_sum/3)
+!OR:
+!    dp_star(t+3) = dp(t+3) + 3dt d( eta_dot_dpdn_ave(t) )
+
+
+module vertremap_mod_base
+
+  !**************************************************************************************
+  !
+  !  Purpose:
+  !        Construct sub-grid-scale polynomials using piecewise spline method with
+  !        monotone filters.
+  !
+  !  References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR)
+  !              PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF)
+  !
+  !**************************************************************************************
+
+  use kinds, only                  : real_kind,int_kind
+  use dimensions_mod, only         : np,nlev,qsize,nlevp,npsq,ntrac,nc
+  use hybvcoord_mod, only          : hvcoord_t
+  use element_mod, only            : element_t
+  use fvm_control_volume_mod, only : fvm_struct
+  use spelt_mod, only              : spelt_struct
+  use perf_mod, only               : t_startf, t_stopf  ! _EXTERNAL
+  use parallel_mod, only           : abortmp, parallel_t
+  use control_mod, only : vert_remap_q_alg
+
+  public remap1                  ! remap any field, splines, monotone
+  public remap1_nofilter         ! remap any field, splines, no filter
+! todo: tweak interface to match remap1 above, rename remap1_ppm:
+  public remap_q_ppm             ! remap state%Q, PPM, monotone
+
+  contains
+
+!=======================================================================================================!
+
+!remap_calc_grids builds, for a single vertical column, the interface pressures and layer thicknesses of both the
+!reference grid and the deformed Lagrangian grid. It was factored out because each remap routine repeated this work.
+subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref )
+  implicit none
+  type(hvcoord_t)      , intent(in   ) :: hvcoord               !Vertical coordinate data; hyai, hybi and ps0 are read here
+  real(kind=real_kind) , intent(in   ) :: ps                    !Surface pressure for this column (scales the hybi differences)
+  real(kind=real_kind) , intent(in   ) :: dt                    !Time step multiplying the eta_dot_dpdn differences
+  real(kind=real_kind) , intent(in   ) :: eta_dot_dpdn(nlev+1)  !Interface values; dt times their difference across
+                                                                !a layer is added to dp_ref to obtain dp_lag
+  real(kind=real_kind) , intent(  out) :: p_lag(nlev+1)         !Pressures at interfaces of the Lagrangian deformed grid
+  real(kind=real_kind) , intent(  out) :: p_ref(nlev+1)         !Pressures at interfaces of the reference grid
+  real(kind=real_kind) , intent(  out) :: dp_lag(nlev)          !Pressure differences on Lagrangian deformed grid
+  real(kind=real_kind) , intent(  out) :: dp_ref(nlev)          !Pressure differences on reference grid
+  integer :: k                                                  !Layer index, 1..nlev
+  p_ref(1) = 0  !First (top) interface of the reference grid is set to zero pressure
+  p_lag(1) = 0  !First (top) interface of the Lagrangian grid is set to zero pressure
+  do k = 1 , nlev
+    dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
+         ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps  !Reference thickness: hyai difference * ps0 + hybi difference * ps
+    ! Lagrangian thickness: reference thickness plus dt times the eta_dot_dpdn difference across layer k
+    dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) )
+    p_ref(k+1) = p_ref(k) + dp_ref(k) !Interface pressure is the running sum of the reference layer thicknesses
+    p_lag(k+1) = p_lag(k) + dp_lag(k) !Interface pressure is the running sum of the Lagrangian layer thicknesses
+  enddo
+end subroutine remap_calc_grids
+
+!=======================================================================================================!
+
+
+
+subroutine remap1(Qdp,nx,qsize,dp1,dp2)
+  ! remap 1 field
+  ! input:  Qdp   field to be remapped (NOTE: MASS, not MIXING RATIO)
+  !         dp1   layer thickness (source)
+  !         dp2   layer thickness (target)
+  !
+  ! output: remapped Qdp, conserving mass, monotone on Q=Qdp/dp
+  !
+  implicit none
+  integer, intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! ========================
+  ! Local Variables
+  ! ========================
+
+  real (kind=real_kind), dimension(nlev+1)    :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+  real (kind=real_kind), dimension(nlev)      :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
+  real (kind=real_kind)  :: f_xm,level1,level2,level3,level4,level5, &
+                            peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
+                            zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
+  integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
+                            lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
+  logical :: abort=.false.
+
+  if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then
+     call remap_Q_ppm(qdp,nx,qsize,dp1,dp2)
+     return
+  endif
+
+  call t_startf('remap_Q_noppm')
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
+!$omp    private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
+!$omp    private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
+!$omp    private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
+#endif
+  do q=1,qsize
+  do i=1,nx
+    do j=1,nx
+
+      z1c(1)=0 ! source grid
+      z2c(1)=0 ! target grid
+      do k=1,nlev
+         z1c(k+1)=z1c(k)+dp1(i,j,k)
+         z2c(k+1)=z2c(k)+dp2(i,j,k)
+      enddo
+
+      zv(1)=0
+      do k=1,nlev
+        Qcol(k)=Qdp(i,j,k,q)!  *(z1c(k+1)-z1c(k)) input is mass
+        zv(k+1) = zv(k)+Qcol(k)
+      enddo
+
+      if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+        write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+        write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN    '
+        write(6,*) 'DATA FOR MODEL LEVELS'
+        write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+        write(6,*) 'PLEV     =',z1c(nlev+1)
+        write(6,*) 'DIFF     =',z2c(nlev+1)-z1c(nlev+1)
+        abort=.true.
+      endif
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !! quadratic splines with UK met office monotonicity constraints !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+      zkr  = 99
+      ilev = 2
+      zkr(1) = 1
+      zkr(nlev+1) = nlev
+      kloop: do k = 2,nlev
+        do jk = ilev,nlev+1
+          if (z1c(jk).ge.z2c(k)) then
+            ilev      = jk
+            zkr(k)   = jk-1
+            cycle kloop
+          endif
+        enddo
+      enddo kloop
+
+      zgam  = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+      zgam(1)      = 0.0
+      zgam(nlev+1) = 1.0
+      zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+
+
+      h = 1/zhdp
+      zarg = Qcol * h
+      rhs = 0
+      lower_diag = 0
+      diag = 0
+      upper_diag = 0
+
+      rhs(1)=3*zarg(1)
+      rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+      rhs(nlev+1)=3*zarg(nlev)
+
+      lower_diag(1)=1
+      lower_diag(2:nlev) = h(1:nlev-1)
+      lower_diag(nlev+1)=1
+
+      diag(1)=2
+      diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+      diag(nlev+1)=2
+
+      upper_diag(1)=1
+      upper_diag(2:nlev) = h(2:nlev)
+      upper_diag(nlev+1)=0
+
+      q_diag(1)=-upper_diag(1)/diag(1)
+      rhs(1)= rhs(1)/diag(1)
+
+      do k=2,nlev+1
+        tmp_cal    =  1/(diag(k)+lower_diag(k)*q_diag(k-1))
+        q_diag(k) = -upper_diag(k)*tmp_cal
+        rhs(k) =  (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+      enddo
+      do k=nlev,1,-1
+        rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+      enddo
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !!  monotonicity modifications  !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+      filter_code = 0
+      dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1)
+      dy(nlev) = dy(nlev-1)
+
+      dy = merge(zero, dy, abs(dy) < tiny )
+
+      do k=1,nlev
+        im1=MAX(1,k-1)
+        im2=MAX(1,k-2)
+        im3=MAX(1,k-3)
+        ip1=MIN(nlev,k+1)
+        t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0)
+        t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 &
+             .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 )
+        t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k)))
+
+        filter_code(k) = merge(0,1,t1+t2 > 0)
+        rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1))
+        filter_code(im1) = MAX(filter_code(im1),filter_code(k))
+      enddo
+
+      rhs = merge(qmax,rhs,rhs > qmax)
+      rhs = merge(zero,rhs,rhs < zero)
+
+      za0 = rhs(1:nlev)
+      za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+      za2 =  3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+      dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev)
+      dy = merge(zero, dy, abs(dy) < tiny )
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !! Compute the 3 quadratic spline coefficients {za0, za1, za2}                !!
+      !! knowing the quadratic spline parameters {rho_left,rho_right,zarg}          !!
+      !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002).   !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+      h = rhs(2:nlev+1)
+
+      do k=1,nlev
+        xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny)
+        xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny)
+        f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2
+
+        t1 = merge(1,0,ABS(za2(k)) > tiny)
+        t2 = merge(1,0,xm <= zero .OR. xm >= 1)
+        t3 = merge(1,0,za2(k) > zero)
+        t4 = merge(1,0,za2(k) < zero)
+        tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2)
+        tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3)
+
+        peaks=0
+        peaks = merge(-1,peaks,tm .EQ. 1)
+        peaks = merge(+1,peaks,tp .EQ. 1)
+        peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1)
+        peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1)
+
+        im1=MAX(1,k-1)
+        im2=MAX(1,k-2)
+        ip1=MIN(nlev,k+1)
+        ip2=MIN(nlev,k+2)
+
+        t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. &
+             (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. &
+             (dy(im1)*float(peaks) <= tiny))
+
+        filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. &
+             (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny))
+
+        if (filter_code(k) > 0) then
+          level1 = rhs(k)
+          level2 = (2*rhs(k)+h(k))/3
+          level3 = 0.5*(rhs(k)+h(k))
+          level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k)
+          level5 = h(k)
+
+          t1 = merge(1,0,h(k) >= rhs(k))
+          t2 = merge(1,0,zarg(k) <= level1 .OR.  zarg(k) >= level5)
+          t3 = merge(1,0,zarg(k) >  level1 .AND. zarg(k) <  level2)
+          t4 = merge(1,0,zarg(k) >  level4 .AND. zarg(k) <  level5)
+
+          lt1 = t1*t2
+          lt2 = t1*(1-t2+t3)
+          lt3 = t1*(1-t2+1-t3+t4)
+
+          za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+          za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+          za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+          za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+          za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+          za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+          za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+          za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+          za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+
+          t2 = merge(1,0,zarg(k) >= level1 .OR.  zarg(k) <= level5)
+          t3 = merge(1,0,zarg(k) <  level1 .AND. zarg(k) >  level2)
+          t4 = merge(1,0,zarg(k) <  level4 .AND. zarg(k) >  level5)
+
+          lt1 = (1-t1)*t2
+          lt2 = (1-t1)*(1-t2+t3)
+          lt3 = (1-t1)*(1-t2+1-t3+t4)
+
+          za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+          za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+          za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+          za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+          za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+          za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+          za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+          za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+          za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+        endif
+      enddo
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !! start iteration from top to bottom of atmosphere !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+      zv1 = 0
+      do k=1,nlev
+        if (zgam(k+1)>1d0) then
+          WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+          abort=.true.
+        endif
+        zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+             (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+        Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+        zv1 = zv2
+      enddo
+    enddo
+  enddo
+  enddo ! q loop
+  if (abort) call abortmp('Bad levels in remap1.  usually CFL violatioin')
+  call t_stopf('remap_Q_noppm')
+
+end subroutine remap1
+
+subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2)
+  ! Remap qsize mass fields from a source to a target vertical grid, one column at a
+  ! time, using a piecewise-quadratic reconstruction. Unlike remap1, no filter is
+  ! applied to the quadratic coefficients before they are integrated.
+  !
+  ! input:  Qdp    field(s) to be remapped (NOTE: MASS, not MIXING RATIO)
+  !         nx     extent of both horizontal dimensions of Qdp, dp1 and dp2
+  !         qsize  number of fields held in the last dimension of Qdp
+  !         dp1    layer thickness (source)
+  !         dp2    layer thickness (target)
+  !
+  ! output: remapped Qdp (overwritten in place), conserving mass
+  !
+  ! The routine calls abortmp after all columns are processed if, in any column, the
+  ! two grids disagree on the total thickness by 1e-6 or more, or a normalized
+  ! interface position zgam exceeds 1.
+  !
+  implicit none
+  integer, intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! ========================
+  ! Local Variables
+  ! ========================
+
+  ! interface-indexed work arrays (nlev+1 entries per column)
+  real (kind=real_kind), dimension(nlev+1)    :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+  ! layer-indexed work arrays (nlev entries per column)
+  real (kind=real_kind), dimension(nlev)      :: h,Qcol,za0,za1,za2,zarg,zhdp
+  real (kind=real_kind)  :: tmp_cal,zv1,zv2
+  integer(kind=int_kind) :: zkr(nlev+1),i,ilev,j,jk,k,q
+  ! Deliberately no initializer here: initializing in the declaration implies SAVE,
+  ! which would turn the flag into one static variable shared by all callers.
+  logical :: abort
+!   call t_startf('remap1_nofilter')
+
+  abort = .false.
+
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,k,jk,ilev,zkr,z1c,z2c,zv,Qcol,zgam,zhdp,h,zarg) &
+!$omp    private(rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,za0,za1,za2,zv1,zv2)
+#endif
+  do q=1,qsize
+  do i=1,nx
+    do j=1,nx
+
+      ! cumulative thickness at the interfaces of both grids
+      z1c(1)=0 ! source grid
+      z2c(1)=0 ! target grid
+      do k=1,nlev
+         z1c(k+1)=z1c(k)+dp1(i,j,k)
+         z2c(k+1)=z2c(k)+dp2(i,j,k)
+      enddo
+
+      ! zv(k+1): mass accumulated over source layers 1..k
+      zv(1)=0
+      do k=1,nlev
+        Qcol(k)=Qdp(i,j,k,q)!  *(z1c(k+1)-z1c(k)) input is mass
+        zv(k+1) = zv(k)+Qcol(k)
+      enddo
+
+      ! both grids must span the same total thickness
+      if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+        write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+        write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN    '
+        write(6,*) 'DATA FOR MODEL LEVELS'
+        write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+        write(6,*) 'PLEV     =',z1c(nlev+1)
+        write(6,*) 'DIFF     =',z2c(nlev+1)-z1c(nlev+1)
+        abort=.true.
+      endif
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !! quadratic splines (no monotonicity filter in this variant)    !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+      ! zkr(k): index of the source layer whose lower interface is the first one at or
+      ! beyond target interface k. The search resumes at ilev, where the previous
+      ! interface was found; 99 is only a placeholder for entries not yet assigned.
+      zkr  = 99
+      ilev = 2
+      zkr(1) = 1
+      zkr(nlev+1) = nlev
+      kloop: do k = 2,nlev
+        do jk = ilev,nlev+1
+          if (z1c(jk).ge.z2c(k)) then
+            ilev      = jk
+            zkr(k)   = jk-1
+            cycle kloop
+          endif
+        enddo
+      enddo kloop
+
+      ! zgam(k): position of target interface k inside source layer zkr(k), as a
+      ! fraction of that layer's thickness; the two end interfaces are pinned to 0 and 1
+      zgam  = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+      zgam(1)      = 0.0
+      zgam(nlev+1) = 1.0
+      zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+
+
+      ! h: inverse source layer thickness, zarg: layer mean (mass per unit thickness)
+      h = 1/zhdp
+      zarg = Qcol * h
+      rhs = 0
+      lower_diag = 0
+      diag = 0
+      upper_diag = 0
+
+      ! tridiagonal system for the interface values; the solution overwrites rhs
+      rhs(1)=3*zarg(1)
+      rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+      rhs(nlev+1)=3*zarg(nlev)
+
+      lower_diag(1)=1
+      lower_diag(2:nlev) = h(1:nlev-1)
+      lower_diag(nlev+1)=1
+
+      diag(1)=2
+      diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+      diag(nlev+1)=2
+
+      upper_diag(1)=1
+      upper_diag(2:nlev) = h(2:nlev)
+      upper_diag(nlev+1)=0
+
+      ! forward elimination ...
+      q_diag(1)=-upper_diag(1)/diag(1)
+      rhs(1)= rhs(1)/diag(1)
+
+      do k=2,nlev+1
+        tmp_cal    =  1/(diag(k)+lower_diag(k)*q_diag(k-1))
+        q_diag(k) = -upper_diag(k)*tmp_cal
+        rhs(k) =  (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+      enddo
+      ! ... followed by back substitution
+      do k=nlev,1,-1
+        rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+      enddo
+
+      ! Per-layer parabola za0 + za1*x + za2*x**2 on x in [0,1]: it takes the values
+      ! rhs(k) at x=0 and rhs(k+1) at x=1, and its integral over [0,1] equals zarg(k).
+      za0 = rhs(1:nlev)
+      za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+      za2 =  3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+      !! start iteration from top to bottom of atmosphere !!
+      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+      ! zv2 is the mass accumulated down to target interface k+1: the mass of all
+      ! complete source layers above it plus the integral of the parabola over the
+      ! fraction zgam of the layer that contains it. Differencing gives the layer mass.
+      zv1 = 0
+      do k=1,nlev
+        if (zgam(k+1)>1d0) then
+          WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+          abort=.true.
+        endif
+        zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+             (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+        Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+        zv1 = zv2
+      enddo
+    enddo
+  enddo
+  enddo ! q loop
+  if (abort) call abortmp('Bad levels in remap1_nofilter.  usually CFL violation')
+!   call t_stopf('remap1_nofilter')
+end subroutine remap1_nofilter
+
+!=======================================================================================================!
+
+
+!This uses the exact same model and reference grids and data as remap_Q, but it interpolates
+!using PPM instead of splines.
+subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2)
+  ! remap qsize fields, one column at a time
+  ! input:  Qdp    field(s) to be remapped (NOTE: MASS, not MIXING RATIO)
+  !         nx     extent of both horizontal dimensions of Qdp, dp1 and dp2
+  !         qsize  number of fields held in the last dimension of Qdp
+  !         dp1    layer thickness (source)
+  !         dp2    layer thickness (target)
+  !
+  ! output: remapped Qdp (overwritten in place), conserving mass
+  !
+  implicit none
+  integer,intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! Local Variables
+  integer, parameter :: gs = 2                              !Number of cells to place in the ghost region
+  real(kind=real_kind), dimension(       nlev+2 ) :: pio    !Pressure at interfaces for old grid
+  real(kind=real_kind), dimension(       nlev+1 ) :: pin    !Pressure at interfaces for new grid
+  real(kind=real_kind), dimension(       nlev+1 ) :: masso  !Accumulate mass up to each interface
+  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: ao     !Tracer value on old grid
+  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: dpo    !change in pressure over a cell for old grid
+  real(kind=real_kind), dimension(  1-gs:nlev+gs) :: dpn    !change in pressure over a cell for new grid (only 1:nlev is set)
+  real(kind=real_kind), dimension(3,     nlev   ) :: coefs  !PPM coefficients within each cell
+  real(kind=real_kind), dimension(       nlev   ) :: z1, z2 !Normalized integration bounds for each new cell interface
+  real(kind=real_kind) :: ppmdx(10,0:nlev+1)  !grid-based coefficients from compute_ppm_grids
+  real(kind=real_kind) :: massn1, massn2      !new-grid mass accumulated to the previous / current interface
+  integer :: i, j, k, q, kk, kid(nlev)
+
+  call t_startf('remap_Q_ppm')
+  do j = 1 , nx
+    do i = 1 , nx
+
+      !Interface pressures of both grids, accumulated from the layer thicknesses
+      pin(1)=0
+      pio(1)=0
+      do k=1,nlev
+         dpn(k)=dp2(i,j,k)
+         dpo(k)=dp1(i,j,k)
+         pin(k+1)=pin(k)+dpn(k)
+         pio(k+1)=pio(k)+dpo(k)
+      enddo
+
+
+
+      pio(nlev+2) = pio(nlev+1) + 1.  !This is here to allow an entire block of k threads to run in the remapping phase.
+                                      !It makes sure there's an old interface value below the domain that is larger.
+      pin(nlev+1) = pio(nlev+1)       !The total mass in a column does not change.
+                                      !Therefore, the pressure of that mass cannot either.
+      !Fill in the ghost regions with mirrored values. If vert_remap_q_alg == 2, this is of no consequence.
+      do k = 1 , gs
+        dpo(1   -k) = dpo(       k)
+        dpo(nlev+k) = dpo(nlev+1-k)
+      enddo
+
+      !Compute remapping intervals once for all tracers. Find the old grid cell index in which the
+      !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new
+      !interface location. In practice, the grid never deforms past one cell, so the search can be
+      !simplified by this. Also, the interval of integration is usually of magnitude close to zero
+      !or close to dpo because of minimal deformation.
+      !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so
+      !I set them equal to each other.
+      do k = 1 , nlev
+        kk = k  !Keep from an order n^2 search operation by assuming the old cell index is close.
+        !Find the index of the old grid cell in which this new cell's bottom interface resides.
+        do while ( pio(kk) <= pin(k+1) )
+          kk = kk + 1
+        enddo
+        kk = kk - 1                   !kk is now the cell index we're integrating over.
+        if (kk == nlev+1) kk = nlev   !This is to keep the indices in bounds.
+                                      !Top bounds match anyway, so doesn't matter what coefficients are used
+        kid(k) = kk                   !Save for reuse
+        z1(k) = -0.5D0                !This remapping assumes we're starting from the left interface of an old grid cell
+                                      !In fact, we're usually integrating very little or almost all of the cell in question
+        z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk)  !PPM interpolants are normalized to an independent
+                                                                        !coordinate domain [-0.5,0.5].
+      enddo
+
+      !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the
+      !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop.
+      ppmdx(:,:) = compute_ppm_grids( dpo )
+
+      !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and
+      !mass accumulation
+      do q = 1 , qsize
+        !Accumulate the old mass up to old grid cell interface locations to simplify integration
+        !during remapping. Also, divide out the grid spacing so we're working with actual tracer
+        !values and can conserve mass.
+        masso(1) = 0.
+        do k = 1 , nlev
+          ao(k) = Qdp(i,j,k,q)
+          masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping
+          ao(k) = ao(k) / dpo(k)        !Divide out the old grid spacing because we want the tracer mixing ratio, not mass.
+        enddo
+        !Fill in ghost values. Ignored if vert_remap_q_alg == 2
+        do k = 1 , gs
+          ao(1   -k) = ao(       k)
+          ao(nlev+k) = ao(nlev+1-k)
+        enddo
+        !Compute monotonic and conservative PPM reconstruction over every cell
+        coefs(:,:) = compute_ppm( ao , ppmdx )
+        !Compute tracer values on the new grid by integrating from the old cell bottom to the new
+        !cell interface to form a new grid mass accumulation. Taking the difference between
+        !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is
+        !supposed to hold the full mass this needs no normalization.
+        massn1 = 0.
+        do k = 1 , nlev
+          kk = kid(k)
+          massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk)
+          Qdp(i,j,k,q) = massn2 - massn1
+          massn1 = massn2
+        enddo
+      enddo
+    enddo
+  enddo
+  call t_stopf('remap_Q_ppm')
+end subroutine remap_Q_ppm
+
+
+!=======================================================================================================!
+
+
+!This computes grid-based coefficients from Colella & Woodward 1984.
+!
+!  dx    grid spacings, including a 2-cell ghost region on either side (indices -1:nlev+2)
+!  rslt  rslt(1:3,j) are the factors used by stage 1 of compute_ppm for cell j,
+!        rslt(4:10,j) are the factors used by stage 2 of compute_ppm for index j
+!
+!Only the index ranges selected below are computed; they are the same ranges compute_ppm loops over.
+!When vert_remap_q_alg == 2 the ranges are narrowed so that no ghost spacing is referenced.
+function compute_ppm_grids( dx )   result(rslt)
+  use control_mod, only: vert_remap_q_alg
+  implicit none
+  real(kind=real_kind), intent(in) :: dx(-1:nlev+2)  !grid spacings
+  real(kind=real_kind)             :: rslt(10,0:nlev+1)  !grid-based coefficients
+  integer :: j
+  integer :: indB, indE
+
+  !Neither loop below covers the whole result (rslt(4:10,nlev+1) is never computed, and more entries are
+  !skipped when vert_remap_q_alg == 2), so give every element a defined value before it is returned.
+  rslt = 0.0_real_kind
+
+  !Calculate grid-based coefficients for stage 1 of compute_ppm
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-1
+  else
+    indB = 0
+    indE = nlev+1
+  endif
+  do j = indB , indE
+    rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) )
+    rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) )
+    rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) )
+  enddo
+
+  !Calculate grid-based coefficients for stage 2 of compute_ppm
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-2
+  else
+    indB = 0
+    indE = nlev
+  endif
+  do j = indB , indE
+    rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) )
+    rslt( 5,j) = 1. / sum( dx(j-1:j+2) )
+    rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) )
+    rslt( 7,j) = ( dx(j-1) + dx(j  ) ) / ( 2. * dx(j  ) + dx(j+1) )
+    rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j  ) )
+    rslt( 9,j) = dx(j  ) * ( dx(j-1) + dx(j  ) ) / ( 2.*dx(j  ) +    dx(j+1) )
+    rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / (    dx(j  ) + 2.*dx(j+1) )
+  enddo
+end function compute_ppm_grids
+
+!=======================================================================================================!
+
+
+
+!This computes a limited parabolic interpolant using a net 5-cell stencil, with the work split into 3 stages.
+!
+!  a      cell-mean values, including a 2-cell ghost region on either side (indices -1:nlev+2)
+!  dx     grid-based coefficients, laid out as returned by compute_ppm_grids
+!  coefs  parabola coefficients coefs(0:2,j) of cell j in the normalized coordinate xi=(x-x0)/dx
+!
+!When vert_remap_q_alg == 2 no ghost value of a is referenced: only cells 3:nlev-2 receive a parabola and the
+!two cells next to each boundary are made piecewise constant.
+function compute_ppm( a , dx )    result(coefs)
+  use control_mod, only: vert_remap_q_alg
+  implicit none
+  real(kind=real_kind), intent(in) :: a    (    -1:nlev+2)  !Cell-mean values
+  real(kind=real_kind), intent(in) :: dx   (10,  0:nlev+1)  !grid-based coefficients
+  real(kind=real_kind) ::             coefs(0:2,   nlev  )  !PPM coefficients (for parabola)
+  real(kind=real_kind) :: ai (0:nlev  )                     !fourth-order accurate, then limited interface values
+  real(kind=real_kind) :: dma(0:nlev+1)                     !An expression from Colella's '84 publication
+  real(kind=real_kind) :: da                                !Ditto
+  real(kind=real_kind) :: al, ar                            !Left and right interface values for cell-local limiting
+  integer :: j
+  integer :: indB, indE
+
+  ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-1
+  else
+    indB = 0
+    indE = nlev+1
+  endif
+  do j = indB , indE
+    da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) )
+    ! Magnitude limited by twice each one-sided difference, carrying the sign of da. The unit
+    ! constant uses real_kind so that both arguments of sign have the same kind.
+    dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.0_real_kind,da)
+    ! a(j) is a local extremum (or one neighbor is equal): no slope
+    if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0.
+  enddo
+
+  ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1)
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-2
+  else
+    indB = 0
+    indE = nlev
+  endif
+  do j = indB , indE
+    ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) &
+         * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) )
+  enddo
+
+  ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain
+  ! (dimension nlev) using ai on either side and ao within the cell.
+  if (vert_remap_q_alg == 2) then
+    indB = 3
+    indE = nlev-2
+  else
+    indB = 1
+    indE = nlev
+  endif
+  do j = indB , indE
+    al = ai(j-1)
+    ar = ai(j  )
+    ! Cell mean does not lie strictly between the two interface values: flatten the cell
+    if ( (ar - a(j)) * (a(j) - al) <= 0. ) then
+      al = a(j)
+      ar = a(j)
+    endif
+    ! Otherwise move one interface value when the two tests below flag the parabola
+    if ( (ar - al) * (a(j) - (al + ar)/2.) >  (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar
+    if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al
+    !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx
+    coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4.
+    coefs(1,j) = ar - al
+    coefs(2,j) = -6. * a(j) + 3. * ( al + ar )
+  enddo
+
+  !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom
+  !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth
+  !moment to the cell mean is sufficient to maintain conservation.
+  if (vert_remap_q_alg == 2) then
+    coefs(0,1:2) = a(1:2)
+    coefs(1:2,1:2) = 0.
+    coefs(0,nlev-1:nlev) = a(nlev-1:nlev)
+    coefs(1:2,nlev-1:nlev) = 0.D0
+  endif
+end function compute_ppm
+
+!=======================================================================================================!
+
+
+!Definite integral, between x1 and x2, of the parabola a(0) + a(1)*xi + a(2)*xi**2 written in the
+!normalized coordinate xi=(x-x0)/dx. Kept tiny on purpose: make sure it gets inlined during compilation.
+function integrate_parabola( a , x1 , x2 )    result(integral)
+  implicit none
+  real(kind=real_kind), intent(in) :: a(0:2)  !Coefficients of the parabola
+  real(kind=real_kind), intent(in) :: x1      !lower domain bound for integration
+  real(kind=real_kind), intent(in) :: x2      !upper domain bound for integration
+  real(kind=real_kind)             :: integral
+  real(kind=real_kind)             :: const_part, linear_part, quad_part  !contribution of each power of xi
+
+  const_part  = a(0) * (x2 - x1)
+  linear_part = a(1) * (x2 ** 2 - x1 ** 2) / 2.0D0
+  quad_part   = a(2) * (x2 ** 3 - x1 ** 3) / 3.0D0
+  !Summed in the order constant, linear, quadratic
+  integral = const_part + linear_part + quad_part
+end function integrate_parabola
+
+
+!=============================================================================================!
+
+
+
+end module vertremap_mod_base
+
+
+
+
diff --git a/components/homme/src/share/viscosity_mod.F90 b/components/homme/src/share/viscosity_mod_base.F90
similarity index 99%
rename from components/homme/src/share/viscosity_mod.F90
rename to components/homme/src/share/viscosity_mod_base.F90
index c005a3eff3b..c8cb824ded3 100644
--- a/components/homme/src/share/viscosity_mod.F90
+++ b/components/homme/src/share/viscosity_mod_base.F90
@@ -2,7 +2,7 @@
 #include "config.h"
 #endif
 
-module viscosity_mod
+module viscosity_mod_base
 !
 !  This module should be renamed "global_deriv_mod.F90"
 ! 
@@ -1032,4 +1032,4 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,nt,min_neigh,max_nei
 end subroutine
 
 #endif
-end module
+end module viscosity_mod_base
diff --git a/components/homme/src/sweqx/CMakeLists.txt b/components/homme/src/sweqx/CMakeLists.txt
index 8409fd086be..56af9356ec7 100644
--- a/components/homme/src/sweqx/CMakeLists.txt
+++ b/components/homme/src/sweqx/CMakeLists.txt
@@ -13,6 +13,16 @@ SET (SWEQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS}
                         ${UTILS_TIMING_DIR} CACHE INTERNAL "include directories for sweqx")
 
 SET(SWEQX_SRCS
+               ${SRC_SHARE}/bndry_mod_base.F90
+               ${SRC_SHARE}/derivative_mod_base.F90
+               ${SRC_SHARE}/edge_mod_base.F90
+               ${SRC_SHARE}/solver_init_mod_base.F90
+               ${SRC_SHARE}/viscosity_mod_base.F90
+               ${SRC_BASE}/sweqx/bndry_mod.F90
+               ${SRC_BASE}/sweqx/derivative_mod.F90
+               ${SRC_BASE}/sweqx/edge_mod.F90
+               ${SRC_BASE}/sweqx/solver_init_mod.F90
+               ${SRC_BASE}/sweqx/viscosity_mod.F90
                ${SRC_BASE}/advance_mod.F90
                ${SRC_BASE}/checksum_mod.F90
                ${SRC_BASE}/common_io_mod.F90
@@ -32,16 +42,13 @@ SET(SWEQX_SRCS
                ${SRC_BASE}/types_mod.F90
                ${SRC_SHARE}/fvm_bsp_mod.F90
                ${SRC_SHARE}/fvm_transformation_mod.F90
-               ${SRC_SHARE}/bndry_mod.F90
                ${SRC_SHARE}/cg_mod.F90
                ${SRC_SHARE}/control_mod.F90
                ${SRC_SHARE}/coordinate_systems_mod.F90
                ${SRC_SHARE}/cube_mod.F90
-               ${SRC_SHARE}/derivative_mod.F90
                ${SRC_SHARE}/dimensions_mod.F90
                ${SRC_SHARE}/dof_mod.F90
                ${SRC_SHARE}/domain_mod.F90
-               ${SRC_SHARE}/edge_mod.F90
                ${SRC_SHARE}/element_mod.F90
                ${SRC_SHARE}/filter_mod.F90
                ${SRC_SHARE}/fvm_analytic_mod.F90
@@ -76,7 +83,6 @@ SET(SWEQX_SRCS
                ${SRC_SHARE}/spacecurve_mod.F90
                ${SRC_SHARE}/thread_mod.F90
                ${SRC_SHARE}/time_mod.F90
-               ${SRC_SHARE}/viscosity_mod.F90
                ${SRC_SHARE}/unit_tests_mod.F90
                ${SRC_UTILS}/shr_file_mod.F90
                ${SRC_UTILS}/shr_kind_mod.F90
diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90
new file mode 100644
index 00000000000..5ad1125cbef
--- /dev/null
+++ b/components/homme/src/sweqx/bndry_mod.F90
@@ -0,0 +1,13 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module bndry_mod
+  ! sweqx front end for bndry_mod_base: makes the entities named in the only-list available
+  ! under the module name bndry_mod and defines nothing of its own.
+  use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, &
+                            compute_ghost_corner_orientation, ghost_exchangeV, &
+                            bndry_exchangeS, bndry_exchangeS_start, &
+                            bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+  implicit none
+end module bndry_mod
diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90
new file mode 100644
index 00000000000..e9c3f81be3f
--- /dev/null
+++ b/components/homme/src/sweqx/derivative_mod.F90
@@ -0,0 +1,18 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module derivative_mod
+  ! sweqx front end for derivative_mod_base: makes the entities named in the only-list available
+  ! under the module name derivative_mod and defines nothing of its own.
+  use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, &
+                                 subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, &
+                                 derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, &
+                                 interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+                                 remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, &
+                                 ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, &
+                                 curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, &
+                                 gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, laplace_sphere_wk, &
+                                 divergence_sphere_wk, gradient_sphere, divergence_sphere
+  implicit none
+end module derivative_mod
diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90
new file mode 100644
index 00000000000..902503b36cb
--- /dev/null
+++ b/components/homme/src/sweqx/edge_mod.F90
@@ -0,0 +1,18 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module edge_mod
+  ! sweqx front end for edge_mod_base: makes the entities named in the only-list available
+  ! under the module name edge_mod and defines nothing of its own.
+  use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, &
+                           initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, &
+                           edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, &
+                           edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+                           ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, &
+                           ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, &
+                           ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, &
+                           ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, ghostVunpack2d_level, &
+                           edgeSpack, edgeSunpackMin, edgeSunpackMax
+  implicit none
+end module edge_mod
diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90
new file mode 100644
index 00000000000..90efb25eadf
--- /dev/null
+++ b/components/homme/src/sweqx/solver_init_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module solver_init_mod  ! sweqx front end: exposes solver_init2 from solver_init_mod_base under this module name
+  use solver_init_mod_base, only: solver_init2
+  implicit none
+end module solver_init_mod
diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90
new file mode 100644
index 00000000000..f78ae69a8f3
--- /dev/null
+++ b/components/homme/src/sweqx/viscosity_mod.F90
@@ -0,0 +1,18 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module viscosity_mod
+  ! sweqx front end for viscosity_mod_base: makes the entities named in the only-lists available
+  ! under the module name viscosity_mod and defines nothing of its own.
+  use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, &
+                                compute_zeta_C0_contra, compute_div_C0_contra, &
+                                make_c0, neighbor_minmax
+#ifdef _PRIM
+  ! requested only when _PRIM is defined
+  use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, &
+                                neighbor_minmax_start, neighbor_minmax_finish, &
+                                biharmonic_wk_dp3d
+#endif
+  implicit none
+end module viscosity_mod
diff --git a/components/homme/test/jw_baroclinic/openacc_work/cpu.job b/components/homme/test/jw_baroclinic/openacc_work/cpu.job
index bebb6ea3fb5..7771f0f2628 100644
--- a/components/homme/test/jw_baroclinic/openacc_work/cpu.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/cpu.job
@@ -1,6 +1,6 @@
 #!/bin/tcsh -f
 #PBS -A STF006
-#PBS -l nodes=24
+#PBS -l nodes=3
 #PBS -q batch
 #PBS -l walltime=0:45:00
 #PBS -N baroclinic
@@ -83,7 +83,7 @@ echo $mpirun
 
 set u_perturb = 1
 set rotate = 0
-set ne    = 30     # horizontal resolution   4,16,30,60,120,240
+set ne    =  8     # horizontal resolution   4,16,30,60,120,240
 set nlev  = 64     # vertical resolution  26,30,60,64,96
 set qsize = 50     # number of passive tracers
 set namelist = jw_baroclinic.nl  # CAM-style, vertically lagrangian
diff --git a/components/homme/test/jw_baroclinic/openacc_work/openacc.job b/components/homme/test/jw_baroclinic/openacc_work/openacc.job
index 8600dcee570..3193fcea868 100644
--- a/components/homme/test/jw_baroclinic/openacc_work/openacc.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/openacc.job
@@ -1,6 +1,6 @@
 #!/bin/tcsh -f
 #PBS -A STF006
-#PBS -l nodes=24
+#PBS -l nodes=3
 #PBS -q batch
 #PBS -l walltime=0:45:00
 #PBS -N baroclinic
@@ -43,7 +43,7 @@ set exe    = preqx.openacc
   module rm cray-netcdf-hdf5parallel
   module rm cray-parallel-netcdf
     module load PrgEnv-pgi
-    module switch pgi pgi/15.9.lustre
+    module switch pgi pgi/15.10.0
     module switch cray-mpich cray-mpich/7.2.2
     module switch cray-libsci cray-libsci/13.0.4
     module load esmf/5.2.0rp2
@@ -71,7 +71,7 @@ setenv MPICH_CPUMASK_DISPLAY 1
 #setenv CUDA_PROFILE_CONFIG ./cuda_config
 #setenv PGI_ACC_DEBUG 1
 
-setenv OMP_NUM_THREADS 4
+setenv OMP_NUM_THREADS 1
 set NCPU = $PBS_NUM_NODES
 @ NCPU *= 16
 @ NCPU /= $OMP_NUM_THREADS
@@ -83,7 +83,7 @@ echo $mpirun
 
 set u_perturb = 1
 set rotate = 0
-set ne    = 30     # horizontal resolution   4,16,30,60,120,240
+set ne    = 8      # horizontal resolution   4,16,30,60,120,240
 set nlev  = 64     # vertical resolution  26,30,60,64,96
 set qsize = 50     # number of passive tracers
 set namelist = jw_baroclinic.nl  # CAM-style, vertically lagrangian
diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
old mode 100644
new mode 100755
index 5dde0fbab77..7147e817eb8
--- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
@@ -7,7 +7,7 @@
 
 #  set paths to source code, build directory and run directory
 set wdir   = /proj/imn/HOMME_ACME             # run directory
-set HOMME  = /home/imn/ACME/models/atm/homme                # HOMME svn checkout     
+set HOMME  = /home/imn/ACME/components/homme                # HOMME svn checkout     
 set input  = $HOMME/test/jw_baroclinic/openacc_work    # input files for test case
 set vdir   = $HOMME/test/vcoord            # vertical coordinate files
 set exedir = $HOMME/build/preqx
diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
old mode 100644
new mode 100755
index 63846c35c5b..df7e9943c9c
--- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
@@ -7,7 +7,7 @@
 
 #  set paths to source code, build directory and run directory
 set wdir   = /proj/imn/HOMME_ACME             # run directory
-set HOMME  = /home/imn/ACME/models/atm/homme                # HOMME svn checkout     
+set HOMME  = /home/imn/ACME/components/homme                # HOMME svn checkout     
 set input  = $HOMME/test/jw_baroclinic/openacc_work    # input files for test case
 set vdir   = $HOMME/test/vcoord            # vertical coordinate files
 set exedir = $HOMME/build/preqx
@@ -28,9 +28,9 @@ limit stacksize unlimited
 setenv MPSTKZ 64M
 setenv OMP_STACKSIZE 64M
 
-setenv CUDA_PROFILE 1
-setenv CUDA_PROFILE_LOG "cuda_profile_%p.log"
-setenv CUDA_PROFILE_CONFIG ./cuda_config
+#setenv CUDA_PROFILE 1
+#setenv CUDA_PROFILE_LOG "cuda_profile_%p.log"
+#setenv CUDA_PROFILE_CONFIG ./cuda_config
 
 #set i=0
 #mkdir -f /tmp/mps_$i
diff --git a/components/homme/test/reg_test/namelists/swtc1-dg.nl b/components/homme/test/reg_test/namelists/swtc1-dg.nl
deleted file mode 100644
index 5bed81bde07..00000000000
--- a/components/homme/test/reg_test/namelists/swtc1-dg.nl
+++ /dev/null
@@ -1,69 +0,0 @@
-!=======================================================!
-! 	1 day = 1 * 24 * 3600 = 86400 sec		!
-! 	nmax  = ndays * 86400 / tstep = 100
-! 	12 days at 120.0 stepsize: nmax= 8640 		!
-!=======================================================!
-&ctl_nl
-NThreads      = 1
-partmethod    = 4
-topology      = "cube"
-test_case     = 'swtc1'
-ne            = 10
-ndays	      = 12
-statefreq     = 864
-tasknum       = 0
-restartfreq   = -1
-restartfile   = "./restart/R000000050"
-runtype       = 0
-tstep         = 100
-integration   = "explicit"
-smooth        = 0.05
-nu            = 0
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits        = 100
-tol           = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type   = "taylor"
-filter_freq   = 1
-filter_mu     = 0.05D0
-p_bv          = 12.0D0
-s_bv          = .666666666666666666D0
-wght_fm       = 0.10D0
-kcut_fm       = 2
-/
-&analysis_nl
-!=======================================================!
-!  currently up to 5 streams are allowed		!
-!  output_stream_count=1				!
-!							!
-!  timunits: 0= steps, 1=days, 2=hours			!
-!  output_timeunits=1,2 				!
-!  output_start_time=0,1176				!			
-!  output_end_time=-1,-1				!
-!  output_frequency=1,1 				!
-!  output_dir ="./movies/"				!
-!							!
-!  allowed variables: 'ps   ','geop ','u    ','v    ',	!
-!                     'latp ','lonp ','latv ','lonv ',	!
-!                     'elem ','Time ' 			!
-!							!
-!  output_varnames1-5					!
-!=======================================================!
-output_start_time =  0
-output_end_time   = -1
-output_frequency  =  1
-output_timeunits  =  1
-output_varnames1  = 'u', 'v', 'ps', 'geop'
-output_type       = 'netcdf'
-interp_nlon       = 720
-interp_nlat       = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 4
-alpha_dg = 0.0D0
-/
diff --git a/components/homme/test/reg_test/namelists/swtc2-dg.nl b/components/homme/test/reg_test/namelists/swtc2-dg.nl
deleted file mode 100644
index eca5a773ca5..00000000000
--- a/components/homme/test/reg_test/namelists/swtc2-dg.nl
+++ /dev/null
@@ -1,70 +0,0 @@
-!=======================================================!
-! 	1 day = 1 * 24 * 3600 = 86400 sec		!
-! 	nmax  = ndays * 86400 / tstep 			!
-! 	12 days at 30.0 stepsize: nmax= 34560		!
-!=======================================================!
-&ctl_nl
-NThreads      = 1
-partmethod    = 4
-topology      = "cube"
-test_case     = "swtc2"
-ne            = 6
-ndays         = 5
-statefreq     = 2880
-tasknum       = 0
-restartfreq   = -1
-restartfile   = "./restart/R000000050"
-runtype       = 0
-tstep         = 30.0
-integration   = "explicit"
-smooth        = 0.05
-nu            = 7.0e5
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits        = 100
-tol           = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type   = "taylor"
-filter_freq   = 1
-filter_mu     = 0.05D0
-p_bv          = 12.0D0
-s_bv          = .666666666666666666D0
-wght_fm       = 0.10D0
-kcut_fm       = 2
-/
-&analysis_nl
-!=======================================================!
-!  currently up to 5 streams are allowed		!
-!  output_stream_count=1				!
-!							!
-!  timunits: 0= steps, 1=days, 2=hours			!
-!  output_timeunits=1,2 				!
-!  output_start_time=0,1176				!			
-!  output_end_time=-1,-1				!
-!  output_frequency=1,1 				!
-!  output_dir ="./movies/"				!
-!							!
-!  allowed variables: 'ps   ','geop ','u    ','v    ',	!
-!                     'latp ','lonp ','latv ','lonv ',	!
-!                     'elem ','Time ' 			!
-!							!
-!  output_varnames1-5					!
-!=======================================================!
-output_start_time = 0
-output_end_time   = -1
-output_frequency  = 1
-output_timeunits  = 1
-output_varnames1  = 'u', 'v', 'ps', 'geop', 'zeta'
-output_type       = 'netcdf'
-interp_nlon       = 720
-interp_nlat       = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 4
-alpha_dg = 0.0D0
-/
-
diff --git a/components/homme/test/reg_test/namelists/swtc5-dg.nl b/components/homme/test/reg_test/namelists/swtc5-dg.nl
deleted file mode 100644
index 75324e58b89..00000000000
--- a/components/homme/test/reg_test/namelists/swtc5-dg.nl
+++ /dev/null
@@ -1,69 +0,0 @@
-!=======================================================!
-! 	1 day = 1 * 24 * 3600 = 86400 sec		!
-! 	nmax  = ndays * 86400 / tstep 			!
-! 	12 days at 30.0 stepsize: nmax= 34560		!
-!=======================================================!
-&ctl_nl
-NThreads      = 1
-partmethod    = 4
-topology      = "cube"
-test_case     = "swtc5"
-ne            = 6
-ndays         = 15
-statefreq     = 2880
-tasknum       = 0
-restartfreq   = -1
-restartfile   = "./restart/R000000050"
-runtype       = 0
-tstep         = 30.0
-integration   = "explicit"
-smooth        = 0.05
-nu            = 7.0e5
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits        = 100
-tol           = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type   = "taylor"
-filter_freq   = 1
-filter_mu     = 0.05D0
-p_bv          = 12.0D0
-s_bv          = .666666666666666666D0
-wght_fm       = 0.10D0
-kcut_fm       = 2
-/
-&analysis_nl
-!=======================================================!
-!  currently up to 5 streams are allowed		!
-!  output_stream_count=1				!
-!							!
-!  timunits: 0= steps, 1=days, 2=hours			!
-!  output_timeunits=1,2 				!
-!  output_start_time=0,1176				!			
-!  output_end_time=-1,-1				!
-!  output_frequency=1,1 				!
-!  output_dir ="./movies/"				!
-!							!
-!  allowed variables: 'ps   ','geop ','u    ','v    ',	!
-!                     'latp ','lonp ','latv ','lonv ',	!
-!                     'elem ','Time ' 			!
-!							!
-!  output_varnames1-5					!
-!=======================================================!
-output_start_time = 0
-output_end_time   = -1
-output_frequency  = 1
-output_timeunits  = 1
-output_varnames1  = 'u', 'v', 'ps', 'geop', 'zeta'
-output_type       = 'netcdf'
-interp_nlon       = 720
-interp_nlat       = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 0
-alpha_dg = 0.0D0
-/
diff --git a/components/homme/test/reg_test/run_tests/baro1a.cmake b/components/homme/test/reg_test/run_tests/baro1a.cmake
index 89b79b23c36..973a7ceab6e 100644
--- a/components/homme/test/reg_test/run_tests/baro1a.cmake
+++ b/components/homme/test/reg_test/run_tests/baro1a.cmake
@@ -10,8 +10,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro1a)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroA)
 
diff --git a/components/homme/test/reg_test/run_tests/baro1b.cmake b/components/homme/test/reg_test/run_tests/baro1b.cmake
index 42bf345f79a..8c6f9125397 100644
--- a/components/homme/test/reg_test/run_tests/baro1b.cmake
+++ b/components/homme/test/reg_test/run_tests/baro1b.cmake
@@ -10,8 +10,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro1b)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroB)
 
diff --git a/components/homme/test/reg_test/run_tests/baro2a.cmake b/components/homme/test/reg_test/run_tests/baro2a.cmake
index 0eee75a6c69..f7bfbfcce9a 100644
--- a/components/homme/test/reg_test/run_tests/baro2a.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2a.cmake
@@ -9,8 +9,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro2a)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroC)
 
diff --git a/components/homme/test/reg_test/run_tests/baro2b.cmake b/components/homme/test/reg_test/run_tests/baro2b.cmake
index cfeb4ae313b..cc88c15c1e6 100644
--- a/components/homme/test/reg_test/run_tests/baro2b.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2b.cmake
@@ -11,8 +11,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro2b)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroC)
 
diff --git a/components/homme/test/reg_test/run_tests/baro2c.cmake b/components/homme/test/reg_test/run_tests/baro2c.cmake
index 82101c0e9af..425da22414f 100644
--- a/components/homme/test/reg_test/run_tests/baro2c.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2c.cmake
@@ -4,8 +4,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro2c)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroC)
 
diff --git a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
index 66881f565d9..f7082997f4d 100644
--- a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
@@ -10,9 +10,6 @@
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro2d-imp)
 
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE prim)
-
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME primA)
 
diff --git a/components/homme/test/reg_test/run_tests/baro2d.cmake b/components/homme/test/reg_test/run_tests/baro2d.cmake
index bfc76e55de7..f51a7955525 100644
--- a/components/homme/test/reg_test/run_tests/baro2d.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2d.cmake
@@ -11,8 +11,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baro2d)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroC)
 
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
index 5a240184657..688ff6644e9 100644
--- a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
@@ -11,8 +11,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baroCamMoistSL)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroCam)
 
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake
new file mode 100644
index 00000000000..9767cc947a8
--- /dev/null
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake
@@ -0,0 +1,51 @@
+###############################################################
+# RK + PIO_INTERP, run with the baroCam-acc executable
+###############################################################
+#
+# Spectral Element -- 9 days of ASP baroclinic test
+# (Jablonowski and Williamson test + 4 tracers)
+# NE=15, dt=150, nu=1e16, filter_freq=0, NV=4, PLEV=26
+# (explicit RK with subcycling)
+#
+###############################################################
+
+# The name of this test (should be the basename of this file)
+SET(TEST_NAME baroCamMoist-acc)
+
+# The specifically compiled executable that this test uses
+SET(EXEC_NAME baroCam-acc)
+
+SET(NUM_CPUS 16)
+# Input files: the namelist is the one used by the baroCamMoist test (baroCamMoist.nl)
+SET(NAMELIST_FILES 
+${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist.nl
+)
+SET(VCOORD_FILES ${HOMME_ROOT}/test/vcoord/*26*)
+SET(REFSOLN_FILES ${HOMME_ROOT}/test/reg_test/ref_sol/T340ref.nc)
+
+SET(NC_OUTPUT_FILES 
+  camBaroMoist-asp_baroclinic1.nc 
+  camBaroMoist-asp_baroclinic2.nc
+)
+
+
+# OMP sub-tests are disabled for now; the settings are kept below for reference
+#SET(OMP_SUB_TESTS true)
+#SET(OMP_NUM_THREADS 4)
+#SET(OMP_NAMELIST_FILES 
+#${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist-omp4.nl
+#)
+
+# NOTE(review): NC_OUTPUT_REF points into ../../baroCamMoist/movies, so the baroCamMoist test presumably has to run first -- confirm test ordering
+# Compare output with the CPU-only baroCamMoist test, using tolerance TESTCASE_REF_TOL:
+SET(TESTCASE_REF_TOL 1E-11)
+SET(NC_OUTPUT_REF   
+  ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic1.nc 
+  ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic2.nc 
+)
+SET(NC_OUTPUT_CHECKREF    
+  camBaroMoist-asp_baroclinic1.nc 
+  camBaroMoist-asp_baroclinic2.nc
+)
+
+
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
index 904ee7f6696..3989967caee 100644
--- a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
@@ -11,8 +11,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME baroCamMoist)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroCam)
 
diff --git a/components/homme/test/reg_test/run_tests/swimtc5.cmake b/components/homme/test/reg_test/run_tests/swimtc5.cmake
index a2ba5e84b9f..e5398816556 100644
--- a/components/homme/test/reg_test/run_tests/swimtc5.cmake
+++ b/components/homme/test/reg_test/run_tests/swimtc5.cmake
@@ -5,8 +5,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swimtc5)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swim)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swim5)
 
diff --git a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake b/components/homme/test/reg_test/run_tests/swtc1-dg.cmake
deleted file mode 100644
index 7dc8bc37c9a..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-###############################################################
-#
-# Discontinuous Galerkin -- swtc1
-# NE=10, dt=100, nu=0, limiter=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc1-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-
-SET(NC_OUTPUT_FILES swtc11.nc)
diff --git a/components/homme/test/reg_test/run_tests/swtc1.cmake b/components/homme/test/reg_test/run_tests/swtc1.cmake
index de170ab52b3..ce3ddaf3590 100644
--- a/components/homme/test/reg_test/run_tests/swtc1.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc1.cmake
@@ -9,8 +9,7 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swtc1)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
+
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swtcA)
 
diff --git a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake b/components/homme/test/reg_test/run_tests/swtc2-dg.cmake
deleted file mode 100644
index dcbc5b9d6ee..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-###############################################################
-# RKSSP default benchmark (used to check nothing is broken)
-###############################################################
-#
-# Discontinous Galerkin swtc2
-# NE=6, dt=30, nu=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc2-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-
-SET(NC_OUTPUT_FILES swtc21.nc)
diff --git a/components/homme/test/reg_test/run_tests/swtc2.cmake b/components/homme/test/reg_test/run_tests/swtc2.cmake
index eac2e7bfdd0..1659361a0b9 100644
--- a/components/homme/test/reg_test/run_tests/swtc2.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc2.cmake
@@ -9,8 +9,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swtc2)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swtcA)
 
diff --git a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake b/components/homme/test/reg_test/run_tests/swtc5-dg.cmake
deleted file mode 100644
index 8d482b38d7d..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-###############################################################
-#
-# Discontinuous Galerkin -- swtc5
-# NE=6, dt=30, nu=7e5, limiter=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc5-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-SET(NCL_FILES ${HOMME_ROOT}/test/reg_test/ncl/swtc5ref.ncl)
-
-SET(NC_OUTPUT_FILES swtc51.nc)
-
diff --git a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
index 166bd7e3893..16155506e38 100644
--- a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
@@ -9,8 +9,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swtc5-mr)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swtcB)
 
diff --git a/components/homme/test/reg_test/run_tests/swtc5.cmake b/components/homme/test/reg_test/run_tests/swtc5.cmake
index 9407f6c62ec..35de2a7b160 100644
--- a/components/homme/test/reg_test/run_tests/swtc5.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc5.cmake
@@ -9,8 +9,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swtc5)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swtcB)
 
diff --git a/components/homme/test/reg_test/run_tests/swtc6.cmake b/components/homme/test/reg_test/run_tests/swtc6.cmake
index 894eb9b578f..d0a8bef306e 100644
--- a/components/homme/test/reg_test/run_tests/swtc6.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc6.cmake
@@ -7,8 +7,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME swtc6)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME swtcC)
 
diff --git a/components/homme/test/reg_test/run_tests/templates.cmake b/components/homme/test/reg_test/run_tests/templates.cmake
index 2b413d73ccb..d50f27d9de9 100644
--- a/components/homme/test/reg_test/run_tests/templates.cmake
+++ b/components/homme/test/reg_test/run_tests/templates.cmake
@@ -15,8 +15,6 @@
 
 # The name of this test (should be the basename of this file)
 SET(TEST_NAME templates)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
 # The specifically compiled executable that this test uses
 SET(EXEC_NAME baroCam)
 
diff --git a/components/homme/test/reg_test/run_tests/test-list.cmake b/components/homme/test/reg_test/run_tests/test-list.cmake
index c66831ccebb..12fbf8a3367 100644
--- a/components/homme/test/reg_test/run_tests/test-list.cmake
+++ b/components/homme/test/reg_test/run_tests/test-list.cmake
@@ -10,6 +10,13 @@ SET(HOMME_TESTS
   baro2d.cmake
   baroCamMoist.cmake
   baroCamMoist-SL.cmake
+  baroCamMoist-acc.cmake
   baro2d-imp.cmake
   templates.cmake
 )
+
+        
+#set_tests_properties(
+#  baroCamMoist-acc.cmake PROPERTIES DEPENDS baroCamMoist.cmake
+#)
+
diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh
index a2a364de619..8ce21fcb1f9 100644
--- a/components/homme/test/reg_test/run_tests/testing-utils.sh
+++ b/components/homme/test/reg_test/run_tests/testing-utils.sh
@@ -482,16 +482,20 @@ execLine() {
       echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
     elif [ "${MPI_EXEC}" = "runjob" ]; then
       echo "runjob -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} --block \$COBALT_PARTNAME --verbose=INFO : $EXEC $OPT" >> $RUN_SCRIPT
+    elif [ "${MPI_EXEC}" = "aprun" ] ; then
+      if [[ $4 == *"_OMP"* ]]; then
+        echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
+      else
+        echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
+      fi
     else
       echo "${MPI_EXEC} -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
     fi
   else
     if [ "$HOMME_Submission_Type" = lsf ]; then
       echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
-
     elif [ "$HOMME_Submission_Type" = pbs ]; then
-      echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
-
+        echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
     else
       echo "mpiexec -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
 
diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt
index e2e6b4abda1..5017b92ceea 100644
--- a/components/homme/test_execs/CMakeLists.txt
+++ b/components/homme/test_execs/CMakeLists.txt
@@ -167,6 +167,7 @@ IF(${BUILD_HOMME_PREQX})
 #  ADD_SUBDIRECTORY(baroB)
   ADD_SUBDIRECTORY(baroC)
   ADD_SUBDIRECTORY(baroCam)
+  ADD_SUBDIRECTORY(baroCam-acc)
 ENDIF()
 
 # Add the test exec subdirs for the prim executable
diff --git a/components/homme/test_execs/baroA/CMakeLists.txt b/components/homme/test_execs/baroA/CMakeLists.txt
index cf504ddc027..a378d446ec0 100644
--- a/components/homme/test_execs/baroA/CMakeLists.txt
+++ b/components/homme/test_execs/baroA/CMakeLists.txt
@@ -1,4 +1,8 @@
 
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
 # Set the variables for this test executable
 #                          NP  NC PLEV USE_PIO  WITH_ENERGY
 createTestExec(baroA preqx  8   4   20   FALSE        TRUE)
diff --git a/components/homme/test_execs/baroB/CMakeLists.txt b/components/homme/test_execs/baroB/CMakeLists.txt
index ef96e91490e..1d4ba9cbd5f 100644
--- a/components/homme/test_execs/baroB/CMakeLists.txt
+++ b/components/homme/test_execs/baroB/CMakeLists.txt
@@ -1,4 +1,8 @@
 
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
 # Set the variables for this test executable
 #                          NP  NC PLEV USE_PIO WITH_ENERGY
 createTestExec(baroB preqx  8   4   20    TRUE      FALSE)
diff --git a/components/homme/test_execs/baroC/CMakeLists.txt b/components/homme/test_execs/baroC/CMakeLists.txt
index b1500f3abe1..c4118fa1298 100644
--- a/components/homme/test_execs/baroC/CMakeLists.txt
+++ b/components/homme/test_execs/baroC/CMakeLists.txt
@@ -1,4 +1,8 @@
 
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
 # Set the variables for this test executable
 #                         NP  NC PLEV USE_PIO  WITH_ENERGY
 createTestExec(baroC preqx 4   4   26   FALSE        TRUE)
diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt
new file mode 100644
index 00000000000..f48313201f8
--- /dev/null
+++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Set the qsize_d in dimensions_mod.F90, enable OpenACC, and append the OpenACC compile/link flags
+SET(QSIZE_D 25)
+SET(USE_OPENACC TRUE)
+SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}")
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}")
+# If the user specified a file for custom compiler options, use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
+# Set the variables for this test executable (column labels line up with the arguments below)
+#                                   NP  NC PLEV USE_PIO  WITH_ENERGY
+createTestExec(baroCam-acc preqx_acc 4   4   26   TRUE        TRUE)
diff --git a/components/homme/test_execs/baroCam/CMakeLists.txt b/components/homme/test_execs/baroCam/CMakeLists.txt
index 21c7160c5c5..a56151eefb0 100644
--- a/components/homme/test_execs/baroCam/CMakeLists.txt
+++ b/components/homme/test_execs/baroCam/CMakeLists.txt
@@ -1,5 +1,9 @@
 # Set the qsize_d in dimensions_mod.F90
 SET(QSIZE_D 25)
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+  setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
 # Set the variables for this test executable
 #                           NP  NC PLEV USE_PIO  WITH_ENERGY
 createTestExec(baroCam preqx 4   4   26   TRUE        TRUE)