diff --git a/cime/machines-acme/Depends.intel b/cime/machines-acme/Depends.intel
index c213b908ff3..577f7aa320c 100644
--- a/cime/machines-acme/Depends.intel
+++ b/cime/machines-acme/Depends.intel
@@ -1,9 +1,10 @@
#
PERFOBJS=\
-prim_advection_mod.o \
-edge_mod.o \
-derivative_mod.o \
-bndry_mod.o \
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
prim_advance_mod.o \
uwshcu.o
diff --git a/cime/machines-acme/Depends.intel14 b/cime/machines-acme/Depends.intel14
index 0c913270b2b..32e4747d7a3 100644
--- a/cime/machines-acme/Depends.intel14
+++ b/cime/machines-acme/Depends.intel14
@@ -6,23 +6,23 @@
#ecosys_mod.o: ecosys_mod.F90
# $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -CU $<
-prim_advection_mod.o: prim_advection_mod.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
-edge_mod.o: edge_mod.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
-derivative_mod.o: derivative_mod.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
+PERFOBJS=\
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
+prim_advance_mod.o \
+uwshcu.o \
+wetdep.o
-bndry_mod.o: bndry_mod.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
-
-prim_advance_mod.o: prim_advance_mod.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
-
-uwshcu.o: uwshcu.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
-
-wetdep.o: wetdep.F90
- $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
+ifeq ($(DEBUG),FALSE)
+ $(PERFOBJS): %.o: %.F90
+ $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $<
+ $(REDUCED_OPT_OBJS): %.o: %.F90
+ $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $<
+ $(REDUCED_PRECISION_OBJS): %.o: %.F90
+ $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -fimf-precision=low -fp-model fast $<
+endif
diff --git a/cime/machines-acme/Depends.intelmic b/cime/machines-acme/Depends.intelmic
index 0f84949daa3..6f05aee44a7 100644
--- a/cime/machines-acme/Depends.intelmic
+++ b/cime/machines-acme/Depends.intelmic
@@ -1,6 +1,6 @@
-derivative_mod.o: derivative_mod.F90
- $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $<
+#derivative_mod_base.o: derivative_mod_base.F90
+# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $<
shr_ncread_mod.o: shr_ncread_mod.F90
$(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $<
diff --git a/cime/machines-acme/Depends.intelmic14 b/cime/machines-acme/Depends.intelmic14
index 0f84949daa3..6f05aee44a7 100644
--- a/cime/machines-acme/Depends.intelmic14
+++ b/cime/machines-acme/Depends.intelmic14
@@ -1,6 +1,6 @@
-derivative_mod.o: derivative_mod.F90
- $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $<
+#derivative_mod_base.o: derivative_mod_base.F90
+# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $<
shr_ncread_mod.o: shr_ncread_mod.F90
$(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $<
diff --git a/cime/machines-acme/Depends.mira b/cime/machines-acme/Depends.mira
index 85453dd931a..7f9c05b6299 100644
--- a/cime/machines-acme/Depends.mira
+++ b/cime/machines-acme/Depends.mira
@@ -13,13 +13,16 @@ shr_reprosum_mod.o: shr_reprosum_mod.F90
# These routines benefit from -qnostrict without violating the bfb test
PERFOBJS=\
-prim_advection_mod.o \
-edge_mod.o \
-derivative_mod.o \
-bndry_mod.o \
+prim_advection_mod_base.o \
+vertremap_mod_base.o \
+edge_mod_base.o \
+derivative_mod_base.o \
+bndry_mod_base.o \
prim_advance_mod.o \
uwshcu.o \
-wetdep.o
+wetdep.o
+
+
ifeq ($(DEBUG),FALSE)
$(PERFOBJS): %.o: %.F90
$(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qnostrict $<
diff --git a/cime/scripts/Tools/config_definition.xml b/cime/scripts/Tools/config_definition.xml
index 4a5f76e22f2..5dcf37c5532 100644
--- a/cime/scripts/Tools/config_definition.xml
+++ b/cime/scripts/Tools/config_definition.xml
@@ -1363,6 +1363,14 @@
sdesc="CAM dynamical core"
>
+
+
Specify a compiler for the target machine (optional)
default: default compiler for the target machine
+ -camse_target Specify a target for the cam-se dycore (ignored for other atmospheric dycores)
+ default: preqx
+ options: preqx, preqx_acc
-mpilib Specify a mpi library for the target machine (optional)
default: default mpi library for the target machine
allowed: openmpi, mpich, ibm, mpi-serial, etc
@@ -166,6 +169,7 @@ GetOptions(
"confopts=s" => \$opts{'confopts'},
"project=s" => \$opts{'project'},
"compiler=s" => \$opts{'compiler'},
+ "camse_target=s" => \$opts{'camse_target'},
"mpilib=s" => \$opts{'mpilib'},
"res=s" => \$opts{'res'},
"h|help" => \$opts{'help'},
@@ -211,6 +215,7 @@ my $pecount;
my $xmlmode;
my $project;
my $compiler;
+my $camse_target;
my $mpilib;
if (!$opts{'list'} && !$opts{'testlist'}) {
# Check for manditory case input
@@ -260,6 +265,13 @@ if (!$opts{'list'} && !$opts{'testlist'}) {
$compiler = $opts{'compiler'};
}
+ # Check if camse_target option is given; otherwise default to "preqx"
+ if ($opts{'camse_target'}) {
+ $camse_target = $opts{'camse_target'};
+ } else {
+ $camse_target = "preqx";
+ }
+
# Check if machine mpilib option is given
$mpilib = 'unset';
if ($opts{'mpilib'}) {
@@ -502,36 +514,51 @@ if ($mach =~ /(.*)_(.*)/){
}
$cfg_ref->set('CCSM_MACHDIR', "$machdir");
+# Validate the camse_target option and record it as CAM_TARGET.
+# Done before compiler selection so the default compiler can be overridden (pgi_acc on titan for preqx_acc).
+if ($camse_target eq "preqx") {
+ $cfg_ref->set('CAM_TARGET', "preqx");
+} elsif ($camse_target eq "preqx_acc") {
+ $cfg_ref->set('CAM_TARGET', "preqx_acc");
+} else {
+ die "ERROR: option -camse_target must be preqx (default) or preqx_acc (got '$camse_target')\n";
+}
+
# Check that compiler request for target machine matches a supported value
# Or set default compiler - if not provided compiler request
my $compilers;
if ($mach =~ /userdefined/){
- $cfg_ref->set('COMPILER', "USERDEFINED_required_build");
+ $cfg_ref->set('COMPILER', "USERDEFINED_required_build");
} else {
- $compilers = $cfg_ref->get('COMPILERS');
- my @compilers = split ",", $compilers, -1;
- if ($compiler) {
- if (! ($mach =~ "generic")){
- my $found = 0;
- foreach my $comp (@compilers) {
- if ($compiler eq $comp) {
- $found = 1;
- }
- }
- if (!$found) {
- my $sysmod = "rm -rf $caseroot";
- system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n";
- die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n";
- }
- }
- $cfg_ref->set('COMPILER', "$compiler");
- if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
- } else {
- $compiler = $compilers[0];
- $cfg_ref->set('COMPILER', "$compiler");
- if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+ $compilers = $cfg_ref->get('COMPILERS');
+ my @compilers = split ",", $compilers, -1;
+ if ($compiler) {
+ if (! ($mach =~ "generic")){
+ my $found = 0;
+ foreach my $comp (@compilers) {
+ if ($compiler eq $comp) {
+ $found = 1;
+ }
+ }
+ if (!$found) {
+ my $sysmod = "rm -rf $caseroot";
+ system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n";
+ die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n";
+ }
}
+ $cfg_ref->set('COMPILER', "$compiler");
+ if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+ } else {
+ $compiler = $compilers[0];
+ if ($camse_target eq "preqx_acc") {
+ if ($mach eq "titan") {
+ $compiler = "pgi_acc";
+ }
+ }
+ $cfg_ref->set('COMPILER', "$compiler");
+ if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; }
+ }
}
if ($print>=2) { print "Machine specifier: $mach.$eol"; }
diff --git a/components/cam/bld/cam.buildnml b/components/cam/bld/cam.buildnml
index ee1651cf2d9..42b4660cdaa 100755
--- a/components/cam/bld/cam.buildnml
+++ b/components/cam/bld/cam.buildnml
@@ -20,6 +20,7 @@ my $CAM_CONFIG_OPTS = `./xmlquery CAM_CONFIG_OPTS -value`;
my $CAM_NAMELIST_OPTS = `./xmlquery CAM_NAMELIST_OPTS -value`;
my $CAM_NML_USE_CASE = `./xmlquery CAM_NML_USE_CASE -value`;
my $CAM_DYCORE = `./xmlquery CAM_DYCORE -value`;
+my $CAM_TARGET = `./xmlquery CAM_TARGET -value`;
my $CASEBUILD = `./xmlquery CASEBUILD -value`;
my $CCSM_CO2_PPMV = `./xmlquery CCSM_CO2_PPMV -value`;
my $CCSMROOT = `./xmlquery CCSMROOT -value`;
@@ -84,7 +85,7 @@ if ($BUILD_COMPLETE eq 'FALSE') {
$ATM_GRID = $1;
$nlev="-nlev $3";
}
- $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS";
+ $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -dyn_target $CAM_TARGET -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS";
system($sysmod) == 0 or die "ERROR cam.buildnml: $sysmod failed: $?\n";
} else {
diff --git a/components/cam/bld/configure b/components/cam/bld/configure
index db01087394a..457564857d0 100755
--- a/components/cam/bld/configure
+++ b/components/cam/bld/configure
@@ -324,6 +324,7 @@ GetOptions(
"rain_evap_to_coarse_aero" => \$opts{'rain_evap_to_coarse_aero'},
"defaults=s" => \$opts{'defaults'},
"dyn=s" => \$opts{'dyn'},
+ "dyn_target=s" => \$opts{'dyn_target'},
"edit_chem_mech" => \$opts{'edit_chem_mech'},
"waccm_phys" => \$opts{'waccm_phys'},
"offline_dyn" => \$opts{'offline_dyn'},
@@ -2764,8 +2765,13 @@ sub write_filepath
print $fh "$camsrcdir/cam/src/dynamics/$dyn\n";
if($dyn eq 'se') {
- print $fh "$camsrcdir/homme/src/share\n";
- print $fh "$camsrcdir/homme/src/share/openacc\n";
+ print $fh "$camsrcdir/homme/src/share\n";
+ # GetOptions leaves dyn_target undefined when -dyn_target is omitted; fall back to
+ # 'preqx', since the bndry_mod/edge_mod/derivative_mod wrappers now live under src/preqx.
+ my $dyn_target = defined $opts{'dyn_target'} ? $opts{'dyn_target'} : 'preqx';
+ if ($dyn_target eq 'preqx' or $dyn_target eq 'preqx_acc') {
+ print $fh "$camsrcdir/homme/src/$dyn_target\n";
+ }
}
# Parallelization utilies
diff --git a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt
index d3458a68476..0a39c868c57 100644
--- a/components/homme/CMakeLists.txt
+++ b/components/homme/CMakeLists.txt
@@ -56,6 +56,7 @@ ENDIF ()
# Some options for conditional builds
OPTION(BUILD_HOMME_SWEQX "Shallow water equations FEM" ON)
OPTION(BUILD_HOMME_PREQX "Primitive equations FEM" ON)
+OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" ON)
OPTION(BUILD_HOMME_SWDGX "Shallow water equations DG" OFF)
OPTION(BUILD_HOMME_PRIMDGX "Primitive equations DG" OFF)
OPTION(BUILD_HOMME_SWIM "Shallow water equations implicit" OFF)
@@ -175,6 +176,10 @@ IF(${BUILD_HOMME_PREQX})
ADD_SUBDIRECTORY(src/preqx)
ENDIF()
+IF(${BUILD_HOMME_PREQX_ACC})
+ ADD_SUBDIRECTORY(src/preqx_acc)
+ENDIF()
+
IF(${BUILD_HOMME_SWDGX})
ADD_SUBDIRECTORY(src/swdgx)
ENDIF()
diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake
index b83bd3fecb7..1d42d29c2cf 100644
--- a/components/homme/cmake/SetCompilerFlags.cmake
+++ b/components/homme/cmake/SetCompilerFlags.cmake
@@ -222,68 +222,7 @@ ENDIF ()
##############################################################################
OPTION(ENABLE_OPENACC "Whether to build with OpenACC support" FALSE)
IF (${ENABLE_OPENACC})
- IF (${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI)
- # Need to add -acc to the Fortran FLAGS to see if it will compile
- # "call acc_init()"
- SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -acc")
- TRY_COMPILE(OPENACC_SUCCESS
- ${CMAKE_BINARY_DIR}/tests/compilerTests/
- ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/openAccTest.f90
- OUTPUT_VARIABLE COMPILE_OUTPUT)
- IF (${OPENACC_SUCCESS})
- MESSAGE(STATUS "Using OpenACC through PGI compiler")
- SET(PREQX_USE_OPENACC TRUE)
- ELSE()
- MESSAGE(FATAL_ERROR "Unable to use OpenACC through the PGI compiler")
- ENDIF()
- ELSEIF (${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray)
- SET(PREQX_USE_OPENACC TRUE)
- MESSAGE(STATUS "Using OpenACC through Cray compiler")
- ELSE ()
- MESSAGE(FATAL_ERROR "OpenACC only supported through the PGI and Cray compilers")
- ENDIF()
-ENDIF ()
-
-##############################################################################
-# CUDA Fortran specific flags - only supporting PGI compiler
-##############################################################################
-OPTION(ENABLE_CUDA_FORTRAN "Whether to build with CUDA Fortran support" FALSE)
-IF (${ENABLE_CUDA_FORTRAN})
- IF (NOT ${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI)
- MESSAGE(FATAL_ERROR "CUDA Fortran only supported through the PGI compiler")
- ELSE ()
- # Set PGI CUDA Fortran flags
-
- # Set defaults as lowest version of CUDA and device capability allowed
- # To do: determine a way to generalize this
- IF (NOT CUDA_VERSION)
- SET(CUDA_VERSION "4.1")
- ENDIF ()
-
- # Compute capability: cc2x is for devices with compute capability >= 2.0
- IF (NOT CUDA_DEVICE_CAPABILITY)
- SET(CUDA_DEVICE_CAPABILITY "cc2x")
- ENDIF ()
-
- SET(CMAKE_Fortran_FLAGS
- "${CMAKE_Fortran_FLAGS} -ta=nvidia -Mcuda=${CUDA_VERSION},${CUDA_DEVICE_CAPABILITY},ptxinfo,keepgpu")
-
- MESSAGE(STATUS "Testing PGI CUDA Fortran Compilation with flags: ${CMAKE_Fortran_FLAGS}")
-
- TRY_COMPILE(CUDAFOR
- ${CMAKE_BINARY_DIR}/tests/compilerTests/
- ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/cudaFortranTest.f90
- OUTPUT_VARIABLE COMPILE_OUTPUT)
- IF (${CUDAFOR})
- SET(PREQX_USE_CUDA_FORTRAN TRUE)
- MESSAGE(STATUS "Succeeded. Using CUDA Fortran through PGI compiler")
- ELSE()
- SET(PREQX_USE_CUDA_FORTRAN FALSE)
- MESSAGE(FATAL_ERROR "Unable to use CUDA Fortran through the PGI "
- "compiler. Compilation failed with the following "
- "output.\n${COMPILE_OUTPUT}")
- ENDIF()
- ENDIF()
+ SET(PREQX_USE_OPENACC TRUE)
ENDIF ()
##############################################################################
diff --git a/components/homme/cmake/machineFiles/titan.cmake b/components/homme/cmake/machineFiles/titan.cmake
index 2a73ce474f6..aa94a807240 100644
--- a/components/homme/cmake/machineFiles/titan.cmake
+++ b/components/homme/cmake/machineFiles/titan.cmake
@@ -5,7 +5,16 @@ SET (CMAKE_CXX_COMPILER CC CACHE FILEPATH "")
SET (NETCDF_DIR $ENV{NETCDF_DIR} CACHE FILEPATH "")
SET (PNETCDF_DIR $ENV{PARALLEL_NETCDF_DIR} CACHE FILEPATH "")
SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "")
+SET (DEBUG_FLAGS " " CACHE STRING "")
+SET (Netcdf_NC_CONFIG_BIN "/opt/cray/netcdf/4.3.3.1/bin" CACHE FILEPATH "")
+SET (HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "")
+SET (USE_MPIEXEC "aprun" CACHE STRING "")
# The following is required for cross compilation
SET (CMAKE_SYSTEM_NAME Catamount CACHE FILEPATH "")
+
+#Regression test parameters
SET (USE_QUEUING FALSE CACHE BOOL "")
+SET (USE_NUM_PROCS 64 CACHE STRING "")
+
+
diff --git a/components/homme/compile_scripts/thatchroof/compile.cpu b/components/homme/compile_scripts/thatchroof/compile.cpu
old mode 100644
new mode 100755
diff --git a/components/homme/compile_scripts/thatchroof/compile.openacc b/components/homme/compile_scripts/thatchroof/compile.openacc
old mode 100644
new mode 100755
index c7b896db37e..b0f055ea508
--- a/components/homme/compile_scripts/thatchroof/compile.openacc
+++ b/components/homme/compile_scripts/thatchroof/compile.openacc
@@ -20,7 +20,7 @@ cmake \
-DCMAKE_Fortran_COMPILER=mpif90 \
-DCMAKE_C_COMPILER=mpicc \
-DCMAKE_CXX_COMPILER=mpiCC \
- -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \
+ -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \
-DDEBUG_FLAGS=" " \
-DNETCDF_DIR=$NETCDF_DIR \
-DWITH_PNETCDF=FALSE \
@@ -30,6 +30,8 @@ cmake \
-DBUILD_HOMME_SWDGX=FALSE \
-DBUILD_HOMME_SWEQX=FALSE \
-DBUILD_HOMME_PRIMDGX=FALSE \
+ -DBUILD_HOMME_PREQX=FALSE \
+ -DBUILD_HOMME_PREQX_ACC=TRUE \
-DENABLE_OPENMP=TRUE \
-DHOMME_PROJID=STF006 \
-DENABLE_OPENACC=TRUE \
@@ -39,11 +41,11 @@ cmake \
-DZLIB_DIR=$ZLIB_PATH \
-DSZIP_DIR=$SZIP_PATH \
-DCURL_DIR=/opt/curl-7.45_pgi15.9 \
- -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.0,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \
+ -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.5,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \
\
\
\
- -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts//thatchroof/customFlags.cmake.openacc \
+ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/thatchroof/customFlags.cmake.openacc \
$HOMME_ROOT
fi
# -DOPT_FLAGS="-fastsse -Mvect" \
@@ -54,7 +56,7 @@ make clean || exit -1
fi
if [ $do_make -eq 1 ]; then
-make -j16 preqx || exit -1
+make -j16 preqx_acc || exit -1
mkdir -p $HOMME_ROOT/build/preqx
-cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
fi
diff --git a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
index 12506a81cb6..248d7c27be8 100644
--- a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
+++ b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc
@@ -1,13 +1,6 @@
SET(CUSTOM_FLAG_FILES
${HOMME_SOURCE_DIR}/physics/column_model_mod.F90
${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
-
)
SET_SOURCE_FILES_PROPERTIES(
@@ -17,14 +10,4 @@ SET_SOURCE_FILES_PROPERTIES(
" -Mpreprocess -Mextend -Mflushz -O0 -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin"
)
-SET_SOURCE_FILES_PROPERTIES(
- ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
- PROPERTIES GENERATED TRUE COMPILE_FLAGS
- " -Mpreprocess -Mextend -Mflushz -fast -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape"
-)
diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu
index 1a21f327622..a88be66966f 100755
--- a/components/homme/compile_scripts/titan/compile.cpu
+++ b/components/homme/compile_scripts/titan/compile.cpu
@@ -5,52 +5,42 @@ do_clean=1
do_make=1
HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
-NTRACERS=50
-NLEVELS=64
+BUILD_ROOT=/lustre/atlas/scratch/$USER/stf006/homme_tests
+NTRACERS=4
+NLEVELS=26
source ./env_mach_specific.cpu
-mkdir -p titan-cpu || exit -1
-cd titan-cpu
+mkdir -p $BUILD_ROOT/titan-cpu || exit -1
+cd $BUILD_ROOT/titan-cpu
if [ $do_cmake -eq 1 ]; then
-sed -i "s/ integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1
rm -rf CMakeFiles CMakeCache.txt
cmake \
-C $HOMME_ROOT/cmake/machineFiles/titan.cmake \
- -DCMAKE_Fortran_COMPILER=ftn \
- -DCMAKE_C_COMPILER=cc \
- -DCMAKE_CXX_COMPILER=CC \
-DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \
- -DDEBUG_FLAGS=" " \
- -DNETCDF_DIR=$NETCDF_DIR \
- -DWITH_PNETCDF=FALSE \
- -DHDF5_DIR=$HDF5_DIR \
+ -DWITH_PNETCDF=TRUE \
-DPREQX_NP=4 \
-DPREQX_PLEV=$NLEVELS \
- -DBUILD_HOMME_SWDGX=FALSE \
- -DBUILD_HOMME_SWEQX=FALSE \
- -DBUILD_HOMME_PRIMDGX=FALSE \
+ -DPREQX_QSIZE_D=$NTRACERS \
+ -DBUILD_HOMME_SWEQX=TRUE \
+ -DBUILD_HOMME_PREQX=TRUE \
+ -DBUILD_HOMME_PREQX_ACC=TRUE \
-DENABLE_OPENMP=TRUE \
-DHOMME_PROJID=STF006 \
- -DENABLE_OPENACC=FALSE \
- -DENABLE_CUDA_FORTRAN=FALSE \
- -DHOMME_FIND_BLASLAPACK=TRUE \
- -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \
- \
- \
- \
+ -DOPENACC_Fortran_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Minfo=accel" \
+ -DOPENACC_Linker_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Mcuda" \
-DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \
$HOMME_ROOT
fi
-# -DOPT_FLAGS="-fastsse -Mvect" \
if [ $do_clean -eq 1 ]; then
make clean || exit -1
fi
if [ $do_make -eq 1 ]; then
-make -j16 preqx || exit -1
+make -j32 || exit -1
mkdir -p $HOMME_ROOT/build/preqx
cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.cpu || exit -1
+#cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
fi
diff --git a/components/homme/compile_scripts/titan/compile.openacc b/components/homme/compile_scripts/titan/compile.openacc
index 2e09b6529e5..ac2d06f16e0 100755
--- a/components/homme/compile_scripts/titan/compile.openacc
+++ b/components/homme/compile_scripts/titan/compile.openacc
@@ -1,7 +1,7 @@
#!/bin/bash
-do_cmake=0
-do_clean=0
+do_cmake=1
+do_clean=1
do_make=1
HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
@@ -14,7 +14,6 @@ mkdir -p titan-openacc || exit -1
cd titan-openacc
if [ $do_cmake -eq 1 ]; then
-sed -i "s/ integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1
rm -rf CMakeFiles CMakeCache.txt
cmake \
-C $HOMME_ROOT/cmake/machineFiles/titan.cmake \
@@ -27,10 +26,13 @@ cmake
-DWITH_PNETCDF=FALSE \
-DHDF5_DIR=$HDF5_DIR \
-DPREQX_NP=4 \
+ -DPREQX_QSIZE_D=$NTRACERS \
-DPREQX_PLEV=$NLEVELS \
-DBUILD_HOMME_SWDGX=FALSE \
-DBUILD_HOMME_SWEQX=FALSE \
-DBUILD_HOMME_PRIMDGX=FALSE \
+ -DBUILD_HOMME_PREQX_ACC=TRUE \
+ -DBUILD_HOMME_PREQX=FALSE \
-DENABLE_OPENMP=TRUE \
-DHOMME_PROJID=STF006 \
-DENABLE_OPENACC=TRUE \
@@ -49,7 +51,7 @@ make clean || exit -1
fi
if [ $do_make -eq 1 ]; then
-make -j16 preqx || exit -1
+make -j16 preqx_acc || exit -1
mkdir -p $HOMME_ROOT/build/preqx
-cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
fi
diff --git a/components/homme/compile_scripts/titan/compile.openacc.oncpu b/components/homme/compile_scripts/titan/compile.openacc.oncpu
new file mode 100755
index 00000000000..a6e3f9d381f
--- /dev/null
+++ b/components/homme/compile_scripts/titan/compile.openacc.oncpu
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+do_cmake=1
+do_clean=1
+do_make=1
+
+HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
+NTRACERS=50
+NLEVELS=64
+
+source ./env_mach_specific.cpu
+
+mkdir -p titan-openacc || exit -1
+cd titan-openacc
+
+if [ $do_cmake -eq 1 ]; then
+rm -rf CMakeFiles CMakeCache.txt
+cmake \
+ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \
+ -DCMAKE_Fortran_COMPILER=ftn \
+ -DCMAKE_C_COMPILER=cc \
+ -DCMAKE_CXX_COMPILER=CC \
+ -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \
+ -DDEBUG_FLAGS=" " \
+ -DNETCDF_DIR=$NETCDF_DIR \
+ -DWITH_PNETCDF=FALSE \
+ -DHDF5_DIR=$HDF5_DIR \
+ -DPREQX_NP=4 \
+ -DPREQX_QSIZE_D=$NTRACERS \
+ -DPREQX_PLEV=$NLEVELS \
+ -DBUILD_HOMME_SWDGX=FALSE \
+ -DBUILD_HOMME_SWEQX=FALSE \
+ -DBUILD_HOMME_PRIMDGX=FALSE \
+ -DBUILD_HOMME_PREQX_ACC=TRUE \
+ -DBUILD_HOMME_PREQX=FALSE \
+ -DENABLE_OPENMP=TRUE \
+ -DHOMME_PROJID=STF006 \
+ -DENABLE_OPENACC=TRUE \
+ -DENABLE_CUDA_FORTRAN=FALSE \
+ -DHOMME_FIND_BLASLAPACK=TRUE \
+ -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \
+ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \
+ $HOMME_ROOT
+fi
+
+if [ $do_clean -eq 1 ]; then
+make clean || exit -1
+fi
+
+if [ $do_make -eq 1 ]; then
+make -j16 preqx_acc || exit -1
+mkdir -p $HOMME_ROOT/build/preqx
+cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1
+fi
diff --git a/components/homme/compile_scripts/titan/compile.sweqx b/components/homme/compile_scripts/titan/compile.sweqx
new file mode 100755
index 00000000000..6cfca993f55
--- /dev/null
+++ b/components/homme/compile_scripts/titan/compile.sweqx
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+do_cmake=1
+do_clean=1
+do_make=1
+
+HOMME_ROOT=/ccs/home/$USER/ACME/components/homme
+NLEVELS=64
+
+source ./env_mach_specific.cpu
+
+mkdir -p titan-cpu || exit -1
+cd titan-cpu
+
+if [ $do_cmake -eq 1 ]; then
+rm -rf CMakeFiles CMakeCache.txt
+cmake \
+ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \
+ -DCMAKE_Fortran_COMPILER=ftn \
+ -DCMAKE_C_COMPILER=cc \
+ -DCMAKE_CXX_COMPILER=CC \
+ -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \
+ -DDEBUG_FLAGS=" " \
+ -DNETCDF_DIR=$NETCDF_DIR \
+ -DWITH_PNETCDF=FALSE \
+ -DHDF5_DIR=$HDF5_DIR \
+ -DSWEQX_NP=4 \
+ -DSWEQX_PLEV=$NLEVELS \
+ -DBUILD_HOMME_SWDGX=FALSE \
+ -DBUILD_HOMME_SWEQX=TRUE \
+ -DBUILD_HOMME_PRIMDGX=FALSE \
+ -DBUILD_HOMME_PREQX=FALSE \
+ -DENABLE_OPENMP=TRUE \
+ -DHOMME_PROJID=STF006 \
+ -DENABLE_OPENACC=FALSE \
+ -DENABLE_CUDA_FORTRAN=FALSE \
+ -DHOMME_FIND_BLASLAPACK=TRUE \
+ -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \
+ $HOMME_ROOT
+fi
+# -DOPT_FLAGS="-fastsse -Mvect" \
+
+if [ $do_clean -eq 1 ]; then
+make clean || exit -1
+fi
+
+if [ $do_make -eq 1 ]; then
+make -j16 sweqx || exit -1
+mkdir -p $HOMME_ROOT/build/sweqx
+cp ./src/sweqx/sweqx $HOMME_ROOT/build/sweqx/sweqx.cpu || exit -1
+fi
diff --git a/components/homme/compile_scripts/titan/customFlags.cmake.openacc b/components/homme/compile_scripts/titan/customFlags.cmake.openacc
index 5bb9c6a1676..9d2729e62e0 100644
--- a/components/homme/compile_scripts/titan/customFlags.cmake.openacc
+++ b/components/homme/compile_scripts/titan/customFlags.cmake.openacc
@@ -1,12 +1,6 @@
SET(CUSTOM_FLAG_FILES
${HOMME_SOURCE_DIR}/physics/column_model_mod.F90
${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
)
@@ -16,15 +10,3 @@ SET_SOURCE_FILES_PROPERTIES(
PROPERTIES GENERATED TRUE COMPILE_FLAGS
" -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O0 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin"
)
-
-SET_SOURCE_FILES_PROPERTIES(
- ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90
- ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90
- PROPERTIES GENERATED TRUE COMPILE_FLAGS
- " -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O2 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape"
-)
-
diff --git a/components/homme/compile_scripts/titan/env_mach_specific.cpu b/components/homme/compile_scripts/titan/env_mach_specific.cpu
index e3d54196c33..41aee75a4e7 100644
--- a/components/homme/compile_scripts/titan/env_mach_specific.cpu
+++ b/components/homme/compile_scripts/titan/env_mach_specific.cpu
@@ -32,12 +32,13 @@
module rm cray-parallel-netcdf
module load PrgEnv-pgi
# module switch pgi pgi/14.2.0
- module switch pgi pgi/15.10.lustre
+ module switch pgi pgi/15.10.0
module switch cray-mpich cray-mpich/7.2.2
module switch cray-libsci cray-libsci/13.0.4
module load esmf/5.2.0rp2
module switch atp atp/1.7.5
-
+ module add cudatoolkit
+ export CRAY_CUDA_MPS=1
export CRAY_CPU_TARGET=istanbul
module load cray-netcdf-hdf5parallel/4.3.3.1
diff --git a/components/homme/compile_scripts/titan/env_mach_specific.openacc b/components/homme/compile_scripts/titan/env_mach_specific.openacc
index 966acd15154..f0374679a7f 100644
--- a/components/homme/compile_scripts/titan/env_mach_specific.openacc
+++ b/components/homme/compile_scripts/titan/env_mach_specific.openacc
@@ -32,7 +32,7 @@
module rm cray-parallel-netcdf
module load PrgEnv-pgi
# module switch pgi pgi/14.2.0
- module switch pgi pgi/15.9.lustre
+ module switch pgi pgi/15.9.home
module switch cray-mpich cray-mpich/7.2.2
module switch cray-libsci cray-libsci/13.0.4
module load esmf/5.2.0rp2
diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt
index c28852c3005..710b9c0a964 100644
--- a/components/homme/src/preqx/CMakeLists.txt
+++ b/components/homme/src/preqx/CMakeLists.txt
@@ -16,13 +16,20 @@ SET (PREQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS}
${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx")
SET(PREQX_SRCS_F90
- ${SRC_SHARE_DIR}/openacc/bndry_openacc_mod.F90
- ${SRC_SHARE_DIR}/openacc/derivative_openacc_mod.F90
- ${SRC_SHARE_DIR}/openacc/edge_openacc_mod.F90
- ${SRC_SHARE_DIR}/openacc/openacc_utils_mod.F90
- ${SRC_SHARE_DIR}/openacc/prim_advection_openacc_mod.F90
- ${SRC_SHARE_DIR}/openacc/viscosity_openacc_mod.F90
- ${SRC_SHARE_DIR}/arch_switch_mod.F90
+ ${SRC_SHARE_DIR}/bndry_mod_base.F90
+ ${SRC_SHARE_DIR}/derivative_mod_base.F90
+ ${SRC_SHARE_DIR}/edge_mod_base.F90
+ ${SRC_SHARE_DIR}/prim_advection_mod_base.F90
+ ${SRC_SHARE_DIR}/solver_init_mod_base.F90
+ ${SRC_SHARE_DIR}/vertremap_mod_base.F90
+ ${SRC_SHARE_DIR}/viscosity_mod_base.F90
+ ${SRC_DIR}/preqx/bndry_mod.F90
+ ${SRC_DIR}/preqx/derivative_mod.F90
+ ${SRC_DIR}/preqx/edge_mod.F90
+ ${SRC_DIR}/preqx/prim_advection_mod.F90
+ ${SRC_DIR}/preqx/solver_init_mod.F90
+ ${SRC_DIR}/preqx/vertremap_mod.F90
+ ${SRC_DIR}/preqx/viscosity_mod.F90
${SRC_SHARE_DIR}/kinds.F90
${SRC_SHARE_DIR}/dimensions_mod.F90
${SRC_SHARE_DIR}/control_mod.F90
@@ -30,7 +37,6 @@ SET(PREQX_SRCS_F90
${SRC_SHARE_DIR}/coordinate_systems_mod.F90
${SRC_SHARE_DIR}/params_mod.F90
${SRC_SHARE_DIR}/gridgraph_mod.F90
- ${SRC_SHARE_DIR}/edge_mod.F90
${SRC_SHARE_DIR}/element_mod.F90
${SRC_SHARE_DIR}/parallel_mod.F90
${SRC_SHARE_DIR}/hybrid_mod.F90
@@ -43,9 +49,6 @@ SET(PREQX_SRCS_F90
${SRC_SHARE_DIR}/schedtype_mod.F90
${SRC_SHARE_DIR}/edgetype_mod.F90
${SRC_SHARE_DIR}/thread_mod.F90
- ${SRC_SHARE_DIR}/bndry_mod.F90
- ${SRC_SHARE_DIR}/derivative_mod.F90
- ${SRC_SHARE_DIR}/viscosity_mod.F90
${SRC_SHARE_DIR}/unit_tests_mod.F90
${SRC_SHARE_DIR}/spacecurve_mod.F90
${SRC_SHARE_DIR}/cube_mod.F90
@@ -93,7 +96,6 @@ SET(PREQX_SRCS_F90
${SRC_DIR}/prim_restart_mod.F90
${SRC_SHARE_DIR}/prim_si_ref_mod.F90
${SRC_SHARE_DIR}/prim_advance_mod.F90
- ${SRC_SHARE_DIR}/prim_advection_mod.F90
${SRC_DIR}/forcing_mod.F90
${SRC_SHARE_DIR}/metis_mod.F90
${SRC_SHARE_DIR}/prim_driver_mod.F90
@@ -111,7 +113,8 @@ SET(PREQX_SRCS_F90
${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90
${PHYSICS_DIR}/column_model_mod.F90
${PHYSICS_DIR}/column_types_mod.F90
-)
+ CACHE INTERNAL "")
+
SET(PREQX_SRCS_C
${UTILS_SHARE_DIR}/shr_vmath_fwrap.c
@@ -153,15 +156,7 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}")
MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}")
MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}")
-IF (${PREQX_USE_CUDA_FORTRAN})
- SET(USE_CUDA_FORTRAN TRUE)
- MESSAGE(STATUS " Using CUDA Fortran")
-ENDIF ()
-
-IF (${PREQX_USE_OPENACC})
- SET(USE_OPENACC TRUE)
- MESSAGE(STATUS " Using OpenACC")
-ENDIF ()
+SET(USE_OPENACC FALSE)
IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP})
SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading")
@@ -174,10 +169,4 @@ ENDIF ()
createTestExec(preqx preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV}
${PREQX_USE_PIO} ${PREQX_USE_ENERGY})
-IF (${BUILD_PREQX_SPELT})
- MESSAGE(STATUS " Also building preqx_spelt")
- createTestExec(preqx_spelt preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV}
- ${PREQX_USE_PIO} ${PREQX_USE_ENERGY})
- SET_TARGET_PROPERTIES(preqx_spelt PROPERTIES COMPILE_DEFINITIONS "_SPELT")
-ENDIF ()
diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90
new file mode 100644
index 00000000000..5ad1125cbef
--- /dev/null
+++ b/components/homme/src/preqx/bndry_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module bndry_mod
+ use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+ implicit none
+end module bndry_mod
diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90
new file mode 100644
index 00000000000..e9c3f81be3f
--- /dev/null
+++ b/components/homme/src/preqx/derivative_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module derivative_mod ! preqx wrapper module: forwards the listed derivative_mod_base entities under the name derivative_mod
+ use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, &
+ derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+ remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, &
+ curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+ laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+ implicit none ! no private statement, so the use-associated names above remain public to users of this module
+end module derivative_mod
diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90
new file mode 100644
index 00000000000..902503b36cb
--- /dev/null
+++ b/components/homme/src/preqx/edge_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module edge_mod ! preqx wrapper module: forwards the listed edge_mod_base entities under the name edge_mod
+ use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, &
+ edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+ ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, &
+ ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, &
+ ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+ implicit none ! no private statement, so the use-associated names above remain public to users of this module
+end module edge_mod
diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90
new file mode 100644
index 00000000000..a3e235c41aa
--- /dev/null
+++ b/components/homme/src/preqx/prim_advection_mod.F90
@@ -0,0 +1,9 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module prim_advection_mod ! preqx wrapper module: forwards the listed prim_advection_mod_base entities under the name prim_advection_mod
+ use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, &
+ prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2
+ implicit none ! no private statement, so the use-associated names above remain public to users of this module
+end module prim_advection_mod
diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90
new file mode 100644
index 00000000000..90efb25eadf
--- /dev/null
+++ b/components/homme/src/preqx/solver_init_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module solver_init_mod ! preqx wrapper module: forwards solver_init2 from solver_init_mod_base under the name solver_init_mod
+ use solver_init_mod_base, only: solver_init2
+ implicit none ! no private statement, so solver_init2 remains public to users of this module
+end module solver_init_mod
diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90
new file mode 100644
index 00000000000..63617bf66e6
--- /dev/null
+++ b/components/homme/src/preqx/vertremap_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module vertremap_mod ! preqx wrapper module: forwards the listed vertremap_mod_base entities under the name vertremap_mod
+ use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm
+ implicit none ! no private statement, so the use-associated names above remain public to users of this module
+end module vertremap_mod
diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90
new file mode 100644
index 00000000000..f78ae69a8f3
--- /dev/null
+++ b/components/homme/src/preqx/viscosity_mod.F90
@@ -0,0 +1,11 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module viscosity_mod ! preqx wrapper module: forwards the listed viscosity_mod_base entities under the name viscosity_mod
+ use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax
+#ifdef _PRIM
+ use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d ! forwarded only when _PRIM is defined
+#endif
+ implicit none ! no private statement, so the use-associated names above remain public to users of this module
+end module viscosity_mod
diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt
new file mode 100644
index 00000000000..83e28c4d87b
--- /dev/null
+++ b/components/homme/src/preqx_acc/CMakeLists.txt
@@ -0,0 +1,174 @@
+# Relevant Directories within the HOMME repository
+# holding source files needed for building the library
+# and executable preqx_acc.
+
+
+
+SET(UTILS_SHARE_DIR ${HOMME_SOURCE_DIR}/utils/csm_share)
+SET(SRC_DIR ${HOMME_SOURCE_DIR}/src)
+SET(SRC_SHARE_DIR ${HOMME_SOURCE_DIR}/src/share)
+SET(PHYSICS_DIR ${HOMME_SOURCE_DIR}/physics)
+SET(UTILS_TIMING_DIR ${CMAKE_BINARY_DIR}/utils/timing)
+
+# Make INCLUDE_DIRS global so the tests can access it
+SET (PREQX_ACC_INCLUDE_DIRS ${PIO_INCLUDE_DIRS}
+ ${UTILS_TIMING_DIR}
+ ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx")
+
+SET(PREQX_SRCS_F90
+ ${SRC_SHARE_DIR}/bndry_mod_base.F90
+ ${SRC_SHARE_DIR}/derivative_mod_base.F90
+ ${SRC_SHARE_DIR}/edge_mod_base.F90
+ ${SRC_SHARE_DIR}/prim_advection_mod_base.F90
+ ${SRC_SHARE_DIR}/solver_init_mod_base.F90
+ ${SRC_SHARE_DIR}/vertremap_mod_base.F90
+ ${SRC_SHARE_DIR}/viscosity_mod_base.F90
+ ${SRC_DIR}/preqx_acc/bndry_mod.F90
+ ${SRC_DIR}/preqx_acc/derivative_mod.F90
+ ${SRC_DIR}/preqx_acc/edge_mod.F90
+ ${SRC_DIR}/preqx_acc/openacc_utils_mod.F90
+ ${SRC_DIR}/preqx_acc/prim_advection_mod.F90
+ ${SRC_DIR}/preqx_acc/solver_init_mod.F90
+ ${SRC_DIR}/preqx_acc/vertremap_mod.F90
+ ${SRC_DIR}/preqx_acc/viscosity_mod.F90
+ ${SRC_SHARE_DIR}/kinds.F90
+ ${SRC_SHARE_DIR}/dimensions_mod.F90
+ ${SRC_SHARE_DIR}/control_mod.F90
+ ${SRC_SHARE_DIR}/physical_constants.F90
+ ${SRC_SHARE_DIR}/coordinate_systems_mod.F90
+ ${SRC_SHARE_DIR}/params_mod.F90
+ ${SRC_SHARE_DIR}/gridgraph_mod.F90
+ ${SRC_SHARE_DIR}/element_mod.F90
+ ${SRC_SHARE_DIR}/parallel_mod.F90
+ ${SRC_SHARE_DIR}/hybrid_mod.F90
+ ${SRC_SHARE_DIR}/reduction_mod.F90
+ ${SRC_SHARE_DIR}/quadrature_mod.F90
+ ${SRC_SHARE_DIR}/global_norms_mod.F90
+ ${SRC_SHARE_DIR}/ll_mod.F90
+ ${SRC_SHARE_DIR}/metagraph_mod.F90
+ ${SRC_SHARE_DIR}/schedule_mod.F90
+ ${SRC_SHARE_DIR}/schedtype_mod.F90
+ ${SRC_SHARE_DIR}/edgetype_mod.F90
+ ${SRC_SHARE_DIR}/thread_mod.F90
+ ${SRC_SHARE_DIR}/unit_tests_mod.F90
+ ${SRC_SHARE_DIR}/spacecurve_mod.F90
+ ${SRC_SHARE_DIR}/cube_mod.F90
+ ${SRC_SHARE_DIR}/interpolate_mod.F90
+ ${SRC_SHARE_DIR}/fvm_bsp_mod.F90
+ ${SRC_SHARE_DIR}/fvm_transformation_mod.F90
+ ${SRC_SHARE_DIR}/dof_mod.F90
+ ${SRC_DIR}/common_io_mod.F90
+ ${SRC_SHARE_DIR}/spelt_mod.F90
+ ${SRC_SHARE_DIR}/fvm_mod.F90
+ ${SRC_SHARE_DIR}/fvm_control_volume_mod.F90
+ ${SRC_SHARE_DIR}/fvm_line_integrals_mod.F90
+ ${SRC_SHARE_DIR}/fvm_line_integrals_flux_mod.F90
+ ${SRC_SHARE_DIR}/fvm_analytic_mod.F90
+ ${SRC_SHARE_DIR}/fvm_filter_mod.F90
+ ${SRC_SHARE_DIR}/fvm_reconstruction_mod.F90
+ ${SRC_DIR}/pio_io_mod.F90
+ ${SRC_DIR}/repro_sum_mod.F90
+ ${SRC_SHARE_DIR}/time_mod.F90
+ ${SRC_SHARE_DIR}/physics_mod.F90
+ ${SRC_SHARE_DIR}/hybvcoord_mod.F90
+ ${SRC_DIR}/interp_movie_mod.F90
+ ${UTILS_SHARE_DIR}/shr_kind_mod.F90
+ ${UTILS_SHARE_DIR}/shr_mpi_mod.F90
+ ${UTILS_SHARE_DIR}/shr_sys_mod.F90
+ ${UTILS_SHARE_DIR}/shr_file_mod.F90
+ ${UTILS_SHARE_DIR}/shr_vmath_mod.F90
+ ${UTILS_SHARE_DIR}/shr_const_mod.F90
+ ${UTILS_SHARE_DIR}/shr_spfn_mod.F90
+ ${SRC_SHARE_DIR}/domain_mod.F90
+ ${SRC_DIR}/restart_io_mod.F90
+ ${SRC_SHARE_DIR}/cg_mod.F90
+ ${SRC_SHARE_DIR}/linear_algebra_mod.F90
+ ${SRC_SHARE_DIR}/solver_mod.F90
+ ${SRC_SHARE_DIR}/filter_mod.F90
+ ${SRC_DIR}/common_movie_mod.F90
+ ${SRC_SHARE_DIR}/namelist_mod.F90
+ ${SRC_SHARE_DIR}/mass_matrix_mod.F90
+ ${SRC_DIR}/checksum_mod.F90
+ ${SRC_DIR}/baroclinic_inst_mod.F90
+ ${SRC_SHARE_DIR}/diffusion_mod.F90
+ ${SRC_SHARE_DIR}/prim_si_mod.F90
+ ${SRC_DIR}/asp_tests.F90
+ ${SRC_SHARE_DIR}/prim_state_mod.F90
+ ${SRC_DIR}/prim_restart_mod.F90
+ ${SRC_SHARE_DIR}/prim_si_ref_mod.F90
+ ${SRC_SHARE_DIR}/prim_advance_mod.F90
+ ${SRC_DIR}/forcing_mod.F90
+ ${SRC_SHARE_DIR}/metis_mod.F90
+ ${SRC_SHARE_DIR}/prim_driver_mod.F90
+ ${SRC_DIR}/vertical_mod.F90
+ ${SRC_DIR}/prim_movie_mod.F90
+ ${SRC_DIR}/surfaces_mod.F90
+ ${SRC_DIR}/interpolate_driver_mod.F90
+ ${SRC_DIR}/netcdf_io_mod.F90
+ ${SRC_SHARE_DIR}/mesh_mod.F90
+ ${SRC_SHARE_DIR}/Manager.F90
+ ${SRC_DIR}/prim_main.F90
+ ${SRC_DIR}/ref_state_mod.F90
+ ${PHYSICS_DIR}/physics_types_mod.F90
+ ${PHYSICS_DIR}/physics_io_mod.F90
+ ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90
+ ${PHYSICS_DIR}/column_model_mod.F90
+ ${PHYSICS_DIR}/column_types_mod.F90
+ CACHE INTERNAL "")
+
+SET(PREQX_SRCS_C
+ ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c
+ ${SRC_DIR}/jrio.c
+)
+
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
+
+# Make SRCS global so the tests can access it. The list is rebuilt from scratch: CACHE INTERNAL always overwrites the cache entry, so appending to the previous cached value would duplicate every source on each re-configure.
+SET(PREQX_ACC_SRCS ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx_acc")
+
+# Set up defaults
+IF (NOT PREQX_NP)
+ SET (PREQX_NP 4)
+ENDIF ()
+IF (NOT PREQX_PLEV)
+ SET (PREQX_PLEV 20)
+ENDIF ()
+IF (NOT PREQX_USE_PIO)
+ SET (PREQX_USE_PIO FALSE)
+ENDIF ()
+IF (NOT PREQX_USE_ENERGY)
+ SET (PREQX_USE_ENERGY FALSE)
+ENDIF ()
+IF (NOT PREQX_NC)
+ SET (PREQX_NC 4)
+ENDIF ()
+IF (PREQX_QSIZE_D)
+ SET (QSIZE_D ${PREQX_QSIZE_D})
+ENDIF ()
+
+MESSAGE(STATUS "Building preqx_acc with:") # name the target this file configures (createTestExec below builds preqx_acc)
+MESSAGE(STATUS " NP = ${PREQX_NP}")
+MESSAGE(STATUS " NC = ${PREQX_NC}")
+MESSAGE(STATUS " PLEV = ${PREQX_PLEV}")
+MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}")
+MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}")
+
+SET(USE_OPENACC TRUE)
+SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}")
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}")
+
+IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP})
+ SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading")
+ENDIF ()
+
+# The following macro handles the compilation with the appropriate variables
+############################################################################
+# createTestExec(exec_name exec_type NP PLEV USE_PIO USE_ENERGY)
+############################################################################
+createTestExec(preqx_acc preqx_acc ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV}
+ ${PREQX_USE_PIO} ${PREQX_USE_ENERGY})
+
+
diff --git a/components/homme/src/share/openacc/bndry_openacc_mod.F90 b/components/homme/src/preqx_acc/bndry_mod.F90
similarity index 96%
rename from components/homme/src/share/openacc/bndry_openacc_mod.F90
rename to components/homme/src/preqx_acc/bndry_mod.F90
index 783f884c00f..75fd0ddd909 100644
--- a/components/homme/src/share/openacc/bndry_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/bndry_mod.F90
@@ -3,14 +3,17 @@
#include "config.h"
#endif
-module bndry_openacc_mod
-#if USE_OPENACC
- use kinds , only: real_kind
+module bndry_mod
+ use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+ use parallel_mod, only : syncmp,parallel_t,abortmp,iam
+ use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t
+ use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads
+ use kinds, only: real_kind
implicit none
private
- integer, parameter :: maxCycles = 20
- integer, parameter :: maxChunks = 64
- real(kind=real_kind), parameter :: chunk_denom = 1.e5
+ integer, parameter, private :: maxCycles = 20
+ integer, parameter, private :: maxChunks = 64
+ real(kind=real_kind), parameter, private :: chunk_denom = 1.e5
type send_stager_t
integer :: nUpdateHost, nSendComp
@@ -24,12 +27,13 @@ module bndry_openacc_mod
integer :: beg(maxchunks), end(maxchunks), len(maxchunks), asyncid(maxchunks), tag(maxchunks), req(maxchunks)
end type recv_stager_t
- type(send_stager_t) :: stg_send(maxCycles)
- type(recv_stager_t) :: stg_recv(maxCycles)
+ type(send_stager_t), private :: stg_send(maxCycles)
+ type(recv_stager_t), private :: stg_recv(maxCycles)
+ public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
public :: bndry_exchangeS_simple_overlap
- public :: bndry_exchangeV_simple_overlap
public :: bndry_exchangeV_timing
+ public :: bndry_exchangeV_simple_overlap
public :: bndry_exchangeV_finer_overlap
contains
@@ -514,8 +518,8 @@ function mpi_isend_openacc_stage(buf, count, dest, tag_root, comm, ierror, nchun
use parallel_mod , only: mpireal_t
use openacc_utils_mod, only: update_host_async, acc_async_test_wrap
implicit none
- real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data
integer , intent(in ) :: count !number of elements in buffer
+ real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data
integer , intent(in ) :: dest !the MPI rank I'm sending data to
integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls)
integer , intent(in ) :: comm !Communicator to use
@@ -576,8 +580,8 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch
use parallel_mod , only: mpireal_t
use openacc_utils_mod, only: update_device_async, copy_ondev_async
implicit none
- real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data
integer , intent(in ) :: count !number of elements in buffer
+ real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data
integer , intent(in ) :: source !the MPI rank I'm receiving data from
integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls)
integer , intent(in ) :: comm !Communicator to use
@@ -634,6 +638,5 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch
if (stg_recv(myid)%nUpdateDev == nchunks) finished = .true.
end function mpi_irecv_openacc_stage
-#endif
-end module bndry_openacc_mod
+end module bndry_mod
diff --git a/components/homme/src/preqx_acc/config.h.cmake.in b/components/homme/src/preqx_acc/config.h.cmake.in
new file mode 100644
index 00000000000..8c2c17cb05f
--- /dev/null
+++ b/components/homme/src/preqx_acc/config.h.cmake.in
@@ -0,0 +1,61 @@
+/* PRIM Options */
+
+/* Energy Diagnostics */
+#cmakedefine ENERGY_DIAGNOSTICS
+
+/* Using PRIMITIVE */
+#define _PRIM
+
+/* WK_GRAD */
+#define _WK_GRAD 1
+
+/* _PRESTART */
+#define _PRESTART 1
+
+/* Using MPI */
+#define _MPI 1
+
+/* PIO with interpolation */
+#cmakedefine PIO_INTERP
+
+/* Number of cells NC*NC in each element (Default value NC=4) */
+#define NC @NUM_CELLS@
+
+/* Number of POINTS in the element (Default value is 4) */
+#define NP @NUM_POINTS@
+
+/* Number of vertical levels (Default value is 1) */
+#define PLEV @NUM_PLEV@
+
+/* Number of tracers (Default value 4 if not specified) */
+#cmakedefine QSIZE_D @QSIZE_D@
+
+/* Basic PIO */
+#cmakedefine PIO
+
+/* Whether to use CUDA Fortran */
+#cmakedefine01 USE_CUDA_FORTRAN
+
+/* Whether to use OPENACC */
+#cmakedefine01 USE_OPENACC
+
+/* Quadruple-precision */
+#cmakedefine01 HOMME_QUAD_PREC
+
+/* Horizontal OpenMP (over elements) */
+#cmakedefine HORIZ_OPENMP
+
+/* Vertical OpenMP (within elements) */
+#cmakedefine COLUMN_OPENMP
+
+/* Nested OpenMP (both horizontal and vertical) */
+#cmakedefine NESTED_OPENMP
+
+/* Zoltan partitioning library */
+#cmakedefine01 HAVE_ZOLTAN
+
+/* Enable persistent MPI comm */
+#cmakedefine MPI_PERSISTENT
+
+/* Using Accelerator */
+#cmakedefine IS_ACCELERATOR
diff --git a/components/homme/src/share/openacc/derivative_openacc_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90
similarity index 74%
rename from components/homme/src/share/openacc/derivative_openacc_mod.F90
rename to components/homme/src/preqx_acc/derivative_mod.F90
index 4676d05a053..97dea44734d 100644
--- a/components/homme/src/share/openacc/derivative_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/derivative_mod.F90
@@ -3,22 +3,36 @@
#include "config.h"
#endif
-module derivative_openacc_mod
-#if USE_OPENACC
- use kinds, only: real_kind
- use dimensions_mod, only: np, nelemd
+module derivative_mod
+ use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, &
+ derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+ remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, &
+ curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+ laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+ use kinds, only : real_kind, longdouble_kind
+ use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev
+ use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi
+ use parallel_mod, only : abortmp
+ ! needed for spherical differential operators:
+ use physical_constants, only : rrearth
+ use element_mod, only : element_t
+ use control_mod, only : hypervis_scaling, hypervis_power
implicit none
private
- public :: laplace_sphere_wk
- public :: divergence_sphere_wk
- public :: gradient_sphere
- public :: divergence_sphere
+ public :: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, &
+ derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+ remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, &
+ curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+ laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+ public :: laplace_sphere_wk_openacc
+ public :: divergence_sphere_wk_openacc
+ public :: gradient_sphere_openacc
+ public :: divergence_sphere_openacc
contains
- subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl)
- use derivative_mod, only: derivative_t
+ subroutine laplace_sphere_wk_openacc(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl)
use element_mod, only: element_t
use control_mod, only: hypervis_scaling, hypervis_power
implicit none
@@ -35,7 +49,7 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n
integer :: i,j,k,ie
! Local
real(kind=real_kind) :: oldgrads(2)
- call gradient_sphere(s,deriv,elem(:),grads,len,nets,nete,ntl,tl)
+ call gradient_sphere_openacc(s,deriv,elem(:),grads,len,nets,nete,ntl,tl)
!$acc parallel loop gang vector collapse(4) present(grads,elem(:)) private(oldgrads)
do ie = nets , nete
do k = 1 , len
@@ -58,12 +72,11 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n
enddo
! note: divergnece_sphere and divergence_sphere_wk are identical *after* bndry_exchange
! if input is C_0. Here input is not C_0, so we should use divergence_sphere_wk().
- call divergence_sphere_wk(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl)
- end subroutine laplace_sphere_wk
+ call divergence_sphere_wk_openacc(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl)
+ end subroutine laplace_sphere_wk_openacc
- subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl)
+ subroutine divergence_sphere_wk_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl)
use element_mod, only: element_t
- use derivative_mod, only: derivative_t
use physical_constants, only: rrearth
implicit none
! input: v = velocity in lat-lon coordinates
@@ -117,11 +130,10 @@ subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl)
enddo
enddo
enddo
- end subroutine divergence_sphere_wk
+ end subroutine divergence_sphere_wk_openacc
- subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl)
+ subroutine gradient_sphere_openacc(s,deriv,elem,ds,len,nets,nete,ntl,tl)
use element_mod, only: element_t
- use derivative_mod, only: derivative_t
use physical_constants, only: rrearth
implicit none
! input s: scalar
@@ -171,13 +183,12 @@ subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl)
enddo
enddo
enddo
- end subroutine gradient_sphere
+ end subroutine gradient_sphere_openacc
- subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl)
+ subroutine divergence_sphere_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl)
! input: v = velocity in lat-lon coordinates
! ouput: div(v) spherical divergence of v
use element_mod , only: element_t
- use derivative_mod, only: derivative_t
use physical_constants, only: rrearth
implicit none
real(kind=real_kind), intent(in ) :: v(np,np,2,len,ntl,nelemd) ! in lat-lon coordinates
@@ -227,8 +238,7 @@ subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl)
enddo
enddo
enddo
- end subroutine divergence_sphere
+ end subroutine divergence_sphere_openacc
-#endif
-end module derivative_openacc_mod
+end module derivative_mod
diff --git a/components/homme/src/share/openacc/edge_openacc_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90
similarity index 86%
rename from components/homme/src/share/openacc/edge_openacc_mod.F90
rename to components/homme/src/preqx_acc/edge_mod.F90
index 29178a9fda1..645fe05a66a 100644
--- a/components/homme/src/share/openacc/edge_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/edge_mod.F90
@@ -3,24 +3,43 @@
#include "config.h"
#endif
-module edge_openacc_mod
-#if USE_OPENACC
- use kinds, only: real_kind, int_kind, log_kind
- use dimensions_mod, only: max_neigh_edges,nelemd,np,max_corner_elem
+module edge_mod
+ use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, &
+ edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+ ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, &
+ ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, &
+ ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+ use kinds, only : int_kind, log_kind, real_kind
+ use dimensions_mod, only : max_neigh_edges, nelemd, np
+ use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! _EXTERNAL
+ use thread_mod, only: nthreadshoriz, omp_get_num_threads, omp_get_thread_num
+ use coordinate_systems_mod, only : cartesian3D_t
+ use schedtype_mod, only : cycle_t, schedule_t, schedule
+ use parallel_mod, only : abortmp, haltmp, MPIreal_t, iam,parallel_t, &
+ MAX_ACTIVE_MSG, HME_status_size, BNDRY_TAG_BASE
+ use edgetype_mod, only : edgedescriptor_t, edgebuffer_t, &
+ Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid
+ use element_mod, only : element_t
implicit none
private
- public :: edgeSpack
- public :: edgeSunpackMin
- public :: edgeSunpackMax
- public :: edgeVpack
- public :: edgeVunpack
- public :: edgeVunpackMin
- public :: edgeVunpackMax
+ public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, &
+ edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+ ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, &
+ ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, &
+ ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+ public :: edgeSpack_openacc
+ public :: edgeSunpackMin_openacc
+ public :: edgeSunpackMax_openacc
+ public :: edgeVpack_openacc
+ public :: edgeVunpack_openacc
+ public :: edgeVunpackMin_openacc
+ public :: edgeVunpackMax_openacc
+
contains
- subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeSpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : max_corner_elem
use control_mod , only : north, south, east, west, neast, nwest, seast, swest
use perf_mod , only : t_startf, t_stopf
@@ -29,10 +48,10 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) ,intent(inout) :: edge
integer ,intent(in ) :: vlyr
- real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd)
integer ,intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd)
! Local variables
integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk
integer, parameter :: kchunk = 64
@@ -66,9 +85,9 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_s_pack')
- end subroutine edgeSpack
+ end subroutine edgeSpack_openacc
- subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeSunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : np, max_corner_elem
use control_mod, only : north, south, east, west, neast, nwest, seast, swest
use perf_mod, only: t_startf, t_stopf
@@ -76,10 +95,10 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) , intent(in ) :: edge
integer , intent(in ) :: vlyr
- real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd)
integer , intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd)
! Local
integer :: i,k,ll,is,ie,in,iw,el,kc,kk
integer, parameter :: kchunk = 64
@@ -125,9 +144,9 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_s_unpack_min')
- end subroutine edgeSunpackMin
+ end subroutine edgeSunpackMin_openacc
- subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeSunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : np, max_corner_elem
use control_mod, only : north, south, east, west, neast, nwest, seast, swest
use perf_mod, only: t_startf, t_stopf
@@ -135,10 +154,10 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) , intent(in ) :: edge
integer , intent(in ) :: vlyr
- real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd)
integer , intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd)
! Local
integer :: i,k,ll,is,ie,in,iw,el,kc,kk
integer, parameter :: kchunk = 64
@@ -184,9 +203,9 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_s_unpack_max')
- end subroutine edgeSunpackMax
+ end subroutine edgeSunpackMax_openacc
- subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeVpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : max_corner_elem
use control_mod , only : north, south, east, west, neast, nwest, seast, swest
use perf_mod , only : t_startf, t_stopf
@@ -195,10 +214,10 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) ,intent(inout) :: edge
integer ,intent(in ) :: vlyr
- real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd)
integer ,intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd)
! Local variables
integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk
integer, parameter :: kchunk = 32
@@ -247,9 +266,9 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_pack')
- end subroutine edgeVpack
+ end subroutine edgeVpack_openacc
- subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeVunpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : np, max_corner_elem
use control_mod, only : north, south, east, west, neast, nwest, seast, swest
use perf_mod, only: t_startf, t_stopf
@@ -257,10 +276,10 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) , intent(in ) :: edge
integer , intent(in ) :: vlyr
- real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
integer , intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
! Local
integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
integer, parameter :: kchunk = 32
@@ -324,9 +343,9 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_unpack')
- end subroutine edgeVunpack
+ end subroutine edgeVunpack_openacc
- subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeVunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : np, max_corner_elem
use control_mod, only : north, south, east, west, neast, nwest, seast, swest
use perf_mod, only: t_startf, t_stopf
@@ -334,10 +353,10 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) , intent(in ) :: edge
integer , intent(in ) :: vlyr
- real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
integer , intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
! Local
integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
integer, parameter :: kchunk = 32
@@ -401,9 +420,9 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_unpack_min')
- end subroutine edgeVunpackMin
+ end subroutine edgeVunpackMin_openacc
- subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
+ subroutine edgeVunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use dimensions_mod, only : np, max_corner_elem
use control_mod, only : north, south, east, west, neast, nwest, seast, swest
use perf_mod, only: t_startf, t_stopf
@@ -411,10 +430,10 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
use edgetype_mod , only : EdgeBuffer_t
type(EdgeBuffer_t) , intent(in ) :: edge
integer , intent(in ) :: vlyr
- real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
integer , intent(in ) :: kptr
type(element_t) ,intent(in ) :: elem(:)
integer ,intent(in ) :: nets,nete,tdim,tl
+ real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd)
! Local
integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j
integer, parameter :: kchunk = 32
@@ -478,8 +497,7 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl)
enddo
enddo
call t_stopf('edge_unpack_max')
- end subroutine edgeVunpackMax
+ end subroutine edgeVunpackMax_openacc
-#endif
-end module edge_openacc_mod
+end module edge_mod
diff --git a/components/homme/src/share/openacc/openacc_utils_mod.F90 b/components/homme/src/preqx_acc/openacc_utils_mod.F90
similarity index 74%
rename from components/homme/src/share/openacc/openacc_utils_mod.F90
rename to components/homme/src/preqx_acc/openacc_utils_mod.F90
index 5b716e1849a..8e0acbb5f4d 100644
--- a/components/homme/src/share/openacc/openacc_utils_mod.F90
+++ b/components/homme/src/preqx_acc/openacc_utils_mod.F90
@@ -4,7 +4,6 @@
#endif
module openacc_utils_mod
-#if USE_OPENACC
use kinds, only: real_kind
use dimensions_mod, only: nelemd
implicit none
@@ -16,41 +15,25 @@ module openacc_utils_mod
public :: update_device_async
public :: copy_ondev
public :: copy_ondev_async
- public :: arch_init2
public :: acc_async_test_wrap
contains
function acc_async_test_wrap( asyncid ) result(rslt)
- use openacc, only: acc_async_test
+# ifdef _OPENACC
+ use openacc, only: acc_async_test ! imported only when the compiler defines _OPENACC
+# endif
implicit none
integer, intent(in) :: asyncid
logical :: rslt
- rslt = .false.
- rslt = acc_async_test(asyncid)
+# ifdef _OPENACC
+ rslt = .false. ! NOTE(review): immediately overwritten by the assignment on the next line
+ rslt = acc_async_test(asyncid) ! forward the OpenACC runtime answer for this async id
+# else
+ rslt = .true. ! fallback when _OPENACC is not defined: always returns .true.
+# endif
end function acc_async_test_wrap
- subroutine arch_init2( elem , deriv )
- use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
- use derivative_mod, only: derivative_t
- implicit none
- type(element_t) , intent(in) :: elem(:)
- type(derivative_t), intent(in) :: deriv
- integer :: ie
- !$omp barrier
- !$omp master
-
- !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj)
- !$acc enter data pcopyin(elem(1:nelemd),deriv)
- do ie = 1 , nelemd
- !$acc enter data pcopyin(elem(ie)%desc)
- !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse)
- enddo
-
- !$omp end master
- !$omp barrier
- end subroutine arch_init2
-
subroutine copy_qdp_h2d( elem , tl )
use element_mod, only: element_t, state_qdp
implicit none
@@ -119,6 +102,5 @@ subroutine copy_ondev_async(dest,src,len,id)
enddo
end subroutine copy_ondev_async
-#endif
end module openacc_utils_mod
diff --git a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90
similarity index 87%
rename from components/homme/src/share/openacc/prim_advection_openacc_mod.F90
rename to components/homme/src/preqx_acc/prim_advection_mod.F90
index ba1c7fa48ad..b9ed14c295c 100644
--- a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/prim_advection_mod.F90
@@ -6,34 +6,56 @@
#include "config.h"
#endif
-module prim_advection_openacc_mod
-#if USE_OPENACC
- use kinds , only: real_kind, int_kind, log_kind
- use dimensions_mod , only: np,nlevp,nlev,qsize,qsize_d,max_corner_elem,max_neigh_edges,nelemd
- use element_mod , only: timelevels
- use edgetype_mod , only: EdgeBuffer_t
- use derivative_mod , only: derivative_t
+module prim_advection_mod
+ !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2
+ use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap
+ use kinds, only : real_kind
+ use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd
+ use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp
+ use element_mod, only : element_t
+ use fvm_control_volume_mod, only : fvm_struct
+ use spelt_mod, only : spelt_struct
+ use filter_mod, only : filter_t, filter_P
+ use hybvcoord_mod, only : hvcoord_t
+ use time_mod, only : TimeLevel_t, smooth, TimeLevel_Qdp
+ use prim_si_mod, only : preq_pressure
+ use diffusion_mod, only : scalar_diffusion, diffusion_init
+ use control_mod, only : integration, test_case, filter_freq_advection, hypervis_order, &
+ statefreq, moisture, TRACERADV_TOTAL_DIVERGENCE, TRACERADV_UGRADQ, &
+ nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit
+ use edge_mod, only : edgevpack, edgerotate, edgevunpack, initedgebuffer, initedgesbuffer, &
+ edgevunpackmin, initghostbuffer3D
+
+ use edgetype_mod, only : EdgeDescriptor_t, EdgeBuffer_t, ghostbuffer3D_t
+ use hybrid_mod, only : hybrid_t
+ use bndry_mod, only : bndry_exchangev
+ use perf_mod, only : t_startf, t_stopf, t_barrierf ! _EXTERNAL
+ use parallel_mod, only : abortmp
+ use derivative_mod, only: derivative_t
implicit none
private
- type (derivative_t), public, allocatable :: deriv(:) ! derivative struct (nthreads)
- real(kind=real_kind), allocatable :: qmin(:,:,:), qmax(:,:,:)
- real(kind=real_kind), allocatable :: dp0(:)
- real(kind=real_kind), allocatable :: Qtens_biharmonic(:,:,:,:,:)
- real(kind=real_kind), allocatable :: Qtens(:,:,:,:,:)
- real(kind=real_kind), allocatable :: grads_tracer(:,:,:,:,:,:)
- real(kind=real_kind), allocatable :: dp_star(:,:,:,:)
- type (EdgeBuffer_t) :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax
- integer,parameter :: DSSeta = 1
- integer,parameter :: DSSomega = 2
- integer,parameter :: DSSdiv_vdp_ave = 3
- integer,parameter :: DSSno_var = -1
- real(kind=real_kind), allocatable :: data_pack(:,:,:,:), data_pack2(:,:,:,:)
- logical :: first_time = .true.
-
+ type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads)
+ real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:)
+ real(kind=real_kind), private, allocatable :: dp0(:)
+ real(kind=real_kind), private, allocatable :: Qtens_biharmonic(:,:,:,:,:)
+ real(kind=real_kind), private, allocatable :: Qtens(:,:,:,:,:)
+ real(kind=real_kind), private, allocatable :: grads_tracer(:,:,:,:,:,:)
+ real(kind=real_kind), private, allocatable :: dp_star(:,:,:,:)
+ type (EdgeBuffer_t), private :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax
+ integer,parameter, private :: DSSeta = 1
+ integer,parameter, private :: DSSomega = 2
+ integer,parameter, private :: DSSdiv_vdp_ave = 3
+ integer,parameter, private :: DSSno_var = -1
+ real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:)
+ logical, private :: first_time = .true.
+
+ public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap
public :: Prim_Advec_Tracers_remap
public :: prim_advec_init1
public :: prim_advec_init2
public :: prim_advec_init_deriv
+ public :: deriv
+ public :: Prim_Advec_Tracers_remap_rk2
contains
@@ -45,28 +67,30 @@ subroutine copy_qdp1_h2d( elem , tl , nets , nete )
integer , intent(in) :: tl, nets , nete
integer :: ie, k, j, i
call t_startf('qdp1_pcie')
- do ie = nets , nete
- data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie)
- enddo
- !$omp barrier
- !$omp master
-! do ie = 1 , nelemd
-! !$acc update device(state_qdp(:,:,:,1,tl,ie))
-! enddo
- !$acc update device(data_pack) async(1)
- !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
- do ie = 1 , nelemd
- do k = 1 , nlev
- do j = 1 , np
- do i = 1 , np
- state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie)
+# if USE_OPENACC
+ do ie = nets , nete
+ data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie)
+ enddo
+ !$omp barrier
+ !$omp master
+! do ie = 1 , nelemd
+! !$acc update device(state_qdp(:,:,:,1,tl,ie))
+! enddo
+ !$acc update device(data_pack) async(1)
+ !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
+ do ie = 1 , nelemd
+ do k = 1 , nlev
+ do j = 1 , np
+ do i = 1 , np
+ state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie)
+ enddo
enddo
enddo
enddo
- enddo
- !$acc wait(1)
- !$omp end master
- !$omp barrier
+ !$acc wait(1)
+ !$omp end master
+ !$omp barrier
+# endif
call t_stopf('qdp1_pcie')
end subroutine copy_qdp1_h2d
@@ -78,28 +102,30 @@ subroutine copy_qdp1_d2h( elem , tl , nets , nete )
integer , intent(in) :: tl, nets , nete
integer :: ie, k, j, i
call t_startf('qdp1_pcie')
- !$omp barrier
- !$omp master
- !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
- do ie = 1 , nelemd
- do k = 1 , nlev
- do j = 1 , np
- do i = 1 , np
- data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie)
+# if USE_OPENACC
+ !$omp barrier
+ !$omp master
+ !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack)
+ do ie = 1 , nelemd
+ do k = 1 , nlev
+ do j = 1 , np
+ do i = 1 , np
+ data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie)
+ enddo
enddo
enddo
enddo
- enddo
- !$acc update host(data_pack) async(1)
- !$acc wait(1)
-! do ie = 1 , nelemd
-! !$acc update host(state_qdp(:,:,:,1,tl,ie))
-! enddo
- !$omp end master
- !$omp barrier
- do ie = nets , nete
- state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie)
- enddo
+ !$acc update host(data_pack) async(1)
+ !$acc wait(1)
+! do ie = 1 , nelemd
+! !$acc update host(state_qdp(:,:,:,1,tl,ie))
+! enddo
+ !$omp end master
+ !$omp barrier
+ do ie = nets , nete
+ state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie)
+ enddo
+# endif
call t_stopf('qdp1_pcie')
end subroutine copy_qdp1_d2h
@@ -112,7 +138,7 @@ subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , flt , hybrid , dt
use filter_mod , only: filter_t
use time_mod , only: TimeLevel_t, TimeLevel_Qdp
use control_mod , only: limiter_option, nu_p, qsplit
- use bndry_openacc_mod, only: bndry_exchangeV_timing
+ use bndry_mod, only: bndry_exchangeV_timing
implicit none
type (element_t) , intent(inout) :: elem(:)
type (derivative_t) , intent(in ) :: deriv
@@ -321,13 +347,12 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
use hybrid_mod , only: hybrid_t
use element_mod , only: element_t, derived_divdp_proj, state_qdp
use derivative_mod , only: derivative_t
- use edge_mod , only: EdgeBuffer_t
use perf_mod , only: t_startf, t_stopf ! _EXTERNAL
use hybvcoord_mod , only: hvcoord_t
use control_mod , only: nu_q, hypervis_order, hypervis_subcycle_q, nu_p
- use viscosity_openacc_mod, only: biharmonic_wk_scalar
- use edge_openacc_mod , only: edgeVpack, edgeVunpack
- use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+ use viscosity_mod, only: biharmonic_wk_scalar_openacc
+ use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc
+ use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
implicit none
type (EdgeBuffer_t) , intent(inout) :: edgeAdv_dontuse
type (element_t) , intent(inout), target :: elem(:)
@@ -388,7 +413,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
!$omp end master
!$omp barrier
! compute biharmonic operator. Qtens = input and output
- call biharmonic_wk_scalar( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
+ call biharmonic_wk_scalar_openacc( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
!$omp barrier
!$omp master
!$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:),qtens)
@@ -407,7 +432,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
enddo
call limiter2d_zero(state_Qdp,2,nt_qdp)
call t_startf('ah_scalar_PEU')
- call edgeVpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
+ call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
!$omp end master
!$omp barrier
@@ -417,7 +442,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid ,
!$omp barrier
!$omp master
- call edgeVunpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
+ call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp)
call t_stopf('ah_scalar_PEU')
!$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:))
do ie = 1 , nelemd
@@ -441,7 +466,6 @@ end subroutine advance_hypervis_scalar
subroutine qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , limiter_option , nu_p , nets , nete )
use element_mod, only: element_t, state_qdp
- use control_mod, only: limiter_option
implicit none
type(element_t) , intent(inout) :: elem(:)
integer , intent(in ) :: rkstage , n0_qdp , np1_qdp , nets , nete , limiter_option
@@ -488,10 +512,10 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
use control_mod , only: limiter_option, nu_p, nu_q
use perf_mod , only: t_startf, t_stopf
use element_mod , only: derived_divdp_proj, state_qdp, derived_vn0, derived_divdp
- use derivative_openacc_mod, only: divergence_sphere
- use viscosity_openacc_mod , only: biharmonic_wk_scalar, neighbor_minmax
- use edge_openacc_mod , only: edgeVpack, edgeVunpack
- use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+ use derivative_mod, only: divergence_sphere_openacc
+ use viscosity_mod , only: biharmonic_wk_scalar_openacc, neighbor_minmax_openacc
+ use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc
+ use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
implicit none
integer , intent(in ) :: np1_qdp, n0_qdp
real (kind=real_kind), intent(in ) :: dt
@@ -606,7 +630,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
enddo
!$omp end master
!$omp barrier
- if ( rhs_multiplier == 0 ) call neighbor_minmax(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax)
+ if ( rhs_multiplier == 0 ) call neighbor_minmax_openacc(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax)
! compute biharmonic mixing term
if ( rhs_multiplier == 2 ) then
rhs_viss = 3
@@ -633,8 +657,8 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
!$omp end master
!$omp barrier
endif
- call biharmonic_wk_scalar( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
- call neighbor_minmax( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax )
+ call biharmonic_wk_scalar_openacc( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd )
+ call neighbor_minmax_openacc( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax )
!$omp barrier
!$omp master
!$acc parallel loop gang vector collapse(4) present(qtens_biharmonic,dp0,elem(:))
@@ -716,7 +740,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
enddo
enddo
enddo
- call divergence_sphere( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 )
+ call divergence_sphere_openacc( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 )
!$acc parallel loop gang vector collapse(5) present(qtens,state_qdp,qtens_biharmonic)
do ie = 1 , nelemd
! advance Qdp
@@ -761,7 +785,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is
! all zero so we only have to DSS 1:nlev
call t_startf('eus_PEU')
- call edgeVpack(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
+ call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
!$omp end master
!$omp barrier
@@ -771,7 +795,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv ,
!$omp barrier
!$omp master
- call edgeVunpack( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
+ call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp )
call t_stopf('eus_PEU')
!$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:))
do ie = 1 , nelemd
@@ -800,9 +824,9 @@ subroutine limiter2d_zero(Qdp,tdim,tl)
! ps is only used when advecting Q instead of Qdp
! so ps should be at one timelevel behind Q
implicit none
- real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd)
integer , intent(in ) :: tdim
integer , intent(in ) :: tl
+ real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd)
! local
real (kind=real_kind) :: mass,mass_new
real (kind=real_kind) :: qtmp(np,np)
@@ -939,7 +963,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
use edge_mod , only: edgeVpack, edgeVunpack
use bndry_mod , only: bndry_exchangeV
use control_mod , only: limiter_option
- use derivative_openacc_mod, only: divergence_sphere
+ use derivative_mod, only: divergence_sphere_openacc
use openacc_utils_mod , only: copy_ondev
use perf_mod , only: t_startf, t_stopf
implicit none
@@ -954,7 +978,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
!$omp barrier
!$omp master
!$acc update device(derived_vn0)
- call divergence_sphere(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1)
+ call divergence_sphere_openacc(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1)
call copy_ondev(derived_divdp_proj,derived_divdp,product(shape(derived_divdp)))
!$acc update host(derived_divdp,derived_divdp_proj)
!$omp end master
@@ -988,7 +1012,6 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp )
call t_stopf('derived PEU')
end subroutine precompute_divdp
-#endif
-end module prim_advection_openacc_mod
+end module prim_advection_mod
diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90
new file mode 100644
index 00000000000..0b2027af701
--- /dev/null
+++ b/components/homme/src/preqx_acc/solver_init_mod.F90
@@ -0,0 +1,40 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! solver_init_mod: provides solver_init2, which issues the OpenACC enter data directives below.
+module solver_init_mod
+ !OVERWRITING: solver_init2
+ use solver_init_mod_base, only:
+ use dimensions_mod, only: nelemd
+ implicit none
+ private
+ ! Only solver_init2 is public; the empty only-list above imports no names from solver_init_mod_base.
+ public :: solver_init2
+
+
+contains
+
+ ! solver_init2(elem, deriv): runs the acc enter data directives inside an omp master region.
+ subroutine solver_init2( elem , deriv )
+ use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
+ use derivative_mod, only: derivative_t
+ implicit none
+ type(element_t) , intent(in) :: elem(:) ! elem(1:nelemd) and each elem(ie)%desc go to pcopyin
+ type(derivative_t), intent(in) :: deriv ! passed to pcopyin together with elem(1:nelemd)
+ integer :: ie ! loop index over 1..nelemd
+ !$omp barrier
+ !$omp master
+ ! pcreate for the element_mod arrays first, then pcopyin for elem and deriv.
+ !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj)
+ !$acc enter data pcopyin(elem(1:nelemd),deriv)
+ do ie = 1 , nelemd
+ !$acc enter data pcopyin(elem(ie)%desc)
+ !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse)
+ enddo
+
+ !$omp end master
+ !$omp barrier
+ end subroutine solver_init2
+
+
+end module solver_init_mod
diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90
new file mode 100644
index 00000000000..4c3471721bb
--- /dev/null
+++ b/components/homme/src/preqx_acc/vertremap_mod.F90
@@ -0,0 +1,11 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! vertremap_mod: re-exports remap1, remap1_nofilter and remap_q_ppm from vertremap_mod_base.
+module vertremap_mod
+ use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm
+ implicit none
+ private
+ ! Default accessibility is private; only the three imported names are made public.
+ public :: remap1, remap1_nofilter, remap_q_ppm
+end module vertremap_mod
diff --git a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90
similarity index 54%
rename from components/homme/src/share/openacc/viscosity_openacc_mod.F90
rename to components/homme/src/preqx_acc/viscosity_mod.F90
index bf3cda132b5..4bc0c10960f 100644
--- a/components/homme/src/share/openacc/viscosity_openacc_mod.F90
+++ b/components/homme/src/preqx_acc/viscosity_mod.F90
@@ -3,28 +3,45 @@
#include "config.h"
#endif
-module viscosity_openacc_mod
-#if USE_OPENACC
- use kinds, only: real_kind
- use dimensions_mod, only: np,nlev,qsize,nelemd
+module viscosity_mod
+ use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0
+#ifdef _PRIM
+ use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+ use thread_mod, only : omp_get_num_threads
+ use kinds, only : real_kind, iulog
+ use dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac
+ use hybrid_mod, only : hybrid_t, hybrid_create
+ use parallel_mod, only : parallel_t
+ use element_mod, only : element_t
+ use edgetype_mod, only : EdgeBuffer_t, EdgeDescriptor_t
+ use bndry_mod, only : bndry_exchangev, bndry_exchangeS, bndry_exchangeS_start,bndry_exchangeS_finish
+ use control_mod, only : hypervis_scaling, nu, nu_div
+ use perf_mod, only: t_startf, t_stopf
implicit none
private
- public :: neighbor_minmax
- public :: biharmonic_wk_scalar
+ public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0
+#ifdef _PRIM
+ public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+ public :: biharmonic_wk_scalar_openacc
+ public :: neighbor_minmax_openacc
+
+
contains
- subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
+ subroutine biharmonic_wk_scalar_openacc(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
use hybrid_mod , only: hybrid_t
use element_mod , only: element_t
use edgetype_mod , only: edgeBuffer_t
use derivative_mod , only: derivative_t
use control_mod , only: hypervis_scaling
use perf_mod , only: t_startf, t_stopf
- use derivative_openacc_mod, only: laplace_sphere_wk
- use edge_openacc_mod , only: edgeVpack, edgeVunpack
- use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
+ use derivative_mod, only: laplace_sphere_wk_openacc
+ use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc
+ use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap
implicit none
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! compute weak biharmonic operator
@@ -47,9 +64,9 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
if(hypervis_scaling > 0) var_coef1 = .false.
!$omp barrier
!$omp master
- call laplace_sphere_wk(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1)
+ call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1)
call t_startf('biwksc_PEU')
- call edgeVpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
+ call edgeVpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
!$omp end master
!$omp barrier
@@ -59,7 +76,7 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
!$omp barrier
!$omp master
- call edgeVunpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
+ call edgeVunpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1)
call t_stopf('biwksc_PEU')
!$acc parallel loop gang vector collapse(5) present(qtens,elem(:))
do ie = nets , nete
@@ -74,18 +91,18 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete)
enddo
enddo
enddo
- call laplace_sphere_wk(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1)
+ call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1)
!$omp end master
!$omp barrier
- end subroutine biharmonic_wk_scalar
+ end subroutine biharmonic_wk_scalar_openacc
- subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
+ subroutine neighbor_minmax_openacc(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
use hybrid_mod , only: hybrid_t
use element_mod , only: element_t
use perf_mod , only: t_startf, t_stopf
use edgetype_mod , only: edgeBuffer_t
- use edge_openacc_mod , only: edgeSpack, edgeSunpackMin, edgeSunpackMax
- use bndry_openacc_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap
+ use edge_mod , only: edgeSpack_openacc, edgeSunpackMin_openacc, edgeSunpackMax_openacc
+ use bndry_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap
implicit none
! compute Q min&max over the element and all its neighbors
integer :: nets,nete
@@ -100,8 +117,8 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
!$omp barrier
!$omp master
call t_startf('nmm_PEU')
- call edgeSpack(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1)
- call edgeSpack(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
+ call edgeSpack_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1)
+ call edgeSpack_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
!$omp end master
!$omp barrier
@@ -111,13 +128,12 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh)
!$omp barrier
!$omp master
- call edgeSunpackMin(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1)
- call edgeSunpackMax(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
+ call edgeSunpackMin_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1)
+ call edgeSunpackMax_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1)
call t_stopf('nmm_PEU')
!$omp end master
!$omp barrier
- end subroutine neighbor_minmax
+ end subroutine neighbor_minmax_openacc
-#endif
-end module viscosity_openacc_mod
+end module viscosity_mod
diff --git a/components/homme/src/share/arch_switch_mod.F90 b/components/homme/src/share/arch_switch_mod.F90
deleted file mode 100644
index 8720652254f..00000000000
--- a/components/homme/src/share/arch_switch_mod.F90
+++ /dev/null
@@ -1,29 +0,0 @@
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-module arch_switch_mod
-#if USE_OPENACC
- use prim_advection_openacc_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap
- use openacc_utils_mod, only: arch_init2
-#else
- use prim_advection_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap
-#endif
- implicit none
-
-contains
-
-#if (! USE_OPENACC)
- subroutine arch_init2( elem , deriv )
- use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj
- use derivative_mod, only: derivative_t
- implicit none
- type(element_t) , intent(in) :: elem(:)
- type(derivative_t), intent(in) :: deriv
- !CPU case, nothing to do
- end subroutine arch_init2
-#endif
-
-end module arch_switch_mod
-
diff --git a/components/homme/src/share/bndry_mod.F90 b/components/homme/src/share/bndry_mod_base.F90
similarity index 99%
rename from components/homme/src/share/bndry_mod.F90
rename to components/homme/src/share/bndry_mod_base.F90
index 3be3590cf5e..998c80d2952 100644
--- a/components/homme/src/share/bndry_mod.F90
+++ b/components/homme/src/share/bndry_mod_base.F90
@@ -3,7 +3,7 @@
#endif
-module bndry_mod
+module bndry_mod_base
use parallel_mod, only : syncmp,parallel_t,abortmp,iam
use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t
use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads
@@ -1140,4 +1140,4 @@ subroutine sort_neighbor_buffer_mapping(par,elem,nets,nete)
-end module bndry_mod
+end module bndry_mod_base
diff --git a/components/homme/src/share/derivative_mod.F90 b/components/homme/src/share/derivative_mod_base.F90
similarity index 99%
rename from components/homme/src/share/derivative_mod.F90
rename to components/homme/src/share/derivative_mod_base.F90
index 7b2f1fce0a1..0c89625a739 100644
--- a/components/homme/src/share/derivative_mod.F90
+++ b/components/homme/src/share/derivative_mod_base.F90
@@ -3,7 +3,7 @@
#endif
-module derivative_mod
+module derivative_mod_base
use kinds, only : real_kind, longdouble_kind
use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev
use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi
@@ -2810,4 +2810,4 @@ end subroutine limiter_optim_iter_full
-end module derivative_mod
+end module derivative_mod_base
diff --git a/components/homme/src/share/edge_mod.F90 b/components/homme/src/share/edge_mod_base.F90
similarity index 99%
rename from components/homme/src/share/edge_mod.F90
rename to components/homme/src/share/edge_mod_base.F90
index 2205a27d5eb..6cd64ca3f66 100644
--- a/components/homme/src/share/edge_mod.F90
+++ b/components/homme/src/share/edge_mod_base.F90
@@ -2,7 +2,7 @@
#include "config.h"
#endif
-module edge_mod
+module edge_mod_base
use kinds, only : int_kind, log_kind, real_kind
use dimensions_mod, only : max_neigh_edges, nelemd
@@ -4792,7 +4792,7 @@ subroutine ghostVunpack3d(g, v, vlyr, kptr, desc, sw, se, nw, ne, mult)
end subroutine ghostVunpack3d
-End module edge_mod
+End module edge_mod_base
#if 0
#ifndef HAVE_F2003_PTR_BND_REMAP
diff --git a/components/homme/src/share/element_mod.F90 b/components/homme/src/share/element_mod.F90
index 84811e50676..d6ce91490ac 100644
--- a/components/homme/src/share/element_mod.F90
+++ b/components/homme/src/share/element_mod.F90
@@ -36,7 +36,7 @@ module element_mod
real (kind=real_kind) :: ps_v(np,np,timelevels) ! surface pressure 4
real (kind=real_kind) :: phis(np,np) ! surface geopotential (prescribed) 5
real (kind=real_kind) :: Q (np,np,nlev,qsize_d) ! Tracer concentration 6
- real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize_d,2)
+ real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize,2)
end type elem_state_t
integer(kind=int_kind),public,parameter::StateComponents=8 ! num prognistics variables (for prim_restart_mod.F90)
@@ -564,12 +564,12 @@ end subroutine allocate_element_desc
!___________________________________________________________________
subroutine setup_element_pointers(elem)
- use dimensions_mod, only: nelemd
+ use dimensions_mod, only: nelemd, qsize
implicit none
type(element_t), intent(inout) :: elem(:)
#if USE_OPENACC
integer :: ie
- allocate( state_Qdp (np,np,nlev,qsize_d,2,nelemd) )
+ allocate( state_Qdp (np,np,nlev,qsize,2,nelemd) )
allocate( derived_vn0 (np,np,2,nlev,nelemd) )
allocate( derived_divdp (np,np,nlev,nelemd) )
allocate( derived_divdp_proj (np,np,nlev,nelemd) )
diff --git a/components/homme/src/share/prim_advection_mod.F90 b/components/homme/src/share/prim_advection_mod_base.F90
similarity index 73%
rename from components/homme/src/share/prim_advection_mod.F90
rename to components/homme/src/share/prim_advection_mod_base.F90
index 11369232d72..81620cd9784 100644
--- a/components/homme/src/share/prim_advection_mod.F90
+++ b/components/homme/src/share/prim_advection_mod_base.F90
@@ -4,827 +4,39 @@
#define NEWEULER_B4B 1
#define OVERLAP 1
- module EXTRAE_MODULE
-
- interface
-
- subroutine extrae_user_function (enter)
- integer*4, intent(in) :: enter
- end subroutine extrae_user_function
-
- end interface
-
- end module EXTRAE_MODULE
-
-#if 0
-SUBROUTINES:
- prim_advec_tracers_remap_rk2()
- SEM 2D RK2 + monotone remap + hyper viscosity
- SEM 2D RK2 can use sign-preserving or monotone reconstruction
-
-Notes on Lagrange+REMAP advection
-dynamics will compute mean fluxes, so that (i.e. for qsplit=3)
-
- dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3)
-
-Where the floating lagrangian component:
- dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3)
-OR:
- dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) )
-
-
-For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt
-For consistency, if Q=1
- dp1 = dp(t)- dtq div[ U1 dp(t)]
- dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave )
- dp* = (dp(t) + dp2 )/2
- = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave )
-
-so we require:
- U1 = Udp_ave / dp(t)
- U2 = Udp_ave / dp1
-
-For tracer advection:
- Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)]
- Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave )
- Qdp* = (Qdp(t) + Qdp2 )/2
- = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ] + dtq D( Q dpdiss_ave )
-
-Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above
-Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above
-
-For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS
-
-
-last step:
- remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ]
-
-#endif
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!! Begin GPU remap module !!
-!! by Rick Archibald, 2010 !!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-module vertremap_mod
-
- !**************************************************************************************
- !
- ! Purpose:
- ! Construct sub-grid-scale polynomials using piecewise spline method with
- ! monotone filters.
- !
- ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR)
- ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF)
- !
- !**************************************************************************************
-
- use kinds, only : real_kind,int_kind
- use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc
- use hybvcoord_mod, only : hvcoord_t
- use element_mod, only : element_t
- use fvm_control_volume_mod, only : fvm_struct
- use spelt_mod, only : spelt_struct
- use perf_mod, only : t_startf, t_stopf ! _EXTERNAL
- use parallel_mod, only : abortmp, parallel_t
- use control_mod, only : vert_remap_q_alg
-
- public remap1 ! remap any field, splines, monotone
- public remap1_nofilter ! remap any field, splines, no filter
-! todo: tweak interface to match remap1 above, rename remap1_ppm:
- public remap_q_ppm ! remap state%Q, PPM, monotone
-
- contains
-
-!=======================================================================================================!
-
-!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid
-!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task.
-subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref )
- implicit none
- type(hvcoord_t) , intent(in ) :: hvcoord !Derived type to hold vertical sigma grid parameters
- real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column
- real(kind=real_kind) , intent(in ) :: dt !Time step
- real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Looks like a vertical pressure flux
- !to compute deformed grid spacing
- real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid
- real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid
- real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid
- real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid
- integer :: k !Iterator
- p_ref(1) = 0 !Both grids have a model top pressure of zero
- p_lag(1) = 0 !Both grids have a model top pressure of zero
- do k = 1 , nlev
- dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
- ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference
- ! Lagrangian pressure difference (flux in - flux out over the time step)
- dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) )
- p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell
- p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell
- enddo
-end subroutine remap_calc_grids
-
-!=======================================================================================================!
-
-
-
-subroutine remap1(Qdp,nx,qsize,dp1,dp2)
- ! remap 1 field
- ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
- ! dp1 layer thickness (source)
- ! dp2 layer thickness (target)
- !
- ! output: remaped Qdp, conserving mass, monotone on Q=Qdp/dp
- !
- implicit none
- integer, intent(in) :: nx,qsize
- real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
- real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
- ! ========================
- ! Local Variables
- ! ========================
-
- real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
- real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
- real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
- peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
- zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
- integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
- lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
- logical :: abort=.false.
-
- if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then
- call remap_Q_ppm(qdp,nx,qsize,dp1,dp2)
- return
- endif
-
- call t_startf('remap_Q_noppm')
-#if (defined COLUMN_OPENMP)
-!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
-!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
-!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
-!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
-#endif
- do q=1,qsize
- do i=1,nx
- do j=1,nx
-
- z1c(1)=0 ! source grid
- z2c(1)=0 ! target grid
- do k=1,nlev
- z1c(k+1)=z1c(k)+dp1(i,j,k)
- z2c(k+1)=z2c(k)+dp2(i,j,k)
- enddo
-
- zv(1)=0
- do k=1,nlev
- Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
- zv(k+1) = zv(k)+Qcol(k)
- enddo
-
- if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
- write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
- write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
- write(6,*) 'DATA FOR MODEL LEVELS'
- write(6,*) 'PLEVMODEL=',z2c(nlev+1)
- write(6,*) 'PLEV =',z1c(nlev+1)
- write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
- abort=.true.
- endif
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! quadratic splies with UK met office monotonicity constraints !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- zkr = 99
- ilev = 2
- zkr(1) = 1
- zkr(nlev+1) = nlev
- kloop: do k = 2,nlev
- do jk = ilev,nlev+1
- if (z1c(jk).ge.z2c(k)) then
- ilev = jk
- zkr(k) = jk-1
- cycle kloop
- endif
- enddo
- enddo kloop
-
- zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
- zgam(1) = 0.0
- zgam(nlev+1) = 1.0
- zhdp = z1c(2:nlev+1)-z1c(1:nlev)
-
-
- h = 1/zhdp
- zarg = Qcol * h
- rhs = 0
- lower_diag = 0
- diag = 0
- upper_diag = 0
-
- rhs(1)=3*zarg(1)
- rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
- rhs(nlev+1)=3*zarg(nlev)
-
- lower_diag(1)=1
- lower_diag(2:nlev) = h(1:nlev-1)
- lower_diag(nlev+1)=1
-
- diag(1)=2
- diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
- diag(nlev+1)=2
-
- upper_diag(1)=1
- upper_diag(2:nlev) = h(2:nlev)
- upper_diag(nlev+1)=0
-
- q_diag(1)=-upper_diag(1)/diag(1)
- rhs(1)= rhs(1)/diag(1)
-
- do k=2,nlev+1
- tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
- q_diag(k) = -upper_diag(k)*tmp_cal
- rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
- enddo
- do k=nlev,1,-1
- rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
- enddo
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! monotonicity modifications !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- filter_code = 0
- dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1)
- dy(nlev) = dy(nlev-1)
-
- dy = merge(zero, dy, abs(dy) < tiny )
-
- do k=1,nlev
- im1=MAX(1,k-1)
- im2=MAX(1,k-2)
- im3=MAX(1,k-3)
- ip1=MIN(nlev,k+1)
- t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0)
- t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 &
- .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 )
- t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k)))
-
- filter_code(k) = merge(0,1,t1+t2 > 0)
- rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1))
- filter_code(im1) = MAX(filter_code(im1),filter_code(k))
- enddo
-
- rhs = merge(qmax,rhs,rhs > qmax)
- rhs = merge(zero,rhs,rhs < zero)
-
- za0 = rhs(1:nlev)
- za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
- za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
-
- dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev)
- dy = merge(zero, dy, abs(dy) < tiny )
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! Compute the 3 quadratic spline coeffients {za0, za1, za2} !!
- !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !!
- !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
- h = rhs(2:nlev+1)
-
- do k=1,nlev
- xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny)
- xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny)
- f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2
-
- t1 = merge(1,0,ABS(za2(k)) > tiny)
- t2 = merge(1,0,xm <= zero .OR. xm >= 1)
- t3 = merge(1,0,za2(k) > zero)
- t4 = merge(1,0,za2(k) < zero)
- tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2)
- tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3)
-
- peaks=0
- peaks = merge(-1,peaks,tm .EQ. 1)
- peaks = merge(+1,peaks,tp .EQ. 1)
- peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1)
- peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1)
-
- im1=MAX(1,k-1)
- im2=MAX(1,k-2)
- ip1=MIN(nlev,k+1)
- ip2=MIN(nlev,k+2)
-
- t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. &
- (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. &
- (dy(im1)*float(peaks) <= tiny))
-
- filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. &
- (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny))
-
- if (filter_code(k) > 0) then
- level1 = rhs(k)
- level2 = (2*rhs(k)+h(k))/3
- level3 = 0.5*(rhs(k)+h(k))
- level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k)
- level5 = h(k)
-
- t1 = merge(1,0,h(k) >= rhs(k))
- t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5)
- t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2)
- t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5)
-
- lt1 = t1*t2
- lt2 = t1*(1-t2+t3)
- lt3 = t1*(1-t2+1-t3+t4)
-
- za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
- za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
- za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
-
- za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
- za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
- za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
-
- za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
- za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
- za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
-
- t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5)
- t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2)
- t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5)
-
- lt1 = (1-t1)*t2
- lt2 = (1-t1)*(1-t2+t3)
- lt3 = (1-t1)*(1-t2+1-t3+t4)
-
- za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
- za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
- za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
-
- za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
- za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
- za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
-
- za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
- za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
- za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
- endif
- enddo
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! start iteration from top to bottom of atmosphere !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- zv1 = 0
- do k=1,nlev
- if (zgam(k+1)>1d0) then
- WRITE(*,*) 'r not in [0:1]', zgam(k+1)
- abort=.true.
- endif
- zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
- (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
- Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
- zv1 = zv2
- enddo
- enddo
- enddo
- enddo ! q loop
- if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin')
- call t_stopf('remap_Q_noppm')
-
-end subroutine remap1
-
-subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2)
- ! remap 1 field
- ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
- ! dp1 layer thickness (source)
- ! dp2 layer thickness (target)
- !
- ! output: remaped Qdp, conserving mass
- !
- implicit none
- integer, intent(in) :: nx,qsize
- real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
- real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
- ! ========================
- ! Local Variables
- ! ========================
-
- real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
- real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
- real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
- peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
- zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
- integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
- lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
- logical :: abort=.false.
-! call t_startf('remap1_nofilter')
-
-#if (defined COLUMN_OPENMP)
-!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
-!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
-!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
-!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
-#endif
- do q=1,qsize
- do i=1,nx
- do j=1,nx
-
- z1c(1)=0 ! source grid
- z2c(1)=0 ! target grid
- do k=1,nlev
- z1c(k+1)=z1c(k)+dp1(i,j,k)
- z2c(k+1)=z2c(k)+dp2(i,j,k)
- enddo
-
- zv(1)=0
- do k=1,nlev
- Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
- zv(k+1) = zv(k)+Qcol(k)
- enddo
-
- if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
- write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
- write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
- write(6,*) 'DATA FOR MODEL LEVELS'
- write(6,*) 'PLEVMODEL=',z2c(nlev+1)
- write(6,*) 'PLEV =',z1c(nlev+1)
- write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
- abort=.true.
- endif
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! quadratic splies with UK met office monotonicity constraints !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- zkr = 99
- ilev = 2
- zkr(1) = 1
- zkr(nlev+1) = nlev
- kloop: do k = 2,nlev
- do jk = ilev,nlev+1
- if (z1c(jk).ge.z2c(k)) then
- ilev = jk
- zkr(k) = jk-1
- cycle kloop
- endif
- enddo
- enddo kloop
-
- zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
- zgam(1) = 0.0
- zgam(nlev+1) = 1.0
- zhdp = z1c(2:nlev+1)-z1c(1:nlev)
-
-
- h = 1/zhdp
- zarg = Qcol * h
- rhs = 0
- lower_diag = 0
- diag = 0
- upper_diag = 0
-
- rhs(1)=3*zarg(1)
- rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
- rhs(nlev+1)=3*zarg(nlev)
-
- lower_diag(1)=1
- lower_diag(2:nlev) = h(1:nlev-1)
- lower_diag(nlev+1)=1
-
- diag(1)=2
- diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
- diag(nlev+1)=2
-
- upper_diag(1)=1
- upper_diag(2:nlev) = h(2:nlev)
- upper_diag(nlev+1)=0
-
- q_diag(1)=-upper_diag(1)/diag(1)
- rhs(1)= rhs(1)/diag(1)
-
- do k=2,nlev+1
- tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
- q_diag(k) = -upper_diag(k)*tmp_cal
- rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
- enddo
- do k=nlev,1,-1
- rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
- enddo
-
- za0 = rhs(1:nlev)
- za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
- za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
-
-
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
- !! start iteration from top to bottom of atmosphere !!
- !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
- zv1 = 0
- do k=1,nlev
- if (zgam(k+1)>1d0) then
- WRITE(*,*) 'r not in [0:1]', zgam(k+1)
- abort=.true.
- endif
- zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
- (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
- Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
- zv1 = zv2
- enddo
- enddo
- enddo
- enddo ! q loop
- if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin')
-! call t_stopf('remap1_nofilter')
-end subroutine remap1_nofilter
-
-!=======================================================================================================!
-
-
-!This uses the exact same model and reference grids and data as remap_Q, but it interpolates
-!using PPM instead of splines.
-subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2)
- ! remap 1 field
- ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
- ! dp1 layer thickness (source)
- ! dp2 layer thickness (target)
- !
- ! output: remaped Qdp, conserving mass
- !
- use control_mod, only : vert_remap_q_alg
- implicit none
- integer,intent(in) :: nx,qsize
- real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
- real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
- ! Local Variables
- integer, parameter :: gs = 2 !Number of cells to place in the ghost region
- real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid
- real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid
- real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface
- real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid
- real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid
- real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for old grid
- real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell
- real(kind=real_kind), dimension( nlev ) :: z1, z2
- real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid spacings
- real(kind=real_kind) :: mymass, massn1, massn2
- integer :: i, j, k, q, kk, kid(nlev)
-
- call t_startf('remap_Q_ppm')
- do j = 1 , nx
- do i = 1 , nx
-
- pin(1)=0
- pio(1)=0
- do k=1,nlev
- dpn(k)=dp2(i,j,k)
- dpo(k)=dp1(i,j,k)
- pin(k+1)=pin(k)+dpn(k)
- pio(k+1)=pio(k)+dpo(k)
- enddo
-
-
-
- pio(nlev+2) = pio(nlev+1) + 1. !This is here to allow an entire block of k threads to run in the remapping phase.
- !It makes sure there's an old interface value below the domain that is larger.
- pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change.
- !Therefore, the pressure of that mass cannot either.
- !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence.
- do k = 1 , gs
- dpo(1 -k) = dpo( k)
- dpo(nlev+k) = dpo(nlev+1-k)
- enddo
-
- !Compute remapping intervals once for all tracers. Find the old grid cell index in which the
- !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new
- !interface location. In practice, the grid never deforms past one cell, so the search can be
- !simplified by this. Also, the interval of integration is usually of magnitude close to zero
- !or close to dpo because of minimial deformation.
- !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so
- !I set them equal to each other.
- do k = 1 , nlev
- kk = k !Keep from an order n^2 search operation by assuming the old cell index is close.
- !Find the index of the old grid cell in which this new cell's bottom interface resides.
- do while ( pio(kk) <= pin(k+1) )
- kk = kk + 1
- enddo
- kk = kk - 1 !kk is now the cell index we're integrating over.
- if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds.
- !Top bounds match anyway, so doesn't matter what coefficients are used
- kid(k) = kk !Save for reuse
- z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell
- !In fact, we're usually integrating very little or almost all of the cell in question
- z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent
- !coordinate domain [-0.5,0.5].
- enddo
-
- !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the
- !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop.
- ppmdx(:,:) = compute_ppm_grids( dpo )
-
- !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and
- !mass accumulation
- do q = 1 , qsize
- !Accumulate the old mass up to old grid cell interface locations to simplify integration
- !during remapping. Also, divide out the grid spacing so we're working with actual tracer
- !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure
- !tracer consistency for an initially uniform field. I copied it from the old remap routine.
- masso(1) = 0.
- do k = 1 , nlev
- ao(k) = Qdp(i,j,k,q)
- masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping
- ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass.
- enddo
- !Fill in ghost values. Ignored if vert_remap_q_alg == 2
- do k = 1 , gs
- ao(1 -k) = ao( k)
- ao(nlev+k) = ao(nlev+1-k)
- enddo
- !Compute monotonic and conservative PPM reconstruction over every cell
- coefs(:,:) = compute_ppm( ao , ppmdx )
- !Compute tracer values on the new grid by integrating from the old cell bottom to the new
- !cell interface to form a new grid mass accumulation. Taking the difference between
- !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is
- !supposed to hold the full mass this needs no normalization.
- massn1 = 0.
- do k = 1 , nlev
- kk = kid(k)
- massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk)
- Qdp(i,j,k,q) = massn2 - massn1
- massn1 = massn2
- enddo
- enddo
- enddo
- enddo
- call t_stopf('remap_Q_ppm')
-end subroutine remap_Q_ppm
-
-
-!=======================================================================================================!
-
-
-!THis compute grid-based coefficients from Collela & Woodward 1984.
-function compute_ppm_grids( dx ) result(rslt)
- use control_mod, only: vert_remap_q_alg
- implicit none
- real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings
- real(kind=real_kind) :: rslt(10,0:nlev+1) !grid spacings
- integer :: j
- integer :: indB, indE
-
- !Calculate grid-based coefficients for stage 1 of compute_ppm
- if (vert_remap_q_alg == 2) then
- indB = 2
- indE = nlev-1
- else
- indB = 0
- indE = nlev+1
- endif
- do j = indB , indE
- rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) )
- rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) )
- rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) )
- enddo
-
- !Caculate grid-based coefficients for stage 2 of compute_ppm
- if (vert_remap_q_alg == 2) then
- indB = 2
- indE = nlev-2
- else
- indB = 0
- indE = nlev
- endif
- do j = indB , indE
- rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) )
- rslt( 5,j) = 1. / sum( dx(j-1:j+2) )
- rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) )
- rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) )
- rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) )
- rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) )
- rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) )
- enddo
-end function compute_ppm_grids
-
-!=======================================================================================================!
-
-
-
-!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages
-function compute_ppm( a , dx ) result(coefs)
- use control_mod, only: vert_remap_q_alg
- implicit none
- real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values
- real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid spacings
- real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola)
- real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values
- real(kind=real_kind) :: dma(0:nlev+1) !An expression from Collela's '84 publication
- real(kind=real_kind) :: da !Ditto
- ! Hold expressions based on the grid (which are cumbersome).
- real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10
- real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting
- integer :: j
- integer :: indB, indE
-
- ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain
- if (vert_remap_q_alg == 2) then
- indB = 2
- indE = nlev-1
- else
- indB = 0
- indE = nlev+1
- endif
- do j = indB , indE
- da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) )
- dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da)
- if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0.
- enddo
-
- ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1)
- if (vert_remap_q_alg == 2) then
- indB = 2
- indE = nlev-2
- else
- indB = 0
- indE = nlev
- endif
- do j = indB , indE
- ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) &
- * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) )
- enddo
-
- ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain
- ! (dimension nlev) using ai on either side and ao within the cell.
- if (vert_remap_q_alg == 2) then
- indB = 3
- indE = nlev-2
- else
- indB = 1
- indE = nlev
- endif
- do j = indB , indE
- al = ai(j-1)
- ar = ai(j )
- if ( (ar - a(j)) * (a(j) - al) <= 0. ) then
- al = a(j)
- ar = a(j)
- endif
- if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar
- if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al
- !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx
- coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4.
- coefs(1,j) = ar - al
- coefs(2,j) = -6. * a(j) + 3. * ( al + ar )
- enddo
-
- !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom
- !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth
- !moment to the cell mean is sufficient to maintain conservation.
- if (vert_remap_q_alg == 2) then
- coefs(0,1:2) = a(1:2)
- coefs(1:2,1:2) = 0.
- coefs(0,nlev-1:nlev) = a(nlev-1:nlev)
- coefs(1:2,nlev-1:nlev) = 0.D0
- endif
-end function compute_ppm
-
-!=======================================================================================================!
-
-
-!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx,
-!given two bounds. Make sure this gets inlined during compilation.
-function integrate_parabola( a , x1 , x2 ) result(mass)
- implicit none
- real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola
- real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration
- real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration
- real(kind=real_kind) :: mass
- mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1
-end function integrate_parabola
-
-
-!=============================================================================================!
-
-
-
-end module vertremap_mod
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!! End GPU remap module !!
-!! by Rick Archibald, 2010 !!
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-!=======================================================================================================!
-
-
+!SUBROUTINES:
+! prim_advec_tracers_remap_rk2()
+! SEM 2D RK2 + monotone remap + hyper viscosity
+! SEM 2D RK2 can use sign-preserving or monotone reconstruction
+!
+!For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt
+!For consistency, if Q=1
+! dp1 = dp(t)- dtq div[ U1 dp(t)]
+! dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave )
+! dp* = (dp(t) + dp2 )/2
+! = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave )
+!
+!so we require:
+! U1 = Udp_ave / dp(t)
+! U2 = Udp_ave / dp1
+!
+!For tracer advection:
+! Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)]
+! Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave )
+! Qdp* = (Qdp(t) + Qdp2 )/2
+! = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ]/2 + dtq D( Q dpdiss_ave )
+!
+!Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above
+!Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above
+!
+!For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS
+!
+!
+!last step:
+! remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ]
-module prim_advection_mod
+module prim_advection_mod_base
!
! two formulations. both are conservative
! u grad Q formulation:
@@ -3068,4 +2280,4 @@ subroutine vertical_remap(hybrid,elem,fvm,hvcoord,dt,np1,np1_qdp,np1_fvm,nets,ne
call t_stopf('vertical_remap')
end subroutine vertical_remap
-end module prim_advection_mod
+end module prim_advection_mod_base
diff --git a/components/homme/src/share/prim_driver_mod.F90 b/components/homme/src/share/prim_driver_mod.F90
index a4e8b23fa48..9c54f0454dc 100644
--- a/components/homme/src/share/prim_driver_mod.F90
+++ b/components/homme/src/share/prim_driver_mod.F90
@@ -93,7 +93,7 @@ subroutine prim_init1(elem, fvm, par, dom_mt, Tl)
! --------------------------------
use schedule_mod, only : genEdgeSched, PrintSchedule
! --------------------------------
- use arch_switch_mod, only: prim_advec_init1
+ use prim_advection_mod, only: prim_advec_init1
! --------------------------------
use prim_advance_mod, only: prim_advance_init
! --------------------------------
@@ -608,7 +608,8 @@ subroutine prim_init2(elem, fvm, hybrid, nets, nete, tl, hvcoord)
use derivative_mod, only : derivinit, interpolate_gll2fvm_points, interpolate_gll2spelt_points, v2pinit
use global_norms_mod, only : test_global_integral, print_cfl
use hybvcoord_mod, only : hvcoord_t
- use arch_switch_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv, arch_init2
+ use prim_advection_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv
+ use solver_init_mod, only: solver_init2
#ifdef CAM
#else
use column_model_mod, only : InitColumnModel
@@ -1101,7 +1102,7 @@ end subroutine noxinit
if (hybrid%masterthread) write(iulog,*) "initial state:"
call prim_printstate(elem, tl, hybrid,hvcoord,nets,nete, fvm)
- call arch_init2(elem(:), deriv(hybrid%ithr))
+ call solver_init2(elem(:), deriv(hybrid%ithr))
call Prim_Advec_Init2(elem(:), hvcoord, hybrid)
end subroutine prim_init2
@@ -1161,7 +1162,7 @@ subroutine prim_run(elem, hybrid,nets,nete, dt, tl, hvcoord, advance_name)
use control_mod, only: statefreq, integration, ftype, qsplit, disable_diagnostics
use prim_advance_mod, only : prim_advance_exp, prim_advance_si, preq_robert3
use prim_state_mod, only : prim_printstate, prim_diag_scalars, prim_energy_halftimes
- use arch_switch_mod, only: deriv
+ use prim_advection_mod, only: deriv
use parallel_mod, only : abortmp
#ifndef CAM
use column_model_mod, only : ApplyColumnModel
@@ -1617,7 +1618,7 @@ subroutine prim_step(elem, fvm, hybrid,nets,nete, dt, tl, hvcoord, compute_diagn
use fvm_bsp_mod, only : get_boomerang_velocities_gll, get_solidbody_velocities_gll
use prim_advance_mod, only : prim_advance_exp, overwrite_SEdensity
use prim_advection_mod, only : prim_advec_tracers_fvm
- use arch_switch_mod, only : prim_advec_tracers_remap, deriv
+ use prim_advection_mod, only : prim_advec_tracers_remap, deriv
use derivative_mod, only : subcell_integration
#if defined(_SPELT)
use prim_advection_mod, only : prim_advec_tracers_spelt
@@ -2026,7 +2027,7 @@ subroutine smooth_topo_datasets(phis,sghdyn,sgh30dyn,elem,hybrid,nets,nete)
use derivative_mod, only : derivative_t , laplace_sphere_wk
use viscosity_mod, only : biharmonic_wk
use prim_advance_mod, only : smooth_phis
- use arch_switch_mod, only: deriv
+ use prim_advection_mod, only: deriv
implicit none
integer , intent(in) :: nets,nete
diff --git a/components/homme/src/share/solver_init_mod_base.F90 b/components/homme/src/share/solver_init_mod_base.F90
new file mode 100644
index 00000000000..774d103a9c3
--- /dev/null
+++ b/components/homme/src/share/solver_init_mod_base.F90
@@ -0,0 +1,25 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+module solver_init_mod_base
+ implicit none
+ private
+ ! Only solver_init2 is exported from this module.
+ public :: solver_init2
+
+
+contains
+
+ ! Empty initialization routine: takes the element array and a derivative_t and performs no work.
+ subroutine solver_init2( elem , deriv )
+ use element_mod, only: element_t
+ use derivative_mod, only: derivative_t
+ implicit none
+ type(element_t) , intent(in) :: elem(:)
+ type(derivative_t), intent(in) :: deriv
+ ! Intentionally a no-op; neither elem nor deriv is referenced.
+ end subroutine solver_init2
+
+
+end module solver_init_mod_base
diff --git a/components/homme/src/share/vertremap_mod_base.F90 b/components/homme/src/share/vertremap_mod_base.F90
new file mode 100644
index 00000000000..c915acf7350
--- /dev/null
+++ b/components/homme/src/share/vertremap_mod_base.F90
@@ -0,0 +1,767 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+!SUBROUTINES:
+!
+!Notes on Lagrange+REMAP advection
+!dynamics will compute mean fluxes, so that (i.e. for qsplit=3)
+!
+! dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3)
+!
+!Where the floating lagrangian component:
+! dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3)
+!OR:
+! dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) )
+
+
+module vertremap_mod_base
+
+ !**************************************************************************************
+ !
+ ! Purpose:
+ ! Construct sub-grid-scale polynomials using piecewise spline method with
+ ! monotone filters.
+ !
+ ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR)
+ ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF)
+ !
+ !**************************************************************************************
+
+ use kinds, only : real_kind,int_kind
+ use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc
+ use hybvcoord_mod, only : hvcoord_t
+ use element_mod, only : element_t
+ use fvm_control_volume_mod, only : fvm_struct
+ use spelt_mod, only : spelt_struct
+ use perf_mod, only : t_startf, t_stopf ! _EXTERNAL
+ use parallel_mod, only : abortmp, parallel_t
+ use control_mod, only : vert_remap_q_alg
+
+ public remap1 ! remap any field, splines, monotone
+ public remap1_nofilter ! remap any field, splines, no filter
+! todo: tweak interface to match remap1 above, rename remap1_ppm:
+ public remap_q_ppm ! remap state%Q, PPM, monotone
+
+ contains
+
+!=======================================================================================================!
+
+!remap_calc_grids: for a single vertical column, computes interface pressures and layer pressure differences on the
+!reference grid and on the deformed Lagrangian grid. It was factored out because each routine repeated this task.
+subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref )
+ implicit none
+ type(hvcoord_t) , intent(in ) :: hvcoord !Vertical coordinate data; hyai, hybi and ps0 are read here
+ real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column
+ real(kind=real_kind) , intent(in ) :: dt !Time step
+ real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Interface flux; dt times its difference across a
+ !layer is added to dp_ref to obtain dp_lag
+ real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid
+ real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid
+ real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid
+ real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid
+ integer :: k !Iterator
+ p_ref(1) = 0 !Interface pressures on both grids are accumulated starting from zero at the first interface
+ p_lag(1) = 0 !Interface pressures on both grids are accumulated starting from zero at the first interface
+ do k = 1 , nlev
+ dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
+ ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference
+ ! Lagrangian pressure difference (flux in - flux out over the time step)
+ dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) )
+ p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell
+ p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell
+ enddo
+end subroutine remap_calc_grids
+
+!=======================================================================================================!
+
+
+
+subroutine remap1(Qdp,nx,qsize,dp1,dp2)
+ ! remap the qsize fields held in Qdp from the dp1 layers onto the dp2 layers, one column at a time
+ ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+ ! dp1 layer thickness (source)
+ ! dp2 layer thickness (target)
+ !
+ ! output: remapped Qdp, conserving mass, monotone on Q=Qdp/dp
+ ! vert_remap_q_alg of 1 or 2 hands the work to remap_Q_ppm instead of the splines below.
+ implicit none
+ integer, intent(in) :: nx,qsize
+ real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+ real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+ ! ========================
+ ! Local Variables
+ ! ========================
+ ! NOTE(review): tiny = 1e-12 is a default-kind real literal (qmax uses a d exponent) -- confirm intended precision.
+ real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+ real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
+ real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
+ peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
+ zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
+ integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
+ lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
+ logical :: abort=.false.
+ ! NOTE(review): abort has an initializer (implicit SAVE) and is assigned inside the OpenMP loop without a clause.
+ if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then
+ call remap_Q_ppm(qdp,nx,qsize,dp1,dp2)
+ return
+ endif
+ ! All other vert_remap_q_alg values use the quadratic-spline remap below.
+ call t_startf('remap_Q_noppm')
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
+!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
+!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
+!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
+#endif
+ do q=1,qsize
+ do i=1,nx
+ do j=1,nx
+ ! Interface coordinates: running sums of the layer thicknesses starting from zero.
+ z1c(1)=0 ! source grid
+ z2c(1)=0 ! target grid
+ do k=1,nlev
+ z1c(k+1)=z1c(k)+dp1(i,j,k)
+ z2c(k+1)=z2c(k)+dp2(i,j,k)
+ enddo
+ ! zv(k+1) is the tracer mass accumulated from the first layer through source layer k.
+ zv(1)=0
+ do k=1,nlev
+ Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
+ zv(k+1) = zv(k)+Qcol(k)
+ enddo
+ ! Source and target column totals must agree to 1e-6; otherwise report and flag for abort after the loops.
+ if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+ write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+ write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
+ write(6,*) 'DATA FOR MODEL LEVELS'
+ write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+ write(6,*) 'PLEV =',z1c(nlev+1)
+ write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
+ abort=.true.
+ endif
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! quadratic splines with UK met office monotonicity constraints !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ ! zkr(k) = source layer holding target interface k; entries the search does not set keep the value 99.
+ zkr = 99
+ ilev = 2
+ zkr(1) = 1
+ zkr(nlev+1) = nlev
+ kloop: do k = 2,nlev
+ do jk = ilev,nlev+1
+ if (z1c(jk).ge.z2c(k)) then
+ ilev = jk
+ zkr(k) = jk-1
+ cycle kloop
+ endif
+ enddo
+ enddo kloop
+ ! zgam = fractional position of each target interface within its source layer; the end points are pinned.
+ zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+ zgam(1) = 0.0
+ zgam(nlev+1) = 1.0
+ zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+ ! h = inverse source layer thickness, zarg = layer-mean value (mass divided by thickness).
+
+ h = 1/zhdp
+ zarg = Qcol * h
+ rhs = 0
+ lower_diag = 0
+ diag = 0
+ upper_diag = 0
+ ! Tridiagonal system for the interface values; its solution is left in rhs.
+ rhs(1)=3*zarg(1)
+ rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+ rhs(nlev+1)=3*zarg(nlev)
+
+ lower_diag(1)=1
+ lower_diag(2:nlev) = h(1:nlev-1)
+ lower_diag(nlev+1)=1
+
+ diag(1)=2
+ diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+ diag(nlev+1)=2
+
+ upper_diag(1)=1
+ upper_diag(2:nlev) = h(2:nlev)
+ upper_diag(nlev+1)=0
+ ! Solve: forward elimination followed by back substitution.
+ q_diag(1)=-upper_diag(1)/diag(1)
+ rhs(1)= rhs(1)/diag(1)
+
+ do k=2,nlev+1
+ tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
+ q_diag(k) = -upper_diag(k)*tmp_cal
+ rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+ enddo
+ do k=nlev,1,-1
+ rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+ enddo
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! monotonicity modifications !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ filter_code = 0
+ dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1)
+ dy(nlev) = dy(nlev-1)
+
+ dy = merge(zero, dy, abs(dy) < tiny )
+ ! First pass: an interface value failing both tests t1 and t2 is replaced by the nearer adjacent layer mean.
+ do k=1,nlev
+ im1=MAX(1,k-1)
+ im2=MAX(1,k-2)
+ im3=MAX(1,k-3)
+ ip1=MIN(nlev,k+1)
+ t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0)
+ t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 &
+ .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 )
+ t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k)))
+
+ filter_code(k) = merge(0,1,t1+t2 > 0)
+ rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1))
+ filter_code(im1) = MAX(filter_code(im1),filter_code(k))
+ enddo
+ ! Clip the interface values to the range [0, qmax].
+ rhs = merge(qmax,rhs,rhs > qmax)
+ rhs = merge(zero,rhs,rhs < zero)
+ ! Parabola za0 + za1*x + za2*x**2 on x in [0,1]: left edge rhs(k), right edge rhs(k+1), mean zarg(k).
+ za0 = rhs(1:nlev)
+ za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+ za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+ dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev)
+ dy = merge(zero, dy, abs(dy) < tiny )
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! Compute the 3 quadratic spline coefficients {za0, za1, za2} !!
+ !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !!
+ !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ ! h is reused from here on to hold the right-edge interface value of each layer.
+ h = rhs(2:nlev+1)
+
+ do k=1,nlev
+ xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny)
+ xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny)
+ f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2
+
+ t1 = merge(1,0,ABS(za2(k)) > tiny)
+ t2 = merge(1,0,xm <= zero .OR. xm >= 1)
+ t3 = merge(1,0,za2(k) > zero)
+ t4 = merge(1,0,za2(k) < zero)
+ tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2)
+ tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3)
+
+ peaks=0
+ peaks = merge(-1,peaks,tm .EQ. 1)
+ peaks = merge(+1,peaks,tp .EQ. 1)
+ peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1)
+ peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1)
+
+ im1=MAX(1,k-1)
+ im2=MAX(1,k-2)
+ ip1=MIN(nlev,k+1)
+ ip2=MIN(nlev,k+2)
+
+ t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. &
+ (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. &
+ (dy(im1)*float(peaks) <= tiny))
+
+ filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. &
+ (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny))
+ ! Flagged layers get new coefficients chosen by where the mean zarg(k) sits between the two edge values.
+ if (filter_code(k) > 0) then
+ level1 = rhs(k)
+ level2 = (2*rhs(k)+h(k))/3
+ level3 = 0.5*(rhs(k)+h(k))
+ level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k)
+ level5 = h(k)
+
+ t1 = merge(1,0,h(k) >= rhs(k))
+ t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5)
+ t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2)
+ t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5)
+
+ lt1 = t1*t2
+ lt2 = t1*(1-t2+t3)
+ lt3 = t1*(1-t2+1-t3+t4)
+
+ za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+ za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+ za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+ za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+ za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+ za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+ za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+ za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+ za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+
+ t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5)
+ t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2)
+ t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5)
+
+ lt1 = (1-t1)*t2
+ lt2 = (1-t1)*(1-t2+t3)
+ lt3 = (1-t1)*(1-t2+1-t3+t4)
+
+ za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+ za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+ za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+ za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+ za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+ za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+ za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+ za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+ za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+ endif
+ enddo
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! start iteration from top to bottom of atmosphere !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ ! zv2 = mass accumulated down to target interface k+1; successive differences give the new layer mass.
+ zv1 = 0
+ do k=1,nlev
+ if (zgam(k+1)>1d0) then
+ WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+ abort=.true.
+ endif
+ zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+ (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+ Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+ zv1 = zv2
+ enddo
+ enddo
+ enddo
+ enddo ! q loop
+ if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin')
+ call t_stopf('remap_Q_noppm')
+
+end subroutine remap1
+
+subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2)
+ ! remap the qsize fields held in Qdp from the dp1 layers onto the dp2 layers, with no monotonicity filter
+ ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+ ! dp1 layer thickness (source)
+ ! dp2 layer thickness (target)
+ !
+ ! output: remapped Qdp, conserving mass
+ !
+ implicit none
+ integer, intent(in) :: nx,qsize
+ real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+ real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+ ! ========================
+ ! Local Variables
+ ! ========================
+ ! NOTE(review): many locals below (filter_code, peaks, level1..level5, dy, ...) are never used in the statements here.
+ real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+ real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
+ real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
+ peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
+ zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
+ integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
+ lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
+ logical :: abort=.false.
+! call t_startf('remap1_nofilter')
+ ! NOTE(review): abort has an initializer (implicit SAVE) and is assigned inside the OpenMP loop without a clause.
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
+!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
+!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
+!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
+#endif
+ do q=1,qsize
+ do i=1,nx
+ do j=1,nx
+ ! Interface coordinates: running sums of the layer thicknesses starting from zero.
+ z1c(1)=0 ! source grid
+ z2c(1)=0 ! target grid
+ do k=1,nlev
+ z1c(k+1)=z1c(k)+dp1(i,j,k)
+ z2c(k+1)=z2c(k)+dp2(i,j,k)
+ enddo
+ ! zv(k+1) is the tracer mass accumulated from the first layer through source layer k.
+ zv(1)=0
+ do k=1,nlev
+ Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
+ zv(k+1) = zv(k)+Qcol(k)
+ enddo
+ ! Source and target column totals must agree to 1e-6; otherwise report and flag for abort after the loops.
+ if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+ write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+ write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
+ write(6,*) 'DATA FOR MODEL LEVELS'
+ write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+ write(6,*) 'PLEV =',z1c(nlev+1)
+ write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
+ abort=.true.
+ endif
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! quadratic splines, no monotonicity filter (unfiltered remap)  !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ ! zkr(k) = source layer holding target interface k; entries the search does not set keep the value 99.
+ zkr = 99
+ ilev = 2
+ zkr(1) = 1
+ zkr(nlev+1) = nlev
+ kloop: do k = 2,nlev
+ do jk = ilev,nlev+1
+ if (z1c(jk).ge.z2c(k)) then
+ ilev = jk
+ zkr(k) = jk-1
+ cycle kloop
+ endif
+ enddo
+ enddo kloop
+ ! zgam = fractional position of each target interface within its source layer; the end points are pinned.
+ zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+ zgam(1) = 0.0
+ zgam(nlev+1) = 1.0
+ zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+
+
+ h = 1/zhdp
+ zarg = Qcol * h
+ rhs = 0
+ lower_diag = 0
+ diag = 0
+ upper_diag = 0
+ ! Tridiagonal system for the interface values; its solution is left in rhs.
+ rhs(1)=3*zarg(1)
+ rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+ rhs(nlev+1)=3*zarg(nlev)
+
+ lower_diag(1)=1
+ lower_diag(2:nlev) = h(1:nlev-1)
+ lower_diag(nlev+1)=1
+
+ diag(1)=2
+ diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+ diag(nlev+1)=2
+
+ upper_diag(1)=1
+ upper_diag(2:nlev) = h(2:nlev)
+ upper_diag(nlev+1)=0
+ ! Solve: forward elimination followed by back substitution.
+ q_diag(1)=-upper_diag(1)/diag(1)
+ rhs(1)= rhs(1)/diag(1)
+
+ do k=2,nlev+1
+ tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
+ q_diag(k) = -upper_diag(k)*tmp_cal
+ rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+ enddo
+ do k=nlev,1,-1
+ rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+ enddo
+ ! Parabola za0 + za1*x + za2*x**2 on x in [0,1]: left edge rhs(k), right edge rhs(k+1), mean zarg(k).
+ za0 = rhs(1:nlev)
+ za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+ za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ !! start iteration from top to bottom of atmosphere !!
+ !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ ! zv2 = mass accumulated down to target interface k+1; successive differences give the new layer mass.
+ zv1 = 0
+ do k=1,nlev
+ if (zgam(k+1)>1d0) then
+ WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+ abort=.true.
+ endif
+ zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+ (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+ Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+ zv1 = zv2
+ enddo
+ enddo
+ enddo
+ enddo ! q loop
+ if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin')
+! call t_stopf('remap1_nofilter')
+end subroutine remap1_nofilter
+
+!=======================================================================================================!
+
+
+!This takes the same arguments (source and target layer thicknesses, tracer mass) as the spline remap remap1,
+!but it interpolates using PPM instead of splines.
+subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2)
+ ! remap the qsize fields held in Qdp from the dp1 layers onto the dp2 layers
+ ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+ ! dp1 layer thickness (source)
+ ! dp2 layer thickness (target)
+ !
+ ! output: remapped Qdp, conserving mass
+ !
+ use control_mod, only : vert_remap_q_alg
+ implicit none
+ integer,intent(in) :: nx,qsize
+ real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+ real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+ ! Local Variables
+ integer, parameter :: gs = 2 !Number of cells to place in the ghost region
+ real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid
+ real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid
+ real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface
+ real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid
+ real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid
+ real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for new grid
+ real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell
+ real(kind=real_kind), dimension( nlev ) :: z1, z2
+ real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid-only coefficients returned by compute_ppm_grids
+ real(kind=real_kind) :: mymass, massn1, massn2
+ integer :: i, j, k, q, kk, kid(nlev)
+ ! Loop over columns; grid-only quantities are computed once per column and reused for all qsize tracers.
+ call t_startf('remap_Q_ppm')
+ do j = 1 , nx
+ do i = 1 , nx
+ ! Interface pressures of the new (pin) and old (pio) grids, accumulated from zero.
+ pin(1)=0
+ pio(1)=0
+ do k=1,nlev
+ dpn(k)=dp2(i,j,k)
+ dpo(k)=dp1(i,j,k)
+ pin(k+1)=pin(k)+dpn(k)
+ pio(k+1)=pio(k)+dpo(k)
+ enddo
+
+
+
+ pio(nlev+2) = pio(nlev+1) + 1. !This is here to allow an entire block of k threads to run in the remapping phase.
+ !It makes sure there's an old interface value below the domain that is larger.
+ pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change.
+ !Therefore, the pressure of that mass cannot either.
+ !Fill in the ghost regions with mirrored values. if vert_remap_q_alg == 2, this is of no consequence.
+ do k = 1 , gs
+ dpo(1 -k) = dpo( k)
+ dpo(nlev+k) = dpo(nlev+1-k)
+ enddo
+
+ !Compute remapping intervals once for all tracers. Find the old grid cell index in which the
+ !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new
+ !interface location. In practice, the grid never deforms past one cell, so the search can be
+ !simplified by this. Also, the interval of integration is usually of magnitude close to zero
+ !or close to dpo because of minimal deformation.
+ !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so
+ !I set them equal to each other.
+ do k = 1 , nlev
+ kk = k !Keep from an order n^2 search operation by assuming the old cell index is close.
+ !Find the index of the old grid cell in which this new cell's bottom interface resides.
+ do while ( pio(kk) <= pin(k+1) )
+ kk = kk + 1
+ enddo
+ kk = kk - 1 !kk is now the cell index we're integrating over.
+ if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds.
+ !Top bounds match anyway, so doesn't matter what coefficients are used
+ kid(k) = kk !Save for reuse
+ z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell
+ !In fact, we're usually integrating very little or almost all of the cell in question
+ z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent
+ !coordinate domain [-0.5,0.5].
+ enddo
+
+ !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the
+ !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop.
+ ppmdx(:,:) = compute_ppm_grids( dpo )
+
+ !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and
+ !mass accumulation
+ do q = 1 , qsize
+ !Accumulate the old mass up to old grid cell interface locations to simplify integration
+ !during remapping. Also, divide out the grid spacing so we're working with actual tracer
+ !values and can conserve mass. NOTE(review): the original remark here mentioned an ifndef ZEROHORZ
+ !option (copied from the old remap routine); no such option appears in this routine.
+ masso(1) = 0.
+ do k = 1 , nlev
+ ao(k) = Qdp(i,j,k,q)
+ masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping
+ ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass.
+ enddo
+ !Fill in ghost values. Ignored if vert_remap_q_alg == 2
+ do k = 1 , gs
+ ao(1 -k) = ao( k)
+ ao(nlev+k) = ao(nlev+1-k)
+ enddo
+ !Compute monotonic and conservative PPM reconstruction over every cell
+ coefs(:,:) = compute_ppm( ao , ppmdx )
+ !Compute tracer values on the new grid by integrating from the old cell bottom to the new
+ !cell interface to form a new grid mass accumulation. Taking the difference between
+ !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is
+ !supposed to hold the full mass this needs no normalization.
+ massn1 = 0.
+ do k = 1 , nlev
+ kk = kid(k)
+ massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk)
+ Qdp(i,j,k,q) = massn2 - massn1
+ massn1 = massn2
+ enddo
+ enddo
+ enddo
+ enddo
+ call t_stopf('remap_Q_ppm')
+end subroutine remap_Q_ppm
+
+
+!=======================================================================================================!
+
+
+!This computes the grid-based coefficients from Colella & Woodward 1984.
+function compute_ppm_grids( dx ) result(rslt)
+ use control_mod, only: vert_remap_q_alg
+ implicit none
+ real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings
+ real(kind=real_kind) :: rslt(10,0:nlev+1) !grid-only coefficients: rows 1-3 for stage 1, rows 4-10 for stage 2
+ integer :: j
+ integer :: indB, indE
+ ! NOTE(review): when vert_remap_q_alg == 2 only columns indB..indE of rslt are assigned; the rest stay unset.
+ !Calculate grid-based coefficients for stage 1 of compute_ppm
+ if (vert_remap_q_alg == 2) then
+ indB = 2
+ indE = nlev-1
+ else
+ indB = 0
+ indE = nlev+1
+ endif
+ do j = indB , indE
+ rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) )
+ rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) )
+ rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) )
+ enddo
+
+ !Calculate grid-based coefficients for stage 2 of compute_ppm
+ if (vert_remap_q_alg == 2) then
+ indB = 2
+ indE = nlev-2
+ else
+ indB = 0
+ indE = nlev
+ endif
+ do j = indB , indE
+ rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) )
+ rslt( 5,j) = 1. / sum( dx(j-1:j+2) )
+ rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) )
+ rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) )
+ rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) )
+ rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) )
+ rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) )
+ enddo
+end function compute_ppm_grids
+
+!=======================================================================================================!
+
+
+
+!This computes a limited parabolic interpolant using a net 5-cell stencil; the computation is broken up into 3 stages
+function compute_ppm( a , dx ) result(coefs)
+ use control_mod, only: vert_remap_q_alg
+ implicit none
+ real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values
+ real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid-only coefficients (remap_Q_ppm passes compute_ppm_grids output)
+ real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola)
+ real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values
+ real(kind=real_kind) :: dma(0:nlev+1) !An expression from the Colella & Woodward 1984 publication
+ real(kind=real_kind) :: da !Ditto
+ ! Hold expressions based on the grid (which are cumbersome). NOTE(review): dx1..dx10 are never referenced below.
+ real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10
+ real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting
+ integer :: j
+ integer :: indB, indE
+
+ ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain
+ if (vert_remap_q_alg == 2) then
+ indB = 2
+ indE = nlev-1
+ else
+ indB = 0
+ indE = nlev+1
+ endif
+ do j = indB , indE
+ da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) )
+ dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da)
+ if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0.
+ enddo
+
+ ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1)
+ if (vert_remap_q_alg == 2) then
+ indB = 2
+ indE = nlev-2
+ else
+ indB = 0
+ indE = nlev
+ endif
+ do j = indB , indE
+ ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) &
+ * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) )
+ enddo
+
+ ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain
+ ! (dimension nlev) using ai on either side and the cell mean a within the cell.
+ if (vert_remap_q_alg == 2) then
+ indB = 3
+ indE = nlev-2
+ else
+ indB = 1
+ indE = nlev
+ endif
+ do j = indB , indE
+ al = ai(j-1)
+ ar = ai(j )
+ if ( (ar - a(j)) * (a(j) - al) <= 0. ) then
+ al = a(j)
+ ar = a(j)
+ endif
+ if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar
+ if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al
+ !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx
+ coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4.
+ coefs(1,j) = ar - al
+ coefs(2,j) = -6. * a(j) + 3. * ( al + ar )
+ enddo
+
+ !If we are not using a mirrored boundary condition (vert_remap_q_alg == 2), then make the two cells bordering the
+ !top and bottom material boundaries piecewise constant. Zeroing out the first and second moments, and setting the
+ !zeroth moment to the cell mean is sufficient to maintain conservation.
+ if (vert_remap_q_alg == 2) then
+ coefs(0,1:2) = a(1:2)
+ coefs(1:2,1:2) = 0.
+ coefs(0,nlev-1:nlev) = a(nlev-1:nlev)
+ coefs(1:2,nlev-1:nlev) = 0.D0
+ endif
+end function compute_ppm
+
+!=======================================================================================================!
+
+
+!Computes the definite integral from x1 to x2 of the parabola a(0) + a(1)*xi + a(2)*xi**2 in normalized
+!coordinates, xi=(x-x0)/dx. Make sure this gets inlined during compilation.
+function integrate_parabola( a , x1 , x2 ) result(mass)
+ implicit none
+ real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola
+ real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration
+ real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration
+ real(kind=real_kind) :: mass
+ mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1
+end function integrate_parabola
+
+
+!=============================================================================================!
+
+
+
+end module vertremap_mod_base
+
+
+
+
diff --git a/components/homme/src/share/viscosity_mod.F90 b/components/homme/src/share/viscosity_mod_base.F90
similarity index 99%
rename from components/homme/src/share/viscosity_mod.F90
rename to components/homme/src/share/viscosity_mod_base.F90
index c005a3eff3b..c8cb824ded3 100644
--- a/components/homme/src/share/viscosity_mod.F90
+++ b/components/homme/src/share/viscosity_mod_base.F90
@@ -2,7 +2,7 @@
#include "config.h"
#endif
-module viscosity_mod
+module viscosity_mod_base
!
! This module should be renamed "global_deriv_mod.F90"
!
@@ -1032,4 +1032,4 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,nt,min_neigh,max_nei
end subroutine
#endif
-end module
+end module viscosity_mod_base
diff --git a/components/homme/src/sweqx/CMakeLists.txt b/components/homme/src/sweqx/CMakeLists.txt
index 8409fd086be..56af9356ec7 100644
--- a/components/homme/src/sweqx/CMakeLists.txt
+++ b/components/homme/src/sweqx/CMakeLists.txt
@@ -13,6 +13,16 @@ SET (SWEQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS}
${UTILS_TIMING_DIR} CACHE INTERNAL "include directories for sweqx")
SET(SWEQX_SRCS
+ ${SRC_SHARE}/bndry_mod_base.F90
+ ${SRC_SHARE}/derivative_mod_base.F90
+ ${SRC_SHARE}/edge_mod_base.F90
+ ${SRC_SHARE}/solver_init_mod_base.F90
+ ${SRC_SHARE}/viscosity_mod_base.F90
+ ${SRC_BASE}/sweqx/bndry_mod.F90
+ ${SRC_BASE}/sweqx/derivative_mod.F90
+ ${SRC_BASE}/sweqx/edge_mod.F90
+ ${SRC_BASE}/sweqx/solver_init_mod.F90
+ ${SRC_BASE}/sweqx/viscosity_mod.F90
${SRC_BASE}/advance_mod.F90
${SRC_BASE}/checksum_mod.F90
${SRC_BASE}/common_io_mod.F90
@@ -32,16 +42,13 @@ SET(SWEQX_SRCS
${SRC_BASE}/types_mod.F90
${SRC_SHARE}/fvm_bsp_mod.F90
${SRC_SHARE}/fvm_transformation_mod.F90
- ${SRC_SHARE}/bndry_mod.F90
${SRC_SHARE}/cg_mod.F90
${SRC_SHARE}/control_mod.F90
${SRC_SHARE}/coordinate_systems_mod.F90
${SRC_SHARE}/cube_mod.F90
- ${SRC_SHARE}/derivative_mod.F90
${SRC_SHARE}/dimensions_mod.F90
${SRC_SHARE}/dof_mod.F90
${SRC_SHARE}/domain_mod.F90
- ${SRC_SHARE}/edge_mod.F90
${SRC_SHARE}/element_mod.F90
${SRC_SHARE}/filter_mod.F90
${SRC_SHARE}/fvm_analytic_mod.F90
@@ -76,7 +83,6 @@ SET(SWEQX_SRCS
${SRC_SHARE}/spacecurve_mod.F90
${SRC_SHARE}/thread_mod.F90
${SRC_SHARE}/time_mod.F90
- ${SRC_SHARE}/viscosity_mod.F90
${SRC_SHARE}/unit_tests_mod.F90
${SRC_UTILS}/shr_file_mod.F90
${SRC_UTILS}/shr_kind_mod.F90
diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90
new file mode 100644
index 00000000000..5ad1125cbef
--- /dev/null
+++ b/components/homme/src/sweqx/bndry_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! sweqx build of bndry_mod: thin wrapper that re-exports the listed names from the shared bndry_mod_base (no private statement, so they stay public).
+module bndry_mod
+ use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping
+ implicit none
+end module bndry_mod
diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90
new file mode 100644
index 00000000000..e9c3f81be3f
--- /dev/null
+++ b/components/homme/src/sweqx/derivative_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! sweqx build of derivative_mod: thin wrapper that re-exports the listed names from the shared derivative_mod_base (no private statement, so they stay public).
+module derivative_mod
+ use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, &
+ derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, &
+ remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, &
+ curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, &
+ laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere
+ implicit none
+end module derivative_mod
diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90
new file mode 100644
index 00000000000..902503b36cb
--- /dev/null
+++ b/components/homme/src/sweqx/edge_mod.F90
@@ -0,0 +1,12 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! sweqx build of edge_mod: thin wrapper that re-exports the listed names from the shared edge_mod_base (no private statement, so they stay public).
+module edge_mod
+ use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, &
+ edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, &
+ ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, &
+ ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, &
+ ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax
+ implicit none
+end module edge_mod
diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90
new file mode 100644
index 00000000000..90efb25eadf
--- /dev/null
+++ b/components/homme/src/sweqx/solver_init_mod.F90
@@ -0,0 +1,8 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! sweqx build of solver_init_mod: thin wrapper that re-exports solver_init2 from the shared solver_init_mod_base (no private statement, so it stays public).
+module solver_init_mod
+ use solver_init_mod_base, only: solver_init2
+ implicit none
+end module solver_init_mod
diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90
new file mode 100644
index 00000000000..f78ae69a8f3
--- /dev/null
+++ b/components/homme/src/sweqx/viscosity_mod.F90
@@ -0,0 +1,11 @@
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+! sweqx build of viscosity_mod: thin wrapper that re-exports names from the shared viscosity_mod_base; the second use list is compiled only when _PRIM is defined.
+module viscosity_mod
+ use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax
+#ifdef _PRIM
+ use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d
+#endif
+ implicit none
+end module viscosity_mod
diff --git a/components/homme/test/jw_baroclinic/openacc_work/cpu.job b/components/homme/test/jw_baroclinic/openacc_work/cpu.job
index bebb6ea3fb5..7771f0f2628 100644
--- a/components/homme/test/jw_baroclinic/openacc_work/cpu.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/cpu.job
@@ -1,6 +1,6 @@
#!/bin/tcsh -f
#PBS -A STF006
-#PBS -l nodes=24
+#PBS -l nodes=3
#PBS -q batch
#PBS -l walltime=0:45:00
#PBS -N baroclinic
@@ -83,7 +83,7 @@ echo $mpirun
set u_perturb = 1
set rotate = 0
-set ne = 30 # horizontal resolution 4,16,30,60,120,240
+set ne = 8 # horizontal resolution 4,16,30,60,120,240
set nlev = 64 # vertical resolution 26,30,60,64,96
set qsize = 50 # number of passive tracers
set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian
diff --git a/components/homme/test/jw_baroclinic/openacc_work/openacc.job b/components/homme/test/jw_baroclinic/openacc_work/openacc.job
index 8600dcee570..3193fcea868 100644
--- a/components/homme/test/jw_baroclinic/openacc_work/openacc.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/openacc.job
@@ -1,6 +1,6 @@
#!/bin/tcsh -f
#PBS -A STF006
-#PBS -l nodes=24
+#PBS -l nodes=3
#PBS -q batch
#PBS -l walltime=0:45:00
#PBS -N baroclinic
@@ -43,7 +43,7 @@ set exe = preqx.openacc
module rm cray-netcdf-hdf5parallel
module rm cray-parallel-netcdf
module load PrgEnv-pgi
- module switch pgi pgi/15.9.lustre
+ module switch pgi pgi/15.10.0
module switch cray-mpich cray-mpich/7.2.2
module switch cray-libsci cray-libsci/13.0.4
module load esmf/5.2.0rp2
@@ -71,7 +71,7 @@ setenv MPICH_CPUMASK_DISPLAY 1
#setenv CUDA_PROFILE_CONFIG ./cuda_config
#setenv PGI_ACC_DEBUG 1
-setenv OMP_NUM_THREADS 4
+setenv OMP_NUM_THREADS 1
set NCPU = $PBS_NUM_NODES
@ NCPU *= 16
@ NCPU /= $OMP_NUM_THREADS
@@ -83,7 +83,7 @@ echo $mpirun
set u_perturb = 1
set rotate = 0
-set ne = 30 # horizontal resolution 4,16,30,60,120,240
+set ne = 8 # horizontal resolution 4,16,30,60,120,240
set nlev = 64 # vertical resolution 26,30,60,64,96
set qsize = 50 # number of passive tracers
set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian
diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
old mode 100644
new mode 100755
index 5dde0fbab77..7147e817eb8
--- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job
@@ -7,7 +7,7 @@
# set paths to source code, build directory and run directory
set wdir = /proj/imn/HOMME_ACME # run directory
-set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout
+set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout
set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case
set vdir = $HOMME/test/vcoord # vertical coordinate files
set exedir = $HOMME/build/preqx
diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
old mode 100644
new mode 100755
index 63846c35c5b..df7e9943c9c
--- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
+++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job
@@ -7,7 +7,7 @@
# set paths to source code, build directory and run directory
set wdir = /proj/imn/HOMME_ACME # run directory
-set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout
+set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout
set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case
set vdir = $HOMME/test/vcoord # vertical coordinate files
set exedir = $HOMME/build/preqx
@@ -28,9 +28,9 @@ limit stacksize unlimited
setenv MPSTKZ 64M
setenv OMP_STACKSIZE 64M
-setenv CUDA_PROFILE 1
-setenv CUDA_PROFILE_LOG "cuda_profile_%p.log"
-setenv CUDA_PROFILE_CONFIG ./cuda_config
+#setenv CUDA_PROFILE 1
+#setenv CUDA_PROFILE_LOG "cuda_profile_%p.log"
+#setenv CUDA_PROFILE_CONFIG ./cuda_config
#set i=0
#mkdir -f /tmp/mps_$i
diff --git a/components/homme/test/reg_test/namelists/swtc1-dg.nl b/components/homme/test/reg_test/namelists/swtc1-dg.nl
deleted file mode 100644
index 5bed81bde07..00000000000
--- a/components/homme/test/reg_test/namelists/swtc1-dg.nl
+++ /dev/null
@@ -1,69 +0,0 @@
-!=======================================================!
-! 1 day = 1 * 24 * 3600 = 86400 sec !
-! nmax = ndays * 86400 / tstep = 100
-! 12 days at 120.0 stepsize: nmax= 8640 !
-!=======================================================!
-&ctl_nl
-NThreads = 1
-partmethod = 4
-topology = "cube"
-test_case = 'swtc1'
-ne = 10
-ndays = 12
-statefreq = 864
-tasknum = 0
-restartfreq = -1
-restartfile = "./restart/R000000050"
-runtype = 0
-tstep = 100
-integration = "explicit"
-smooth = 0.05
-nu = 0
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits = 100
-tol = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type = "taylor"
-filter_freq = 1
-filter_mu = 0.05D0
-p_bv = 12.0D0
-s_bv = .666666666666666666D0
-wght_fm = 0.10D0
-kcut_fm = 2
-/
-&analysis_nl
-!=======================================================!
-! currently up to 5 streams are allowed !
-! output_stream_count=1 !
-! !
-! timunits: 0= steps, 1=days, 2=hours !
-! output_timeunits=1,2 !
-! output_start_time=0,1176 !
-! output_end_time=-1,-1 !
-! output_frequency=1,1 !
-! output_dir ="./movies/" !
-! !
-! allowed variables: 'ps ','geop ','u ','v ', !
-! 'latp ','lonp ','latv ','lonv ', !
-! 'elem ','Time ' !
-! !
-! output_varnames1-5 !
-!=======================================================!
-output_start_time = 0
-output_end_time = -1
-output_frequency = 1
-output_timeunits = 1
-output_varnames1 = 'u', 'v', 'ps', 'geop'
-output_type = 'netcdf'
-interp_nlon = 720
-interp_nlat = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 4
-alpha_dg = 0.0D0
-/
diff --git a/components/homme/test/reg_test/namelists/swtc2-dg.nl b/components/homme/test/reg_test/namelists/swtc2-dg.nl
deleted file mode 100644
index eca5a773ca5..00000000000
--- a/components/homme/test/reg_test/namelists/swtc2-dg.nl
+++ /dev/null
@@ -1,70 +0,0 @@
-!=======================================================!
-! 1 day = 1 * 24 * 3600 = 86400 sec !
-! nmax = ndays * 86400 / tstep !
-! 12 days at 30.0 stepsize: nmax= 34560 !
-!=======================================================!
-&ctl_nl
-NThreads = 1
-partmethod = 4
-topology = "cube"
-test_case = "swtc2"
-ne = 6
-ndays = 5
-statefreq = 2880
-tasknum = 0
-restartfreq = -1
-restartfile = "./restart/R000000050"
-runtype = 0
-tstep = 30.0
-integration = "explicit"
-smooth = 0.05
-nu = 7.0e5
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits = 100
-tol = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type = "taylor"
-filter_freq = 1
-filter_mu = 0.05D0
-p_bv = 12.0D0
-s_bv = .666666666666666666D0
-wght_fm = 0.10D0
-kcut_fm = 2
-/
-&analysis_nl
-!=======================================================!
-! currently up to 5 streams are allowed !
-! output_stream_count=1 !
-! !
-! timunits: 0= steps, 1=days, 2=hours !
-! output_timeunits=1,2 !
-! output_start_time=0,1176 !
-! output_end_time=-1,-1 !
-! output_frequency=1,1 !
-! output_dir ="./movies/" !
-! !
-! allowed variables: 'ps ','geop ','u ','v ', !
-! 'latp ','lonp ','latv ','lonv ', !
-! 'elem ','Time ' !
-! !
-! output_varnames1-5 !
-!=======================================================!
-output_start_time = 0
-output_end_time = -1
-output_frequency = 1
-output_timeunits = 1
-output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta'
-output_type = 'netcdf'
-interp_nlon = 720
-interp_nlat = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 4
-alpha_dg = 0.0D0
-/
-
diff --git a/components/homme/test/reg_test/namelists/swtc5-dg.nl b/components/homme/test/reg_test/namelists/swtc5-dg.nl
deleted file mode 100644
index 75324e58b89..00000000000
--- a/components/homme/test/reg_test/namelists/swtc5-dg.nl
+++ /dev/null
@@ -1,69 +0,0 @@
-!=======================================================!
-! 1 day = 1 * 24 * 3600 = 86400 sec !
-! nmax = ndays * 86400 / tstep !
-! 12 days at 30.0 stepsize: nmax= 34560 !
-!=======================================================!
-&ctl_nl
-NThreads = 1
-partmethod = 4
-topology = "cube"
-test_case = "swtc5"
-ne = 6
-ndays = 15
-statefreq = 2880
-tasknum = 0
-restartfreq = -1
-restartfile = "./restart/R000000050"
-runtype = 0
-tstep = 30.0
-integration = "explicit"
-smooth = 0.05
-nu = 7.0e5
-/
-&solver_nl
-precon_method = "block_jacobi"
-maxits = 100
-tol = 1.e-12
-/
-&filter_nl
-transfer_type = "bv"
-filter_type = "taylor"
-filter_freq = 1
-filter_mu = 0.05D0
-p_bv = 12.0D0
-s_bv = .666666666666666666D0
-wght_fm = 0.10D0
-kcut_fm = 2
-/
-&analysis_nl
-!=======================================================!
-! currently up to 5 streams are allowed !
-! output_stream_count=1 !
-! !
-! timunits: 0= steps, 1=days, 2=hours !
-! output_timeunits=1,2 !
-! output_start_time=0,1176 !
-! output_end_time=-1,-1 !
-! output_frequency=1,1 !
-! output_dir ="./movies/" !
-! !
-! allowed variables: 'ps ','geop ','u ','v ', !
-! 'latp ','lonp ','latv ','lonv ', !
-! 'elem ','Time ' !
-! !
-! output_varnames1-5 !
-!=======================================================!
-output_start_time = 0
-output_end_time = -1
-output_frequency = 1
-output_timeunits = 1
-output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta'
-output_type = 'netcdf'
-interp_nlon = 720
-interp_nlat = 360
-/
-&dg_nl
-riemanntype= 0
-alphatype= 0
-alpha_dg = 0.0D0
-/
diff --git a/components/homme/test/reg_test/run_tests/baro1a.cmake b/components/homme/test/reg_test/run_tests/baro1a.cmake
index 89b79b23c36..973a7ceab6e 100644
--- a/components/homme/test/reg_test/run_tests/baro1a.cmake
+++ b/components/homme/test/reg_test/run_tests/baro1a.cmake
@@ -10,8 +10,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro1a)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroA)
diff --git a/components/homme/test/reg_test/run_tests/baro1b.cmake b/components/homme/test/reg_test/run_tests/baro1b.cmake
index 42bf345f79a..8c6f9125397 100644
--- a/components/homme/test/reg_test/run_tests/baro1b.cmake
+++ b/components/homme/test/reg_test/run_tests/baro1b.cmake
@@ -10,8 +10,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro1b)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroB)
diff --git a/components/homme/test/reg_test/run_tests/baro2a.cmake b/components/homme/test/reg_test/run_tests/baro2a.cmake
index 0eee75a6c69..f7bfbfcce9a 100644
--- a/components/homme/test/reg_test/run_tests/baro2a.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2a.cmake
@@ -9,8 +9,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro2a)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroC)
diff --git a/components/homme/test/reg_test/run_tests/baro2b.cmake b/components/homme/test/reg_test/run_tests/baro2b.cmake
index cfeb4ae313b..cc88c15c1e6 100644
--- a/components/homme/test/reg_test/run_tests/baro2b.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2b.cmake
@@ -11,8 +11,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro2b)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroC)
diff --git a/components/homme/test/reg_test/run_tests/baro2c.cmake b/components/homme/test/reg_test/run_tests/baro2c.cmake
index 82101c0e9af..425da22414f 100644
--- a/components/homme/test/reg_test/run_tests/baro2c.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2c.cmake
@@ -4,8 +4,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro2c)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroC)
diff --git a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
index 66881f565d9..f7082997f4d 100644
--- a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake
@@ -10,9 +10,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro2d-imp)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE prim)
-
# The specifically compiled executable that this test uses
SET(EXEC_NAME primA)
diff --git a/components/homme/test/reg_test/run_tests/baro2d.cmake b/components/homme/test/reg_test/run_tests/baro2d.cmake
index bfc76e55de7..f51a7955525 100644
--- a/components/homme/test/reg_test/run_tests/baro2d.cmake
+++ b/components/homme/test/reg_test/run_tests/baro2d.cmake
@@ -11,8 +11,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baro2d)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroC)
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
index 5a240184657..688ff6644e9 100644
--- a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake
@@ -11,8 +11,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baroCamMoistSL)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroCam)
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake
new file mode 100644
index 00000000000..9767cc947a8
--- /dev/null
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake
@@ -0,0 +1,51 @@
+###############################################################
+# RK + PIO_INTERP -- OpenACC variant of baroCamMoist
+###############################################################
+#
+# Spectral Element -- 9 days of ASP baroclinic test
+# (Jablonowski and Williamson test + 4 tracers)
+# NE=15, dt=150, nu=1e16, filter_freq=0, NV=4, PLEV=26
+# (explicit RK with subcycling)
+#
+###############################################################
+
+# The name of this test (should be the basename of this file)
+SET(TEST_NAME baroCamMoist-acc)
+
+# The specifically compiled executable that this test uses
+SET(EXEC_NAME baroCam-acc)
+
+SET(NUM_CPUS 16)
+
+SET(NAMELIST_FILES
+${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist.nl
+)
+SET(VCOORD_FILES ${HOMME_ROOT}/test/vcoord/*26*)
+SET(REFSOLN_FILES ${HOMME_ROOT}/test/reg_test/ref_sol/T340ref.nc)
+
+SET(NC_OUTPUT_FILES
+ camBaroMoist-asp_baroclinic1.nc
+ camBaroMoist-asp_baroclinic2.nc
+)
+
+
+# OMP tests (disabled for now)
+#SET(OMP_SUB_TESTS true)
+#SET(OMP_NUM_THREADS 4)
+#SET(OMP_NAMELIST_FILES
+#${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist-omp4.nl
+#)
+
+
+# compare output with CPU-only baroCamMoist test:
+SET(TESTCASE_REF_TOL 1E-11)
+SET(NC_OUTPUT_REF
+ ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic1.nc
+ ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic2.nc
+)
+SET(NC_OUTPUT_CHECKREF
+ camBaroMoist-asp_baroclinic1.nc
+ camBaroMoist-asp_baroclinic2.nc
+)
+
+
diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
index 904ee7f6696..3989967caee 100644
--- a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
+++ b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake
@@ -11,8 +11,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME baroCamMoist)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroCam)
diff --git a/components/homme/test/reg_test/run_tests/swimtc5.cmake b/components/homme/test/reg_test/run_tests/swimtc5.cmake
index a2ba5e84b9f..e5398816556 100644
--- a/components/homme/test/reg_test/run_tests/swimtc5.cmake
+++ b/components/homme/test/reg_test/run_tests/swimtc5.cmake
@@ -5,8 +5,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swimtc5)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swim)
# The specifically compiled executable that this test uses
SET(EXEC_NAME swim5)
diff --git a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake b/components/homme/test/reg_test/run_tests/swtc1-dg.cmake
deleted file mode 100644
index 7dc8bc37c9a..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-###############################################################
-#
-# Discontinuous Galerkin -- swtc1
-# NE=10, dt=100, nu=0, limiter=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc1-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-
-SET(NC_OUTPUT_FILES swtc11.nc)
diff --git a/components/homme/test/reg_test/run_tests/swtc1.cmake b/components/homme/test/reg_test/run_tests/swtc1.cmake
index de170ab52b3..ce3ddaf3590 100644
--- a/components/homme/test/reg_test/run_tests/swtc1.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc1.cmake
@@ -9,8 +9,7 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swtc1)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
+
# The specifically compiled executable that this test uses
SET(EXEC_NAME swtcA)
diff --git a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake b/components/homme/test/reg_test/run_tests/swtc2-dg.cmake
deleted file mode 100644
index dcbc5b9d6ee..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-###############################################################
-# RKSSP default benchmark (used to check nothing is broken)
-###############################################################
-#
-# Discontinous Galerkin swtc2
-# NE=6, dt=30, nu=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc2-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-
-SET(NC_OUTPUT_FILES swtc21.nc)
diff --git a/components/homme/test/reg_test/run_tests/swtc2.cmake b/components/homme/test/reg_test/run_tests/swtc2.cmake
index eac2e7bfdd0..1659361a0b9 100644
--- a/components/homme/test/reg_test/run_tests/swtc2.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc2.cmake
@@ -9,8 +9,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swtc2)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME swtcA)
diff --git a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake b/components/homme/test/reg_test/run_tests/swtc5-dg.cmake
deleted file mode 100644
index 8d482b38d7d..00000000000
--- a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake
+++ /dev/null
@@ -1,21 +0,0 @@
-###############################################################
-#
-# Discontinuous Galerkin -- swtc5
-# NE=6, dt=30, nu=7e5, limiter=0, filter_freq=1, NP=6
-#
-###############################################################
-
-# The name of this test (should be the basename of this file)
-SET(TEST_NAME swtc5-dg)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE swdgx)
-# The specifically compiled executable that this test uses
-SET(EXEC_NAME swtc-dgA)
-
-SET(NUM_CPUS 16)
-
-SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl)
-SET(NCL_FILES ${HOMME_ROOT}/test/reg_test/ncl/swtc5ref.ncl)
-
-SET(NC_OUTPUT_FILES swtc51.nc)
-
diff --git a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
index 166bd7e3893..16155506e38 100644
--- a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake
@@ -9,8 +9,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swtc5-mr)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME swtcB)
diff --git a/components/homme/test/reg_test/run_tests/swtc5.cmake b/components/homme/test/reg_test/run_tests/swtc5.cmake
index 9407f6c62ec..35de2a7b160 100644
--- a/components/homme/test/reg_test/run_tests/swtc5.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc5.cmake
@@ -9,8 +9,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swtc5)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME swtcB)
diff --git a/components/homme/test/reg_test/run_tests/swtc6.cmake b/components/homme/test/reg_test/run_tests/swtc6.cmake
index 894eb9b578f..d0a8bef306e 100644
--- a/components/homme/test/reg_test/run_tests/swtc6.cmake
+++ b/components/homme/test/reg_test/run_tests/swtc6.cmake
@@ -7,8 +7,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME swtc6)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE sweqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME swtcC)
diff --git a/components/homme/test/reg_test/run_tests/templates.cmake b/components/homme/test/reg_test/run_tests/templates.cmake
index 2b413d73ccb..d50f27d9de9 100644
--- a/components/homme/test/reg_test/run_tests/templates.cmake
+++ b/components/homme/test/reg_test/run_tests/templates.cmake
@@ -15,8 +15,6 @@
# The name of this test (should be the basename of this file)
SET(TEST_NAME templates)
-# The type of run (preqx,sweqx,swdgx,etc.)
-SET(TEST_TYPE preqx)
# The specifically compiled executable that this test uses
SET(EXEC_NAME baroCam)
diff --git a/components/homme/test/reg_test/run_tests/test-list.cmake b/components/homme/test/reg_test/run_tests/test-list.cmake
index c66831ccebb..12fbf8a3367 100644
--- a/components/homme/test/reg_test/run_tests/test-list.cmake
+++ b/components/homme/test/reg_test/run_tests/test-list.cmake
@@ -10,6 +10,13 @@ SET(HOMME_TESTS
baro2d.cmake
baroCamMoist.cmake
baroCamMoist-SL.cmake
+ baroCamMoist-acc.cmake
baro2d-imp.cmake
templates.cmake
)
+
+
+#set_test_properties(
+# baroCamMoist-acc.cmake PROPERTY DEPENDS baroCamMoist.cmake
+#)
+
diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh
index a2a364de619..8ce21fcb1f9 100644
--- a/components/homme/test/reg_test/run_tests/testing-utils.sh
+++ b/components/homme/test/reg_test/run_tests/testing-utils.sh
@@ -482,16 +482,20 @@ execLine() {
echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
elif [ "${MPI_EXEC}" = "runjob" ]; then
echo "runjob -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} --block \$COBALT_PARTNAME --verbose=INFO : $EXEC $OPT" >> $RUN_SCRIPT
+ elif [ "${MPI_EXEC}" = "aprun" ] ; then
+ if [[ $4 == *"_OMP"* ]]; then
+ echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
+ else
+ echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
+ fi
else
echo "${MPI_EXEC} -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
fi
else
if [ "$HOMME_Submission_Type" = lsf ]; then
echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
-
elif [ "$HOMME_Submission_Type" = pbs ]; then
- echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
-
+ echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
else
echo "mpiexec -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT
diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt
index e2e6b4abda1..5017b92ceea 100644
--- a/components/homme/test_execs/CMakeLists.txt
+++ b/components/homme/test_execs/CMakeLists.txt
@@ -167,6 +167,7 @@ IF(${BUILD_HOMME_PREQX})
# ADD_SUBDIRECTORY(baroB)
ADD_SUBDIRECTORY(baroC)
ADD_SUBDIRECTORY(baroCam)
+ ADD_SUBDIRECTORY(baroCam-acc)
ENDIF()
# Add the test exec subdirs for the prim executable
diff --git a/components/homme/test_execs/baroA/CMakeLists.txt b/components/homme/test_execs/baroA/CMakeLists.txt
index cf504ddc027..a378d446ec0 100644
--- a/components/homme/test_execs/baroA/CMakeLists.txt
+++ b/components/homme/test_execs/baroA/CMakeLists.txt
@@ -1,4 +1,8 @@
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
# Set the variables for this test executable
# NP NC PLEV USE_PIO WITH_ENERGY
createTestExec(baroA preqx 8 4 20 FALSE TRUE)
diff --git a/components/homme/test_execs/baroB/CMakeLists.txt b/components/homme/test_execs/baroB/CMakeLists.txt
index ef96e91490e..1d4ba9cbd5f 100644
--- a/components/homme/test_execs/baroB/CMakeLists.txt
+++ b/components/homme/test_execs/baroB/CMakeLists.txt
@@ -1,4 +1,8 @@
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
# Set the variables for this test executable
# NP NC PLEV USE_PIO WITH_ENERGY
createTestExec(baroB preqx 8 4 20 TRUE FALSE)
diff --git a/components/homme/test_execs/baroC/CMakeLists.txt b/components/homme/test_execs/baroC/CMakeLists.txt
index b1500f3abe1..c4118fa1298 100644
--- a/components/homme/test_execs/baroC/CMakeLists.txt
+++ b/components/homme/test_execs/baroC/CMakeLists.txt
@@ -1,4 +1,8 @@
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
# Set the variables for this test executable
# NP NC PLEV USE_PIO WITH_ENERGY
createTestExec(baroC preqx 4 4 26 FALSE TRUE)
diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt
new file mode 100644
index 00000000000..f48313201f8
--- /dev/null
+++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt
@@ -0,0 +1,12 @@
+# Set the qsize_d in dimensions_mod.F90
+SET(QSIZE_D 25)
+SET(USE_OPENACC TRUE)
+SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}")
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}")
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
+# Set the variables for this test executable
+# NP NC PLEV USE_PIO WITH_ENERGY
+createTestExec(baroCam-acc preqx_acc 4 4 26 TRUE TRUE)
diff --git a/components/homme/test_execs/baroCam/CMakeLists.txt b/components/homme/test_execs/baroCam/CMakeLists.txt
index 21c7160c5c5..a56151eefb0 100644
--- a/components/homme/test_execs/baroCam/CMakeLists.txt
+++ b/components/homme/test_execs/baroCam/CMakeLists.txt
@@ -1,5 +1,9 @@
# Set the qsize_d in dimensions_mod.F90
SET(QSIZE_D 25)
+# If the user specified a file for custom compiler options use those
+IF (DEFINED PREQX_CUSTOM_FLAGS_FILE)
+ setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90)
+ENDIF ()
# Set the variables for this test executable
# NP NC PLEV USE_PIO WITH_ENERGY
createTestExec(baroCam preqx 4 4 26 TRUE TRUE)