diff --git a/cime/machines-acme/Depends.intel b/cime/machines-acme/Depends.intel index c213b908ff3..577f7aa320c 100644 --- a/cime/machines-acme/Depends.intel +++ b/cime/machines-acme/Depends.intel @@ -1,9 +1,10 @@ # PERFOBJS=\ -prim_advection_mod.o \ -edge_mod.o \ -derivative_mod.o \ -bndry_mod.o \ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ prim_advance_mod.o \ uwshcu.o diff --git a/cime/machines-acme/Depends.intel14 b/cime/machines-acme/Depends.intel14 index 0c913270b2b..32e4747d7a3 100644 --- a/cime/machines-acme/Depends.intel14 +++ b/cime/machines-acme/Depends.intel14 @@ -6,23 +6,23 @@ #ecosys_mod.o: ecosys_mod.F90 # $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -CU $< -prim_advection_mod.o: prim_advection_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< -edge_mod.o: edge_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< +PERFOBJS=\ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ +prim_advance_mod.o \ +uwshcu.o \ +wetdep.o -bndry_mod.o: bndry_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< - -prim_advance_mod.o: prim_advance_mod.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< - -uwshcu.o: uwshcu.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< - -wetdep.o: wetdep.F90 - $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< +ifeq ($(DEBUG),FALSE) + $(PERFOBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O3 -no-prec-div $< + $(REDUCED_OPT_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -O1 $< + $(REDUCED_PRECISION_OBJS): %.o: %.F90 + $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) 
-fimf-precision=low -fp-model fast $< +endif diff --git a/cime/machines-acme/Depends.intelmic b/cime/machines-acme/Depends.intelmic index 0f84949daa3..6f05aee44a7 100644 --- a/cime/machines-acme/Depends.intelmic +++ b/cime/machines-acme/Depends.intelmic @@ -1,6 +1,6 @@ -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< +#derivative_mod_base.o: derivative_mod_base.F90 +# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< shr_ncread_mod.o: shr_ncread_mod.F90 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $< diff --git a/cime/machines-acme/Depends.intelmic14 b/cime/machines-acme/Depends.intelmic14 index 0f84949daa3..6f05aee44a7 100644 --- a/cime/machines-acme/Depends.intelmic14 +++ b/cime/machines-acme/Depends.intelmic14 @@ -1,6 +1,6 @@ -derivative_mod.o: derivative_mod.F90 - $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< +#derivative_mod_base.o: derivative_mod_base.F90 +# $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) -O1 $< shr_ncread_mod.o: shr_ncread_mod.F90 $(FC) -c $(FPPFLAGS) $(INCLDIR) $(INCS) $(FREEFLAGS) $(FFLAGS_NOOPT) $< diff --git a/cime/machines-acme/Depends.mira b/cime/machines-acme/Depends.mira index 85453dd931a..7f9c05b6299 100644 --- a/cime/machines-acme/Depends.mira +++ b/cime/machines-acme/Depends.mira @@ -13,13 +13,16 @@ shr_reprosum_mod.o: shr_reprosum_mod.F90 # These routines benefit from -qnostrict without violating the bfb test PERFOBJS=\ -prim_advection_mod.o \ -edge_mod.o \ -derivative_mod.o \ -bndry_mod.o \ +prim_advection_mod_base.o \ +vertremap_mod_base.o \ +edge_mod_base.o \ +derivative_mod_base.o \ +bndry_mod_base.o \ prim_advance_mod.o \ uwshcu.o \ -wetdep.o +wetdep.o + + ifeq ($(DEBUG),FALSE) $(PERFOBJS): %.o: %.F90 $(FC) -c $(INCLDIR) $(INCS) $(FFLAGS) $(FREEFLAGS) -qnostrict $< diff --git a/cime/scripts/Tools/config_definition.xml b/cime/scripts/Tools/config_definition.xml 
index 4a5f76e22f2..5dcf37c5532 100644 --- a/cime/scripts/Tools/config_definition.xml +++ b/cime/scripts/Tools/config_definition.xml @@ -1363,6 +1363,14 @@ sdesc="CAM dynamical core" > + + Specify a compiler for the target machine (optional) default: default compiler for the target machine + -camse_target Specify a target for the cam-se dycore (ignored for other atmospheric dycores) + default: preqx + options: preqx, preqx_acc -mpilib Specify a mpi library for the target machine (optional) default: default mpi library for the target machine allowed: openmpi, mpich, ibm, mpi-serial, etc @@ -166,6 +169,7 @@ GetOptions( "confopts=s" => \$opts{'confopts'}, "project=s" => \$opts{'project'}, "compiler=s" => \$opts{'compiler'}, + "camse_target=s" => \$opts{'camse_target'}, "mpilib=s" => \$opts{'mpilib'}, "res=s" => \$opts{'res'}, "h|help" => \$opts{'help'}, @@ -211,6 +215,7 @@ my $pecount; my $xmlmode; my $project; my $compiler; +my $camse_target; my $mpilib; if (!$opts{'list'} && !$opts{'testlist'}) { # Check for manditory case input @@ -260,6 +265,13 @@ if (!$opts{'list'} && !$opts{'testlist'}) { $compiler = $opts{'compiler'}; } + # Check if camse_target option is given + if ($opts{'camse_target'}) { + $camse_target = $opts{'camse_target'}; + } else { + $camse_target = "preqx"; + } + # Check if machine mpilib option is given $mpilib = 'unset'; if ($opts{'mpilib'}) { @@ -502,36 +514,51 @@ if ($mach =~ /(.*)_(.*)/){ } $cfg_ref->set('CCSM_MACHDIR', "$machdir"); +# Handle the camse_target option +# Do this before handling the compiler so that default compiler can be overwritten +if ($camse_target eq "preqx") { + $cfg_ref->set('CAM_TARGET', "preqx"); +} elsif ($camse_target eq "preqx_acc") { + $cfg_ref->set('CAM_TARGET', "preqx_acc"); +} else { + die "ERROR: option -camse_target must be preqx (default) or preqx_acc" +} + # Check that compiler request for target machine matches a supported value # Or set default compiler - if not provided compiler request my $compilers; if 
($mach =~ /userdefined/){ - $cfg_ref->set('COMPILER', "USERDEFINED_required_build"); + $cfg_ref->set('COMPILER', "USERDEFINED_required_build"); } else { - $compilers = $cfg_ref->get('COMPILERS'); - my @compilers = split ",", $compilers, -1; - if ($compiler) { - if (! ($mach =~ "generic")){ - my $found = 0; - foreach my $comp (@compilers) { - if ($compiler eq $comp) { - $found = 1; - } - } - if (!$found) { - my $sysmod = "rm -rf $caseroot"; - system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n"; - die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n"; - } - } - $cfg_ref->set('COMPILER', "$compiler"); - if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } - } else { - $compiler = $compilers[0]; - $cfg_ref->set('COMPILER', "$compiler"); - if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + $compilers = $cfg_ref->get('COMPILERS'); + my @compilers = split ",", $compilers, -1; + if ($compiler) { + if (! ($mach =~ "generic")){ + my $found = 0; + foreach my $comp (@compilers) { + if ($compiler eq $comp) { + $found = 1; + } + } + if (!$found) { + my $sysmod = "rm -rf $caseroot"; + system($sysmod) == 0 or die "ERROR: $sysmod failed: $?\n"; + die "ERROR: compiler setting of $compiler does not match supported values of $compilers \n"; + } } + $cfg_ref->set('COMPILER', "$compiler"); + if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + } else { + $compiler = $compilers[0]; + if ($camse_target eq "preqx_acc") { + if ($mach eq "titan") { + $compiler = "pgi_acc"; + } + } + $cfg_ref->set('COMPILER', "$compiler"); + if ($print>=2) { print "Machine compiler specifier: $compiler.$eol"; } + } } if ($print>=2) { print "Machine specifier: $mach.$eol"; } diff --git a/components/cam/bld/cam.buildnml b/components/cam/bld/cam.buildnml index ee1651cf2d9..42b4660cdaa 100755 --- a/components/cam/bld/cam.buildnml +++ b/components/cam/bld/cam.buildnml @@ -20,6 +20,7 @@ my $CAM_CONFIG_OPTS = 
`./xmlquery CAM_CONFIG_OPTS -value`; my $CAM_NAMELIST_OPTS = `./xmlquery CAM_NAMELIST_OPTS -value`; my $CAM_NML_USE_CASE = `./xmlquery CAM_NML_USE_CASE -value`; my $CAM_DYCORE = `./xmlquery CAM_DYCORE -value`; +my $CAM_TARGET = `./xmlquery CAM_TARGET -value`; my $CASEBUILD = `./xmlquery CASEBUILD -value`; my $CCSM_CO2_PPMV = `./xmlquery CCSM_CO2_PPMV -value`; my $CCSMROOT = `./xmlquery CCSMROOT -value`; @@ -84,7 +85,7 @@ if ($BUILD_COMPLETE eq 'FALSE') { $ATM_GRID = $1; $nlev="-nlev $3"; } - $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS"; + $sysmod = "$CCSMROOT/components/cam/bld/configure -s -ccsm_seq -ice none -ocn $ocn -comp_intf $comp $scm -spmd $spmd -smp $smp -dyn $CAM_DYCORE -dyn_target $CAM_TARGET -res $ATM_GRID $nlev $CAM_LIB_DIRS $CAM_CONFIG_OPTS"; system($sysmod) == 0 or die "ERROR cam.buildnml: $sysmod failed: $?\n"; } else { diff --git a/components/cam/bld/configure b/components/cam/bld/configure index db01087394a..457564857d0 100755 --- a/components/cam/bld/configure +++ b/components/cam/bld/configure @@ -324,6 +324,7 @@ GetOptions( "rain_evap_to_coarse_aero" => \$opts{'rain_evap_to_coarse_aero'}, "defaults=s" => \$opts{'defaults'}, "dyn=s" => \$opts{'dyn'}, + "dyn_target=s" => \$opts{'dyn_target'}, "edit_chem_mech" => \$opts{'edit_chem_mech'}, "waccm_phys" => \$opts{'waccm_phys'}, "offline_dyn" => \$opts{'offline_dyn'}, @@ -2764,8 +2765,13 @@ sub write_filepath print $fh "$camsrcdir/cam/src/dynamics/$dyn\n"; if($dyn eq 'se') { - print $fh "$camsrcdir/homme/src/share\n"; - print $fh "$camsrcdir/homme/src/share/openacc\n"; + print $fh "$camsrcdir/homme/src/share\n"; + if ($opts{'dyn_target'} eq 'preqx') { + print $fh "$camsrcdir/homme/src/preqx\n"; + } + elsif($opts{'dyn_target'} eq 'preqx_acc') { + print $fh "$camsrcdir/homme/src/preqx_acc\n"; + } } # Parallelization utilies diff --git 
a/components/homme/CMakeLists.txt b/components/homme/CMakeLists.txt index d3458a68476..0a39c868c57 100644 --- a/components/homme/CMakeLists.txt +++ b/components/homme/CMakeLists.txt @@ -56,6 +56,7 @@ ENDIF () # Some options for conditional builds OPTION(BUILD_HOMME_SWEQX "Shallow water equations FEM" ON) OPTION(BUILD_HOMME_PREQX "Primitive equations FEM" ON) +OPTION(BUILD_HOMME_PREQX_ACC "Primitive equations FEM with OpenACC" ON) OPTION(BUILD_HOMME_SWDGX "Shallow water equations DG" OFF) OPTION(BUILD_HOMME_PRIMDGX "Primitive equations DG" OFF) OPTION(BUILD_HOMME_SWIM "Shallow water equations implicit" OFF) @@ -175,6 +176,10 @@ IF(${BUILD_HOMME_PREQX}) ADD_SUBDIRECTORY(src/preqx) ENDIF() +IF(${BUILD_HOMME_PREQX_ACC}) + ADD_SUBDIRECTORY(src/preqx_acc) +ENDIF() + IF(${BUILD_HOMME_SWDGX}) ADD_SUBDIRECTORY(src/swdgx) ENDIF() diff --git a/components/homme/cmake/SetCompilerFlags.cmake b/components/homme/cmake/SetCompilerFlags.cmake index b83bd3fecb7..1d42d29c2cf 100644 --- a/components/homme/cmake/SetCompilerFlags.cmake +++ b/components/homme/cmake/SetCompilerFlags.cmake @@ -222,68 +222,7 @@ ENDIF () ############################################################################## OPTION(ENABLE_OPENACC "Whether to build with OpenACC support" FALSE) IF (${ENABLE_OPENACC}) - IF (${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI) - # Need to add -acc to the Fortran FLAGS to see if it will compile - # "call acc_init()" - SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -acc") - TRY_COMPILE(OPENACC_SUCCESS - ${CMAKE_BINARY_DIR}/tests/compilerTests/ - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/openAccTest.f90 - OUTPUT_VARIABLE COMPILE_OUTPUT) - IF (${OPENACC_SUCCESS}) - MESSAGE(STATUS "Using OpenACC through PGI compiler") - SET(PREQX_USE_OPENACC TRUE) - ELSE() - MESSAGE(FATAL_ERROR "Unable to use OpenACC through the PGI compiler") - ENDIF() - ELSEIF (${CMAKE_Fortran_COMPILER_ID} STREQUAL Cray) - SET(PREQX_USE_OPENACC TRUE) - MESSAGE(STATUS "Using OpenACC through Cray compiler") - ELSE 
() - MESSAGE(FATAL_ERROR "OpenACC only supported through the PGI and Cray compilers") - ENDIF() -ENDIF () - -############################################################################## -# CUDA Fortran specific flags - only supporting PGI compiler -############################################################################## -OPTION(ENABLE_CUDA_FORTRAN "Whether to build with CUDA Fortran support" FALSE) -IF (${ENABLE_CUDA_FORTRAN}) - IF (NOT ${CMAKE_Fortran_COMPILER_ID} STREQUAL PGI) - MESSAGE(FATAL_ERROR "CUDA Fortran only supported through the PGI compiler") - ELSE () - # Set PGI CUDA Fortran flags - - # Set defaults as lowest version of CUDA and device capability allowed - # To do: determine a way to generalize this - IF (NOT CUDA_VERSION) - SET(CUDA_VERSION "4.1") - ENDIF () - - # Compute capability: cc2x is for devices with compute capability >= 2.0 - IF (NOT CUDA_DEVICE_CAPABILITY) - SET(CUDA_DEVICE_CAPABILITY "cc2x") - ENDIF () - - SET(CMAKE_Fortran_FLAGS - "${CMAKE_Fortran_FLAGS} -ta=nvidia -Mcuda=${CUDA_VERSION},${CUDA_DEVICE_CAPABILITY},ptxinfo,keepgpu") - - MESSAGE(STATUS "Testing PGI CUDA Fortran Compilation with flags: ${CMAKE_Fortran_FLAGS}") - - TRY_COMPILE(CUDAFOR - ${CMAKE_BINARY_DIR}/tests/compilerTests/ - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/compilerTests/cudaFortranTest.f90 - OUTPUT_VARIABLE COMPILE_OUTPUT) - IF (${CUDAFOR}) - SET(PREQX_USE_CUDA_FORTRAN TRUE) - MESSAGE(STATUS "Succeeded. Using CUDA Fortran through PGI compiler") - ELSE() - SET(PREQX_USE_CUDA_FORTRAN FALSE) - MESSAGE(FATAL_ERROR "Unable to use CUDA Fortran through the PGI " - "compiler. 
Compilation failed with the following " - "output.\n${COMPILE_OUTPUT}") - ENDIF() - ENDIF() + SET(PREQX_USE_OPENACC TRUE) ENDIF () ############################################################################## diff --git a/components/homme/cmake/machineFiles/titan.cmake b/components/homme/cmake/machineFiles/titan.cmake index 2a73ce474f6..aa94a807240 100644 --- a/components/homme/cmake/machineFiles/titan.cmake +++ b/components/homme/cmake/machineFiles/titan.cmake @@ -5,7 +5,16 @@ SET (CMAKE_CXX_COMPILER CC CACHE FILEPATH "") SET (NETCDF_DIR $ENV{NETCDF_DIR} CACHE FILEPATH "") SET (PNETCDF_DIR $ENV{PARALLEL_NETCDF_DIR} CACHE FILEPATH "") SET (HDF5_DIR $ENV{HDF5_DIR} CACHE FILEPATH "") +SET (DEBUG_FLAGS " " CACHE STRING "") +SET (Netcdf_NC_CONFIG_BIN "/opt/cray/netcdf/4.3.3.1/bin" CACHE FILEPATH "") +SET (HOMME_FIND_BLASLAPACK TRUE CACHE BOOL "") +SET (USE_MPIEXEC "aprun" CACHE STRING "") # The following is required for cross compilation SET (CMAKE_SYSTEM_NAME Catamount CACHE FILEPATH "") + +#Regression test parameters SET (USE_QUEUING FALSE CACHE BOOL "") +SET (USE_NUM_PROCS 64 CACHE STRING "") + + diff --git a/components/homme/compile_scripts/thatchroof/compile.cpu b/components/homme/compile_scripts/thatchroof/compile.cpu old mode 100644 new mode 100755 diff --git a/components/homme/compile_scripts/thatchroof/compile.openacc b/components/homme/compile_scripts/thatchroof/compile.openacc old mode 100644 new mode 100755 index c7b896db37e..b0f055ea508 --- a/components/homme/compile_scripts/thatchroof/compile.openacc +++ b/components/homme/compile_scripts/thatchroof/compile.openacc @@ -20,7 +20,7 @@ cmake \ -DCMAKE_Fortran_COMPILER=mpif90 \ -DCMAKE_C_COMPILER=mpicc \ -DCMAKE_CXX_COMPILER=mpiCC \ - -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \ + -DOPT_FLAGS="-fast -acc -Mcuda=7.5,cc35,ptxinfo -ta=tesla,pin -Minfo=accel" \ -DDEBUG_FLAGS=" " \ -DNETCDF_DIR=$NETCDF_DIR \ -DWITH_PNETCDF=FALSE \ @@ -30,6 +30,8 @@ cmake \ 
-DBUILD_HOMME_SWDGX=FALSE \ -DBUILD_HOMME_SWEQX=FALSE \ -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ -DENABLE_OPENACC=TRUE \ @@ -39,11 +41,11 @@ cmake \ -DZLIB_DIR=$ZLIB_PATH \ -DSZIP_DIR=$SZIP_PATH \ -DCURL_DIR=/opt/curl-7.45_pgi15.9 \ - -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.0,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \ + -DCMAKE_EXE_LINKER_FLAGS:STRING='-ta=tesla,pin -Mcuda=7.5,cc35 -acc -L$NETCDF_DIR/lib -lnetcdf -lnetcdff -ldl' \ \ \ \ - -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts//thatchroof/customFlags.cmake.openacc \ + -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/thatchroof/customFlags.cmake.openacc \ $HOMME_ROOT fi # -DOPT_FLAGS="-fastsse -Mvect" \ @@ -54,7 +56,7 @@ make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j16 preqx_acc || exit -1 mkdir -p $HOMME_ROOT/build/preqx -cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc index 12506a81cb6..248d7c27be8 100644 --- a/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc +++ b/components/homme/compile_scripts/thatchroof/customFlags.cmake.openacc @@ -1,13 +1,6 @@ SET(CUSTOM_FLAG_FILES ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - 
${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - ) SET_SOURCE_FILES_PROPERTIES( @@ -17,14 +10,4 @@ SET_SOURCE_FILES_PROPERTIES( " -Mpreprocess -Mextend -Mflushz -O0 -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin" ) -SET_SOURCE_FILES_PROPERTIES( - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - PROPERTIES GENERATED TRUE COMPILE_FLAGS - " -Mpreprocess -Mextend -Mflushz -fast -mp -acc -Mcuda=6.5,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape" -) diff --git a/components/homme/compile_scripts/titan/compile.cpu b/components/homme/compile_scripts/titan/compile.cpu index 1a21f327622..a88be66966f 100755 --- a/components/homme/compile_scripts/titan/compile.cpu +++ b/components/homme/compile_scripts/titan/compile.cpu @@ -5,52 +5,42 @@ do_clean=1 do_make=1 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme -NTRACERS=50 -NLEVELS=64 +BUILD_ROOT=/lustre/atlas/scratch/imn/stf006/homme_tests +NTRACERS=4 +NLEVELS=26 source ./env_mach_specific.cpu -mkdir -p titan-cpu || exit -1 -cd titan-cpu +mkdir -p $BUILD_ROOT/titan-cpu || exit -1 +cd $BUILD_ROOT/titan-cpu if [ $do_cmake -eq 1 ]; then -sed -i "s/ integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1 rm -rf CMakeFiles CMakeCache.txt cmake \ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ - -DCMAKE_Fortran_COMPILER=ftn \ - -DCMAKE_C_COMPILER=cc \ - -DCMAKE_CXX_COMPILER=CC \ -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ - -DDEBUG_FLAGS=" " \ - -DNETCDF_DIR=$NETCDF_DIR \ - -DWITH_PNETCDF=FALSE \ - -DHDF5_DIR=$HDF5_DIR \ + -DWITH_PNETCDF=TRUE \ -DPREQX_NP=4 \ -DPREQX_PLEV=$NLEVELS \ - 
-DBUILD_HOMME_SWDGX=FALSE \ - -DBUILD_HOMME_SWEQX=FALSE \ - -DBUILD_HOMME_PRIMDGX=FALSE \ + -DPREQX_QSIZE_D=$NTRACERS \ + -DBUILD_HOMME_SWEQX=TRUE \ + -DBUILD_HOMME_PREQX=TRUE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ - -DENABLE_OPENACC=FALSE \ - -DENABLE_CUDA_FORTRAN=FALSE \ - -DHOMME_FIND_BLASLAPACK=TRUE \ - -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ - \ - \ - \ + -DOPENACC_Fortran_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Minfo=accel" \ + -DOPENACC_Linker_FLAGS="-acc -ta=tesla,pin,cc35,cuda7.0 -Mcuda" \ -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ $HOMME_ROOT fi -# -DOPT_FLAGS="-fastsse -Mvect" \ if [ $do_clean -eq 1 ]; then make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j32 || exit -1 mkdir -p $HOMME_ROOT/build/preqx cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.cpu || exit -1 +#cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/compile_scripts/titan/compile.openacc b/components/homme/compile_scripts/titan/compile.openacc index 2e09b6529e5..ac2d06f16e0 100755 --- a/components/homme/compile_scripts/titan/compile.openacc +++ b/components/homme/compile_scripts/titan/compile.openacc @@ -1,7 +1,7 @@ #!/bin/bash -do_cmake=0 -do_clean=0 +do_cmake=1 +do_clean=1 do_make=1 HOMME_ROOT=/ccs/home/$USER/ACME/components/homme @@ -14,7 +14,6 @@ mkdir -p titan-openacc || exit -1 cd titan-openacc if [ $do_cmake -eq 1 ]; then -sed -i "s/ integer, parameter :: qsize_d=.*/ integer, parameter :: qsize_d=$NTRACERS/g" $HOMME_ROOT/src/share/dimensions_mod.F90 || exit -1 rm -rf CMakeFiles CMakeCache.txt cmake \ -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ @@ -27,10 +26,13 @@ cmake -DWITH_PNETCDF=FALSE \ -DHDF5_DIR=$HDF5_DIR \ -DPREQX_NP=4 \ + -DPREQX_QSIZE_D=$NTRACERS \ -DPREQX_PLEV=$NLEVELS \ -DBUILD_HOMME_SWDGX=FALSE \ -DBUILD_HOMME_SWEQX=FALSE \ -DBUILD_HOMME_PRIMDGX=FALSE \ + 
-DBUILD_HOMME_PREQX_ACC=TRUE \ + -DBUILD_HOMME_PREQX=FALSE \ -DENABLE_OPENMP=TRUE \ -DHOMME_PROJID=STF006 \ -DENABLE_OPENACC=TRUE \ @@ -49,7 +51,7 @@ make clean || exit -1 fi if [ $do_make -eq 1 ]; then -make -j16 preqx || exit -1 +make -j16 preqx_acc || exit -1 mkdir -p $HOMME_ROOT/build/preqx -cp ./src/preqx/preqx $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +cp ./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 fi diff --git a/components/homme/compile_scripts/titan/compile.openacc.oncpu b/components/homme/compile_scripts/titan/compile.openacc.oncpu new file mode 100755 index 00000000000..a6e3f9d381f --- /dev/null +++ b/components/homme/compile_scripts/titan/compile.openacc.oncpu @@ -0,0 +1,54 @@ +#!/bin/bash + +do_cmake=1 +do_clean=1 +do_make=1 + +HOMME_ROOT=/ccs/home/$USER/ACME/components/homme +NTRACERS=50 +NLEVELS=64 + +source ./env_mach_specific.cpu + +mkdir -p titan-openacc || exit -1 +cd titan-openacc + +if [ $do_cmake -eq 1 ]; then +rm -rf CMakeFiles CMakeCache.txt +cmake \ + -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ + -DCMAKE_Fortran_COMPILER=ftn \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DDEBUG_FLAGS=" " \ + -DNETCDF_DIR=$NETCDF_DIR \ + -DWITH_PNETCDF=FALSE \ + -DHDF5_DIR=$HDF5_DIR \ + -DPREQX_NP=4 \ + -DPREQX_QSIZE_D=$NTRACERS \ + -DPREQX_PLEV=$NLEVELS \ + -DBUILD_HOMME_SWDGX=FALSE \ + -DBUILD_HOMME_SWEQX=FALSE \ + -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX_ACC=TRUE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DENABLE_OPENMP=TRUE \ + -DHOMME_PROJID=STF006 \ + -DENABLE_OPENACC=TRUE \ + -DENABLE_CUDA_FORTRAN=FALSE \ + -DHOMME_FIND_BLASLAPACK=TRUE \ + -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ + -DPREQX_CUSTOM_FLAGS_FILE=${HOMME_ROOT}/compile_scripts/titan/customFlags.cmake.cpu \ + $HOMME_ROOT +fi + +if [ $do_clean -eq 1 ]; then +make clean || exit -1 +fi + +if [ $do_make -eq 1 ]; then +make -j16 preqx_acc || exit -1 +mkdir -p $HOMME_ROOT/build/preqx +cp 
./src/preqx_acc/preqx_acc $HOMME_ROOT/build/preqx/preqx.openacc || exit -1 +fi diff --git a/components/homme/compile_scripts/titan/compile.sweqx b/components/homme/compile_scripts/titan/compile.sweqx new file mode 100755 index 00000000000..6cfca993f55 --- /dev/null +++ b/components/homme/compile_scripts/titan/compile.sweqx @@ -0,0 +1,51 @@ +#!/bin/bash + +do_cmake=1 +do_clean=1 +do_make=1 + +HOMME_ROOT=/ccs/home/$USER/ACME/components/homme +NLEVELS=64 + +source ./env_mach_specific.cpu + +mkdir -p titan-cpu || exit -1 +cd titan-cpu + +if [ $do_cmake -eq 1 ]; then +rm -rf CMakeFiles CMakeCache.txt +cmake \ + -C $HOMME_ROOT/cmake/machineFiles/titan.cmake \ + -DCMAKE_Fortran_COMPILER=ftn \ + -DCMAKE_C_COMPILER=cc \ + -DCMAKE_CXX_COMPILER=CC \ + -DOPT_FLAGS="-O2 -Kieee -Mvect=nosse" \ + -DDEBUG_FLAGS=" " \ + -DNETCDF_DIR=$NETCDF_DIR \ + -DWITH_PNETCDF=FALSE \ + -DHDF5_DIR=$HDF5_DIR \ + -DSWEQX_NP=4 \ + -DSWEQX_PLEV=$NLEVELS \ + -DBUILD_HOMME_SWDGX=FALSE \ + -DBUILD_HOMME_SWEQX=TRUE \ + -DBUILD_HOMME_PRIMDGX=FALSE \ + -DBUILD_HOMME_PREQX=FALSE \ + -DENABLE_OPENMP=TRUE \ + -DHOMME_PROJID=STF006 \ + -DENABLE_OPENACC=FALSE \ + -DENABLE_CUDA_FORTRAN=FALSE \ + -DHOMME_FIND_BLASLAPACK=TRUE \ + -DNetcdf_NC_CONFIG_BIN="/opt/cray/netcdf/4.3.3.1/bin" \ + $HOMME_ROOT +fi +# -DOPT_FLAGS="-fastsse -Mvect" \ + +if [ $do_clean -eq 1 ]; then +make clean || exit -1 +fi + +if [ $do_make -eq 1 ]; then +make -j16 sweqx || exit -1 +mkdir -p $HOMME_ROOT/build/sweqx +cp ./src/sweqx/sweqx $HOMME_ROOT/build/sweqx/sweqx.cpu || exit -1 +fi diff --git a/components/homme/compile_scripts/titan/customFlags.cmake.openacc b/components/homme/compile_scripts/titan/customFlags.cmake.openacc index 5bb9c6a1676..9d2729e62e0 100644 --- a/components/homme/compile_scripts/titan/customFlags.cmake.openacc +++ b/components/homme/compile_scripts/titan/customFlags.cmake.openacc @@ -1,12 +1,6 @@ SET(CUSTOM_FLAG_FILES ${HOMME_SOURCE_DIR}/physics/column_model_mod.F90 ${HOMME_SOURCE_DIR}/src/share/prim_driver_mod.F90 - 
${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 ) @@ -16,15 +10,3 @@ SET_SOURCE_FILES_PROPERTIES( PROPERTIES GENERATED TRUE COMPILE_FLAGS " -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O0 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin" ) - -SET_SOURCE_FILES_PROPERTIES( - ${HOMME_SOURCE_DIR}/src/share/openacc/bndry_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/derivative_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/edge_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/openacc_utils_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/prim_advection_openacc_mod.F90 - ${HOMME_SOURCE_DIR}/src/share/openacc/viscosity_openacc_mod.F90 - PROPERTIES GENERATED TRUE COMPILE_FLAGS - " -Mpreprocess -Kieee -Mextend -Kieee -Mflushz -O2 -Kieee -Mvect=nosse -mp -acc -Mcuda=7.0,cc35,ptxinfo -Minfo=accel -ta=tesla,pin -Minline=reshape" -) - diff --git a/components/homme/compile_scripts/titan/env_mach_specific.cpu b/components/homme/compile_scripts/titan/env_mach_specific.cpu index e3d54196c33..41aee75a4e7 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.cpu +++ b/components/homme/compile_scripts/titan/env_mach_specific.cpu @@ -32,12 +32,13 @@ module rm cray-parallel-netcdf module load PrgEnv-pgi # module switch pgi pgi/14.2.0 - module switch pgi pgi/15.10.lustre + module switch pgi pgi/15.10.0 module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 module switch atp atp/1.7.5 - + module add cudatoolkit + export CRAY_CUDA_MPS=1 export CRAY_CPU_TARGET=istanbul module load cray-netcdf-hdf5parallel/4.3.3.1 diff --git 
a/components/homme/compile_scripts/titan/env_mach_specific.openacc b/components/homme/compile_scripts/titan/env_mach_specific.openacc index 966acd15154..f0374679a7f 100644 --- a/components/homme/compile_scripts/titan/env_mach_specific.openacc +++ b/components/homme/compile_scripts/titan/env_mach_specific.openacc @@ -32,7 +32,7 @@ module rm cray-parallel-netcdf module load PrgEnv-pgi # module switch pgi pgi/14.2.0 - module switch pgi pgi/15.9.lustre + module switch pgi pgi/15.9.home module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 diff --git a/components/homme/src/preqx/CMakeLists.txt b/components/homme/src/preqx/CMakeLists.txt index c28852c3005..710b9c0a964 100644 --- a/components/homme/src/preqx/CMakeLists.txt +++ b/components/homme/src/preqx/CMakeLists.txt @@ -16,13 +16,20 @@ SET (PREQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx") SET(PREQX_SRCS_F90 - ${SRC_SHARE_DIR}/openacc/bndry_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/derivative_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/edge_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/openacc_utils_mod.F90 - ${SRC_SHARE_DIR}/openacc/prim_advection_openacc_mod.F90 - ${SRC_SHARE_DIR}/openacc/viscosity_openacc_mod.F90 - ${SRC_SHARE_DIR}/arch_switch_mod.F90 + ${SRC_SHARE_DIR}/bndry_mod_base.F90 + ${SRC_SHARE_DIR}/derivative_mod_base.F90 + ${SRC_SHARE_DIR}/edge_mod_base.F90 + ${SRC_SHARE_DIR}/prim_advection_mod_base.F90 + ${SRC_SHARE_DIR}/solver_init_mod_base.F90 + ${SRC_SHARE_DIR}/vertremap_mod_base.F90 + ${SRC_SHARE_DIR}/viscosity_mod_base.F90 + ${SRC_DIR}/preqx/bndry_mod.F90 + ${SRC_DIR}/preqx/derivative_mod.F90 + ${SRC_DIR}/preqx/edge_mod.F90 + ${SRC_DIR}/preqx/prim_advection_mod.F90 + ${SRC_DIR}/preqx/solver_init_mod.F90 + ${SRC_DIR}/preqx/vertremap_mod.F90 + ${SRC_DIR}/preqx/viscosity_mod.F90 ${SRC_SHARE_DIR}/kinds.F90 ${SRC_SHARE_DIR}/dimensions_mod.F90 ${SRC_SHARE_DIR}/control_mod.F90 @@ -30,7 +37,6 @@ 
SET(PREQX_SRCS_F90 ${SRC_SHARE_DIR}/coordinate_systems_mod.F90 ${SRC_SHARE_DIR}/params_mod.F90 ${SRC_SHARE_DIR}/gridgraph_mod.F90 - ${SRC_SHARE_DIR}/edge_mod.F90 ${SRC_SHARE_DIR}/element_mod.F90 ${SRC_SHARE_DIR}/parallel_mod.F90 ${SRC_SHARE_DIR}/hybrid_mod.F90 @@ -43,9 +49,6 @@ SET(PREQX_SRCS_F90 ${SRC_SHARE_DIR}/schedtype_mod.F90 ${SRC_SHARE_DIR}/edgetype_mod.F90 ${SRC_SHARE_DIR}/thread_mod.F90 - ${SRC_SHARE_DIR}/bndry_mod.F90 - ${SRC_SHARE_DIR}/derivative_mod.F90 - ${SRC_SHARE_DIR}/viscosity_mod.F90 ${SRC_SHARE_DIR}/unit_tests_mod.F90 ${SRC_SHARE_DIR}/spacecurve_mod.F90 ${SRC_SHARE_DIR}/cube_mod.F90 @@ -93,7 +96,6 @@ SET(PREQX_SRCS_F90 ${SRC_DIR}/prim_restart_mod.F90 ${SRC_SHARE_DIR}/prim_si_ref_mod.F90 ${SRC_SHARE_DIR}/prim_advance_mod.F90 - ${SRC_SHARE_DIR}/prim_advection_mod.F90 ${SRC_DIR}/forcing_mod.F90 ${SRC_SHARE_DIR}/metis_mod.F90 ${SRC_SHARE_DIR}/prim_driver_mod.F90 @@ -111,7 +113,8 @@ SET(PREQX_SRCS_F90 ${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 ${PHYSICS_DIR}/column_model_mod.F90 ${PHYSICS_DIR}/column_types_mod.F90 -) + CACHE INTERNAL "") + SET(PREQX_SRCS_C ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c @@ -153,15 +156,7 @@ MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") -IF (${PREQX_USE_CUDA_FORTRAN}) - SET(USE_CUDA_FORTRAN TRUE) - MESSAGE(STATUS " Using CUDA Fortran") -ENDIF () - -IF (${PREQX_USE_OPENACC}) - SET(USE_OPENACC TRUE) - MESSAGE(STATUS " Using OpenACC") -ENDIF () +SET(USE_OPENACC FALSE) IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") @@ -174,10 +169,4 @@ ENDIF () createTestExec(preqx preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) -IF (${BUILD_PREQX_SPELT}) - MESSAGE(STATUS " Also building preqx_spelt") - createTestExec(preqx_spelt preqx ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} - ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) - SET_TARGET_PROPERTIES(preqx_spelt 
PROPERTIES COMPILE_DEFINITIONS "_SPELT") -ENDIF () diff --git a/components/homme/src/preqx/bndry_mod.F90 b/components/homme/src/preqx/bndry_mod.F90 new file mode 100644 index 00000000000..5ad1125cbef --- /dev/null +++ b/components/homme/src/preqx/bndry_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module bndry_mod + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + implicit none +end module bndry_mod diff --git a/components/homme/src/preqx/derivative_mod.F90 b/components/homme/src/preqx/derivative_mod.F90 new file mode 100644 index 00000000000..e9c3f81be3f --- /dev/null +++ b/components/homme/src/preqx/derivative_mod.F90 @@ -0,0 +1,12 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module derivative_mod + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + implicit none +end module derivative_mod diff --git a/components/homme/src/preqx/edge_mod.F90 b/components/homme/src/preqx/edge_mod.F90 new file mode 100644 index 00000000000..902503b36cb --- /dev/null +++ b/components/homme/src/preqx/edge_mod.F90 @@ -0,0 +1,12 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module edge_mod + use 
edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + implicit none +end module edge_mod diff --git a/components/homme/src/preqx/prim_advection_mod.F90 b/components/homme/src/preqx/prim_advection_mod.F90 new file mode 100644 index 00000000000..a3e235c41aa --- /dev/null +++ b/components/homme/src/preqx/prim_advection_mod.F90 @@ -0,0 +1,9 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module prim_advection_mod + use prim_advection_mod_base, only: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap, Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, & + prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 + implicit none +end module prim_advection_mod diff --git a/components/homme/src/preqx/solver_init_mod.F90 b/components/homme/src/preqx/solver_init_mod.F90 new file mode 100644 index 00000000000..90efb25eadf --- /dev/null +++ b/components/homme/src/preqx/solver_init_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + use solver_init_mod_base, only: solver_init2 + implicit none +end module solver_init_mod diff --git a/components/homme/src/preqx/vertremap_mod.F90 b/components/homme/src/preqx/vertremap_mod.F90 new file mode 100644 index 00000000000..63617bf66e6 --- /dev/null +++ b/components/homme/src/preqx/vertremap_mod.F90 @@ -0,0 +1,8 @@ 
+#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module vertremap_mod + use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm + implicit none +end module vertremap_mod diff --git a/components/homme/src/preqx/viscosity_mod.F90 b/components/homme/src/preqx/viscosity_mod.F90 new file mode 100644 index 00000000000..f78ae69a8f3 --- /dev/null +++ b/components/homme/src/preqx/viscosity_mod.F90 @@ -0,0 +1,11 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module viscosity_mod + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + implicit none +end module viscosity_mod diff --git a/components/homme/src/preqx_acc/CMakeLists.txt b/components/homme/src/preqx_acc/CMakeLists.txt new file mode 100644 index 00000000000..83e28c4d87b --- /dev/null +++ b/components/homme/src/preqx_acc/CMakeLists.txt @@ -0,0 +1,174 @@ +# Relevant Directories within the HOMME repository +# holding source files needed for building the library +# and executable preqx_acc. 
+ + + +SET(UTILS_SHARE_DIR ${HOMME_SOURCE_DIR}/utils/csm_share) +SET(SRC_DIR ${HOMME_SOURCE_DIR}/src) +SET(SRC_SHARE_DIR ${HOMME_SOURCE_DIR}/src/share) +SET(PHYSICS_DIR ${HOMME_SOURCE_DIR}/physics) +SET(UTILS_TIMING_DIR ${CMAKE_BINARY_DIR}/utils/timing) + +# Make INCLUDE_DIRS global so the tests can access it +SET (PREQX_ACC_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} + ${UTILS_TIMING_DIR} + ${SRC_SHARE_DIR} CACHE INTERNAL "include directories for preqx") + +SET(PREQX_SRCS_F90 + ${SRC_SHARE_DIR}/bndry_mod_base.F90 + ${SRC_SHARE_DIR}/derivative_mod_base.F90 + ${SRC_SHARE_DIR}/edge_mod_base.F90 + ${SRC_SHARE_DIR}/prim_advection_mod_base.F90 + ${SRC_SHARE_DIR}/solver_init_mod_base.F90 + ${SRC_SHARE_DIR}/vertremap_mod_base.F90 + ${SRC_SHARE_DIR}/viscosity_mod_base.F90 + ${SRC_DIR}/preqx_acc/bndry_mod.F90 + ${SRC_DIR}/preqx_acc/derivative_mod.F90 + ${SRC_DIR}/preqx_acc/edge_mod.F90 + ${SRC_DIR}/preqx_acc/openacc_utils_mod.F90 + ${SRC_DIR}/preqx_acc/prim_advection_mod.F90 + ${SRC_DIR}/preqx_acc/solver_init_mod.F90 + ${SRC_DIR}/preqx_acc/vertremap_mod.F90 + ${SRC_DIR}/preqx_acc/viscosity_mod.F90 + ${SRC_SHARE_DIR}/kinds.F90 + ${SRC_SHARE_DIR}/dimensions_mod.F90 + ${SRC_SHARE_DIR}/control_mod.F90 + ${SRC_SHARE_DIR}/physical_constants.F90 + ${SRC_SHARE_DIR}/coordinate_systems_mod.F90 + ${SRC_SHARE_DIR}/params_mod.F90 + ${SRC_SHARE_DIR}/gridgraph_mod.F90 + ${SRC_SHARE_DIR}/element_mod.F90 + ${SRC_SHARE_DIR}/parallel_mod.F90 + ${SRC_SHARE_DIR}/hybrid_mod.F90 + ${SRC_SHARE_DIR}/reduction_mod.F90 + ${SRC_SHARE_DIR}/quadrature_mod.F90 + ${SRC_SHARE_DIR}/global_norms_mod.F90 + ${SRC_SHARE_DIR}/ll_mod.F90 + ${SRC_SHARE_DIR}/metagraph_mod.F90 + ${SRC_SHARE_DIR}/schedule_mod.F90 + ${SRC_SHARE_DIR}/schedtype_mod.F90 + ${SRC_SHARE_DIR}/edgetype_mod.F90 + ${SRC_SHARE_DIR}/thread_mod.F90 + ${SRC_SHARE_DIR}/unit_tests_mod.F90 + ${SRC_SHARE_DIR}/spacecurve_mod.F90 + ${SRC_SHARE_DIR}/cube_mod.F90 + ${SRC_SHARE_DIR}/interpolate_mod.F90 + ${SRC_SHARE_DIR}/fvm_bsp_mod.F90 + 
${SRC_SHARE_DIR}/fvm_transformation_mod.F90 + ${SRC_SHARE_DIR}/dof_mod.F90 + ${SRC_DIR}/common_io_mod.F90 + ${SRC_SHARE_DIR}/spelt_mod.F90 + ${SRC_SHARE_DIR}/fvm_mod.F90 + ${SRC_SHARE_DIR}/fvm_control_volume_mod.F90 + ${SRC_SHARE_DIR}/fvm_line_integrals_mod.F90 + ${SRC_SHARE_DIR}/fvm_line_integrals_flux_mod.F90 + ${SRC_SHARE_DIR}/fvm_analytic_mod.F90 + ${SRC_SHARE_DIR}/fvm_filter_mod.F90 + ${SRC_SHARE_DIR}/fvm_reconstruction_mod.F90 + ${SRC_DIR}/pio_io_mod.F90 + ${SRC_DIR}/repro_sum_mod.F90 + ${SRC_SHARE_DIR}/time_mod.F90 + ${SRC_SHARE_DIR}/physics_mod.F90 + ${SRC_SHARE_DIR}/hybvcoord_mod.F90 + ${SRC_DIR}/interp_movie_mod.F90 + ${UTILS_SHARE_DIR}/shr_kind_mod.F90 + ${UTILS_SHARE_DIR}/shr_mpi_mod.F90 + ${UTILS_SHARE_DIR}/shr_sys_mod.F90 + ${UTILS_SHARE_DIR}/shr_file_mod.F90 + ${UTILS_SHARE_DIR}/shr_vmath_mod.F90 + ${UTILS_SHARE_DIR}/shr_const_mod.F90 + ${UTILS_SHARE_DIR}/shr_spfn_mod.F90 + ${SRC_SHARE_DIR}/domain_mod.F90 + ${SRC_DIR}/restart_io_mod.F90 + ${SRC_SHARE_DIR}/cg_mod.F90 + ${SRC_SHARE_DIR}/linear_algebra_mod.F90 + ${SRC_SHARE_DIR}/solver_mod.F90 + ${SRC_SHARE_DIR}/filter_mod.F90 + ${SRC_DIR}/common_movie_mod.F90 + ${SRC_SHARE_DIR}/namelist_mod.F90 + ${SRC_SHARE_DIR}/mass_matrix_mod.F90 + ${SRC_DIR}/checksum_mod.F90 + ${SRC_DIR}/baroclinic_inst_mod.F90 + ${SRC_SHARE_DIR}/diffusion_mod.F90 + ${SRC_SHARE_DIR}/prim_si_mod.F90 + ${SRC_DIR}/asp_tests.F90 + ${SRC_SHARE_DIR}/prim_state_mod.F90 + ${SRC_DIR}/prim_restart_mod.F90 + ${SRC_SHARE_DIR}/prim_si_ref_mod.F90 + ${SRC_SHARE_DIR}/prim_advance_mod.F90 + ${SRC_DIR}/forcing_mod.F90 + ${SRC_SHARE_DIR}/metis_mod.F90 + ${SRC_SHARE_DIR}/prim_driver_mod.F90 + ${SRC_DIR}/vertical_mod.F90 + ${SRC_DIR}/prim_movie_mod.F90 + ${SRC_DIR}/surfaces_mod.F90 + ${SRC_DIR}/interpolate_driver_mod.F90 + ${SRC_DIR}/netcdf_io_mod.F90 + ${SRC_SHARE_DIR}/mesh_mod.F90 + ${SRC_SHARE_DIR}/Manager.F90 + ${SRC_DIR}/prim_main.F90 + ${SRC_DIR}/ref_state_mod.F90 + ${PHYSICS_DIR}/physics_types_mod.F90 + ${PHYSICS_DIR}/physics_io_mod.F90 + 
${PHYSICS_DIR}/heldsuarez/held_suarez_mod.F90 + ${PHYSICS_DIR}/column_model_mod.F90 + ${PHYSICS_DIR}/column_types_mod.F90 + CACHE INTERNAL "") + +SET(PREQX_SRCS_C + ${UTILS_SHARE_DIR}/shr_vmath_fwrap.c + ${SRC_DIR}/jrio.c +) + +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () + +# Make SRCS global so the tests can access it +SET(PREQX_ACC_SRCS ${PREQX_ACC_SRCS} ${PREQX_SRCS_C} ${PREQX_SRCS_F90} CACHE INTERNAL "Sources for preqx_acc") + +# Set up defaults +IF (NOT PREQX_NP) + SET (PREQX_NP 4) +ENDIF () +IF (NOT PREQX_PLEV) + SET (PREQX_PLEV 20) +ENDIF () +IF (NOT PREQX_USE_PIO) + SET (PREQX_USE_PIO FALSE) +ENDIF () +IF (NOT PREQX_USE_ENERGY) + SET (PREQX_USE_ENERGY FALSE) +ENDIF () +IF (NOT PREQX_NC) + SET (PREQX_NC 4) +ENDIF () +IF (PREQX_QSIZE_D) + SET (QSIZE_D ${PREQX_QSIZE_D}) +ENDIF () + +MESSAGE(STATUS "Building preqx with:") +MESSAGE(STATUS " NP = ${PREQX_NP}") +MESSAGE(STATUS " NC = ${PREQX_NC}") +MESSAGE(STATUS " PLEV = ${PREQX_PLEV}") +MESSAGE(STATUS " PIO = ${PREQX_USE_PIO}") +MESSAGE(STATUS " ENERGY = ${PREQX_USE_ENERGY}") + +SET(USE_OPENACC TRUE) +SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}") +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}") + +IF (${ENABLE_HORIZ_OPENMP} AND ${ENABLE_COLUMN_OPENMP}) + SET(NESTED_OPENMP TRUE CACHE BOOL "Both horizontal and column threading") +ENDIF () + +# The following macro handles the compilation with the appropriate variables +############################################################################ +# createTestExec(exec_name exec_type NP NC PLEV USE_PIO USE_ENERGY) +############################################################################ +createTestExec(preqx_acc preqx_acc ${PREQX_NP} ${PREQX_NC} ${PREQX_PLEV} + ${PREQX_USE_PIO} ${PREQX_USE_ENERGY}) + + diff --git a/components/homme/src/share/openacc/bndry_openacc_mod.F90 
b/components/homme/src/preqx_acc/bndry_mod.F90 similarity index 96% rename from components/homme/src/share/openacc/bndry_openacc_mod.F90 rename to components/homme/src/preqx_acc/bndry_mod.F90 index 783f884c00f..75fd0ddd909 100644 --- a/components/homme/src/share/openacc/bndry_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/bndry_mod.F90 @@ -3,14 +3,17 @@ #include "config.h" #endif -module bndry_openacc_mod -#if USE_OPENACC - use kinds , only: real_kind +module bndry_mod + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + use parallel_mod, only : syncmp,parallel_t,abortmp,iam + use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t + use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads + use kinds, only: real_kind implicit none private - integer, parameter :: maxCycles = 20 - integer, parameter :: maxChunks = 64 - real(kind=real_kind), parameter :: chunk_denom = 1.e5 + integer, parameter, private :: maxCycles = 20 + integer, parameter, private :: maxChunks = 64 + real(kind=real_kind), parameter, private :: chunk_denom = 1.e5 type send_stager_t integer :: nUpdateHost, nSendComp @@ -24,12 +27,13 @@ module bndry_openacc_mod integer :: beg(maxchunks), end(maxchunks), len(maxchunks), asyncid(maxchunks), tag(maxchunks), req(maxchunks) end type recv_stager_t - type(send_stager_t) :: stg_send(maxCycles) - type(recv_stager_t) :: stg_recv(maxCycles) + type(send_stager_t), private :: stg_send(maxCycles) + type(recv_stager_t), private :: stg_recv(maxCycles) + public :: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping public :: bndry_exchangeS_simple_overlap - public :: bndry_exchangeV_simple_overlap public :: 
bndry_exchangeV_timing + public :: bndry_exchangeV_simple_overlap public :: bndry_exchangeV_finer_overlap contains @@ -514,8 +518,8 @@ function mpi_isend_openacc_stage(buf, count, dest, tag_root, comm, ierror, nchun use parallel_mod , only: mpireal_t use openacc_utils_mod, only: update_host_async, acc_async_test_wrap implicit none - real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data integer , intent(in ) :: count !number of elements in buffer + real(kind=real_kind), intent(in ) :: buf(count) !buffer from which to send data integer , intent(in ) :: dest !the MPI rank I'm sending data to integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls) integer , intent(in ) :: comm !Communicator to use @@ -576,8 +580,8 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch use parallel_mod , only: mpireal_t use openacc_utils_mod, only: update_device_async, copy_ondev_async implicit none - real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data integer , intent(in ) :: count !number of elements in buffer + real(kind=real_kind), intent(in ) :: buf(count) !buffer in which to receive data integer , intent(in ) :: source !the MPI rank I'm receiving data from integer , intent(in ) :: tag_root !tag of the original send (I'll alter this for internal mpi_isend calls) integer , intent(in ) :: comm !Communicator to use @@ -634,6 +638,5 @@ function mpi_irecv_openacc_stage(buf, count, source, tag_root, comm, ierror, nch if (stg_recv(myid)%nUpdateDev == nchunks) finished = .true. 
end function mpi_irecv_openacc_stage -#endif -end module bndry_openacc_mod +end module bndry_mod diff --git a/components/homme/src/preqx_acc/config.h.cmake.in b/components/homme/src/preqx_acc/config.h.cmake.in new file mode 100644 index 00000000000..8c2c17cb05f --- /dev/null +++ b/components/homme/src/preqx_acc/config.h.cmake.in @@ -0,0 +1,61 @@ +/* PRIM Options */ + +/* Energy Diagnostics */ +#cmakedefine ENERGY_DIAGNOSTICS + +/* Using PRIMITIVE */ +#define _PRIM + +/* WK_GRAD */ +#define _WK_GRAD 1 + +/* _PRESTART */ +#define _PRESTART 1 + +/* Using MPI */ +#define _MPI 1 + +/* PIO with interpolation */ +#cmakedefine PIO_INTERP + +/* Number of cells NC*NC in each element (Default value NC=4) */ +#define NC @NUM_CELLS@ + +/* Number of POINTS in the element (Default value is 4) */ +#define NP @NUM_POINTS@ + +/* Number of vertical levels PLEV (Default value is 1) */ +#define PLEV @NUM_PLEV@ + +/* Number of tracers (Default value 4 if not specified) */ +#cmakedefine QSIZE_D @QSIZE_D@ + +/* Basic PIO */ +#cmakedefine PIO + +/* Whether to use CUDA Fortran */ +#cmakedefine01 USE_CUDA_FORTRAN + +/* Whether to use OPENACC */ +#cmakedefine01 USE_OPENACC + +/* Quadruple-precision */ +#cmakedefine01 HOMME_QUAD_PREC + +/* Horizontal OpenMP (over elements) */ +#cmakedefine HORIZ_OPENMP + +/* Vertical OpenMP (within elements) */ +#cmakedefine COLUMN_OPENMP + +/* Nested OpenMP (both horizontal and vertical) */ +#cmakedefine NESTED_OPENMP + +/* Zoltan partitioning library */ +#cmakedefine01 HAVE_ZOLTAN + +/* Enable persistent MPI comm */ +#cmakedefine MPI_PERSISTENT + +/* Using Accelerator */ +#cmakedefine IS_ACCELERATOR diff --git a/components/homme/src/share/openacc/derivative_openacc_mod.F90 b/components/homme/src/preqx_acc/derivative_mod.F90 similarity index 74% rename from components/homme/src/share/openacc/derivative_openacc_mod.F90 rename to components/homme/src/preqx_acc/derivative_mod.F90 index 4676d05a053..97dea44734d 100644 --- 
a/components/homme/src/share/openacc/derivative_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/derivative_mod.F90 @@ -3,22 +3,36 @@ #include "config.h" #endif -module derivative_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind - use dimensions_mod, only: np, nelemd +module derivative_mod + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + use kinds, only : real_kind, longdouble_kind + use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev + use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi + use parallel_mod, only : abortmp + ! 
needed for spherical differential operators: + use physical_constants, only : rrearth + use element_mod, only : element_t + use control_mod, only : hypervis_scaling, hypervis_power implicit none private - public :: laplace_sphere_wk - public :: divergence_sphere_wk - public :: gradient_sphere - public :: divergence_sphere + public :: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + public :: laplace_sphere_wk_openacc + public :: divergence_sphere_wk_openacc + public :: gradient_sphere_openacc + public :: divergence_sphere_openacc contains - subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl) - use derivative_mod, only: derivative_t + subroutine laplace_sphere_wk_openacc(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,ntl,tl) use element_mod, only: element_t use control_mod, only: hypervis_scaling, hypervis_power implicit none @@ -35,7 +49,7 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n integer :: i,j,k,ie ! 
Local real(kind=real_kind) :: oldgrads(2) - call gradient_sphere(s,deriv,elem(:),grads,len,nets,nete,ntl,tl) + call gradient_sphere_openacc(s,deriv,elem(:),grads,len,nets,nete,ntl,tl) !$acc parallel loop gang vector collapse(4) present(grads,elem(:)) private(oldgrads) do ie = nets , nete do k = 1 , len @@ -58,12 +72,11 @@ subroutine laplace_sphere_wk(s,grads,deriv,elem,var_coef,laplace,len,nets,nete,n enddo ! note: divergnece_sphere and divergence_sphere_wk are identical *after* bndry_exchange ! if input is C_0. Here input is not C_0, so we should use divergence_sphere_wk(). - call divergence_sphere_wk(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl) - end subroutine laplace_sphere_wk + call divergence_sphere_wk_openacc(grads,deriv,elem(:),laplace,len,nets,nete,ntl,tl) + end subroutine laplace_sphere_wk_openacc - subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl) + subroutine divergence_sphere_wk_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl) use element_mod, only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none ! input: v = velocity in lat-lon coordinates @@ -117,11 +130,10 @@ subroutine divergence_sphere_wk(v,deriv,elem,div,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine divergence_sphere_wk + end subroutine divergence_sphere_wk_openacc - subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl) + subroutine gradient_sphere_openacc(s,deriv,elem,ds,len,nets,nete,ntl,tl) use element_mod, only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none ! input s: scalar @@ -171,13 +183,12 @@ subroutine gradient_sphere(s,deriv,elem,ds,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine gradient_sphere + end subroutine gradient_sphere_openacc - subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl) + subroutine divergence_sphere_openacc(v,deriv,elem,div,len,nets,nete,ntl,tl) ! 
input: v = velocity in lat-lon coordinates ! ouput: div(v) spherical divergence of v use element_mod , only: element_t - use derivative_mod, only: derivative_t use physical_constants, only: rrearth implicit none real(kind=real_kind), intent(in ) :: v(np,np,2,len,ntl,nelemd) ! in lat-lon coordinates @@ -227,8 +238,7 @@ subroutine divergence_sphere(v,deriv,elem,div,len,nets,nete,ntl,tl) enddo enddo enddo - end subroutine divergence_sphere + end subroutine divergence_sphere_openacc -#endif -end module derivative_openacc_mod +end module derivative_mod diff --git a/components/homme/src/share/openacc/edge_openacc_mod.F90 b/components/homme/src/preqx_acc/edge_mod.F90 similarity index 86% rename from components/homme/src/share/openacc/edge_openacc_mod.F90 rename to components/homme/src/preqx_acc/edge_mod.F90 index 29178a9fda1..645fe05a66a 100644 --- a/components/homme/src/share/openacc/edge_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/edge_mod.F90 @@ -3,24 +3,43 @@ #include "config.h" #endif -module edge_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind, int_kind, log_kind - use dimensions_mod, only: max_neigh_edges,nelemd,np,max_corner_elem +module edge_mod + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + use kinds, only : int_kind, log_kind, real_kind + use dimensions_mod, only : max_neigh_edges, nelemd, np 
+ use perf_mod, only: t_startf, t_stopf, t_adj_detailf ! _EXTERNAL + use thread_mod, only: nthreadshoriz, omp_get_num_threads, omp_get_thread_num + use coordinate_systems_mod, only : cartesian3D_t + use schedtype_mod, only : cycle_t, schedule_t, schedule + use parallel_mod, only : abortmp, haltmp, MPIreal_t, iam,parallel_t, & + MAX_ACTIVE_MSG, HME_status_size, BNDRY_TAG_BASE + use edgetype_mod, only : edgedescriptor_t, edgebuffer_t, & + Longedgebuffer_t, Ghostbuffertr_t, Ghostbuffer3d_t, initedgebuffer_callid + use element_mod, only : element_t implicit none private - public :: edgeSpack - public :: edgeSunpackMin - public :: edgeSunpackMax - public :: edgeVpack - public :: edgeVunpack - public :: edgeVunpackMin - public :: edgeVunpackMax + public :: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + public :: edgeSpack_openacc + public :: edgeSunpackMin_openacc + public :: edgeSunpackMax_openacc + public :: edgeVpack_openacc + public :: edgeVunpack_openacc + public :: edgeVunpackMin_openacc + public :: edgeVunpackMax_openacc + contains - subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : max_corner_elem use control_mod , only : north, south, east, west, neast, nwest, seast, swest use perf_mod , only : t_startf, t_stopf @@ -29,10 +48,10 
@@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) ,intent(inout) :: edge integer ,intent(in ) :: vlyr - real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd) integer ,intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real (kind=real_kind) ,intent(in ) :: v(vlyr,tdim,nelemd) ! Local variables integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -66,9 +85,9 @@ subroutine edgeSpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_pack') - end subroutine edgeSpack + end subroutine edgeSpack_openacc - subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -76,10 +95,10 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -125,9 +144,9 @@ subroutine edgeSunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_unpack_min') - end subroutine edgeSunpackMin + end subroutine edgeSunpackMin_openacc - subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeSunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -135,10 +154,10 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 64 @@ -184,9 +203,9 @@ subroutine edgeSunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_s_unpack_max') - end subroutine edgeSunpackMax + end subroutine edgeSunpackMax_openacc - subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : max_corner_elem use control_mod , only : north, south, east, west, neast, nwest, seast, swest use perf_mod , only : t_startf, t_stopf @@ -195,10 +214,10 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) ,intent(inout) :: edge integer ,intent(in ) :: vlyr - real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd) integer ,intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real (kind=real_kind) ,intent(in ) :: v(np,np,vlyr,tdim,nelemd) ! 
Local variables integer :: i,k,ir,ll,is,ie,in,iw,el,kc,kk integer, parameter :: kchunk = 32 @@ -247,9 +266,9 @@ subroutine edgeVpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_pack') - end subroutine edgeVpack + end subroutine edgeVpack_openacc - subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpack_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -257,10 +276,10 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 @@ -324,9 +343,9 @@ subroutine edgeVunpack(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack') - end subroutine edgeVunpack + end subroutine edgeVunpack_openacc - subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpackMin_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -334,10 +353,10 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 @@ -401,9 +420,9 @@ subroutine edgeVunpackMin(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack_min') - end subroutine edgeVunpackMin + end subroutine edgeVunpackMin_openacc - subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) + subroutine edgeVunpackMax_openacc(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use dimensions_mod, only : np, max_corner_elem use control_mod, only : north, south, east, west, neast, nwest, seast, swest use perf_mod, only: t_startf, t_stopf @@ -411,10 +430,10 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) use edgetype_mod , only : EdgeBuffer_t type(EdgeBuffer_t) , intent(in ) :: edge integer , intent(in ) :: vlyr - real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) integer , intent(in ) :: kptr type(element_t) ,intent(in ) :: elem(:) integer ,intent(in ) :: nets,nete,tdim,tl + real(kind=real_kind) , intent(inout) :: v(np,np,vlyr,tdim,nelemd) ! 
Local integer :: i,k,ll,is,ie,in,iw,el,kc,kk,glob_k,loc_ind,ii,jj, j integer, parameter :: kchunk = 32 @@ -478,8 +497,7 @@ subroutine edgeVunpackMax(edge,v,vlyr,kptr,elem,nets,nete,tdim,tl) enddo enddo call t_stopf('edge_unpack_max') - end subroutine edgeVunpackMax + end subroutine edgeVunpackMax_openacc -#endif -end module edge_openacc_mod +end module edge_mod diff --git a/components/homme/src/share/openacc/openacc_utils_mod.F90 b/components/homme/src/preqx_acc/openacc_utils_mod.F90 similarity index 74% rename from components/homme/src/share/openacc/openacc_utils_mod.F90 rename to components/homme/src/preqx_acc/openacc_utils_mod.F90 index 5b716e1849a..8e0acbb5f4d 100644 --- a/components/homme/src/share/openacc/openacc_utils_mod.F90 +++ b/components/homme/src/preqx_acc/openacc_utils_mod.F90 @@ -4,7 +4,6 @@ #endif module openacc_utils_mod -#if USE_OPENACC use kinds, only: real_kind use dimensions_mod, only: nelemd implicit none @@ -16,41 +15,25 @@ module openacc_utils_mod public :: update_device_async public :: copy_ondev public :: copy_ondev_async - public :: arch_init2 public :: acc_async_test_wrap contains function acc_async_test_wrap( asyncid ) result(rslt) - use openacc, only: acc_async_test +# ifdef _OPENACC + use openacc, only: acc_async_test +# endif implicit none integer, intent(in) :: asyncid logical :: rslt - rslt = .false. - rslt = acc_async_test(asyncid) +# ifdef _OPENACC + rslt = .false. + rslt = acc_async_test(asyncid) +# else + rslt = .true. 
+# endif end function acc_async_test_wrap - subroutine arch_init2( elem , deriv ) - use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj - use derivative_mod, only: derivative_t - implicit none - type(element_t) , intent(in) :: elem(:) - type(derivative_t), intent(in) :: deriv - integer :: ie - !$omp barrier - !$omp master - - !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj) - !$acc enter data pcopyin(elem(1:nelemd),deriv) - do ie = 1 , nelemd - !$acc enter data pcopyin(elem(ie)%desc) - !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse) - enddo - - !$omp end master - !$omp barrier - end subroutine arch_init2 - subroutine copy_qdp_h2d( elem , tl ) use element_mod, only: element_t, state_qdp implicit none @@ -119,6 +102,5 @@ subroutine copy_ondev_async(dest,src,len,id) enddo end subroutine copy_ondev_async -#endif end module openacc_utils_mod diff --git a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 b/components/homme/src/preqx_acc/prim_advection_mod.F90 similarity index 87% rename from components/homme/src/share/openacc/prim_advection_openacc_mod.F90 rename to components/homme/src/preqx_acc/prim_advection_mod.F90 index ba1c7fa48ad..b9ed14c295c 100644 --- a/components/homme/src/share/openacc/prim_advection_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/prim_advection_mod.F90 @@ -6,34 +6,56 @@ #include "config.h" #endif -module prim_advection_openacc_mod -#if USE_OPENACC - use kinds , only: real_kind, int_kind, log_kind - use dimensions_mod , only: np,nlevp,nlev,qsize,qsize_d,max_corner_elem,max_neigh_edges,nelemd - use element_mod , only: timelevels - use edgetype_mod , only: EdgeBuffer_t - use derivative_mod , only: derivative_t +module prim_advection_mod + !OVERWRITING: Prim_Advec_Tracers_remap, prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap_rk2 + use prim_advection_mod_base, only: 
Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap + use kinds, only : real_kind + use dimensions_mod, only : nlev, nlevp, np, qsize, ntrac, nc, nep, nelemd + use physical_constants, only : rgas, Rwater_vapor, kappa, g, rearth, rrearth, cp + use element_mod, only : element_t + use fvm_control_volume_mod, only : fvm_struct + use spelt_mod, only : spelt_struct + use filter_mod, only : filter_t, filter_P + use hybvcoord_mod, only : hvcoord_t + use time_mod, only : TimeLevel_t, smooth, TimeLevel_Qdp + use prim_si_mod, only : preq_pressure + use diffusion_mod, only : scalar_diffusion, diffusion_init + use control_mod, only : integration, test_case, filter_freq_advection, hypervis_order, & + statefreq, moisture, TRACERADV_TOTAL_DIVERGENCE, TRACERADV_UGRADQ, & + nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit + use edge_mod, only : edgevpack, edgerotate, edgevunpack, initedgebuffer, initedgesbuffer, & + edgevunpackmin, initghostbuffer3D + + use edgetype_mod, only : EdgeDescriptor_t, EdgeBuffer_t, ghostbuffer3D_t + use hybrid_mod, only : hybrid_t + use bndry_mod, only : bndry_exchangev + use perf_mod, only : t_startf, t_stopf, t_barrierf ! _EXTERNAL + use parallel_mod, only : abortmp + use derivative_mod, only: derivative_t implicit none private - type (derivative_t), public, allocatable :: deriv(:) ! 
derivative struct (nthreads) - real(kind=real_kind), allocatable :: qmin(:,:,:), qmax(:,:,:) - real(kind=real_kind), allocatable :: dp0(:) - real(kind=real_kind), allocatable :: Qtens_biharmonic(:,:,:,:,:) - real(kind=real_kind), allocatable :: Qtens(:,:,:,:,:) - real(kind=real_kind), allocatable :: grads_tracer(:,:,:,:,:,:) - real(kind=real_kind), allocatable :: dp_star(:,:,:,:) - type (EdgeBuffer_t) :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax - integer,parameter :: DSSeta = 1 - integer,parameter :: DSSomega = 2 - integer,parameter :: DSSdiv_vdp_ave = 3 - integer,parameter :: DSSno_var = -1 - real(kind=real_kind), allocatable :: data_pack(:,:,:,:), data_pack2(:,:,:,:) - logical :: first_time = .true. - + type (derivative_t), allocatable :: deriv(:) ! derivative struct (nthreads) + real(kind=real_kind), private, allocatable :: qmin(:,:,:), qmax(:,:,:) + real(kind=real_kind), private, allocatable :: dp0(:) + real(kind=real_kind), private, allocatable :: Qtens_biharmonic(:,:,:,:,:) + real(kind=real_kind), private, allocatable :: Qtens(:,:,:,:,:) + real(kind=real_kind), private, allocatable :: grads_tracer(:,:,:,:,:,:) + real(kind=real_kind), private, allocatable :: dp_star(:,:,:,:) + type (EdgeBuffer_t), private :: edgeAdv, edgeAdvQ3, edgeAdv_p1, edgeAdvQ2, edgeAdv1, edgeAdv3, edgeMinMax + integer,parameter, private :: DSSeta = 1 + integer,parameter, private :: DSSomega = 2 + integer,parameter, private :: DSSdiv_vdp_ave = 3 + integer,parameter, private :: DSSno_var = -1 + real(kind=real_kind), allocatable, private :: data_pack(:,:,:,:), data_pack2(:,:,:,:) + logical, private :: first_time = .true. 
+ + public :: Prim_Advec_Tracers_remap_ALE, prim_advec_tracers_fvm, vertical_remap public :: Prim_Advec_Tracers_remap public :: prim_advec_init1 public :: prim_advec_init2 public :: prim_advec_init_deriv + public :: deriv + public :: Prim_Advec_Tracers_remap_rk2 contains @@ -45,28 +67,30 @@ subroutine copy_qdp1_h2d( elem , tl , nets , nete ) integer , intent(in) :: tl, nets , nete integer :: ie, k, j, i call t_startf('qdp1_pcie') - do ie = nets , nete - data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie) - enddo - !$omp barrier - !$omp master -! do ie = 1 , nelemd -! !$acc update device(state_qdp(:,:,:,1,tl,ie)) -! enddo - !$acc update device(data_pack) async(1) - !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) - do ie = 1 , nelemd - do k = 1 , nlev - do j = 1 , np - do i = 1 , np - state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie) +# if USE_OPENACC + do ie = nets , nete + data_pack(:,:,:,ie) = state_qdp(:,:,:,1,tl,ie) + enddo + !$omp barrier + !$omp master +! do ie = 1 , nelemd +! !$acc update device(state_qdp(:,:,:,1,tl,ie)) +! 
enddo + !$acc update device(data_pack) async(1) + !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) + do ie = 1 , nelemd + do k = 1 , nlev + do j = 1 , np + do i = 1 , np + state_qdp(i,j,k,1,tl,ie) = data_pack(i,j,k,ie) + enddo enddo enddo enddo - enddo - !$acc wait(1) - !$omp end master - !$omp barrier + !$acc wait(1) + !$omp end master + !$omp barrier +# endif call t_stopf('qdp1_pcie') end subroutine copy_qdp1_h2d @@ -78,28 +102,30 @@ subroutine copy_qdp1_d2h( elem , tl , nets , nete ) integer , intent(in) :: tl, nets , nete integer :: ie, k, j, i call t_startf('qdp1_pcie') - !$omp barrier - !$omp master - !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) - do ie = 1 , nelemd - do k = 1 , nlev - do j = 1 , np - do i = 1 , np - data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie) +# if USE_OPENACC + !$omp barrier + !$omp master + !$acc parallel loop gang vector collapse(4) async(1) present(state_qdp,data_pack) + do ie = 1 , nelemd + do k = 1 , nlev + do j = 1 , np + do i = 1 , np + data_pack(i,j,k,ie) = state_qdp(i,j,k,1,tl,ie) + enddo enddo enddo enddo - enddo - !$acc update host(data_pack) async(1) - !$acc wait(1) -! do ie = 1 , nelemd -! !$acc update host(state_qdp(:,:,:,1,tl,ie)) -! enddo - !$omp end master - !$omp barrier - do ie = nets , nete - state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie) - enddo + !$acc update host(data_pack) async(1) + !$acc wait(1) +! do ie = 1 , nelemd +! !$acc update host(state_qdp(:,:,:,1,tl,ie)) +! 
enddo + !$omp end master + !$omp barrier + do ie = nets , nete + state_qdp(:,:,:,1,tl,ie) = data_pack(:,:,:,ie) + enddo +# endif call t_stopf('qdp1_pcie') end subroutine copy_qdp1_d2h @@ -112,7 +138,7 @@ subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , flt , hybrid , dt use filter_mod , only: filter_t use time_mod , only: TimeLevel_t, TimeLevel_Qdp use control_mod , only: limiter_option, nu_p, qsplit - use bndry_openacc_mod, only: bndry_exchangeV_timing + use bndry_mod, only: bndry_exchangeV_timing implicit none type (element_t) , intent(inout) :: elem(:) type (derivative_t) , intent(in ) :: deriv @@ -321,13 +347,12 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , use hybrid_mod , only: hybrid_t use element_mod , only: element_t, derived_divdp_proj, state_qdp use derivative_mod , only: derivative_t - use edge_mod , only: EdgeBuffer_t use perf_mod , only: t_startf, t_stopf ! _EXTERNAL use hybvcoord_mod , only: hvcoord_t use control_mod , only: nu_q, hypervis_order, hypervis_subcycle_q, nu_p - use viscosity_openacc_mod, only: biharmonic_wk_scalar - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap + use viscosity_mod, only: biharmonic_wk_scalar_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none type (EdgeBuffer_t) , intent(inout) :: edgeAdv_dontuse type (element_t) , intent(inout), target :: elem(:) @@ -388,7 +413,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , !$omp end master !$omp barrier ! compute biharmonic operator. 
Qtens = input and output - call biharmonic_wk_scalar( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) + call biharmonic_wk_scalar_openacc( elem , Qtens , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) !$omp barrier !$omp master !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:),qtens) @@ -407,7 +432,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , enddo call limiter2d_zero(state_Qdp,2,nt_qdp) call t_startf('ah_scalar_PEU') - call edgeVpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) !$omp end master !$omp barrier @@ -417,7 +442,7 @@ subroutine advance_hypervis_scalar( edgeAdv_dontuse , elem , hvcoord , hybrid , !$omp barrier !$omp master - call edgeVunpack(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) + call edgeVunpack_openacc(edgeAdv,state_qdp,qsize*nlev,0,elem(:),1,nelemd,2,nt_qdp) call t_stopf('ah_scalar_PEU') !$acc parallel loop gang vector collapse(5) present(state_qdp,elem(:)) do ie = 1 , nelemd @@ -441,7 +466,6 @@ end subroutine advance_hypervis_scalar subroutine qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , limiter_option , nu_p , nets , nete ) use element_mod, only: element_t, state_qdp - use control_mod, only: limiter_option implicit none type(element_t) , intent(inout) :: elem(:) integer , intent(in ) :: rkstage , n0_qdp , np1_qdp , nets , nete , limiter_option @@ -488,10 +512,10 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , use control_mod , only: limiter_option, nu_p, nu_q use perf_mod , only: t_startf, t_stopf use element_mod , only: derived_divdp_proj, state_qdp, derived_vn0, derived_divdp - use derivative_openacc_mod, only: divergence_sphere - use viscosity_openacc_mod , only: biharmonic_wk_scalar, neighbor_minmax - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => 
bndry_exchangeV_simple_overlap + use derivative_mod, only: divergence_sphere_openacc + use viscosity_mod , only: biharmonic_wk_scalar_openacc, neighbor_minmax_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none integer , intent(in ) :: np1_qdp, n0_qdp real (kind=real_kind), intent(in ) :: dt @@ -606,7 +630,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , enddo !$omp end master !$omp barrier - if ( rhs_multiplier == 0 ) call neighbor_minmax(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax) + if ( rhs_multiplier == 0 ) call neighbor_minmax_openacc(elem,hybrid,edgeMinMax,1,nelemd,qmin,qmax) ! compute biharmonic mixing term if ( rhs_multiplier == 2 ) then rhs_viss = 3 @@ -633,8 +657,8 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp end master !$omp barrier endif - call biharmonic_wk_scalar( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) - call neighbor_minmax( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax ) + call biharmonic_wk_scalar_openacc( elem , qtens_biharmonic , grads_tracer , deriv , edgeAdv , hybrid , 1 , nelemd ) + call neighbor_minmax_openacc( elem , hybrid , edgeMinMax , 1 , nelemd , qmin , qmax ) !$omp barrier !$omp master !$acc parallel loop gang vector collapse(4) present(qtens_biharmonic,dp0,elem(:)) @@ -716,7 +740,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , enddo enddo enddo - call divergence_sphere( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 ) + call divergence_sphere_openacc( grads_tracer , deriv , elem(:) , qtens , nlev*qsize , 1 , nelemd , 1 , 1 ) !$acc parallel loop gang vector collapse(5) present(qtens,state_qdp,qtens_biharmonic) do ie = 1 , nelemd ! 
advance Qdp @@ -761,7 +785,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , ! note: eta_dot_dpdn is actually dimension nlev+1, but nlev+1 data is ! all zero so we only have to DSS 1:nlev call t_startf('eus_PEU') - call edgeVpack(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVpack_openacc(edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) !$omp end master !$omp barrier @@ -771,7 +795,7 @@ subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , !$omp barrier !$omp master - call edgeVunpack( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) + call edgeVunpack_openacc( edgeAdv , state_Qdp , nlev*qsize , 0 , elem(:) , 1 , nelemd , 2 , np1_qdp ) call t_stopf('eus_PEU') !$acc parallel loop gang vector collapse(4) present(state_Qdp,elem(:)) do ie = 1 , nelemd @@ -800,9 +824,9 @@ subroutine limiter2d_zero(Qdp,tdim,tl) ! ps is only used when advecting Q instead of Qdp ! so ps should be at one timelevel behind Q implicit none - real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) integer , intent(in ) :: tdim integer , intent(in ) :: tl + real (kind=real_kind), intent(inout) :: Qdp(np,np,nlev,qsize,tdim,nelemd) ! 
local real (kind=real_kind) :: mass,mass_new real (kind=real_kind) :: qtmp(np,np) @@ -939,7 +963,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) use edge_mod , only: edgeVpack, edgeVunpack use bndry_mod , only: bndry_exchangeV use control_mod , only: limiter_option - use derivative_openacc_mod, only: divergence_sphere + use derivative_mod, only: divergence_sphere_openacc use openacc_utils_mod , only: copy_ondev use perf_mod , only: t_startf, t_stopf implicit none @@ -954,7 +978,7 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) !$omp barrier !$omp master !$acc update device(derived_vn0) - call divergence_sphere(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1) + call divergence_sphere_openacc(derived_vn0,deriv,elem,derived_divdp,nlev,1,nelemd,1,1) call copy_ondev(derived_divdp_proj,derived_divdp,product(shape(derived_divdp))) !$acc update host(derived_divdp,derived_divdp_proj) !$omp end master @@ -988,7 +1012,6 @@ subroutine precompute_divdp( elem , hybrid , deriv , dt , nets , nete , n0_qdp ) call t_stopf('derived PEU') end subroutine precompute_divdp -#endif -end module prim_advection_openacc_mod +end module prim_advection_mod diff --git a/components/homme/src/preqx_acc/solver_init_mod.F90 b/components/homme/src/preqx_acc/solver_init_mod.F90 new file mode 100644 index 00000000000..0b2027af701 --- /dev/null +++ b/components/homme/src/preqx_acc/solver_init_mod.F90 @@ -0,0 +1,40 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + !OVERWRITING: solver_init2 + use solver_init_mod_base, only: + use dimensions_mod, only: nelemd + implicit none + private + + public :: solver_init2 + + +contains + + + subroutine solver_init2( elem , deriv ) + use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj + use derivative_mod, only: derivative_t + implicit none + type(element_t) , intent(in) :: elem(:) + type(derivative_t), intent(in) :: 
deriv + integer :: ie + !$omp barrier + !$omp master + + !$acc enter data pcreate(state_Qdp,derived_vn0,derived_divdp,derived_divdp_proj) + !$acc enter data pcopyin(elem(1:nelemd),deriv) + do ie = 1 , nelemd + !$acc enter data pcopyin(elem(ie)%desc) + !$acc enter data pcopyin(elem(ie)%desc%putmapP,elem(ie)%desc%getmapP,elem(ie)%desc%reverse) + enddo + + !$omp end master + !$omp barrier + end subroutine solver_init2 + + +end module solver_init_mod diff --git a/components/homme/src/preqx_acc/vertremap_mod.F90 b/components/homme/src/preqx_acc/vertremap_mod.F90 new file mode 100644 index 00000000000..4c3471721bb --- /dev/null +++ b/components/homme/src/preqx_acc/vertremap_mod.F90 @@ -0,0 +1,11 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module vertremap_mod + use vertremap_mod_base, only: remap1, remap1_nofilter, remap_q_ppm + implicit none + private + + public :: remap1, remap1_nofilter, remap_q_ppm +end module vertremap_mod diff --git a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 b/components/homme/src/preqx_acc/viscosity_mod.F90 similarity index 54% rename from components/homme/src/share/openacc/viscosity_openacc_mod.F90 rename to components/homme/src/preqx_acc/viscosity_mod.F90 index bf3cda132b5..4bc0c10960f 100644 --- a/components/homme/src/share/openacc/viscosity_openacc_mod.F90 +++ b/components/homme/src/preqx_acc/viscosity_mod.F90 @@ -3,28 +3,45 @@ #include "config.h" #endif -module viscosity_openacc_mod -#if USE_OPENACC - use kinds, only: real_kind - use dimensions_mod, only: np,nlev,qsize,nelemd +module viscosity_mod + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + use thread_mod, only : omp_get_num_threads + use kinds, only : real_kind, iulog + use 
dimensions_mod, only : np, nc, nlev,qsize,nelemd, ntrac + use hybrid_mod, only : hybrid_t, hybrid_create + use parallel_mod, only : parallel_t + use element_mod, only : element_t + use edgetype_mod, only : EdgeBuffer_t, EdgeDescriptor_t + use bndry_mod, only : bndry_exchangev, bndry_exchangeS, bndry_exchangeS_start,bndry_exchangeS_finish + use control_mod, only : hypervis_scaling, nu, nu_div + use perf_mod, only: t_startf, t_stopf implicit none private - public :: neighbor_minmax - public :: biharmonic_wk_scalar + public :: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0 +#ifdef _PRIM + public :: biharmonic_wk_scalar, biharmonic_wk_scalar_minmax, neighbor_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + public :: biharmonic_wk_scalar_openacc + public :: neighbor_minmax_openacc + + contains - subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) + subroutine biharmonic_wk_scalar_openacc(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) use hybrid_mod , only: hybrid_t use element_mod , only: element_t use edgetype_mod , only: edgeBuffer_t use derivative_mod , only: derivative_t use control_mod , only: hypervis_scaling use perf_mod , only: t_startf, t_stopf - use derivative_openacc_mod, only: laplace_sphere_wk - use edge_openacc_mod , only: edgeVpack, edgeVunpack - use bndry_openacc_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap + use derivative_mod, only: laplace_sphere_wk_openacc + use edge_mod , only: edgeVpack_openacc, edgeVunpack_openacc + use bndry_mod , only: bndry_exchangeV => bndry_exchangeV_simple_overlap implicit none !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ! compute weak biharmonic operator @@ -47,9 +64,9 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) if(hypervis_scaling > 0) var_coef1 = .false. 
!$omp barrier !$omp master - call laplace_sphere_wk(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1) + call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,var_coef1,qtens,nlev*qsize,nets,nete,1,1) call t_startf('biwksc_PEU') - call edgeVpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) + call edgeVpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) !$omp end master !$omp barrier @@ -59,7 +76,7 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) !$omp barrier !$omp master - call edgeVunpack(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) + call edgeVunpack_openacc(edgeq,qtens,qsize*nlev,0,elem(:),nets,nete,1,1) call t_stopf('biwksc_PEU') !$acc parallel loop gang vector collapse(5) present(qtens,elem(:)) do ie = nets , nete @@ -74,18 +91,18 @@ subroutine biharmonic_wk_scalar(elem,qtens,grads,deriv,edgeq,hybrid,nets,nete) enddo enddo enddo - call laplace_sphere_wk(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1) + call laplace_sphere_wk_openacc(qtens,grads,deriv,elem,.true.,qtens,nlev*qsize,nets,nete,1,1) !$omp end master !$omp barrier - end subroutine biharmonic_wk_scalar + end subroutine biharmonic_wk_scalar_openacc - subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) + subroutine neighbor_minmax_openacc(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) use hybrid_mod , only: hybrid_t use element_mod , only: element_t use perf_mod , only: t_startf, t_stopf use edgetype_mod , only: edgeBuffer_t - use edge_openacc_mod , only: edgeSpack, edgeSunpackMin, edgeSunpackMax - use bndry_openacc_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap + use edge_mod , only: edgeSpack_openacc, edgeSunpackMin_openacc, edgeSunpackMax_openacc + use bndry_mod, only: bndry_exchangeS => bndry_exchangeS_simple_overlap implicit none ! 
compute Q min&max over the element and all its neighbors integer :: nets,nete @@ -100,8 +117,8 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) !$omp barrier !$omp master call t_startf('nmm_PEU') - call edgeSpack(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) - call edgeSpack(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) + call edgeSpack_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) + call edgeSpack_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) !$omp end master !$omp barrier @@ -111,13 +128,12 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,min_neigh,max_neigh) !$omp barrier !$omp master - call edgeSunpackMin(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) - call edgeSunpackMax(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) + call edgeSunpackMin_openacc(edgeMinMax,min_neigh,nlev*qsize,0 ,elem(:),nets,nete,1,1) + call edgeSunpackMax_openacc(edgeMinMax,max_neigh,nlev*qsize,nlev*qsize,elem(:),nets,nete,1,1) call t_stopf('nmm_PEU') !$omp end master !$omp barrier - end subroutine neighbor_minmax + end subroutine neighbor_minmax_openacc -#endif -end module viscosity_openacc_mod +end module viscosity_mod diff --git a/components/homme/src/share/arch_switch_mod.F90 b/components/homme/src/share/arch_switch_mod.F90 deleted file mode 100644 index 8720652254f..00000000000 --- a/components/homme/src/share/arch_switch_mod.F90 +++ /dev/null @@ -1,29 +0,0 @@ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -module arch_switch_mod -#if USE_OPENACC - use prim_advection_openacc_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap - use openacc_utils_mod, only: arch_init2 -#else - use prim_advection_mod, only: prim_advec_init1, prim_advec_init2, prim_advec_init_deriv, deriv, Prim_Advec_Tracers_remap -#endif - implicit none - -contains - -#if (! 
USE_OPENACC) - subroutine arch_init2( elem , deriv ) - use element_mod, only: element_t, state_qdp, derived_vn0, derived_divdp, derived_divdp_proj - use derivative_mod, only: derivative_t - implicit none - type(element_t) , intent(in) :: elem(:) - type(derivative_t), intent(in) :: deriv - !CPU case, nothing to do - end subroutine arch_init2 -#endif - -end module arch_switch_mod - diff --git a/components/homme/src/share/bndry_mod.F90 b/components/homme/src/share/bndry_mod_base.F90 similarity index 99% rename from components/homme/src/share/bndry_mod.F90 rename to components/homme/src/share/bndry_mod_base.F90 index 3be3590cf5e..998c80d2952 100644 --- a/components/homme/src/share/bndry_mod.F90 +++ b/components/homme/src/share/bndry_mod_base.F90 @@ -3,7 +3,7 @@ #endif -module bndry_mod +module bndry_mod_base use parallel_mod, only : syncmp,parallel_t,abortmp,iam use edgetype_mod, only : Ghostbuffertr_t, Ghostbuffer3D_t,Edgebuffer_t,LongEdgebuffer_t use thread_mod, only : omp_in_parallel, omp_get_thread_num, omp_get_num_threads @@ -1140,4 +1140,4 @@ subroutine sort_neighbor_buffer_mapping(par,elem,nets,nete) -end module bndry_mod +end module bndry_mod_base diff --git a/components/homme/src/share/derivative_mod.F90 b/components/homme/src/share/derivative_mod_base.F90 similarity index 99% rename from components/homme/src/share/derivative_mod.F90 rename to components/homme/src/share/derivative_mod_base.F90 index 7b2f1fce0a1..0c89625a739 100644 --- a/components/homme/src/share/derivative_mod.F90 +++ b/components/homme/src/share/derivative_mod_base.F90 @@ -3,7 +3,7 @@ #endif -module derivative_mod +module derivative_mod_base use kinds, only : real_kind, longdouble_kind use dimensions_mod, only : np, nc, npdg, nep, nelemd, nlev use quadrature_mod, only : quadrature_t, gauss, gausslobatto,legendre, jacobi @@ -2810,4 +2810,4 @@ end subroutine limiter_optim_iter_full -end module derivative_mod +end module derivative_mod_base diff --git a/components/homme/src/share/edge_mod.F90 
b/components/homme/src/share/edge_mod_base.F90 similarity index 99% rename from components/homme/src/share/edge_mod.F90 rename to components/homme/src/share/edge_mod_base.F90 index 2205a27d5eb..6cd64ca3f66 100644 --- a/components/homme/src/share/edge_mod.F90 +++ b/components/homme/src/share/edge_mod_base.F90 @@ -2,7 +2,7 @@ #include "config.h" #endif -module edge_mod +module edge_mod_base use kinds, only : int_kind, log_kind, real_kind use dimensions_mod, only : max_neigh_edges, nelemd @@ -4792,7 +4792,7 @@ subroutine ghostVunpack3d(g, v, vlyr, kptr, desc, sw, se, nw, ne, mult) end subroutine ghostVunpack3d -End module edge_mod +End module edge_mod_base #if 0 #ifndef HAVE_F2003_PTR_BND_REMAP diff --git a/components/homme/src/share/element_mod.F90 b/components/homme/src/share/element_mod.F90 index 84811e50676..d6ce91490ac 100644 --- a/components/homme/src/share/element_mod.F90 +++ b/components/homme/src/share/element_mod.F90 @@ -36,7 +36,7 @@ module element_mod real (kind=real_kind) :: ps_v(np,np,timelevels) ! surface pressure 4 real (kind=real_kind) :: phis(np,np) ! surface geopotential (prescribed) 5 real (kind=real_kind) :: Q (np,np,nlev,qsize_d) ! Tracer concentration 6 - real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize_d,2) + real (kind=real_kind), pointer :: Qdp (:,:,:,:,:) ! Tracer mass 7 (np,np,nlev,qsize,2) end type elem_state_t integer(kind=int_kind),public,parameter::StateComponents=8 ! 
num prognistics variables (for prim_restart_mod.F90) @@ -564,12 +564,12 @@ end subroutine allocate_element_desc !___________________________________________________________________ subroutine setup_element_pointers(elem) - use dimensions_mod, only: nelemd + use dimensions_mod, only: nelemd, qsize implicit none type(element_t), intent(inout) :: elem(:) #if USE_OPENACC integer :: ie - allocate( state_Qdp (np,np,nlev,qsize_d,2,nelemd) ) + allocate( state_Qdp (np,np,nlev,qsize,2,nelemd) ) allocate( derived_vn0 (np,np,2,nlev,nelemd) ) allocate( derived_divdp (np,np,nlev,nelemd) ) allocate( derived_divdp_proj (np,np,nlev,nelemd) ) diff --git a/components/homme/src/share/prim_advection_mod.F90 b/components/homme/src/share/prim_advection_mod_base.F90 similarity index 73% rename from components/homme/src/share/prim_advection_mod.F90 rename to components/homme/src/share/prim_advection_mod_base.F90 index 11369232d72..81620cd9784 100644 --- a/components/homme/src/share/prim_advection_mod.F90 +++ b/components/homme/src/share/prim_advection_mod_base.F90 @@ -4,827 +4,39 @@ #define NEWEULER_B4B 1 #define OVERLAP 1 - module EXTRAE_MODULE - - interface - - subroutine extrae_user_function (enter) - integer*4, intent(in) :: enter - end subroutine extrae_user_function - - end interface - - end module EXTRAE_MODULE - -#if 0 -SUBROUTINES: - prim_advec_tracers_remap_rk2() - SEM 2D RK2 + monotone remap + hyper viscosity - SEM 2D RK2 can use sign-preserving or monotone reconstruction - -Notes on Lagrange+REMAP advection -dynamics will compute mean fluxes, so that (i.e. 
for qsplit=3) - - dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3) - -Where the floating lagrangian component: - dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3) -OR: - dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) ) - - -For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt -For consistency, if Q=1 - dp1 = dp(t)- dtq div[ U1 dp(t)] - dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave ) - dp* = (dp(t) + dp2 )/2 - = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave ) - -so we require: - U1 = Udp_ave / dp(t) - U2 = Udp_ave / dp1 - -For tracer advection: - Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)] - Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave ) - Qdp* = (Qdp(t) + Qdp2 )/2 - = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ] + dtq D( Q dpdiss_ave ) - -Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above -Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above - -For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS - - -last step: - remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ] - -#endif - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -!! Begin GPU remap module !! -!! by Rick Archibald, 2010 !! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -module vertremap_mod - - !************************************************************************************** - ! - ! Purpose: - ! Construct sub-grid-scale polynomials using piecewise spline method with - ! monotone filters. - ! - ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR) - ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF) - ! 
- !************************************************************************************** - - use kinds, only : real_kind,int_kind - use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc - use hybvcoord_mod, only : hvcoord_t - use element_mod, only : element_t - use fvm_control_volume_mod, only : fvm_struct - use spelt_mod, only : spelt_struct - use perf_mod, only : t_startf, t_stopf ! _EXTERNAL - use parallel_mod, only : abortmp, parallel_t - use control_mod, only : vert_remap_q_alg - - public remap1 ! remap any field, splines, monotone - public remap1_nofilter ! remap any field, splines, no filter -! todo: tweak interface to match remap1 above, rename remap1_ppm: - public remap_q_ppm ! remap state%Q, PPM, monotone - - contains - -!=======================================================================================================! - -!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid -!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task. 
-subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref ) - implicit none - type(hvcoord_t) , intent(in ) :: hvcoord !Derived type to hold vertical sigma grid parameters - real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column - real(kind=real_kind) , intent(in ) :: dt !Time step - real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Looks like a vertical pressure flux - !to compute deformed grid spacing - real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid - real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid - real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid - real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid - integer :: k !Iterator - p_ref(1) = 0 !Both grids have a model top pressure of zero - p_lag(1) = 0 !Both grids have a model top pressure of zero - do k = 1 , nlev - dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + & - ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference - ! Lagrangian pressure difference (flux in - flux out over the time step) - dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) ) - p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell - p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell - enddo -end subroutine remap_calc_grids - -!=======================================================================================================! - - - -subroutine remap1(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass, monotone on Q=Qdp/dp - ! 
- implicit none - integer, intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! ======================== - ! Local Variables - ! ======================== - - real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv - real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 - real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & - peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & - zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 - integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & - lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q - logical :: abort=.false. - - if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then - call remap_Q_ppm(qdp,nx,qsize,dp1,dp2) - return - endif - - call t_startf('remap_Q_noppm') -#if (defined COLUMN_OPENMP) -!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & -!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & -!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & -!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) -#endif - do q=1,qsize - do i=1,nx - do j=1,nx - - z1c(1)=0 ! source grid - z2c(1)=0 ! target grid - do k=1,nlev - z1c(k+1)=z1c(k)+dp1(i,j,k) - z2c(k+1)=z2c(k)+dp2(i,j,k) - enddo - - zv(1)=0 - do k=1,nlev - Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass - zv(k+1) = zv(k)+Qcol(k) - enddo - - if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then - write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' - write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' - write(6,*) 'DATA FOR MODEL LEVELS' - write(6,*) 'PLEVMODEL=',z2c(nlev+1) - write(6,*) 'PLEV =',z1c(nlev+1) - write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) - abort=.true. 
- endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! quadratic splies with UK met office monotonicity constraints !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zkr = 99 - ilev = 2 - zkr(1) = 1 - zkr(nlev+1) = nlev - kloop: do k = 2,nlev - do jk = ilev,nlev+1 - if (z1c(jk).ge.z2c(k)) then - ilev = jk - zkr(k) = jk-1 - cycle kloop - endif - enddo - enddo kloop - - zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) - zgam(1) = 0.0 - zgam(nlev+1) = 1.0 - zhdp = z1c(2:nlev+1)-z1c(1:nlev) - - - h = 1/zhdp - zarg = Qcol * h - rhs = 0 - lower_diag = 0 - diag = 0 - upper_diag = 0 - - rhs(1)=3*zarg(1) - rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) - rhs(nlev+1)=3*zarg(nlev) - - lower_diag(1)=1 - lower_diag(2:nlev) = h(1:nlev-1) - lower_diag(nlev+1)=1 - - diag(1)=2 - diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) - diag(nlev+1)=2 - - upper_diag(1)=1 - upper_diag(2:nlev) = h(2:nlev) - upper_diag(nlev+1)=0 - - q_diag(1)=-upper_diag(1)/diag(1) - rhs(1)= rhs(1)/diag(1) - - do k=2,nlev+1 - tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) - q_diag(k) = -upper_diag(k)*tmp_cal - rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal - enddo - do k=nlev,1,-1 - rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) - enddo - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! monotonicity modifications !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - filter_code = 0 - dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1) - dy(nlev) = dy(nlev-1) - - dy = merge(zero, dy, abs(dy) < tiny ) - - do k=1,nlev - im1=MAX(1,k-1) - im2=MAX(1,k-2) - im3=MAX(1,k-3) - ip1=MIN(nlev,k+1) - t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0) - t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 & - .AND. dy(k)*dy(ip1) > 0 .AND. 
dy(im2)*dy(k) < 0 ) - t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k))) - - filter_code(k) = merge(0,1,t1+t2 > 0) - rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1)) - filter_code(im1) = MAX(filter_code(im1),filter_code(k)) - enddo - - rhs = merge(qmax,rhs,rhs > qmax) - rhs = merge(zero,rhs,rhs < zero) - - za0 = rhs(1:nlev) - za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg - za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg - - dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev) - dy = merge(zero, dy, abs(dy) < tiny ) - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! Compute the 3 quadratic spline coeffients {za0, za1, za2} !! - !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !! - !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - h = rhs(2:nlev+1) - - do k=1,nlev - xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny) - xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny) - f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2 - - t1 = merge(1,0,ABS(za2(k)) > tiny) - t2 = merge(1,0,xm <= zero .OR. xm >= 1) - t3 = merge(1,0,za2(k) > zero) - t4 = merge(1,0,za2(k) < zero) - tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2) - tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3) - - peaks=0 - peaks = merge(-1,peaks,tm .EQ. 1) - peaks = merge(+1,peaks,tp .EQ. 1) - peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1) - peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1) - - im1=MAX(1,k-1) - im2=MAX(1,k-2) - ip1=MIN(nlev,k+1) - ip2=MIN(nlev,k+2) - - t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. & - (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. & - (dy(im1)*float(peaks) <= tiny)) - - filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. & - (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. 
(peaks_min < tiny)) - - if (filter_code(k) > 0) then - level1 = rhs(k) - level2 = (2*rhs(k)+h(k))/3 - level3 = 0.5*(rhs(k)+h(k)) - level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k) - level5 = h(k) - - t1 = merge(1,0,h(k) >= rhs(k)) - t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5) - t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2) - t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5) - - lt1 = t1*t2 - lt2 = t1*(1-t2+t3) - lt3 = t1*(1-t2+1-t3+t4) - - za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) - za1(k) = merge(zero,za1(k),lt1 .EQ. 1) - za2(k) = merge(zero,za2(k),lt1 .EQ. 1) - - za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) - za1(k) = merge(zero,za1(k),lt2 .EQ. 2) - za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) - - za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) - za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) - za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3) - - t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5) - t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2) - t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5) - - lt1 = (1-t1)*t2 - lt2 = (1-t1)*(1-t2+t3) - lt3 = (1-t1)*(1-t2+1-t3+t4) - - za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1) - za1(k) = merge(zero,za1(k),lt1 .EQ. 1) - za2(k) = merge(zero,za2(k),lt1 .EQ. 1) - - za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2) - za1(k) = merge(zero,za1(k),lt2 .EQ. 2) - za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2) - - za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3) - za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3) - za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3) - endif - enddo - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! start iteration from top to bottom of atmosphere !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zv1 = 0 - do k=1,nlev - if (zgam(k+1)>1d0) then - WRITE(*,*) 'r not in [0:1]', zgam(k+1) - abort=.true. 
- endif - zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & - (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) - Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio - zv1 = zv2 - enddo - enddo - enddo - enddo ! q loop - if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin') - call t_stopf('remap_Q_noppm') - -end subroutine remap1 - -subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass - ! - implicit none - integer, intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! ======================== - ! Local Variables - ! ======================== - - real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv - real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1 - real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, & - peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, & - zero = 0,one = 1,tiny = 1e-12,qmax = 1d50 - integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, & - lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q - logical :: abort=.false. -! call t_startf('remap1_nofilter') - -#if (defined COLUMN_OPENMP) -!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) & -!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) & -!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) & -!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2) -#endif - do q=1,qsize - do i=1,nx - do j=1,nx - - z1c(1)=0 ! source grid - z2c(1)=0 ! 
target grid - do k=1,nlev - z1c(k+1)=z1c(k)+dp1(i,j,k) - z2c(k+1)=z2c(k)+dp2(i,j,k) - enddo - - zv(1)=0 - do k=1,nlev - Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass - zv(k+1) = zv(k)+Qcol(k) - enddo - - if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then - write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME' - write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN ' - write(6,*) 'DATA FOR MODEL LEVELS' - write(6,*) 'PLEVMODEL=',z2c(nlev+1) - write(6,*) 'PLEV =',z1c(nlev+1) - write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1) - abort=.true. - endif - - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! quadratic splies with UK met office monotonicity constraints !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zkr = 99 - ilev = 2 - zkr(1) = 1 - zkr(nlev+1) = nlev - kloop: do k = 2,nlev - do jk = ilev,nlev+1 - if (z1c(jk).ge.z2c(k)) then - ilev = jk - zkr(k) = jk-1 - cycle kloop - endif - enddo - enddo kloop - - zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr)) - zgam(1) = 0.0 - zgam(nlev+1) = 1.0 - zhdp = z1c(2:nlev+1)-z1c(1:nlev) - - - h = 1/zhdp - zarg = Qcol * h - rhs = 0 - lower_diag = 0 - diag = 0 - upper_diag = 0 - - rhs(1)=3*zarg(1) - rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1)) - rhs(nlev+1)=3*zarg(nlev) - - lower_diag(1)=1 - lower_diag(2:nlev) = h(1:nlev-1) - lower_diag(nlev+1)=1 - - diag(1)=2 - diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1)) - diag(nlev+1)=2 - - upper_diag(1)=1 - upper_diag(2:nlev) = h(2:nlev) - upper_diag(nlev+1)=0 - - q_diag(1)=-upper_diag(1)/diag(1) - rhs(1)= rhs(1)/diag(1) - - do k=2,nlev+1 - tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1)) - q_diag(k) = -upper_diag(k)*tmp_cal - rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal - enddo - do k=nlev,1,-1 - rhs(k)=rhs(k)+q_diag(k)*rhs(k+1) - enddo - - za0 = rhs(1:nlev) - za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg - za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg - - - 
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !! start iteration from top to bottom of atmosphere !! - !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - zv1 = 0 - do k=1,nlev - if (zgam(k+1)>1d0) then - WRITE(*,*) 'r not in [0:1]', zgam(k+1) - abort=.true. - endif - zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ & - (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1)) - Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio - zv1 = zv2 - enddo - enddo - enddo - enddo ! q loop - if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin') -! call t_stopf('remap1_nofilter') -end subroutine remap1_nofilter - -!=======================================================================================================! - - -!This uses the exact same model and reference grids and data as remap_Q, but it interpolates -!using PPM instead of splines. -subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2) - ! remap 1 field - ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO) - ! dp1 layer thickness (source) - ! dp2 layer thickness (target) - ! - ! output: remaped Qdp, conserving mass - ! - use control_mod, only : vert_remap_q_alg - implicit none - integer,intent(in) :: nx,qsize - real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize) - real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev) - ! 
Local Variables - integer, parameter :: gs = 2 !Number of cells to place in the ghost region - real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid - real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid - real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid - real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for old grid - real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell - real(kind=real_kind), dimension( nlev ) :: z1, z2 - real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid spacings - real(kind=real_kind) :: mymass, massn1, massn2 - integer :: i, j, k, q, kk, kid(nlev) - - call t_startf('remap_Q_ppm') - do j = 1 , nx - do i = 1 , nx - - pin(1)=0 - pio(1)=0 - do k=1,nlev - dpn(k)=dp2(i,j,k) - dpo(k)=dp1(i,j,k) - pin(k+1)=pin(k)+dpn(k) - pio(k+1)=pio(k)+dpo(k) - enddo - - - - pio(nlev+2) = pio(nlev+1) + 1. !This is here to allow an entire block of k threads to run in the remapping phase. - !It makes sure there's an old interface value below the domain that is larger. - pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change. - !Therefore, the pressure of that mass cannot either. - !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence. - do k = 1 , gs - dpo(1 -k) = dpo( k) - dpo(nlev+k) = dpo(nlev+1-k) - enddo - - !Compute remapping intervals once for all tracers. Find the old grid cell index in which the - !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new - !interface location. In practice, the grid never deforms past one cell, so the search can be - !simplified by this. 
Also, the interval of integration is usually of magnitude close to zero - !or close to dpo because of minimial deformation. - !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so - !I set them equal to each other. - do k = 1 , nlev - kk = k !Keep from an order n^2 search operation by assuming the old cell index is close. - !Find the index of the old grid cell in which this new cell's bottom interface resides. - do while ( pio(kk) <= pin(k+1) ) - kk = kk + 1 - enddo - kk = kk - 1 !kk is now the cell index we're integrating over. - if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds. - !Top bounds match anyway, so doesn't matter what coefficients are used - kid(k) = kk !Save for reuse - z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell - !In fact, we're usually integrating very little or almost all of the cell in question - z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent - !coordinate domain [-0.5,0.5]. - enddo - - !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the - !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop. - ppmdx(:,:) = compute_ppm_grids( dpo ) - - !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and - !mass accumulation - do q = 1 , qsize - !Accumulate the old mass up to old grid cell interface locations to simplify integration - !during remapping. Also, divide out the grid spacing so we're working with actual tracer - !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure - !tracer consistency for an initially uniform field. I copied it from the old remap routine. - masso(1) = 0. - do k = 1 , nlev - ao(k) = Qdp(i,j,k,q) - masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. 
This will simplify the remapping - ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass. - enddo - !Fill in ghost values. Ignored if vert_remap_q_alg == 2 - do k = 1 , gs - ao(1 -k) = ao( k) - ao(nlev+k) = ao(nlev+1-k) - enddo - !Compute monotonic and conservative PPM reconstruction over every cell - coefs(:,:) = compute_ppm( ao , ppmdx ) - !Compute tracer values on the new grid by integrating from the old cell bottom to the new - !cell interface to form a new grid mass accumulation. Taking the difference between - !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is - !supposed to hold the full mass this needs no normalization. - massn1 = 0. - do k = 1 , nlev - kk = kid(k) - massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk) - Qdp(i,j,k,q) = massn2 - massn1 - massn1 = massn2 - enddo - enddo - enddo - enddo - call t_stopf('remap_Q_ppm') -end subroutine remap_Q_ppm - - -!=======================================================================================================! - - -!THis compute grid-based coefficients from Collela & Woodward 1984. 
-function compute_ppm_grids( dx ) result(rslt) - use control_mod, only: vert_remap_q_alg - implicit none - real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings - real(kind=real_kind) :: rslt(10,0:nlev+1) !grid spacings - integer :: j - integer :: indB, indE - - !Calculate grid-based coefficients for stage 1 of compute_ppm - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-1 - else - indB = 0 - indE = nlev+1 - endif - do j = indB , indE - rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) ) - rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) ) - rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) ) - enddo - - !Caculate grid-based coefficients for stage 2 of compute_ppm - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-2 - else - indB = 0 - indE = nlev - endif - do j = indB , indE - rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) ) - rslt( 5,j) = 1. / sum( dx(j-1:j+2) ) - rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) ) - rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) ) - rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) ) - rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) ) - rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) ) - enddo -end function compute_ppm_grids - -!=======================================================================================================! 
- - - -!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages -function compute_ppm( a , dx ) result(coefs) - use control_mod, only: vert_remap_q_alg - implicit none - real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values - real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid spacings - real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola) - real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values - real(kind=real_kind) :: dma(0:nlev+1) !An expression from Collela's '84 publication - real(kind=real_kind) :: da !Ditto - ! Hold expressions based on the grid (which are cumbersome). - real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10 - real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting - integer :: j - integer :: indB, indE - - ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-1 - else - indB = 0 - indE = nlev+1 - endif - do j = indB , indE - da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) ) - dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da) - if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0. - enddo - - ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1) - if (vert_remap_q_alg == 2) then - indB = 2 - indE = nlev-2 - else - indB = 0 - indE = nlev - endif - do j = indB , indE - ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) & - * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) ) - enddo - - ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain - ! (dimension nlev) using ai on either side and ao within the cell. 
- if (vert_remap_q_alg == 2) then - indB = 3 - indE = nlev-2 - else - indB = 1 - indE = nlev - endif - do j = indB , indE - al = ai(j-1) - ar = ai(j ) - if ( (ar - a(j)) * (a(j) - al) <= 0. ) then - al = a(j) - ar = a(j) - endif - if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar - if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al - !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx - coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4. - coefs(1,j) = ar - al - coefs(2,j) = -6. * a(j) + 3. * ( al + ar ) - enddo - - !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom - !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth - !moment to the cell mean is sufficient to maintain conservation. - if (vert_remap_q_alg == 2) then - coefs(0,1:2) = a(1:2) - coefs(1:2,1:2) = 0. - coefs(0,nlev-1:nlev) = a(nlev-1:nlev) - coefs(1:2,nlev-1:nlev) = 0.D0 - endif -end function compute_ppm - -!=======================================================================================================! - - -!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx, -!given two bounds. Make sure this gets inlined during compilation. -function integrate_parabola( a , x1 , x2 ) result(mass) - implicit none - real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola - real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration - real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration - real(kind=real_kind) :: mass - mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1 -end function integrate_parabola - - -!=============================================================================================! 
- - - -end module vertremap_mod - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -!! End GPU remap module !! -!! by Rick Archibald, 2010 !! -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - -!=======================================================================================================! - - +!SUBROUTINES: +! prim_advec_tracers_remap_rk2() +! SEM 2D RK2 + monotone remap + hyper viscosity +! SEM 2D RK2 can use sign-preserving or monotone reconstruction +! +!For RK2 advection of Q: (example of 2 stage RK for tracers): dtq = qsplit*dt +!For consistency, if Q=1 +! dp1 = dp(t)- dtq div[ U1 dp(t)] +! dp2 = dp1 - dtq div[ U2 dp1 ] + 2*dtq D( dpdiss_ave ) +! dp* = (dp(t) + dp2 )/2 +! = dp(t) - dtq div[ U1 dp(t) + U2 dp1 ]/2 + dtq D( dpdiss_ave ) +! +!so we require: +! U1 = Udp_ave / dp(t) +! U2 = Udp_ave / dp1 +! +!For tracer advection: +! Qdp1 = Qdp(t)- dtq div[ U1 Qdp(t)] +! Qdp2 = Qdp1 - dtq div[ U2 Qdp1 ] + 2*dtq D( Q dpdiss_ave ) +! Qdp* = (Qdp(t) + Qdp2 )/2 +! = Qdp(t) - dtq div[ U1 Qdp(t) + U2 Qdp1 ] + dtq D( Q dpdiss_ave ) +! +!Qdp1: limit Q, with Q = Qdp1-before-DSS/(dp1-before-DSS) with dp1 as computed above +!Qdp2: limit Q, with Q = Qdp2-before-DSS/(dp2-before-DSS) with dp2 as computed above +! +!For dissipation: Q = Qdp1-after-DSS / dp1-after-DSS +! +! +!last step: +! remap Qdp* to Qdp(t+1) [ dp_star(t+1) -> dp(t+1) ] -module prim_advection_mod +module prim_advection_mod_base ! ! two formulations. both are conservative ! 
u grad Q formulation: @@ -3068,4 +2280,4 @@ subroutine vertical_remap(hybrid,elem,fvm,hvcoord,dt,np1,np1_qdp,np1_fvm,nets,ne call t_stopf('vertical_remap') end subroutine vertical_remap -end module prim_advection_mod +end module prim_advection_mod_base diff --git a/components/homme/src/share/prim_driver_mod.F90 b/components/homme/src/share/prim_driver_mod.F90 index a4e8b23fa48..9c54f0454dc 100644 --- a/components/homme/src/share/prim_driver_mod.F90 +++ b/components/homme/src/share/prim_driver_mod.F90 @@ -93,7 +93,7 @@ subroutine prim_init1(elem, fvm, par, dom_mt, Tl) ! -------------------------------- use schedule_mod, only : genEdgeSched, PrintSchedule ! -------------------------------- - use arch_switch_mod, only: prim_advec_init1 + use prim_advection_mod, only: prim_advec_init1 ! -------------------------------- use prim_advance_mod, only: prim_advance_init ! -------------------------------- @@ -608,7 +608,8 @@ subroutine prim_init2(elem, fvm, hybrid, nets, nete, tl, hvcoord) use derivative_mod, only : derivinit, interpolate_gll2fvm_points, interpolate_gll2spelt_points, v2pinit use global_norms_mod, only : test_global_integral, print_cfl use hybvcoord_mod, only : hvcoord_t - use arch_switch_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv, arch_init2 + use prim_advection_mod, only: prim_advec_init2, prim_advec_init_deriv, deriv + use solver_init_mod, only: solver_init2 #ifdef CAM #else use column_model_mod, only : InitColumnModel @@ -1101,7 +1102,7 @@ end subroutine noxinit if (hybrid%masterthread) write(iulog,*) "initial state:" call prim_printstate(elem, tl, hybrid,hvcoord,nets,nete, fvm) - call arch_init2(elem(:), deriv(hybrid%ithr)) + call solver_init2(elem(:), deriv(hybrid%ithr)) call Prim_Advec_Init2(elem(:), hvcoord, hybrid) end subroutine prim_init2 @@ -1161,7 +1162,7 @@ subroutine prim_run(elem, hybrid,nets,nete, dt, tl, hvcoord, advance_name) use control_mod, only: statefreq, integration, ftype, qsplit, disable_diagnostics use 
prim_advance_mod, only : prim_advance_exp, prim_advance_si, preq_robert3 use prim_state_mod, only : prim_printstate, prim_diag_scalars, prim_energy_halftimes - use arch_switch_mod, only: deriv + use prim_advection_mod, only: deriv use parallel_mod, only : abortmp #ifndef CAM use column_model_mod, only : ApplyColumnModel @@ -1617,7 +1618,7 @@ subroutine prim_step(elem, fvm, hybrid,nets,nete, dt, tl, hvcoord, compute_diagn use fvm_bsp_mod, only : get_boomerang_velocities_gll, get_solidbody_velocities_gll use prim_advance_mod, only : prim_advance_exp, overwrite_SEdensity use prim_advection_mod, only : prim_advec_tracers_fvm - use arch_switch_mod, only : prim_advec_tracers_remap, deriv + use prim_advection_mod, only : prim_advec_tracers_remap, deriv use derivative_mod, only : subcell_integration #if defined(_SPELT) use prim_advection_mod, only : prim_advec_tracers_spelt @@ -2026,7 +2027,7 @@ subroutine smooth_topo_datasets(phis,sghdyn,sgh30dyn,elem,hybrid,nets,nete) use derivative_mod, only : derivative_t , laplace_sphere_wk use viscosity_mod, only : biharmonic_wk use prim_advance_mod, only : smooth_phis - use arch_switch_mod, only: deriv + use prim_advection_mod, only: deriv implicit none integer , intent(in) :: nets,nete diff --git a/components/homme/src/share/solver_init_mod_base.F90 b/components/homme/src/share/solver_init_mod_base.F90 new file mode 100644 index 00000000000..774d103a9c3 --- /dev/null +++ b/components/homme/src/share/solver_init_mod_base.F90 @@ -0,0 +1,25 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod_base + implicit none + private + + public :: solver_init2 + + +contains + + + subroutine solver_init2( elem , deriv ) + use element_mod, only: element_t + use derivative_mod, only: derivative_t + implicit none + type(element_t) , intent(in) :: elem(:) + type(derivative_t), intent(in) :: deriv + !do nothing + end subroutine solver_init2 + + +end module solver_init_mod_base diff --git 
a/components/homme/src/share/vertremap_mod_base.F90 b/components/homme/src/share/vertremap_mod_base.F90 new file mode 100644 index 00000000000..c915acf7350 --- /dev/null +++ b/components/homme/src/share/vertremap_mod_base.F90 @@ -0,0 +1,767 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +!SUBROUTINES: +! +!Notes on Lagrange+REMAP advection +!dynamics will compute mean fluxes, so that (i.e. for qsplit=3) +! +! dp(t+3)-dp(t) = -3dt div(Udp_sum/3) - 3dt d(eta_dot_dpdn_sum/3) + 3dt D(dpdiss_sum/3) +! +!Where the floating lagrangian component: +! dp_star(t+3) = dp(t) -3dt div(Udp_sum/3) + 3dt D(dpdiss_sum/3) +!OR: +! dp_star(t+3) = dp(t+1) + 3dt d( eta_dot_dpdn_ave(t) ) + + +module vertremap_mod_base + + !************************************************************************************** + ! + ! Purpose: + ! Construct sub-grid-scale polynomials using piecewise spline method with + ! monotone filters. + ! + ! References: PCM - Zerroukat et al., Q.J.R. Meteorol. Soc., 2005. (ZWS2005QJR) + ! PSM - Zerroukat et al., Int. J. Numer. Meth. Fluids, 2005. (ZWS2005IJMF) + ! + !************************************************************************************** + + use kinds, only : real_kind,int_kind + use dimensions_mod, only : np,nlev,qsize,nlevp,npsq,ntrac,nc + use hybvcoord_mod, only : hvcoord_t + use element_mod, only : element_t + use fvm_control_volume_mod, only : fvm_struct + use spelt_mod, only : spelt_struct + use perf_mod, only : t_startf, t_stopf ! _EXTERNAL + use parallel_mod, only : abortmp, parallel_t + use control_mod, only : vert_remap_q_alg + + public remap1 ! remap any field, splines, monotone + public remap1_nofilter ! remap any field, splines, no filter +! todo: tweak interface to match remap1 above, rename remap1_ppm: + public remap_q_ppm ! remap state%Q, PPM, monotone + + contains + +!=======================================================================================================! 
+
+!remap_calc_grids computes the vertical pressures and pressure differences for one vertical column for the reference grid
+!and for the deformed Lagrangian grid. This was pulled out of each routine since it was a repeated task.
+!
+!Both grids start from an interface pressure of zero at index 1 and are built by accumulating the layer
+!thicknesses downward, so p_ref(nlev+1) and p_lag(nlev+1) are the column totals of dp_ref and dp_lag.
+subroutine remap_calc_grids( hvcoord , ps , dt , eta_dot_dpdn , p_lag , p_ref , dp_lag , dp_ref )
+  implicit none
+  type(hvcoord_t) , intent(in ) :: hvcoord !Derived type to hold vertical sigma grid parameters
+  real(kind=real_kind) , intent(in ) :: ps !Surface pressure for this column
+  real(kind=real_kind) , intent(in ) :: dt !Time step
+  real(kind=real_kind) , intent(in ) :: eta_dot_dpdn(nlev+1) !Looks like a vertical pressure flux
+  !to compute deformed grid spacing
+  real(kind=real_kind) , intent( out) :: p_lag(nlev+1) !Pressures at interfaces of the Lagrangian deformed grid
+  real(kind=real_kind) , intent( out) :: p_ref(nlev+1) !Pressures at interfaces of the reference grid
+  real(kind=real_kind) , intent( out) :: dp_lag(nlev) !Pressure differences on Lagrangian deformed grid
+  real(kind=real_kind) , intent( out) :: dp_ref(nlev) !Pressure differences on reference grid
+  integer :: k !Iterator
+  p_ref(1) = 0 !Both grids have a model top pressure of zero
+  p_lag(1) = 0 !Both grids have a model top pressure of zero
+  do k = 1 , nlev
+    dp_ref(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
+      ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * ps !Reference pressure difference
+    ! Lagrangian pressure difference (flux in - flux out over the time step)
+    dp_lag(k) = dp_ref(k) + dt * ( eta_dot_dpdn(k+1) - eta_dot_dpdn(k) )
+    p_ref(k+1) = p_ref(k) + dp_ref(k) !Pressure at interfaces accumulated using difference over each cell
+    p_lag(k+1) = p_lag(k) + dp_lag(k) !Pressure at interfaces accumulated using difference over each cell
+  enddo
+end subroutine remap_calc_grids
+
+!=======================================================================================================!
+
+
+
+subroutine remap1(Qdp,nx,qsize,dp1,dp2)
+  ! remap 1 field
+  ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+  ! dp1 layer thickness (source)
+  ! dp2 layer thickness (target)
+  !
+  ! output: remapped Qdp, conserving mass, monotone on Q=Qdp/dp
+  !
+  ! nx    : horizontal extent of Qdp, dp1 and dp2 in each of the first two dimensions
+  ! qsize : number of fields held in the last dimension of Qdp (dummy argument; it hides the
+  !         module-level qsize imported from dimensions_mod inside this routine)
+  !
+  ! When vert_remap_q_alg is 1 or 2 the work is handed to remap_Q_ppm and this routine returns
+  ! immediately. Otherwise every (i,j,q) column is remapped with the filtered quadratic-spline
+  ! reconstruction below and Qdp is overwritten in place.
+  !
+  ! Inconsistent columns (source and target totals differing by 1e-6 or more, or an interpolation
+  ! weight above 1) only set the abort flag; abortmp is called once after all columns were processed.
+  implicit none
+  integer, intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! ========================
+  ! Local Variables
+  ! ========================
+  ! NOTE(review): dp_star, dp_np1, level3, t0 and ie are declared but not referenced in the body below.
+
+  real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+  real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
+  real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
+    peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
+    zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
+  integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
+    lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
+  ! NOTE(review): an initialiser in the declaration gives abort (and zero/one/tiny/qmax above) the
+  ! SAVE attribute in Fortran, and abort is never reset to .false. in this routine. abort is also
+  ! absent from the OpenMP private lists below, so presumably it is shared between threads - confirm.
+  logical :: abort=.false.
+
+  ! PPM variants are implemented in remap_Q_ppm; nothing below runs for them.
+  if (vert_remap_q_alg == 1 .or. vert_remap_q_alg == 2) then
+    call remap_Q_ppm(qdp,nx,qsize,dp1,dp2)
+    return
+  endif
+
+  call t_startf('remap_Q_noppm')
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
+!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
+!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
+!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
+#endif
+  do q=1,qsize
+    do i=1,nx
+      do j=1,nx
+
+        ! Interface coordinates: running sums of the layer thicknesses, starting from zero.
+        z1c(1)=0 ! source grid
+        z2c(1)=0 ! target grid
+        do k=1,nlev
+          z1c(k+1)=z1c(k)+dp1(i,j,k)
+          z2c(k+1)=z2c(k)+dp2(i,j,k)
+        enddo
+
+        ! zv(k) is the mass accumulated above source interface k.
+        zv(1)=0
+        do k=1,nlev
+          Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
+          zv(k+1) = zv(k)+Qcol(k)
+        enddo
+
+        ! The two grids must span the same total thickness; otherwise flag the run for abort.
+        if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+          write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+          write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
+          write(6,*) 'DATA FOR MODEL LEVELS'
+          write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+          write(6,*) 'PLEV =',z1c(nlev+1)
+          write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
+          abort=.true.
+        endif
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! quadratic splines with UK met office monotonicity constraints !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+        ! zkr(k) = index of the source cell that contains target interface k. The search resumes
+        ! from the previous hit (ilev), so one pass over the source interfaces serves all k.
+        ! 99 is a placeholder that is overwritten whenever the search finds a cell.
+        zkr = 99
+        ilev = 2
+        zkr(1) = 1
+        zkr(nlev+1) = nlev
+        kloop: do k = 2,nlev
+          do jk = ilev,nlev+1
+            if (z1c(jk).ge.z2c(k)) then
+              ilev = jk
+              zkr(k) = jk-1
+              cycle kloop
+            endif
+          enddo
+        enddo kloop
+
+        ! zgam = fractional position of each target interface inside its source cell;
+        ! the two end interfaces are pinned to 0 and 1.
+        zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+        zgam(1) = 0.0
+        zgam(nlev+1) = 1.0
+        zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+
+
+        ! h = inverse source thickness, zarg = cell-mean value (mass / thickness).
+        h = 1/zhdp
+        zarg = Qcol * h
+        rhs = 0
+        lower_diag = 0
+        diag = 0
+        upper_diag = 0
+
+        ! Assemble the tridiagonal system for the nlev+1 interface values.
+        rhs(1)=3*zarg(1)
+        rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+        rhs(nlev+1)=3*zarg(nlev)
+
+        lower_diag(1)=1
+        lower_diag(2:nlev) = h(1:nlev-1)
+        lower_diag(nlev+1)=1
+
+        diag(1)=2
+        diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+        diag(nlev+1)=2
+
+        upper_diag(1)=1
+        upper_diag(2:nlev) = h(2:nlev)
+        upper_diag(nlev+1)=0
+
+        ! Forward elimination ...
+        q_diag(1)=-upper_diag(1)/diag(1)
+        rhs(1)= rhs(1)/diag(1)
+
+        do k=2,nlev+1
+          tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
+          q_diag(k) = -upper_diag(k)*tmp_cal
+          rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+        enddo
+        ! ... and back substitution; rhs now holds the interface values.
+        do k=nlev,1,-1
+          rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+        enddo
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! monotonicity modifications !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+        ! dy = differences of neighbouring cell means; magnitudes below tiny are treated as zero.
+        filter_code = 0
+        dy(1:nlev-1) = zarg(2:nlev)-zarg(1:nlev-1)
+        dy(nlev) = dy(nlev-1)
+
+        dy = merge(zero, dy, abs(dy) < tiny )
+
+        ! Interface filter: where neither t1 nor t2 holds, replace the interface value by whichever
+        ! adjacent cell mean is selected by t3 and mark cells k and k-1 in filter_code.
+        do k=1,nlev
+          im1=MAX(1,k-1)
+          im2=MAX(1,k-2)
+          im3=MAX(1,k-3)
+          ip1=MIN(nlev,k+1)
+          t1 = merge(1,0,(zarg(k)-rhs(k))*(rhs(k)-zarg(im1)) >= 0)
+          t2 = merge(1,0,dy(im2)*(rhs(k)-zarg(im1)) > 0 .AND. dy(im2)*dy(im3) > 0 &
+            .AND. dy(k)*dy(ip1) > 0 .AND. dy(im2)*dy(k) < 0 )
+          t3 = merge(1,0,ABS(rhs(k)-zarg(im1)) > ABS(rhs(k)-zarg(k)))
+
+          filter_code(k) = merge(0,1,t1+t2 > 0)
+          rhs(k) = (1-filter_code(k))*rhs(k)+filter_code(k)*(t3*zarg(k)+(1-t3)*zarg(im1))
+          filter_code(im1) = MAX(filter_code(im1),filter_code(k))
+        enddo
+
+        ! Clamp the interface values to [zero, qmax].
+        rhs = merge(qmax,rhs,rhs > qmax)
+        rhs = merge(zero,rhs,rhs < zero)
+
+        za0 = rhs(1:nlev)
+        za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+        za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+        ! From here on dy holds the jump of the interface values across each cell.
+        dy(1:nlev) = rhs(2:nlev+1)-rhs(1:nlev)
+        dy = merge(zero, dy, abs(dy) < tiny )
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! Compute the 3 quadratic spline coefficients {za0, za1, za2} !!
+        !! knowing the quadratic spline parameters {rho_left,rho_right,zarg} !!
+        !! Zerroukat et.al., Q.J.R. Meteorol. Soc., Vol. 128, pp. 2801-2820 (2002). !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+
+        ! h is reused: from here it is the interface value at the far (k+1) side of cell k.
+        h = rhs(2:nlev+1)
+
+        do k=1,nlev
+          ! xm = stationary point of the cell's quadratic (taken as 0 when za2 is negligible),
+          ! f_xm = value of the quadratic there.
+          xm_d = merge(one,2*za2(k),abs(za2(k)) < tiny)
+          xm = merge(zero,-za1(k)/xm_d, abs(za2(k)) < tiny)
+          f_xm = za0(k) + za1(k)*xm + za2(k)*xm**2
+
+          t1 = merge(1,0,ABS(za2(k)) > tiny)
+          t2 = merge(1,0,xm <= zero .OR. xm >= 1)
+          t3 = merge(1,0,za2(k) > zero)
+          t4 = merge(1,0,za2(k) < zero)
+          tm = merge(1,0,t1*((1-t2)+t3) .EQ. 2)
+          tp = merge(1,0,t1*((1-t2)+(1-t3)+t4) .EQ. 3)
+
+          ! peaks = -1 when tm is set, +1 when tp is set, 0 otherwise.
+          peaks=0
+          peaks = merge(-1,peaks,tm .EQ. 1)
+          peaks = merge(+1,peaks,tp .EQ. 1)
+          peaks_min = merge(f_xm,MIN(za0(k),za0(k)+za1(k)+za2(k)),tm .EQ. 1)
+          peaks_max = merge(f_xm,MAX(za0(k),za0(k)+za1(k)+za2(k)),tp .EQ. 1)
+
+          im1=MAX(1,k-1)
+          im2=MAX(1,k-2)
+          ip1=MIN(nlev,k+1)
+          ip2=MIN(nlev,k+2)
+
+          t1 = merge(abs(peaks),0,(dy(im2)*dy(im1) <= tiny) .OR. &
+            (dy(ip1)*dy(ip2) <= tiny) .OR. (dy(im1)*dy(ip1) >= tiny) .OR. &
+            (dy(im1)*float(peaks) <= tiny))
+
+          filter_code(k) = merge(1,t1+(1-t1)*filter_code(k),(rhs(k) >= qmax) .OR. &
+            (rhs(k) <= zero) .OR. (peaks_max > qmax) .OR. (peaks_min < tiny))
+
+          ! Flagged cells get replacement coefficients chosen by where the cell mean zarg(k) lies
+          ! relative to the two interface values rhs(k) and h(k) (level1 .. level5).
+          if (filter_code(k) > 0) then
+            level1 = rhs(k)
+            level2 = (2*rhs(k)+h(k))/3
+            level3 = 0.5*(rhs(k)+h(k))
+            level4 = (1/3d0)*rhs(k)+2*(1/3d0)*h(k)
+            level5 = h(k)
+
+            ! Case h(k) >= rhs(k)
+            t1 = merge(1,0,h(k) >= rhs(k))
+            t2 = merge(1,0,zarg(k) <= level1 .OR. zarg(k) >= level5)
+            t3 = merge(1,0,zarg(k) > level1 .AND. zarg(k) < level2)
+            t4 = merge(1,0,zarg(k) > level4 .AND. zarg(k) < level5)
+
+            lt1 = t1*t2
+            lt2 = t1*(1-t2+t3)
+            lt3 = t1*(1-t2+1-t3+t4)
+
+            za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+            za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+            za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+            za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+            za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+            za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+            za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+            za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+            za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+
+            ! Case h(k) < rhs(k): same selections with the comparisons mirrored.
+            t2 = merge(1,0,zarg(k) >= level1 .OR. zarg(k) <= level5)
+            t3 = merge(1,0,zarg(k) < level1 .AND. zarg(k) > level2)
+            t4 = merge(1,0,zarg(k) < level4 .AND. zarg(k) > level5)
+
+            lt1 = (1-t1)*t2
+            lt2 = (1-t1)*(1-t2+t3)
+            lt3 = (1-t1)*(1-t2+1-t3+t4)
+
+            za0(k) = merge(zarg(k),za0(k),lt1 .EQ. 1)
+            za1(k) = merge(zero,za1(k),lt1 .EQ. 1)
+            za2(k) = merge(zero,za2(k),lt1 .EQ. 1)
+
+            za0(k) = merge(rhs(k),za0(k),lt2 .EQ. 2)
+            za1(k) = merge(zero,za1(k),lt2 .EQ. 2)
+            za2(k) = merge(3*(zarg(k)-rhs(k)),za2(k),lt2 .EQ. 2)
+
+            za0(k) = merge(-2*h(k)+3*zarg(k),za0(k),lt3 .EQ. 3)
+            za1(k) = merge(+6*h(k)-6*zarg(k),za1(k),lt3 .EQ. 3)
+            za2(k) = merge(-3*h(k)+3*zarg(k),za2(k),lt3 .EQ. 3)
+          endif
+        enddo
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! start iteration from top to bottom of atmosphere !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+        ! zv2 = mass above target interface k+1: mass above the containing source cell plus the
+        ! integral of that cell's quadratic from 0 to zgam. The new cell mass is the difference
+        ! between successive interfaces.
+        zv1 = 0
+        do k=1,nlev
+          if (zgam(k+1)>1d0) then
+            WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+            abort=.true.
+          endif
+          zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+            (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+          Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+          zv1 = zv2
+        enddo
+      enddo
+    enddo
+  enddo ! q loop
+  if (abort) call abortmp('Bad levels in remap1. usually CFL violatioin')
+  call t_stopf('remap_Q_noppm')
+
+end subroutine remap1
+
+subroutine remap1_nofilter(Qdp,nx,qsize,dp1,dp2)
+  ! remap 1 field
+  ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+  ! dp1 layer thickness (source)
+  ! dp2 layer thickness (target)
+  !
+  ! output: remapped Qdp, conserving mass
+  !
+  ! Same spline construction and integration as remap1, but without the monotonicity filters and
+  ! without the clamping of the interface values, and it never dispatches to remap_Q_ppm.
+  ! The timer calls are commented out in this routine.
+  implicit none
+  integer, intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! ========================
+  ! Local Variables
+  ! ========================
+  ! NOTE(review): the declarations mirror remap1; many of the filter-related locals (dy, filter_code,
+  ! peaks*, level*, t*, lt*, ...) are not referenced in this routine's body.
+
+  real (kind=real_kind), dimension(nlev+1) :: rhs,lower_diag,diag,upper_diag,q_diag,zgam,z1c,z2c,zv
+  real (kind=real_kind), dimension(nlev) :: h,Qcol,dy,za0,za1,za2,zarg,zhdp,dp_star,dp_np1
+  real (kind=real_kind) :: f_xm,level1,level2,level3,level4,level5, &
+    peaks_min,peaks_max,tmp_cal,xm,xm_d,zv1,zv2, &
+    zero = 0,one = 1,tiny = 1e-12,qmax = 1d50
+  integer(kind=int_kind) :: zkr(nlev+1),filter_code(nlev),peaks,im1,im2,im3,ip1,ip2, &
+    lt1,lt2,lt3,t0,t1,t2,t3,t4,tm,tp,ie,i,ilev,j,jk,k,q
+  ! NOTE(review): initialised in the declaration (implies SAVE) and never reset here; also not in
+  ! the OpenMP private lists below.
+  logical :: abort=.false.
+!   call t_startf('remap1_nofilter')
+
+#if (defined COLUMN_OPENMP)
+!$omp parallel do private(q,i,j,z1c,z2c,zv,k,dp_np1,dp_star,Qcol,zkr,ilev) &
+!$omp private(jk,zgam,zhdp,h,zarg,rhs,lower_diag,diag,upper_diag,q_diag,tmp_cal,filter_code) &
+!$omp private(dy,im1,im2,im3,ip1,t1,t2,t3,za0,za1,za2,xm_d,xm,f_xm,t4,tm,tp,peaks,peaks_min) &
+!$omp private(peaks_max,ip2,level1,level2,level3,level4,level5,lt1,lt2,lt3,zv1,zv2)
+#endif
+  do q=1,qsize
+    do i=1,nx
+      do j=1,nx
+
+        ! Interface coordinates: running sums of the layer thicknesses, starting from zero.
+        z1c(1)=0 ! source grid
+        z2c(1)=0 ! target grid
+        do k=1,nlev
+          z1c(k+1)=z1c(k)+dp1(i,j,k)
+          z2c(k+1)=z2c(k)+dp2(i,j,k)
+        enddo
+
+        ! zv(k) is the mass accumulated above source interface k.
+        zv(1)=0
+        do k=1,nlev
+          Qcol(k)=Qdp(i,j,k,q)! *(z1c(k+1)-z1c(k)) input is mass
+          zv(k+1) = zv(k)+Qcol(k)
+        enddo
+
+        ! The two grids must span the same total thickness; otherwise flag the run for abort.
+        if (ABS(z2c(nlev+1)-z1c(nlev+1)).GE.0.000001) then
+          write(6,*) 'SURFACE PRESSURE IMPLIED BY ADVECTION SCHEME'
+          write(6,*) 'NOT CORRESPONDING TO SURFACE PRESSURE IN '
+          write(6,*) 'DATA FOR MODEL LEVELS'
+          write(6,*) 'PLEVMODEL=',z2c(nlev+1)
+          write(6,*) 'PLEV =',z1c(nlev+1)
+          write(6,*) 'DIFF =',z2c(nlev+1)-z1c(nlev+1)
+          abort=.true.
+        endif
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! quadratic splines with UK met office monotonicity constraints !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+        ! zkr(k) = index of the source cell that contains target interface k (incremental search).
+        zkr = 99
+        ilev = 2
+        zkr(1) = 1
+        zkr(nlev+1) = nlev
+        kloop: do k = 2,nlev
+          do jk = ilev,nlev+1
+            if (z1c(jk).ge.z2c(k)) then
+              ilev = jk
+              zkr(k) = jk-1
+              cycle kloop
+            endif
+          enddo
+        enddo kloop
+
+        ! zgam = fractional position of each target interface inside its source cell.
+        zgam = (z2c(1:nlev+1)-z1c(zkr)) / (z1c(zkr+1)-z1c(zkr))
+        zgam(1) = 0.0
+        zgam(nlev+1) = 1.0
+        zhdp = z1c(2:nlev+1)-z1c(1:nlev)
+
+
+        ! h = inverse source thickness, zarg = cell-mean value (mass / thickness).
+        h = 1/zhdp
+        zarg = Qcol * h
+        rhs = 0
+        lower_diag = 0
+        diag = 0
+        upper_diag = 0
+
+        ! Assemble the tridiagonal system for the nlev+1 interface values.
+        rhs(1)=3*zarg(1)
+        rhs(2:nlev) = 3*(zarg(2:nlev)*h(2:nlev) + zarg(1:nlev-1)*h(1:nlev-1))
+        rhs(nlev+1)=3*zarg(nlev)
+
+        lower_diag(1)=1
+        lower_diag(2:nlev) = h(1:nlev-1)
+        lower_diag(nlev+1)=1
+
+        diag(1)=2
+        diag(2:nlev) = 2*(h(2:nlev) + h(1:nlev-1))
+        diag(nlev+1)=2
+
+        upper_diag(1)=1
+        upper_diag(2:nlev) = h(2:nlev)
+        upper_diag(nlev+1)=0
+
+        ! Forward elimination ...
+        q_diag(1)=-upper_diag(1)/diag(1)
+        rhs(1)= rhs(1)/diag(1)
+
+        do k=2,nlev+1
+          tmp_cal = 1/(diag(k)+lower_diag(k)*q_diag(k-1))
+          q_diag(k) = -upper_diag(k)*tmp_cal
+          rhs(k) = (rhs(k)-lower_diag(k)*rhs(k-1))*tmp_cal
+        enddo
+        ! ... and back substitution; rhs now holds the interface values.
+        do k=nlev,1,-1
+          rhs(k)=rhs(k)+q_diag(k)*rhs(k+1)
+        enddo
+
+        ! Quadratic coefficients per cell, taken directly from the unfiltered interface values.
+        za0 = rhs(1:nlev)
+        za1 = -4*rhs(1:nlev) - 2*rhs(2:nlev+1) + 6*zarg
+        za2 = 3*rhs(1:nlev) + 3*rhs(2:nlev+1) - 6*zarg
+
+
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+        !! start iteration from top to bottom of atmosphere !!
+        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+        ! New cell mass = difference of the accumulated mass at successive target interfaces.
+        zv1 = 0
+        do k=1,nlev
+          if (zgam(k+1)>1d0) then
+            WRITE(*,*) 'r not in [0:1]', zgam(k+1)
+            abort=.true.
+          endif
+          zv2 = zv(zkr(k+1))+(za0(zkr(k+1))*zgam(k+1)+(za1(zkr(k+1))/2)*(zgam(k+1)**2)+ &
+            (za2(zkr(k+1))/3)*(zgam(k+1)**3))*zhdp(zkr(k+1))
+          Qdp(i,j,k,q) = (zv2 - zv1) ! / (z2c(k+1)-z2c(k) ) dont convert back to mixing ratio
+          zv1 = zv2
+        enddo
+      enddo
+    enddo
+  enddo ! q loop
+  if (abort) call abortmp('Bad levels in remap1_nofilter. usually CFL violatioin')
+!   call t_stopf('remap1_nofilter')
+end subroutine remap1_nofilter
+
+!=======================================================================================================!
+
+
+!This uses the exact same model and reference grids and data as remap_Q, but it interpolates
+!using PPM instead of splines.
+subroutine remap_Q_ppm(Qdp,nx,qsize,dp1,dp2)
+  ! remap 1 field
+  ! input: Qdp field to be remapped (NOTE: MASS, not MIXING RATIO)
+  ! dp1 layer thickness (source)
+  ! dp2 layer thickness (target)
+  !
+  ! output: remapped Qdp, conserving mass
+  !
+  ! nx    : horizontal extent of Qdp, dp1 and dp2 in each of the first two dimensions
+  ! qsize : number of tracers held in the last dimension of Qdp
+  !
+  ! For each (i,j) column the grid-only quantities (containing cell kid, integration bounds z1/z2,
+  ! PPM grid coefficients ppmdx) are computed once and then reused for every tracer q.
+  use control_mod, only : vert_remap_q_alg
+  implicit none
+  integer,intent(in) :: nx,qsize
+  real (kind=real_kind), intent(inout) :: Qdp(nx,nx,nlev,qsize)
+  real (kind=real_kind), intent(in) :: dp1(nx,nx,nlev),dp2(nx,nx,nlev)
+  ! Local Variables
+  integer, parameter :: gs = 2 !Number of cells to place in the ghost region
+  real(kind=real_kind), dimension( nlev+2 ) :: pio !Pressure at interfaces for old grid
+  real(kind=real_kind), dimension( nlev+1 ) :: pin !Pressure at interfaces for new grid
+  real(kind=real_kind), dimension( nlev+1 ) :: masso !Accumulate mass up to each interface
+  real(kind=real_kind), dimension( 1-gs:nlev+gs) :: ao !Tracer value on old grid
+  real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpo !change in pressure over a cell for old grid
+  real(kind=real_kind), dimension( 1-gs:nlev+gs) :: dpn !change in pressure over a cell for new grid
+  real(kind=real_kind), dimension(3, nlev ) :: coefs !PPM coefficients within each cell
+  real(kind=real_kind), dimension( nlev ) :: z1, z2
+  real(kind=real_kind) :: ppmdx(10,0:nlev+1) !grid spacings
+  real(kind=real_kind) :: mymass, massn1, massn2 !NOTE(review): mymass is not referenced below
+  integer :: i, j, k, q, kk, kid(nlev)
+
+  call t_startf('remap_Q_ppm')
+  do j = 1 , nx
+    do i = 1 , nx
+
+      ! Interface pressures of both grids: running sums of the layer thicknesses from zero.
+      pin(1)=0
+      pio(1)=0
+      do k=1,nlev
+        dpn(k)=dp2(i,j,k)
+        dpo(k)=dp1(i,j,k)
+        pin(k+1)=pin(k)+dpn(k)
+        pio(k+1)=pio(k)+dpo(k)
+      enddo
+
+
+
+      pio(nlev+2) = pio(nlev+1) + 1. !This is here to allow an entire block of k threads to run in the remapping phase.
+      !It makes sure there's an old interface value below the domain that is larger.
+      pin(nlev+1) = pio(nlev+1) !The total mass in a column does not change.
+      !Therefore, the pressure of that mass cannot either.
+      !Fill in the ghost regions with mirrored values. if vert_remap_q_alg is defined, this is of no consequence.
+      do k = 1 , gs
+        dpo(1 -k) = dpo( k)
+        dpo(nlev+k) = dpo(nlev+1-k)
+      enddo
+
+      !Compute remapping intervals once for all tracers. Find the old grid cell index in which the
+      !k-th new cell interface resides. Then integrate from the bottom of that old cell to the new
+      !interface location. In practice, the grid never deforms past one cell, so the search can be
+      !simplified by this. Also, the interval of integration is usually of magnitude close to zero
+      !or close to dpo because of minimal deformation.
+      !Numerous tests confirmed that the bottom and top of the grids match to machine precision, so
+      !I set them equal to each other.
+      do k = 1 , nlev
+        kk = k !Keep from an order n^2 search operation by assuming the old cell index is close.
+        !Find the index of the old grid cell in which this new cell's bottom interface resides.
+        !The sentinel pio(nlev+2) set above guarantees this loop terminates by kk = nlev+2.
+        do while ( pio(kk) <= pin(k+1) )
+          kk = kk + 1
+        enddo
+        kk = kk - 1 !kk is now the cell index we're integrating over.
+        if (kk == nlev+1) kk = nlev !This is to keep the indices in bounds.
+        !Top bounds match anyway, so doesn't matter what coefficients are used
+        kid(k) = kk !Save for reuse
+        z1(k) = -0.5D0 !This remapping assumes we're starting from the left interface of an old grid cell
+        !In fact, we're usually integrating very little or almost all of the cell in question
+        z2(k) = ( pin(k+1) - ( pio(kk) + pio(kk+1) ) * 0.5 ) / dpo(kk) !PPM interpolants are normalized to an independent
+        !coordinate domain [-0.5,0.5].
+      enddo
+
+      !This turned out a big optimization, remembering that only parts of the PPM algorithm depends on the data, namely the
+      !limiting. So anything that depends only on the grid is pre-computed outside the tracer loop.
+      ppmdx(:,:) = compute_ppm_grids( dpo )
+
+      !From here, we loop over tracers for only those portions which depend on tracer data, which includes PPM limiting and
+      !mass accumulation
+      do q = 1 , qsize
+        !Accumulate the old mass up to old grid cell interface locations to simplify integration
+        !during remapping. Also, divide out the grid spacing so we're working with actual tracer
+        !values and can conserve mass. The option for ifndef ZEROHORZ I believe is there to ensure
+        !tracer consistency for an initially uniform field. I copied it from the old remap routine.
+        masso(1) = 0.
+        do k = 1 , nlev
+          ao(k) = Qdp(i,j,k,q)
+          masso(k+1) = masso(k) + ao(k) !Accumulate the old mass. This will simplify the remapping
+          ao(k) = ao(k) / dpo(k) !Divide out the old grid spacing because we want the tracer mixing ratio, not mass.
+        enddo
+        !Fill in ghost values. Ignored if vert_remap_q_alg == 2
+        do k = 1 , gs
+          ao(1 -k) = ao( k)
+          ao(nlev+k) = ao(nlev+1-k)
+        enddo
+        !Compute monotonic and conservative PPM reconstruction over every cell
+        coefs(:,:) = compute_ppm( ao , ppmdx )
+        !Compute tracer values on the new grid by integrating from the old cell bottom to the new
+        !cell interface to form a new grid mass accumulation. Taking the difference between
+        !accumulation at successive interfaces gives the mass inside each cell. Since Qdp is
+        !supposed to hold the full mass this needs no normalization.
+        massn1 = 0.
+        do k = 1 , nlev
+          kk = kid(k)
+          massn2 = masso(kk) + integrate_parabola( coefs(:,kk) , z1(k) , z2(k) ) * dpo(kk)
+          Qdp(i,j,k,q) = massn2 - massn1
+          massn1 = massn2
+        enddo
+      enddo
+    enddo
+  enddo
+  call t_stopf('remap_Q_ppm')
+end subroutine remap_Q_ppm
+
+
+!=======================================================================================================!
+
+
+!This computes grid-based coefficients from Colella & Woodward 1984.
+!
+!dx   : cell thicknesses including two ghost cells on each side (indices -1:nlev+2)
+!rslt : rslt(1:3,j) feed stage 1 of compute_ppm, rslt(4:10,j) feed stage 2
+!
+!Only the index range indB:indE of each group is assigned; with vert_remap_q_alg == 2 that range is
+!narrower and the remaining entries of rslt are left unassigned by this routine.
+function compute_ppm_grids( dx ) result(rslt)
+  use control_mod, only: vert_remap_q_alg
+  implicit none
+  real(kind=real_kind), intent(in) :: dx(-1:nlev+2) !grid spacings
+  real(kind=real_kind) :: rslt(10,0:nlev+1) !grid spacings
+  integer :: j
+  integer :: indB, indE
+
+  !Calculate grid-based coefficients for stage 1 of compute_ppm
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-1
+  else
+    indB = 0
+    indE = nlev+1
+  endif
+  do j = indB , indE
+    rslt( 1,j) = dx(j) / ( dx(j-1) + dx(j) + dx(j+1) )
+    rslt( 2,j) = ( 2.*dx(j-1) + dx(j) ) / ( dx(j+1) + dx(j) )
+    rslt( 3,j) = ( dx(j) + 2.*dx(j+1) ) / ( dx(j-1) + dx(j) )
+  enddo
+
+  !Calculate grid-based coefficients for stage 2 of compute_ppm
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-2
+  else
+    indB = 0
+    indE = nlev
+  endif
+  do j = indB , indE
+    rslt( 4,j) = dx(j) / ( dx(j) + dx(j+1) )
+    rslt( 5,j) = 1. / sum( dx(j-1:j+2) )
+    rslt( 6,j) = ( 2. * dx(j+1) * dx(j) ) / ( dx(j) + dx(j+1 ) )
+    rslt( 7,j) = ( dx(j-1) + dx(j ) ) / ( 2. * dx(j ) + dx(j+1) )
+    rslt( 8,j) = ( dx(j+2) + dx(j+1) ) / ( 2. * dx(j+1) + dx(j ) )
+    rslt( 9,j) = dx(j ) * ( dx(j-1) + dx(j ) ) / ( 2.*dx(j ) + dx(j+1) )
+    rslt(10,j) = dx(j+1) * ( dx(j+1) + dx(j+2) ) / ( dx(j ) + 2.*dx(j+1) )
+  enddo
+end function compute_ppm_grids
+
+!=======================================================================================================!
+
+
+
+!This computes a limited parabolic interpolant using a net 5-cell stencil, but the stages of computation are broken up into 3 stages
+!
+!a     : cell-mean values including two ghost cells on each side (indices -1:nlev+2)
+!dx    : grid-only coefficients with the layout returned by compute_ppm_grids
+!coefs : for every cell j, the three coefficients coefs(0:2,j) of the parabola in the normalized
+!        coordinate used by integrate_parabola
+!
+!With vert_remap_q_alg == 2 the stage loops skip the cells next to the boundaries and the two
+!top-most and two bottom-most cells are made piecewise constant at the end of the routine.
+function compute_ppm( a , dx ) result(coefs)
+  use control_mod, only: vert_remap_q_alg
+  implicit none
+  real(kind=real_kind), intent(in) :: a ( -1:nlev+2) !Cell-mean values
+  real(kind=real_kind), intent(in) :: dx (10, 0:nlev+1) !grid spacings
+  real(kind=real_kind) :: coefs(0:2, nlev ) !PPM coefficients (for parabola)
+  real(kind=real_kind) :: ai (0:nlev ) !fourth-order accurate, then limited interface values
+  real(kind=real_kind) :: dma(0:nlev+1) !An expression from Colella's '84 publication
+  real(kind=real_kind) :: da !Ditto
+  ! Hold expressions based on the grid (which are cumbersome).
+  ! NOTE(review): dx1 .. dx10 are declared but not referenced in the body below.
+  real(kind=real_kind) :: dx1, dx2, dx3, dx4, dx5, dx6, dx7, dx8, dx9, dx10
+  real(kind=real_kind) :: al, ar !Left and right interface values for cell-local limiting
+  integer :: j
+  integer :: indB, indE
+
+  ! Stage 1: Compute dma for each cell, allowing a 1-cell ghost stencil below and above the domain
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-1
+  else
+    indB = 0
+    indE = nlev+1
+  endif
+  do j = indB , indE
+    da = dx(1,j) * ( dx(2,j) * ( a(j+1) - a(j) ) + dx(3,j) * ( a(j) - a(j-1) ) )
+    ! dma = da limited in magnitude by twice each one-sided difference, keeping the sign of da ...
+    dma(j) = minval( (/ abs(da) , 2. * abs( a(j) - a(j-1) ) , 2. * abs( a(j+1) - a(j) ) /) ) * sign(1.D0,da)
+    ! ... and set to zero where the two one-sided differences do not have the same sign.
+    if ( ( a(j+1) - a(j) ) * ( a(j) - a(j-1) ) <= 0. ) dma(j) = 0.
+  enddo
+
+  ! Stage 2: Compute ai for each cell interface in the physical domain (dimension nlev+1)
+  if (vert_remap_q_alg == 2) then
+    indB = 2
+    indE = nlev-2
+  else
+    indB = 0
+    indE = nlev
+  endif
+  do j = indB , indE
+    ai(j) = a(j) + dx(4,j) * ( a(j+1) - a(j) ) + dx(5,j) * ( dx(6,j) * ( dx(7,j) - dx(8,j) ) &
+      * ( a(j+1) - a(j) ) - dx(9,j) * dma(j+1) + dx(10,j) * dma(j) )
+  enddo
+
+  ! Stage 3: Compute limited PPM interpolant over each cell in the physical domain
+  ! (dimension nlev) using ai on either side and ao within the cell.
+  if (vert_remap_q_alg == 2) then
+    indB = 3
+    indE = nlev-2
+  else
+    indB = 1
+    indE = nlev
+  endif
+  do j = indB , indE
+    al = ai(j-1)
+    ar = ai(j )
+    ! If the cell mean does not lie between the two interface values, flatten the cell.
+    if ( (ar - a(j)) * (a(j) - al) <= 0. ) then
+      al = a(j)
+      ar = a(j)
+    endif
+    ! Otherwise adjust one interface value when the mean is too far from the interface average.
+    if ( (ar - al) * (a(j) - (al + ar)/2.) > (ar - al)**2/6. ) al = 3.*a(j) - 2. * ar
+    if ( (ar - al) * (a(j) - (al + ar)/2.) < -(ar - al)**2/6. ) ar = 3.*a(j) - 2. * al
+    !Computed these coefficients from the edge values and cell mean in Maple. Assumes normalized coordinates: xi=(x-x0)/dx
+    coefs(0,j) = 1.5 * a(j) - ( al + ar ) / 4.
+    coefs(1,j) = ar - al
+    coefs(2,j) = -6. * a(j) + 3. * ( al + ar )
+  enddo
+
+  !If we're not using a mirrored boundary condition, then make the two cells bordering the top and bottom
+  !material boundaries piecewise constant. Zeroing out the first and second moments, and setting the zeroth
+  !moment to the cell mean is sufficient to maintain conservation.
+  if (vert_remap_q_alg == 2) then
+    coefs(0,1:2) = a(1:2)
+    coefs(1:2,1:2) = 0.
+    coefs(0,nlev-1:nlev) = a(nlev-1:nlev)
+    coefs(1:2,nlev-1:nlev) = 0.D0
+  endif
+end function compute_ppm
+
+!=======================================================================================================!
+
+
+!Simple function computes the definite integral of a parabola in normalized coordinates, xi=(x-x0)/dx,
+!given two bounds. Make sure this gets inlined during compilation.
+!
+!The integrand is a(0) + a(1)*xi + a(2)*xi**2; the result is its integral from x1 to x2.
+function integrate_parabola( a , x1 , x2 ) result(mass)
+  implicit none
+  real(kind=real_kind), intent(in) :: a(0:2) !Coefficients of the parabola
+  real(kind=real_kind), intent(in) :: x1 !lower domain bound for integration
+  real(kind=real_kind), intent(in) :: x2 !upper domain bound for integration
+  real(kind=real_kind) :: mass
+  mass = a(0) * (x2 - x1) + a(1) * (x2 ** 2 - x1 ** 2) / 0.2D1 + a(2) * (x2 ** 3 - x1 ** 3) / 0.3D1
+end function integrate_parabola
+
+
+!=============================================================================================!
+ + + +end module vertremap_mod_base + + + + diff --git a/components/homme/src/share/viscosity_mod.F90 b/components/homme/src/share/viscosity_mod_base.F90 similarity index 99% rename from components/homme/src/share/viscosity_mod.F90 rename to components/homme/src/share/viscosity_mod_base.F90 index c005a3eff3b..c8cb824ded3 100644 --- a/components/homme/src/share/viscosity_mod.F90 +++ b/components/homme/src/share/viscosity_mod_base.F90 @@ -2,7 +2,7 @@ #include "config.h" #endif -module viscosity_mod +module viscosity_mod_base ! ! This module should be renamed "global_deriv_mod.F90" ! @@ -1032,4 +1032,4 @@ subroutine neighbor_minmax(elem,hybrid,edgeMinMax,nets,nete,nt,min_neigh,max_nei end subroutine #endif -end module +end module viscosity_mod_base diff --git a/components/homme/src/sweqx/CMakeLists.txt b/components/homme/src/sweqx/CMakeLists.txt index 8409fd086be..56af9356ec7 100644 --- a/components/homme/src/sweqx/CMakeLists.txt +++ b/components/homme/src/sweqx/CMakeLists.txt @@ -13,6 +13,16 @@ SET (SWEQX_INCLUDE_DIRS ${PIO_INCLUDE_DIRS} ${UTILS_TIMING_DIR} CACHE INTERNAL "include directories for sweqx") SET(SWEQX_SRCS + ${SRC_SHARE}/bndry_mod_base.F90 + ${SRC_SHARE}/derivative_mod_base.F90 + ${SRC_SHARE}/edge_mod_base.F90 + ${SRC_SHARE}/solver_init_mod_base.F90 + ${SRC_SHARE}/viscosity_mod_base.F90 + ${SRC_BASE}/sweqx/bndry_mod.F90 + ${SRC_BASE}/sweqx/derivative_mod.F90 + ${SRC_BASE}/sweqx/edge_mod.F90 + ${SRC_BASE}/sweqx/solver_init_mod.F90 + ${SRC_BASE}/sweqx/viscosity_mod.F90 ${SRC_BASE}/advance_mod.F90 ${SRC_BASE}/checksum_mod.F90 ${SRC_BASE}/common_io_mod.F90 @@ -32,16 +42,13 @@ SET(SWEQX_SRCS ${SRC_BASE}/types_mod.F90 ${SRC_SHARE}/fvm_bsp_mod.F90 ${SRC_SHARE}/fvm_transformation_mod.F90 - ${SRC_SHARE}/bndry_mod.F90 ${SRC_SHARE}/cg_mod.F90 ${SRC_SHARE}/control_mod.F90 ${SRC_SHARE}/coordinate_systems_mod.F90 ${SRC_SHARE}/cube_mod.F90 - ${SRC_SHARE}/derivative_mod.F90 ${SRC_SHARE}/dimensions_mod.F90 ${SRC_SHARE}/dof_mod.F90 ${SRC_SHARE}/domain_mod.F90 - 
${SRC_SHARE}/edge_mod.F90 ${SRC_SHARE}/element_mod.F90 ${SRC_SHARE}/filter_mod.F90 ${SRC_SHARE}/fvm_analytic_mod.F90 @@ -76,7 +83,6 @@ SET(SWEQX_SRCS ${SRC_SHARE}/spacecurve_mod.F90 ${SRC_SHARE}/thread_mod.F90 ${SRC_SHARE}/time_mod.F90 - ${SRC_SHARE}/viscosity_mod.F90 ${SRC_SHARE}/unit_tests_mod.F90 ${SRC_UTILS}/shr_file_mod.F90 ${SRC_UTILS}/shr_kind_mod.F90 diff --git a/components/homme/src/sweqx/bndry_mod.F90 b/components/homme/src/sweqx/bndry_mod.F90 new file mode 100644 index 00000000000..5ad1125cbef --- /dev/null +++ b/components/homme/src/sweqx/bndry_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module bndry_mod + use bndry_mod_base, only: bndry_exchangeV, ghost_exchangeVfull, compute_ghost_corner_orientation, ghost_exchangeV, bndry_exchangeS, bndry_exchangeS_start, bndry_exchangeS_finish, sort_neighbor_buffer_mapping + implicit none +end module bndry_mod diff --git a/components/homme/src/sweqx/derivative_mod.F90 b/components/homme/src/sweqx/derivative_mod.F90 new file mode 100644 index 00000000000..e9c3f81be3f --- /dev/null +++ b/components/homme/src/sweqx/derivative_mod.F90 @@ -0,0 +1,12 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module derivative_mod + use derivative_mod_base, only: derivative_t, derivative_stag_t, subcell_integration, subcell_dss_fluxes, subcell_div_fluxes, subcell_Laplace_fluxes, allocate_subcell_integration_matrix, & + derivinit, deriv_print, gradient, gradient_wk, vorticity, divergence, interpolate_gll2fvm_corners, interpolate_gll2fvm_points, interpolate_gll2spelt_points, & + remap_phys2gll, v2pinit, gradient_sphere_wk_testcov, gradient_sphere_wk_testcontra, ugradv_sphere, vorticity_sphere, vorticity_sphere_diag, curl_sphere, & + curl_sphere_wk_testcov, vlaplace_sphere_wk, element_boundary_integral, edge_flux_u_cg, gll_to_dgmodal, dgmodal_to_gll, limiter_optim_iter_full, & + laplace_sphere_wk, divergence_sphere_wk, gradient_sphere, divergence_sphere + implicit none +end module 
derivative_mod diff --git a/components/homme/src/sweqx/edge_mod.F90 b/components/homme/src/sweqx/edge_mod.F90 new file mode 100644 index 00000000000..902503b36cb --- /dev/null +++ b/components/homme/src/sweqx/edge_mod.F90 @@ -0,0 +1,12 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module edge_mod + use edge_mod_base, only: initLongEdgeBuffer, FreeLongEdgeBuffer, LongEdgeVpack, LongEdgeVunpackMIN, initEdgeBuffer, initEdgeSBuffer, FreeEdgeBuffer, edgeVpack, edgeVunpack, & + edgeVunpackMIN, edgeVunpackMAX, edgeDGVpack, edgeDGVunpack, edgeVunpackVert, edgerotate, buffermap, edgeDefaultVal, initGhostBuffer3D, FreeGhostBuffer3D, & + ghostVpackfull, ghostVunpackfull, ghostVpack_unoriented, ghostVunpack_unoriented, ghostVpack3d, ghostVunpack3d, initGhostBufferTR, FreeGhostBufferTR, & + ghostVpack, ghostVunpack, ghostVpackR, ghostVunpackR, ghostVpack2d, ghostVunpack2d, ghostVpack2d_single, ghostVunpack2d_single, ghostVpack2d_level, & + ghostVunpack2d_level, edgeSpack, edgeSunpackMin, edgeSunpackMax + implicit none +end module edge_mod diff --git a/components/homme/src/sweqx/solver_init_mod.F90 b/components/homme/src/sweqx/solver_init_mod.F90 new file mode 100644 index 00000000000..90efb25eadf --- /dev/null +++ b/components/homme/src/sweqx/solver_init_mod.F90 @@ -0,0 +1,8 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module solver_init_mod + use solver_init_mod_base, only: solver_init2 + implicit none +end module solver_init_mod diff --git a/components/homme/src/sweqx/viscosity_mod.F90 b/components/homme/src/sweqx/viscosity_mod.F90 new file mode 100644 index 00000000000..f78ae69a8f3 --- /dev/null +++ b/components/homme/src/sweqx/viscosity_mod.F90 @@ -0,0 +1,11 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +module viscosity_mod + use viscosity_mod_base, only: biharmonic_wk, compute_zeta_C0, compute_div_C0, compute_zeta_C0_contra, compute_div_C0_contra, make_c0, neighbor_minmax +#ifdef _PRIM + use viscosity_mod_base, only: biharmonic_wk_scalar, 
biharmonic_wk_scalar_minmax, neighbor_minmax_start,neighbor_minmax_finish, biharmonic_wk_dp3d +#endif + implicit none +end module viscosity_mod diff --git a/components/homme/test/jw_baroclinic/openacc_work/cpu.job b/components/homme/test/jw_baroclinic/openacc_work/cpu.job index bebb6ea3fb5..7771f0f2628 100644 --- a/components/homme/test/jw_baroclinic/openacc_work/cpu.job +++ b/components/homme/test/jw_baroclinic/openacc_work/cpu.job @@ -1,6 +1,6 @@ #!/bin/tcsh -f #PBS -A STF006 -#PBS -l nodes=24 +#PBS -l nodes=3 #PBS -q batch #PBS -l walltime=0:45:00 #PBS -N baroclinic @@ -83,7 +83,7 @@ echo $mpirun set u_perturb = 1 set rotate = 0 -set ne = 30 # horizontal resolution 4,16,30,60,120,240 +set ne = 8 # horizontal resolution 4,16,30,60,120,240 set nlev = 64 # vertical resolution 26,30,60,64,96 set qsize = 50 # number of passive tracers set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian diff --git a/components/homme/test/jw_baroclinic/openacc_work/openacc.job b/components/homme/test/jw_baroclinic/openacc_work/openacc.job index 8600dcee570..3193fcea868 100644 --- a/components/homme/test/jw_baroclinic/openacc_work/openacc.job +++ b/components/homme/test/jw_baroclinic/openacc_work/openacc.job @@ -1,6 +1,6 @@ #!/bin/tcsh -f #PBS -A STF006 -#PBS -l nodes=24 +#PBS -l nodes=3 #PBS -q batch #PBS -l walltime=0:45:00 #PBS -N baroclinic @@ -43,7 +43,7 @@ set exe = preqx.openacc module rm cray-netcdf-hdf5parallel module rm cray-parallel-netcdf module load PrgEnv-pgi - module switch pgi pgi/15.9.lustre + module switch pgi pgi/15.10.0 module switch cray-mpich cray-mpich/7.2.2 module switch cray-libsci cray-libsci/13.0.4 module load esmf/5.2.0rp2 @@ -71,7 +71,7 @@ setenv MPICH_CPUMASK_DISPLAY 1 #setenv CUDA_PROFILE_CONFIG ./cuda_config #setenv PGI_ACC_DEBUG 1 -setenv OMP_NUM_THREADS 4 +setenv OMP_NUM_THREADS 1 set NCPU = $PBS_NUM_NODES @ NCPU *= 16 @ NCPU /= $OMP_NUM_THREADS @@ -83,7 +83,7 @@ echo $mpirun set u_perturb = 1 set rotate = 0 -set ne = 30 # horizontal 
resolution 4,16,30,60,120,240 +set ne = 8 # horizontal resolution 4,16,30,60,120,240 set nlev = 64 # vertical resolution 26,30,60,64,96 set qsize = 50 # number of passive tracers set namelist = jw_baroclinic.nl # CAM-style, vertically lagrangian diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job old mode 100644 new mode 100755 index 5dde0fbab77..7147e817eb8 --- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job +++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.cpu.job @@ -7,7 +7,7 @@ # set paths to source code, build directory and run directory set wdir = /proj/imn/HOMME_ACME # run directory -set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout +set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case set vdir = $HOMME/test/vcoord # vertical coordinate files set exedir = $HOMME/build/preqx diff --git a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job old mode 100644 new mode 100755 index 63846c35c5b..df7e9943c9c --- a/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job +++ b/components/homme/test/jw_baroclinic/openacc_work/thatchroof.openacc.job @@ -7,7 +7,7 @@ # set paths to source code, build directory and run directory set wdir = /proj/imn/HOMME_ACME # run directory -set HOMME = /home/imn/ACME/models/atm/homme # HOMME svn checkout +set HOMME = /home/imn/ACME/components/homme # HOMME svn checkout set input = $HOMME/test/jw_baroclinic/openacc_work # input files for test case set vdir = $HOMME/test/vcoord # vertical coordinate files set exedir = $HOMME/build/preqx @@ -28,9 +28,9 @@ limit stacksize unlimited setenv MPSTKZ 64M setenv OMP_STACKSIZE 64M -setenv CUDA_PROFILE 1 -setenv CUDA_PROFILE_LOG "cuda_profile_%p.log" 
-setenv CUDA_PROFILE_CONFIG ./cuda_config +#setenv CUDA_PROFILE 1 +#setenv CUDA_PROFILE_LOG "cuda_profile_%p.log" +#setenv CUDA_PROFILE_CONFIG ./cuda_config #set i=0 #mkdir -f /tmp/mps_$i diff --git a/components/homme/test/reg_test/namelists/swtc1-dg.nl b/components/homme/test/reg_test/namelists/swtc1-dg.nl deleted file mode 100644 index 5bed81bde07..00000000000 --- a/components/homme/test/reg_test/namelists/swtc1-dg.nl +++ /dev/null @@ -1,69 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep = 100 -! 12 days at 120.0 stepsize: nmax= 8640 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = 'swtc1' -ne = 10 -ndays = 12 -statefreq = 864 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 100 -integration = "explicit" -smooth = 0.05 -nu = 0 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! -!=======================================================! 
-output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 4 -alpha_dg = 0.0D0 -/ diff --git a/components/homme/test/reg_test/namelists/swtc2-dg.nl b/components/homme/test/reg_test/namelists/swtc2-dg.nl deleted file mode 100644 index eca5a773ca5..00000000000 --- a/components/homme/test/reg_test/namelists/swtc2-dg.nl +++ /dev/null @@ -1,70 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep ! -! 12 days at 30.0 stepsize: nmax= 34560 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = "swtc2" -ne = 6 -ndays = 5 -statefreq = 2880 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 30.0 -integration = "explicit" -smooth = 0.05 -nu = 7.0e5 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! -!=======================================================! 
-output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 4 -alpha_dg = 0.0D0 -/ - diff --git a/components/homme/test/reg_test/namelists/swtc5-dg.nl b/components/homme/test/reg_test/namelists/swtc5-dg.nl deleted file mode 100644 index 75324e58b89..00000000000 --- a/components/homme/test/reg_test/namelists/swtc5-dg.nl +++ /dev/null @@ -1,69 +0,0 @@ -!=======================================================! -! 1 day = 1 * 24 * 3600 = 86400 sec ! -! nmax = ndays * 86400 / tstep ! -! 12 days at 30.0 stepsize: nmax= 34560 ! -!=======================================================! -&ctl_nl -NThreads = 1 -partmethod = 4 -topology = "cube" -test_case = "swtc5" -ne = 6 -ndays = 15 -statefreq = 2880 -tasknum = 0 -restartfreq = -1 -restartfile = "./restart/R000000050" -runtype = 0 -tstep = 30.0 -integration = "explicit" -smooth = 0.05 -nu = 7.0e5 -/ -&solver_nl -precon_method = "block_jacobi" -maxits = 100 -tol = 1.e-12 -/ -&filter_nl -transfer_type = "bv" -filter_type = "taylor" -filter_freq = 1 -filter_mu = 0.05D0 -p_bv = 12.0D0 -s_bv = .666666666666666666D0 -wght_fm = 0.10D0 -kcut_fm = 2 -/ -&analysis_nl -!=======================================================! -! currently up to 5 streams are allowed ! -! output_stream_count=1 ! -! ! -! timunits: 0= steps, 1=days, 2=hours ! -! output_timeunits=1,2 ! -! output_start_time=0,1176 ! -! output_end_time=-1,-1 ! -! output_frequency=1,1 ! -! output_dir ="./movies/" ! -! ! -! allowed variables: 'ps ','geop ','u ','v ', ! -! 'latp ','lonp ','latv ','lonv ', ! -! 'elem ','Time ' ! -! ! -! output_varnames1-5 ! -!=======================================================! 
-output_start_time = 0 -output_end_time = -1 -output_frequency = 1 -output_timeunits = 1 -output_varnames1 = 'u', 'v', 'ps', 'geop', 'zeta' -output_type = 'netcdf' -interp_nlon = 720 -interp_nlat = 360 -/ -&dg_nl -riemanntype= 0 -alphatype= 0 -alpha_dg = 0.0D0 -/ diff --git a/components/homme/test/reg_test/run_tests/baro1a.cmake b/components/homme/test/reg_test/run_tests/baro1a.cmake index 89b79b23c36..973a7ceab6e 100644 --- a/components/homme/test/reg_test/run_tests/baro1a.cmake +++ b/components/homme/test/reg_test/run_tests/baro1a.cmake @@ -10,8 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro1a) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroA) diff --git a/components/homme/test/reg_test/run_tests/baro1b.cmake b/components/homme/test/reg_test/run_tests/baro1b.cmake index 42bf345f79a..8c6f9125397 100644 --- a/components/homme/test/reg_test/run_tests/baro1b.cmake +++ b/components/homme/test/reg_test/run_tests/baro1b.cmake @@ -10,8 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro1b) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroB) diff --git a/components/homme/test/reg_test/run_tests/baro2a.cmake b/components/homme/test/reg_test/run_tests/baro2a.cmake index 0eee75a6c69..f7bfbfcce9a 100644 --- a/components/homme/test/reg_test/run_tests/baro2a.cmake +++ b/components/homme/test/reg_test/run_tests/baro2a.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2a) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2b.cmake b/components/homme/test/reg_test/run_tests/baro2b.cmake index cfeb4ae313b..cc88c15c1e6 100644 --- a/components/homme/test/reg_test/run_tests/baro2b.cmake +++ b/components/homme/test/reg_test/run_tests/baro2b.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2b) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2c.cmake b/components/homme/test/reg_test/run_tests/baro2c.cmake index 82101c0e9af..425da22414f 100644 --- a/components/homme/test/reg_test/run_tests/baro2c.cmake +++ b/components/homme/test/reg_test/run_tests/baro2c.cmake @@ -4,8 +4,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2c) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake index 66881f565d9..f7082997f4d 100644 --- a/components/homme/test/reg_test/run_tests/baro2d-imp.cmake +++ b/components/homme/test/reg_test/run_tests/baro2d-imp.cmake @@ -10,9 +10,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2d-imp) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE prim) - # The specifically compiled executable that this test uses SET(EXEC_NAME primA) diff --git a/components/homme/test/reg_test/run_tests/baro2d.cmake b/components/homme/test/reg_test/run_tests/baro2d.cmake index bfc76e55de7..f51a7955525 100644 --- a/components/homme/test/reg_test/run_tests/baro2d.cmake +++ b/components/homme/test/reg_test/run_tests/baro2d.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baro2d) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroC) diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake index 5a240184657..688ff6644e9 100644 --- a/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake +++ b/components/homme/test/reg_test/run_tests/baroCamMoist-SL.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baroCamMoistSL) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake new file mode 100644 index 00000000000..9767cc947a8 --- /dev/null +++ b/components/homme/test/reg_test/run_tests/baroCamMoist-acc.cmake @@ -0,0 +1,51 @@ +############################################################### +# RK + PIO_INTERP +############################################################### +# +# Spectral Element -- 9 days of ASP baroclinic test +# (Jablonowski and Williamson test + 4 tracers) +# NE=15, dt=150, nu=1e16, filter_freq=0, NV=4, PLEV=26 +# (explicit RK with subcycling) +# +############################################################### + +# The name of this test (should be the basename of this file) +SET(TEST_NAME baroCamMoist-acc) + +# The specifically compiled executable that this test uses +SET(EXEC_NAME baroCam-acc) + +SET(NUM_CPUS 16) + +SET(NAMELIST_FILES +${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist.nl +) +SET(VCOORD_FILES ${HOMME_ROOT}/test/vcoord/*26*) +SET(REFSOLN_FILES ${HOMME_ROOT}/test/reg_test/ref_sol/T340ref.nc) + +SET(NC_OUTPUT_FILES + camBaroMoist-asp_baroclinic1.nc + camBaroMoist-asp_baroclinic2.nc +) + + +# OMP tests (disabled for now) +#SET(OMP_SUB_TESTS true) +#SET(OMP_NUM_THREADS 4) +#SET(OMP_NAMELIST_FILES +#${HOMME_ROOT}/test/reg_test/namelists/baroCamMoist-omp4.nl +#) + + +# compare output with CPU-only baroCamMoist test: +SET(TESTCASE_REF_TOL 1E-11) +SET(NC_OUTPUT_REF + ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic1.nc + ../../baroCamMoist/movies/camBaroMoist-asp_baroclinic2.nc +) +SET(NC_OUTPUT_CHECKREF + camBaroMoist-asp_baroclinic1.nc + camBaroMoist-asp_baroclinic2.nc +) + + diff --git a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake index 904ee7f6696..3989967caee 100644 --- 
a/components/homme/test/reg_test/run_tests/baroCamMoist.cmake +++ b/components/homme/test/reg_test/run_tests/baroCamMoist.cmake @@ -11,8 +11,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME baroCamMoist) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/swimtc5.cmake b/components/homme/test/reg_test/run_tests/swimtc5.cmake index a2ba5e84b9f..e5398816556 100644 --- a/components/homme/test/reg_test/run_tests/swimtc5.cmake +++ b/components/homme/test/reg_test/run_tests/swimtc5.cmake @@ -5,8 +5,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swimtc5) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE swim) # The specifically compiled executable that this test uses SET(EXEC_NAME swim5) diff --git a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake b/components/homme/test/reg_test/run_tests/swtc1-dg.cmake deleted file mode 100644 index 7dc8bc37c9a..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc1-dg.cmake +++ /dev/null @@ -1,19 +0,0 @@ -############################################################### -# -# Discontinuous Galerkin -- swtc1 -# NE=10, dt=100, nu=0, limiter=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc1-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) - -SET(NC_OUTPUT_FILES swtc11.nc) diff --git a/components/homme/test/reg_test/run_tests/swtc1.cmake b/components/homme/test/reg_test/run_tests/swtc1.cmake index de170ab52b3..ce3ddaf3590 100644 --- a/components/homme/test/reg_test/run_tests/swtc1.cmake +++ b/components/homme/test/reg_test/run_tests/swtc1.cmake @@ -9,8 +9,7 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc1) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) + # The specifically compiled executable that this test uses SET(EXEC_NAME swtcA) diff --git a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake b/components/homme/test/reg_test/run_tests/swtc2-dg.cmake deleted file mode 100644 index dcbc5b9d6ee..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc2-dg.cmake +++ /dev/null @@ -1,21 +0,0 @@ -############################################################### -# RKSSP default benchmark (used to check nothing is broken) -############################################################### -# -# Discontinous Galerkin swtc2 -# NE=6, dt=30, nu=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc2-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) - -SET(NC_OUTPUT_FILES swtc21.nc) diff --git a/components/homme/test/reg_test/run_tests/swtc2.cmake b/components/homme/test/reg_test/run_tests/swtc2.cmake index eac2e7bfdd0..1659361a0b9 100644 --- a/components/homme/test/reg_test/run_tests/swtc2.cmake +++ b/components/homme/test/reg_test/run_tests/swtc2.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc2) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcA) diff --git a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake b/components/homme/test/reg_test/run_tests/swtc5-dg.cmake deleted file mode 100644 index 8d482b38d7d..00000000000 --- a/components/homme/test/reg_test/run_tests/swtc5-dg.cmake +++ /dev/null @@ -1,21 +0,0 @@ -############################################################### -# -# Discontinuous Galerkin -- swtc5 -# NE=6, dt=30, nu=7e5, limiter=0, filter_freq=1, NP=6 -# -############################################################### - -# The name of this test (should be the basename of this file) -SET(TEST_NAME swtc5-dg) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE swdgx) -# The specifically compiled executable that this test uses -SET(EXEC_NAME swtc-dgA) - -SET(NUM_CPUS 16) - -SET(NAMELIST_FILES ${HOMME_ROOT}/test/reg_test/namelists/${TEST_NAME}.nl) -SET(NCL_FILES ${HOMME_ROOT}/test/reg_test/ncl/swtc5ref.ncl) - -SET(NC_OUTPUT_FILES swtc51.nc) - diff --git a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake index 166bd7e3893..16155506e38 100644 --- a/components/homme/test/reg_test/run_tests/swtc5-mr.cmake +++ b/components/homme/test/reg_test/run_tests/swtc5-mr.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc5-mr) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcB) diff --git a/components/homme/test/reg_test/run_tests/swtc5.cmake b/components/homme/test/reg_test/run_tests/swtc5.cmake index 9407f6c62ec..35de2a7b160 100644 --- a/components/homme/test/reg_test/run_tests/swtc5.cmake +++ b/components/homme/test/reg_test/run_tests/swtc5.cmake @@ -9,8 +9,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc5) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcB) diff --git a/components/homme/test/reg_test/run_tests/swtc6.cmake b/components/homme/test/reg_test/run_tests/swtc6.cmake index 894eb9b578f..d0a8bef306e 100644 --- a/components/homme/test/reg_test/run_tests/swtc6.cmake +++ b/components/homme/test/reg_test/run_tests/swtc6.cmake @@ -7,8 +7,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME swtc6) -# The type of run (preqx,sweqx,swdgx,etc.) 
-SET(TEST_TYPE sweqx) # The specifically compiled executable that this test uses SET(EXEC_NAME swtcC) diff --git a/components/homme/test/reg_test/run_tests/templates.cmake b/components/homme/test/reg_test/run_tests/templates.cmake index 2b413d73ccb..d50f27d9de9 100644 --- a/components/homme/test/reg_test/run_tests/templates.cmake +++ b/components/homme/test/reg_test/run_tests/templates.cmake @@ -15,8 +15,6 @@ # The name of this test (should be the basename of this file) SET(TEST_NAME templates) -# The type of run (preqx,sweqx,swdgx,etc.) -SET(TEST_TYPE preqx) # The specifically compiled executable that this test uses SET(EXEC_NAME baroCam) diff --git a/components/homme/test/reg_test/run_tests/test-list.cmake b/components/homme/test/reg_test/run_tests/test-list.cmake index c66831ccebb..12fbf8a3367 100644 --- a/components/homme/test/reg_test/run_tests/test-list.cmake +++ b/components/homme/test/reg_test/run_tests/test-list.cmake @@ -10,6 +10,13 @@ SET(HOMME_TESTS baro2d.cmake baroCamMoist.cmake baroCamMoist-SL.cmake + baroCamMoist-acc.cmake baro2d-imp.cmake templates.cmake ) + + +#set_test_properties( +# baroCamMoist-acc.cmake PROPERTY DEPENDS baroCamMoist.cmake +#) + diff --git a/components/homme/test/reg_test/run_tests/testing-utils.sh b/components/homme/test/reg_test/run_tests/testing-utils.sh index a2a364de619..8ce21fcb1f9 100644 --- a/components/homme/test/reg_test/run_tests/testing-utils.sh +++ b/components/homme/test/reg_test/run_tests/testing-utils.sh @@ -482,16 +482,20 @@ execLine() { echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT elif [ "${MPI_EXEC}" = "runjob" ]; then echo "runjob -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} --block \$COBALT_PARTNAME --verbose=INFO : $EXEC $OPT" >> $RUN_SCRIPT + elif [ "${MPI_EXEC}" = "aprun" ] ; then + if [[ $4 == *"_OMP"* ]]; then + echo "aprun -n ${NUM_MPI_PROCS} -d ${OMP_NUMBER_THREADS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT + else + echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC 
$OPT" >> $RUN_SCRIPT + fi else echo "${MPI_EXEC} -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT fi else if [ "$HOMME_Submission_Type" = lsf ]; then echo "mpirun.lsf -pam \"-n ${NUM_MPI_PROCS}\" ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT - elif [ "$HOMME_Submission_Type" = pbs ]; then - echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT - + echo "aprun -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT else echo "mpiexec -n ${NUM_MPI_PROCS} ${MPI_OPTIONS} $EXEC $OPT" >> $RUN_SCRIPT diff --git a/components/homme/test_execs/CMakeLists.txt b/components/homme/test_execs/CMakeLists.txt index e2e6b4abda1..5017b92ceea 100644 --- a/components/homme/test_execs/CMakeLists.txt +++ b/components/homme/test_execs/CMakeLists.txt @@ -167,6 +167,7 @@ IF(${BUILD_HOMME_PREQX}) # ADD_SUBDIRECTORY(baroB) ADD_SUBDIRECTORY(baroC) ADD_SUBDIRECTORY(baroCam) + ADD_SUBDIRECTORY(baroCam-acc) ENDIF() # Add the test exec subdirs for the prim executable diff --git a/components/homme/test_execs/baroA/CMakeLists.txt b/components/homme/test_execs/baroA/CMakeLists.txt index cf504ddc027..a378d446ec0 100644 --- a/components/homme/test_execs/baroA/CMakeLists.txt +++ b/components/homme/test_execs/baroA/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroA preqx 8 4 20 FALSE TRUE) diff --git a/components/homme/test_execs/baroB/CMakeLists.txt b/components/homme/test_execs/baroB/CMakeLists.txt index ef96e91490e..1d4ba9cbd5f 100644 --- a/components/homme/test_execs/baroB/CMakeLists.txt +++ b/components/homme/test_execs/baroB/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + 
setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroB preqx 8 4 20 TRUE FALSE) diff --git a/components/homme/test_execs/baroC/CMakeLists.txt b/components/homme/test_execs/baroC/CMakeLists.txt index b1500f3abe1..c4118fa1298 100644 --- a/components/homme/test_execs/baroC/CMakeLists.txt +++ b/components/homme/test_execs/baroC/CMakeLists.txt @@ -1,4 +1,8 @@ +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroC preqx 4 4 26 FALSE TRUE) diff --git a/components/homme/test_execs/baroCam-acc/CMakeLists.txt b/components/homme/test_execs/baroCam-acc/CMakeLists.txt new file mode 100644 index 00000000000..f48313201f8 --- /dev/null +++ b/components/homme/test_execs/baroCam-acc/CMakeLists.txt @@ -0,0 +1,12 @@ +# Set the qsize_d in dimensions_mod.F90 +SET(QSIZE_D 25) +SET(USE_OPENACC TRUE) +SET(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${OPENACC_Fortran_FLAGS}") +SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OPENACC_Linker_FLAGS}") +# If the user specified a file for custom compiler options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () +# Set the variables for this test executable +# NP NC PLEV USE_PIO WITH_ENERGY +createTestExec(baroCam-acc preqx_acc 4 4 26 TRUE TRUE) diff --git a/components/homme/test_execs/baroCam/CMakeLists.txt b/components/homme/test_execs/baroCam/CMakeLists.txt index 21c7160c5c5..a56151eefb0 100644 --- a/components/homme/test_execs/baroCam/CMakeLists.txt +++ b/components/homme/test_execs/baroCam/CMakeLists.txt @@ -1,5 +1,9 @@ # Set the qsize_d in dimensions_mod.F90 SET(QSIZE_D 25) +# If the user specified a file for custom compiler 
options use those +IF (DEFINED PREQX_CUSTOM_FLAGS_FILE) + setCustomCompilerFlags(PREQX_CUSTOM_FLAGS_FILE PREQX_SRCS_F90) +ENDIF () # Set the variables for this test executable # NP NC PLEV USE_PIO WITH_ENERGY createTestExec(baroCam preqx 4 4 26 TRUE TRUE)